Diffstat (limited to 'ANDROID_3.4.5/fs/ext4')
-rw-r--r--  ANDROID_3.4.5/fs/ext4/Kconfig  85
-rw-r--r--  ANDROID_3.4.5/fs/ext4/Makefile  14
-rw-r--r--  ANDROID_3.4.5/fs/ext4/acl.c  439
-rw-r--r--  ANDROID_3.4.5/fs/ext4/acl.h  77
-rw-r--r--  ANDROID_3.4.5/fs/ext4/balloc.c  766
-rw-r--r--  ANDROID_3.4.5/fs/ext4/bitmap.c  31
-rw-r--r--  ANDROID_3.4.5/fs/ext4/block_validity.c  268
-rw-r--r--  ANDROID_3.4.5/fs/ext4/dir.c  667
-rw-r--r--  ANDROID_3.4.5/fs/ext4/ext4.h  2372
-rw-r--r--  ANDROID_3.4.5/fs/ext4/ext4_extents.h  296
-rw-r--r--  ANDROID_3.4.5/fs/ext4/ext4_jbd2.c  154
-rw-r--r--  ANDROID_3.4.5/fs/ext4/ext4_jbd2.h  399
-rw-r--r--  ANDROID_3.4.5/fs/ext4/extents.c  4866
-rw-r--r--  ANDROID_3.4.5/fs/ext4/file.c  262
-rw-r--r--  ANDROID_3.4.5/fs/ext4/fsync.c  271
-rw-r--r--  ANDROID_3.4.5/fs/ext4/hash.c  208
-rw-r--r--  ANDROID_3.4.5/fs/ext4/ialloc.c  1161
-rw-r--r--  ANDROID_3.4.5/fs/ext4/indirect.c  1502
-rw-r--r--  ANDROID_3.4.5/fs/ext4/inode.c  4676
-rw-r--r--  ANDROID_3.4.5/fs/ext4/ioctl.c  509
-rw-r--r--  ANDROID_3.4.5/fs/ext4/mballoc.c  5047
-rw-r--r--  ANDROID_3.4.5/fs/ext4/mballoc.h  222
-rw-r--r--  ANDROID_3.4.5/fs/ext4/migrate.c  604
-rw-r--r--  ANDROID_3.4.5/fs/ext4/mmp.c  353
-rw-r--r--  ANDROID_3.4.5/fs/ext4/move_extent.c  1423
-rw-r--r--  ANDROID_3.4.5/fs/ext4/namei.c  2607
-rw-r--r--  ANDROID_3.4.5/fs/ext4/page-io.c  433
-rw-r--r--  ANDROID_3.4.5/fs/ext4/resize.c  1689
-rw-r--r--  ANDROID_3.4.5/fs/ext4/super.c  4980
-rw-r--r--  ANDROID_3.4.5/fs/ext4/symlink.c  56
-rw-r--r--  ANDROID_3.4.5/fs/ext4/truncate.h  43
-rw-r--r--  ANDROID_3.4.5/fs/ext4/xattr.c  1608
-rw-r--r--  ANDROID_3.4.5/fs/ext4/xattr.h  155
-rw-r--r--  ANDROID_3.4.5/fs/ext4/xattr_security.c  82
-rw-r--r--  ANDROID_3.4.5/fs/ext4/xattr_trusted.c  58
-rw-r--r--  ANDROID_3.4.5/fs/ext4/xattr_user.c  61
36 files changed, 0 insertions(+), 38444 deletions(-)
diff --git a/ANDROID_3.4.5/fs/ext4/Kconfig b/ANDROID_3.4.5/fs/ext4/Kconfig
deleted file mode 100644
index 9ed1bb1f..00000000
--- a/ANDROID_3.4.5/fs/ext4/Kconfig
+++ /dev/null
@@ -1,85 +0,0 @@
-config EXT4_FS
- tristate "The Extended 4 (ext4) filesystem"
- select JBD2
- select CRC16
- help
- This is the next generation of the ext3 filesystem.
-
- Unlike the change from ext2 filesystem to ext3 filesystem,
- the on-disk format of ext4 is not forwards compatible with
- ext3; it is based on extent maps and it supports 48-bit
- physical block numbers. The ext4 filesystem also supports delayed
- allocation, persistent preallocation, high resolution time stamps,
- and a number of other features to improve performance and speed
- up fsck time. For more information, please see the web pages at
- http://ext4.wiki.kernel.org.
-
- The ext4 filesystem will support mounting an ext3
- filesystem; while there will be some performance gains from
- the delayed allocation and inode table readahead, the best
- performance gains will require enabling ext4 features in the
- filesystem, or formatting a new filesystem as an ext4
- filesystem initially.
-
- To compile this file system support as a module, choose M here. The
- module will be called ext4.
-
- If unsure, say N.
-
-config EXT4_USE_FOR_EXT23
- bool "Use ext4 for ext2/ext3 file systems"
- depends on EXT4_FS
- depends on EXT3_FS=n || EXT2_FS=n
- default y
- help
- Allow the ext4 file system driver code to be used for ext2 or
- ext3 file system mounts. This allows users to reduce their
- compiled kernel size by using one file system driver for
- ext2, ext3, and ext4 file systems.
-
-config EXT4_FS_XATTR
- bool "Ext4 extended attributes"
- depends on EXT4_FS
- default y
- help
- Extended attributes are name:value pairs associated with inodes by
- the kernel or by users (see the attr(5) manual page, or visit
- <http://acl.bestbits.at/> for details).
-
- If unsure, say N.
-
- You need this for POSIX ACL support on ext4.
-
-config EXT4_FS_POSIX_ACL
- bool "Ext4 POSIX Access Control Lists"
- depends on EXT4_FS_XATTR
- select FS_POSIX_ACL
- help
- POSIX Access Control Lists (ACLs) support permissions for users and
- groups beyond the owner/group/world scheme.
-
- To learn more about Access Control Lists, visit the POSIX ACLs for
- Linux website <http://acl.bestbits.at/>.
-
- If you don't know what Access Control Lists are, say N.
-
-config EXT4_FS_SECURITY
- bool "Ext4 Security Labels"
- depends on EXT4_FS_XATTR
- help
- Security labels support alternative access control models
- implemented by security modules like SELinux. This option
- enables an extended attribute handler for file security
- labels in the ext4 filesystem.
-
- If you are not using a security module that requires using
- extended attributes for file security labels, say N.
-
-config EXT4_DEBUG
- bool "EXT4 debugging support"
- depends on EXT4_FS
- help
- Enables run-time debugging support for the ext4 filesystem.
-
- If you select Y here, then you will be able to turn on debugging
- with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug"
diff --git a/ANDROID_3.4.5/fs/ext4/Makefile b/ANDROID_3.4.5/fs/ext4/Makefile
deleted file mode 100644
index 56fd8f86..00000000
--- a/ANDROID_3.4.5/fs/ext4/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-#
-# Makefile for the linux ext4-filesystem routines.
-#
-
-obj-$(CONFIG_EXT4_FS) += ext4.o
-
-ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
- ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
- ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
- mmp.o indirect.o
-
-ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
-ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
-ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
diff --git a/ANDROID_3.4.5/fs/ext4/acl.c b/ANDROID_3.4.5/fs/ext4/acl.c
deleted file mode 100644
index a5c29bb3..00000000
--- a/ANDROID_3.4.5/fs/ext4/acl.c
+++ /dev/null
@@ -1,439 +0,0 @@
-/*
- * linux/fs/ext4/acl.c
- *
- * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
- */
-
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/capability.h>
-#include <linux/fs.h>
-#include "ext4_jbd2.h"
-#include "ext4.h"
-#include "xattr.h"
-#include "acl.h"
-
-/*
- * Convert from filesystem to in-memory representation.
- */
-static struct posix_acl *
-ext4_acl_from_disk(const void *value, size_t size)
-{
- const char *end = (char *)value + size;
- int n, count;
- struct posix_acl *acl;
-
- if (!value)
- return NULL;
- if (size < sizeof(ext4_acl_header))
- return ERR_PTR(-EINVAL);
- if (((ext4_acl_header *)value)->a_version !=
- cpu_to_le32(EXT4_ACL_VERSION))
- return ERR_PTR(-EINVAL);
- value = (char *)value + sizeof(ext4_acl_header);
- count = ext4_acl_count(size);
- if (count < 0)
- return ERR_PTR(-EINVAL);
- if (count == 0)
- return NULL;
- acl = posix_acl_alloc(count, GFP_NOFS);
- if (!acl)
- return ERR_PTR(-ENOMEM);
- for (n = 0; n < count; n++) {
- ext4_acl_entry *entry =
- (ext4_acl_entry *)value;
- if ((char *)value + sizeof(ext4_acl_entry_short) > end)
- goto fail;
- acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
- acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
-
- switch (acl->a_entries[n].e_tag) {
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- value = (char *)value +
- sizeof(ext4_acl_entry_short);
- acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
- break;
-
- case ACL_USER:
- case ACL_GROUP:
- value = (char *)value + sizeof(ext4_acl_entry);
- if ((char *)value > end)
- goto fail;
- acl->a_entries[n].e_id =
- le32_to_cpu(entry->e_id);
- break;
-
- default:
- goto fail;
- }
- }
- if (value != end)
- goto fail;
- return acl;
-
-fail:
- posix_acl_release(acl);
- return ERR_PTR(-EINVAL);
-}
-
-/*
- * Convert from in-memory to filesystem representation.
- */
-static void *
-ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
-{
- ext4_acl_header *ext_acl;
- char *e;
- size_t n;
-
- *size = ext4_acl_size(acl->a_count);
- ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count *
- sizeof(ext4_acl_entry), GFP_NOFS);
- if (!ext_acl)
- return ERR_PTR(-ENOMEM);
- ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
- e = (char *)ext_acl + sizeof(ext4_acl_header);
- for (n = 0; n < acl->a_count; n++) {
- ext4_acl_entry *entry = (ext4_acl_entry *)e;
- entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
- entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
- switch (acl->a_entries[n].e_tag) {
- case ACL_USER:
- case ACL_GROUP:
- entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
- e += sizeof(ext4_acl_entry);
- break;
-
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- e += sizeof(ext4_acl_entry_short);
- break;
-
- default:
- goto fail;
- }
- }
- return (char *)ext_acl;
-
-fail:
- kfree(ext_acl);
- return ERR_PTR(-EINVAL);
-}
-
-/*
- * Inode operation get_posix_acl().
- *
- * inode->i_mutex: don't care
- */
-struct posix_acl *
-ext4_get_acl(struct inode *inode, int type)
-{
- int name_index;
- char *value = NULL;
- struct posix_acl *acl;
- int retval;
-
- if (!test_opt(inode->i_sb, POSIX_ACL))
- return NULL;
-
- acl = get_cached_acl(inode, type);
- if (acl != ACL_NOT_CACHED)
- return acl;
-
- switch (type) {
- case ACL_TYPE_ACCESS:
- name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
- break;
- case ACL_TYPE_DEFAULT:
- name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
- break;
- default:
- BUG();
- }
- retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
- if (retval > 0) {
- value = kmalloc(retval, GFP_NOFS);
- if (!value)
- return ERR_PTR(-ENOMEM);
- retval = ext4_xattr_get(inode, name_index, "", value, retval);
- }
- if (retval > 0)
- acl = ext4_acl_from_disk(value, retval);
- else if (retval == -ENODATA || retval == -ENOSYS)
- acl = NULL;
- else
- acl = ERR_PTR(retval);
- kfree(value);
-
- if (!IS_ERR(acl))
- set_cached_acl(inode, type, acl);
-
- return acl;
-}
-
-/*
- * Set the access or default ACL of an inode.
- *
- * inode->i_mutex: down unless called from ext4_new_inode
- */
-static int
-ext4_set_acl(handle_t *handle, struct inode *inode, int type,
- struct posix_acl *acl)
-{
- int name_index;
- void *value = NULL;
- size_t size = 0;
- int error;
-
- if (S_ISLNK(inode->i_mode))
- return -EOPNOTSUPP;
-
- switch (type) {
- case ACL_TYPE_ACCESS:
- name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
- if (acl) {
- error = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (error < 0)
- return error;
- else {
- inode->i_ctime = ext4_current_time(inode);
- ext4_mark_inode_dirty(handle, inode);
- if (error == 0)
- acl = NULL;
- }
- }
- break;
-
- case ACL_TYPE_DEFAULT:
- name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
- if (!S_ISDIR(inode->i_mode))
- return acl ? -EACCES : 0;
- break;
-
- default:
- return -EINVAL;
- }
- if (acl) {
- value = ext4_acl_to_disk(acl, &size);
- if (IS_ERR(value))
- return (int)PTR_ERR(value);
- }
-
- error = ext4_xattr_set_handle(handle, inode, name_index, "",
- value, size, 0);
-
- kfree(value);
- if (!error)
- set_cached_acl(inode, type, acl);
-
- return error;
-}
-
-/*
- * Initialize the ACLs of a new inode. Called from ext4_new_inode.
- *
- * dir->i_mutex: down
- * inode->i_mutex: up (access to inode is still exclusive)
- */
-int
-ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
-{
- struct posix_acl *acl = NULL;
- int error = 0;
-
- if (!S_ISLNK(inode->i_mode)) {
- if (test_opt(dir->i_sb, POSIX_ACL)) {
- acl = ext4_get_acl(dir, ACL_TYPE_DEFAULT);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- }
- if (!acl)
- inode->i_mode &= ~current_umask();
- }
- if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
- if (S_ISDIR(inode->i_mode)) {
- error = ext4_set_acl(handle, inode,
- ACL_TYPE_DEFAULT, acl);
- if (error)
- goto cleanup;
- }
- error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
- if (error < 0)
- return error;
-
- if (error > 0) {
- /* This is an extended ACL */
- error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, acl);
- }
- }
-cleanup:
- posix_acl_release(acl);
- return error;
-}
-
-/*
- * Does chmod for an inode that may have an Access Control List. The
- * inode->i_mode field must be updated to the desired value by the caller
- * before calling this function.
- * Returns 0 on success, or a negative error number.
- *
- * We change the ACL rather than storing some ACL entries in the file
- * mode permission bits (which would be more efficient), because that
- * would break once additional permissions (like ACL_APPEND, ACL_DELETE
- * for directories) are added. There are no more bits available in the
- * file mode.
- *
- * inode->i_mutex: down
- */
-int
-ext4_acl_chmod(struct inode *inode)
-{
- struct posix_acl *acl;
- handle_t *handle;
- int retries = 0;
- int error;
-
-
- if (S_ISLNK(inode->i_mode))
- return -EOPNOTSUPP;
- if (!test_opt(inode->i_sb, POSIX_ACL))
- return 0;
- acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl) || !acl)
- return PTR_ERR(acl);
- error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
- if (error)
- return error;
-retry:
- handle = ext4_journal_start(inode,
- EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
- if (IS_ERR(handle)) {
- error = PTR_ERR(handle);
- ext4_std_error(inode->i_sb, error);
- goto out;
- }
- error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, acl);
- ext4_journal_stop(handle);
- if (error == -ENOSPC &&
- ext4_should_retry_alloc(inode->i_sb, &retries))
- goto retry;
-out:
- posix_acl_release(acl);
- return error;
-}
-
-/*
- * Extended attribute handlers
- */
-static size_t
-ext4_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
- const char *name, size_t name_len, int type)
-{
- const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
-
- if (!test_opt(dentry->d_sb, POSIX_ACL))
- return 0;
- if (list && size <= list_len)
- memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
- return size;
-}
-
-static size_t
-ext4_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
- const char *name, size_t name_len, int type)
-{
- const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
-
- if (!test_opt(dentry->d_sb, POSIX_ACL))
- return 0;
- if (list && size <= list_len)
- memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
- return size;
-}
-
-static int
-ext4_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
- size_t size, int type)
-{
- struct posix_acl *acl;
- int error;
-
- if (strcmp(name, "") != 0)
- return -EINVAL;
- if (!test_opt(dentry->d_sb, POSIX_ACL))
- return -EOPNOTSUPP;
-
- acl = ext4_get_acl(dentry->d_inode, type);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl == NULL)
- return -ENODATA;
- error = posix_acl_to_xattr(acl, buffer, size);
- posix_acl_release(acl);
-
- return error;
-}
-
-static int
-ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
- size_t size, int flags, int type)
-{
- struct inode *inode = dentry->d_inode;
- handle_t *handle;
- struct posix_acl *acl;
- int error, retries = 0;
-
- if (strcmp(name, "") != 0)
- return -EINVAL;
- if (!test_opt(inode->i_sb, POSIX_ACL))
- return -EOPNOTSUPP;
- if (!inode_owner_or_capable(inode))
- return -EPERM;
-
- if (value) {
- acl = posix_acl_from_xattr(value, size);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- else if (acl) {
- error = posix_acl_valid(acl);
- if (error)
- goto release_and_out;
- }
- } else
- acl = NULL;
-
-retry:
- handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
- if (IS_ERR(handle))
- return PTR_ERR(handle);
- error = ext4_set_acl(handle, inode, type, acl);
- ext4_journal_stop(handle);
- if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
- goto retry;
-
-release_and_out:
- posix_acl_release(acl);
- return error;
-}
-
-const struct xattr_handler ext4_xattr_acl_access_handler = {
- .prefix = POSIX_ACL_XATTR_ACCESS,
- .flags = ACL_TYPE_ACCESS,
- .list = ext4_xattr_list_acl_access,
- .get = ext4_xattr_get_acl,
- .set = ext4_xattr_set_acl,
-};
-
-const struct xattr_handler ext4_xattr_acl_default_handler = {
- .prefix = POSIX_ACL_XATTR_DEFAULT,
- .flags = ACL_TYPE_DEFAULT,
- .list = ext4_xattr_list_acl_default,
- .get = ext4_xattr_get_acl,
- .set = ext4_xattr_set_acl,
-};
diff --git a/ANDROID_3.4.5/fs/ext4/acl.h b/ANDROID_3.4.5/fs/ext4/acl.h
deleted file mode 100644
index 18cb39ed..00000000
--- a/ANDROID_3.4.5/fs/ext4/acl.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- File: fs/ext4/acl.h
-
- (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
-*/
-
-#include <linux/posix_acl_xattr.h>
-
-#define EXT4_ACL_VERSION 0x0001
-
-typedef struct {
- __le16 e_tag;
- __le16 e_perm;
- __le32 e_id;
-} ext4_acl_entry;
-
-typedef struct {
- __le16 e_tag;
- __le16 e_perm;
-} ext4_acl_entry_short;
-
-typedef struct {
- __le32 a_version;
-} ext4_acl_header;
-
-static inline size_t ext4_acl_size(int count)
-{
- if (count <= 4) {
- return sizeof(ext4_acl_header) +
- count * sizeof(ext4_acl_entry_short);
- } else {
- return sizeof(ext4_acl_header) +
- 4 * sizeof(ext4_acl_entry_short) +
- (count - 4) * sizeof(ext4_acl_entry);
- }
-}
-
-static inline int ext4_acl_count(size_t size)
-{
- ssize_t s;
- size -= sizeof(ext4_acl_header);
- s = size - 4 * sizeof(ext4_acl_entry_short);
- if (s < 0) {
- if (size % sizeof(ext4_acl_entry_short))
- return -1;
- return size / sizeof(ext4_acl_entry_short);
- } else {
- if (s % sizeof(ext4_acl_entry))
- return -1;
- return s / sizeof(ext4_acl_entry) + 4;
- }
-}
-
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-
-/* acl.c */
-struct posix_acl *ext4_get_acl(struct inode *inode, int type);
-extern int ext4_acl_chmod(struct inode *);
-extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
-
-#else /* CONFIG_EXT4_FS_POSIX_ACL */
-#include <linux/sched.h>
-#define ext4_get_acl NULL
-
-static inline int
-ext4_acl_chmod(struct inode *inode)
-{
- return 0;
-}
-
-static inline int
-ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
-{
- return 0;
-}
-#endif /* CONFIG_EXT4_FS_POSIX_ACL */
-
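
The size/count helpers in acl.h above encode the on-disk rule that at most four entries (ACL_USER_OBJ, ACL_GROUP_OBJ, ACL_MASK, ACL_OTHER) are stored in the short 4-byte form, while ACL_USER/ACL_GROUP entries also carry a 4-byte e_id. A minimal user-space sketch of the same arithmetic, with the struct sizes restated as plain constants (an assumption matching the packed layouts shown above, not taken from kernel headers):

#include <stdio.h>

#define ACL_HEADER_SIZE	4	/* ext4_acl_header: __le32 a_version */
#define ACL_ENTRY_SHORT	4	/* __le16 e_tag + __le16 e_perm */
#define ACL_ENTRY_FULL	8	/* short entry + __le32 e_id */

/* Mirrors ext4_acl_size(): bytes needed on disk for 'count' entries. */
static size_t acl_size(int count)
{
	if (count <= 4)
		return ACL_HEADER_SIZE + count * ACL_ENTRY_SHORT;
	return ACL_HEADER_SIZE + 4 * ACL_ENTRY_SHORT +
	       (count - 4) * ACL_ENTRY_FULL;
}

/* Mirrors ext4_acl_count(): entries in an xattr body of 'size' bytes,
 * or -1 if the size cannot correspond to a whole number of entries. */
static int acl_count(size_t size)
{
	long s;

	size -= ACL_HEADER_SIZE;
	s = (long)size - 4 * ACL_ENTRY_SHORT;
	if (s < 0) {
		if (size % ACL_ENTRY_SHORT)
			return -1;
		return size / ACL_ENTRY_SHORT;
	}
	if (s % ACL_ENTRY_FULL)
		return -1;
	return s / ACL_ENTRY_FULL + 4;
}

int main(void)
{
	/* 3 entries fit the short form: 4 + 3*4 = 16 bytes */
	printf("size(3) = %zu -> count %d\n", acl_size(3), acl_count(acl_size(3)));
	/* 6 entries: 4 + 4*4 + 2*8 = 36 bytes */
	printf("size(6) = %zu -> count %d\n", acl_size(6), acl_count(acl_size(6)));
	return 0;
}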
diff --git a/ANDROID_3.4.5/fs/ext4/balloc.c b/ANDROID_3.4.5/fs/ext4/balloc.c
deleted file mode 100644
index 8da837be..00000000
--- a/ANDROID_3.4.5/fs/ext4/balloc.c
+++ /dev/null
@@ -1,766 +0,0 @@
-/*
- * linux/fs/ext4/balloc.c
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
- * Big-endian to little-endian byte-swapping/bitmaps by
- * David S. Miller (davem@caip.rutgers.edu), 1995
- */
-
-#include <linux/time.h>
-#include <linux/capability.h>
-#include <linux/fs.h>
-#include <linux/jbd2.h>
-#include <linux/quotaops.h>
-#include <linux/buffer_head.h>
-#include "ext4.h"
-#include "ext4_jbd2.h"
-#include "mballoc.h"
-
-#include <trace/events/ext4.h>
-
-static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
- ext4_group_t block_group);
-/*
- * balloc.c contains the blocks allocation and deallocation routines
- */
-
-/*
- * Calculate the block group number and offset into the block/cluster
- * allocation bitmap, given a block number
- */
-void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
- ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
-{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- ext4_grpblk_t offset;
-
- blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
- offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)) >>
- EXT4_SB(sb)->s_cluster_bits;
- if (offsetp)
- *offsetp = offset;
- if (blockgrpp)
- *blockgrpp = blocknr;
-
-}
-
-static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
- ext4_group_t block_group)
-{
- ext4_group_t actual_group;
- ext4_get_group_no_and_offset(sb, block, &actual_group, NULL);
- if (actual_group == block_group)
- return 1;
- return 0;
-}
-
-/* Return the number of clusters used for file system metadata; this
- * represents the overhead needed by the file system.
- */
-unsigned ext4_num_overhead_clusters(struct super_block *sb,
- ext4_group_t block_group,
- struct ext4_group_desc *gdp)
-{
- unsigned num_clusters;
- int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c;
- ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group);
- ext4_fsblk_t itbl_blk;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
-
- /* This is the number of clusters used by the superblock,
- * block group descriptors, and reserved block group
- * descriptor blocks */
- num_clusters = ext4_num_base_meta_clusters(sb, block_group);
-
- /*
- * For the allocation bitmaps and inode table, we first need
- * to check to see if the block is in the block group. If it
- * is, then check to see if the cluster is already accounted
- * for in the clusters used for the base metadata cluster, or
- * if we can increment the base metadata cluster to include
- * that block. Otherwise, we will have to track the cluster
- * used for the allocation bitmap or inode table explicitly.
- * Normally all of these blocks are contiguous, so the special
- * case handling shouldn't be necessary except for *very*
- * unusual file system layouts.
- */
- if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
- block_cluster = EXT4_B2C(sbi,
- ext4_block_bitmap(sb, gdp) - start);
- if (block_cluster < num_clusters)
- block_cluster = -1;
- else if (block_cluster == num_clusters) {
- num_clusters++;
- block_cluster = -1;
- }
- }
-
- if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
- inode_cluster = EXT4_B2C(sbi,
- ext4_inode_bitmap(sb, gdp) - start);
- if (inode_cluster < num_clusters)
- inode_cluster = -1;
- else if (inode_cluster == num_clusters) {
- num_clusters++;
- inode_cluster = -1;
- }
- }
-
- itbl_blk = ext4_inode_table(sb, gdp);
- for (i = 0; i < sbi->s_itb_per_group; i++) {
- if (ext4_block_in_group(sb, itbl_blk + i, block_group)) {
- c = EXT4_B2C(sbi, itbl_blk + i - start);
- if ((c < num_clusters) || (c == inode_cluster) ||
- (c == block_cluster) || (c == itbl_cluster))
- continue;
- if (c == num_clusters) {
- num_clusters++;
- continue;
- }
- num_clusters++;
- itbl_cluster = c;
- }
- }
-
- if (block_cluster != -1)
- num_clusters++;
- if (inode_cluster != -1)
- num_clusters++;
-
- return num_clusters;
-}
-
-static unsigned int num_clusters_in_group(struct super_block *sb,
- ext4_group_t block_group)
-{
- unsigned int blocks;
-
- if (block_group == ext4_get_groups_count(sb) - 1) {
- /*
- * Even though mke2fs always initializes the first and
- * last group, just in case some other tool was used,
- * we need to make sure we calculate the right free
- * blocks.
- */
- blocks = ext4_blocks_count(EXT4_SB(sb)->s_es) -
- ext4_group_first_block_no(sb, block_group);
- } else
- blocks = EXT4_BLOCKS_PER_GROUP(sb);
- return EXT4_NUM_B2C(EXT4_SB(sb), blocks);
-}
-
-/* Initializes an uninitialized block bitmap */
-void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
- ext4_group_t block_group,
- struct ext4_group_desc *gdp)
-{
- unsigned int bit, bit_max;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_fsblk_t start, tmp;
- int flex_bg = 0;
-
- J_ASSERT_BH(bh, buffer_locked(bh));
-
- /* If checksum is bad mark all blocks used to prevent allocation
- * essentially implementing a per-group read-only flag. */
- if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
- ext4_error(sb, "Checksum bad for group %u", block_group);
- ext4_free_group_clusters_set(sb, gdp, 0);
- ext4_free_inodes_set(sb, gdp, 0);
- ext4_itable_unused_set(sb, gdp, 0);
- memset(bh->b_data, 0xff, sb->s_blocksize);
- return;
- }
- memset(bh->b_data, 0, sb->s_blocksize);
-
- bit_max = ext4_num_base_meta_clusters(sb, block_group);
- for (bit = 0; bit < bit_max; bit++)
- ext4_set_bit(bit, bh->b_data);
-
- start = ext4_group_first_block_no(sb, block_group);
-
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
- flex_bg = 1;
-
- /* Set bits for block and inode bitmaps, and inode table */
- tmp = ext4_block_bitmap(sb, gdp);
- if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
- ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
-
- tmp = ext4_inode_bitmap(sb, gdp);
- if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
- ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
-
- tmp = ext4_inode_table(sb, gdp);
- for (; tmp < ext4_inode_table(sb, gdp) +
- sbi->s_itb_per_group; tmp++) {
- if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
- ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
- }
-
- /*
- * Also, if the number of blocks within the group is less than
- * the blocksize * 8 (which is the size of the bitmap), set the rest
- * of the block bitmap to 1
- */
- ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
- sb->s_blocksize * 8, bh->b_data);
-}
-
-/* Return the number of free clusters in a block group. It is used when
- * the block bitmap is uninitialized, so we can't just count the bits
- * in the bitmap. */
-unsigned ext4_free_clusters_after_init(struct super_block *sb,
- ext4_group_t block_group,
- struct ext4_group_desc *gdp)
-{
- return num_clusters_in_group(sb, block_group) -
- ext4_num_overhead_clusters(sb, block_group, gdp);
-}
-
-/*
- * The free blocks are managed by bitmaps. A file system contains several
- * block groups. Each group contains 1 bitmap block for blocks, 1 bitmap
- * block for inodes, N blocks for the inode table and data blocks.
- *
- * The file system contains group descriptors which are located after the
- * super block. Each descriptor contains the number of the bitmap block and
- * the free blocks count in the block. The descriptors are loaded in memory
- * when a file system is mounted (see ext4_fill_super).
- */
-
-/**
- * ext4_get_group_desc() -- load group descriptor from disk
- * @sb: super block
- * @block_group: given block group
- * @bh: pointer to the buffer head to store the block
- * group descriptor
- */
-struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
- ext4_group_t block_group,
- struct buffer_head **bh)
-{
- unsigned int group_desc;
- unsigned int offset;
- ext4_group_t ngroups = ext4_get_groups_count(sb);
- struct ext4_group_desc *desc;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
-
- if (block_group >= ngroups) {
- ext4_error(sb, "block_group >= groups_count - block_group = %u,"
- " groups_count = %u", block_group, ngroups);
-
- return NULL;
- }
-
- group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
- offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
- if (!sbi->s_group_desc[group_desc]) {
- ext4_error(sb, "Group descriptor not loaded - "
- "block_group = %u, group_desc = %u, desc = %u",
- block_group, group_desc, offset);
- return NULL;
- }
-
- desc = (struct ext4_group_desc *)(
- (__u8 *)sbi->s_group_desc[group_desc]->b_data +
- offset * EXT4_DESC_SIZE(sb));
- if (bh)
- *bh = sbi->s_group_desc[group_desc];
- return desc;
-}
-
-static int ext4_valid_block_bitmap(struct super_block *sb,
- struct ext4_group_desc *desc,
- unsigned int block_group,
- struct buffer_head *bh)
-{
- ext4_grpblk_t offset;
- ext4_grpblk_t next_zero_bit;
- ext4_fsblk_t bitmap_blk;
- ext4_fsblk_t group_first_block;
-
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
- /* with FLEX_BG, the inode/block bitmaps and itable
- * blocks may not be in the group at all, so the bitmap
- * validation is skipped for those groups; verifying them
- * would require also reading the block group where the
- * bitmaps are actually located.
- */
- return 1;
- }
- group_first_block = ext4_group_first_block_no(sb, block_group);
-
- /* check whether block bitmap block number is set */
- bitmap_blk = ext4_block_bitmap(sb, desc);
- offset = bitmap_blk - group_first_block;
- if (!ext4_test_bit(offset, bh->b_data))
- /* bad block bitmap */
- goto err_out;
-
- /* check whether the inode bitmap block number is set */
- bitmap_blk = ext4_inode_bitmap(sb, desc);
- offset = bitmap_blk - group_first_block;
- if (!ext4_test_bit(offset, bh->b_data))
- /* bad block bitmap */
- goto err_out;
-
- /* check whether the inode table block number is set */
- bitmap_blk = ext4_inode_table(sb, desc);
- offset = bitmap_blk - group_first_block;
- next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
- offset + EXT4_SB(sb)->s_itb_per_group,
- offset);
- if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group)
- /* good bitmap for inode tables */
- return 1;
-
-err_out:
- ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu",
- block_group, bitmap_blk);
- return 0;
-}
-/**
- * ext4_read_block_bitmap()
- * @sb: super block
- * @block_group: given block group
- *
- * Read the bitmap for a given block_group, and validate that the
- * bits for the block/inode/inode table blocks are set in the bitmap
- *
- * Return buffer_head on success or NULL in case of failure.
- */
-struct buffer_head *
-ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
-{
- struct ext4_group_desc *desc;
- struct buffer_head *bh;
- ext4_fsblk_t bitmap_blk;
-
- desc = ext4_get_group_desc(sb, block_group, NULL);
- if (!desc)
- return NULL;
- bitmap_blk = ext4_block_bitmap(sb, desc);
- bh = sb_getblk(sb, bitmap_blk);
- if (unlikely(!bh)) {
- ext4_error(sb, "Cannot get buffer for block bitmap - "
- "block_group = %u, block_bitmap = %llu",
- block_group, bitmap_blk);
- return NULL;
- }
-
- if (bitmap_uptodate(bh))
- return bh;
-
- lock_buffer(bh);
- if (bitmap_uptodate(bh)) {
- unlock_buffer(bh);
- return bh;
- }
- ext4_lock_group(sb, block_group);
- if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
- ext4_init_block_bitmap(sb, bh, block_group, desc);
- set_bitmap_uptodate(bh);
- set_buffer_uptodate(bh);
- ext4_unlock_group(sb, block_group);
- unlock_buffer(bh);
- return bh;
- }
- ext4_unlock_group(sb, block_group);
- if (buffer_uptodate(bh)) {
- /*
- * if the group is not uninit and bh is uptodate,
- * the bitmap is also uptodate
- */
- set_bitmap_uptodate(bh);
- unlock_buffer(bh);
- return bh;
- }
- /*
- * submit the buffer_head for reading
- */
- set_buffer_new(bh);
- trace_ext4_read_block_bitmap_load(sb, block_group);
- bh->b_end_io = ext4_end_bitmap_read;
- get_bh(bh);
- submit_bh(READ, bh);
- return bh;
-}
-
-/* Returns 0 on success, 1 on error */
-int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
- struct buffer_head *bh)
-{
- struct ext4_group_desc *desc;
-
- if (!buffer_new(bh))
- return 0;
- desc = ext4_get_group_desc(sb, block_group, NULL);
- if (!desc)
- return 1;
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh)) {
- ext4_error(sb, "Cannot read block bitmap - "
- "block_group = %u, block_bitmap = %llu",
- block_group, (unsigned long long) bh->b_blocknr);
- return 1;
- }
- clear_buffer_new(bh);
- /* Panic or remount fs read-only if block bitmap is invalid */
- ext4_valid_block_bitmap(sb, desc, block_group, bh);
- return 0;
-}
-
-struct buffer_head *
-ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
-{
- struct buffer_head *bh;
-
- bh = ext4_read_block_bitmap_nowait(sb, block_group);
- if (ext4_wait_block_bitmap(sb, block_group, bh)) {
- put_bh(bh);
- return NULL;
- }
- return bh;
-}
-
-/**
- * ext4_has_free_clusters()
- * @sbi: in-core super block structure.
- * @nclusters: number of needed clusters
- * @flags: flags from ext4_mb_new_blocks()
- *
- * Check if filesystem has nclusters free & available for allocation.
- * On success return 1, return 0 on failure.
- */
-static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
- s64 nclusters, unsigned int flags)
-{
- s64 free_clusters, dirty_clusters, root_clusters;
- struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
- struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;
-
- free_clusters = percpu_counter_read_positive(fcc);
- dirty_clusters = percpu_counter_read_positive(dcc);
- root_clusters = EXT4_B2C(sbi, ext4_r_blocks_count(sbi->s_es));
-
- if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
- EXT4_FREECLUSTERS_WATERMARK) {
- free_clusters = EXT4_C2B(sbi, percpu_counter_sum_positive(fcc));
- dirty_clusters = percpu_counter_sum_positive(dcc);
- }
- /* Check whether we have space after accounting for current
- * dirty clusters & root reserved clusters.
- */
- if (free_clusters >= ((root_clusters + nclusters) + dirty_clusters))
- return 1;
-
- /* Hm, nope. Are (enough) root reserved clusters available? */
- if (sbi->s_resuid == current_fsuid() ||
- ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
- capable(CAP_SYS_RESOURCE) ||
- (flags & EXT4_MB_USE_ROOT_BLOCKS)) {
-
- if (free_clusters >= (nclusters + dirty_clusters))
- return 1;
- }
-
- return 0;
-}
-
-int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
- s64 nclusters, unsigned int flags)
-{
- if (ext4_has_free_clusters(sbi, nclusters, flags)) {
- percpu_counter_add(&sbi->s_dirtyclusters_counter, nclusters);
- return 0;
- } else
- return -ENOSPC;
-}
-
-/**
- * ext4_should_retry_alloc()
- * @sb: super block
- * @retries: number of attempts that have been made
- *
- * ext4_should_retry_alloc() is called when ENOSPC is returned, and if
- * it is profitable to retry the operation, this function will wait
- * for the current or committing transaction to complete, and then
- * return TRUE.
- *
- * if the total number of retries exceed three times, return FALSE.
- */
-int ext4_should_retry_alloc(struct super_block *sb, int *retries)
-{
- if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) ||
- (*retries)++ > 3 ||
- !EXT4_SB(sb)->s_journal)
- return 0;
-
- jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
-
- return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
-}
-
-/*
- * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks
- *
- * @handle: handle to this transaction
- * @inode: file inode
- * @goal: given target block(filesystem wide)
- * @count: pointer to total number of clusters needed
- * @errp: error code
- *
- * Returns the first allocated block number on success; *count stores the
- * total number of allocated clusters, and the error code is stored in *errp
- */
-ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
- ext4_fsblk_t goal, unsigned int flags,
- unsigned long *count, int *errp)
-{
- struct ext4_allocation_request ar;
- ext4_fsblk_t ret;
-
- memset(&ar, 0, sizeof(ar));
- /* Fill with neighbour allocated blocks */
- ar.inode = inode;
- ar.goal = goal;
- ar.len = count ? *count : 1;
- ar.flags = flags;
-
- ret = ext4_mb_new_blocks(handle, &ar, errp);
- if (count)
- *count = ar.len;
- /*
- * Account for the allocated meta blocks. We will never
- * fail EDQUOT for metadata, but we do account for it.
- */
- if (!(*errp) &&
- ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- dquot_alloc_block_nofail(inode,
- EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
- }
- return ret;
-}
-
-/**
- * ext4_count_free_clusters() -- count filesystem free clusters
- * @sb: superblock
- *
- * Adds up the number of free clusters from each block group.
- */
-ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
-{
- ext4_fsblk_t desc_count;
- struct ext4_group_desc *gdp;
- ext4_group_t i;
- ext4_group_t ngroups = ext4_get_groups_count(sb);
-#ifdef EXT4FS_DEBUG
- struct ext4_super_block *es;
- ext4_fsblk_t bitmap_count;
- unsigned int x;
- struct buffer_head *bitmap_bh = NULL;
-
- es = EXT4_SB(sb)->s_es;
- desc_count = 0;
- bitmap_count = 0;
- gdp = NULL;
-
- for (i = 0; i < ngroups; i++) {
- gdp = ext4_get_group_desc(sb, i, NULL);
- if (!gdp)
- continue;
- desc_count += ext4_free_group_clusters(sb, gdp);
- brelse(bitmap_bh);
- bitmap_bh = ext4_read_block_bitmap(sb, i);
- if (bitmap_bh == NULL)
- continue;
-
- x = ext4_count_free(bitmap_bh, sb->s_blocksize);
- printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
- i, ext4_free_group_clusters(sb, gdp), x);
- bitmap_count += x;
- }
- brelse(bitmap_bh);
- printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
- ", computed = %llu, %llu\n",
- EXT4_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)),
- desc_count, bitmap_count);
- return bitmap_count;
-#else
- desc_count = 0;
- for (i = 0; i < ngroups; i++) {
- gdp = ext4_get_group_desc(sb, i, NULL);
- if (!gdp)
- continue;
- desc_count += ext4_free_group_clusters(sb, gdp);
- }
-
- return desc_count;
-#endif
-}
-
-static inline int test_root(ext4_group_t a, int b)
-{
- int num = b;
-
- while (a > num)
- num *= b;
- return num == a;
-}
-
-static int ext4_group_sparse(ext4_group_t group)
-{
- if (group <= 1)
- return 1;
- if (!(group & 1))
- return 0;
- return (test_root(group, 7) || test_root(group, 5) ||
- test_root(group, 3));
-}
-
-/**
- * ext4_bg_has_super - number of blocks used by the superblock in group
- * @sb: superblock for filesystem
- * @group: group number to check
- *
- * Return the number of blocks used by the superblock (primary or backup)
- * in this group. Currently this will be only 0 or 1.
- */
-int ext4_bg_has_super(struct super_block *sb, ext4_group_t group)
-{
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
- !ext4_group_sparse(group))
- return 0;
- return 1;
-}
-
-static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
- ext4_group_t group)
-{
- unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);
- ext4_group_t first = metagroup * EXT4_DESC_PER_BLOCK(sb);
- ext4_group_t last = first + EXT4_DESC_PER_BLOCK(sb) - 1;
-
- if (group == first || group == first + 1 || group == last)
- return 1;
- return 0;
-}
-
-static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
- ext4_group_t group)
-{
- if (!ext4_bg_has_super(sb, group))
- return 0;
-
- if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG))
- return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
- else
- return EXT4_SB(sb)->s_gdb_count;
-}
-
-/**
- * ext4_bg_num_gdb - number of blocks used by the group table in group
- * @sb: superblock for filesystem
- * @group: group number to check
- *
- * Return the number of blocks used by the group descriptor table
- * (primary or backup) in this group. In the future there may be a
- * different number of descriptor blocks in each group.
- */
-unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
-{
- unsigned long first_meta_bg =
- le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
- unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);
-
- if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) ||
- metagroup < first_meta_bg)
- return ext4_bg_num_gdb_nometa(sb, group);
-
- return ext4_bg_num_gdb_meta(sb,group);
-
-}
-
-/*
- * This function returns the number of file system metadata clusters at
- * the beginning of a block group, including the reserved gdt blocks.
- */
-static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
- ext4_group_t block_group)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- unsigned num;
-
- /* Check for superblock and gdt backups in this group */
- num = ext4_bg_has_super(sb, block_group);
-
- if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
- block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
- sbi->s_desc_per_block) {
- if (num) {
- num += ext4_bg_num_gdb(sb, block_group);
- num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
- }
- } else { /* For META_BG_BLOCK_GROUPS */
- num += ext4_bg_num_gdb(sb, block_group);
- }
- return EXT4_NUM_B2C(sbi, num);
-}
-/**
- * ext4_inode_to_goal_block - return a hint for block allocation
- * @inode: inode for block allocation
- *
- * Return the ideal location to start allocating blocks for a
- * newly created inode.
- */
-ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode)
-{
- struct ext4_inode_info *ei = EXT4_I(inode);
- ext4_group_t block_group;
- ext4_grpblk_t colour;
- int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
- ext4_fsblk_t bg_start;
- ext4_fsblk_t last_block;
-
- block_group = ei->i_block_group;
- if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
- /*
- * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
- * block groups per flexgroup, reserve the first block
- * group for directories and special files. Regular
- * files will start at the second block group. This
- * tends to speed up directory access and improves
- * fsck times.
- */
- block_group &= ~(flex_size-1);
- if (S_ISREG(inode->i_mode))
- block_group++;
- }
- bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
- last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
-
- /*
- * If we are doing delayed allocation, we don't need to take
- * colour into account.
- */
- if (test_opt(inode->i_sb, DELALLOC))
- return bg_start;
-
- if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
- colour = (current->pid % 16) *
- (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
- else
- colour = (current->pid % 16) * ((last_block - bg_start) / 16);
- return bg_start + colour;
-}
-
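
test_root() and ext4_group_sparse() in balloc.c above decide which block groups carry superblock and group-descriptor backups under the sparse_super feature: groups 0 and 1, plus every group number that is a power of 3, 5 or 7. A stand-alone sketch of the same test (plain unsigned ints stand in for ext4_group_t):

#include <stdio.h>

/* Mirrors test_root(): is 'a' a power of 'b' (b, b^2, b^3, ...)? */
static int test_root(unsigned int a, int b)
{
	int num = b;

	while (a > (unsigned int)num)
		num *= b;
	return (unsigned int)num == a;
}

/* Mirrors ext4_group_sparse(): does this group hold a backup? */
static int group_sparse(unsigned int group)
{
	if (group <= 1)
		return 1;
	if (!(group & 1))
		return 0;	/* even groups > 1 never hold backups */
	return test_root(group, 7) || test_root(group, 5) ||
	       test_root(group, 3);
}

int main(void)
{
	unsigned int g;

	/* Expected output: 0 1 3 5 7 9 25 27 49 */
	for (g = 0; g < 64; g++)
		if (group_sparse(g))
			printf("%u ", g);
	printf("\n");
	return 0;
}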
diff --git a/ANDROID_3.4.5/fs/ext4/bitmap.c b/ANDROID_3.4.5/fs/ext4/bitmap.c
deleted file mode 100644
index fa3af81a..00000000
--- a/ANDROID_3.4.5/fs/ext4/bitmap.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * linux/fs/ext4/bitmap.c
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- */
-
-#include <linux/buffer_head.h>
-#include <linux/jbd2.h>
-#include "ext4.h"
-
-#ifdef EXT4FS_DEBUG
-
-static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
-
-unsigned int ext4_count_free(struct buffer_head *map, unsigned int numchars)
-{
- unsigned int i, sum = 0;
-
- if (!map)
- return 0;
- for (i = 0; i < numchars; i++)
- sum += nibblemap[map->b_data[i] & 0xf] +
- nibblemap[(map->b_data[i] >> 4) & 0xf];
- return sum;
-}
-
-#endif /* EXT4FS_DEBUG */
-
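
The nibblemap table in bitmap.c above stores, for each 4-bit value, its number of zero bits, letting ext4_count_free() tally free bits a nibble at a time instead of bit by bit. A stand-alone sketch of the same lookup over a plain byte array (no buffer_head involved):

#include <stdio.h>

/* nibblemap[v] = number of zero bits in the 4-bit value v */
static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1,
				3, 2, 2, 1, 2, 1, 1, 0};

/* Mirrors ext4_count_free(): count zero ("free") bits in the buffer. */
static unsigned int count_free(const unsigned char *map, unsigned int numchars)
{
	unsigned int i, sum = 0;

	for (i = 0; i < numchars; i++)
		sum += nibblemap[map[i] & 0xf] +
		       nibblemap[(map[i] >> 4) & 0xf];
	return sum;
}

int main(void)
{
	/* 0xF0 has four zero bits (its low nibble); 0x00 has eight. */
	unsigned char bitmap[2] = { 0xF0, 0x00 };

	printf("free bits: %u\n", count_free(bitmap, 2));	/* prints 12 */
	return 0;
}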
diff --git a/ANDROID_3.4.5/fs/ext4/block_validity.c b/ANDROID_3.4.5/fs/ext4/block_validity.c
deleted file mode 100644
index 3f11656b..00000000
--- a/ANDROID_3.4.5/fs/ext4/block_validity.c
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * linux/fs/ext4/block_validity.c
- *
- * Copyright (C) 2009
- * Theodore Ts'o (tytso@mit.edu)
- *
- * Track which blocks in the filesystem are metadata blocks that
- * should never be used as data blocks by files or directories.
- */
-
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/namei.h>
-#include <linux/quotaops.h>
-#include <linux/buffer_head.h>
-#include <linux/swap.h>
-#include <linux/pagemap.h>
-#include <linux/blkdev.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-#include "ext4.h"
-
-struct ext4_system_zone {
- struct rb_node node;
- ext4_fsblk_t start_blk;
- unsigned int count;
-};
-
-static struct kmem_cache *ext4_system_zone_cachep;
-
-int __init ext4_init_system_zone(void)
-{
- ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0);
- if (ext4_system_zone_cachep == NULL)
- return -ENOMEM;
- return 0;
-}
-
-void ext4_exit_system_zone(void)
-{
- kmem_cache_destroy(ext4_system_zone_cachep);
-}
-
-static inline int can_merge(struct ext4_system_zone *entry1,
- struct ext4_system_zone *entry2)
-{
- if ((entry1->start_blk + entry1->count) == entry2->start_blk)
- return 1;
- return 0;
-}
-
-/*
- * Mark a range of blocks as belonging to the "system zone" --- that
- * is, filesystem metadata blocks which should never be used by
- * inodes.
- */
-static int add_system_zone(struct ext4_sb_info *sbi,
- ext4_fsblk_t start_blk,
- unsigned int count)
-{
- struct ext4_system_zone *new_entry = NULL, *entry;
- struct rb_node **n = &sbi->system_blks.rb_node, *node;
- struct rb_node *parent = NULL, *new_node = NULL;
-
- while (*n) {
- parent = *n;
- entry = rb_entry(parent, struct ext4_system_zone, node);
- if (start_blk < entry->start_blk)
- n = &(*n)->rb_left;
- else if (start_blk >= (entry->start_blk + entry->count))
- n = &(*n)->rb_right;
- else {
- if (start_blk + count > (entry->start_blk +
- entry->count))
- entry->count = (start_blk + count -
- entry->start_blk);
- new_node = *n;
- new_entry = rb_entry(new_node, struct ext4_system_zone,
- node);
- break;
- }
- }
-
- if (!new_entry) {
- new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
- GFP_KERNEL);
- if (!new_entry)
- return -ENOMEM;
- new_entry->start_blk = start_blk;
- new_entry->count = count;
- new_node = &new_entry->node;
-
- rb_link_node(new_node, parent, n);
- rb_insert_color(new_node, &sbi->system_blks);
- }
-
- /* Can we merge to the left? */
- node = rb_prev(new_node);
- if (node) {
- entry = rb_entry(node, struct ext4_system_zone, node);
- if (can_merge(entry, new_entry)) {
- new_entry->start_blk = entry->start_blk;
- new_entry->count += entry->count;
- rb_erase(node, &sbi->system_blks);
- kmem_cache_free(ext4_system_zone_cachep, entry);
- }
- }
-
- /* Can we merge to the right? */
- node = rb_next(new_node);
- if (node) {
- entry = rb_entry(node, struct ext4_system_zone, node);
- if (can_merge(new_entry, entry)) {
- new_entry->count += entry->count;
- rb_erase(node, &sbi->system_blks);
- kmem_cache_free(ext4_system_zone_cachep, entry);
- }
- }
- return 0;
-}
-
-static void debug_print_tree(struct ext4_sb_info *sbi)
-{
- struct rb_node *node;
- struct ext4_system_zone *entry;
- int first = 1;
-
- printk(KERN_INFO "System zones: ");
- node = rb_first(&sbi->system_blks);
- while (node) {
- entry = rb_entry(node, struct ext4_system_zone, node);
- printk("%s%llu-%llu", first ? "" : ", ",
- entry->start_blk, entry->start_blk + entry->count - 1);
- first = 0;
- node = rb_next(node);
- }
- printk("\n");
-}
-
-int ext4_setup_system_zone(struct super_block *sb)
-{
- ext4_group_t ngroups = ext4_get_groups_count(sb);
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_group_desc *gdp;
- ext4_group_t i;
- int flex_size = ext4_flex_bg_size(sbi);
- int ret;
-
- if (!test_opt(sb, BLOCK_VALIDITY)) {
- if (EXT4_SB(sb)->system_blks.rb_node)
- ext4_release_system_zone(sb);
- return 0;
- }
- if (EXT4_SB(sb)->system_blks.rb_node)
- return 0;
-
- for (i=0; i < ngroups; i++) {
- if (ext4_bg_has_super(sb, i) &&
- ((i < 5) || ((i % flex_size) == 0)))
- add_system_zone(sbi, ext4_group_first_block_no(sb, i),
- ext4_bg_num_gdb(sb, i) + 1);
- gdp = ext4_get_group_desc(sb, i, NULL);
- ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
- if (ret)
- return ret;
- ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
- if (ret)
- return ret;
- ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
- sbi->s_itb_per_group);
- if (ret)
- return ret;
- }
-
- if (test_opt(sb, DEBUG))
- debug_print_tree(EXT4_SB(sb));
- return 0;
-}
-
-/* Called when the filesystem is unmounted */
-void ext4_release_system_zone(struct super_block *sb)
-{
- struct rb_node *n = EXT4_SB(sb)->system_blks.rb_node;
- struct rb_node *parent;
- struct ext4_system_zone *entry;
-
- while (n) {
- /* Do the node's children first */
- if (n->rb_left) {
- n = n->rb_left;
- continue;
- }
- if (n->rb_right) {
- n = n->rb_right;
- continue;
- }
- /*
- * The node has no children; free it, and then zero
- * out parent's link to it. Finally go to the
- * beginning of the loop and try to free the parent
- * node.
- */
- parent = rb_parent(n);
- entry = rb_entry(n, struct ext4_system_zone, node);
- kmem_cache_free(ext4_system_zone_cachep, entry);
- if (!parent)
- EXT4_SB(sb)->system_blks = RB_ROOT;
- else if (parent->rb_left == n)
- parent->rb_left = NULL;
- else if (parent->rb_right == n)
- parent->rb_right = NULL;
- n = parent;
- }
- EXT4_SB(sb)->system_blks = RB_ROOT;
-}
-
-/*
- * Returns 1 if the passed-in block region (start_blk,
- * start_blk+count) is valid; 0 if some part of the block region
- * overlaps with filesystem metadata blocks.
- */
-int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
- unsigned int count)
-{
- struct ext4_system_zone *entry;
- struct rb_node *n = sbi->system_blks.rb_node;
-
- if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
- (start_blk + count < start_blk) ||
- (start_blk + count > ext4_blocks_count(sbi->s_es))) {
- sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
- return 0;
- }
- while (n) {
- entry = rb_entry(n, struct ext4_system_zone, node);
- if (start_blk + count - 1 < entry->start_blk)
- n = n->rb_left;
- else if (start_blk >= (entry->start_blk + entry->count))
- n = n->rb_right;
- else {
- sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
- return 0;
- }
- }
- return 1;
-}
-
-int ext4_check_blockref(const char *function, unsigned int line,
- struct inode *inode, __le32 *p, unsigned int max)
-{
- struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
- __le32 *bref = p;
- unsigned int blk;
-
- while (bref < p+max) {
- blk = le32_to_cpu(*bref++);
- if (blk &&
- unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
- blk, 1))) {
- es->s_last_error_block = cpu_to_le64(blk);
- ext4_error_inode(inode, function, line, blk,
- "invalid block");
- return -EIO;
- }
- }
- return 0;
-}
-
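
ext4_data_block_valid() above rejects any candidate range that overlaps a system-zone interval by walking the red-black tree. A simplified sketch of the same overlap test, using a sorted array plus binary search in place of the rb-tree (the tree is just the kernel's lookup structure of choice; the zone values below are hypothetical):

#include <stdio.h>

struct zone {
	unsigned long long start_blk;
	unsigned int count;
};

/* Return 1 if [start_blk, start_blk + count) touches no zone, else 0.
 * 'zones' must be sorted by start_blk and non-overlapping. */
static int data_block_valid(const struct zone *zones, int nzones,
			    unsigned long long start_blk, unsigned int count)
{
	int lo = 0, hi = nzones - 1;

	while (lo <= hi) {
		int mid = lo + (hi - lo) / 2;

		if (start_blk + count - 1 < zones[mid].start_blk)
			hi = mid - 1;
		else if (start_blk >= zones[mid].start_blk + zones[mid].count)
			lo = mid + 1;
		else
			return 0;	/* overlaps filesystem metadata */
	}
	return 1;
}

int main(void)
{
	/* Hypothetical zones: superblock + GDT at 0..15, bitmaps at 64..66 */
	const struct zone zones[] = { { 0, 16 }, { 64, 3 } };

	printf("%d\n", data_block_valid(zones, 2, 20, 10));	/* 1: clear */
	printf("%d\n", data_block_valid(zones, 2, 60, 10));	/* 0: overlaps */
	return 0;
}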
diff --git a/ANDROID_3.4.5/fs/ext4/dir.c b/ANDROID_3.4.5/fs/ext4/dir.c
deleted file mode 100644
index b8678620..00000000
--- a/ANDROID_3.4.5/fs/ext4/dir.c
+++ /dev/null
@@ -1,667 +0,0 @@
-/*
- * linux/fs/ext4/dir.c
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- * from
- *
- * linux/fs/minix/dir.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * ext4 directory handling functions
- *
- * Big-endian to little-endian byte-swapping/bitmaps by
- * David S. Miller (davem@caip.rutgers.edu), 1995
- *
- * Hash Tree Directory indexing (c) 2001 Daniel Phillips
- *
- */
-
-#include <linux/fs.h>
-#include <linux/jbd2.h>
-#include <linux/buffer_head.h>
-#include <linux/slab.h>
-#include <linux/rbtree.h>
-#include "ext4.h"
-
-static unsigned char ext4_filetype_table[] = {
- DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
-};
-
-static int ext4_dx_readdir(struct file *filp,
- void *dirent, filldir_t filldir);
-
-static unsigned char get_dtype(struct super_block *sb, int filetype)
-{
- if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) ||
- (filetype >= EXT4_FT_MAX))
- return DT_UNKNOWN;
-
- return (ext4_filetype_table[filetype]);
-}
-
-/**
- * Check if the given dir-inode refers to an htree-indexed directory
- * (or a directory which could potentially get converted to use htree
- * indexing).
- *
- * Return 1 if it is a dx dir, 0 if not
- */
-static int is_dx_dir(struct inode *inode)
-{
- struct super_block *sb = inode->i_sb;
-
- if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_COMPAT_DIR_INDEX) &&
- ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
- ((inode->i_size >> sb->s_blocksize_bits) == 1)))
- return 1;
-
- return 0;
-}
-
-/*
- * Return 0 if the directory entry is OK, and 1 if there is a problem
- *
- * Note: this is the opposite of what ext2 and ext3 historically returned...
- */
-int __ext4_check_dir_entry(const char *function, unsigned int line,
- struct inode *dir, struct file *filp,
- struct ext4_dir_entry_2 *de,
- struct buffer_head *bh,
- unsigned int offset)
-{
- const char *error_msg = NULL;
- const int rlen = ext4_rec_len_from_disk(de->rec_len,
- dir->i_sb->s_blocksize);
-
- if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
- error_msg = "rec_len is smaller than minimal";
- else if (unlikely(rlen % 4 != 0))
- error_msg = "rec_len % 4 != 0";
- else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
- error_msg = "rec_len is too small for name_len";
- else if (unlikely(((char *) de - bh->b_data) + rlen >
- dir->i_sb->s_blocksize))
- error_msg = "directory entry across blocks";
- else if (unlikely(le32_to_cpu(de->inode) >
- le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
- error_msg = "inode out of bounds";
- else
- return 0;
-
- if (filp)
- ext4_error_file(filp, function, line, bh->b_blocknr,
- "bad entry in directory: %s - offset=%u(%u), "
- "inode=%u, rec_len=%d, name_len=%d",
- error_msg, (unsigned) (offset % bh->b_size),
- offset, le32_to_cpu(de->inode),
- rlen, de->name_len);
- else
- ext4_error_inode(dir, function, line, bh->b_blocknr,
- "bad entry in directory: %s - offset=%u(%u), "
- "inode=%u, rec_len=%d, name_len=%d",
- error_msg, (unsigned) (offset % bh->b_size),
- offset, le32_to_cpu(de->inode),
- rlen, de->name_len);
-
- return 1;
-}
-
-static int ext4_readdir(struct file *filp,
- void *dirent, filldir_t filldir)
-{
- int error = 0;
- unsigned int offset;
- int i, stored;
- struct ext4_dir_entry_2 *de;
- int err;
- struct inode *inode = filp->f_path.dentry->d_inode;
- struct super_block *sb = inode->i_sb;
- int ret = 0;
- int dir_has_error = 0;
-
- if (is_dx_dir(inode)) {
- err = ext4_dx_readdir(filp, dirent, filldir);
- if (err != ERR_BAD_DX_DIR) {
- ret = err;
- goto out;
- }
- /*
- * We don't set the inode dirty flag since it's not
- * critical that it get flushed back to the disk.
- */
- ext4_clear_inode_flag(filp->f_path.dentry->d_inode,
- EXT4_INODE_INDEX);
- }
- stored = 0;
- offset = filp->f_pos & (sb->s_blocksize - 1);
-
- while (!error && !stored && filp->f_pos < inode->i_size) {
- struct ext4_map_blocks map;
- struct buffer_head *bh = NULL;
-
- map.m_lblk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb);
- map.m_len = 1;
- err = ext4_map_blocks(NULL, inode, &map, 0);
- if (err > 0) {
- pgoff_t index = map.m_pblk >>
- (PAGE_CACHE_SHIFT - inode->i_blkbits);
- if (!ra_has_index(&filp->f_ra, index))
- page_cache_sync_readahead(
- sb->s_bdev->bd_inode->i_mapping,
- &filp->f_ra, filp,
- index, 1);
- filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
- bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err);
- }
-
- /*
- * We ignore I/O errors on directories so users have a chance
- * of recovering data when there's a bad sector
- */
- if (!bh) {
- if (!dir_has_error) {
- EXT4_ERROR_FILE(filp, 0,
- "directory contains a "
- "hole at offset %llu",
- (unsigned long long) filp->f_pos);
- dir_has_error = 1;
- }
- /* corrupt size? Maybe no more blocks to read */
- if (filp->f_pos > inode->i_blocks << 9)
- break;
- filp->f_pos += sb->s_blocksize - offset;
- continue;
- }
-
-revalidate:
- /* If the dir block has changed since the last call to
- * readdir(2), then we might be pointing to an invalid
- * dirent right now. Scan from the start of the block
- * to make sure. */
- if (filp->f_version != inode->i_version) {
- for (i = 0; i < sb->s_blocksize && i < offset; ) {
- de = (struct ext4_dir_entry_2 *)
- (bh->b_data + i);
- /* It's too expensive to do a full
- * dirent test each time round this
- * loop, but we do have to test at
- * least that it is non-zero. A
- * failure will be detected in the
- * dirent test below. */
- if (ext4_rec_len_from_disk(de->rec_len,
- sb->s_blocksize) < EXT4_DIR_REC_LEN(1))
- break;
- i += ext4_rec_len_from_disk(de->rec_len,
- sb->s_blocksize);
- }
- offset = i;
- filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
- | offset;
- filp->f_version = inode->i_version;
- }
-
- while (!error && filp->f_pos < inode->i_size
- && offset < sb->s_blocksize) {
- de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
- if (ext4_check_dir_entry(inode, filp, de,
- bh, offset)) {
- /*
- * On error, skip the f_pos to the next block
- */
- filp->f_pos = (filp->f_pos |
- (sb->s_blocksize - 1)) + 1;
- brelse(bh);
- ret = stored;
- goto out;
- }
- offset += ext4_rec_len_from_disk(de->rec_len,
- sb->s_blocksize);
- if (le32_to_cpu(de->inode)) {
- /* We might block in the next section
- * if the data destination is
- * currently swapped out. So, use a
- * version stamp to detect whether or
- * not the directory has been modified
- * during the copy operation.
- */
- u64 version = filp->f_version;
-
- error = filldir(dirent, de->name,
- de->name_len,
- filp->f_pos,
- le32_to_cpu(de->inode),
- get_dtype(sb, de->file_type));
- if (error)
- break;
- if (version != filp->f_version)
- goto revalidate;
- stored++;
- }
- filp->f_pos += ext4_rec_len_from_disk(de->rec_len,
- sb->s_blocksize);
- }
- offset = 0;
- brelse(bh);
- }
-out:
- return ret;
-}
-
-static inline int is_32bit_api(void)
-{
-#ifdef CONFIG_COMPAT
- return is_compat_task();
-#else
- return (BITS_PER_LONG == 32);
-#endif
-}
-
-/*
- * These functions convert from the major/minor hash to an f_pos
- * value for dx directories
- *
- * Upper layers (for example NFS) should specify FMODE_32BITHASH or
- * FMODE_64BITHASH explicitly. On the other hand, ext4 can be mounted
- * directly on both 32-bit and 64-bit nodes; in that case neither
- * FMODE_32BITHASH nor FMODE_64BITHASH is specified.
- */
-static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
-{
- if ((filp->f_mode & FMODE_32BITHASH) ||
- (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
- return major >> 1;
- else
- return ((__u64)(major >> 1) << 32) | (__u64)minor;
-}
-
-static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
-{
- if ((filp->f_mode & FMODE_32BITHASH) ||
- (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
- return (pos << 1) & 0xffffffff;
- else
- return ((pos >> 32) << 1) & 0xffffffff;
-}
-
-static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
-{
- if ((filp->f_mode & FMODE_32BITHASH) ||
- (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
- return 0;
- else
- return pos & 0xffffffff;
-}
-
-/*
- * Return 32- or 64-bit end-of-file for dx directories
- */
-static inline loff_t ext4_get_htree_eof(struct file *filp)
-{
- if ((filp->f_mode & FMODE_32BITHASH) ||
- (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
- return EXT4_HTREE_EOF_32BIT;
- else
- return EXT4_HTREE_EOF_64BIT;
-}
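
To make the 64-bit encoding above concrete: major >> 1 discards the low bit of the major hash, so pos2maj_hash() recovers the major hash with bit 0 cleared; the round trip is exact because the dx hash code masks that bit off anyway. A small round-trip sketch with arbitrary example values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t major = 0xdeadbeee, minor = 0x12345678;	/* bit 0 of major clear */

	/* hash2pos(), 64-bit case */
	uint64_t pos = ((uint64_t)(major >> 1) << 32) | minor;
	/* pos2maj_hash() and pos2min_hash() invert it */
	uint32_t maj = ((pos >> 32) << 1) & 0xffffffff;
	uint32_t min = pos & 0xffffffff;

	printf("major %#x -> %#x, minor %#x -> %#x\n", major, maj, minor, min);
	return 0;
}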
-
-
-/*
- * ext4_dir_llseek() based on generic_file_llseek() to handle both
- * non-htree and htree directories, where the "offset" is in terms
- * of the filename hash value instead of the byte offset.
- *
- * NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX)
- * will be invalid once the directory has been converted into a dx directory
- */
-loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
-{
- struct inode *inode = file->f_mapping->host;
- loff_t ret = -EINVAL;
- int dx_dir = is_dx_dir(inode);
-
- mutex_lock(&inode->i_mutex);
-
- /* NOTE: relative offsets with dx directories might not work
- * as expected, as it is difficult to figure out the
- * correct offset between dx hashes */
-
- switch (origin) {
- case SEEK_END:
- if (unlikely(offset > 0))
- goto out_err; /* not supported for directories */
-
-		/* so only negative offsets are left; do those have any
-		 * meaning for directories at all? */
- if (dx_dir)
- offset += ext4_get_htree_eof(file);
- else
- offset += inode->i_size;
- break;
- case SEEK_CUR:
- /*
- * Here we special-case the lseek(fd, 0, SEEK_CUR)
- * position-querying operation. Avoid rewriting the "same"
- * f_pos value back to the file because a concurrent read(),
- * write() or lseek() might have altered it
- */
- if (offset == 0) {
- offset = file->f_pos;
- goto out_ok;
- }
-
- offset += file->f_pos;
- break;
- }
-
- if (unlikely(offset < 0))
- goto out_err;
-
- if (!dx_dir) {
- if (offset > inode->i_sb->s_maxbytes)
- goto out_err;
- } else if (offset > ext4_get_htree_eof(file))
- goto out_err;
-
- /* Special lock needed here? */
- if (offset != file->f_pos) {
- file->f_pos = offset;
- file->f_version = 0;
- }
-
-out_ok:
- ret = offset;
-out_err:
- mutex_unlock(&inode->i_mutex);
-
- return ret;
-}
-
-/*
- * This structure holds the nodes of the red-black tree used to store
- * the directory entry in hash order.
- */
-struct fname {
- __u32 hash;
- __u32 minor_hash;
- struct rb_node rb_hash;
- struct fname *next;
- __u32 inode;
- __u8 name_len;
- __u8 file_type;
- char name[0];
-};
-
-/*
- * This function implements a non-recursive way of freeing all of the
- * nodes in the red-black tree.
- */
-static void free_rb_tree_fname(struct rb_root *root)
-{
- struct rb_node *n = root->rb_node;
- struct rb_node *parent;
- struct fname *fname;
-
- while (n) {
- /* Do the node's children first */
- if (n->rb_left) {
- n = n->rb_left;
- continue;
- }
- if (n->rb_right) {
- n = n->rb_right;
- continue;
- }
- /*
- * The node has no children; free it, and then zero
- * out parent's link to it. Finally go to the
- * beginning of the loop and try to free the parent
- * node.
- */
- parent = rb_parent(n);
- fname = rb_entry(n, struct fname, rb_hash);
- while (fname) {
- struct fname *old = fname;
- fname = fname->next;
- kfree(old);
- }
- if (!parent)
- *root = RB_ROOT;
- else if (parent->rb_left == n)
- parent->rb_left = NULL;
- else if (parent->rb_right == n)
- parent->rb_right = NULL;
- n = parent;
- }
-}
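
The same traversal is easy to check in userspace. Below is a sketch on a plain binary tree (a hypothetical stand-in for the rbtree, with rb_parent() replaced by an explicit parent pointer): descend to a leaf, unlink it from its parent, free it, then retry from the parent, so children are always freed before parents and no recursion stack is needed.

#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *left, *right, *parent;
	int key;
};

static struct node *new_node(struct node *parent, int key)
{
	struct node *n = calloc(1, sizeof(*n));

	n->parent = parent;
	n->key = key;
	return n;
}

static void free_tree(struct node *root)
{
	struct node *n = root;

	while (n) {
		if (n->left) {			/* do the children first */
			n = n->left;
			continue;
		}
		if (n->right) {
			n = n->right;
			continue;
		}
		/* leaf: zero out the parent's link, free it, retry parent */
		struct node *parent = n->parent;

		if (parent) {
			if (parent->left == n)
				parent->left = NULL;
			else
				parent->right = NULL;
		}
		printf("freeing %d\n", n->key);
		free(n);
		n = parent;
	}
}

int main(void)
{
	struct node *root = new_node(NULL, 2);

	root->left = new_node(root, 1);
	root->right = new_node(root, 3);
	free_tree(root);	/* prints 1, 3, 2: children before parent */
	return 0;
}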
-
-
-static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
- loff_t pos)
-{
- struct dir_private_info *p;
-
- p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
- if (!p)
- return NULL;
- p->curr_hash = pos2maj_hash(filp, pos);
- p->curr_minor_hash = pos2min_hash(filp, pos);
- return p;
-}
-
-void ext4_htree_free_dir_info(struct dir_private_info *p)
-{
- free_rb_tree_fname(&p->root);
- kfree(p);
-}
-
-/*
- * Given a directory entry, enter it into the fname rb tree.
- */
-int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
- __u32 minor_hash,
- struct ext4_dir_entry_2 *dirent)
-{
- struct rb_node **p, *parent = NULL;
- struct fname *fname, *new_fn;
- struct dir_private_info *info;
- int len;
-
- info = dir_file->private_data;
- p = &info->root.rb_node;
-
- /* Create and allocate the fname structure */
- len = sizeof(struct fname) + dirent->name_len + 1;
- new_fn = kzalloc(len, GFP_KERNEL);
- if (!new_fn)
- return -ENOMEM;
- new_fn->hash = hash;
- new_fn->minor_hash = minor_hash;
- new_fn->inode = le32_to_cpu(dirent->inode);
- new_fn->name_len = dirent->name_len;
- new_fn->file_type = dirent->file_type;
- memcpy(new_fn->name, dirent->name, dirent->name_len);
- new_fn->name[dirent->name_len] = 0;
-
- while (*p) {
- parent = *p;
- fname = rb_entry(parent, struct fname, rb_hash);
-
- /*
- * If the hash and minor hash match up, then we put
- * them on a linked list. This rarely happens...
- */
- if ((new_fn->hash == fname->hash) &&
- (new_fn->minor_hash == fname->minor_hash)) {
- new_fn->next = fname->next;
- fname->next = new_fn;
- return 0;
- }
-
- if (new_fn->hash < fname->hash)
- p = &(*p)->rb_left;
- else if (new_fn->hash > fname->hash)
- p = &(*p)->rb_right;
- else if (new_fn->minor_hash < fname->minor_hash)
- p = &(*p)->rb_left;
- else /* if (new_fn->minor_hash > fname->minor_hash) */
- p = &(*p)->rb_right;
- }
-
- rb_link_node(&new_fn->rb_hash, parent, p);
- rb_insert_color(&new_fn->rb_hash, &info->root);
- return 0;
-}
-
-
-
-/*
- * This is a helper function for ext4_dx_readdir. It calls filldir
- * for all entries on the fname linked list. (Normally there is only
- * one entry on the list, unless there are 62-bit hash collisions.)
- */
-static int call_filldir(struct file *filp, void *dirent,
- filldir_t filldir, struct fname *fname)
-{
- struct dir_private_info *info = filp->private_data;
- loff_t curr_pos;
- struct inode *inode = filp->f_path.dentry->d_inode;
- struct super_block *sb;
- int error;
-
- sb = inode->i_sb;
-
- if (!fname) {
- ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: "
- "called with null fname?!?", __func__, __LINE__,
- inode->i_ino, current->comm);
- return 0;
- }
- curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
- while (fname) {
- error = filldir(dirent, fname->name,
- fname->name_len, curr_pos,
- fname->inode,
- get_dtype(sb, fname->file_type));
- if (error) {
- filp->f_pos = curr_pos;
- info->extra_fname = fname;
- return error;
- }
- fname = fname->next;
- }
- return 0;
-}
-
-static int ext4_dx_readdir(struct file *filp,
- void *dirent, filldir_t filldir)
-{
- struct dir_private_info *info = filp->private_data;
- struct inode *inode = filp->f_path.dentry->d_inode;
- struct fname *fname;
- int ret;
-
- if (!info) {
- info = ext4_htree_create_dir_info(filp, filp->f_pos);
- if (!info)
- return -ENOMEM;
- filp->private_data = info;
- }
-
- if (filp->f_pos == ext4_get_htree_eof(filp))
- return 0; /* EOF */
-
-	/* Someone has messed with f_pos; reset the world */
- if (info->last_pos != filp->f_pos) {
- free_rb_tree_fname(&info->root);
- info->curr_node = NULL;
- info->extra_fname = NULL;
- info->curr_hash = pos2maj_hash(filp, filp->f_pos);
- info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
- }
-
- /*
- * If there are any leftover names on the hash collision
- * chain, return them first.
- */
- if (info->extra_fname) {
- if (call_filldir(filp, dirent, filldir, info->extra_fname))
- goto finished;
- info->extra_fname = NULL;
- goto next_node;
- } else if (!info->curr_node)
- info->curr_node = rb_first(&info->root);
-
- while (1) {
- /*
- * Fill the rbtree if we have no more entries,
- * or the inode has changed since we last read in the
- * cached entries.
- */
- if ((!info->curr_node) ||
- (filp->f_version != inode->i_version)) {
- info->curr_node = NULL;
- free_rb_tree_fname(&info->root);
- filp->f_version = inode->i_version;
- ret = ext4_htree_fill_tree(filp, info->curr_hash,
- info->curr_minor_hash,
- &info->next_hash);
- if (ret < 0)
- return ret;
- if (ret == 0) {
- filp->f_pos = ext4_get_htree_eof(filp);
- break;
- }
- info->curr_node = rb_first(&info->root);
- }
-
- fname = rb_entry(info->curr_node, struct fname, rb_hash);
- info->curr_hash = fname->hash;
- info->curr_minor_hash = fname->minor_hash;
- if (call_filldir(filp, dirent, filldir, fname))
- break;
- next_node:
- info->curr_node = rb_next(info->curr_node);
- if (info->curr_node) {
- fname = rb_entry(info->curr_node, struct fname,
- rb_hash);
- info->curr_hash = fname->hash;
- info->curr_minor_hash = fname->minor_hash;
- } else {
- if (info->next_hash == ~0) {
- filp->f_pos = ext4_get_htree_eof(filp);
- break;
- }
- info->curr_hash = info->next_hash;
- info->curr_minor_hash = 0;
- }
- }
-finished:
- info->last_pos = filp->f_pos;
- return 0;
-}
-
-static int ext4_release_dir(struct inode *inode, struct file *filp)
-{
- if (filp->private_data)
- ext4_htree_free_dir_info(filp->private_data);
-
- return 0;
-}
-
-const struct file_operations ext4_dir_operations = {
- .llseek = ext4_dir_llseek,
- .read = generic_read_dir,
- .readdir = ext4_readdir,
- .unlocked_ioctl = ext4_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = ext4_compat_ioctl,
-#endif
- .fsync = ext4_sync_file,
- .release = ext4_release_dir,
-};
diff --git a/ANDROID_3.4.5/fs/ext4/ext4.h b/ANDROID_3.4.5/fs/ext4/ext4.h
deleted file mode 100644
index 0e01e90a..00000000
--- a/ANDROID_3.4.5/fs/ext4/ext4.h
+++ /dev/null
@@ -1,2372 +0,0 @@
-/*
- * ext4.h
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- * from
- *
- * linux/include/linux/minix_fs.h
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- */
-
-#ifndef _EXT4_H
-#define _EXT4_H
-
-#include <linux/types.h>
-#include <linux/blkdev.h>
-#include <linux/magic.h>
-#include <linux/jbd2.h>
-#include <linux/quota.h>
-#include <linux/rwsem.h>
-#include <linux/rbtree.h>
-#include <linux/seqlock.h>
-#include <linux/mutex.h>
-#include <linux/timer.h>
-#include <linux/wait.h>
-#include <linux/blockgroup_lock.h>
-#include <linux/percpu_counter.h>
-#ifdef __KERNEL__
-#include <linux/compat.h>
-#endif
-
-/*
- * The fourth extended filesystem constants/structures
- */
-
-/*
- * Define EXT4FS_DEBUG to produce debug messages
- */
-#undef EXT4FS_DEBUG
-
-/*
- * Debug code
- */
-#ifdef EXT4FS_DEBUG
-#define ext4_debug(f, a...) \
- do { \
- printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \
- __FILE__, __LINE__, __func__); \
- printk(KERN_DEBUG f, ## a); \
- } while (0)
-#else
-#define ext4_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
-#endif
-
-#define EXT4_ERROR_INODE(inode, fmt, a...) \
- ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)
-
-#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \
- ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
-
-#define EXT4_ERROR_FILE(file, block, fmt, a...) \
- ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
-
-/* data type for block offset of block group */
-typedef int ext4_grpblk_t;
-
-/* data type for filesystem-wide blocks number */
-typedef unsigned long long ext4_fsblk_t;
-
-/* data type for file logical block number */
-typedef __u32 ext4_lblk_t;
-
-/* data type for block group number */
-typedef unsigned int ext4_group_t;
-
-/*
- * Flags used in mballoc's allocation_context flags field.
- *
- * Also used to show what's going on for debugging purposes when the
- * flag field is exported via the tracepoint interface
- */
-
-/* prefer goal again. length */
-#define EXT4_MB_HINT_MERGE 0x0001
-/* blocks already reserved */
-#define EXT4_MB_HINT_RESERVED 0x0002
-/* metadata is being allocated */
-#define EXT4_MB_HINT_METADATA 0x0004
-/* first blocks in the file */
-#define EXT4_MB_HINT_FIRST 0x0008
-/* search for the best chunk */
-#define EXT4_MB_HINT_BEST 0x0010
-/* data is being allocated */
-#define EXT4_MB_HINT_DATA 0x0020
-/* don't preallocate (for tails) */
-#define EXT4_MB_HINT_NOPREALLOC 0x0040
-/* allocate for locality group */
-#define EXT4_MB_HINT_GROUP_ALLOC 0x0080
-/* allocate goal blocks or none */
-#define EXT4_MB_HINT_GOAL_ONLY 0x0100
-/* goal is meaningful */
-#define EXT4_MB_HINT_TRY_GOAL 0x0200
-/* blocks already pre-reserved by delayed allocation */
-#define EXT4_MB_DELALLOC_RESERVED 0x0400
-/* We are doing stream allocation */
-#define EXT4_MB_STREAM_ALLOC 0x0800
-/* Use reserved root blocks if needed */
-#define EXT4_MB_USE_ROOT_BLOCKS 0x1000
-
-struct ext4_allocation_request {
- /* target inode for block we're allocating */
- struct inode *inode;
- /* how many blocks we want to allocate */
- unsigned int len;
- /* logical block in target inode */
- ext4_lblk_t logical;
- /* the closest logical allocated block to the left */
- ext4_lblk_t lleft;
- /* the closest logical allocated block to the right */
- ext4_lblk_t lright;
- /* phys. target (a hint) */
- ext4_fsblk_t goal;
- /* phys. block for the closest logical allocated block to the left */
- ext4_fsblk_t pleft;
- /* phys. block for the closest logical allocated block to the right */
- ext4_fsblk_t pright;
- /* flags. see above EXT4_MB_HINT_* */
- unsigned int flags;
-};
-
-/*
- * Logical to physical block mapping, used by ext4_map_blocks()
- *
- * This structure is used to pass requests into ext4_map_blocks() as
- * well as to store the information returned by ext4_map_blocks(). It
- * takes less room on the stack than a struct buffer_head.
- */
-#define EXT4_MAP_NEW (1 << BH_New)
-#define EXT4_MAP_MAPPED (1 << BH_Mapped)
-#define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten)
-#define EXT4_MAP_BOUNDARY (1 << BH_Boundary)
-#define EXT4_MAP_UNINIT (1 << BH_Uninit)
-/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
- * ext4_map_blocks wants to know whether or not the underlying cluster has
- * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
- * the requested mapping was from a previously mapped (or delayed-allocated)
- * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
- * should never appear in a buffer_head's state flags.
- */
-#define EXT4_MAP_FROM_CLUSTER (1 << BH_AllocFromCluster)
-#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
- EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
- EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
-
-struct ext4_map_blocks {
- ext4_fsblk_t m_pblk;
- ext4_lblk_t m_lblk;
- unsigned int m_len;
- unsigned int m_flags;
-};
-
-/*
- * For delayed allocation tracking
- */
-struct mpage_da_data {
- struct inode *inode;
- sector_t b_blocknr; /* start block number of extent */
- size_t b_size; /* size of extent */
- unsigned long b_state; /* state of the extent */
- unsigned long first_page, next_page; /* extent of pages */
- struct writeback_control *wbc;
- int io_done;
- int pages_written;
- int retval;
-};
-
-/*
- * Flags for ext4_io_end->flags
- */
-#define EXT4_IO_END_UNWRITTEN 0x0001
-#define EXT4_IO_END_ERROR 0x0002
-#define EXT4_IO_END_QUEUED 0x0004
-#define EXT4_IO_END_DIRECT 0x0008
-#define EXT4_IO_END_IN_FSYNC 0x0010
-
-struct ext4_io_page {
- struct page *p_page;
- atomic_t p_count;
-};
-
-#define MAX_IO_PAGES 128
-
-/*
- * For converting uninitialized extents on a work queue.
- *
- * 'page' is only used from the writepage() path; 'pages' is only used for
- * buffered writes; they are used to keep page references until conversion
- * takes place. For AIO/DIO, neither field is filled in.
- */
-typedef struct ext4_io_end {
- struct list_head list; /* per-file finished IO list */
- struct inode *inode; /* file being written to */
- unsigned int flag; /* unwritten or not */
- struct page *page; /* for writepage() path */
- loff_t offset; /* offset in the file */
- ssize_t size; /* size of the extent */
- struct work_struct work; /* data work queue */
- struct kiocb *iocb; /* iocb struct for AIO */
- int result; /* error value for AIO */
- int num_io_pages; /* for writepages() */
- struct ext4_io_page *pages[MAX_IO_PAGES]; /* for writepages() */
-} ext4_io_end_t;
-
-struct ext4_io_submit {
- int io_op;
- struct bio *io_bio;
- ext4_io_end_t *io_end;
- struct ext4_io_page *io_page;
- sector_t io_next_block;
-};
-
-/*
- * Special inodes numbers
- */
-#define EXT4_BAD_INO 1 /* Bad blocks inode */
-#define EXT4_ROOT_INO 2 /* Root inode */
-#define EXT4_USR_QUOTA_INO 3 /* User quota inode */
-#define EXT4_GRP_QUOTA_INO 4 /* Group quota inode */
-#define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */
-#define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */
-#define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */
-#define EXT4_JOURNAL_INO 8 /* Journal inode */
-
-/* First non-reserved inode for old ext4 filesystems */
-#define EXT4_GOOD_OLD_FIRST_INO 11
-
-/*
- * Maximal count of links to a file
- */
-#define EXT4_LINK_MAX 65000
-
-/*
- * Macro-instructions used to manage several block sizes
- */
-#define EXT4_MIN_BLOCK_SIZE 1024
-#define EXT4_MAX_BLOCK_SIZE 65536
-#define EXT4_MIN_BLOCK_LOG_SIZE 10
-#define EXT4_MAX_BLOCK_LOG_SIZE 16
-#ifdef __KERNEL__
-# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
-#else
-# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
-#endif
-#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
-#define EXT4_CLUSTER_SIZE(s) (EXT4_BLOCK_SIZE(s) << \
- EXT4_SB(s)->s_cluster_bits)
-#ifdef __KERNEL__
-# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
-# define EXT4_CLUSTER_BITS(s) (EXT4_SB(s)->s_cluster_bits)
-#else
-# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10)
-#endif
-#ifdef __KERNEL__
-#define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits)
-#define EXT4_INODE_SIZE(s) (EXT4_SB(s)->s_inode_size)
-#define EXT4_FIRST_INO(s) (EXT4_SB(s)->s_first_ino)
-#else
-#define EXT4_INODE_SIZE(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
- EXT4_GOOD_OLD_INODE_SIZE : \
- (s)->s_inode_size)
-#define EXT4_FIRST_INO(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
- EXT4_GOOD_OLD_FIRST_INO : \
- (s)->s_first_ino)
-#endif
-#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
-
-/* Translate a block number to a cluster number */
-#define EXT4_B2C(sbi, blk) ((blk) >> (sbi)->s_cluster_bits)
-/* Translate a cluster number to a block number */
-#define EXT4_C2B(sbi, cluster) ((cluster) << (sbi)->s_cluster_bits)
-/* Translate # of blks to # of clusters */
-#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
- (sbi)->s_cluster_bits)
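
For a concrete feel of the bigalloc arithmetic, assume s_cluster_bits = 4 (16 blocks per cluster): EXT4_B2C and EXT4_C2B are plain shifts, and EXT4_NUM_B2C rounds up so a partial cluster still counts as a whole one. A standalone sketch:

#include <stdio.h>

int main(void)
{
	unsigned cluster_bits = 4;			/* assumed: 16 blocks/cluster */
	unsigned long long cluster_ratio = 1ULL << cluster_bits;
	unsigned long long blks = 37;

	printf("B2C(37)     = %llu\n", blks >> cluster_bits);	/* 2 */
	printf("C2B(2)      = %llu\n", 2ULL << cluster_bits);	/* 32 */
	printf("NUM_B2C(37) = %llu\n",
	       (blks + cluster_ratio - 1) >> cluster_bits);	/* 3 */
	return 0;
}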
-
-/*
- * Structure of a blocks group descriptor
- */
-struct ext4_group_desc
-{
- __le32 bg_block_bitmap_lo; /* Blocks bitmap block */
- __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */
- __le32 bg_inode_table_lo; /* Inodes table block */
- __le16 bg_free_blocks_count_lo;/* Free blocks count */
- __le16 bg_free_inodes_count_lo;/* Free inodes count */
- __le16 bg_used_dirs_count_lo; /* Directories count */
- __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
- __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */
- __le16 bg_itable_unused_lo; /* Unused inodes count */
- __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
- __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
- __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
- __le32 bg_inode_table_hi; /* Inodes table block MSB */
- __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */
- __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */
- __le16 bg_used_dirs_count_hi; /* Directories count MSB */
- __le16 bg_itable_unused_hi; /* Unused inodes count MSB */
- __u32 bg_reserved2[3];
-};
-
-/*
- * Structure of a flex block group info
- */
-
-struct flex_groups {
- atomic_t free_inodes;
- atomic_t free_clusters;
- atomic_t used_dirs;
-};
-
-#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
-#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
-#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
-
-/*
- * Macro-instructions used to manage group descriptors
- */
-#define EXT4_MIN_DESC_SIZE 32
-#define EXT4_MIN_DESC_SIZE_64BIT 64
-#define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE
-#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size)
-#ifdef __KERNEL__
-# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group)
-# define EXT4_CLUSTERS_PER_GROUP(s) (EXT4_SB(s)->s_clusters_per_group)
-# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block)
-# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group)
-# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits)
-#else
-# define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group)
-# define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s))
-# define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group)
-#endif
-
-/*
- * Constants relative to the data blocks
- */
-#define EXT4_NDIR_BLOCKS 12
-#define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS
-#define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1)
-#define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1)
-#define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1)
-
-/*
- * Inode flags
- */
-#define EXT4_SECRM_FL 0x00000001 /* Secure deletion */
-#define EXT4_UNRM_FL 0x00000002 /* Undelete */
-#define EXT4_COMPR_FL 0x00000004 /* Compress file */
-#define EXT4_SYNC_FL 0x00000008 /* Synchronous updates */
-#define EXT4_IMMUTABLE_FL 0x00000010 /* Immutable file */
-#define EXT4_APPEND_FL 0x00000020 /* writes to file may only append */
-#define EXT4_NODUMP_FL 0x00000040 /* do not dump file */
-#define EXT4_NOATIME_FL 0x00000080 /* do not update atime */
-/* Reserved for compression usage... */
-#define EXT4_DIRTY_FL 0x00000100
-#define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
-#define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */
-#define EXT4_ECOMPR_FL 0x00000800 /* Compression error */
-/* End compression flags --- maybe not all used */
-#define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */
-#define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */
-#define EXT4_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
-#define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */
-#define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
-#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
-#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
-#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
-#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
-#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
-#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
-
-#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE 0x004B80FF /* User modifiable flags */
-
-/* Flags that should be inherited by new inodes from their parent. */
-#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
- EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
- EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
- EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
-
-/* Flags that are appropriate for regular files (all but dir-specific ones). */
-#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
-
-/* Flags that are appropriate for non-directories/regular files. */
-#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
-
-/* Mask out flags that are inappropriate for the given type of inode. */
-static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
-{
- if (S_ISDIR(mode))
- return flags;
- else if (S_ISREG(mode))
- return flags & EXT4_REG_FLMASK;
- else
- return flags & EXT4_OTHER_FLMASK;
-}
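
For example, a regular file created in a directory with DIRSYNC, TOPDIR and NOATIME set inherits only the flags that make sense for it; EXT4_REG_FLMASK strips the directory-only bits. A standalone sketch using the flag values defined above:

#include <stdio.h>

#define EXT4_NOATIME_FL	0x00000080
#define EXT4_DIRSYNC_FL	0x00010000
#define EXT4_TOPDIR_FL	0x00020000
#define EXT4_REG_FLMASK	(~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))

int main(void)
{
	unsigned int inherited = EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_NOATIME_FL;

	/* 0x30080 -> 0x80: only NOATIME survives for a regular file */
	printf("%#x -> %#x\n", inherited, inherited & EXT4_REG_FLMASK);
	return 0;
}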
-
-/*
- * Inode flags used for atomic set/get
- */
-enum {
- EXT4_INODE_SECRM = 0, /* Secure deletion */
- EXT4_INODE_UNRM = 1, /* Undelete */
- EXT4_INODE_COMPR = 2, /* Compress file */
- EXT4_INODE_SYNC = 3, /* Synchronous updates */
- EXT4_INODE_IMMUTABLE = 4, /* Immutable file */
- EXT4_INODE_APPEND = 5, /* writes to file may only append */
- EXT4_INODE_NODUMP = 6, /* do not dump file */
- EXT4_INODE_NOATIME = 7, /* do not update atime */
-/* Reserved for compression usage... */
- EXT4_INODE_DIRTY = 8,
- EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */
- EXT4_INODE_NOCOMPR = 10, /* Don't compress */
- EXT4_INODE_ECOMPR = 11, /* Compression error */
-/* End compression flags --- maybe not all used */
- EXT4_INODE_INDEX = 12, /* hash-indexed directory */
- EXT4_INODE_IMAGIC = 13, /* AFS directory */
- EXT4_INODE_JOURNAL_DATA = 14, /* file data should be journaled */
- EXT4_INODE_NOTAIL = 15, /* file tail should not be merged */
- EXT4_INODE_DIRSYNC = 16, /* dirsync behaviour (directories only) */
- EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/
- EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */
- EXT4_INODE_EXTENTS = 19, /* Inode uses extents */
- EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */
- EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */
- EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */
-};
-
-#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG))
-#define CHECK_FLAG_VALUE(FLAG) if (!TEST_FLAG_VALUE(FLAG)) { \
- printk(KERN_EMERG "EXT4 flag fail: " #FLAG ": %d %d\n", \
- EXT4_##FLAG##_FL, EXT4_INODE_##FLAG); BUG_ON(1); }
-
-/*
- * Since it's pretty easy to mix up bit numbers and hex values, and we
- * can't do a compile-time test for ENUM values, we use a run-time
- * test to make sure that EXT4_XXX_FL is consistent with respect to
- * EXT4_INODE_XXX. If all is well the printk and BUG_ON will all drop
- * out so it won't cost any extra space in the compiled kernel image.
- * But it's important that these values are the same, since we are
- * using EXT4_INODE_XXX to test for the flag values, but EXT4_XX_FL
- * must be consistent with the values of FS_XXX_FL defined in
- * include/linux/fs.h and the on-disk values found in ext2, ext3, and
- * ext4 filesystems, and of course the values defined in e2fsprogs.
- *
- * It's not paranoia if Murphy's Law really *is* out to get you. :-)
- */
-static inline void ext4_check_flag_values(void)
-{
- CHECK_FLAG_VALUE(SECRM);
- CHECK_FLAG_VALUE(UNRM);
- CHECK_FLAG_VALUE(COMPR);
- CHECK_FLAG_VALUE(SYNC);
- CHECK_FLAG_VALUE(IMMUTABLE);
- CHECK_FLAG_VALUE(APPEND);
- CHECK_FLAG_VALUE(NODUMP);
- CHECK_FLAG_VALUE(NOATIME);
- CHECK_FLAG_VALUE(DIRTY);
- CHECK_FLAG_VALUE(COMPRBLK);
- CHECK_FLAG_VALUE(NOCOMPR);
- CHECK_FLAG_VALUE(ECOMPR);
- CHECK_FLAG_VALUE(INDEX);
- CHECK_FLAG_VALUE(IMAGIC);
- CHECK_FLAG_VALUE(JOURNAL_DATA);
- CHECK_FLAG_VALUE(NOTAIL);
- CHECK_FLAG_VALUE(DIRSYNC);
- CHECK_FLAG_VALUE(TOPDIR);
- CHECK_FLAG_VALUE(HUGE_FILE);
- CHECK_FLAG_VALUE(EXTENTS);
- CHECK_FLAG_VALUE(EA_INODE);
- CHECK_FLAG_VALUE(EOFBLOCKS);
- CHECK_FLAG_VALUE(RESERVED);
-}
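
Each CHECK_FLAG_VALUE() reduces to a constant comparison that the compiler folds away. Taking EXTENTS as the example: EXT4_EXTENTS_FL is 0x00080000, EXT4_INODE_EXTENTS is 19, and 1 << 19 is indeed 0x80000, so the printk/BUG_ON body vanishes. A minimal check:

#include <stdio.h>

#define EXT4_EXTENTS_FL		0x00080000
#define EXT4_INODE_EXTENTS	19

int main(void)
{
	/* TEST_FLAG_VALUE(EXTENTS) boils down to this: */
	printf("%d\n", EXT4_EXTENTS_FL == (1 << EXT4_INODE_EXTENTS));	/* 1 */
	return 0;
}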
-
-/* Used to pass group descriptor data when online resize is done */
-struct ext4_new_group_input {
- __u32 group; /* Group number for this data */
- __u64 block_bitmap; /* Absolute block number of block bitmap */
- __u64 inode_bitmap; /* Absolute block number of inode bitmap */
- __u64 inode_table; /* Absolute block number of inode table start */
- __u32 blocks_count; /* Total number of blocks in this group */
- __u16 reserved_blocks; /* Number of reserved blocks in this group */
- __u16 unused;
-};
-
-#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
-struct compat_ext4_new_group_input {
- u32 group;
- compat_u64 block_bitmap;
- compat_u64 inode_bitmap;
- compat_u64 inode_table;
- u32 blocks_count;
- u16 reserved_blocks;
- u16 unused;
-};
-#endif
-
-/* The struct ext4_new_group_input in kernel space, with free_blocks_count */
-struct ext4_new_group_data {
- __u32 group;
- __u64 block_bitmap;
- __u64 inode_bitmap;
- __u64 inode_table;
- __u32 blocks_count;
- __u16 reserved_blocks;
- __u16 unused;
- __u32 free_blocks_count;
-};
-
-/* Indexes used to index group tables in ext4_new_group_data */
-enum {
- BLOCK_BITMAP = 0, /* block bitmap */
- INODE_BITMAP, /* inode bitmap */
- INODE_TABLE, /* inode tables */
- GROUP_TABLE_COUNT,
-};
-
-/*
- * Flags used by ext4_map_blocks()
- */
-	/* Allocate any needed blocks and/or convert an uninitialized
-	   extent to an initialized one */
-#define EXT4_GET_BLOCKS_CREATE 0x0001
- /* Request the creation of an unitialized extent */
-#define EXT4_GET_BLOCKS_UNINIT_EXT 0x0002
-#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\
- EXT4_GET_BLOCKS_CREATE)
- /* Caller is from the delayed allocation writeout path,
-	   so set the magic i_delalloc_reserve_flag after taking the
-	   inode allocation semaphore */
-#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004
-	/* caller is from the direct I/O path; request creation of
-	   uninitialized extents if not allocated, and split an
-	   uninitialized extent if blocks have been preallocated already */
-#define EXT4_GET_BLOCKS_PRE_IO 0x0008
-#define EXT4_GET_BLOCKS_CONVERT 0x0010
-#define EXT4_GET_BLOCKS_IO_CREATE_EXT (EXT4_GET_BLOCKS_PRE_IO|\
- EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
- /* Convert extent to initialized after IO complete */
-#define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\
- EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
- /* Punch out blocks of an extent */
-#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT 0x0020
- /* Don't normalize allocation size (used for fallocate) */
-#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
-	/* Request will not result in an inode size update (used for fallocate) */
-#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
-
-/*
- * Flags used by ext4_free_blocks
- */
-#define EXT4_FREE_BLOCKS_METADATA 0x0001
-#define EXT4_FREE_BLOCKS_FORGET 0x0002
-#define EXT4_FREE_BLOCKS_VALIDATED 0x0004
-#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
-#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
-#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
-
-/*
- * Flags used by ext4_discard_partial_page_buffers
- */
-#define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED 0x0001
-
-/*
- * ioctl commands
- */
-#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS
-#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS
-#define EXT4_IOC_GETVERSION _IOR('f', 3, long)
-#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
-#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
-#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
-#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
-#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
-#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
-#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input)
-#define EXT4_IOC_MIGRATE _IO('f', 9)
- /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
- /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
-#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
-#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
-#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
-
-#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
-/*
- * ioctl commands in 32 bit emulation
- */
-#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS
-#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS
-#define EXT4_IOC32_GETVERSION _IOR('f', 3, int)
-#define EXT4_IOC32_SETVERSION _IOW('f', 4, int)
-#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
-#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
-#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
-#define EXT4_IOC32_GROUP_ADD _IOW('f', 8, struct compat_ext4_new_group_input)
-#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
-#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
-#endif
-
-/* Max physical block we can address w/o extents */
-#define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF
-
-/*
- * Structure of an inode on the disk
- */
-struct ext4_inode {
- __le16 i_mode; /* File mode */
- __le16 i_uid; /* Low 16 bits of Owner Uid */
- __le32 i_size_lo; /* Size in bytes */
- __le32 i_atime; /* Access time */
- __le32 i_ctime; /* Inode Change time */
- __le32 i_mtime; /* Modification time */
- __le32 i_dtime; /* Deletion Time */
- __le16 i_gid; /* Low 16 bits of Group Id */
- __le16 i_links_count; /* Links count */
- __le32 i_blocks_lo; /* Blocks count */
- __le32 i_flags; /* File flags */
- union {
- struct {
- __le32 l_i_version;
- } linux1;
- struct {
- __u32 h_i_translator;
- } hurd1;
- struct {
- __u32 m_i_reserved1;
- } masix1;
- } osd1; /* OS dependent 1 */
- __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */
- __le32 i_generation; /* File version (for NFS) */
- __le32 i_file_acl_lo; /* File ACL */
- __le32 i_size_high;
- __le32 i_obso_faddr; /* Obsoleted fragment address */
- union {
- struct {
- __le16 l_i_blocks_high; /* were l_i_reserved1 */
- __le16 l_i_file_acl_high;
- __le16 l_i_uid_high; /* these 2 fields */
- __le16 l_i_gid_high; /* were reserved2[0] */
- __u32 l_i_reserved2;
- } linux2;
- struct {
- __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
- __u16 h_i_mode_high;
- __u16 h_i_uid_high;
- __u16 h_i_gid_high;
- __u32 h_i_author;
- } hurd2;
- struct {
- __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
- __le16 m_i_file_acl_high;
- __u32 m_i_reserved2[2];
- } masix2;
- } osd2; /* OS dependent 2 */
- __le16 i_extra_isize;
- __le16 i_pad1;
- __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
- __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
- __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
- __le32 i_crtime; /* File Creation time */
- __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
- __le32 i_version_hi; /* high 32 bits for 64-bit version */
-};
-
-struct move_extent {
- __u32 reserved; /* should be zero */
- __u32 donor_fd; /* donor file descriptor */
- __u64 orig_start; /* logical start offset in block for orig */
- __u64 donor_start; /* logical start offset in block for donor */
- __u64 len; /* block length to be moved */
- __u64 moved_len; /* moved block length */
-};
-
-#define EXT4_EPOCH_BITS 2
-#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
-#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)
-
-/*
- * Extended fields will fit into an inode if the filesystem was formatted
- * with large inodes (-I 256 or larger) and there are not currently any EAs
- * consuming all of the available space. For new inodes we always reserve
- * enough space for the kernel's known extended fields, but for inodes
- * created with an old kernel this might not have been the case. None of
- * the extended inode fields is critical for correct filesystem operation.
- * This macro checks if a certain field fits in the inode. Note that
- * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
- */
-#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
- ((offsetof(typeof(*ext4_inode), field) + \
- sizeof((ext4_inode)->field)) \
- <= (EXT4_GOOD_OLD_INODE_SIZE + \
- (einode)->i_extra_isize)) \
-
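
The check is just offsetof-plus-sizeof compared against the space the inode actually carries. A simplified stand-in (the layout below is hypothetical, not the real struct ext4_inode) makes the arithmetic visible:

#include <stdio.h>
#include <stddef.h>

#define GOOD_OLD_INODE_SIZE 128

struct fake_inode {
	char base[GOOD_OLD_INODE_SIZE];	/* the classic 128-byte inode */
	unsigned short i_extra_isize;
	unsigned short i_pad1;
	unsigned int i_crtime;		/* an extended field at offset 132 */
};

int main(void)
{
	unsigned int extra_isize = 12;	/* what this inode really has on disk */
	int fits = offsetof(struct fake_inode, i_crtime) +
		   sizeof(unsigned int) <= GOOD_OLD_INODE_SIZE + extra_isize;

	printf("i_crtime fits: %d\n", fits);	/* 132 + 4 <= 140 -> 1 */
	return 0;
}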
-static inline __le32 ext4_encode_extra_time(struct timespec *time)
-{
- return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
- (time->tv_sec >> 32) & EXT4_EPOCH_MASK : 0) |
- ((time->tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK));
-}
-
-static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
-{
- if (sizeof(time->tv_sec) > 4)
- time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
- << 32;
- time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
-}
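
The extra field packs two epoch bits (bits 32-33 of a 64-bit tv_sec) and thirty nanosecond bits into one __le32. A host-order sketch round-tripping a post-2106 timestamp through the same expressions:

#include <stdio.h>
#include <stdint.h>

#define EXT4_EPOCH_BITS	2
#define EXT4_EPOCH_MASK	((1u << EXT4_EPOCH_BITS) - 1)
#define EXT4_NSEC_MASK	(~0u << EXT4_EPOCH_BITS)

int main(void)
{
	int64_t sec = (1LL << 32) + 100;	/* a post-2106 timestamp */
	uint32_t nsec = 123456789;

	/* encode, as in ext4_encode_extra_time() */
	uint32_t extra = ((sec >> 32) & EXT4_EPOCH_MASK) |
			 ((nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK);

	/* decode, as in ext4_decode_extra_time() */
	int64_t sec2 = (uint32_t)sec |
		       ((int64_t)(extra & EXT4_EPOCH_MASK) << 32);
	uint32_t nsec2 = (extra & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;

	printf("sec %lld -> %lld, nsec %u -> %u\n",
	       (long long)sec, (long long)sec2, nsec, nsec2);
	return 0;
}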
-
-#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
-do { \
- (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
- if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
- (raw_inode)->xtime ## _extra = \
- ext4_encode_extra_time(&(inode)->xtime); \
-} while (0)
-
-#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
-do { \
- if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
- (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
- if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
- (raw_inode)->xtime ## _extra = \
- ext4_encode_extra_time(&(einode)->xtime); \
-} while (0)
-
-#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
-do { \
- (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
- if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
- ext4_decode_extra_time(&(inode)->xtime, \
- raw_inode->xtime ## _extra); \
- else \
- (inode)->xtime.tv_nsec = 0; \
-} while (0)
-
-#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
-do { \
- if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
- (einode)->xtime.tv_sec = \
- (signed)le32_to_cpu((raw_inode)->xtime); \
- if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
- ext4_decode_extra_time(&(einode)->xtime, \
- raw_inode->xtime ## _extra); \
- else \
- (einode)->xtime.tv_nsec = 0; \
-} while (0)
-
-#define i_disk_version osd1.linux1.l_i_version
-
-#if defined(__KERNEL__) || defined(__linux__)
-#define i_reserved1 osd1.linux1.l_i_reserved1
-#define i_file_acl_high osd2.linux2.l_i_file_acl_high
-#define i_blocks_high osd2.linux2.l_i_blocks_high
-#define i_uid_low i_uid
-#define i_gid_low i_gid
-#define i_uid_high osd2.linux2.l_i_uid_high
-#define i_gid_high osd2.linux2.l_i_gid_high
-#define i_reserved2 osd2.linux2.l_i_reserved2
-
-#elif defined(__GNU__)
-
-#define i_translator osd1.hurd1.h_i_translator
-#define i_uid_high osd2.hurd2.h_i_uid_high
-#define i_gid_high osd2.hurd2.h_i_gid_high
-#define i_author osd2.hurd2.h_i_author
-
-#elif defined(__masix__)
-
-#define i_reserved1 osd1.masix1.m_i_reserved1
-#define i_file_acl_high osd2.masix2.m_i_file_acl_high
-#define i_reserved2 osd2.masix2.m_i_reserved2
-
-#endif /* defined(__KERNEL__) || defined(__linux__) */
-
-/*
- * storage for cached extent
- * If ec_len == 0, then the cache is invalid.
- * If ec_start == 0, then the cache represents a gap (null mapping)
- */
-struct ext4_ext_cache {
- ext4_fsblk_t ec_start;
- ext4_lblk_t ec_block;
- __u32 ec_len; /* must be 32bit to return holes */
-};
-
-/*
- * fourth extended file system inode data in memory
- */
-struct ext4_inode_info {
- __le32 i_data[15]; /* unconverted */
- __u32 i_dtime;
- ext4_fsblk_t i_file_acl;
-
- /*
- * i_block_group is the number of the block group which contains
- * this file's inode. Constant across the lifetime of the inode,
-	 * it is used for making block allocation decisions - we try to
- * place a file's data blocks near its inode block, and new inodes
- * near to their parent directory's inode.
- */
- ext4_group_t i_block_group;
- ext4_lblk_t i_dir_start_lookup;
-#if (BITS_PER_LONG < 64)
- unsigned long i_state_flags; /* Dynamic state flags */
-#endif
- unsigned long i_flags;
-
-#ifdef CONFIG_EXT4_FS_XATTR
- /*
- * Extended attributes can be read independently of the main file
- * data. Taking i_mutex even when reading would cause contention
- * between readers of EAs and writers of regular file data, so
- * instead we synchronize on xattr_sem when reading or changing
- * EAs.
- */
- struct rw_semaphore xattr_sem;
-#endif
-
- struct list_head i_orphan; /* unlinked but open inodes */
-
- /*
- * i_disksize keeps track of what the inode size is ON DISK, not
- * in memory. During truncate, i_size is set to the new size by
- * the VFS prior to calling ext4_truncate(), but the filesystem won't
- * set i_disksize to 0 until the truncate is actually under way.
- *
- * The intent is that i_disksize always represents the blocks which
- * are used by this file. This allows recovery to restart truncate
- * on orphans if we crash during truncate. We actually write i_disksize
- * into the on-disk inode when writing inodes out, instead of i_size.
- *
- * The only time when i_disksize and i_size may be different is when
- * a truncate is in progress. The only things which change i_disksize
- * are ext4_get_block (growth) and ext4_truncate (shrinkth).
- */
- loff_t i_disksize;
-
- /*
- * i_data_sem is for serialising ext4_truncate() against
- * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
- * data tree are chopped off during truncate. We can't do that in
- * ext4 because whenever we perform intermediate commits during
- * truncate, the inode and all the metadata blocks *must* be in a
- * consistent state which allows truncation of the orphans to restart
- * during recovery. Hence we must fix the get_block-vs-truncate race
- * by other means, so we have i_data_sem.
- */
- struct rw_semaphore i_data_sem;
- struct inode vfs_inode;
- struct jbd2_inode *jinode;
-
- struct ext4_ext_cache i_cached_extent;
- /*
-	 * File creation time. Its function is the same as that of
- * struct timespec i_{a,c,m}time in the generic inode.
- */
- struct timespec i_crtime;
-
- /* mballoc */
- struct list_head i_prealloc_list;
- spinlock_t i_prealloc_lock;
-
- /* ialloc */
- ext4_group_t i_last_alloc_group;
-
- /* allocation reservation info for delalloc */
- /* In case of bigalloc, these refer to clusters rather than blocks */
- unsigned int i_reserved_data_blocks;
- unsigned int i_reserved_meta_blocks;
- unsigned int i_allocated_meta_blocks;
- ext4_lblk_t i_da_metadata_calc_last_lblock;
- int i_da_metadata_calc_len;
-
- /* on-disk additional length */
- __u16 i_extra_isize;
-
-#ifdef CONFIG_QUOTA
- /* quota space reservation, managed internally by quota code */
- qsize_t i_reserved_quota;
-#endif
-
- /* completed IOs that might need unwritten extents handling */
- struct list_head i_completed_io_list;
- spinlock_t i_completed_io_lock;
- atomic_t i_ioend_count; /* Number of outstanding io_end structs */
- /* current io_end structure for async DIO write*/
- ext4_io_end_t *cur_aio_dio;
- atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
-
- spinlock_t i_block_reservation_lock;
-
- /*
- * Transactions that contain inode's metadata needed to complete
- * fsync and fdatasync, respectively.
- */
- tid_t i_sync_tid;
- tid_t i_datasync_tid;
-};
-
-/*
- * File system states
- */
-#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */
-#define EXT4_ERROR_FS 0x0002 /* Errors detected */
-#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */
-
-/*
- * Misc. filesystem flags
- */
-#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */
-#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */
-#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */
-
-/*
- * Mount flags
- */
-#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
-#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
-#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
-#define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */
-#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
-#define EXT4_MOUNT_ERRORS_MASK 0x00070
-#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
-#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
-#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
-#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
-#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
-#define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */
-#define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */
-#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */
-#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */
-#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
-#define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
-#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
-#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
-#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
-#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
-#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
-#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
-#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
-#define EXT4_MOUNT_MBLK_IO_SUBMIT 0x4000000 /* multi-block io submits */
-#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
-#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
-#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
-#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
-#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
-
-#define EXT4_MOUNT2_EXPLICIT_DELALLOC 0x00000001 /* User explicitly
- specified delalloc */
-
-#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
- ~EXT4_MOUNT_##opt
-#define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \
- EXT4_MOUNT_##opt
-#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \
- EXT4_MOUNT_##opt)
-
-#define clear_opt2(sb, opt) EXT4_SB(sb)->s_mount_opt2 &= \
- ~EXT4_MOUNT2_##opt
-#define set_opt2(sb, opt) EXT4_SB(sb)->s_mount_opt2 |= \
- EXT4_MOUNT2_##opt
-#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \
- EXT4_MOUNT2_##opt)
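
Expanded, these helpers are plain bit operations on s_mount_opt: set_opt(sb, DELALLOC) ORs in EXT4_MOUNT_DELALLOC and test_opt() masks it back out. A minimal sketch with a stand-in sbi:

#include <stdio.h>

#define EXT4_MOUNT_DELALLOC 0x8000000

struct fake_sbi {
	unsigned int s_mount_opt;
};

int main(void)
{
	struct fake_sbi sbi = { 0 };

	sbi.s_mount_opt |= EXT4_MOUNT_DELALLOC;		/* set_opt(sb, DELALLOC) */
	printf("%d\n", !!(sbi.s_mount_opt & EXT4_MOUNT_DELALLOC));	/* 1 */
	sbi.s_mount_opt &= ~EXT4_MOUNT_DELALLOC;	/* clear_opt(sb, DELALLOC) */
	printf("%d\n", !!(sbi.s_mount_opt & EXT4_MOUNT_DELALLOC));	/* 0 */
	return 0;
}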
-
-#define ext4_test_and_set_bit __test_and_set_bit_le
-#define ext4_set_bit __set_bit_le
-#define ext4_set_bit_atomic ext2_set_bit_atomic
-#define ext4_test_and_clear_bit __test_and_clear_bit_le
-#define ext4_clear_bit __clear_bit_le
-#define ext4_clear_bit_atomic ext2_clear_bit_atomic
-#define ext4_test_bit test_bit_le
-#define ext4_find_next_zero_bit find_next_zero_bit_le
-#define ext4_find_next_bit find_next_bit_le
-
-extern void ext4_set_bits(void *bm, int cur, int len);
-
-/*
- * Maximal mount counts between two filesystem checks
- */
-#define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */
-#define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */
-
-/*
- * Behaviour when detecting errors
- */
-#define EXT4_ERRORS_CONTINUE 1 /* Continue execution */
-#define EXT4_ERRORS_RO 2 /* Remount fs read-only */
-#define EXT4_ERRORS_PANIC 3 /* Panic */
-#define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE
-
-/*
- * Structure of the super block
- */
-struct ext4_super_block {
-/*00*/ __le32 s_inodes_count; /* Inodes count */
- __le32 s_blocks_count_lo; /* Blocks count */
- __le32 s_r_blocks_count_lo; /* Reserved blocks count */
- __le32 s_free_blocks_count_lo; /* Free blocks count */
-/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
- __le32 s_first_data_block; /* First Data Block */
- __le32 s_log_block_size; /* Block size */
- __le32 s_log_cluster_size; /* Allocation cluster size */
-/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
- __le32 s_clusters_per_group; /* # Clusters per group */
- __le32 s_inodes_per_group; /* # Inodes per group */
- __le32 s_mtime; /* Mount time */
-/*30*/ __le32 s_wtime; /* Write time */
- __le16 s_mnt_count; /* Mount count */
- __le16 s_max_mnt_count; /* Maximal mount count */
- __le16 s_magic; /* Magic signature */
- __le16 s_state; /* File system state */
- __le16 s_errors; /* Behaviour when detecting errors */
- __le16 s_minor_rev_level; /* minor revision level */
-/*40*/ __le32 s_lastcheck; /* time of last check */
- __le32 s_checkinterval; /* max. time between checks */
- __le32 s_creator_os; /* OS */
- __le32 s_rev_level; /* Revision level */
-/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */
- __le16 s_def_resgid; /* Default gid for reserved blocks */
- /*
- * These fields are for EXT4_DYNAMIC_REV superblocks only.
- *
- * Note: the difference between the compatible feature set and
- * the incompatible feature set is that if there is a bit set
- * in the incompatible feature set that the kernel doesn't
- * know about, it should refuse to mount the filesystem.
- *
- * e2fsck's requirements are more strict; if it doesn't know
- * about a feature in either the compatible or incompatible
- * feature set, it must abort and not try to meddle with
- * things it doesn't understand...
- */
- __le32 s_first_ino; /* First non-reserved inode */
- __le16 s_inode_size; /* size of inode structure */
- __le16 s_block_group_nr; /* block group # of this superblock */
- __le32 s_feature_compat; /* compatible feature set */
-/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
- __le32 s_feature_ro_compat; /* readonly-compatible feature set */
-/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
-/*78*/ char s_volume_name[16]; /* volume name */
-/*88*/ char s_last_mounted[64]; /* directory where last mounted */
-/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
- /*
- * Performance hints. Directory preallocation should only
- * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
- */
- __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
- __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
- __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */
- /*
- * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set.
- */
-/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */
-/*E0*/ __le32 s_journal_inum; /* inode number of journal file */
- __le32 s_journal_dev; /* device number of journal file */
- __le32 s_last_orphan; /* start of list of inodes to delete */
- __le32 s_hash_seed[4]; /* HTREE hash seed */
- __u8 s_def_hash_version; /* Default hash version to use */
- __u8 s_jnl_backup_type;
- __le16 s_desc_size; /* size of group descriptor */
-/*100*/ __le32 s_default_mount_opts;
- __le32 s_first_meta_bg; /* First metablock block group */
- __le32 s_mkfs_time; /* When the filesystem was created */
- __le32 s_jnl_blocks[17]; /* Backup of the journal inode */
- /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
-/*150*/ __le32 s_blocks_count_hi; /* Blocks count */
- __le32 s_r_blocks_count_hi; /* Reserved blocks count */
- __le32 s_free_blocks_count_hi; /* Free blocks count */
- __le16 s_min_extra_isize; /* All inodes have at least # bytes */
- __le16 s_want_extra_isize; /* New inodes should reserve # bytes */
- __le32 s_flags; /* Miscellaneous flags */
- __le16 s_raid_stride; /* RAID stride */
- __le16 s_mmp_update_interval; /* # seconds to wait in MMP checking */
- __le64 s_mmp_block; /* Block for multi-mount protection */
- __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
- __u8 s_log_groups_per_flex; /* FLEX_BG group size */
- __u8 s_reserved_char_pad;
- __le16 s_reserved_pad;
- __le64 s_kbytes_written; /* nr of lifetime kilobytes written */
- __le32 s_snapshot_inum; /* Inode number of active snapshot */
- __le32 s_snapshot_id; /* sequential ID of active snapshot */
- __le64 s_snapshot_r_blocks_count; /* reserved blocks for active
- snapshot's future use */
- __le32 s_snapshot_list; /* inode number of the head of the
- on-disk snapshot list */
-#define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count)
- __le32 s_error_count; /* number of fs errors */
- __le32 s_first_error_time; /* first time an error happened */
- __le32 s_first_error_ino; /* inode involved in first error */
- __le64 s_first_error_block; /* block involved of first error */
- __u8 s_first_error_func[32]; /* function where the error happened */
- __le32 s_first_error_line; /* line number where error happened */
- __le32 s_last_error_time; /* most recent time of an error */
- __le32 s_last_error_ino; /* inode involved in last error */
- __le32 s_last_error_line; /* line number where error happened */
- __le64 s_last_error_block; /* block involved of last error */
- __u8 s_last_error_func[32]; /* function where the error happened */
-#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
- __u8 s_mount_opts[64];
- __le32 s_usr_quota_inum; /* inode for tracking user quota */
- __le32 s_grp_quota_inum; /* inode for tracking group quota */
- __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */
- __le32 s_reserved[109]; /* Padding to the end of the block */
-};
-
-#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
-
-#ifdef __KERNEL__
-
-/*
- * run-time mount flags
- */
-#define EXT4_MF_MNTDIR_SAMPLED 0x0001
-#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
-
-/*
- * fourth extended-fs super-block data in memory
- */
-struct ext4_sb_info {
- unsigned long s_desc_size; /* Size of a group descriptor in bytes */
- unsigned long s_inodes_per_block;/* Number of inodes per block */
- unsigned long s_blocks_per_group;/* Number of blocks in a group */
- unsigned long s_clusters_per_group; /* Number of clusters in a group */
- unsigned long s_inodes_per_group;/* Number of inodes in a group */
- unsigned long s_itb_per_group; /* Number of inode table blocks per group */
- unsigned long s_gdb_count; /* Number of group descriptor blocks */
- unsigned long s_desc_per_block; /* Number of group descriptors per block */
- ext4_group_t s_groups_count; /* Number of groups in the fs */
- ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
- unsigned long s_overhead_last; /* Last calculated overhead */
- unsigned long s_blocks_last; /* Last seen block count */
- unsigned int s_cluster_ratio; /* Number of blocks per cluster */
- unsigned int s_cluster_bits; /* log2 of s_cluster_ratio */
- loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
- struct buffer_head * s_sbh; /* Buffer containing the super block */
- struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
- struct buffer_head **s_group_desc;
- unsigned int s_mount_opt;
- unsigned int s_mount_opt2;
- unsigned int s_mount_flags;
- unsigned int s_def_mount_opt;
- ext4_fsblk_t s_sb_block;
- uid_t s_resuid;
- gid_t s_resgid;
- unsigned short s_mount_state;
- unsigned short s_pad;
- int s_addr_per_block_bits;
- int s_desc_per_block_bits;
- int s_inode_size;
- int s_first_ino;
- unsigned int s_inode_readahead_blks;
- unsigned int s_inode_goal;
- spinlock_t s_next_gen_lock;
- u32 s_next_generation;
- u32 s_hash_seed[4];
- int s_def_hash_version;
-	int s_hash_unsigned;	/* 3 if hashes should be treated as unsigned, 0 if not */
- struct percpu_counter s_freeclusters_counter;
- struct percpu_counter s_freeinodes_counter;
- struct percpu_counter s_dirs_counter;
- struct percpu_counter s_dirtyclusters_counter;
- struct blockgroup_lock *s_blockgroup_lock;
- struct proc_dir_entry *s_proc;
- struct kobject s_kobj;
- struct completion s_kobj_unregister;
-
- /* Journaling */
- struct journal_s *s_journal;
- struct list_head s_orphan;
- struct mutex s_orphan_lock;
- unsigned long s_resize_flags; /* Flags indicating if there
- is a resizer */
- unsigned long s_commit_interval;
- u32 s_max_batch_time;
- u32 s_min_batch_time;
- struct block_device *journal_bdev;
-#ifdef CONFIG_QUOTA
- char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
- int s_jquota_fmt; /* Format of quota to use */
-#endif
- unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
- struct rb_root system_blks;
-
-#ifdef EXTENTS_STATS
- /* ext4 extents stats */
- unsigned long s_ext_min;
- unsigned long s_ext_max;
- unsigned long s_depth_max;
- spinlock_t s_ext_stats_lock;
- unsigned long s_ext_blocks;
- unsigned long s_ext_extents;
-#endif
-
- /* for buddy allocator */
- struct ext4_group_info ***s_group_info;
- struct inode *s_buddy_cache;
- spinlock_t s_md_lock;
- unsigned short *s_mb_offsets;
- unsigned int *s_mb_maxs;
-
- /* tunables */
- unsigned long s_stripe;
- unsigned int s_mb_stream_request;
- unsigned int s_mb_max_to_scan;
- unsigned int s_mb_min_to_scan;
- unsigned int s_mb_stats;
- unsigned int s_mb_order2_reqs;
- unsigned int s_mb_group_prealloc;
- unsigned int s_max_writeback_mb_bump;
- /* where last allocation was done - for stream allocation */
- unsigned long s_mb_last_group;
- unsigned long s_mb_last_start;
-
- /* stats for buddy allocator */
- atomic_t s_bal_reqs; /* number of reqs with len > 1 */
- atomic_t s_bal_success; /* we found long enough chunks */
- atomic_t s_bal_allocated; /* in blocks */
- atomic_t s_bal_ex_scanned; /* total extents scanned */
- atomic_t s_bal_goals; /* goal hits */
- atomic_t s_bal_breaks; /* too long searches */
- atomic_t s_bal_2orders; /* 2^order hits */
- spinlock_t s_bal_lock;
- unsigned long s_mb_buddies_generated;
- unsigned long long s_mb_generation_time;
- atomic_t s_mb_lost_chunks;
- atomic_t s_mb_preallocated;
- atomic_t s_mb_discarded;
- atomic_t s_lock_busy;
-
- /* locality groups */
- struct ext4_locality_group __percpu *s_locality_groups;
-
- /* for write statistics */
- unsigned long s_sectors_written_start;
- u64 s_kbytes_written;
-
- unsigned int s_log_groups_per_flex;
- struct flex_groups *s_flex_groups;
-
- /* workqueue for dio unwritten */
- struct workqueue_struct *dio_unwritten_wq;
-
- /* timer for periodic error stats printing */
- struct timer_list s_err_report;
-
- /* Lazy inode table initialization info */
- struct ext4_li_request *s_li_request;
- /* Wait multiplier for lazy initialization thread */
- unsigned int s_li_wait_mult;
-
- /* Kernel thread for multiple mount protection */
- struct task_struct *s_mmp_tsk;
-
- /* record the last minlen when FITRIM is called. */
- atomic_t s_last_trim_minblks;
-};
-
-static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
-{
- return sb->s_fs_info;
-}
-static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
-{
- return container_of(inode, struct ext4_inode_info, vfs_inode);
-}
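
EXT4_I() is the usual container_of() pattern: given a pointer to the embedded vfs_inode, subtract its offset to recover the enclosing ext4_inode_info. A self-contained userspace sketch of the same trick:

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct outer {
	int a;
	int inner;	/* stands in for the embedded vfs_inode */
};

int main(void)
{
	struct outer o = { 42, 7 };
	int *ip = &o.inner;
	struct outer *back = container_of(ip, struct outer, inner);

	printf("%d\n", back->a);	/* 42: recovered the enclosing struct */
	return 0;
}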
-
-static inline struct timespec ext4_current_time(struct inode *inode)
-{
- return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
- current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
-}
-
-static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
-{
- return ino == EXT4_ROOT_INO ||
- ino == EXT4_JOURNAL_INO ||
- ino == EXT4_RESIZE_INO ||
- (ino >= EXT4_FIRST_INO(sb) &&
- ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
-}
-
-static inline void ext4_set_io_unwritten_flag(struct inode *inode,
- struct ext4_io_end *io_end)
-{
- if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
- io_end->flag |= EXT4_IO_END_UNWRITTEN;
- atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
- }
-}
-
-/*
- * Inode dynamic state flags
- */
-enum {
- EXT4_STATE_JDATA, /* journaled data exists */
- EXT4_STATE_NEW, /* inode is newly created */
- EXT4_STATE_XATTR, /* has in-inode xattrs */
- EXT4_STATE_NO_EXPAND, /* No space for expansion */
- EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */
- EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */
- EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
- EXT4_STATE_NEWENTRY, /* File just added to dir */
- EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */
-};
-
-#define EXT4_INODE_BIT_FNS(name, field, offset) \
-static inline int ext4_test_inode_##name(struct inode *inode, int bit) \
-{ \
- return test_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
-} \
-static inline void ext4_set_inode_##name(struct inode *inode, int bit) \
-{ \
- set_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
-} \
-static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \
-{ \
- clear_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
-}
-
-EXT4_INODE_BIT_FNS(flag, flags, 0)
-#if (BITS_PER_LONG < 64)
-EXT4_INODE_BIT_FNS(state, state_flags, 0)
-
-static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
-{
- (ei)->i_state_flags = 0;
-}
-#else
-EXT4_INODE_BIT_FNS(state, flags, 32)
-
-static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
-{
- /* We depend on the fact that callers will set i_flags */
-}
-#endif
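-/*
- * Illustrative usage sketch (editor's hedged addition, not in the
- * original header): the generated helpers are called as, e.g.,
- *
- *	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- *		...;
- *	ext4_set_inode_state(inode, EXT4_STATE_NEW);
- *
- * The state variants transparently land in i_state_flags on 32-bit
- * builds and in the upper half of i_flags on 64-bit builds.
- */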
-#else
-/* Assume that user mode programs are passing in an ext4fs superblock, not
- * a kernel struct super_block. This will allow us to call the feature-test
- * macros from user land. */
-#define EXT4_SB(sb) (sb)
-#endif
-
-#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
-
-/*
- * Codes for operating systems
- */
-#define EXT4_OS_LINUX 0
-#define EXT4_OS_HURD 1
-#define EXT4_OS_MASIX 2
-#define EXT4_OS_FREEBSD 3
-#define EXT4_OS_LITES 4
-
-/*
- * Revision levels
- */
-#define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */
-#define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
-
-#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV
-#define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV
-
-#define EXT4_GOOD_OLD_INODE_SIZE 128
-
-/*
- * Feature set definitions
- */
-
-#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \
- ((EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) != 0)
-#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \
- ((EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) != 0)
-#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \
- ((EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) != 0)
-#define EXT4_SET_COMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
-#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask)
-#define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask)
-#define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask)
-#define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask)
-#define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \
- EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask)
-
-#define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001
-#define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002
-#define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004
-#define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008
-#define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010
-#define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020
-
-#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
-#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
-#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
-#define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008
-#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010
-#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
-#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
-#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100
-#define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200
-#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
-
-#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
-#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
-#define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
-#define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
-#define EXT4_FEATURE_INCOMPAT_META_BG 0x0010
-#define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */
-#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
-#define EXT4_FEATURE_INCOMPAT_MMP 0x0100
-#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
-#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */
-#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */
-#define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */
-#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
-#define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x8000 /* data in inode */
-
-#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
-#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
- EXT4_FEATURE_INCOMPAT_META_BG)
-#define EXT2_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
- EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
- EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
-
-#define EXT3_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
-#define EXT3_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
- EXT4_FEATURE_INCOMPAT_RECOVER| \
- EXT4_FEATURE_INCOMPAT_META_BG)
-#define EXT3_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
- EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
- EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
-
-#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
-#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
- EXT4_FEATURE_INCOMPAT_RECOVER| \
- EXT4_FEATURE_INCOMPAT_META_BG| \
- EXT4_FEATURE_INCOMPAT_EXTENTS| \
- EXT4_FEATURE_INCOMPAT_64BIT| \
- EXT4_FEATURE_INCOMPAT_FLEX_BG| \
- EXT4_FEATURE_INCOMPAT_MMP)
-#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
- EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
- EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
- EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
- EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
- EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
- EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
- EXT4_FEATURE_RO_COMPAT_BIGALLOC)
-
-/*
- * Default values for user and/or group using reserved blocks
- */
-#define EXT4_DEF_RESUID 0
-#define EXT4_DEF_RESGID 0
-
-#define EXT4_DEF_INODE_READAHEAD_BLKS 32
-
-/*
- * Default mount options
- */
-#define EXT4_DEFM_DEBUG 0x0001
-#define EXT4_DEFM_BSDGROUPS 0x0002
-#define EXT4_DEFM_XATTR_USER 0x0004
-#define EXT4_DEFM_ACL 0x0008
-#define EXT4_DEFM_UID16 0x0010
-#define EXT4_DEFM_JMODE 0x0060
-#define EXT4_DEFM_JMODE_DATA 0x0020
-#define EXT4_DEFM_JMODE_ORDERED 0x0040
-#define EXT4_DEFM_JMODE_WBACK 0x0060
-#define EXT4_DEFM_NOBARRIER 0x0100
-#define EXT4_DEFM_BLOCK_VALIDITY 0x0200
-#define EXT4_DEFM_DISCARD 0x0400
-#define EXT4_DEFM_NODELALLOC 0x0800
-
-/*
- * Default journal batch times
- */
-#define EXT4_DEF_MIN_BATCH_TIME 0
-#define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */
-
-/*
- * Minimum number of groups in a flexgroup before we separate out
- * directories into the first block group of a flexgroup
- */
-#define EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME 4
-
-/*
- * Structure of a directory entry
- */
-#define EXT4_NAME_LEN 255
-
-struct ext4_dir_entry {
- __le32 inode; /* Inode number */
- __le16 rec_len; /* Directory entry length */
- __le16 name_len; /* Name length */
- char name[EXT4_NAME_LEN]; /* File name */
-};
-
-/*
- * The new version of the directory entry. Since EXT4 structures are
- * stored in Intel (little-endian) byte order, and the name_len field
- * can never be bigger than 255 chars, it's safe to reclaim the extra
- * byte for the file_type field.
- */
-struct ext4_dir_entry_2 {
- __le32 inode; /* Inode number */
- __le16 rec_len; /* Directory entry length */
- __u8 name_len; /* Name length */
- __u8 file_type;
- char name[EXT4_NAME_LEN]; /* File name */
-};
-
-/*
- * Ext4 directory file types. Only the low 3 bits are used. The
- * other bits are reserved for now.
- */
-#define EXT4_FT_UNKNOWN 0
-#define EXT4_FT_REG_FILE 1
-#define EXT4_FT_DIR 2
-#define EXT4_FT_CHRDEV 3
-#define EXT4_FT_BLKDEV 4
-#define EXT4_FT_FIFO 5
-#define EXT4_FT_SOCK 6
-#define EXT4_FT_SYMLINK 7
-
-#define EXT4_FT_MAX 8
-
-/*
- * EXT4_DIR_PAD defines the directory entry boundaries
- *
- * NOTE: It must be a multiple of 4
- */
-#define EXT4_DIR_PAD 4
-#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
-#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
- ~EXT4_DIR_ROUND)
-#define EXT4_MAX_REC_LEN ((1<<16)-1)
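-/*
- * Worked example (illustrative): for the name "foo", EXT4_DIR_REC_LEN(3)
- * is (3 + 8 + 3) & ~3 == 12 bytes: the 8-byte fixed part of the entry
- * plus the name, rounded up to the next 4-byte boundary.
- */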
-
-/*
- * If we ever get support for fs block sizes > page_size, we'll need
- * to remove the #if statements in the next two functions...
- */
-static inline unsigned int
-ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
-{
- unsigned len = le16_to_cpu(dlen);
-
-#if (PAGE_CACHE_SIZE >= 65536)
- if (len == EXT4_MAX_REC_LEN || len == 0)
- return blocksize;
- return (len & 65532) | ((len & 3) << 16);
-#else
- return len;
-#endif
-}
-
-static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
-{
- if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
- BUG();
-#if (PAGE_CACHE_SIZE >= 65536)
- if (len < 65536)
- return cpu_to_le16(len);
- if (len == blocksize) {
- if (blocksize == 65536)
- return cpu_to_le16(EXT4_MAX_REC_LEN);
- else
- return cpu_to_le16(0);
- }
- return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
-#else
- return cpu_to_le16(len);
-#endif
-}
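-/*
- * Worked example (illustrative, assuming a 64KB PAGE_CACHE_SIZE build
- * with a 65536-byte blocksize): a single entry covering the whole block
- * cannot store rec_len == 65536 in a __le16, so ext4_rec_len_to_disk()
- * writes EXT4_MAX_REC_LEN (65535) and ext4_rec_len_from_disk() maps it
- * back to the blocksize.
- */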
-
-/*
- * Hash Tree Directory indexing
- * (c) Daniel Phillips, 2001
- */
-
-#define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \
- EXT4_FEATURE_COMPAT_DIR_INDEX) && \
- ext4_test_inode_flag((dir), EXT4_INODE_INDEX))
-#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX)
-#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
-
-/* Legal values for the dx_root hash_version field: */
-
-#define DX_HASH_LEGACY 0
-#define DX_HASH_HALF_MD4 1
-#define DX_HASH_TEA 2
-#define DX_HASH_LEGACY_UNSIGNED 3
-#define DX_HASH_HALF_MD4_UNSIGNED 4
-#define DX_HASH_TEA_UNSIGNED 5
-
-#ifdef __KERNEL__
-
-/* hash info structure used by the directory hash */
-struct dx_hash_info
-{
- u32 hash;
- u32 minor_hash;
- int hash_version;
- u32 *seed;
-};
-
-
-/* 32 and 64 bit signed EOF for dx directories */
-#define EXT4_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1)
-#define EXT4_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1)
-
-
-/*
- * Control parameters used by ext4_htree_next_block
- */
-#define HASH_NB_ALWAYS 1
-
-
-/*
- * Describe an inode's exact location on disk and in memory
- */
-struct ext4_iloc
-{
- struct buffer_head *bh;
- unsigned long offset;
- ext4_group_t block_group;
-};
-
-static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc)
-{
- return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset);
-}
-
-/*
- * This structure is stuffed into the struct file's private_data field
- * for directories. It is where we put information so that we can do
- * readdir operations in hash tree order.
- */
-struct dir_private_info {
- struct rb_root root;
- struct rb_node *curr_node;
- struct fname *extra_fname;
- loff_t last_pos;
- __u32 curr_hash;
- __u32 curr_minor_hash;
- __u32 next_hash;
-};
-
-/* calculate the first block number of the group */
-static inline ext4_fsblk_t
-ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
-{
- return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
- le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
-}
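-/*
- * Example (illustrative): with 32768 blocks per group and
- * s_first_data_block == 0 (typical for 4KB-block filesystems), group 5
- * starts at block 5 * 32768 == 163840; on 1KB-block filesystems
- * s_first_data_block is 1, shifting every group start by one block.
- */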
-
-/*
- * Special error return code only used by dx_probe() and its callers.
- */
-#define ERR_BAD_DX_DIR -75000
-
-void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
- ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
-
-/*
- * Timeout and state flag for lazy initialization inode thread.
- */
-#define EXT4_DEF_LI_WAIT_MULT 10
-#define EXT4_DEF_LI_MAX_START_DELAY 5
-#define EXT4_LAZYINIT_QUIT 0x0001
-#define EXT4_LAZYINIT_RUNNING 0x0002
-
-/*
- * Lazy inode table initialization info
- */
-struct ext4_lazy_init {
- unsigned long li_state;
- struct list_head li_request_list;
- struct mutex li_list_mtx;
-};
-
-struct ext4_li_request {
- struct super_block *lr_super;
- struct ext4_sb_info *lr_sbi;
- ext4_group_t lr_next_group;
- struct list_head lr_request;
- unsigned long lr_next_sched;
- unsigned long lr_timeout;
-};
-
-struct ext4_features {
- struct kobject f_kobj;
- struct completion f_kobj_unregister;
-};
-
-/*
- * This structure will be used for multiple mount protection. It will be
- * written into the block number saved in the s_mmp_block field in the
- * superblock. Programs that check MMP should assume that if
- * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe
- * to use the filesystem, regardless of how old the timestamp is.
- */
-#define EXT4_MMP_MAGIC 0x004D4D50U /* ASCII for MMP */
-#define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */
-#define EXT4_MMP_SEQ_FSCK 0xE24D4D50U /* mmp_seq value when being fscked */
-#define EXT4_MMP_SEQ_MAX 0xE24D4D4FU /* maximum valid mmp_seq value */
-
-struct mmp_struct {
- __le32 mmp_magic; /* Magic number for MMP */
- __le32 mmp_seq; /* Sequence no. updated periodically */
-
- /*
- * mmp_time, mmp_nodename & mmp_bdevname are only used for information
- * purposes and do not affect the correctness of the algorithm
- */
- __le64 mmp_time; /* Time last updated */
- char mmp_nodename[64]; /* Node which last updated MMP block */
- char mmp_bdevname[32]; /* Bdev which last updated MMP block */
-
- /*
- * mmp_check_interval is used to verify if the MMP block has been
- * updated on the block device. The value is updated based on the
- * maximum time to write the MMP block during an update cycle.
- */
- __le16 mmp_check_interval;
-
- __le16 mmp_pad1;
- __le32 mmp_pad2[227];
-};
-
-/* arguments passed to the mmp thread */
-struct mmpd_data {
- struct buffer_head *bh; /* bh from initial read_mmp_block() */
- struct super_block *sb; /* super block of the fs */
-};
-
-/*
- * Check interval multiplier
- * The MMP block is written every update interval and initially checked every
- * update interval x the multiplier (the value is then adapted based on the
- * write latency). The reason is that writes can be delayed under load and we
- * don't want readers to incorrectly assume that the filesystem is no longer
- * in use.
- */
-#define EXT4_MMP_CHECK_MULT 2UL
-
-/*
- * Minimum interval for MMP checking in seconds.
- */
-#define EXT4_MMP_MIN_CHECK_INTERVAL 5UL
-
-/*
- * Maximum interval for MMP checking in seconds.
- */
-#define EXT4_MMP_MAX_CHECK_INTERVAL 300UL
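-/*
- * Sketch (illustrative; see mmp.c for the authoritative logic): the
- * check interval is adapted roughly as
- *
- *	interval = EXT4_MMP_CHECK_MULT * measured_write_time;
- *	interval = clamp(interval, EXT4_MMP_MIN_CHECK_INTERVAL,
- *			 EXT4_MMP_MAX_CHECK_INTERVAL);
- *
- * so a 3-second worst-case MMP write yields a 6-second check interval,
- * bounded to the [5s, 300s] window defined above.
- */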
-
-/*
- * Function prototypes
- */
-
-/*
- * Ok, these declarations are also in <linux/kernel.h> but none of the
- * ext4 source files needs to include it, so they are duplicated here.
- */
-# define NORET_TYPE /**/
-# define ATTRIB_NORET __attribute__((noreturn))
-# define NORET_AND noreturn,
-
-/* bitmap.c */
-extern unsigned int ext4_count_free(struct buffer_head *, unsigned);
-
-/* balloc.c */
-extern unsigned int ext4_block_group(struct super_block *sb,
- ext4_fsblk_t blocknr);
-extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
- ext4_fsblk_t blocknr);
-extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
-extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
- ext4_group_t group);
-extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
- ext4_fsblk_t goal,
- unsigned int flags,
- unsigned long *count,
- int *errp);
-extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
- s64 nclusters, unsigned int flags);
-extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
-extern void ext4_check_blocks_bitmap(struct super_block *);
-extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
- ext4_group_t block_group,
- struct buffer_head ** bh);
-extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
-
-extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
- ext4_group_t block_group);
-extern int ext4_wait_block_bitmap(struct super_block *sb,
- ext4_group_t block_group,
- struct buffer_head *bh);
-extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
- ext4_group_t block_group);
-extern void ext4_init_block_bitmap(struct super_block *sb,
- struct buffer_head *bh,
- ext4_group_t group,
- struct ext4_group_desc *desc);
-extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
- ext4_group_t block_group,
- struct ext4_group_desc *gdp);
-extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
- ext4_group_t block_group,
- struct ext4_group_desc *gdp);
-ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
-
-/* dir.c */
-extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
- struct file *,
- struct ext4_dir_entry_2 *,
- struct buffer_head *, unsigned int);
-#define ext4_check_dir_entry(dir, filp, de, bh, offset) \
- unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \
- (de), (bh), (offset)))
-extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
- __u32 minor_hash,
- struct ext4_dir_entry_2 *dirent);
-extern void ext4_htree_free_dir_info(struct dir_private_info *p);
-
-/* fsync.c */
-extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
-extern int ext4_flush_completed_IO(struct inode *);
-
-/* hash.c */
-extern int ext4fs_dirhash(const char *name, int len,
- struct dx_hash_info *hinfo);
-
-/* ialloc.c */
-extern struct inode *ext4_new_inode(handle_t *, struct inode *, umode_t,
- const struct qstr *qstr, __u32 goal,
- uid_t *owner);
-extern void ext4_free_inode(handle_t *, struct inode *);
-extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
-extern unsigned long ext4_count_free_inodes(struct super_block *);
-extern unsigned long ext4_count_dirs(struct super_block *);
-extern void ext4_check_inodes_bitmap(struct super_block *);
-extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
-extern int ext4_init_inode_table(struct super_block *sb,
- ext4_group_t group, int barrier);
-extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
-
-/* mballoc.c */
-extern long ext4_mb_stats;
-extern long ext4_mb_max_to_scan;
-extern int ext4_mb_init(struct super_block *, int);
-extern int ext4_mb_release(struct super_block *);
-extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
- struct ext4_allocation_request *, int *);
-extern int ext4_mb_reserve_blocks(struct super_block *, int);
-extern void ext4_discard_preallocations(struct inode *);
-extern int __init ext4_init_mballoc(void);
-extern void ext4_exit_mballoc(void);
-extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
- struct buffer_head *bh, ext4_fsblk_t block,
- unsigned long count, int flags);
-extern int ext4_mb_add_groupinfo(struct super_block *sb,
- ext4_group_t i, struct ext4_group_desc *desc);
-extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
- ext4_fsblk_t block, unsigned long count);
-extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
-
-/* inode.c */
-struct buffer_head *ext4_getblk(handle_t *, struct inode *,
- ext4_lblk_t, int, int *);
-struct buffer_head *ext4_bread(handle_t *, struct inode *,
- ext4_lblk_t, int, int *);
-int ext4_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
-
-extern struct inode *ext4_iget(struct super_block *, unsigned long);
-extern int ext4_write_inode(struct inode *, struct writeback_control *);
-extern int ext4_setattr(struct dentry *, struct iattr *);
-extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat);
-extern void ext4_evict_inode(struct inode *);
-extern void ext4_clear_inode(struct inode *);
-extern int ext4_sync_inode(handle_t *, struct inode *);
-extern void ext4_dirty_inode(struct inode *, int);
-extern int ext4_change_inode_journal_flag(struct inode *, int);
-extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
-extern int ext4_can_truncate(struct inode *inode);
-extern void ext4_truncate(struct inode *);
-extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
-extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
-extern void ext4_set_inode_flags(struct inode *);
-extern void ext4_get_inode_flags(struct ext4_inode_info *);
-extern int ext4_alloc_da_blocks(struct inode *inode);
-extern void ext4_set_aops(struct inode *inode);
-extern int ext4_writepage_trans_blocks(struct inode *);
-extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
-extern int ext4_discard_partial_page_buffers(handle_t *handle,
- struct address_space *mapping, loff_t from,
- loff_t length, int flags);
-extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
-extern qsize_t *ext4_get_reserved_space(struct inode *inode);
-extern void ext4_da_update_reserve_space(struct inode *inode,
- int used, int quota_claim);
-
-/* indirect.c */
-extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
- struct ext4_map_blocks *map, int flags);
-extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
- const struct iovec *iov, loff_t offset,
- unsigned long nr_segs);
-extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
-extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
-extern void ext4_ind_truncate(struct inode *inode);
-
-/* ioctl.c */
-extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
-extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
-
-/* migrate.c */
-extern int ext4_ext_migrate(struct inode *);
-
-/* namei.c */
-extern int ext4_orphan_add(handle_t *, struct inode *);
-extern int ext4_orphan_del(handle_t *, struct inode *);
-extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
- __u32 start_minor_hash, __u32 *next_hash);
-
-/* resize.c */
-extern int ext4_group_add(struct super_block *sb,
- struct ext4_new_group_data *input);
-extern int ext4_group_extend(struct super_block *sb,
- struct ext4_super_block *es,
- ext4_fsblk_t n_blocks_count);
-extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
-
-/* super.c */
-extern void *ext4_kvmalloc(size_t size, gfp_t flags);
-extern void *ext4_kvzalloc(size_t size, gfp_t flags);
-extern void ext4_kvfree(void *ptr);
-extern __printf(4, 5)
-void __ext4_error(struct super_block *, const char *, unsigned int,
- const char *, ...);
-#define ext4_error(sb, message...) __ext4_error(sb, __func__, \
- __LINE__, ## message)
-extern __printf(5, 6)
-void ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t,
- const char *, ...);
-extern __printf(5, 6)
-void ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t,
- const char *, ...);
-extern void __ext4_std_error(struct super_block *, const char *,
- unsigned int, int);
-extern __printf(4, 5)
-void __ext4_abort(struct super_block *, const char *, unsigned int,
- const char *, ...);
-#define ext4_abort(sb, message...) __ext4_abort(sb, __func__, \
- __LINE__, ## message)
-extern __printf(4, 5)
-void __ext4_warning(struct super_block *, const char *, unsigned int,
- const char *, ...);
-#define ext4_warning(sb, message...) __ext4_warning(sb, __func__, \
- __LINE__, ## message)
-extern __printf(3, 4)
-void ext4_msg(struct super_block *, const char *, const char *, ...);
-extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
- const char *, unsigned int, const char *);
-#define dump_mmp_msg(sb, mmp, msg) __dump_mmp_msg(sb, mmp, __func__, \
- __LINE__, msg)
-extern __printf(7, 8)
-void __ext4_grp_locked_error(const char *, unsigned int,
- struct super_block *, ext4_group_t,
- unsigned long, ext4_fsblk_t,
- const char *, ...);
-#define ext4_grp_locked_error(sb, grp, message...) \
- __ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message)
-extern void ext4_update_dynamic_rev(struct super_block *sb);
-extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
- __u32 compat);
-extern int ext4_update_rocompat_feature(handle_t *handle,
- struct super_block *sb, __u32 rocompat);
-extern int ext4_update_incompat_feature(handle_t *handle,
- struct super_block *sb, __u32 incompat);
-extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
- struct ext4_group_desc *bg);
-extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
- struct ext4_group_desc *bg);
-extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
- struct ext4_group_desc *bg);
-extern __u32 ext4_free_group_clusters(struct super_block *sb,
- struct ext4_group_desc *bg);
-extern __u32 ext4_free_inodes_count(struct super_block *sb,
- struct ext4_group_desc *bg);
-extern __u32 ext4_used_dirs_count(struct super_block *sb,
- struct ext4_group_desc *bg);
-extern __u32 ext4_itable_unused_count(struct super_block *sb,
- struct ext4_group_desc *bg);
-extern void ext4_block_bitmap_set(struct super_block *sb,
- struct ext4_group_desc *bg, ext4_fsblk_t blk);
-extern void ext4_inode_bitmap_set(struct super_block *sb,
- struct ext4_group_desc *bg, ext4_fsblk_t blk);
-extern void ext4_inode_table_set(struct super_block *sb,
- struct ext4_group_desc *bg, ext4_fsblk_t blk);
-extern void ext4_free_group_clusters_set(struct super_block *sb,
- struct ext4_group_desc *bg,
- __u32 count);
-extern void ext4_free_inodes_set(struct super_block *sb,
- struct ext4_group_desc *bg, __u32 count);
-extern void ext4_used_dirs_set(struct super_block *sb,
- struct ext4_group_desc *bg, __u32 count);
-extern void ext4_itable_unused_set(struct super_block *sb,
- struct ext4_group_desc *bg, __u32 count);
-extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
- struct ext4_group_desc *gdp);
-extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
- struct ext4_group_desc *gdp);
-
-static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
-{
- return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
- le32_to_cpu(es->s_blocks_count_lo);
-}
-
-static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es)
-{
- return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) |
- le32_to_cpu(es->s_r_blocks_count_lo);
-}
-
-static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es)
-{
- return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) |
- le32_to_cpu(es->s_free_blocks_count_lo);
-}
-
-static inline void ext4_blocks_count_set(struct ext4_super_block *es,
- ext4_fsblk_t blk)
-{
- es->s_blocks_count_lo = cpu_to_le32((u32)blk);
- es->s_blocks_count_hi = cpu_to_le32(blk >> 32);
-}
-
-static inline void ext4_free_blocks_count_set(struct ext4_super_block *es,
- ext4_fsblk_t blk)
-{
- es->s_free_blocks_count_lo = cpu_to_le32((u32)blk);
- es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32);
-}
-
-static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
- ext4_fsblk_t blk)
-{
- es->s_r_blocks_count_lo = cpu_to_le32((u32)blk);
- es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
-}
-
-static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
-{
- if (S_ISREG(le16_to_cpu(raw_inode->i_mode)))
- return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
- le32_to_cpu(raw_inode->i_size_lo);
- else
- return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
-}
-
-static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
-{
- raw_inode->i_size_lo = cpu_to_le32(i_size);
- raw_inode->i_size_high = cpu_to_le32(i_size >> 32);
-}
-
-static inline
-struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
- ext4_group_t group)
-{
- struct ext4_group_info ***grp_info;
- long indexv, indexh;
- grp_info = EXT4_SB(sb)->s_group_info;
- indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
- indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
- return grp_info[indexv][indexh];
-}
-
-/*
- * Reading s_groups_count requires using smp_rmb() afterwards. See
- * the locking protocol documented in the comments of ext4_group_add()
- * in resize.c
- */
-static inline ext4_group_t ext4_get_groups_count(struct super_block *sb)
-{
- ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
-
- smp_rmb();
- return ngroups;
-}
-
-static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi,
- ext4_group_t block_group)
-{
- return block_group >> sbi->s_log_groups_per_flex;
-}
-
-static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi)
-{
- return 1 << sbi->s_log_groups_per_flex;
-}
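-/*
- * Example (illustrative): with s_log_groups_per_flex == 4, a flex group
- * spans ext4_flex_bg_size() == 16 block groups, and block group 37
- * belongs to flex group 37 >> 4 == 2.
- */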
-
-#define ext4_std_error(sb, errno) \
-do { \
- if ((errno)) \
- __ext4_std_error((sb), __func__, __LINE__, (errno)); \
-} while (0)
-
-#ifdef CONFIG_SMP
-/* Each CPU can accumulate percpu_counter_batch clusters in its local
- * counter, so we need to make sure we have more free clusters than
- * percpu_counter_batch * nr_cpu_ids. We also add a safety window of
- * 4 times that amount.
- */
-#define EXT4_FREECLUSTERS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
-#else
-#define EXT4_FREECLUSTERS_WATERMARK 0
-#endif
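-/*
- * Worked example (illustrative): with nr_cpu_ids == 8 and
- * percpu_counter_batch == 32, the watermark is 4 * (32 * 8) == 1024
- * clusters; below that, per-cpu counter drift could mask real
- * exhaustion, so callers should fall back to an exact counter sum.
- */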
-
-static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
-{
- /*
- * XXX: replace with spinlock if seen contended -bzzz
- */
- down_write(&EXT4_I(inode)->i_data_sem);
- if (newsize > EXT4_I(inode)->i_disksize)
- EXT4_I(inode)->i_disksize = newsize;
- up_write(&EXT4_I(inode)->i_data_sem);
-}
-
-struct ext4_group_info {
- unsigned long bb_state;
- struct rb_root bb_free_root;
- ext4_grpblk_t bb_first_free; /* first free block */
- ext4_grpblk_t bb_free; /* total free blocks */
- ext4_grpblk_t bb_fragments; /* nr of freespace fragments */
- ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */
- struct list_head bb_prealloc_list;
-#ifdef DOUBLE_CHECK
- void *bb_bitmap;
-#endif
- struct rw_semaphore alloc_sem;
- ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block
- * regions, index is order.
- * bb_counters[3] = 5 means
- * 5 free 8-block regions. */
-};
-
-#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
-#define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1
-
-#define EXT4_MB_GRP_NEED_INIT(grp) \
- (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
-
-#define EXT4_MB_GRP_WAS_TRIMMED(grp) \
- (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
-#define EXT4_MB_GRP_SET_TRIMMED(grp) \
- (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
-#define EXT4_MB_GRP_CLEAR_TRIMMED(grp) \
- (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
-
-#define EXT4_MAX_CONTENTION 8
-#define EXT4_CONTENTION_THRESHOLD 2
-
-static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb,
- ext4_group_t group)
-{
- return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group);
-}
-
-/*
- * Returns true if the filesystem is busy enough that attempts to
- * access the block group locks have run into contention.
- */
-static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi)
-{
- return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD);
-}
-
-static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
-{
- spinlock_t *lock = ext4_group_lock_ptr(sb, group);
- if (spin_trylock(lock)) {
- /*
- * We're able to grab the lock right away, so drop the
- * lock contention counter.
- */
- atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
- } else {
- /*
- * The lock is busy, so bump the contention counter,
- * and then wait on the spin lock.
- */
- atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1,
- EXT4_MAX_CONTENTION);
- spin_lock(lock);
- }
-}
-
-static inline void ext4_unlock_group(struct super_block *sb,
- ext4_group_t group)
-{
- spin_unlock(ext4_group_lock_ptr(sb, group));
-}
-
-static inline void ext4_mark_super_dirty(struct super_block *sb)
-{
- if (EXT4_SB(sb)->s_journal == NULL)
- sb->s_dirt = 1;
-}
-
-/*
- * Block validity checking
- */
-#define ext4_check_indirect_blockref(inode, bh) \
- ext4_check_blockref(__func__, __LINE__, inode, \
- (__le32 *)(bh)->b_data, \
- EXT4_ADDR_PER_BLOCK((inode)->i_sb))
-
-#define ext4_ind_check_inode(inode) \
- ext4_check_blockref(__func__, __LINE__, inode, \
- EXT4_I(inode)->i_data, \
- EXT4_NDIR_BLOCKS)
-
-/*
- * Inodes and files operations
- */
-
-/* dir.c */
-extern const struct file_operations ext4_dir_operations;
-
-/* file.c */
-extern const struct inode_operations ext4_file_inode_operations;
-extern const struct file_operations ext4_file_operations;
-extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
-
-/* namei.c */
-extern const struct inode_operations ext4_dir_inode_operations;
-extern const struct inode_operations ext4_special_inode_operations;
-extern struct dentry *ext4_get_parent(struct dentry *child);
-
-/* symlink.c */
-extern const struct inode_operations ext4_symlink_inode_operations;
-extern const struct inode_operations ext4_fast_symlink_inode_operations;
-
-/* block_validity */
-extern void ext4_release_system_zone(struct super_block *sb);
-extern int ext4_setup_system_zone(struct super_block *sb);
-extern int __init ext4_init_system_zone(void);
-extern void ext4_exit_system_zone(void);
-extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
- ext4_fsblk_t start_blk,
- unsigned int count);
-extern int ext4_check_blockref(const char *, unsigned int,
- struct inode *, __le32 *, unsigned int);
-
-/* extents.c */
-extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
-extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
-extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
- int chunk);
-extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
- struct ext4_map_blocks *map, int flags);
-extern void ext4_ext_truncate(struct inode *);
-extern int ext4_ext_punch_hole(struct file *file, loff_t offset,
- loff_t length);
-extern void ext4_ext_init(struct super_block *);
-extern void ext4_ext_release(struct super_block *);
-extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
- loff_t len);
-extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
- ssize_t len);
-extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
- struct ext4_map_blocks *map, int flags);
-extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
- __u64 start, __u64 len);
-/* move_extent.c */
-extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
- __u64 start_orig, __u64 start_donor,
- __u64 len, __u64 *moved_len);
-
-/* page-io.c */
-extern int __init ext4_init_pageio(void);
-extern void ext4_exit_pageio(void);
-extern void ext4_ioend_wait(struct inode *);
-extern void ext4_free_io_end(ext4_io_end_t *io);
-extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
-extern int ext4_end_io_nolock(ext4_io_end_t *io);
-extern void ext4_io_submit(struct ext4_io_submit *io);
-extern int ext4_bio_write_page(struct ext4_io_submit *io,
- struct page *page,
- int len,
- struct writeback_control *wbc);
-
-/* mmp.c */
-extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
-
-/* ext4-private buffer_head state bits, numbered from BH_JBDPrivateStart */
-enum ext4_state_bits {
- BH_Uninit /* blocks are allocated but uninitialized on disk */
- = BH_JBDPrivateStart,
- BH_AllocFromCluster, /* allocated blocks were part of already
- * allocated cluster. Note that this flag will
- * never, ever appear in a buffer_head's state
- * flag. See EXT4_MAP_FROM_CLUSTER to see where
- * this is used. */
- BH_Da_Mapped, /* Delayed allocated block that now has a mapping. This
- * flag is set when ext4_map_blocks is called on a
- * delayed allocated block to get its real mapping. */
-};
-
-BUFFER_FNS(Uninit, uninit)
-TAS_BUFFER_FNS(Uninit, uninit)
-BUFFER_FNS(Da_Mapped, da_mapped)
-
-/*
- * Add a new method to test whether block and inode bitmaps are properly
- * initialized. With uninit_bg, reading the block from disk is not enough
- * to mark the bitmap uptodate; we also need to zero out the bitmap.
- */
-#define BH_BITMAP_UPTODATE BH_JBDPrivateStart
-
-static inline int bitmap_uptodate(struct buffer_head *bh)
-{
- return (buffer_uptodate(bh) &&
- test_bit(BH_BITMAP_UPTODATE, &(bh)->b_state));
-}
-static inline void set_bitmap_uptodate(struct buffer_head *bh)
-{
- set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
-}
-
-#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
-
-/* For ioend & aio unwritten conversion wait queues */
-#define EXT4_WQ_HASH_SZ 37
-#define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\
- EXT4_WQ_HASH_SZ])
-#define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\
- EXT4_WQ_HASH_SZ])
-extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
-
-#define EXT4_RESIZING 0
-extern int ext4_resize_begin(struct super_block *sb);
-extern void ext4_resize_end(struct super_block *sb);
-
-#endif /* __KERNEL__ */
-
-#include "ext4_extents.h"
-
-#endif /* _EXT4_H */
diff --git a/ANDROID_3.4.5/fs/ext4/ext4_extents.h b/ANDROID_3.4.5/fs/ext4/ext4_extents.h
deleted file mode 100644
index 0f58b86e..00000000
--- a/ANDROID_3.4.5/fs/ext4/ext4_extents.h
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
- * Written by Alex Tomas <alex@clusterfs.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
- */
-
-#ifndef _EXT4_EXTENTS
-#define _EXT4_EXTENTS
-
-#include "ext4.h"
-
-/*
- * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks
- * becomes very small, so index split, in-depth growing and
- * other hard changes happen much more often.
- * This is for debug purposes only.
- */
-#define AGGRESSIVE_TEST_
-
-/*
- * With EXTENTS_STATS defined, the number of blocks and extents
- * are collected in the truncate path. They'll be shown at
- * umount time.
- */
-#define EXTENTS_STATS__
-
-/*
- * If CHECK_BINSEARCH is defined, then the results of the binary search
- * will also be checked by linear search.
- */
-#define CHECK_BINSEARCH__
-
-/*
- * Turn on EXT_DEBUG to get lots of info about extents operations.
- */
-#define EXT_DEBUG__
-#ifdef EXT_DEBUG
-#define ext_debug(fmt, ...) printk(fmt, ##__VA_ARGS__)
-#else
-#define ext_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
-#endif
-
-/*
- * If EXT_STATS is defined then stats numbers are collected.
- * These numbers will be displayed at umount time.
- */
-#define EXT_STATS_
-
-
-/*
- * ext4_inode has i_block array (60 bytes total).
- * The first 12 bytes store ext4_extent_header;
- * the remainder stores an array of ext4_extent.
- */
-
-/*
- * This is the extent on-disk structure.
- * It's used at the bottom of the tree.
- */
-struct ext4_extent {
- __le32 ee_block; /* first logical block extent covers */
- __le16 ee_len; /* number of blocks covered by extent */
- __le16 ee_start_hi; /* high 16 bits of physical block */
- __le32 ee_start_lo; /* low 32 bits of physical block */
-};
-
-/*
- * This is the index on-disk structure.
- * It is used at all levels except the bottom.
- */
-struct ext4_extent_idx {
- __le32 ei_block; /* index covers logical blocks from 'block' */
- __le32 ei_leaf_lo; /* pointer to the physical block of the next *
- * level. leaf or next index could be there */
- __le16 ei_leaf_hi; /* high 16 bits of physical block */
- __u16 ei_unused;
-};
-
-/*
- * Each block (leaves and indexes), even the inode-stored one, has a header.
- */
-struct ext4_extent_header {
- __le16 eh_magic; /* probably will support different formats */
- __le16 eh_entries; /* number of valid entries */
- __le16 eh_max; /* capacity of the store, in entries */
- __le16 eh_depth; /* does the tree have real underlying blocks? */
- __le32 eh_generation; /* generation of the tree */
-};
-
-#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a)
-
-/*
- * Array of ext4_ext_path contains path to some extent.
- * Creation/lookup routines use it for traversal/splitting/etc.
- * Truncate uses it to simulate recursive walking.
- */
-struct ext4_ext_path {
- ext4_fsblk_t p_block;
- __u16 p_depth;
- struct ext4_extent *p_ext;
- struct ext4_extent_idx *p_idx;
- struct ext4_extent_header *p_hdr;
- struct buffer_head *p_bh;
-};
-
-/*
- * structure for external API
- */
-
-/*
- * to be called by ext4_ext_walk_space()
- * negative retcode - error
- * positive retcode - signal for ext4_ext_walk_space(), see below
- * callback must return valid extent (passed or newly created)
- */
-typedef int (*ext_prepare_callback)(struct inode *, ext4_lblk_t,
- struct ext4_ext_cache *,
- struct ext4_extent *, void *);
-
-#define EXT_CONTINUE 0
-#define EXT_BREAK 1
-#define EXT_REPEAT 2
-
-/*
- * Maximum number of logical blocks in a file; ext4_extent's ee_block is
- * __le32.
- */
-#define EXT_MAX_BLOCKS 0xffffffff
-
-/*
- * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
- * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
- * MSB of the ee_len field in the extent data structure to signify whether
- * this particular extent is an initialized extent or an uninitialized
- * (i.e. preallocated) one.
- * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
- * uninitialized extent.
- * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
- * uninitialized one. In other words, if the MSB of ee_len is set, it is an
- * uninitialized extent, with only one special scenario when ee_len = 0x8000.
- * In this case we cannot have an uninitialized extent of zero length and
- * thus we treat it as a special case of an initialized extent with 0x8000 length.
- * This way we get better extent-to-group alignment for initialized extents.
- * Hence, the maximum number of blocks we can have in an *initialized*
- * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
- */
-#define EXT_INIT_MAX_LEN (1UL << 15)
-#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1)
-
-
-#define EXT_FIRST_EXTENT(__hdr__) \
- ((struct ext4_extent *) (((char *) (__hdr__)) + \
- sizeof(struct ext4_extent_header)))
-#define EXT_FIRST_INDEX(__hdr__) \
- ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \
- sizeof(struct ext4_extent_header)))
-#define EXT_HAS_FREE_INDEX(__path__) \
- (le16_to_cpu((__path__)->p_hdr->eh_entries) \
- < le16_to_cpu((__path__)->p_hdr->eh_max))
-#define EXT_LAST_EXTENT(__hdr__) \
- (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
-#define EXT_LAST_INDEX(__hdr__) \
- (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
-#define EXT_MAX_EXTENT(__hdr__) \
- (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
-#define EXT_MAX_INDEX(__hdr__) \
- (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
-
-static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode)
-{
- return (struct ext4_extent_header *) EXT4_I(inode)->i_data;
-}
-
-static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh)
-{
- return (struct ext4_extent_header *) bh->b_data;
-}
-
-static inline unsigned short ext_depth(struct inode *inode)
-{
- return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
-}
-
-static inline void
-ext4_ext_invalidate_cache(struct inode *inode)
-{
- EXT4_I(inode)->i_cached_extent.ec_len = 0;
-}
-
-static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
-{
- /* We cannot have an uninitialized extent of zero length! */
- BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0);
- ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN);
-}
-
-static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
-{
- /* Extent with ee_len of 0x8000 is treated as an initialized extent */
- return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);
-}
-
-static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
-{
- return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
- le16_to_cpu(ext->ee_len) :
- (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
-}
-
-static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
-{
- ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
-}
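-/*
- * Worked example (illustrative): ee_len == 0x8005 has the MSB set, so
- * ext4_ext_is_uninitialized() returns true and
- * ext4_ext_get_actual_len() yields 0x8005 - 0x8000 == 5 blocks, while
- * ee_len == 0x8000 is the one special *initialized* extent of the full
- * 32768-block length.
- */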
-
-/*
- * ext4_ext_pblock:
- * combine low and high parts of physical block number into ext4_fsblk_t
- */
-static inline ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
-{
- ext4_fsblk_t block;
-
- block = le32_to_cpu(ex->ee_start_lo);
- block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
- return block;
-}
-
-/*
- * ext4_idx_pblock:
- * combine low and high parts of a leaf physical block number into ext4_fsblk_t
- */
-static inline ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_idx *ix)
-{
- ext4_fsblk_t block;
-
- block = le32_to_cpu(ix->ei_leaf_lo);
- block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
- return block;
-}
-
-/*
- * ext4_ext_store_pblock:
- * stores a large physical block number into an extent struct,
- * breaking it into parts
- */
-static inline void ext4_ext_store_pblock(struct ext4_extent *ex,
- ext4_fsblk_t pb)
-{
- ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
- ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
- 0xffff);
-}
-
-/*
- * ext4_idx_store_pblock:
- * stores a large physical block number into an index struct,
- * breaking it into parts
- */
-static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
- ext4_fsblk_t pb)
-{
- ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
- ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
- 0xffff);
-}
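-/*
- * Worked example (illustrative): for pb == 0x0123456789AB,
- * ext4_ext_store_pblock() sets ee_start_lo = 0x456789AB and
- * ee_start_hi = 0x0123; ext4_ext_pblock() then rebuilds
- * ((ext4_fsblk_t)0x0123 << 31) << 1 | 0x456789AB == 0x0123456789AB.
- */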
-
-extern int ext4_ext_calc_metadata_amount(struct inode *inode,
- ext4_lblk_t lblocks);
-extern int ext4_extent_tree_init(handle_t *, struct inode *);
-extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
- int num,
- struct ext4_ext_path *path);
-extern int ext4_can_extents_be_merged(struct inode *inode,
- struct ext4_extent *ex1,
- struct ext4_extent *ex2);
-extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int);
-extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
- struct ext4_ext_path *);
-extern void ext4_ext_drop_refs(struct ext4_ext_path *);
-extern int ext4_ext_check_inode(struct inode *inode);
-extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
- int search_hint_reverse);
-#endif /* _EXT4_EXTENTS */
-
diff --git a/ANDROID_3.4.5/fs/ext4/ext4_jbd2.c b/ANDROID_3.4.5/fs/ext4/ext4_jbd2.c
deleted file mode 100644
index aca17901..00000000
--- a/ANDROID_3.4.5/fs/ext4/ext4_jbd2.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Interface between ext4 and JBD
- */
-
-#include "ext4_jbd2.h"
-
-#include <trace/events/ext4.h>
-
-int __ext4_journal_get_write_access(const char *where, unsigned int line,
- handle_t *handle, struct buffer_head *bh)
-{
- int err = 0;
-
- if (ext4_handle_valid(handle)) {
- err = jbd2_journal_get_write_access(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, line, __func__, bh,
- handle, err);
- }
- return err;
-}
-
-/*
- * The ext4 forget function must perform a revoke if we are freeing data
- * which has been journaled. Metadata (e.g. indirect blocks) must be
- * revoked in all cases.
- *
- * "bh" may be NULL: a metadata block may have been freed from memory
- * but there may still be a record of it in the journal, and that record
- * still needs to be revoked.
- *
- * If the handle isn't valid we're not journaling, but we still need to
- * call bforget() to drop the buffer head.
- */
-int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
- int is_metadata, struct inode *inode,
- struct buffer_head *bh, ext4_fsblk_t blocknr)
-{
- int err;
-
- might_sleep();
-
- trace_ext4_forget(inode, is_metadata, blocknr);
- BUFFER_TRACE(bh, "enter");
-
- jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
- "data mode %x\n",
- bh, is_metadata, inode->i_mode,
- test_opt(inode->i_sb, DATA_FLAGS));
-
- /* In the no journal case, we can just do a bforget and return */
- if (!ext4_handle_valid(handle)) {
- bforget(bh);
- return 0;
- }
-
- /* Never use the revoke function if we are doing full data
- * journaling: there is no need to, and a V1 superblock won't
- * support it. Otherwise, only skip the revoke on un-journaled
- * data blocks. */
-
- if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
- (!is_metadata && !ext4_should_journal_data(inode))) {
- if (bh) {
- BUFFER_TRACE(bh, "call jbd2_journal_forget");
- err = jbd2_journal_forget(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, line, __func__,
- bh, handle, err);
- return err;
- }
- return 0;
- }
-
- /*
- * data!=journal && (is_metadata || should_journal_data(inode))
- */
- BUFFER_TRACE(bh, "call jbd2_journal_revoke");
- err = jbd2_journal_revoke(handle, blocknr, bh);
- if (err) {
- ext4_journal_abort_handle(where, line, __func__,
- bh, handle, err);
- __ext4_abort(inode->i_sb, where, line,
- "error %d when attempting revoke", err);
- }
- BUFFER_TRACE(bh, "exit");
- return err;
-}
-
-int __ext4_journal_get_create_access(const char *where, unsigned int line,
- handle_t *handle, struct buffer_head *bh)
-{
- int err = 0;
-
- if (ext4_handle_valid(handle)) {
- err = jbd2_journal_get_create_access(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, line, __func__,
- bh, handle, err);
- }
- return err;
-}
-
-int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
- handle_t *handle, struct inode *inode,
- struct buffer_head *bh)
-{
- int err = 0;
-
- if (ext4_handle_valid(handle)) {
- err = jbd2_journal_dirty_metadata(handle, bh);
- if (err) {
- /* Errors can only happen if there is a bug */
- handle->h_err = err;
- __ext4_journal_stop(where, line, handle);
- }
- } else {
- if (inode)
- mark_buffer_dirty_inode(bh, inode);
- else
- mark_buffer_dirty(bh);
- if (inode && inode_needs_sync(inode)) {
- sync_dirty_buffer(bh);
- if (buffer_req(bh) && !buffer_uptodate(bh)) {
- struct ext4_super_block *es;
-
- es = EXT4_SB(inode->i_sb)->s_es;
- es->s_last_error_block =
- cpu_to_le64(bh->b_blocknr);
- ext4_error_inode(inode, where, line,
- bh->b_blocknr,
- "IO error syncing itable block");
- err = -EIO;
- }
- }
- }
- return err;
-}
-
-int __ext4_handle_dirty_super(const char *where, unsigned int line,
- handle_t *handle, struct super_block *sb)
-{
- struct buffer_head *bh = EXT4_SB(sb)->s_sbh;
- int err = 0;
-
- if (ext4_handle_valid(handle)) {
- err = jbd2_journal_dirty_metadata(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, line, __func__,
- bh, handle, err);
- } else
- sb->s_dirt = 1;
- return err;
-}
diff --git a/ANDROID_3.4.5/fs/ext4/ext4_jbd2.h b/ANDROID_3.4.5/fs/ext4/ext4_jbd2.h
deleted file mode 100644
index 83b20fcf..00000000
--- a/ANDROID_3.4.5/fs/ext4/ext4_jbd2.h
+++ /dev/null
@@ -1,399 +0,0 @@
-/*
- * ext4_jbd2.h
- *
- * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
- *
- * Copyright 1998--1999 Red Hat corp --- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * Ext4-specific journaling extensions.
- */
-
-#ifndef _EXT4_JBD2_H
-#define _EXT4_JBD2_H
-
-#include <linux/fs.h>
-#include <linux/jbd2.h>
-#include "ext4.h"
-
-#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal)
-
-/* Define the number of blocks we need to account to a transaction to
- * modify one block of data.
- *
- * We may have to touch one inode, one bitmap buffer, up to three
- * indirection blocks, the group and superblock summaries, and the data
- * block to complete the transaction.
- *
- * For extents-enabled fs we may have to allocate and modify up to
- * 5 levels of tree + root which are stored in the inode. */
-
-#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \
- (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \
- ? 27U : 8U)
-
-/* Extended attribute operations touch at most two data buffers,
- * two bitmap buffers, and two group summaries, in addition to the inode
- * and the superblock, which are already accounted for. */
-
-#define EXT4_XATTR_TRANS_BLOCKS 6U
-
-/* Define the minimum size for a transaction which modifies data. This
- * needs to take into account the fact that we may end up modifying two
- * quota files too (one for the group, one for the user quota). The
- * superblock only gets updated once, of course, so don't bother
- * counting that again for the quota updates. */
-
-#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
- EXT4_XATTR_TRANS_BLOCKS - 2 + \
- EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
-
-/*
- * Define the number of metadata blocks we need to account to modify data.
- *
- * This includes the super block, inode block, quota blocks and xattr blocks.
- */
-#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
- EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
-
-/* Delete operations potentially hit one directory's namespace plus an
- * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
- * generous. We can grow the delete transaction later if necessary. */
-
-#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64)
-
-/* Define an arbitrary limit for the amount of data we will anticipate
- * writing to any given transaction. For unbounded transactions such as
- * write(2) and truncate(2) we can write more than this, but we always
- * start off at the maximum transaction size and grow the transaction
- * optimistically as we go. */
-
-#define EXT4_MAX_TRANS_DATA 64U
-
-/* We break up a large truncate or write transaction once the handle's
- * buffer credits get this low; we then need either to extend the
- * transaction or to start a new one. Reserve enough space here for
- * inode, bitmap, superblock, group and indirection updates for at least
- * one block, plus two quota updates. Quota allocations are not
- * needed. */
-
-#define EXT4_RESERVE_TRANS_BLOCKS 12U
-
-#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8
-
-#ifdef CONFIG_QUOTA
-/* Number of blocks needed for a quota update - we know that the structure
- * was allocated, so we only need to update the data block */
-#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0)
-/* Amount of blocks needed for quota insert/delete - we do some block writes
- * but inode, sb and group updates are done only once */
-#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
- (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
-
-#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
- (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
-#else
-#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0
-#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
-#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
-#endif
-#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
-#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
-#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
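-/*
- * Worked example (illustrative): on an extents-enabled filesystem with
- * quotas disabled, EXT4_DATA_TRANS_BLOCKS(sb) evaluates to
- * 27 + 6 - 2 + 0 == 31 credits for a single modified data block.
- */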
-
-/**
- * struct ext4_journal_cb_entry - Base structure for callback information.
- *
- * This struct is a 'seed' structure for use with your own callback
- * structs. If you are using callbacks you must allocate one of these
- * or another struct of your own definition which has this struct
- * as its first element and pass it to ext4_journal_callback_add().
- */
-struct ext4_journal_cb_entry {
- /* list information for other callbacks attached to the same handle */
- struct list_head jce_list;
-
- /* Function to call with this callback structure */
- void (*jce_func)(struct super_block *sb,
- struct ext4_journal_cb_entry *jce, int error);
-
- /* user data goes here */
-};
-
-/**
- * ext4_journal_callback_add: add a function to call after transaction commit
- * @handle: active journal transaction handle to register callback on
- * @func: callback function to call after the transaction has committed;
- * it is passed the filesystem superblock (@sb), this callback
- * entry (@jce) and the journal state at commit (@rc, 0 = transaction
- * committed properly)
- * @jce: journal callback data (internal and function private data struct)
- *
- * The registered function will be called in the context of the journal thread
- * after the transaction for which the handle was created has completed.
- *
- * No locks are held when the callback function is called, so it is safe to
- * call blocking functions from within the callback, but the callback should
- * not block or run for too long, or the filesystem will be blocked waiting for
- * the next transaction to commit. No journaling functions can be used, or
- * there is a risk of deadlock.
- *
- * There is no guaranteed calling order of multiple registered callbacks on
- * the same transaction.
- */
-static inline void ext4_journal_callback_add(handle_t *handle,
- void (*func)(struct super_block *sb,
- struct ext4_journal_cb_entry *jce,
- int rc),
- struct ext4_journal_cb_entry *jce)
-{
- struct ext4_sb_info *sbi =
- EXT4_SB(handle->h_transaction->t_journal->j_private);
-
- /* Add the jce to transaction's private list */
- jce->jce_func = func;
- spin_lock(&sbi->s_md_lock);
- list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list);
- spin_unlock(&sbi->s_md_lock);
-}
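As the struct comment above requires, a caller embeds ext4_journal_cb_entry as the first member of its own struct. A sketch with hypothetical names (my_commit_cb and my_commit_callback are illustrative only):

    struct my_commit_cb {
        struct ext4_journal_cb_entry jce;   /* must be the first member */
        unsigned long private_data;
    };

    static void my_commit_callback(struct super_block *sb,
                                   struct ext4_journal_cb_entry *jce, int rc)
    {
        /* Recover the enclosing struct; this runs in journal-thread
         * context after commit, with rc == 0 on a clean commit. */
        struct my_commit_cb *cb = container_of(jce, struct my_commit_cb, jce);

        (void)cb;   /* consume the private data here */
    }

The callback is then registered with ext4_journal_callback_add(handle, my_commit_callback, &cb->jce) while the handle is still active.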
-
-/**
- * ext4_journal_callback_del: delete a registered callback
- * @handle: active journal transaction handle on which callback was registered
- * @jce: registered journal callback entry to unregister
- */
-static inline void ext4_journal_callback_del(handle_t *handle,
- struct ext4_journal_cb_entry *jce)
-{
- struct ext4_sb_info *sbi =
- EXT4_SB(handle->h_transaction->t_journal->j_private);
-
- spin_lock(&sbi->s_md_lock);
- list_del_init(&jce->jce_list);
- spin_unlock(&sbi->s_md_lock);
-}
-
-int
-ext4_mark_iloc_dirty(handle_t *handle,
- struct inode *inode,
- struct ext4_iloc *iloc);
-
-/*
- * On success, we end up with an outstanding reference count against
- * iloc->bh. This _must_ be cleaned up later.
- */
-
-int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
- struct ext4_iloc *iloc);
-
-int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
-
-/*
- * Wrapper functions with which ext4 calls into JBD.
- */
-void ext4_journal_abort_handle(const char *caller, unsigned int line,
- const char *err_fn,
- struct buffer_head *bh, handle_t *handle, int err);
-
-int __ext4_journal_get_write_access(const char *where, unsigned int line,
- handle_t *handle, struct buffer_head *bh);
-
-int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
- int is_metadata, struct inode *inode,
- struct buffer_head *bh, ext4_fsblk_t blocknr);
-
-int __ext4_journal_get_create_access(const char *where, unsigned int line,
- handle_t *handle, struct buffer_head *bh);
-
-int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
- handle_t *handle, struct inode *inode,
- struct buffer_head *bh);
-
-int __ext4_handle_dirty_super(const char *where, unsigned int line,
- handle_t *handle, struct super_block *sb);
-
-#define ext4_journal_get_write_access(handle, bh) \
- __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh))
-#define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
- __ext4_forget(__func__, __LINE__, (handle), (is_metadata), (inode), \
- (bh), (block_nr))
-#define ext4_journal_get_create_access(handle, bh) \
- __ext4_journal_get_create_access(__func__, __LINE__, (handle), (bh))
-#define ext4_handle_dirty_metadata(handle, inode, bh) \
- __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \
- (bh))
-#define ext4_handle_dirty_super(handle, sb) \
- __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
-
-handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
-int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
-
-#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
-
-/* Note: Do not use this for NULL handles. This is only to determine if
- * a properly allocated handle is using a journal or not. */
-static inline int ext4_handle_valid(handle_t *handle)
-{
- if ((unsigned long)handle < EXT4_NOJOURNAL_MAX_REF_COUNT)
- return 0;
- return 1;
-}
-
-static inline void ext4_handle_sync(handle_t *handle)
-{
- if (ext4_handle_valid(handle))
- handle->h_sync = 1;
-}
-
-static inline void ext4_handle_release_buffer(handle_t *handle,
- struct buffer_head *bh)
-{
- if (ext4_handle_valid(handle))
- jbd2_journal_release_buffer(handle, bh);
-}
-
-static inline int ext4_handle_is_aborted(handle_t *handle)
-{
- if (ext4_handle_valid(handle))
- return is_handle_aborted(handle);
- return 0;
-}
-
-static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
-{
- if (ext4_handle_valid(handle) && handle->h_buffer_credits < needed)
- return 0;
- return 1;
-}
-
-static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
-{
- return ext4_journal_start_sb(inode->i_sb, nblocks);
-}
-
-#define ext4_journal_stop(handle) \
- __ext4_journal_stop(__func__, __LINE__, (handle))
-
-static inline handle_t *ext4_journal_current_handle(void)
-{
- return journal_current_handle();
-}
-
-static inline int ext4_journal_extend(handle_t *handle, int nblocks)
-{
- if (ext4_handle_valid(handle))
- return jbd2_journal_extend(handle, nblocks);
- return 0;
-}
-
-static inline int ext4_journal_restart(handle_t *handle, int nblocks)
-{
- if (ext4_handle_valid(handle))
- return jbd2_journal_restart(handle, nblocks);
- return 0;
-}
-
-static inline int ext4_journal_blocks_per_page(struct inode *inode)
-{
- if (EXT4_JOURNAL(inode) != NULL)
- return jbd2_journal_blocks_per_page(inode);
- return 0;
-}
-
-static inline int ext4_journal_force_commit(journal_t *journal)
-{
- if (journal)
- return jbd2_journal_force_commit(journal);
- return 0;
-}
-
-static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
-{
- if (ext4_handle_valid(handle))
- return jbd2_journal_file_inode(handle, EXT4_I(inode)->jinode);
- return 0;
-}
-
-static inline void ext4_update_inode_fsync_trans(handle_t *handle,
- struct inode *inode,
- int datasync)
-{
- struct ext4_inode_info *ei = EXT4_I(inode);
-
- if (ext4_handle_valid(handle)) {
- ei->i_sync_tid = handle->h_transaction->t_tid;
- if (datasync)
- ei->i_datasync_tid = handle->h_transaction->t_tid;
- }
-}
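The tids stored here are what fsync later consults to decide which transaction must reach disk. A condensed sketch of the consuming side, mirroring the ext4_sync_file() pattern (journal and ei assumed to be in scope):

    tid_t commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;

    /* Start a commit of that transaction if it is not already
     * committing, then wait for it to finish. */
    if (jbd2_log_start_commit(journal, commit_tid))
        jbd2_log_wait_commit(journal, commit_tid);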
-
-/* super.c */
-int ext4_force_commit(struct super_block *sb);
-
-/*
- * Ext4 inode journal modes
- */
-#define EXT4_INODE_JOURNAL_DATA_MODE 0x01 /* journal data mode */
-#define EXT4_INODE_ORDERED_DATA_MODE 0x02 /* ordered data mode */
-#define EXT4_INODE_WRITEBACK_DATA_MODE 0x04 /* writeback data mode */
-
-static inline int ext4_inode_journal_mode(struct inode *inode)
-{
- if (EXT4_JOURNAL(inode) == NULL)
- return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
- /* We do not support data journalling with delayed allocation */
- if (!S_ISREG(inode->i_mode) ||
- test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
- return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
- if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
- !test_opt(inode->i_sb, DELALLOC))
- return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
- if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
- return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */
- if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
- return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
- else
- BUG();
-}
-
-static inline int ext4_should_journal_data(struct inode *inode)
-{
- return ext4_inode_journal_mode(inode) & EXT4_INODE_JOURNAL_DATA_MODE;
-}
-
-static inline int ext4_should_order_data(struct inode *inode)
-{
- return ext4_inode_journal_mode(inode) & EXT4_INODE_ORDERED_DATA_MODE;
-}
-
-static inline int ext4_should_writeback_data(struct inode *inode)
-{
- return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE;
-}
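A typical call site uses these predicates to pick the ordered-mode bookkeeping. A short sketch of the common pattern around block allocation (illustrative, not a specific function from this tree):

    /* In ordered mode, tie the inode's data to the running transaction
     * so the data reaches disk before the metadata commit. */
    if (ext4_should_order_data(inode)) {
        int err = ext4_jbd2_file_inode(handle, inode);
        if (err)
            return err;
    }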
-
-/*
- * This function controls whether or not we should try to go down the
- * dioread_nolock code paths, which makes it safe to avoid taking
- * i_mutex for direct I/O reads. This only works for extent-based
- * files, and it doesn't work if data journaling is enabled, since the
- * dioread_nolock code uses b_private to pass information back to the
- * I/O completion handler, and this conflicts with the jbd's use of
- * b_private.
- */
-static inline int ext4_should_dioread_nolock(struct inode *inode)
-{
- if (!test_opt(inode->i_sb, DIOREAD_NOLOCK))
- return 0;
- if (!S_ISREG(inode->i_mode))
- return 0;
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- return 0;
- if (ext4_should_journal_data(inode))
- return 0;
- return 1;
-}
-
-#endif /* _EXT4_JBD2_H */
diff --git a/ANDROID_3.4.5/fs/ext4/extents.c b/ANDROID_3.4.5/fs/ext4/extents.c
deleted file mode 100644
index abcdeab6..00000000
--- a/ANDROID_3.4.5/fs/ext4/extents.c
+++ /dev/null
@@ -1,4866 +0,0 @@
-/*
- * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
- * Written by Alex Tomas <alex@clusterfs.com>
- *
- * Architecture independence:
- * Copyright (c) 2005, Bull S.A.
- * Written by Pierre Peiffer <pierre.peiffer@bull.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/*
- * Extents support for EXT4
- *
- * TODO:
- * - ext4*_error() should be used in some situations
- * - analyze all BUG()/BUG_ON(), use -EIO where appropriate
- * - smart tree reduction
- */
-
-#include <linux/fs.h>
-#include <linux/time.h>
-#include <linux/jbd2.h>
-#include <linux/highuid.h>
-#include <linux/pagemap.h>
-#include <linux/quotaops.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/falloc.h>
-#include <asm/uaccess.h>
-#include <linux/fiemap.h>
-#include "ext4_jbd2.h"
-
-#include <trace/events/ext4.h>
-
-/*
- * used by extent splitting.
- */
-#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \
- due to ENOSPC */
-#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
-#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
-
-static int ext4_split_extent(handle_t *handle,
- struct inode *inode,
- struct ext4_ext_path *path,
- struct ext4_map_blocks *map,
- int split_flag,
- int flags);
-
-static int ext4_split_extent_at(handle_t *handle,
- struct inode *inode,
- struct ext4_ext_path *path,
- ext4_lblk_t split,
- int split_flag,
- int flags);
-
-static int ext4_ext_truncate_extend_restart(handle_t *handle,
- struct inode *inode,
- int needed)
-{
- int err;
-
- if (!ext4_handle_valid(handle))
- return 0;
- if (handle->h_buffer_credits > needed)
- return 0;
- err = ext4_journal_extend(handle, needed);
- if (err <= 0)
- return err;
- err = ext4_truncate_restart_trans(handle, inode, needed);
- if (err == 0)
- err = -EAGAIN;
-
- return err;
-}
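Callers treat -EAGAIN from this helper as "the transaction was restarted, so re-read the tree". A condensed sketch of the loop shape in the truncate path (paraphrased, with handle, inode and credits assumed in scope):

    err = ext4_ext_truncate_extend_restart(handle, inode, credits);
    if (err == -EAGAIN) {
        /* The handle was restarted: any cached extent path or header
         * pointers are stale and must be looked up again. */
        goto again;
    }
    if (err)
        goto out;
    /* Enough credits remain: keep removing blocks under this handle. */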
-
-/*
- * could return:
- * - EROFS
- * - ENOMEM
- */
-static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path)
-{
- if (path->p_bh) {
- /* path points to block */
- return ext4_journal_get_write_access(handle, path->p_bh);
- }
- /* path points to leaf/index in inode body */
- /* we use in-core data, no need to protect them */
- return 0;
-}
-
-/*
- * could return:
- * - EROFS
- * - ENOMEM
- * - EIO
- */
-#define ext4_ext_dirty(handle, inode, path) \
- __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
-static int __ext4_ext_dirty(const char *where, unsigned int line,
- handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path)
-{
- int err;
- if (path->p_bh) {
- /* path points to block */
- err = __ext4_handle_dirty_metadata(where, line, handle,
- inode, path->p_bh);
- } else {
- /* path points to leaf/index in inode body */
- err = ext4_mark_inode_dirty(handle, inode);
- }
- return err;
-}
-
-static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
- struct ext4_ext_path *path,
- ext4_lblk_t block)
-{
- if (path) {
- int depth = path->p_depth;
- struct ext4_extent *ex;
-
- /*
- * Try to predict block placement assuming that we are
- * filling in a file which will eventually be
- * non-sparse --- i.e., in the case of libbfd writing
- * an ELF object's sections out-of-order but in a way
- * that eventually results in a contiguous object or
- * executable file, or some database extending a table
- * space file. However, this is actually somewhat
- * non-ideal if we are writing a sparse file such as
- * qemu or KVM writing a raw image file that is going
- * to stay fairly sparse, since it will end up
- * fragmenting the file system's free space. Maybe we
- * should have some heuristics or some way to allow
- * userspace to pass a hint to the file system,
- * especially if the latter case turns out to be
- * common.
- */
- ex = path[depth].p_ext;
- if (ex) {
- ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
- ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
-
- if (block > ext_block)
- return ext_pblk + (block - ext_block);
- else
- return ext_pblk - (ext_block - block);
- }
-
- /* it looks like the index is empty;
- * try to find the starting block from the index itself */
- if (path[depth].p_bh)
- return path[depth].p_bh->b_blocknr;
- }
-
- /* OK. use inode's group */
- return ext4_inode_to_goal_block(inode);
-}
-
-/*
- * Allocate a new metadata block
- */
-static ext4_fsblk_t
-ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path,
- struct ext4_extent *ex, int *err, unsigned int flags)
-{
- ext4_fsblk_t goal, newblock;
-
- goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
- newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
- NULL, err);
- return newblock;
-}
-
-static inline int ext4_ext_space_block(struct inode *inode, int check)
-{
- int size;
-
- size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
- / sizeof(struct ext4_extent);
-#ifdef AGGRESSIVE_TEST
- if (!check && size > 6)
- size = 6;
-#endif
- return size;
-}
-
-static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
-{
- int size;
-
- size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
- / sizeof(struct ext4_extent_idx);
-#ifdef AGGRESSIVE_TEST
- if (!check && size > 5)
- size = 5;
-#endif
- return size;
-}
-
-static inline int ext4_ext_space_root(struct inode *inode, int check)
-{
- int size;
-
- size = sizeof(EXT4_I(inode)->i_data);
- size -= sizeof(struct ext4_extent_header);
- size /= sizeof(struct ext4_extent);
-#ifdef AGGRESSIVE_TEST
- if (!check && size > 3)
- size = 3;
-#endif
- return size;
-}
-
-static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
-{
- int size;
-
- size = sizeof(EXT4_I(inode)->i_data);
- size -= sizeof(struct ext4_extent_header);
- size /= sizeof(struct ext4_extent_idx);
-#ifdef AGGRESSIVE_TEST
- if (!check && size > 4)
- size = 4;
-#endif
- return size;
-}
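For orientation, with a 4 KiB block size these four helpers work out as follows (assuming the 12-byte struct ext4_extent, struct ext4_extent_idx and extent header of this tree, and the 60-byte i_data root):

    ext4_ext_space_block:     (4096 - 12) / 12 = 340 extents per leaf block
    ext4_ext_space_block_idx: (4096 - 12) / 12 = 340 indexes per index block
    ext4_ext_space_root:      (60 - 12) / 12   = 4 extents in the inode body
    ext4_ext_space_root_idx:  (60 - 12) / 12   = 4 indexes in the inode body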
-
-/*
- * Calculate the number of metadata blocks needed
- * when allocating the block at @lblock.
- * Worst case is one block per extent.
- */
-int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
-{
- struct ext4_inode_info *ei = EXT4_I(inode);
- int idxs;
-
- idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
- / sizeof(struct ext4_extent_idx));
-
- /*
- * If the new delayed allocation block is contiguous with the
- * previous da block, it can share index blocks with the
- * previous block, so we only need to allocate a new index
- * block every idxs leaf blocks. At idxs**2 blocks, we need
- * an additional index block, and at idxs**3 blocks, yet
- * another index block.
- */
- if (ei->i_da_metadata_calc_len &&
- ei->i_da_metadata_calc_last_lblock+1 == lblock) {
- int num = 0;
-
- if ((ei->i_da_metadata_calc_len % idxs) == 0)
- num++;
- if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
- num++;
- if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
- num++;
- ei->i_da_metadata_calc_len = 0;
- } else
- ei->i_da_metadata_calc_len++;
- ei->i_da_metadata_calc_last_lblock++;
- return num;
- }
-
- /*
- * In the worst case we need a new set of index blocks at
- * every level of the inode's extent tree.
- */
- ei->i_da_metadata_calc_len = 1;
- ei->i_da_metadata_calc_last_lblock = lblock;
- return ext_depth(inode) + 1;
-}
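A worked example under the same 4 KiB assumptions (idxs = 340): the first block of a new delayed-allocation run is charged the worst case of ext_depth + 1 index blocks; each following contiguous block is charged 0, except that every 340th block adds one leaf-index block, every 340**2-th an extra second-level index, and every 340**3-th an extra third-level index, at which point the running count resets.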
-
-static int
-ext4_ext_max_entries(struct inode *inode, int depth)
-{
- int max;
-
- if (depth == ext_depth(inode)) {
- if (depth == 0)
- max = ext4_ext_space_root(inode, 1);
- else
- max = ext4_ext_space_root_idx(inode, 1);
- } else {
- if (depth == 0)
- max = ext4_ext_space_block(inode, 1);
- else
- max = ext4_ext_space_block_idx(inode, 1);
- }
-
- return max;
-}
-
-static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
-{
- ext4_fsblk_t block = ext4_ext_pblock(ext);
- int len = ext4_ext_get_actual_len(ext);
-
- if (len == 0)
- return 0;
- return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
-}
-
-static int ext4_valid_extent_idx(struct inode *inode,
- struct ext4_extent_idx *ext_idx)
-{
- ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
-
- return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
-}
-
-static int ext4_valid_extent_entries(struct inode *inode,
- struct ext4_extent_header *eh,
- int depth)
-{
- unsigned short entries;
- if (eh->eh_entries == 0)
- return 1;
-
- entries = le16_to_cpu(eh->eh_entries);
-
- if (depth == 0) {
- /* leaf entries */
- struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
- while (entries) {
- if (!ext4_valid_extent(inode, ext))
- return 0;
- ext++;
- entries--;
- }
- } else {
- struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
- while (entries) {
- if (!ext4_valid_extent_idx(inode, ext_idx))
- return 0;
- ext_idx++;
- entries--;
- }
- }
- return 1;
-}
-
-static int __ext4_ext_check(const char *function, unsigned int line,
- struct inode *inode, struct ext4_extent_header *eh,
- int depth)
-{
- const char *error_msg;
- int max = 0;
-
- if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
- error_msg = "invalid magic";
- goto corrupted;
- }
- if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) {
- error_msg = "unexpected eh_depth";
- goto corrupted;
- }
- if (unlikely(eh->eh_max == 0)) {
- error_msg = "invalid eh_max";
- goto corrupted;
- }
- max = ext4_ext_max_entries(inode, depth);
- if (unlikely(le16_to_cpu(eh->eh_max) > max)) {
- error_msg = "too large eh_max";
- goto corrupted;
- }
- if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
- error_msg = "invalid eh_entries";
- goto corrupted;
- }
- if (!ext4_valid_extent_entries(inode, eh, depth)) {
- error_msg = "invalid extent entries";
- goto corrupted;
- }
- return 0;
-
-corrupted:
- ext4_error_inode(inode, function, line, 0,
- "bad header/extent: %s - magic %x, "
- "entries %u, max %u(%u), depth %u(%u)",
- error_msg, le16_to_cpu(eh->eh_magic),
- le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
- max, le16_to_cpu(eh->eh_depth), depth);
-
- return -EIO;
-}
-
-#define ext4_ext_check(inode, eh, depth) \
- __ext4_ext_check(__func__, __LINE__, inode, eh, depth)
-
-int ext4_ext_check_inode(struct inode *inode)
-{
- return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode));
-}
-
-#ifdef EXT_DEBUG
-static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
-{
- int k, l = path->p_depth;
-
- ext_debug("path:");
- for (k = 0; k <= l; k++, path++) {
- if (path->p_idx) {
- ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block),
- ext4_idx_pblock(path->p_idx));
- } else if (path->p_ext) {
- ext_debug(" %d:[%d]%d:%llu ",
- le32_to_cpu(path->p_ext->ee_block),
- ext4_ext_is_uninitialized(path->p_ext),
- ext4_ext_get_actual_len(path->p_ext),
- ext4_ext_pblock(path->p_ext));
- } else
- ext_debug(" []");
- }
- ext_debug("\n");
-}
-
-static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
-{
- int depth = ext_depth(inode);
- struct ext4_extent_header *eh;
- struct ext4_extent *ex;
- int i;
-
- if (!path)
- return;
-
- eh = path[depth].p_hdr;
- ex = EXT_FIRST_EXTENT(eh);
-
- ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino);
-
- for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
- ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
- ext4_ext_is_uninitialized(ex),
- ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
- }
- ext_debug("\n");
-}
-
-static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
- ext4_fsblk_t newblock, int level)
-{
- int depth = ext_depth(inode);
- struct ext4_extent *ex;
-
- if (depth != level) {
- struct ext4_extent_idx *idx;
- idx = path[level].p_idx;
- while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
- ext_debug("%d: move %d:%llu in new index %llu\n", level,
- le32_to_cpu(idx->ei_block),
- ext4_idx_pblock(idx),
- newblock);
- idx++;
- }
-
- return;
- }
-
- ex = path[depth].p_ext;
- while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
- ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
- le32_to_cpu(ex->ee_block),
- ext4_ext_pblock(ex),
- ext4_ext_is_uninitialized(ex),
- ext4_ext_get_actual_len(ex),
- newblock);
- ex++;
- }
-}
-
-#else
-#define ext4_ext_show_path(inode, path)
-#define ext4_ext_show_leaf(inode, path)
-#define ext4_ext_show_move(inode, path, newblock, level)
-#endif
-
-void ext4_ext_drop_refs(struct ext4_ext_path *path)
-{
- int depth = path->p_depth;
- int i;
-
- for (i = 0; i <= depth; i++, path++)
- if (path->p_bh) {
- brelse(path->p_bh);
- path->p_bh = NULL;
- }
-}
-
-/*
- * ext4_ext_binsearch_idx:
- * binary search for the closest index of the given block;
- * the header must be checked before calling this
- */
-static void
-ext4_ext_binsearch_idx(struct inode *inode,
- struct ext4_ext_path *path, ext4_lblk_t block)
-{
- struct ext4_extent_header *eh = path->p_hdr;
- struct ext4_extent_idx *r, *l, *m;
-
-
- ext_debug("binsearch for %u(idx): ", block);
-
- l = EXT_FIRST_INDEX(eh) + 1;
- r = EXT_LAST_INDEX(eh);
- while (l <= r) {
- m = l + (r - l) / 2;
- if (block < le32_to_cpu(m->ei_block))
- r = m - 1;
- else
- l = m + 1;
- ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block),
- m, le32_to_cpu(m->ei_block),
- r, le32_to_cpu(r->ei_block));
- }
-
- path->p_idx = l - 1;
- ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
- ext4_idx_pblock(path->p_idx));
-
-#ifdef CHECK_BINSEARCH
- {
- struct ext4_extent_idx *chix, *ix;
- int k;
-
- chix = ix = EXT_FIRST_INDEX(eh);
- for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
- if (k != 0 &&
- le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) {
- printk(KERN_DEBUG "k=%d, ix=0x%p, "
- "first=0x%p\n", k,
- ix, EXT_FIRST_INDEX(eh));
- printk(KERN_DEBUG "%u <= %u\n",
- le32_to_cpu(ix->ei_block),
- le32_to_cpu(ix[-1].ei_block));
- }
- BUG_ON(k && le32_to_cpu(ix->ei_block)
- <= le32_to_cpu(ix[-1].ei_block));
- if (block < le32_to_cpu(ix->ei_block))
- break;
- chix = ix;
- }
- BUG_ON(chix != path->p_idx);
- }
-#endif
-
-}
-
-/*
- * ext4_ext_binsearch:
- * binary search for the closest extent of the given block;
- * the header must be checked before calling this
- */
-static void
-ext4_ext_binsearch(struct inode *inode,
- struct ext4_ext_path *path, ext4_lblk_t block)
-{
- struct ext4_extent_header *eh = path->p_hdr;
- struct ext4_extent *r, *l, *m;
-
- if (eh->eh_entries == 0) {
- /*
- * this leaf is empty:
- * we get such a leaf in split/add case
- */
- return;
- }
-
- ext_debug("binsearch for %u: ", block);
-
- l = EXT_FIRST_EXTENT(eh) + 1;
- r = EXT_LAST_EXTENT(eh);
-
- while (l <= r) {
- m = l + (r - l) / 2;
- if (block < le32_to_cpu(m->ee_block))
- r = m - 1;
- else
- l = m + 1;
- ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block),
- m, le32_to_cpu(m->ee_block),
- r, le32_to_cpu(r->ee_block));
- }
-
- path->p_ext = l - 1;
- ext_debug(" -> %d:%llu:[%d]%d ",
- le32_to_cpu(path->p_ext->ee_block),
- ext4_ext_pblock(path->p_ext),
- ext4_ext_is_uninitialized(path->p_ext),
- ext4_ext_get_actual_len(path->p_ext));
-
-#ifdef CHECK_BINSEARCH
- {
- struct ext4_extent *chex, *ex;
- int k;
-
- chex = ex = EXT_FIRST_EXTENT(eh);
- for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
- BUG_ON(k && le32_to_cpu(ex->ee_block)
- <= le32_to_cpu(ex[-1].ee_block));
- if (block < le32_to_cpu(ex->ee_block))
- break;
- chex = ex;
- }
- BUG_ON(chex != path->p_ext);
- }
-#endif
-
-}
-
-int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
-{
- struct ext4_extent_header *eh;
-
- eh = ext_inode_hdr(inode);
- eh->eh_depth = 0;
- eh->eh_entries = 0;
- eh->eh_magic = EXT4_EXT_MAGIC;
- eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
- ext4_mark_inode_dirty(handle, inode);
- ext4_ext_invalidate_cache(inode);
- return 0;
-}
-
-struct ext4_ext_path *
-ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
- struct ext4_ext_path *path)
-{
- struct ext4_extent_header *eh;
- struct buffer_head *bh;
- short int depth, i, ppos = 0, alloc = 0;
-
- eh = ext_inode_hdr(inode);
- depth = ext_depth(inode);
-
- /* account possible depth increase */
- if (!path) {
- path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
- GFP_NOFS);
- if (!path)
- return ERR_PTR(-ENOMEM);
- alloc = 1;
- }
- path[0].p_hdr = eh;
- path[0].p_bh = NULL;
-
- i = depth;
- /* walk through the tree */
- while (i) {
- int need_to_validate = 0;
-
- ext_debug("depth %d: num %d, max %d\n",
- ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
-
- ext4_ext_binsearch_idx(inode, path + ppos, block);
- path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
- path[ppos].p_depth = i;
- path[ppos].p_ext = NULL;
-
- bh = sb_getblk(inode->i_sb, path[ppos].p_block);
- if (unlikely(!bh))
- goto err;
- if (!bh_uptodate_or_lock(bh)) {
- trace_ext4_ext_load_extent(inode, block,
- path[ppos].p_block);
- if (bh_submit_read(bh) < 0) {
- put_bh(bh);
- goto err;
- }
- /* validate the extent entries */
- need_to_validate = 1;
- }
- eh = ext_block_hdr(bh);
- ppos++;
- if (unlikely(ppos > depth)) {
- put_bh(bh);
- EXT4_ERROR_INODE(inode,
- "ppos %d > depth %d", ppos, depth);
- goto err;
- }
- path[ppos].p_bh = bh;
- path[ppos].p_hdr = eh;
- i--;
-
- if (need_to_validate && ext4_ext_check(inode, eh, i))
- goto err;
- }
-
- path[ppos].p_depth = i;
- path[ppos].p_ext = NULL;
- path[ppos].p_idx = NULL;
-
- /* find extent */
- ext4_ext_binsearch(inode, path + ppos, block);
- /* if not an empty leaf */
- if (path[ppos].p_ext)
- path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
-
- ext4_ext_show_path(inode, path);
-
- return path;
-
-err:
- ext4_ext_drop_refs(path);
- if (alloc)
- kfree(path);
- return ERR_PTR(-EIO);
-}
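The returned path array owns buffer_head references, so every lookup must be paired with ext4_ext_drop_refs() and, when the path was allocated by the function itself, kfree(). A minimal sketch of the lifecycle:

    struct ext4_ext_path *path;

    path = ext4_ext_find_extent(inode, lblk, NULL);
    if (IS_ERR(path))
        return PTR_ERR(path);
    /* ... inspect path[ext_depth(inode)].p_ext ... */
    ext4_ext_drop_refs(path);
    kfree(path);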
-
-/*
- * ext4_ext_insert_index:
- * insert new index [@logical;@ptr] into the block at @curp;
- * check where to insert: before @curp or after @curp
- */
-static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *curp,
- int logical, ext4_fsblk_t ptr)
-{
- struct ext4_extent_idx *ix;
- int len, err;
-
- err = ext4_ext_get_access(handle, inode, curp);
- if (err)
- return err;
-
- if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
- EXT4_ERROR_INODE(inode,
- "logical %d == ei_block %d!",
- logical, le32_to_cpu(curp->p_idx->ei_block));
- return -EIO;
- }
-
- if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
- >= le16_to_cpu(curp->p_hdr->eh_max))) {
- EXT4_ERROR_INODE(inode,
- "eh_entries %d >= eh_max %d!",
- le16_to_cpu(curp->p_hdr->eh_entries),
- le16_to_cpu(curp->p_hdr->eh_max));
- return -EIO;
- }
-
- if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
- /* insert after */
- ext_debug("insert new index %d after: %llu\n", logical, ptr);
- ix = curp->p_idx + 1;
- } else {
- /* insert before */
- ext_debug("insert new index %d before: %llu\n", logical, ptr);
- ix = curp->p_idx;
- }
-
- len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
- BUG_ON(len < 0);
- if (len > 0) {
- ext_debug("insert new index %d: "
- "move %d indices from 0x%p to 0x%p\n",
- logical, len, ix, ix + 1);
- memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
- }
-
- if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
- EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
- return -EIO;
- }
-
- ix->ei_block = cpu_to_le32(logical);
- ext4_idx_store_pblock(ix, ptr);
- le16_add_cpu(&curp->p_hdr->eh_entries, 1);
-
- if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
- EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
- return -EIO;
- }
-
- err = ext4_ext_dirty(handle, inode, curp);
- ext4_std_error(inode->i_sb, err);
-
- return err;
-}
-
-/*
- * ext4_ext_split:
- * inserts new subtree into the path, using free index entry
- * at depth @at:
- * - allocates all needed blocks (new leaf and all intermediate index blocks)
- * - makes decision where to split
- * - moves remaining extents and index entries (right to the split point)
- * into the newly allocated blocks
- * - initializes subtree
- */
-static int ext4_ext_split(handle_t *handle, struct inode *inode,
- unsigned int flags,
- struct ext4_ext_path *path,
- struct ext4_extent *newext, int at)
-{
- struct buffer_head *bh = NULL;
- int depth = ext_depth(inode);
- struct ext4_extent_header *neh;
- struct ext4_extent_idx *fidx;
- int i = at, k, m, a;
- ext4_fsblk_t newblock, oldblock;
- __le32 border;
- ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
- int err = 0;
-
- /* make decision: where to split? */
- /* FIXME: now decision is simplest: at current extent */
-
- /* if the current leaf will be split, then we should use the
- * border from the split point */
- if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
- EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
- return -EIO;
- }
- if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
- border = path[depth].p_ext[1].ee_block;
- ext_debug("leaf will be split."
- " next leaf starts at %d\n",
- le32_to_cpu(border));
- } else {
- border = newext->ee_block;
- ext_debug("leaf will be added."
- " next leaf starts at %d\n",
- le32_to_cpu(border));
- }
-
- /*
- * If an error occurs, we stop processing and mark the
- * filesystem read-only. The index won't be inserted and
- * the tree will remain in a consistent state. The next
- * mount will repair the buffers too.
- */
-
- /*
- * Get an array to track all allocated blocks.
- * We need this to handle errors and to free the
- * blocks upon failure.
- */
- ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
- if (!ablocks)
- return -ENOMEM;
-
- /* allocate all needed blocks */
- ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
- for (a = 0; a < depth - at; a++) {
- newblock = ext4_ext_new_meta_block(handle, inode, path,
- newext, &err, flags);
- if (newblock == 0)
- goto cleanup;
- ablocks[a] = newblock;
- }
-
- /* initialize new leaf */
- newblock = ablocks[--a];
- if (unlikely(newblock == 0)) {
- EXT4_ERROR_INODE(inode, "newblock == 0!");
- err = -EIO;
- goto cleanup;
- }
- bh = sb_getblk(inode->i_sb, newblock);
- if (!bh) {
- err = -EIO;
- goto cleanup;
- }
- lock_buffer(bh);
-
- err = ext4_journal_get_create_access(handle, bh);
- if (err)
- goto cleanup;
-
- neh = ext_block_hdr(bh);
- neh->eh_entries = 0;
- neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
- neh->eh_magic = EXT4_EXT_MAGIC;
- neh->eh_depth = 0;
-
- /* move remainder of path[depth] to the new leaf */
- if (unlikely(path[depth].p_hdr->eh_entries !=
- path[depth].p_hdr->eh_max)) {
- EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
- path[depth].p_hdr->eh_entries,
- path[depth].p_hdr->eh_max);
- err = -EIO;
- goto cleanup;
- }
- /* start copy from next extent */
- m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++;
- ext4_ext_show_move(inode, path, newblock, depth);
- if (m) {
- struct ext4_extent *ex;
- ex = EXT_FIRST_EXTENT(neh);
- memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m);
- le16_add_cpu(&neh->eh_entries, m);
- }
-
- set_buffer_uptodate(bh);
- unlock_buffer(bh);
-
- err = ext4_handle_dirty_metadata(handle, inode, bh);
- if (err)
- goto cleanup;
- brelse(bh);
- bh = NULL;
-
- /* correct old leaf */
- if (m) {
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto cleanup;
- le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
- err = ext4_ext_dirty(handle, inode, path + depth);
- if (err)
- goto cleanup;
-
- }
-
- /* create intermediate indexes */
- k = depth - at - 1;
- if (unlikely(k < 0)) {
- EXT4_ERROR_INODE(inode, "k %d < 0!", k);
- err = -EIO;
- goto cleanup;
- }
- if (k)
- ext_debug("create %d intermediate indices\n", k);
- /* insert new index into current index block */
- /* current depth stored in i var */
- i = depth - 1;
- while (k--) {
- oldblock = newblock;
- newblock = ablocks[--a];
- bh = sb_getblk(inode->i_sb, newblock);
- if (!bh) {
- err = -EIO;
- goto cleanup;
- }
- lock_buffer(bh);
-
- err = ext4_journal_get_create_access(handle, bh);
- if (err)
- goto cleanup;
-
- neh = ext_block_hdr(bh);
- neh->eh_entries = cpu_to_le16(1);
- neh->eh_magic = EXT4_EXT_MAGIC;
- neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
- neh->eh_depth = cpu_to_le16(depth - i);
- fidx = EXT_FIRST_INDEX(neh);
- fidx->ei_block = border;
- ext4_idx_store_pblock(fidx, oldblock);
-
- ext_debug("int.index at %d (block %llu): %u -> %llu\n",
- i, newblock, le32_to_cpu(border), oldblock);
-
- /* move remainder of path[i] to the new index block */
- if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
- EXT_LAST_INDEX(path[i].p_hdr))) {
- EXT4_ERROR_INODE(inode,
- "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
- le32_to_cpu(path[i].p_ext->ee_block));
- err = -EIO;
- goto cleanup;
- }
- /* start copy indexes */
- m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++;
- ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
- EXT_MAX_INDEX(path[i].p_hdr));
- ext4_ext_show_move(inode, path, newblock, i);
- if (m) {
- memmove(++fidx, path[i].p_idx,
- sizeof(struct ext4_extent_idx) * m);
- le16_add_cpu(&neh->eh_entries, m);
- }
- set_buffer_uptodate(bh);
- unlock_buffer(bh);
-
- err = ext4_handle_dirty_metadata(handle, inode, bh);
- if (err)
- goto cleanup;
- brelse(bh);
- bh = NULL;
-
- /* correct old index */
- if (m) {
- err = ext4_ext_get_access(handle, inode, path + i);
- if (err)
- goto cleanup;
- le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
- err = ext4_ext_dirty(handle, inode, path + i);
- if (err)
- goto cleanup;
- }
-
- i--;
- }
-
- /* insert new index */
- err = ext4_ext_insert_index(handle, inode, path + at,
- le32_to_cpu(border), newblock);
-
-cleanup:
- if (bh) {
- if (buffer_locked(bh))
- unlock_buffer(bh);
- brelse(bh);
- }
-
- if (err) {
- /* free all allocated blocks in error case */
- for (i = 0; i < depth; i++) {
- if (!ablocks[i])
- continue;
- ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
- EXT4_FREE_BLOCKS_METADATA);
- }
- }
- kfree(ablocks);
-
- return err;
-}
-
-/*
- * ext4_ext_grow_indepth:
- * implements tree growing procedure:
- * - allocates new block
- * - moves top-level data (index block or leaf) into the new block
- * - initializes new top-level, creating index that points to the
- * just created block
- */
-static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
- unsigned int flags,
- struct ext4_extent *newext)
-{
- struct ext4_extent_header *neh;
- struct buffer_head *bh;
- ext4_fsblk_t newblock;
- int err = 0;
-
- newblock = ext4_ext_new_meta_block(handle, inode, NULL,
- newext, &err, flags);
- if (newblock == 0)
- return err;
-
- bh = sb_getblk(inode->i_sb, newblock);
- if (!bh) {
- err = -EIO;
- ext4_std_error(inode->i_sb, err);
- return err;
- }
- lock_buffer(bh);
-
- err = ext4_journal_get_create_access(handle, bh);
- if (err) {
- unlock_buffer(bh);
- goto out;
- }
-
- /* move top-level index/leaf into new block */
- memmove(bh->b_data, EXT4_I(inode)->i_data,
- sizeof(EXT4_I(inode)->i_data));
-
- /* set size of new block */
- neh = ext_block_hdr(bh);
- /* old root could have indexes or leaves
- * so calculate eh_max the right way */
- if (ext_depth(inode))
- neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
- else
- neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
- neh->eh_magic = EXT4_EXT_MAGIC;
- set_buffer_uptodate(bh);
- unlock_buffer(bh);
-
- err = ext4_handle_dirty_metadata(handle, inode, bh);
- if (err)
- goto out;
-
- /* Update top-level index: num,max,pointer */
- neh = ext_inode_hdr(inode);
- neh->eh_entries = cpu_to_le16(1);
- ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
- if (neh->eh_depth == 0) {
- /* Root extent block becomes index block */
- neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
- EXT_FIRST_INDEX(neh)->ei_block =
- EXT_FIRST_EXTENT(neh)->ee_block;
- }
- ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
- le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
- le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
- ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
-
- neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1);
- ext4_mark_inode_dirty(handle, inode);
-out:
- brelse(bh);
-
- return err;
-}
-
-/*
- * ext4_ext_create_new_leaf:
- * finds an empty index and adds a new leaf.
- * if no free index is found, then it requests growing the tree in depth.
- */
-static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
- unsigned int flags,
- struct ext4_ext_path *path,
- struct ext4_extent *newext)
-{
- struct ext4_ext_path *curp;
- int depth, i, err = 0;
-
-repeat:
- i = depth = ext_depth(inode);
-
- /* walk up the tree and look for a free index entry */
- curp = path + depth;
- while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
- i--;
- curp--;
- }
-
- /* we use an already allocated block for the index block,
- * so subsequent data blocks should be contiguous */
- if (EXT_HAS_FREE_INDEX(curp)) {
- /* if we found an index with a free entry, then use that
- * entry: create the needed subtree and add a new leaf */
- err = ext4_ext_split(handle, inode, flags, path, newext, i);
- if (err)
- goto out;
-
- /* refill path */
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode,
- (ext4_lblk_t)le32_to_cpu(newext->ee_block),
- path);
- if (IS_ERR(path))
- err = PTR_ERR(path);
- } else {
- /* tree is full, time to grow in depth */
- err = ext4_ext_grow_indepth(handle, inode, flags, newext);
- if (err)
- goto out;
-
- /* refill path */
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode,
- (ext4_lblk_t)le32_to_cpu(newext->ee_block),
- path);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- goto out;
- }
-
- /*
- * only first (depth 0 -> 1) produces free space;
- * in all other cases we have to split the grown tree
- */
- depth = ext_depth(inode);
- if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
- /* now we need to split */
- goto repeat;
- }
- }
-
-out:
- return err;
-}
-
-/*
- * search the closest allocated block to the left for *logical
- * and returns it at @logical + its physical address at @phys;
- * if *logical is the smallest allocated block, the function
- * returns 0 at @phys.
- * return value contains 0 (success) or error code
- */
-static int ext4_ext_search_left(struct inode *inode,
- struct ext4_ext_path *path,
- ext4_lblk_t *logical, ext4_fsblk_t *phys)
-{
- struct ext4_extent_idx *ix;
- struct ext4_extent *ex;
- int depth, ee_len;
-
- if (unlikely(path == NULL)) {
- EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
- return -EIO;
- }
- depth = path->p_depth;
- *phys = 0;
-
- if (depth == 0 && path->p_ext == NULL)
- return 0;
-
- /* usually the extent in the path covers blocks smaller
- * than *logical, but it can be that the extent is the
- * first one in the file */
-
- ex = path[depth].p_ext;
- ee_len = ext4_ext_get_actual_len(ex);
- if (*logical < le32_to_cpu(ex->ee_block)) {
- if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
- EXT4_ERROR_INODE(inode,
- "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
- *logical, le32_to_cpu(ex->ee_block));
- return -EIO;
- }
- while (--depth >= 0) {
- ix = path[depth].p_idx;
- if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
- EXT4_ERROR_INODE(inode,
- "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
- ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
- EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
- le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0,
- depth);
- return -EIO;
- }
- }
- return 0;
- }
-
- if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
- EXT4_ERROR_INODE(inode,
- "logical %d < ee_block %d + ee_len %d!",
- *logical, le32_to_cpu(ex->ee_block), ee_len);
- return -EIO;
- }
-
- *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
- *phys = ext4_ext_pblock(ex) + ee_len - 1;
- return 0;
-}
-
-/*
- * search the closest allocated block to the right for *logical
- * and returns it at @logical + its physical address at @phys;
- * if *logical is the largest allocated block, the function
- * returns 0 at @phys.
- * return value contains 0 (success) or error code
- */
-static int ext4_ext_search_right(struct inode *inode,
- struct ext4_ext_path *path,
- ext4_lblk_t *logical, ext4_fsblk_t *phys,
- struct ext4_extent **ret_ex)
-{
- struct buffer_head *bh = NULL;
- struct ext4_extent_header *eh;
- struct ext4_extent_idx *ix;
- struct ext4_extent *ex;
- ext4_fsblk_t block;
- int depth; /* Note, NOT eh_depth; depth from top of tree */
- int ee_len;
-
- if (unlikely(path == NULL)) {
- EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
- return -EIO;
- }
- depth = path->p_depth;
- *phys = 0;
-
- if (depth == 0 && path->p_ext == NULL)
- return 0;
-
- /* usually the extent in the path covers blocks smaller
- * than *logical, but it can be that the extent is the
- * first one in the file */
-
- ex = path[depth].p_ext;
- ee_len = ext4_ext_get_actual_len(ex);
- if (*logical < le32_to_cpu(ex->ee_block)) {
- if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
- EXT4_ERROR_INODE(inode,
- "first_extent(path[%d].p_hdr) != ex",
- depth);
- return -EIO;
- }
- while (--depth >= 0) {
- ix = path[depth].p_idx;
- if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
- EXT4_ERROR_INODE(inode,
- "ix != EXT_FIRST_INDEX *logical %d!",
- *logical);
- return -EIO;
- }
- }
- goto found_extent;
- }
-
- if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
- EXT4_ERROR_INODE(inode,
- "logical %d < ee_block %d + ee_len %d!",
- *logical, le32_to_cpu(ex->ee_block), ee_len);
- return -EIO;
- }
-
- if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
- /* next allocated block in this leaf */
- ex++;
- goto found_extent;
- }
-
- /* go up and search for index to the right */
- while (--depth >= 0) {
- ix = path[depth].p_idx;
- if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
- goto got_index;
- }
-
- /* we've gone up to the root and found no index to the right */
- return 0;
-
-got_index:
- /* we've found index to the right, let's
- * follow it and find the closest allocated
- * block to the right */
- ix++;
- block = ext4_idx_pblock(ix);
- while (++depth < path->p_depth) {
- bh = sb_bread(inode->i_sb, block);
- if (bh == NULL)
- return -EIO;
- eh = ext_block_hdr(bh);
- /* subtract from p_depth to get proper eh_depth */
- if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
- put_bh(bh);
- return -EIO;
- }
- ix = EXT_FIRST_INDEX(eh);
- block = ext4_idx_pblock(ix);
- put_bh(bh);
- }
-
- bh = sb_bread(inode->i_sb, block);
- if (bh == NULL)
- return -EIO;
- eh = ext_block_hdr(bh);
- if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
- put_bh(bh);
- return -EIO;
- }
- ex = EXT_FIRST_EXTENT(eh);
-found_extent:
- *logical = le32_to_cpu(ex->ee_block);
- *phys = ext4_ext_pblock(ex);
- *ret_ex = ex;
- if (bh)
- put_bh(bh);
- return 0;
-}
-
-/*
- * ext4_ext_next_allocated_block:
- * returns the allocated block in the subsequent extent or EXT_MAX_BLOCKS.
- * NOTE: it considers the block number from an index entry as an
- * allocated block. Thus, index entries have to be consistent
- * with leaves.
- */
-static ext4_lblk_t
-ext4_ext_next_allocated_block(struct ext4_ext_path *path)
-{
- int depth;
-
- BUG_ON(path == NULL);
- depth = path->p_depth;
-
- if (depth == 0 && path->p_ext == NULL)
- return EXT_MAX_BLOCKS;
-
- while (depth >= 0) {
- if (depth == path->p_depth) {
- /* leaf */
- if (path[depth].p_ext &&
- path[depth].p_ext !=
- EXT_LAST_EXTENT(path[depth].p_hdr))
- return le32_to_cpu(path[depth].p_ext[1].ee_block);
- } else {
- /* index */
- if (path[depth].p_idx !=
- EXT_LAST_INDEX(path[depth].p_hdr))
- return le32_to_cpu(path[depth].p_idx[1].ei_block);
- }
- depth--;
- }
-
- return EXT_MAX_BLOCKS;
-}
-
-/*
- * ext4_ext_next_leaf_block:
- * returns first allocated block from next leaf or EXT_MAX_BLOCKS
- */
-static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
-{
- int depth;
-
- BUG_ON(path == NULL);
- depth = path->p_depth;
-
- /* a zero-depth tree has no leaf blocks at all */
- if (depth == 0)
- return EXT_MAX_BLOCKS;
-
- /* go to index block */
- depth--;
-
- while (depth >= 0) {
- if (path[depth].p_idx !=
- EXT_LAST_INDEX(path[depth].p_hdr))
- return (ext4_lblk_t)
- le32_to_cpu(path[depth].p_idx[1].ei_block);
- depth--;
- }
-
- return EXT_MAX_BLOCKS;
-}
-
-/*
- * ext4_ext_correct_indexes:
- * if the leaf gets modified and the modified extent is first in the leaf,
- * then we have to correct all indexes above.
- * TODO: do we need to correct tree in all cases?
- */
-static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path)
-{
- struct ext4_extent_header *eh;
- int depth = ext_depth(inode);
- struct ext4_extent *ex;
- __le32 border;
- int k, err = 0;
-
- eh = path[depth].p_hdr;
- ex = path[depth].p_ext;
-
- if (unlikely(ex == NULL || eh == NULL)) {
- EXT4_ERROR_INODE(inode,
- "ex %p == NULL or eh %p == NULL", ex, eh);
- return -EIO;
- }
-
- if (depth == 0) {
- /* there is no tree at all */
- return 0;
- }
-
- if (ex != EXT_FIRST_EXTENT(eh)) {
- /* we correct tree if first leaf got modified only */
- return 0;
- }
-
- /*
- * TODO: we need correction if border is smaller than current one
- */
- k = depth - 1;
- border = path[depth].p_ext->ee_block;
- err = ext4_ext_get_access(handle, inode, path + k);
- if (err)
- return err;
- path[k].p_idx->ei_block = border;
- err = ext4_ext_dirty(handle, inode, path + k);
- if (err)
- return err;
-
- while (k--) {
- /* change all left-side indexes */
- if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
- break;
- err = ext4_ext_get_access(handle, inode, path + k);
- if (err)
- break;
- path[k].p_idx->ei_block = border;
- err = ext4_ext_dirty(handle, inode, path + k);
- if (err)
- break;
- }
-
- return err;
-}
-
-int
-ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
- struct ext4_extent *ex2)
-{
- unsigned short ext1_ee_len, ext2_ee_len, max_len;
-
- /*
- * Make sure that either both extents are uninitialized, or
- * both are _not_.
- */
- if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2))
- return 0;
-
- if (ext4_ext_is_uninitialized(ex1))
- max_len = EXT_UNINIT_MAX_LEN;
- else
- max_len = EXT_INIT_MAX_LEN;
-
- ext1_ee_len = ext4_ext_get_actual_len(ex1);
- ext2_ee_len = ext4_ext_get_actual_len(ex2);
-
- if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
- le32_to_cpu(ex2->ee_block))
- return 0;
-
- /*
- * To allow future support for preallocated extents to be added
- * as an RO_COMPAT feature, refuse to merge two extents if
- * this can result in the top bit of ee_len being set.
- */
- if (ext1_ee_len + ext2_ee_len > max_len)
- return 0;
-#ifdef AGGRESSIVE_TEST
- if (ext1_ee_len >= 4)
- return 0;
-#endif
-
- if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
- return 1;
- return 0;
-}
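A quick numeric check of the three conditions, with illustrative values: ex1 = {ee_block 100, len 50, pblk 1000} and ex2 = {ee_block 150, len 30, pblk 1050} are mergeable, since 100 + 50 == 150 (logically adjacent), 1000 + 50 == 1050 (physically adjacent), and 50 + 30 = 80 stays within EXT_INIT_MAX_LEN (32768 in this tree). Had exactly one of the two been uninitialized, the first check above would already have rejected the pair.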
-
-/*
- * This function tries to merge the "ex" extent with the next extent in the
- * tree. It always tries to merge towards the right. If you want to merge
- * towards the left, pass "ex - 1" as the argument instead of "ex".
- * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
- * 1 if they got merged.
- */
-static int ext4_ext_try_to_merge_right(struct inode *inode,
- struct ext4_ext_path *path,
- struct ext4_extent *ex)
-{
- struct ext4_extent_header *eh;
- unsigned int depth, len;
- int merge_done = 0;
- int uninitialized = 0;
-
- depth = ext_depth(inode);
- BUG_ON(path[depth].p_hdr == NULL);
- eh = path[depth].p_hdr;
-
- while (ex < EXT_LAST_EXTENT(eh)) {
- if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
- break;
- /* merge with next extent! */
- if (ext4_ext_is_uninitialized(ex))
- uninitialized = 1;
- ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
- + ext4_ext_get_actual_len(ex + 1));
- if (uninitialized)
- ext4_ext_mark_uninitialized(ex);
-
- if (ex + 1 < EXT_LAST_EXTENT(eh)) {
- len = (EXT_LAST_EXTENT(eh) - ex - 1)
- * sizeof(struct ext4_extent);
- memmove(ex + 1, ex + 2, len);
- }
- le16_add_cpu(&eh->eh_entries, -1);
- merge_done = 1;
- WARN_ON(eh->eh_entries == 0);
- if (!eh->eh_entries)
- EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
- }
-
- return merge_done;
-}
-
-/*
- * This function tries to merge the @ex extent with its neighbours in the
- * tree. It first tries to merge towards the left; if that fails, it tries
- * towards the right and returns the result of that attempt.
- */
-static int ext4_ext_try_to_merge(struct inode *inode,
- struct ext4_ext_path *path,
- struct ext4_extent *ex) {
- struct ext4_extent_header *eh;
- unsigned int depth;
- int merge_done = 0;
- int ret = 0;
-
- depth = ext_depth(inode);
- BUG_ON(path[depth].p_hdr == NULL);
- eh = path[depth].p_hdr;
-
- if (ex > EXT_FIRST_EXTENT(eh))
- merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
-
- if (!merge_done)
- ret = ext4_ext_try_to_merge_right(inode, path, ex);
-
- return ret;
-}
-
-/*
- * check if a portion of the "newext" extent overlaps with an
- * existing extent.
- *
- * If there is an overlap discovered, it updates the length of the newext
- * such that there will be no overlap, and then returns 1.
- * If there is no overlap found, it returns 0.
- */
-static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
- struct inode *inode,
- struct ext4_extent *newext,
- struct ext4_ext_path *path)
-{
- ext4_lblk_t b1, b2;
- unsigned int depth, len1;
- unsigned int ret = 0;
-
- b1 = le32_to_cpu(newext->ee_block);
- len1 = ext4_ext_get_actual_len(newext);
- depth = ext_depth(inode);
- if (!path[depth].p_ext)
- goto out;
- b2 = le32_to_cpu(path[depth].p_ext->ee_block);
- b2 &= ~(sbi->s_cluster_ratio - 1);
-
- /*
- * get the next allocated block if the extent in the path
- * is before the requested block(s)
- */
- if (b2 < b1) {
- b2 = ext4_ext_next_allocated_block(path);
- if (b2 == EXT_MAX_BLOCKS)
- goto out;
- b2 &= ~(sbi->s_cluster_ratio - 1);
- }
-
- /* check for wrap through zero on extent logical start block*/
- if (b1 + len1 < b1) {
- len1 = EXT_MAX_BLOCKS - b1;
- newext->ee_len = cpu_to_le16(len1);
- ret = 1;
- }
-
- /* check for overlap */
- if (b1 + len1 > b2) {
- newext->ee_len = cpu_to_le16(b2 - b1);
- ret = 1;
- }
-out:
- return ret;
-}
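For example, with illustrative numbers and a cluster ratio of 1: if newext covers logical blocks [100, 149] (b1 = 100, len1 = 50) and the closest allocated block at or beyond it is b2 = 120, the overlap branch trims newext->ee_len to 120 - 100 = 20 blocks and returns 1, so the caller maps only the non-overlapping prefix.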
-
-/*
- * ext4_ext_insert_extent:
- * tries to merge the requested extent into the existing extent or
- * inserts the requested extent as a new one into the tree,
- * creating a new leaf in the no-space case.
- */
-int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path,
- struct ext4_extent *newext, int flag)
-{
- struct ext4_extent_header *eh;
- struct ext4_extent *ex, *fex;
- struct ext4_extent *nearex; /* nearest extent */
- struct ext4_ext_path *npath = NULL;
- int depth, len, err;
- ext4_lblk_t next;
- unsigned uninitialized = 0;
- int flags = 0;
-
- if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
- EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
- return -EIO;
- }
- depth = ext_depth(inode);
- ex = path[depth].p_ext;
- if (unlikely(path[depth].p_hdr == NULL)) {
- EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
- return -EIO;
- }
-
- /* try to insert block into found extent and return */
- if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
- && ext4_can_extents_be_merged(inode, ex, newext)) {
- ext_debug("append [%d]%d block to %u:[%d]%d (from %llu)\n",
- ext4_ext_is_uninitialized(newext),
- ext4_ext_get_actual_len(newext),
- le32_to_cpu(ex->ee_block),
- ext4_ext_is_uninitialized(ex),
- ext4_ext_get_actual_len(ex),
- ext4_ext_pblock(ex));
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- return err;
-
- /*
- * ext4_can_extents_be_merged should have checked that either
- * both extents are uninitialized, or both aren't. Thus we
- * need to check only one of them here.
- */
- if (ext4_ext_is_uninitialized(ex))
- uninitialized = 1;
- ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
- + ext4_ext_get_actual_len(newext));
- if (uninitialized)
- ext4_ext_mark_uninitialized(ex);
- eh = path[depth].p_hdr;
- nearex = ex;
- goto merge;
- }
-
- depth = ext_depth(inode);
- eh = path[depth].p_hdr;
- if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
- goto has_space;
-
- /* probably next leaf has space for us? */
- fex = EXT_LAST_EXTENT(eh);
- next = EXT_MAX_BLOCKS;
- if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
- next = ext4_ext_next_leaf_block(path);
- if (next != EXT_MAX_BLOCKS) {
- ext_debug("next leaf block - %u\n", next);
- BUG_ON(npath != NULL);
- npath = ext4_ext_find_extent(inode, next, NULL);
- if (IS_ERR(npath))
- return PTR_ERR(npath);
- BUG_ON(npath->p_depth != path->p_depth);
- eh = npath[depth].p_hdr;
- if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
- ext_debug("next leaf isn't full(%d)\n",
- le16_to_cpu(eh->eh_entries));
- path = npath;
- goto has_space;
- }
- ext_debug("next leaf has no free space(%d,%d)\n",
- le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
- }
-
- /*
- * There is no free space in the found leaf.
- * We are going to add a new leaf to the tree.
- */
- if (flag & EXT4_GET_BLOCKS_PUNCH_OUT_EXT)
- flags = EXT4_MB_USE_ROOT_BLOCKS;
- err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext);
- if (err)
- goto cleanup;
- depth = ext_depth(inode);
- eh = path[depth].p_hdr;
-
-has_space:
- nearex = path[depth].p_ext;
-
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto cleanup;
-
- if (!nearex) {
- /* there is no extent in this leaf, create first one */
- ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n",
- le32_to_cpu(newext->ee_block),
- ext4_ext_pblock(newext),
- ext4_ext_is_uninitialized(newext),
- ext4_ext_get_actual_len(newext));
- nearex = EXT_FIRST_EXTENT(eh);
- } else {
- if (le32_to_cpu(newext->ee_block)
- > le32_to_cpu(nearex->ee_block)) {
- /* Insert after */
- ext_debug("insert %u:%llu:[%d]%d before: "
- "nearest %p\n",
- le32_to_cpu(newext->ee_block),
- ext4_ext_pblock(newext),
- ext4_ext_is_uninitialized(newext),
- ext4_ext_get_actual_len(newext),
- nearex);
- nearex++;
- } else {
- /* Insert before */
- BUG_ON(newext->ee_block == nearex->ee_block);
- ext_debug("insert %u:%llu:[%d]%d after: "
- "nearest %p\n",
- le32_to_cpu(newext->ee_block),
- ext4_ext_pblock(newext),
- ext4_ext_is_uninitialized(newext),
- ext4_ext_get_actual_len(newext),
- nearex);
- }
- len = EXT_LAST_EXTENT(eh) - nearex + 1;
- if (len > 0) {
- ext_debug("insert %u:%llu:[%d]%d: "
- "move %d extents from 0x%p to 0x%p\n",
- le32_to_cpu(newext->ee_block),
- ext4_ext_pblock(newext),
- ext4_ext_is_uninitialized(newext),
- ext4_ext_get_actual_len(newext),
- len, nearex, nearex + 1);
- memmove(nearex + 1, nearex,
- len * sizeof(struct ext4_extent));
- }
- }
-
- le16_add_cpu(&eh->eh_entries, 1);
- path[depth].p_ext = nearex;
- nearex->ee_block = newext->ee_block;
- ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
- nearex->ee_len = newext->ee_len;
-
-merge:
- /* try to merge extents to the right */
- if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
- ext4_ext_try_to_merge(inode, path, nearex);
-
- /* try to merge extents to the left */
-
- /* time to correct all indexes above */
- err = ext4_ext_correct_indexes(handle, inode, path);
- if (err)
- goto cleanup;
-
- err = ext4_ext_dirty(handle, inode, path + depth);
-
-cleanup:
- if (npath) {
- ext4_ext_drop_refs(npath);
- kfree(npath);
- }
- ext4_ext_invalidate_cache(inode);
- return err;
-}
-
-static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
- ext4_lblk_t num, ext_prepare_callback func,
- void *cbdata)
-{
- struct ext4_ext_path *path = NULL;
- struct ext4_ext_cache cbex;
- struct ext4_extent *ex;
- ext4_lblk_t next, start = 0, end = 0;
- ext4_lblk_t last = block + num;
- int depth, exists, err = 0;
-
- BUG_ON(func == NULL);
- BUG_ON(inode == NULL);
-
- while (block < last && block != EXT_MAX_BLOCKS) {
- num = last - block;
- /* find extent for this block */
- down_read(&EXT4_I(inode)->i_data_sem);
- path = ext4_ext_find_extent(inode, block, path);
- up_read(&EXT4_I(inode)->i_data_sem);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- path = NULL;
- break;
- }
-
- depth = ext_depth(inode);
- if (unlikely(path[depth].p_hdr == NULL)) {
- EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
- err = -EIO;
- break;
- }
- ex = path[depth].p_ext;
- next = ext4_ext_next_allocated_block(path);
-
- exists = 0;
- if (!ex) {
- /* there is no extent yet, so try to allocate
- * all requested space */
- start = block;
- end = block + num;
- } else if (le32_to_cpu(ex->ee_block) > block) {
- /* need to allocate space before found extent */
- start = block;
- end = le32_to_cpu(ex->ee_block);
- if (block + num < end)
- end = block + num;
- } else if (block >= le32_to_cpu(ex->ee_block)
- + ext4_ext_get_actual_len(ex)) {
- /* need to allocate space after found extent */
- start = block;
- end = block + num;
- if (end >= next)
- end = next;
- } else if (block >= le32_to_cpu(ex->ee_block)) {
- /*
- * some part of requested space is covered
- * by found extent
- */
- start = block;
- end = le32_to_cpu(ex->ee_block)
- + ext4_ext_get_actual_len(ex);
- if (block + num < end)
- end = block + num;
- exists = 1;
- } else {
- BUG();
- }
- BUG_ON(end <= start);
-
- if (!exists) {
- cbex.ec_block = start;
- cbex.ec_len = end - start;
- cbex.ec_start = 0;
- } else {
- cbex.ec_block = le32_to_cpu(ex->ee_block);
- cbex.ec_len = ext4_ext_get_actual_len(ex);
- cbex.ec_start = ext4_ext_pblock(ex);
- }
-
- if (unlikely(cbex.ec_len == 0)) {
- EXT4_ERROR_INODE(inode, "cbex.ec_len == 0");
- err = -EIO;
- break;
- }
- err = func(inode, next, &cbex, ex, cbdata);
- ext4_ext_drop_refs(path);
-
- if (err < 0)
- break;
-
- if (err == EXT_REPEAT)
- continue;
- else if (err == EXT_BREAK) {
- err = 0;
- break;
- }
-
- if (ext_depth(inode) != depth) {
- /* depth was changed. we have to realloc path */
- kfree(path);
- path = NULL;
- }
-
- block = cbex.ec_block + cbex.ec_len;
- }
-
- if (path) {
- ext4_ext_drop_refs(path);
- kfree(path);
- }
-
- return err;
-}
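-
-/*
- * Illustrative note on the callback protocol above (a sketch of the
- * semantics, nothing beyond what the loop already does): a callback
- * returning a negative value aborts the walk and the error is
- * propagated; EXT_REPEAT re-runs the same logical range, e.g. after
- * the callback changed the tree; EXT_BREAK stops the walk
- * successfully (err is reset to 0). If the tree depth changed
- * underneath us, the path is freed and reallocated next iteration.
- */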
-
-static void
-ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
- __u32 len, ext4_fsblk_t start)
-{
- struct ext4_ext_cache *cex;
- BUG_ON(len == 0);
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- trace_ext4_ext_put_in_cache(inode, block, len, start);
- cex = &EXT4_I(inode)->i_cached_extent;
- cex->ec_block = block;
- cex->ec_len = len;
- cex->ec_start = start;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-}
-
-/*
- * ext4_ext_put_gap_in_cache:
- * calculate boundaries of the gap that the requested block fits into
- * and cache this gap
- */
-static void
-ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
- ext4_lblk_t block)
-{
- int depth = ext_depth(inode);
- unsigned long len;
- ext4_lblk_t lblock;
- struct ext4_extent *ex;
-
- ex = path[depth].p_ext;
- if (ex == NULL) {
- /* there is no extent yet, so gap is [0;-] */
- lblock = 0;
- len = EXT_MAX_BLOCKS;
- ext_debug("cache gap(whole file):");
- } else if (block < le32_to_cpu(ex->ee_block)) {
- lblock = block;
- len = le32_to_cpu(ex->ee_block) - block;
- ext_debug("cache gap(before): %u [%u:%u]",
- block,
- le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_len(ex));
- } else if (block >= le32_to_cpu(ex->ee_block)
- + ext4_ext_get_actual_len(ex)) {
- ext4_lblk_t next;
- lblock = le32_to_cpu(ex->ee_block)
- + ext4_ext_get_actual_len(ex);
-
- next = ext4_ext_next_allocated_block(path);
- ext_debug("cache gap(after): [%u:%u] %u",
- le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_len(ex),
- block);
- BUG_ON(next == lblock);
- len = next - lblock;
- } else {
- lblock = len = 0;
- BUG();
- }
-
- ext_debug(" -> %u:%lu\n", lblock, len);
- ext4_ext_put_in_cache(inode, lblock, len, 0);
-}
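-
-/*
- * Worked example for the gap cases above (values assumed for
- * illustration): with the nearest extent starting at logical block
- * 100, a lookup at block 40 caches the gap [40, 100), i.e.
- * lblock = 40, len = 60; a lookup past an extent ending at block 107
- * caches [108, next), where 'next' is the next allocated block.
- */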
-
-/*
- * ext4_ext_check_cache()
- * Checks to see if the given block is in the cache.
- * If it is, the cached extent is stored in the given
- * cache extent pointer. If the cached extent is a hole,
- * this routine should be used instead of
- * ext4_ext_in_cache if the calling function needs to
- * know the size of the hole.
- *
- * @inode: The file's inode
- * @block: The block to look for in the cache
- * @ex: Pointer where the cached extent will be stored
- * if it contains block
- *
- * Return 0 if cache is invalid; 1 if the cache is valid
- */
-static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
- struct ext4_ext_cache *ex){
- struct ext4_ext_cache *cex;
- struct ext4_sb_info *sbi;
- int ret = 0;
-
- /*
- * We borrow i_block_reservation_lock to protect i_cached_extent
- */
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- cex = &EXT4_I(inode)->i_cached_extent;
- sbi = EXT4_SB(inode->i_sb);
-
- /* has cache valid data? */
- if (cex->ec_len == 0)
- goto errout;
-
- if (in_range(block, cex->ec_block, cex->ec_len)) {
- memcpy(ex, cex, sizeof(struct ext4_ext_cache));
- ext_debug("%u cached by %u:%u:%llu\n",
- block,
- cex->ec_block, cex->ec_len, cex->ec_start);
- ret = 1;
- }
-errout:
- trace_ext4_ext_in_cache(inode, block, ret);
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- return ret;
-}
-
-/*
- * ext4_ext_in_cache()
- * Checks to see if the given block is in the cache.
- * If it is, the cached extent is stored in the given
- * extent pointer.
- *
- * @inode: The file's inode
- * @block: The block to look for in the cache
- * @ex: Pointer where the cached extent will be stored
- * if it contains block
- *
- * Return 0 if cache is invalid; 1 if the cache is valid
- */
-static int
-ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
- struct ext4_extent *ex)
-{
- struct ext4_ext_cache cex;
- int ret = 0;
-
- if (ext4_ext_check_cache(inode, block, &cex)) {
- ex->ee_block = cpu_to_le32(cex.ec_block);
- ext4_ext_store_pblock(ex, cex.ec_start);
- ex->ee_len = cpu_to_le16(cex.ec_len);
- ret = 1;
- }
-
- return ret;
-}
-
-
-/*
- * ext4_ext_rm_idx:
- * removes index from the index block.
- */
-static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path)
-{
- int err;
- ext4_fsblk_t leaf;
-
- /* free index block */
- path--;
- leaf = ext4_idx_pblock(path->p_idx);
- if (unlikely(path->p_hdr->eh_entries == 0)) {
- EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
- return -EIO;
- }
- err = ext4_ext_get_access(handle, inode, path);
- if (err)
- return err;
-
- if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) {
- int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx;
- len *= sizeof(struct ext4_extent_idx);
- memmove(path->p_idx, path->p_idx + 1, len);
- }
-
- le16_add_cpu(&path->p_hdr->eh_entries, -1);
- err = ext4_ext_dirty(handle, inode, path);
- if (err)
- return err;
- ext_debug("index is empty, remove it, free block %llu\n", leaf);
- trace_ext4_ext_rm_idx(inode, leaf);
-
- ext4_free_blocks(handle, inode, NULL, leaf, 1,
- EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
- return err;
-}
-
-/*
- * ext4_ext_calc_credits_for_single_extent:
- * This routine returns the maximum number of credits needed to insert
- * an extent into the extent tree.
- * When passing the actual path, the caller should calculate the
- * credits under i_data_sem.
- */
-int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
- struct ext4_ext_path *path)
-{
- if (path) {
- int depth = ext_depth(inode);
- int ret = 0;
-
- /* is there space left in the leaf? */
- if (le16_to_cpu(path[depth].p_hdr->eh_entries)
- < le16_to_cpu(path[depth].p_hdr->eh_max)) {
-
- /*
- * There is some space in the leaf, so there is no
- * need to account for a leaf block credit.
- *
- * Bitmaps, block group descriptor blocks
- * and other metadata blocks still need to be
- * accounted for.
- */
- /* 1 bitmap, 1 block group descriptor */
- ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
- return ret;
- }
- }
-
- return ext4_chunk_trans_blocks(inode, nrblocks);
-}
-
-/*
- * How many index/leaf blocks need to change/allocate to modify nrblocks?
- *
- * If nrblocks fit in a single extent (the chunk flag is 1), then in
- * the worst case the index/leaf at each tree level needs to be changed:
- * if the tree splits due to inserting a new extent, the old
- * index/leaf blocks need to be updated too.
- *
- * If the nrblocks are discontiguous, they could cause
- * the whole tree to split more than once, but this is really rare.
- */
-int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
-{
- int index;
- int depth = ext_depth(inode);
-
- if (chunk)
- index = depth * 2;
- else
- index = depth * 3;
-
- return index;
-}
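-
-/*
- * Worked example (illustrative): with a tree depth of 2, a single
- * contiguous chunk (chunk == 1) accounts for 2 * 2 = 4 index/leaf
- * blocks, while a discontiguous request (chunk == 0) accounts for
- * 2 * 3 = 6, leaving headroom for an extra split per level.
- */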
-
-static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
- struct ext4_extent *ex,
- ext4_fsblk_t *partial_cluster,
- ext4_lblk_t from, ext4_lblk_t to)
-{
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- unsigned short ee_len = ext4_ext_get_actual_len(ex);
- ext4_fsblk_t pblk;
- int flags = EXT4_FREE_BLOCKS_FORGET;
-
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
- flags |= EXT4_FREE_BLOCKS_METADATA;
- /*
- * For bigalloc file systems, we never free a partial cluster
- * at the beginning of the extent. Instead, we make a note
- * that we tried freeing the cluster, and check to see if we
- * need to free it on a subsequent call to ext4_remove_blocks,
- * or at the end of the ext4_truncate() operation.
- */
- flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
-
- trace_ext4_remove_blocks(inode, ex, from, to, *partial_cluster);
- /*
- * If we have a partial cluster, and it's different from the
- * cluster of the last block, we need to explicitly free the
- * partial cluster here.
- */
- pblk = ext4_ext_pblock(ex) + ee_len - 1;
- if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
- ext4_free_blocks(handle, inode, NULL,
- EXT4_C2B(sbi, *partial_cluster),
- sbi->s_cluster_ratio, flags);
- *partial_cluster = 0;
- }
-
-#ifdef EXTENTS_STATS
- {
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- spin_lock(&sbi->s_ext_stats_lock);
- sbi->s_ext_blocks += ee_len;
- sbi->s_ext_extents++;
- if (ee_len < sbi->s_ext_min)
- sbi->s_ext_min = ee_len;
- if (ee_len > sbi->s_ext_max)
- sbi->s_ext_max = ee_len;
- if (ext_depth(inode) > sbi->s_depth_max)
- sbi->s_depth_max = ext_depth(inode);
- spin_unlock(&sbi->s_ext_stats_lock);
- }
-#endif
- if (from >= le32_to_cpu(ex->ee_block)
- && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
- /* tail removal */
- ext4_lblk_t num;
-
- num = le32_to_cpu(ex->ee_block) + ee_len - from;
- pblk = ext4_ext_pblock(ex) + ee_len - num;
- ext_debug("free last %u blocks starting %llu\n", num, pblk);
- ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
- /*
- * If the block range to be freed didn't start at the
- * beginning of a cluster, and we removed the entire
- * extent, save the partial cluster here, since we
- * might need to free it later if we determine that the
- * truncate operation has removed all of the blocks in
- * the cluster.
- */
- if (pblk & (sbi->s_cluster_ratio - 1) &&
- (ee_len == num))
- *partial_cluster = EXT4_B2C(sbi, pblk);
- else
- *partial_cluster = 0;
- } else if (from == le32_to_cpu(ex->ee_block)
- && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
- /* head removal */
- ext4_lblk_t num;
- ext4_fsblk_t start;
-
- num = to - from;
- start = ext4_ext_pblock(ex);
-
- ext_debug("free first %u blocks starting %llu\n", num, start);
- ext4_free_blocks(handle, inode, NULL, start, num, flags);
-
- } else {
- printk(KERN_INFO "strange request: removal(2) "
- "%u-%u from %u:%u\n",
- from, to, le32_to_cpu(ex->ee_block), ee_len);
- }
- return 0;
-}
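-
-/*
- * Worked example for the tail-removal branch above (values assumed):
- * for an extent covering logical blocks 100..107 at physical block P,
- * removing from = 104, to = 107 gives num = 100 + 8 - 104 = 4 and
- * pblk = P + 8 - 4 = P + 4, i.e. the last four blocks are freed and
- * the extent is later shrunk to length 4 by the caller.
- */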
-
-
-/*
- * ext4_ext_rm_leaf() Removes the extents associated with the
- * blocks appearing between "start" and "end", and splits the extents
- * if "start" and "end" appear in the same extent
- *
- * @handle: The journal handle
- * @inode: The file's inode
- * @path: The path to the leaf
- * @start: The first block to remove
- * @end: The last block to remove
- */
-static int
-ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster,
- ext4_lblk_t start, ext4_lblk_t end)
-{
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- int err = 0, correct_index = 0;
- int depth = ext_depth(inode), credits;
- struct ext4_extent_header *eh;
- ext4_lblk_t a, b;
- unsigned num;
- ext4_lblk_t ex_ee_block;
- unsigned short ex_ee_len;
- unsigned uninitialized = 0;
- struct ext4_extent *ex;
-
- /* the header must be checked already in ext4_ext_remove_space() */
- ext_debug("truncate since %u in leaf to %u\n", start, end);
- if (!path[depth].p_hdr)
- path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
- eh = path[depth].p_hdr;
- if (unlikely(path[depth].p_hdr == NULL)) {
- EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
- return -EIO;
- }
- /* find where to start removing */
- ex = EXT_LAST_EXTENT(eh);
-
- ex_ee_block = le32_to_cpu(ex->ee_block);
- ex_ee_len = ext4_ext_get_actual_len(ex);
-
- trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
-
- while (ex >= EXT_FIRST_EXTENT(eh) &&
- ex_ee_block + ex_ee_len > start) {
-
- if (ext4_ext_is_uninitialized(ex))
- uninitialized = 1;
- else
- uninitialized = 0;
-
- ext_debug("remove ext %u:[%d]%d\n", ex_ee_block,
- uninitialized, ex_ee_len);
- path[depth].p_ext = ex;
-
- a = ex_ee_block > start ? ex_ee_block : start;
- b = ex_ee_block+ex_ee_len - 1 < end ?
- ex_ee_block+ex_ee_len - 1 : end;
-
- ext_debug(" border %u:%u\n", a, b);
-
- /* If this extent is beyond the end of the hole, skip it */
- if (end < ex_ee_block) {
- ex--;
- ex_ee_block = le32_to_cpu(ex->ee_block);
- ex_ee_len = ext4_ext_get_actual_len(ex);
- continue;
- } else if (b != ex_ee_block + ex_ee_len - 1) {
- EXT4_ERROR_INODE(inode,
- "can not handle truncate %u:%u "
- "on extent %u:%u",
- start, end, ex_ee_block,
- ex_ee_block + ex_ee_len - 1);
- err = -EIO;
- goto out;
- } else if (a != ex_ee_block) {
- /* remove tail of the extent */
- num = a - ex_ee_block;
- } else {
- /* remove whole extent: excellent! */
- num = 0;
- }
- /*
- * 3 for the leaf, sb, and inode, plus 2 (bitmap and group
- * descriptor) for each block group touched; assume two block
- * groups plus ex_ee_len/EXT4_BLOCKS_PER_GROUP extra groups in
- * the worst case
- */
- credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
- if (ex == EXT_FIRST_EXTENT(eh)) {
- correct_index = 1;
- credits += (ext_depth(inode)) + 1;
- }
- credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
-
- err = ext4_ext_truncate_extend_restart(handle, inode, credits);
- if (err)
- goto out;
-
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto out;
-
- err = ext4_remove_blocks(handle, inode, ex, partial_cluster,
- a, b);
- if (err)
- goto out;
-
- if (num == 0)
- /* this extent is removed; mark slot entirely unused */
- ext4_ext_store_pblock(ex, 0);
-
- ex->ee_len = cpu_to_le16(num);
- /*
- * Do not mark uninitialized if all the blocks in the
- * extent have been removed.
- */
- if (uninitialized && num)
- ext4_ext_mark_uninitialized(ex);
- /*
- * If the extent was completely released,
- * we need to remove it from the leaf
- */
- if (num == 0) {
- if (end != EXT_MAX_BLOCKS - 1) {
- /*
- * For hole punching, we need to scoot all the
- * extents up when an extent is removed so that
- * we don't have blank extents in the middle
- */
- memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) *
- sizeof(struct ext4_extent));
-
- /* Now get rid of the one at the end */
- memset(EXT_LAST_EXTENT(eh), 0,
- sizeof(struct ext4_extent));
- }
- le16_add_cpu(&eh->eh_entries, -1);
- } else
- *partial_cluster = 0;
-
- err = ext4_ext_dirty(handle, inode, path + depth);
- if (err)
- goto out;
-
- ext_debug("new extent: %u:%u:%llu\n", ex_ee_block, num,
- ext4_ext_pblock(ex));
- ex--;
- ex_ee_block = le32_to_cpu(ex->ee_block);
- ex_ee_len = ext4_ext_get_actual_len(ex);
- }
-
- if (correct_index && eh->eh_entries)
- err = ext4_ext_correct_indexes(handle, inode, path);
-
- /*
- * If there is still an entry in the leaf node, check to see if
- * it references the partial cluster. This is the only place
- * where it could; if it doesn't, we can free the cluster.
- */
- if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) &&
- (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
- *partial_cluster)) {
- int flags = EXT4_FREE_BLOCKS_FORGET;
-
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
- flags |= EXT4_FREE_BLOCKS_METADATA;
-
- ext4_free_blocks(handle, inode, NULL,
- EXT4_C2B(sbi, *partial_cluster),
- sbi->s_cluster_ratio, flags);
- *partial_cluster = 0;
- }
-
- /* if this leaf is free, then we should
- * remove it from index block above */
- if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
- err = ext4_ext_rm_idx(handle, inode, path + depth);
-
-out:
- return err;
-}
-
-/*
- * ext4_ext_more_to_rm:
- * returns 1 if current index has to be freed (even partial)
- */
-static int
-ext4_ext_more_to_rm(struct ext4_ext_path *path)
-{
- BUG_ON(path->p_idx == NULL);
-
- if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
- return 0;
-
- /*
- * if a truncate at a deeper level happened, it wasn't partial,
- * so we have to consider the current index for truncation
- */
- if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
- return 0;
- return 1;
-}
-
-static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
- ext4_lblk_t end)
-{
- struct super_block *sb = inode->i_sb;
- int depth = ext_depth(inode);
- struct ext4_ext_path *path;
- ext4_fsblk_t partial_cluster = 0;
- handle_t *handle;
- int i, err;
-
- ext_debug("truncate since %u to %u\n", start, end);
-
- /* probably first extent we're gonna free will be last in block */
- handle = ext4_journal_start(inode, depth + 1);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
-
-again:
- ext4_ext_invalidate_cache(inode);
-
- trace_ext4_ext_remove_space(inode, start, depth);
-
- /*
- * Check if we are removing extents inside the extent tree. If that
- * is the case, we are going to punch a hole inside the extent tree
- * so we have to check whether we need to split the extent covering
- * the last block to remove so we can easily remove the part of it
- * in ext4_ext_rm_leaf().
- */
- if (end < EXT_MAX_BLOCKS - 1) {
- struct ext4_extent *ex;
- ext4_lblk_t ee_block;
-
- /* find extent for this block */
- path = ext4_ext_find_extent(inode, end, NULL);
- if (IS_ERR(path)) {
- ext4_journal_stop(handle);
- return PTR_ERR(path);
- }
- depth = ext_depth(inode);
- ex = path[depth].p_ext;
- if (!ex)
- goto cont;
-
- ee_block = le32_to_cpu(ex->ee_block);
-
- /*
- * See if the last block is inside the extent, if so split
- * the extent at 'end' block so we can easily remove the
- * tail of the first part of the split extent in
- * ext4_ext_rm_leaf().
- */
- if (end >= ee_block &&
- end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
- int split_flag = 0;
-
- if (ext4_ext_is_uninitialized(ex))
- split_flag = EXT4_EXT_MARK_UNINIT1 |
- EXT4_EXT_MARK_UNINIT2;
-
- /*
- * Split the extent in two so that 'end' is the last
- * block in the first new extent
- */
- err = ext4_split_extent_at(handle, inode, path,
- end + 1, split_flag,
- EXT4_GET_BLOCKS_PRE_IO |
- EXT4_GET_BLOCKS_PUNCH_OUT_EXT);
-
- if (err < 0)
- goto out;
- }
- ext4_ext_drop_refs(path);
- kfree(path);
- }
-cont:
-
- /*
- * We start scanning from right side, freeing all the blocks
- * after i_size and walking into the tree depth-wise.
- */
- depth = ext_depth(inode);
- path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
- if (path == NULL) {
- ext4_journal_stop(handle);
- return -ENOMEM;
- }
- path[0].p_depth = depth;
- path[0].p_hdr = ext_inode_hdr(inode);
-
- if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
- err = -EIO;
- goto out;
- }
- i = err = 0;
-
- while (i >= 0 && err == 0) {
- if (i == depth) {
- /* this is leaf block */
- err = ext4_ext_rm_leaf(handle, inode, path,
- &partial_cluster, start,
- end);
- /* root level has p_bh == NULL, brelse() eats this */
- brelse(path[i].p_bh);
- path[i].p_bh = NULL;
- i--;
- continue;
- }
-
- /* this is index block */
- if (!path[i].p_hdr) {
- ext_debug("initialize header\n");
- path[i].p_hdr = ext_block_hdr(path[i].p_bh);
- }
-
- if (!path[i].p_idx) {
- /* this level hasn't been touched yet */
- path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
- path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
- ext_debug("init index ptr: hdr 0x%p, num %d\n",
- path[i].p_hdr,
- le16_to_cpu(path[i].p_hdr->eh_entries));
- } else {
- /* we were already here, see at next index */
- path[i].p_idx--;
- }
-
- ext_debug("level %d - index, first 0x%p, cur 0x%p\n",
- i, EXT_FIRST_INDEX(path[i].p_hdr),
- path[i].p_idx);
- if (ext4_ext_more_to_rm(path + i)) {
- struct buffer_head *bh;
- /* go to the next level */
- ext_debug("move to level %d (block %llu)\n",
- i + 1, ext4_idx_pblock(path[i].p_idx));
- memset(path + i + 1, 0, sizeof(*path));
- bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx));
- if (!bh) {
- /* should we reset i_size? */
- err = -EIO;
- break;
- }
- if (WARN_ON(i + 1 > depth)) {
- err = -EIO;
- break;
- }
- if (ext4_ext_check(inode, ext_block_hdr(bh),
- depth - i - 1)) {
- err = -EIO;
- break;
- }
- path[i + 1].p_bh = bh;
-
- /* save actual number of indexes since this
- * number is changed at the next iteration */
- path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
- i++;
- } else {
- /* we finished processing this index, go up */
- if (path[i].p_hdr->eh_entries == 0 && i > 0) {
- /* index is empty, remove it;
- * the handle has already been prepared by the
- * leaf truncation above */
- err = ext4_ext_rm_idx(handle, inode, path + i);
- }
- /* root level has p_bh == NULL, brelse() eats this */
- brelse(path[i].p_bh);
- path[i].p_bh = NULL;
- i--;
- ext_debug("return to level %d\n", i);
- }
- }
-
- trace_ext4_ext_remove_space_done(inode, start, depth, partial_cluster,
- path->p_hdr->eh_entries);
-
- /* If we still have something in the partial cluster and we have removed
- * even the first extent, then we should free the blocks in the partial
- * cluster as well. */
- if (partial_cluster && path->p_hdr->eh_entries == 0) {
- int flags = EXT4_FREE_BLOCKS_FORGET;
-
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
- flags |= EXT4_FREE_BLOCKS_METADATA;
-
- ext4_free_blocks(handle, inode, NULL,
- EXT4_C2B(EXT4_SB(sb), partial_cluster),
- EXT4_SB(sb)->s_cluster_ratio, flags);
- partial_cluster = 0;
- }
-
- /* TODO: flexible tree reduction should be here */
- if (path->p_hdr->eh_entries == 0) {
- /*
- * truncate to zero freed all the tree,
- * so we need to correct eh_depth
- */
- err = ext4_ext_get_access(handle, inode, path);
- if (err == 0) {
- ext_inode_hdr(inode)->eh_depth = 0;
- ext_inode_hdr(inode)->eh_max =
- cpu_to_le16(ext4_ext_space_root(inode, 0));
- err = ext4_ext_dirty(handle, inode, path);
- }
- }
-out:
- ext4_ext_drop_refs(path);
- kfree(path);
- if (err == -EAGAIN)
- goto again;
- ext4_journal_stop(handle);
-
- return err;
-}
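-
-/*
- * Worked example for the pre-split above (values assumed): punching
- * logical blocks 10..19 out of an extent covering 0..31 first splits
- * it at end + 1 = 20 into [0, 20) and [20, 32); ext4_ext_rm_leaf()
- * can then trim blocks 10..19 off the first half as a plain tail
- * removal.
- */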
-
-/*
- * called at mount time
- */
-void ext4_ext_init(struct super_block *sb)
-{
- /*
- * possible initialization would be here
- */
-
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
-#if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
- printk(KERN_INFO "EXT4-fs: file extents enabled"
-#ifdef AGGRESSIVE_TEST
- ", aggressive tests"
-#endif
-#ifdef CHECK_BINSEARCH
- ", check binsearch"
-#endif
-#ifdef EXTENTS_STATS
- ", stats"
-#endif
- "\n");
-#endif
-#ifdef EXTENTS_STATS
- spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
- EXT4_SB(sb)->s_ext_min = 1 << 30;
- EXT4_SB(sb)->s_ext_max = 0;
-#endif
- }
-}
-
-/*
- * called at umount time
- */
-void ext4_ext_release(struct super_block *sb)
-{
- if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
- return;
-
-#ifdef EXTENTS_STATS
- if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) {
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n",
- sbi->s_ext_blocks, sbi->s_ext_extents,
- sbi->s_ext_blocks / sbi->s_ext_extents);
- printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n",
- sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max);
- }
-#endif
-}
-
-/* FIXME!! we need to try to merge to left or right after zero-out */
-static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
-{
- ext4_fsblk_t ee_pblock;
- unsigned int ee_len;
- int ret;
-
- ee_len = ext4_ext_get_actual_len(ex);
- ee_pblock = ext4_ext_pblock(ex);
-
- ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
- if (ret > 0)
- ret = 0;
-
- return ret;
-}
-
-/*
- * ext4_split_extent_at() splits an extent at given block.
- *
- * @handle: the journal handle
- * @inode: the file inode
- * @path: the path to the extent
- * @split: the logical block where the extent is split.
- * @split_flag: indicates whether the extent may be zeroed out if the split
- *		fails, and the states (init or uninit) of the new extents.
- * @flags: flags used to insert the new extent into the extent tree.
- *
- * Splits extent [a, b] into two extents [a, @split) and [@split, b], the
- * states of which are determined by split_flag.
- *
- * There are two cases:
- * a> the extent is split into two extents.
- * b> no split is needed, and we just mark the extent.
- *
- * return 0 on success.
- */
-static int ext4_split_extent_at(handle_t *handle,
- struct inode *inode,
- struct ext4_ext_path *path,
- ext4_lblk_t split,
- int split_flag,
- int flags)
-{
- ext4_fsblk_t newblock;
- ext4_lblk_t ee_block;
- struct ext4_extent *ex, newex, orig_ex;
- struct ext4_extent *ex2 = NULL;
- unsigned int ee_len, depth;
- int err = 0;
-
- ext_debug("ext4_split_extents_at: inode %lu, logical"
- "block %llu\n", inode->i_ino, (unsigned long long)split);
-
- ext4_ext_show_leaf(inode, path);
-
- depth = ext_depth(inode);
- ex = path[depth].p_ext;
- ee_block = le32_to_cpu(ex->ee_block);
- ee_len = ext4_ext_get_actual_len(ex);
- newblock = split - ee_block + ext4_ext_pblock(ex);
-
- BUG_ON(split < ee_block || split >= (ee_block + ee_len));
-
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto out;
-
- if (split == ee_block) {
- /*
- * case b: block @split is the block that the extent begins with;
- * we just change the state of the extent, and splitting
- * is not needed.
- */
- if (split_flag & EXT4_EXT_MARK_UNINIT2)
- ext4_ext_mark_uninitialized(ex);
- else
- ext4_ext_mark_initialized(ex);
-
- if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
- ext4_ext_try_to_merge(inode, path, ex);
-
- err = ext4_ext_dirty(handle, inode, path + depth);
- goto out;
- }
-
- /* case a */
- memcpy(&orig_ex, ex, sizeof(orig_ex));
- ex->ee_len = cpu_to_le16(split - ee_block);
- if (split_flag & EXT4_EXT_MARK_UNINIT1)
- ext4_ext_mark_uninitialized(ex);
-
- /*
- * path may lead to new leaf, not to original leaf any more
- * after ext4_ext_insert_extent() returns.
- */
- err = ext4_ext_dirty(handle, inode, path + depth);
- if (err)
- goto fix_extent_len;
-
- ex2 = &newex;
- ex2->ee_block = cpu_to_le32(split);
- ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
- ext4_ext_store_pblock(ex2, newblock);
- if (split_flag & EXT4_EXT_MARK_UNINIT2)
- ext4_ext_mark_uninitialized(ex2);
-
- err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
- if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
- err = ext4_ext_zeroout(inode, &orig_ex);
- if (err)
- goto fix_extent_len;
- /* update the extent length and mark as initialized */
- ex->ee_len = cpu_to_le16(ee_len);
- ext4_ext_try_to_merge(inode, path, ex);
- err = ext4_ext_dirty(handle, inode, path + depth);
- goto out;
- } else if (err)
- goto fix_extent_len;
-
-out:
- ext4_ext_show_leaf(inode, path);
- return err;
-
-fix_extent_len:
- ex->ee_len = orig_ex.ee_len;
- ext4_ext_dirty(handle, inode, path + depth);
- return err;
-}
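-
-/*
- * Worked example (values assumed): splitting an extent covering
- * logical blocks 100..107 at physical block P with @split = 104
- * shrinks the original to [100, 104) at P and inserts a new extent
- * [104, 108) at P + 4; if inserting the new extent hits ENOSPC and
- * EXT4_EXT_MAY_ZEROOUT is set, the original extent is zeroed out and
- * restored to its full length as initialized instead.
- */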
-
-/*
- * ext4_split_extent() splits an extent and marks the extent covered
- * by @map as split_flag indicates.
- *
- * It may result in splitting the extent into multiple extents (up to three).
- * There are three possibilities:
- * a> There is no split required
- * b> Splits into two extents: the split happens at either end of the extent
- * c> Splits into three extents: someone is splitting in the middle of the extent
- *
- */
-static int ext4_split_extent(handle_t *handle,
- struct inode *inode,
- struct ext4_ext_path *path,
- struct ext4_map_blocks *map,
- int split_flag,
- int flags)
-{
- ext4_lblk_t ee_block;
- struct ext4_extent *ex;
- unsigned int ee_len, depth;
- int err = 0;
- int uninitialized;
- int split_flag1, flags1;
-
- depth = ext_depth(inode);
- ex = path[depth].p_ext;
- ee_block = le32_to_cpu(ex->ee_block);
- ee_len = ext4_ext_get_actual_len(ex);
- uninitialized = ext4_ext_is_uninitialized(ex);
-
- if (map->m_lblk + map->m_len < ee_block + ee_len) {
- split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
- EXT4_EXT_MAY_ZEROOUT : 0;
- flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
- if (uninitialized)
- split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
- EXT4_EXT_MARK_UNINIT2;
- err = ext4_split_extent_at(handle, inode, path,
- map->m_lblk + map->m_len, split_flag1, flags1);
- if (err)
- goto out;
- }
-
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode, map->m_lblk, path);
- if (IS_ERR(path))
- return PTR_ERR(path);
-
- if (map->m_lblk >= ee_block) {
- split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
- EXT4_EXT_MAY_ZEROOUT : 0;
- if (uninitialized)
- split_flag1 |= EXT4_EXT_MARK_UNINIT1;
- if (split_flag & EXT4_EXT_MARK_UNINIT2)
- split_flag1 |= EXT4_EXT_MARK_UNINIT2;
- err = ext4_split_extent_at(handle, inode, path,
- map->m_lblk, split_flag1, flags);
- if (err)
- goto out;
- }
-
- ext4_ext_show_leaf(inode, path);
-out:
- return err ? err : map->m_len;
-}
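-
-/*
- * Worked example (values assumed): for an extent covering blocks
- * 100..115 and a map of [104, 108), the first call splits at
- * m_lblk + m_len = 108, producing [100, 108) and [108, 116); the
- * path is then looked up again and a second split at m_lblk = 104
- * yields the final three extents [100, 104), [104, 108), [108, 116).
- */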
-
-#define EXT4_EXT_ZERO_LEN 7
-/*
- * This function is called by ext4_ext_map_blocks() if someone tries to write
- * to an uninitialized extent. It may result in splitting the uninitialized
- * extent into multiple extents (up to three - one initialized and two
- * uninitialized).
- * There are three possibilities:
- * a> There is no split required: Entire extent should be initialized
- * b> Splits into two extents: the write happens at either end of the extent
- * c> Splits into three extents: someone is writing in the middle of the extent
- *
- * Pre-conditions:
- * - The extent pointed to by 'path' is uninitialized.
- * - The extent pointed to by 'path' contains a superset
- * of the logical span [map->m_lblk, map->m_lblk + map->m_len).
- *
- * Post-conditions on success:
- * - the returned value is the number of blocks beyond map->m_lblk
- * that are allocated and initialized.
- * It is guaranteed to be >= map->m_len.
- */
-static int ext4_ext_convert_to_initialized(handle_t *handle,
- struct inode *inode,
- struct ext4_map_blocks *map,
- struct ext4_ext_path *path)
-{
- struct ext4_extent_header *eh;
- struct ext4_map_blocks split_map;
- struct ext4_extent zero_ex;
- struct ext4_extent *ex;
- ext4_lblk_t ee_block, eof_block;
- unsigned int ee_len, depth;
- int allocated;
- int err = 0;
- int split_flag = 0;
-
- ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
- "block %llu, max_blocks %u\n", inode->i_ino,
- (unsigned long long)map->m_lblk, map->m_len);
-
- eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
- inode->i_sb->s_blocksize_bits;
- if (eof_block < map->m_lblk + map->m_len)
- eof_block = map->m_lblk + map->m_len;
-
- depth = ext_depth(inode);
- eh = path[depth].p_hdr;
- ex = path[depth].p_ext;
- ee_block = le32_to_cpu(ex->ee_block);
- ee_len = ext4_ext_get_actual_len(ex);
- allocated = ee_len - (map->m_lblk - ee_block);
-
- trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
-
- /* Pre-conditions */
- BUG_ON(!ext4_ext_is_uninitialized(ex));
- BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
-
- /*
- * Attempt to transfer newly initialized blocks from the currently
- * uninitialized extent to its left neighbor. This is much cheaper
- * than an insertion followed by a merge as those involve costly
- * memmove() calls. This is the common case in steady state for
- * workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append
- * writes.
- *
- * Limitations of the current logic:
- * - L1: we only deal with writes at the start of the extent.
- * The approach could be extended to writes at the end
- * of the extent but this scenario was deemed less common.
- * - L2: we do not deal with writes covering the whole extent.
- * This would require removing the extent if the transfer
- * is possible.
- * - L3: we only attempt to merge with an extent stored in the
- * same extent tree node.
- */
- if ((map->m_lblk == ee_block) && /*L1*/
- (map->m_len < ee_len) && /*L2*/
- (ex > EXT_FIRST_EXTENT(eh))) { /*L3*/
- struct ext4_extent *prev_ex;
- ext4_lblk_t prev_lblk;
- ext4_fsblk_t prev_pblk, ee_pblk;
- unsigned int prev_len, write_len;
-
- prev_ex = ex - 1;
- prev_lblk = le32_to_cpu(prev_ex->ee_block);
- prev_len = ext4_ext_get_actual_len(prev_ex);
- prev_pblk = ext4_ext_pblock(prev_ex);
- ee_pblk = ext4_ext_pblock(ex);
- write_len = map->m_len;
-
- /*
- * A transfer of blocks from 'ex' to 'prev_ex' is allowed
- * upon those conditions:
- * - C1: prev_ex is initialized,
- * - C2: prev_ex is logically abutting ex,
- * - C3: prev_ex is physically abutting ex,
- * - C4: prev_ex can receive the additional blocks without
- * overflowing the (initialized) length limit.
- */
- if ((!ext4_ext_is_uninitialized(prev_ex)) && /*C1*/
- ((prev_lblk + prev_len) == ee_block) && /*C2*/
- ((prev_pblk + prev_len) == ee_pblk) && /*C3*/
- (prev_len < (EXT_INIT_MAX_LEN - write_len))) { /*C4*/
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto out;
-
- trace_ext4_ext_convert_to_initialized_fastpath(inode,
- map, ex, prev_ex);
-
- /* Shift the start of ex by 'write_len' blocks */
- ex->ee_block = cpu_to_le32(ee_block + write_len);
- ext4_ext_store_pblock(ex, ee_pblk + write_len);
- ex->ee_len = cpu_to_le16(ee_len - write_len);
- ext4_ext_mark_uninitialized(ex); /* Restore the flag */
-
- /* Extend prev_ex by 'write_len' blocks */
- prev_ex->ee_len = cpu_to_le16(prev_len + write_len);
-
- /* Mark the block containing both extents as dirty */
- ext4_ext_dirty(handle, inode, path + depth);
-
- /* Update path to point to the right extent */
- path[depth].p_ext = prev_ex;
-
- /* Result: number of initialized blocks past m_lblk */
- allocated = write_len;
- goto out;
- }
- }
-
- WARN_ON(map->m_lblk < ee_block);
- /*
- * It is safe to convert extent to initialized via explicit
- * zeroout only if extent is fully inside i_size or new_size.
- */
- split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
-
- /* If the extent is at most 2*EXT4_EXT_ZERO_LEN blocks, zero it out directly */
- if (ee_len <= 2*EXT4_EXT_ZERO_LEN &&
- (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
- err = ext4_ext_zeroout(inode, ex);
- if (err)
- goto out;
-
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto out;
- ext4_ext_mark_initialized(ex);
- ext4_ext_try_to_merge(inode, path, ex);
- err = ext4_ext_dirty(handle, inode, path + depth);
- goto out;
- }
-
- /*
- * four cases:
- * 1. split the extent into three extents.
- * 2. split the extent into two extents, zeroout the first half.
- * 3. split the extent into two extents, zeroout the second half.
- * 4. split the extent into two extents without zeroout.
- */
- split_map.m_lblk = map->m_lblk;
- split_map.m_len = map->m_len;
-
- if (allocated > map->m_len) {
- if (allocated <= EXT4_EXT_ZERO_LEN &&
- (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
- /* case 3 */
- zero_ex.ee_block =
- cpu_to_le32(map->m_lblk);
- zero_ex.ee_len = cpu_to_le16(allocated);
- ext4_ext_store_pblock(&zero_ex,
- ext4_ext_pblock(ex) + map->m_lblk - ee_block);
- err = ext4_ext_zeroout(inode, &zero_ex);
- if (err)
- goto out;
- split_map.m_lblk = map->m_lblk;
- split_map.m_len = allocated;
- } else if ((map->m_lblk - ee_block + map->m_len <
- EXT4_EXT_ZERO_LEN) &&
- (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
- /* case 2 */
- if (map->m_lblk != ee_block) {
- zero_ex.ee_block = ex->ee_block;
- zero_ex.ee_len = cpu_to_le16(map->m_lblk -
- ee_block);
- ext4_ext_store_pblock(&zero_ex,
- ext4_ext_pblock(ex));
- err = ext4_ext_zeroout(inode, &zero_ex);
- if (err)
- goto out;
- }
-
- split_map.m_lblk = ee_block;
- split_map.m_len = map->m_lblk - ee_block + map->m_len;
- allocated = map->m_len;
- }
- }
-
- allocated = ext4_split_extent(handle, inode, path,
- &split_map, split_flag, 0);
- if (allocated < 0)
- err = allocated;
-
-out:
- return err ? err : allocated;
-}
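-
-/*
- * Worked example for case 3 above (values assumed, and assuming the
- * extent lies within EOF so EXT4_EXT_MAY_ZEROOUT is set): writing
- * [110, 114) into an uninitialized extent covering 100..115 gives
- * allocated = 6 > m_len = 4; since 6 <= EXT4_EXT_ZERO_LEN, blocks
- * 110..115 are zeroed out and the extent is split once, leaving an
- * uninitialized extent [100, 110) and an initialized one [110, 116).
- */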
-
-/*
- * This function is called by ext4_ext_map_blocks() from
- * ext4_get_blocks_dio_write() when DIO to write
- * to an uninitialized extent.
- *
- * Writing to an uninitialized extent may result in splitting the
- * uninitialized extent into multiple uninitialized extents (up to three).
- * There are three possibilities:
- * a> There is no split required: the entire extent stays uninitialized
- * b> Splits into two extents: the write happens at either end of the extent
- * c> Splits into three extents: someone is writing in the middle of the extent
- *
- * One or more index blocks may be needed if the extent tree grows after
- * the uninitialized extent is split. To prevent ENOSPC from occurring when
- * the IO completes, we need to split the uninitialized extent before the
- * DIO is submitted. The uninitialized extent will be split into at most
- * three uninitialized extents. After the IO completes, the part that was
- * filled will be converted to initialized by the end_io callback function
- * via ext4_convert_unwritten_extents().
- *
- * Returns the size of uninitialized extent to be written on success.
- */
-static int ext4_split_unwritten_extents(handle_t *handle,
- struct inode *inode,
- struct ext4_map_blocks *map,
- struct ext4_ext_path *path,
- int flags)
-{
- ext4_lblk_t eof_block;
- ext4_lblk_t ee_block;
- struct ext4_extent *ex;
- unsigned int ee_len;
- int split_flag = 0, depth;
-
- ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
- "block %llu, max_blocks %u\n", inode->i_ino,
- (unsigned long long)map->m_lblk, map->m_len);
-
- eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
- inode->i_sb->s_blocksize_bits;
- if (eof_block < map->m_lblk + map->m_len)
- eof_block = map->m_lblk + map->m_len;
- /*
- * It is safe to convert extent to initialized via explicit
- * zeroout only if extent is fully inside i_size or new_size.
- */
- depth = ext_depth(inode);
- ex = path[depth].p_ext;
- ee_block = le32_to_cpu(ex->ee_block);
- ee_len = ext4_ext_get_actual_len(ex);
-
- split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
- split_flag |= EXT4_EXT_MARK_UNINIT2;
-
- flags |= EXT4_GET_BLOCKS_PRE_IO;
- return ext4_split_extent(handle, inode, path, map, split_flag, flags);
-}
-
-static int ext4_convert_unwritten_extents_endio(handle_t *handle,
- struct inode *inode,
- struct ext4_ext_path *path)
-{
- struct ext4_extent *ex;
- int depth;
- int err = 0;
-
- depth = ext_depth(inode);
- ex = path[depth].p_ext;
-
- ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
- "block %llu, max_blocks %u\n", inode->i_ino,
- (unsigned long long)le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_len(ex));
-
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto out;
- /* first mark the extent as initialized */
- ext4_ext_mark_initialized(ex);
-
- /* note: ext4_ext_correct_indexes() isn't needed here because
- * borders are not changed
- */
- ext4_ext_try_to_merge(inode, path, ex);
-
- /* Mark modified extent as dirty */
- err = ext4_ext_dirty(handle, inode, path + depth);
-out:
- ext4_ext_show_leaf(inode, path);
- return err;
-}
-
-static void unmap_underlying_metadata_blocks(struct block_device *bdev,
- sector_t block, int count)
-{
- int i;
- for (i = 0; i < count; i++)
- unmap_underlying_metadata(bdev, block + i);
-}
-
-/*
- * Handle EOFBLOCKS_FL flag, clearing it if necessary
- */
-static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
- ext4_lblk_t lblk,
- struct ext4_ext_path *path,
- unsigned int len)
-{
- int i, depth;
- struct ext4_extent_header *eh;
- struct ext4_extent *last_ex;
-
- if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
- return 0;
-
- depth = ext_depth(inode);
- eh = path[depth].p_hdr;
-
- /*
- * We're going to remove EOFBLOCKS_FL entirely in future so we
- * do not care for this case anymore. Simply remove the flag
- * if there are no extents.
- */
- if (unlikely(!eh->eh_entries))
- goto out;
- last_ex = EXT_LAST_EXTENT(eh);
- /*
- * We should clear the EOFBLOCKS_FL flag if we are writing the
- * last block in the last extent in the file. We test this by
- * first checking to see if the caller to
- * ext4_ext_get_blocks() was interested in the last block (or
- * a block beyond the last block) in the current extent. If
- * this turns out to be false, we can bail out from this
- * function immediately.
- */
- if (lblk + len < le32_to_cpu(last_ex->ee_block) +
- ext4_ext_get_actual_len(last_ex))
- return 0;
- /*
- * If the caller does appear to be planning to write at or
- * beyond the end of the current extent, we then test to see
- * if the current extent is the last extent in the file, by
- * checking to make sure it was reached via the rightmost node
- * at each level of the tree.
- */
- for (i = depth-1; i >= 0; i--)
- if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
- return 0;
-out:
- ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
- return ext4_mark_inode_dirty(handle, inode);
-}
-
-/**
- * ext4_find_delalloc_range: find delayed allocated block in the given range.
- *
- * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns
- * whether there are any buffers marked for delayed allocation. It returns '1'
- * on the first delalloc'ed buffer head found. If no buffer head in the given
- * range is marked for delalloc, it returns 0.
- * lblk_start should always be <= lblk_end.
- * search_hint_reverse is to indicate that searching in reverse from lblk_end to
- * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed
- * block sooner). This is useful when blocks are truncated sequentially from
- * lblk_start towards lblk_end.
- */
-static int ext4_find_delalloc_range(struct inode *inode,
- ext4_lblk_t lblk_start,
- ext4_lblk_t lblk_end,
- int search_hint_reverse)
-{
- struct address_space *mapping = inode->i_mapping;
- struct buffer_head *head, *bh = NULL;
- struct page *page;
- ext4_lblk_t i, pg_lblk;
- pgoff_t index;
-
- if (!test_opt(inode->i_sb, DELALLOC))
- return 0;
-
- /* reverse search won't work if fs block size is less than page size */
- if (inode->i_blkbits < PAGE_CACHE_SHIFT)
- search_hint_reverse = 0;
-
- if (search_hint_reverse)
- i = lblk_end;
- else
- i = lblk_start;
-
- index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
- while ((i >= lblk_start) && (i <= lblk_end)) {
- page = find_get_page(mapping, index);
- if (!page)
- goto nextpage;
-
- if (!page_has_buffers(page))
- goto nextpage;
-
- head = page_buffers(page);
- if (!head)
- goto nextpage;
-
- bh = head;
- pg_lblk = index << (PAGE_CACHE_SHIFT -
- inode->i_blkbits);
- do {
- if (unlikely(pg_lblk < lblk_start)) {
- /*
- * This is possible when fs block size is less
- * than page size and our cluster starts/ends in
- * middle of the page. So we need to skip the
- * initial few blocks till we reach the 'lblk'
- */
- pg_lblk++;
- continue;
- }
-
- /* Check if the buffer is delayed allocated and that it
- * is not yet mapped. (when da-buffers are mapped during
- * their writeout, their da_mapped bit is set.)
- */
- if (buffer_delay(bh) && !buffer_da_mapped(bh)) {
- page_cache_release(page);
- trace_ext4_find_delalloc_range(inode,
- lblk_start, lblk_end,
- search_hint_reverse,
- 1, i);
- return 1;
- }
- if (search_hint_reverse)
- i--;
- else
- i++;
- } while ((i >= lblk_start) && (i <= lblk_end) &&
- ((bh = bh->b_this_page) != head));
-nextpage:
- if (page)
- page_cache_release(page);
- /*
- * Move to next page. 'i' will be the first lblk in the next
- * page.
- */
- if (search_hint_reverse)
- index--;
- else
- index++;
- i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
- }
-
- trace_ext4_find_delalloc_range(inode, lblk_start, lblk_end,
- search_hint_reverse, 0, 0);
- return 0;
-}
-
-int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
- int search_hint_reverse)
-{
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- ext4_lblk_t lblk_start, lblk_end;
- lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
- lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
-
- return ext4_find_delalloc_range(inode, lblk_start, lblk_end,
- search_hint_reverse);
-}
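-
-/*
- * Worked example (values assumed): with a cluster ratio of 16,
- * lblk = 37 rounds down to lblk_start = 32 and up to lblk_end = 47,
- * so the whole cluster containing block 37 is scanned for delalloc
- * buffers.
- */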
-
-/**
- * Determines how many complete clusters (out of those specified by the 'map')
- * are under delalloc and were reserved quota for.
- * This function is called when we are writing out the blocks that were
- * originally written with their allocation delayed, but then the space was
- * allocated using fallocate() before the delayed allocation could be resolved.
- * The cases to look for are:
- * ('=' indicates delayed allocated blocks
- * '-' indicates non-delayed allocated blocks)
- * (a) partial clusters towards beginning and/or end outside of allocated range
- * are not delalloc'ed.
- * Ex:
- * |----c---=|====c====|====c====|===-c----|
- * |++++++ allocated ++++++|
- * ==> 4 complete clusters in above example
- *
- * (b) partial cluster (outside of allocated range) towards either end is
- * marked for delayed allocation. In this case, we will exclude that
- * cluster.
- * Ex:
- * |----====c========|========c========|
- * |++++++ allocated ++++++|
- * ==> 1 complete cluster in above example
- *
- * Ex:
- * |================c================|
- * |++++++ allocated ++++++|
- * ==> 0 complete clusters in above example
- *
- * The ext4_da_update_reserve_space will be called only if we
- * determine here that there were some "entire" clusters that span
- * this 'allocated' range.
- * In the non-bigalloc case, this function will just end up returning num_blks
- * without ever calling ext4_find_delalloc_range.
- */
-static unsigned int
-get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
- unsigned int num_blks)
-{
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- ext4_lblk_t alloc_cluster_start, alloc_cluster_end;
- ext4_lblk_t lblk_from, lblk_to, c_offset;
- unsigned int allocated_clusters = 0;
-
- alloc_cluster_start = EXT4_B2C(sbi, lblk_start);
- alloc_cluster_end = EXT4_B2C(sbi, lblk_start + num_blks - 1);
-
- /* max possible clusters for this allocation */
- allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1;
-
- trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
-
- /* Check towards left side */
- c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
- if (c_offset) {
- lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
- lblk_to = lblk_from + c_offset - 1;
-
- if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
- allocated_clusters--;
- }
-
- /* Now check towards right. */
- c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
- if (allocated_clusters && c_offset) {
- lblk_from = lblk_start + num_blks;
- lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
-
- if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
- allocated_clusters--;
- }
-
- return allocated_clusters;
-}
-
-static int
-ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
- struct ext4_map_blocks *map,
- struct ext4_ext_path *path, int flags,
- unsigned int allocated, ext4_fsblk_t newblock)
-{
- int ret = 0;
- int err = 0;
- ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
-
- ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
- "block %llu, max_blocks %u, flags %x, allocated %u\n",
- inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,
- flags, allocated);
- ext4_ext_show_leaf(inode, path);
-
- trace_ext4_ext_handle_uninitialized_extents(inode, map, allocated,
- newblock);
-
- /* get_block() before submit the IO, split the extent */
- if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
- ret = ext4_split_unwritten_extents(handle, inode, map,
- path, flags);
- /*
- * Flag the inode (non-AIO case) or the end_io struct (AIO case)
- * to note that this IO needs conversion to written when the IO
- * is completed
- */
- if (io)
- ext4_set_io_unwritten_flag(inode, io);
- else
- ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
- if (ext4_should_dioread_nolock(inode))
- map->m_flags |= EXT4_MAP_UNINIT;
- goto out;
- }
- /* IO end_io complete, convert the filled extent to written */
- if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
- ret = ext4_convert_unwritten_extents_endio(handle, inode,
- path);
- if (ret >= 0) {
- ext4_update_inode_fsync_trans(handle, inode, 1);
- err = check_eofblocks_fl(handle, inode, map->m_lblk,
- path, map->m_len);
- } else
- err = ret;
- goto out2;
- }
- /* buffered IO case */
- /*
- * repeat fallocate creation request
- * we already have an unwritten extent
- */
- if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)
- goto map_out;
-
- /* buffered READ or buffered write_begin() lookup */
- if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
- /*
- * We have blocks reserved already. We
- * return allocated blocks so that delalloc
- * won't do block reservation for us. But
- * the buffer head will be unmapped so that
- * a read from the block returns 0s.
- */
- map->m_flags |= EXT4_MAP_UNWRITTEN;
- goto out1;
- }
-
- /* buffered write, writepage time, convert*/
- ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
- if (ret >= 0)
- ext4_update_inode_fsync_trans(handle, inode, 1);
-out:
- if (ret <= 0) {
- err = ret;
- goto out2;
- } else
- allocated = ret;
- map->m_flags |= EXT4_MAP_NEW;
- /*
- * if we allocated more blocks than requested
- * we need to make sure we unmap the extra block
- * allocated. The actual needed block will get
- * unmapped later when we find the buffer_head marked
- * new.
- */
- if (allocated > map->m_len) {
- unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
- newblock + map->m_len,
- allocated - map->m_len);
- allocated = map->m_len;
- }
-
- /*
- * If we have done fallocate with the offset that is already
- * delayed allocated, we would have block reservation
- * and quota reservation done in the delayed write path.
- * But fallocate would have already updated quota and block
- * count for this offset. So cancel these reservations.
- */
- if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
- unsigned int reserved_clusters;
- reserved_clusters = get_reserved_cluster_alloc(inode,
- map->m_lblk, map->m_len);
- if (reserved_clusters)
- ext4_da_update_reserve_space(inode,
- reserved_clusters,
- 0);
- }
-
-map_out:
- map->m_flags |= EXT4_MAP_MAPPED;
- if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) {
- err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
- map->m_len);
- if (err < 0)
- goto out2;
- }
-out1:
- if (allocated > map->m_len)
- allocated = map->m_len;
- ext4_ext_show_leaf(inode, path);
- map->m_pblk = newblock;
- map->m_len = allocated;
-out2:
- if (path) {
- ext4_ext_drop_refs(path);
- kfree(path);
- }
- return err ? err : allocated;
-}
-
-/*
- * get_implied_cluster_alloc - check to see if the requested
- * allocation (in the map structure) overlaps with a cluster already
- * allocated in an extent.
- * @sb The filesystem superblock structure
- * @map The requested lblk->pblk mapping
- * @ex The extent structure which might contain an implied
- * cluster allocation
- *
- * This function is called by ext4_ext_map_blocks() after we failed to
- * find blocks that were already in the inode's extent tree. Hence,
- * we know that the beginning of the requested region cannot overlap
- * the extent from the inode's extent tree. There are three cases we
- * want to catch. The first is this case:
- *
- * |--- cluster # N--|
- * |--- extent ---| |---- requested region ---|
- * |==========|
- *
- * The second case that we need to test for is this one:
- *
- * |--------- cluster # N ----------------|
- * |--- requested region --| |------- extent ----|
- * |=======================|
- *
- * The third case is when the requested region lies between two extents
- * within the same cluster:
- * |------------- cluster # N-------------|
- * |----- ex -----| |---- ex_right ----|
- * |------ requested region ------|
- * |================|
- *
- * In each of the above cases, we need to set the map->m_pblk and
- * map->m_len so that they correspond to the extent labelled as
- * "|====|" from cluster #N, since it is already in use for data in
- * cluster EXT4_B2C(sbi, map->m_lblk). We will then return 1 to
- * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
- * as a new "allocated" block region. Otherwise, we will return 0 and
- * ext4_ext_map_blocks() will then allocate one or more new clusters
- * by calling ext4_mb_new_blocks().
- */
-static int get_implied_cluster_alloc(struct super_block *sb,
- struct ext4_map_blocks *map,
- struct ext4_extent *ex,
- struct ext4_ext_path *path)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
- ext4_lblk_t ex_cluster_start, ex_cluster_end;
- ext4_lblk_t rr_cluster_start;
- ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
- ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
- unsigned short ee_len = ext4_ext_get_actual_len(ex);
-
- /* The extent passed in that we are trying to match */
- ex_cluster_start = EXT4_B2C(sbi, ee_block);
- ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
-
- /* The requested region passed into ext4_map_blocks() */
- rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
-
- if ((rr_cluster_start == ex_cluster_end) ||
- (rr_cluster_start == ex_cluster_start)) {
- if (rr_cluster_start == ex_cluster_end)
- ee_start += ee_len - 1;
- map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
- c_offset;
- map->m_len = min(map->m_len,
- (unsigned) sbi->s_cluster_ratio - c_offset);
- /*
- * Check for and handle this case:
- *
- * |--------- cluster # N-------------|
- * |------- extent ----|
- * |--- requested region ---|
- * |===========|
- */
-
- if (map->m_lblk < ee_block)
- map->m_len = min(map->m_len, ee_block - map->m_lblk);
-
- /*
- * Check for the case where there is already another allocated
- * block to the right of 'ex' but before the end of the cluster.
- *
- * |------------- cluster # N-------------|
- * |----- ex -----| |---- ex_right ----|
- * |------ requested region ------|
- * |================|
- */
- if (map->m_lblk > ee_block) {
- ext4_lblk_t next = ext4_ext_next_allocated_block(path);
- map->m_len = min(map->m_len, next - map->m_lblk);
- }
-
- trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1);
- return 1;
- }
-
- trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0);
- return 0;
-}
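-
-/*
- * Worked example for the first case above (values assumed): with a
- * cluster ratio of 16, an extent ending at logical block 35 backs
- * cluster #2 (blocks 32..47); a request at lblk = 40 has
- * c_offset = 8, so m_pblk is set to offset 8 within that physical
- * cluster and m_len is clamped to at most 16 - 8 = 8 blocks (and
- * further by the next allocated block, if any).
- */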
-
-
-/*
- * Block allocation/map/preallocation routine for extents based files
- *
- *
- * Need to be called with
- * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
- * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
- *
- * return > 0, number of blocks already mapped/allocated
- * if create == 0 and these are pre-allocated blocks
- * buffer head is unmapped
- * otherwise blocks are mapped
- *
- * return = 0, if plain look up failed (blocks have not been allocated)
- * buffer head is unmapped
- *
- * return < 0, error case.
- */
-int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
- struct ext4_map_blocks *map, int flags)
-{
- struct ext4_ext_path *path = NULL;
- struct ext4_extent newex, *ex, *ex2;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- ext4_fsblk_t newblock = 0;
- int free_on_err = 0, err = 0, depth, ret;
- unsigned int allocated = 0, offset = 0;
- unsigned int allocated_clusters = 0;
- struct ext4_allocation_request ar;
- ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
- ext4_lblk_t cluster_offset;
-
- ext_debug("blocks %u/%u requested for inode %lu\n",
- map->m_lblk, map->m_len, inode->i_ino);
- trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
-
- /* check in cache */
- if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
- if (!newex.ee_start_lo && !newex.ee_start_hi) {
- if ((sbi->s_cluster_ratio > 1) &&
- ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
- map->m_flags |= EXT4_MAP_FROM_CLUSTER;
-
- if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
- /*
- * block isn't allocated yet and
- * user doesn't want to allocate it
- */
- goto out2;
- }
- /* we should allocate requested block */
- } else {
- /* block is already allocated */
- if (sbi->s_cluster_ratio > 1)
- map->m_flags |= EXT4_MAP_FROM_CLUSTER;
- newblock = map->m_lblk
- - le32_to_cpu(newex.ee_block)
- + ext4_ext_pblock(&newex);
- /* number of remaining blocks in the extent */
- allocated = ext4_ext_get_actual_len(&newex) -
- (map->m_lblk - le32_to_cpu(newex.ee_block));
- goto out;
- }
- }
-
- /* find extent for this block */
- path = ext4_ext_find_extent(inode, map->m_lblk, NULL);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- path = NULL;
- goto out2;
- }
-
- depth = ext_depth(inode);
-
- /*
- * a consistent leaf must not be empty;
- * this situation is possible, though, _during_ tree modification;
- * this is why the assert can't be put in ext4_ext_find_extent()
- */
- if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
- EXT4_ERROR_INODE(inode, "bad extent address "
- "lblock: %lu, depth: %d pblock %lld",
- (unsigned long) map->m_lblk, depth,
- path[depth].p_block);
- err = -EIO;
- goto out2;
- }
-
- ex = path[depth].p_ext;
- if (ex) {
- ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
- ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
- unsigned short ee_len;
-
- /*
- * Uninitialized extents are treated as holes, except that
- * we split out initialized portions during a write.
- */
- ee_len = ext4_ext_get_actual_len(ex);
-
- trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len);
-
- /* if found extent covers block, simply return it */
- if (in_range(map->m_lblk, ee_block, ee_len)) {
- newblock = map->m_lblk - ee_block + ee_start;
- /* number of remaining blocks in the extent */
- allocated = ee_len - (map->m_lblk - ee_block);
- ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
- ee_block, ee_len, newblock);
-
- /*
- * Do not put uninitialized extent
- * in the cache
- */
- if (!ext4_ext_is_uninitialized(ex)) {
- ext4_ext_put_in_cache(inode, ee_block,
- ee_len, ee_start);
- goto out;
- }
- ret = ext4_ext_handle_uninitialized_extents(
- handle, inode, map, path, flags,
- allocated, newblock);
- return ret;
- }
- }
-
- if ((sbi->s_cluster_ratio > 1) &&
- ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
- map->m_flags |= EXT4_MAP_FROM_CLUSTER;
-
- /*
- * requested block isn't allocated yet;
- * we can't create the block if the create flag is zero
- */
- if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
- /*
- * put the gap we just found into the cache to speed up
- * subsequent requests
- */
- ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
- goto out2;
- }
-
- /*
- * Okay, we need to do block allocation.
- */
- map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
- newex.ee_block = cpu_to_le32(map->m_lblk);
- cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
-
- /*
- * If we are doing bigalloc, check to see if the extent returned
- * by ext4_ext_find_extent() implies a cluster we can use.
- */
- if (cluster_offset && ex &&
- get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
- ar.len = allocated = map->m_len;
- newblock = map->m_pblk;
- map->m_flags |= EXT4_MAP_FROM_CLUSTER;
- goto got_allocated_blocks;
- }
-
- /* find neighbour allocated blocks */
- ar.lleft = map->m_lblk;
- err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
- if (err)
- goto out2;
- ar.lright = map->m_lblk;
- ex2 = NULL;
- err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
- if (err)
- goto out2;
-
- /* Check if the extent after searching to the right implies a
- * cluster we can use. */
- if ((sbi->s_cluster_ratio > 1) && ex2 &&
- get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
- ar.len = allocated = map->m_len;
- newblock = map->m_pblk;
- map->m_flags |= EXT4_MAP_FROM_CLUSTER;
- goto got_allocated_blocks;
- }
-
- /*
- * See if request is beyond maximum number of blocks we can have in
- * a single extent. For an initialized extent this limit is
- * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is
- * EXT_UNINIT_MAX_LEN.
- */
- if (map->m_len > EXT_INIT_MAX_LEN &&
- !(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
- map->m_len = EXT_INIT_MAX_LEN;
- else if (map->m_len > EXT_UNINIT_MAX_LEN &&
- (flags & EXT4_GET_BLOCKS_UNINIT_EXT))
- map->m_len = EXT_UNINIT_MAX_LEN;
-
- /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
- newex.ee_len = cpu_to_le16(map->m_len);
- err = ext4_ext_check_overlap(sbi, inode, &newex, path);
- if (err)
- allocated = ext4_ext_get_actual_len(&newex);
- else
- allocated = map->m_len;
-
- /* allocate new block */
- ar.inode = inode;
- ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
- ar.logical = map->m_lblk;
- /*
- * We calculate the offset from the beginning of the cluster
- * for the logical block number, since when we allocate a
- * physical cluster, the physical block should start at the
- * same offset from the beginning of the cluster. This is
- * needed so that future calls to get_implied_cluster_alloc()
- * work correctly.
- */
- offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
- ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
- ar.goal -= offset;
- ar.logical -= offset;
- if (S_ISREG(inode->i_mode))
- ar.flags = EXT4_MB_HINT_DATA;
- else
- /* disable in-core preallocation for non-regular files */
- ar.flags = 0;
- if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
- ar.flags |= EXT4_MB_HINT_NOPREALLOC;
- newblock = ext4_mb_new_blocks(handle, &ar, &err);
- if (!newblock)
- goto out2;
- ext_debug("allocate new block: goal %llu, found %llu/%u\n",
- ar.goal, newblock, allocated);
- free_on_err = 1;
- allocated_clusters = ar.len;
- ar.len = EXT4_C2B(sbi, ar.len) - offset;
- if (ar.len > allocated)
- ar.len = allocated;
-
-got_allocated_blocks:
- /* try to insert new extent into found leaf and return */
- ext4_ext_store_pblock(&newex, newblock + offset);
- newex.ee_len = cpu_to_le16(ar.len);
- /* Mark uninitialized */
- if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
- ext4_ext_mark_uninitialized(&newex);
- /*
- * An io_end structure is created for every IO write to an
- * uninitialized extent. To avoid unnecessary conversion,
- * we flag here only the IO that really needs it.
- * For the non-async direct IO case, flag the inode state
- * so that we perform the conversion when IO is done.
- */
- if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
- if (io)
- ext4_set_io_unwritten_flag(inode, io);
- else
- ext4_set_inode_state(inode,
- EXT4_STATE_DIO_UNWRITTEN);
- }
- if (ext4_should_dioread_nolock(inode))
- map->m_flags |= EXT4_MAP_UNINIT;
- }
-
- err = 0;
- if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0)
- err = check_eofblocks_fl(handle, inode, map->m_lblk,
- path, ar.len);
- if (!err)
- err = ext4_ext_insert_extent(handle, inode, path,
- &newex, flags);
- if (err && free_on_err) {
- int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
- EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
- /* free data blocks we just allocated */
- /* not a good idea to call discard here directly,
- * but otherwise we'd need to call it every free() */
- ext4_discard_preallocations(inode);
- ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
- ext4_ext_get_actual_len(&newex), fb_flags);
- goto out2;
- }
-
- /* previous routine could use block we allocated */
- newblock = ext4_ext_pblock(&newex);
- allocated = ext4_ext_get_actual_len(&newex);
- if (allocated > map->m_len)
- allocated = map->m_len;
- map->m_flags |= EXT4_MAP_NEW;
-
- /*
- * Update reserved blocks/metadata blocks after successful
- * block allocation which had been deferred till now.
- */
- if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
- unsigned int reserved_clusters;
- /*
- * Check how many clusters we had reserved for this allocated range
- */
- reserved_clusters = get_reserved_cluster_alloc(inode,
- map->m_lblk, allocated);
- if (map->m_flags & EXT4_MAP_FROM_CLUSTER) {
- if (reserved_clusters) {
- /*
- * We have clusters reserved for this range.
- * But since we are not doing actual allocation
- * and are simply using blocks from a previously
- * allocated cluster, we should release the
- * reservation and not claim quota.
- */
- ext4_da_update_reserve_space(inode,
- reserved_clusters, 0);
- }
- } else {
- BUG_ON(allocated_clusters < reserved_clusters);
- /* We will claim quota for all newly allocated blocks.*/
- ext4_da_update_reserve_space(inode, allocated_clusters,
- 1);
- if (reserved_clusters < allocated_clusters) {
- struct ext4_inode_info *ei = EXT4_I(inode);
- int reservation = allocated_clusters -
- reserved_clusters;
- /*
- * It seems we claimed a few clusters outside of
- * the range of this allocation. We should give
- * them back to the reservation pool. This can
- * happen in the following case (a worked sketch
- * follows this function):
- *
- * 1) Suppose s_cluster_ratio is 4 (i.e., each
- * cluster has 4 blocks; thus, the clusters
- * are [0-3],[4-7],[8-11]...).
- * 2) First comes a delayed allocation write for
- * logical blocks 10 & 11. Since there were no
- * previous delayed allocated blocks in the
- * range [8-11], we reserve 1 cluster
- * for this write.
- * 3) Next comes a write for logical blocks 3 to 8.
- * In this case, we reserve 2 clusters
- * (for [0-3] and [4-7]; not for [8-11], as
- * that range already has delayed allocated
- * blocks). Total reserved clusters: 3.
- * 4) Now, at delayed allocation writeout
- * time, we first write blocks [3-8] and
- * allocate 3 clusters for writing these
- * blocks; we also claim all three
- * clusters above.
- * 5) When we then come here to write out
- * blocks [10-11], we would expect to claim
- * the reservation of 1 cluster we had made
- * (and we would claim it, since there are no
- * more delayed allocated blocks in the range
- * [8-11]). But our reserved cluster count has
- * already gone to 0.
- *
- * Thus, at step 4 above, when we determine
- * that there are still some unwritten delayed
- * allocated blocks outside of our current
- * block range, we should increment the
- * reserved clusters count so that when the
- * remaining blocks finally get written, we
- * can claim them.
- */
- dquot_reserve_block(inode,
- EXT4_C2B(sbi, reservation));
- spin_lock(&ei->i_block_reservation_lock);
- ei->i_reserved_data_blocks += reservation;
- spin_unlock(&ei->i_block_reservation_lock);
- }
- }
- }
-
- /*
- * Cache the extent and update transaction to commit on fdatasync only
- * when it is _not_ an uninitialized extent.
- */
- if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
- ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock);
- ext4_update_inode_fsync_trans(handle, inode, 1);
- } else
- ext4_update_inode_fsync_trans(handle, inode, 0);
-out:
- if (allocated > map->m_len)
- allocated = map->m_len;
- ext4_ext_show_leaf(inode, path);
- map->m_flags |= EXT4_MAP_MAPPED;
- map->m_pblk = newblock;
- map->m_len = allocated;
-out2:
- if (path) {
- ext4_ext_drop_refs(path);
- kfree(path);
- }
-
- trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
- newblock, map->m_len, err ? err : allocated);
-
- return err ? err : allocated;
-}
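For readers tracing the bigalloc bookkeeping above, here is a minimal standalone sketch of the cluster rounding that the worked example in the comment relies on. CLUSTER_RATIO and blocks_to_clusters() are illustrative stand-ins for sbi->s_cluster_ratio and the EXT4_NUM_B2C() macro; this is not part of the deleted file.

	#include <stdio.h>

	#define CLUSTER_RATIO 4	/* stand-in for sbi->s_cluster_ratio */

	/* Round a run of blocks up to whole clusters, given the run's
	 * offset inside its first cluster (mirrors EXT4_NUM_B2C()). */
	static unsigned int blocks_to_clusters(unsigned int offset,
					       unsigned int blocks)
	{
		return (offset + blocks + CLUSTER_RATIO - 1) / CLUSTER_RATIO;
	}

	int main(void)
	{
		/* step 4 of the comment: writing logical blocks 3..8 */
		unsigned int lblk = 3, len = 6;
		unsigned int offset = lblk % CLUSTER_RATIO;

		/* touches clusters [0-3], [4-7], [8-11] -> prints 3 */
		printf("clusters claimed at writeout: %u\n",
		       blocks_to_clusters(offset, len));
		return 0;
	}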
-
-void ext4_ext_truncate(struct inode *inode)
-{
- struct address_space *mapping = inode->i_mapping;
- struct super_block *sb = inode->i_sb;
- ext4_lblk_t last_block;
- handle_t *handle;
- loff_t page_len;
- int err = 0;
-
- /*
- * finish any pending end_io work so we won't run the risk of
- * converting any truncated blocks to initialized later
- */
- ext4_flush_completed_IO(inode);
-
- /*
- * the first extent we're going to free will probably be the last
- * one in the block
- */
- err = ext4_writepage_trans_blocks(inode);
- handle = ext4_journal_start(inode, err);
- if (IS_ERR(handle))
- return;
-
- if (inode->i_size % PAGE_CACHE_SIZE != 0) {
- page_len = PAGE_CACHE_SIZE -
- (inode->i_size & (PAGE_CACHE_SIZE - 1));
-
- err = ext4_discard_partial_page_buffers(handle,
- mapping, inode->i_size, page_len, 0);
-
- if (err)
- goto out_stop;
- }
-
- if (ext4_orphan_add(handle, inode))
- goto out_stop;
-
- down_write(&EXT4_I(inode)->i_data_sem);
- ext4_ext_invalidate_cache(inode);
-
- ext4_discard_preallocations(inode);
-
- /*
- * TODO: optimization is possible here.
- * Probably we need not scan at all,
- * because page truncation is enough.
- */
-
- /* we have to know where to truncate from in the crash case */
- EXT4_I(inode)->i_disksize = inode->i_size;
- ext4_mark_inode_dirty(handle, inode);
-
- last_block = (inode->i_size + sb->s_blocksize - 1)
- >> EXT4_BLOCK_SIZE_BITS(sb);
- err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
-
- /* In a multi-transaction truncate, we only make the final
- * transaction synchronous.
- */
- if (IS_SYNC(inode))
- ext4_handle_sync(handle);
-
- up_write(&EXT4_I(inode)->i_data_sem);
-
-out_stop:
- /*
- * If this was a simple ftruncate() and the file will remain alive,
- * then we need to clear up the orphan record which we created above.
- * However, if this was a real unlink then we were called by
- * ext4_delete_inode(), and we allow that function to clean up the
- * orphan info for us.
- */
- if (inode->i_nlink)
- ext4_orphan_del(handle, inode);
-
- inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
- ext4_mark_inode_dirty(handle, inode);
- ext4_journal_stop(handle);
-}
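A quick worked instance of the partial-page arithmetic used above, assuming the common 4096-byte PAGE_CACHE_SIZE; this standalone sketch is illustrative only:

	#include <stdio.h>

	int main(void)
	{
		unsigned long page_size = 4096;	/* assumed PAGE_CACHE_SIZE */
		unsigned long long i_size = 10000;

		if (i_size % page_size != 0) {
			/* 10000 & 4095 = 1808, so page_len = 2288:
			 * bytes [10000, 12287] of the last page are
			 * handed to ext4_discard_partial_page_buffers() */
			unsigned long page_len =
				page_size - (i_size & (page_size - 1));
			printf("page_len = %lu\n", page_len);
		}
		return 0;
	}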
-
-static void ext4_falloc_update_inode(struct inode *inode,
- int mode, loff_t new_size, int update_ctime)
-{
- struct timespec now;
-
- if (update_ctime) {
- now = current_fs_time(inode->i_sb);
- if (!timespec_equal(&inode->i_ctime, &now))
- inode->i_ctime = now;
- }
- /*
- * Update only when preallocation was requested beyond
- * the file size.
- */
- if (!(mode & FALLOC_FL_KEEP_SIZE)) {
- if (new_size > i_size_read(inode))
- i_size_write(inode, new_size);
- if (new_size > EXT4_I(inode)->i_disksize)
- ext4_update_i_disksize(inode, new_size);
- } else {
- /*
- * Mark that we allocate beyond EOF so the subsequent truncate
- * can proceed even if the new size is the same as i_size.
- */
- if (new_size > i_size_read(inode))
- ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
- }
-
-}
-
-/*
- * Preallocate space for a file. This implements ext4's fallocate file
- * operation, which gets called from the sys_fallocate system call.
- * For block-mapped files, posix_fallocate should fall back to the method
- * of writing zeroes to the required new blocks (the same behavior which is
- * expected for file systems which do not support the fallocate() system
- * call).
- */
-long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
-{
- struct inode *inode = file->f_path.dentry->d_inode;
- handle_t *handle;
- loff_t new_size;
- unsigned int max_blocks;
- int ret = 0;
- int ret2 = 0;
- int retries = 0;
- int flags;
- struct ext4_map_blocks map;
- unsigned int credits, blkbits = inode->i_blkbits;
-
- /*
- * currently supporting (pre)allocate mode for extent-based
- * files _only_
- */
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- return -EOPNOTSUPP;
-
- /* Return error if mode is not supported */
- if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
- return -EOPNOTSUPP;
-
- if (mode & FALLOC_FL_PUNCH_HOLE)
- return ext4_punch_hole(file, offset, len);
-
- trace_ext4_fallocate_enter(inode, offset, len, mode);
- map.m_lblk = offset >> blkbits;
- /*
- * We can't just convert len to max_blocks because the byte range
- * may straddle block boundaries: if blocksize = 4096, offset = 3072
- * and len = 2048, the request spans two blocks even though
- * len < blocksize (a worked sketch follows this function)
- */
- max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
- - map.m_lblk;
- /*
- * credits to insert 1 extent into extent tree
- */
- credits = ext4_chunk_trans_blocks(inode, max_blocks);
- mutex_lock(&inode->i_mutex);
- ret = inode_newsize_ok(inode, (len + offset));
- if (ret) {
- mutex_unlock(&inode->i_mutex);
- trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
- return ret;
- }
- flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT;
- if (mode & FALLOC_FL_KEEP_SIZE)
- flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
- /*
- * Don't normalize the request if it can fit in one extent so
- * that it doesn't get unnecessarily split into multiple
- * extents.
- */
- if (len <= EXT_UNINIT_MAX_LEN << blkbits)
- flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
-retry:
- while (ret >= 0 && ret < max_blocks) {
- map.m_lblk = map.m_lblk + ret;
- map.m_len = max_blocks = max_blocks - ret;
- handle = ext4_journal_start(inode, credits);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- break;
- }
- ret = ext4_map_blocks(handle, inode, &map, flags);
- if (ret <= 0) {
-#ifdef EXT4FS_DEBUG
- WARN_ON(ret <= 0);
- printk(KERN_ERR "%s: ext4_ext_map_blocks "
- "returned error inode#%lu, block=%u, "
- "max_blocks=%u\n", __func__,
- inode->i_ino, map.m_lblk, max_blocks);
-#endif
- ext4_mark_inode_dirty(handle, inode);
- ret2 = ext4_journal_stop(handle);
- break;
- }
- if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
- blkbits) >> blkbits))
- new_size = offset + len;
- else
- new_size = ((loff_t) map.m_lblk + ret) << blkbits;
-
- ext4_falloc_update_inode(inode, mode, new_size,
- (map.m_flags & EXT4_MAP_NEW));
- ext4_mark_inode_dirty(handle, inode);
- ret2 = ext4_journal_stop(handle);
- if (ret2)
- break;
- }
- if (ret == -ENOSPC &&
- ext4_should_retry_alloc(inode->i_sb, &retries)) {
- ret = 0;
- goto retry;
- }
- mutex_unlock(&inode->i_mutex);
- trace_ext4_fallocate_exit(inode, offset, max_blocks,
- ret > 0 ? ret2 : ret);
- return ret > 0 ? ret2 : ret;
-}
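The "can't just convert len to max_blocks" comment above is easiest to see with the numbers it quotes. A standalone sketch, assuming EXT4_BLOCK_ALIGN() is the usual round-up-to-block-boundary macro:

	#include <stdio.h>

	int main(void)
	{
		unsigned int blkbits = 12;	/* 4096-byte blocks */
		unsigned long long offset = 3072, len = 2048;

		unsigned long long m_lblk = offset >> blkbits;	/* 0 */
		/* EXT4_BLOCK_ALIGN(5120, 12) >> 12 == 2 */
		unsigned long long end =
			(offset + len + (1ULL << blkbits) - 1) >> blkbits;

		/* len >> blkbits would say 0 blocks;
		 * the byte range really spans 2 */
		printf("max_blocks = %llu\n", end - m_lblk);
		return 0;
	}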
-
-/*
- * This function converts a range of blocks to written extents.
- * The caller passes the start offset and the size; all unwritten
- * extents within this range will be converted to written extents.
- *
- * It is called from the direct IO end io callback function,
- * to convert the fallocated extents after IO is completed.
- * Returns 0 on success.
- */
-int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
- ssize_t len)
-{
- handle_t *handle;
- unsigned int max_blocks;
- int ret = 0;
- int ret2 = 0;
- struct ext4_map_blocks map;
- unsigned int credits, blkbits = inode->i_blkbits;
-
- map.m_lblk = offset >> blkbits;
- /*
- * We can't just convert len to max_blocks because the byte range
- * may straddle block boundaries (e.g. blocksize = 4096,
- * offset = 3072 and len = 2048; see the sketch above)
- */
- max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -
- map.m_lblk);
- /*
- * credits to insert 1 extent into extent tree
- */
- credits = ext4_chunk_trans_blocks(inode, max_blocks);
- while (ret >= 0 && ret < max_blocks) {
- map.m_lblk += ret;
- map.m_len = (max_blocks -= ret);
- handle = ext4_journal_start(inode, credits);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- break;
- }
- ret = ext4_map_blocks(handle, inode, &map,
- EXT4_GET_BLOCKS_IO_CONVERT_EXT);
- if (ret <= 0) {
- WARN_ON(ret <= 0);
- ext4_msg(inode->i_sb, KERN_ERR,
- "%s:%d: inode #%lu: block %u: len %u: "
- "ext4_ext_map_blocks returned %d",
- __func__, __LINE__, inode->i_ino, map.m_lblk,
- map.m_len, ret);
- }
- ext4_mark_inode_dirty(handle, inode);
- ret2 = ext4_journal_stop(handle);
- if (ret <= 0 || ret2)
- break;
- }
- return ret > 0 ? ret2 : ret;
-}
-
-/*
- * Callback function called for each extent to gather FIEMAP information.
- */
-static int ext4_ext_fiemap_cb(struct inode *inode, ext4_lblk_t next,
- struct ext4_ext_cache *newex, struct ext4_extent *ex,
- void *data)
-{
- __u64 logical;
- __u64 physical;
- __u64 length;
- __u32 flags = 0;
- int ret = 0;
- struct fiemap_extent_info *fieinfo = data;
- unsigned char blksize_bits;
-
- blksize_bits = inode->i_sb->s_blocksize_bits;
- logical = (__u64)newex->ec_block << blksize_bits;
-
- if (newex->ec_start == 0) {
- /*
- * No extent in the extent tree contains block @newex->ec_start;
- * the block may lie in 1) a hole or 2) a delayed extent.
- *
- * Holes and delayed extents are processed as follows.
- * 1. look up dirty pages within the specified range in the
- * pagecache. If no page is found, there is no delayed extent;
- * return with EXT_CONTINUE.
- * 2. find the 1st mapped buffer,
- * 3. check whether the mapped buffer is both in the request range
- * and a delayed buffer. If not, there is no delayed extent;
- * return.
- * 4. a delayed extent is found; collect it.
- */
- ext4_lblk_t end = 0;
- pgoff_t last_offset;
- pgoff_t offset;
- pgoff_t index;
- pgoff_t start_index = 0;
- struct page **pages = NULL;
- struct buffer_head *bh = NULL;
- struct buffer_head *head = NULL;
- unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
-
- pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (pages == NULL)
- return -ENOMEM;
-
- offset = logical >> PAGE_SHIFT;
-repeat:
- last_offset = offset;
- head = NULL;
- ret = find_get_pages_tag(inode->i_mapping, &offset,
- PAGECACHE_TAG_DIRTY, nr_pages, pages);
-
- if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
- /* First time, try to find a mapped buffer. */
- if (ret == 0) {
-out:
- for (index = 0; index < ret; index++)
- page_cache_release(pages[index]);
- /* just a hole. */
- kfree(pages);
- return EXT_CONTINUE;
- }
- index = 0;
-
-next_page:
- /* Try to find the 1st mapped buffer. */
- end = ((__u64)pages[index]->index << PAGE_SHIFT) >>
- blksize_bits;
- if (!page_has_buffers(pages[index]))
- goto out;
- head = page_buffers(pages[index]);
- if (!head)
- goto out;
-
- index++;
- bh = head;
- do {
- if (end >= newex->ec_block +
- newex->ec_len)
- /* The buffer is out of
- * the request range.
- */
- goto out;
-
- if (buffer_mapped(bh) &&
- end >= newex->ec_block) {
- start_index = index - 1;
- /* get the 1st mapped buffer. */
- goto found_mapped_buffer;
- }
-
- bh = bh->b_this_page;
- end++;
- } while (bh != head);
-
- /* No mapped buffer found in the range in this page;
- * we need to look up the next page.
- */
- if (index >= ret) {
- /* There is no page left, but we need to limit
- * newex->ec_len.
- */
- newex->ec_len = end - newex->ec_block;
- goto out;
- }
- goto next_page;
- } else {
- /* Find contiguous delayed buffers. */
- if (ret > 0 && pages[0]->index == last_offset)
- head = page_buffers(pages[0]);
- bh = head;
- index = 1;
- start_index = 0;
- }
-
-found_mapped_buffer:
- if (bh != NULL && buffer_delay(bh)) {
- /* 1st or contiguous delayed buffer found. */
- if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
- /*
- * 1st delayed buffer found, record
- * the start of extent.
- */
- flags |= FIEMAP_EXTENT_DELALLOC;
- newex->ec_block = end;
- logical = (__u64)end << blksize_bits;
- }
- /* Find contiguous delayed buffers. */
- do {
- if (!buffer_delay(bh))
- goto found_delayed_extent;
- bh = bh->b_this_page;
- end++;
- } while (bh != head);
-
- for (; index < ret; index++) {
- if (!page_has_buffers(pages[index])) {
- bh = NULL;
- break;
- }
- head = page_buffers(pages[index]);
- if (!head) {
- bh = NULL;
- break;
- }
-
- if (pages[index]->index !=
- pages[start_index]->index + index
- - start_index) {
- /* Blocks are not contiguous. */
- bh = NULL;
- break;
- }
- bh = head;
- do {
- if (!buffer_delay(bh))
- /* Delayed-extent ends. */
- goto found_delayed_extent;
- bh = bh->b_this_page;
- end++;
- } while (bh != head);
- }
- } else if (!(flags & FIEMAP_EXTENT_DELALLOC))
- /* a hole found. */
- goto out;
-
-found_delayed_extent:
- newex->ec_len = min(end - newex->ec_block,
- (ext4_lblk_t)EXT_INIT_MAX_LEN);
- if (ret == nr_pages && bh != NULL &&
- newex->ec_len < EXT_INIT_MAX_LEN &&
- buffer_delay(bh)) {
- /* The delayed extent may continue; scan more pages. */
- for (index = 0; index < ret; index++)
- page_cache_release(pages[index]);
- goto repeat;
- }
-
- for (index = 0; index < ret; index++)
- page_cache_release(pages[index]);
- kfree(pages);
- }
-
- physical = (__u64)newex->ec_start << blksize_bits;
- length = (__u64)newex->ec_len << blksize_bits;
-
- if (ex && ext4_ext_is_uninitialized(ex))
- flags |= FIEMAP_EXTENT_UNWRITTEN;
-
- if (next == EXT_MAX_BLOCKS)
- flags |= FIEMAP_EXTENT_LAST;
-
- ret = fiemap_fill_next_extent(fieinfo, logical, physical,
- length, flags);
- if (ret < 0)
- return ret;
- if (ret == 1)
- return EXT_BREAK;
- return EXT_CONTINUE;
-}
-/* fiemap flags we can handle are specified here */
-#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
-
-static int ext4_xattr_fiemap(struct inode *inode,
- struct fiemap_extent_info *fieinfo)
-{
- __u64 physical = 0;
- __u64 length;
- __u32 flags = FIEMAP_EXTENT_LAST;
- int blockbits = inode->i_sb->s_blocksize_bits;
- int error = 0;
-
- /* in-inode? */
- if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
- struct ext4_iloc iloc;
- int offset; /* offset of xattr in inode */
-
- error = ext4_get_inode_loc(inode, &iloc);
- if (error)
- return error;
- physical = iloc.bh->b_blocknr << blockbits;
- offset = EXT4_GOOD_OLD_INODE_SIZE +
- EXT4_I(inode)->i_extra_isize;
- physical += offset;
- length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
- flags |= FIEMAP_EXTENT_DATA_INLINE;
- brelse(iloc.bh);
- } else { /* external block */
- physical = EXT4_I(inode)->i_file_acl << blockbits;
- length = inode->i_sb->s_blocksize;
- }
-
- if (physical)
- error = fiemap_fill_next_extent(fieinfo, 0, physical,
- length, flags);
- return (error < 0 ? error : 0);
-}
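The in-inode branch above computes where the xattr region sits inside the on-disk inode. A standalone sketch with assumed (typical) sizes -- a 256-byte s_inode_size and i_extra_isize = 32; only EXT4_GOOD_OLD_INODE_SIZE (128) is fixed by the format:

	#include <stdio.h>

	int main(void)
	{
		unsigned int inode_size = 256;	/* assumed s_inode_size */
		unsigned int extra_isize = 32;	/* assumed i_extra_isize */
		unsigned int good_old = 128;	/* EXT4_GOOD_OLD_INODE_SIZE */

		unsigned int offset = good_old + extra_isize;	/* 160 */
		unsigned int length = inode_size - offset;	/* 96 */

		printf("in-inode xattrs: offset %u, length %u\n",
		       offset, length);
		return 0;
	}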
-
-/*
- * ext4_ext_punch_hole
- *
- * Punches a hole of "length" bytes in a file starting
- * at byte "offset"
- *
- * @inode: The inode of the file to punch a hole in
- * @offset: The starting byte offset of the hole
- * @length: The length of the hole
- *
- * Returns 0 on success or a negative error code on failure
- */
-int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
-{
- struct inode *inode = file->f_path.dentry->d_inode;
- struct super_block *sb = inode->i_sb;
- ext4_lblk_t first_block, stop_block;
- struct address_space *mapping = inode->i_mapping;
- handle_t *handle;
- loff_t first_page, last_page, page_len;
- loff_t first_page_offset, last_page_offset;
- int credits, err = 0;
-
- /* No need to punch hole beyond i_size */
- if (offset >= inode->i_size)
- return 0;
-
- /*
- * If the hole extends beyond i_size, set the hole
- * to end after the page that contains i_size
- */
- if (offset + length > inode->i_size) {
- length = inode->i_size +
- PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
- offset;
- }
-
- first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- last_page = (offset + length) >> PAGE_CACHE_SHIFT;
-
- first_page_offset = first_page << PAGE_CACHE_SHIFT;
- last_page_offset = last_page << PAGE_CACHE_SHIFT;
-
- /*
- * Write out all dirty pages to avoid race conditions,
- * then release them.
- */
- if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
- err = filemap_write_and_wait_range(mapping,
- offset, offset + length - 1);
-
- if (err)
- return err;
- }
-
- /* Now release the pages */
- if (last_page_offset > first_page_offset) {
- truncate_inode_pages_range(mapping, first_page_offset,
- last_page_offset-1);
- }
-
- /* finish any pending end_io work */
- ext4_flush_completed_IO(inode);
-
- credits = ext4_writepage_trans_blocks(inode);
- handle = ext4_journal_start(inode, credits);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
-
- err = ext4_orphan_add(handle, inode);
- if (err)
- goto out;
-
- /*
- * Now we need to zero out the non-page-aligned data in the
- * pages at the start and tail of the hole, and unmap the buffer
- * heads for the block aligned regions of the page that were
- * completely zeroed.
- */
- if (first_page > last_page) {
- /*
- * If the file space being truncated is contained within a page,
- * just zero out and unmap the middle of that page
- */
- err = ext4_discard_partial_page_buffers(handle,
- mapping, offset, length, 0);
-
- if (err)
- goto out;
- } else {
- /*
- * zero out and unmap the partial page that contains
- * the start of the hole
- */
- page_len = first_page_offset - offset;
- if (page_len > 0) {
- err = ext4_discard_partial_page_buffers(handle, mapping,
- offset, page_len, 0);
- if (err)
- goto out;
- }
-
- /*
- * zero out and unmap the partial page that contains
- * the end of the hole
- */
- page_len = offset + length - last_page_offset;
- if (page_len > 0) {
- err = ext4_discard_partial_page_buffers(handle, mapping,
- last_page_offset, page_len, 0);
- if (err)
- goto out;
- }
- }
-
- /*
- * If i_size is contained in the last page, we need to
- * unmap and zero the partial page after i_size
- */
- if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
- inode->i_size % PAGE_CACHE_SIZE != 0) {
-
- page_len = PAGE_CACHE_SIZE -
- (inode->i_size & (PAGE_CACHE_SIZE - 1));
-
- if (page_len > 0) {
- err = ext4_discard_partial_page_buffers(handle,
- mapping, inode->i_size, page_len, 0);
-
- if (err)
- goto out;
- }
- }
-
- first_block = (offset + sb->s_blocksize - 1) >>
- EXT4_BLOCK_SIZE_BITS(sb);
- stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
-
- /* If there are no blocks to remove, return now */
- if (first_block >= stop_block)
- goto out;
-
- down_write(&EXT4_I(inode)->i_data_sem);
- ext4_ext_invalidate_cache(inode);
- ext4_discard_preallocations(inode);
-
- err = ext4_ext_remove_space(inode, first_block, stop_block - 1);
-
- ext4_ext_invalidate_cache(inode);
- ext4_discard_preallocations(inode);
-
- if (IS_SYNC(inode))
- ext4_handle_sync(handle);
-
- up_write(&EXT4_I(inode)->i_data_sem);
-
-out:
- ext4_orphan_del(handle, inode);
- inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
- ext4_mark_inode_dirty(handle, inode);
- ext4_journal_stop(handle);
- return err;
-}
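The page-range arithmetic at the top of ext4_ext_punch_hole() decides which pages can be dropped whole and which must be partially zeroed. A standalone worked example, assuming 4096-byte pages:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long page_size = 4096;	/* assumed */
		unsigned long long offset = 1000, length = 10000;

		unsigned long long first_page =
			(offset + page_size - 1) / page_size;	/* 1 */
		unsigned long long last_page =
			(offset + length) / page_size;		/* 2 */

		/* pages [4096, 8191] are truncated whole; the partial
		 * head [1000, 4095] and tail [8192, 10999] are zeroed
		 * in place by the discard helpers */
		printf("whole pages: [%llu, %llu)\n",
		       first_page * page_size, last_page * page_size);
		return 0;
	}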
-int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
- __u64 start, __u64 len)
-{
- ext4_lblk_t start_blk;
- int error = 0;
-
- /* fallback to generic here if not in extents fmt */
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- return generic_block_fiemap(inode, fieinfo, start, len,
- ext4_get_block);
-
- if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
- return -EBADR;
-
- if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
- error = ext4_xattr_fiemap(inode, fieinfo);
- } else {
- ext4_lblk_t len_blks;
- __u64 last_blk;
-
- start_blk = start >> inode->i_sb->s_blocksize_bits;
- last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
- if (last_blk >= EXT_MAX_BLOCKS)
- last_blk = EXT_MAX_BLOCKS-1;
- len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
-
- /*
- * Walk the extent tree gathering extent information.
- * ext4_ext_fiemap_cb will push extents back to user.
- */
- error = ext4_ext_walk_space(inode, start_blk, len_blks,
- ext4_ext_fiemap_cb, fieinfo);
- }
-
- return error;
-}
diff --git a/ANDROID_3.4.5/fs/ext4/file.c b/ANDROID_3.4.5/fs/ext4/file.c
deleted file mode 100644
index cb70f181..00000000
--- a/ANDROID_3.4.5/fs/ext4/file.c
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * linux/fs/ext4/file.c
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- * from
- *
- * linux/fs/minix/file.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * ext4 fs regular file handling primitives
- *
- * 64-bit file support on 64-bit platforms by Jakub Jelinek
- * (jj@sunsite.ms.mff.cuni.cz)
- */
-
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/jbd2.h>
-#include <linux/mount.h>
-#include <linux/path.h>
-#include <linux/quotaops.h>
-#include "ext4.h"
-#include "ext4_jbd2.h"
-#include "xattr.h"
-#include "acl.h"
-
-/*
- * Called when an inode is released. Note that this is different
- * from ext4_file_open: open gets called at every open, but release
- * gets called only when /all/ the files are closed.
- */
-static int ext4_release_file(struct inode *inode, struct file *filp)
-{
- if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
- ext4_alloc_da_blocks(inode);
- ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
- }
- /* if we are the last writer on the inode, drop the block reservation */
- if ((filp->f_mode & FMODE_WRITE) &&
- (atomic_read(&inode->i_writecount) == 1) &&
- !EXT4_I(inode)->i_reserved_data_blocks)
- {
- down_write(&EXT4_I(inode)->i_data_sem);
- ext4_discard_preallocations(inode);
- up_write(&EXT4_I(inode)->i_data_sem);
- }
- if (is_dx(inode) && filp->private_data)
- ext4_htree_free_dir_info(filp->private_data);
-
- return 0;
-}
-
-static void ext4_aiodio_wait(struct inode *inode)
-{
- wait_queue_head_t *wq = ext4_ioend_wq(inode);
-
- wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0));
-}
-
-/*
- * This tests whether the IO in question is block-aligned or not.
- * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
- * are converted to written only after the IO is complete. Until they are
- * mapped, these blocks appear as holes, so dio_zero_block() will assume that
- * it needs to zero out portions of the start and/or end block. If 2 AIO
- * threads are at work on the same unwritten block, they must be synchronized
- * or one thread will zero the other's data, causing corruption.
- */
-static int
-ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
-{
- struct super_block *sb = inode->i_sb;
- int blockmask = sb->s_blocksize - 1;
- size_t count = iov_length(iov, nr_segs);
- loff_t final_size = pos + count;
-
- if (pos >= inode->i_size)
- return 0;
-
- if ((pos & blockmask) || (final_size & blockmask))
- return 1;
-
- return 0;
-}
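A standalone numeric instance of the alignment test above, assuming a 4096-byte block size:

	#include <stdio.h>

	int main(void)
	{
		long long blockmask = 4096 - 1;	/* sb->s_blocksize - 1 */
		long long pos = 512, count = 2048;
		long long final_size = pos + count;	/* 2560 */

		/* both ends fall mid-block, so two concurrent AIOs could
		 * make dio_zero_block() zero each other's data */
		if ((pos & blockmask) || (final_size & blockmask))
			printf("unaligned: serialize this AIO\n");
		return 0;
	}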
-
-static ssize_t
-ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
-{
- struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
- int unaligned_aio = 0;
- int ret;
-
- /*
- * If we have encountered a bitmap-format file, the size limit
- * is smaller than s_maxbytes, which is for extent-mapped files.
- */
-
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- size_t length = iov_length(iov, nr_segs);
-
- if ((pos > sbi->s_bitmap_maxbytes ||
- (pos == sbi->s_bitmap_maxbytes && length > 0)))
- return -EFBIG;
-
- if (pos + length > sbi->s_bitmap_maxbytes) {
- nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
- sbi->s_bitmap_maxbytes - pos);
- }
- } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) &&
- !is_sync_kiocb(iocb))) {
- unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
- }
-
- /* Unaligned direct AIO must be serialized; see comment above */
- if (unaligned_aio) {
- static unsigned long unaligned_warn_time;
-
- /* Warn about this once per day */
- if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
- ext4_msg(inode->i_sb, KERN_WARNING,
- "Unaligned AIO/DIO on inode %ld by %s; "
- "performance will be poor.",
- inode->i_ino, current->comm);
- mutex_lock(ext4_aio_mutex(inode));
- ext4_aiodio_wait(inode);
- }
-
- ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
-
- if (unaligned_aio)
- mutex_unlock(ext4_aio_mutex(inode));
-
- return ret;
-}
-
-static const struct vm_operations_struct ext4_file_vm_ops = {
- .fault = filemap_fault,
- .page_mkwrite = ext4_page_mkwrite,
-};
-
-static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
-{
- struct address_space *mapping = file->f_mapping;
-
- if (!mapping->a_ops->readpage)
- return -ENOEXEC;
- file_accessed(file);
- vma->vm_ops = &ext4_file_vm_ops;
- vma->vm_flags |= VM_CAN_NONLINEAR;
- return 0;
-}
-
-static int ext4_file_open(struct inode * inode, struct file * filp)
-{
- struct super_block *sb = inode->i_sb;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- struct ext4_inode_info *ei = EXT4_I(inode);
- struct vfsmount *mnt = filp->f_path.mnt;
- struct path path;
- char buf[64], *cp;
-
- if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
- !(sb->s_flags & MS_RDONLY))) {
- sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
- /*
- * Sample where the filesystem has been mounted and
- * store it in the superblock for sysadmin convenience
- * when trying to sort through large numbers of block
- * devices or filesystem images.
- */
- memset(buf, 0, sizeof(buf));
- path.mnt = mnt;
- path.dentry = mnt->mnt_root;
- cp = d_path(&path, buf, sizeof(buf));
- if (!IS_ERR(cp)) {
- strlcpy(sbi->s_es->s_last_mounted, cp,
- sizeof(sbi->s_es->s_last_mounted));
- ext4_mark_super_dirty(sb);
- }
- }
- /*
- * Set up the jbd2_inode if we are opening the inode for
- * writing and the journal is present
- */
- if (sbi->s_journal && !ei->jinode && (filp->f_mode & FMODE_WRITE)) {
- struct jbd2_inode *jinode = jbd2_alloc_inode(GFP_KERNEL);
-
- spin_lock(&inode->i_lock);
- if (!ei->jinode) {
- if (!jinode) {
- spin_unlock(&inode->i_lock);
- return -ENOMEM;
- }
- ei->jinode = jinode;
- jbd2_journal_init_jbd_inode(ei->jinode, inode);
- jinode = NULL;
- }
- spin_unlock(&inode->i_lock);
- if (unlikely(jinode != NULL))
- jbd2_free_inode(jinode);
- }
- return dquot_file_open(inode, filp);
-}
-
-/*
- * ext4_llseek() copied from generic_file_llseek() to handle both
- * block-mapped and extent-mapped maxbytes values. This should
- * otherwise be identical to generic_file_llseek().
- */
-loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
-{
- struct inode *inode = file->f_mapping->host;
- loff_t maxbytes;
-
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
- else
- maxbytes = inode->i_sb->s_maxbytes;
-
- return generic_file_llseek_size(file, offset, origin, maxbytes);
-}
-
-const struct file_operations ext4_file_operations = {
- .llseek = ext4_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = generic_file_aio_read,
- .aio_write = ext4_file_write,
- .unlocked_ioctl = ext4_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = ext4_compat_ioctl,
-#endif
- .mmap = ext4_file_mmap,
- .open = ext4_file_open,
- .release = ext4_release_file,
- .fsync = ext4_sync_file,
- .splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
- .fallocate = ext4_fallocate,
-};
-
-const struct inode_operations ext4_file_inode_operations = {
- .setattr = ext4_setattr,
- .getattr = ext4_getattr,
-#ifdef CONFIG_EXT4_FS_XATTR
- .setxattr = generic_setxattr,
- .getxattr = generic_getxattr,
- .listxattr = ext4_listxattr,
- .removexattr = generic_removexattr,
-#endif
- .get_acl = ext4_get_acl,
- .fiemap = ext4_fiemap,
-};
-
diff --git a/ANDROID_3.4.5/fs/ext4/fsync.c b/ANDROID_3.4.5/fs/ext4/fsync.c
deleted file mode 100644
index bb6c7d81..00000000
--- a/ANDROID_3.4.5/fs/ext4/fsync.c
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * linux/fs/ext4/fsync.c
- *
- * Copyright (C) 1993 Stephen Tweedie (sct@redhat.com)
- * from
- * Copyright (C) 1992 Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- * from
- * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds
- *
- * ext4fs fsync primitive
- *
- * Big-endian to little-endian byte-swapping/bitmaps by
- * David S. Miller (davem@caip.rutgers.edu), 1995
- *
- * Removed unnecessary code duplication for little endian machines
- * and excessive __inline__s.
- * Andi Kleen, 1997
- *
- * Major simplifications and cleanup - we only need to do the metadata, because
- * we can depend on generic_block_fdatasync() to sync the data blocks.
- */
-
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/writeback.h>
-#include <linux/jbd2.h>
-#include <linux/blkdev.h>
-
-#include "ext4.h"
-#include "ext4_jbd2.h"
-
-#include <trace/events/ext4.h>
-
-static void dump_completed_IO(struct inode * inode)
-{
-#ifdef EXT4FS_DEBUG
- struct list_head *cur, *before, *after;
- ext4_io_end_t *io, *io0, *io1;
- unsigned long flags;
-
- if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
- ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
- return;
- }
-
- ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
- spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
- list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
- cur = &io->list;
- before = cur->prev;
- io0 = container_of(before, ext4_io_end_t, list);
- after = cur->next;
- io1 = container_of(after, ext4_io_end_t, list);
-
- ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
- io, inode->i_ino, io0, io1);
- }
- spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
-#endif
-}
-
-/*
- * This function is called from ext4_sync_file().
- *
- * When IO is completed, the work to convert unwritten extents to
- * written is queued on a workqueue but may not get scheduled
- * immediately. When fsync is called, we need to ensure the
- * conversion is complete before fsync returns.
- * The inode keeps track of a list of pending/completed IO that
- * might need the conversion. This function walks through
- * the list and converts the related unwritten extents of completed
- * IO to written.
- * The function returns the number of pending IOs on success.
- */
-int ext4_flush_completed_IO(struct inode *inode)
-{
- ext4_io_end_t *io;
- struct ext4_inode_info *ei = EXT4_I(inode);
- unsigned long flags;
- int ret = 0;
- int ret2 = 0;
-
- dump_completed_IO(inode);
- spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- while (!list_empty(&ei->i_completed_io_list)){
- io = list_entry(ei->i_completed_io_list.next,
- ext4_io_end_t, list);
- list_del_init(&io->list);
- io->flag |= EXT4_IO_END_IN_FSYNC;
- /*
- * Calling ext4_end_io_nolock() to convert completed
- * IO to written.
- *
- * When ext4_sync_file() is called, run_queue() may already
- * be about to flush the work corresponding to this io
- * structure. It will be upset if it finds that the io
- * structure related to the work to be scheduled has been
- * freed.
- *
- * Thus we need to keep the io structure valid here even
- * after the conversion has finished. The io structure has a
- * flag to avoid double conversion, from both fsync and the
- * background work queue.
- */
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- ret = ext4_end_io_nolock(io);
- if (ret < 0)
- ret2 = ret;
- spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- io->flag &= ~EXT4_IO_END_IN_FSYNC;
- }
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- return (ret2 < 0) ? ret2 : 0;
-}
-
-/*
- * If we're not journaling and this is a just-created file, we have to
- * sync our parent directory (if it was freshly created) since
- * otherwise it will only be written by writeback, leaving a huge
- * window during which a crash may lose the file. This may apply for
- * the parent directory's parent as well, and so on recursively, if
- * they are also freshly created.
- */
-static int ext4_sync_parent(struct inode *inode)
-{
- struct writeback_control wbc;
- struct dentry *dentry = NULL;
- struct inode *next;
- int ret = 0;
-
- if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY))
- return 0;
- inode = igrab(inode);
- while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
- ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
- dentry = NULL;
- spin_lock(&inode->i_lock);
- if (!list_empty(&inode->i_dentry)) {
- dentry = list_first_entry(&inode->i_dentry,
- struct dentry, d_alias);
- dget(dentry);
- }
- spin_unlock(&inode->i_lock);
- if (!dentry)
- break;
- next = igrab(dentry->d_parent->d_inode);
- dput(dentry);
- if (!next)
- break;
- iput(inode);
- inode = next;
- ret = sync_mapping_buffers(inode->i_mapping);
- if (ret)
- break;
- memset(&wbc, 0, sizeof(wbc));
- wbc.sync_mode = WB_SYNC_ALL;
- wbc.nr_to_write = 0; /* only write out the inode */
- ret = sync_inode(inode, &wbc);
- if (ret)
- break;
- }
- iput(inode);
- return ret;
-}
-
-/**
- * __sync_inode - generic_file_fsync without the locking and filemap_write
- * @inode: inode to sync
- * @datasync: only sync essential metadata if true
- *
- * This is just generic_file_fsync without the locking. This is needed for
- * nojournal mode to make sure this inode's data/metadata makes it to disk
- * properly. The i_mutex should be held already.
- */
-static int __sync_inode(struct inode *inode, int datasync)
-{
- int err;
- int ret;
-
- ret = sync_mapping_buffers(inode->i_mapping);
- if (!(inode->i_state & I_DIRTY))
- return ret;
- if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
- return ret;
-
- err = sync_inode_metadata(inode, 1);
- if (ret == 0)
- ret = err;
- return ret;
-}
-
-/*
- * akpm: A new design for ext4_sync_file().
- *
- * This is only called from sys_fsync(), sys_fdatasync() and sys_msync().
- * There cannot be a transaction open by this task.
- * Another task could have dirtied this inode. Its data can be in any
- * state in the journalling system.
- *
- * What we do is just kick off a commit and wait on it. This will snapshot the
- * inode to disk.
- *
- * i_mutex lock is held when entering and exiting this function
- */
-
-int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
-{
- struct inode *inode = file->f_mapping->host;
- struct ext4_inode_info *ei = EXT4_I(inode);
- journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
- int ret;
- tid_t commit_tid;
- bool needs_barrier = false;
-
- J_ASSERT(ext4_journal_current_handle() == NULL);
-
- trace_ext4_sync_file_enter(file, datasync);
-
- ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
- if (ret)
- return ret;
- mutex_lock(&inode->i_mutex);
-
- if (inode->i_sb->s_flags & MS_RDONLY)
- goto out;
-
- ret = ext4_flush_completed_IO(inode);
- if (ret < 0)
- goto out;
-
- if (!journal) {
- ret = __sync_inode(inode, datasync);
- if (!ret && !list_empty(&inode->i_dentry))
- ret = ext4_sync_parent(inode);
- goto out;
- }
-
- /*
- * data=writeback,ordered:
- * The caller's filemap_fdatawrite()/wait will sync the data.
- * Metadata is in the journal, we wait for proper transaction to
- * commit here.
- *
- * data=journal:
- * filemap_fdatawrite won't do anything (the buffers are clean).
- * ext4_force_commit will write the file data into the journal and
- * will wait on that.
- * filemap_fdatawait() will encounter a ton of newly-dirtied pages
- * (they were dirtied by commit). But that's OK - the blocks are
- * safe in-journal, which is all fsync() needs to ensure.
- */
- if (ext4_should_journal_data(inode)) {
- ret = ext4_force_commit(inode->i_sb);
- goto out;
- }
-
- commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
- if (journal->j_flags & JBD2_BARRIER &&
- !jbd2_trans_will_send_data_barrier(journal, commit_tid))
- needs_barrier = true;
- jbd2_log_start_commit(journal, commit_tid);
- ret = jbd2_log_wait_commit(journal, commit_tid);
- if (needs_barrier)
- blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
- out:
- mutex_unlock(&inode->i_mutex);
- trace_ext4_sync_file_exit(inode, ret);
- return ret;
-}
diff --git a/ANDROID_3.4.5/fs/ext4/hash.c b/ANDROID_3.4.5/fs/ext4/hash.c
deleted file mode 100644
index fa8e4911..00000000
--- a/ANDROID_3.4.5/fs/ext4/hash.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * linux/fs/ext4/hash.c
- *
- * Copyright (C) 2002 by Theodore Ts'o
- *
- * This file is released under the GPL v2.
- *
- * This file may be redistributed under the terms of the GNU Public
- * License.
- */
-
-#include <linux/fs.h>
-#include <linux/jbd2.h>
-#include <linux/cryptohash.h>
-#include "ext4.h"
-
-#define DELTA 0x9E3779B9
-
-static void TEA_transform(__u32 buf[4], __u32 const in[])
-{
- __u32 sum = 0;
- __u32 b0 = buf[0], b1 = buf[1];
- __u32 a = in[0], b = in[1], c = in[2], d = in[3];
- int n = 16;
-
- do {
- sum += DELTA;
- b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
- b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
- } while (--n);
-
- buf[0] += b0;
- buf[1] += b1;
-}
-
-
-/* The old legacy hash */
-static __u32 dx_hack_hash_unsigned(const char *name, int len)
-{
- __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
- const unsigned char *ucp = (const unsigned char *) name;
-
- while (len--) {
- hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373));
-
- if (hash & 0x80000000)
- hash -= 0x7fffffff;
- hash1 = hash0;
- hash0 = hash;
- }
- return hash0 << 1;
-}
-
-static __u32 dx_hack_hash_signed(const char *name, int len)
-{
- __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
- const signed char *scp = (const signed char *) name;
-
- while (len--) {
- hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373));
-
- if (hash & 0x80000000)
- hash -= 0x7fffffff;
- hash1 = hash0;
- hash0 = hash;
- }
- return hash0 << 1;
-}
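Why two on-disk variants of the same legacy hash? Plain char is signed on some architectures and unsigned on others, so old directories hashed filenames containing high bytes inconsistently. A standalone sketch showing exactly when the two diverge (hack_hash() is an illustrative copy of the loops above, not part of this file):

	#include <stdio.h>

	static unsigned int hack_hash(const char *name, int len, int is_signed)
	{
		unsigned int hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;

		while (len--) {
			int c = is_signed ? *(const signed char *)name
					  : *(const unsigned char *)name;
			hash = hash1 + (hash0 ^ (c * 7152373));
			if (hash & 0x80000000)
				hash -= 0x7fffffff;
			hash1 = hash0;
			hash0 = hash;
			name++;
		}
		return hash0 << 1;
	}

	int main(void)
	{
		const char name[] = "\xe9tude";	/* first byte >= 0x80 */

		/* pure-ASCII names hash identically; this one does not */
		printf("signed:   %08x\n", hack_hash(name, 5, 1));
		printf("unsigned: %08x\n", hack_hash(name, 5, 0));
		return 0;
	}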
-
-static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num)
-{
- __u32 pad, val;
- int i;
- const signed char *scp = (const signed char *) msg;
-
- pad = (__u32)len | ((__u32)len << 8);
- pad |= pad << 16;
-
- val = pad;
- if (len > num*4)
- len = num * 4;
- for (i = 0; i < len; i++) {
- if ((i % 4) == 0)
- val = pad;
- val = ((int) scp[i]) + (val << 8);
- if ((i % 4) == 3) {
- *buf++ = val;
- val = pad;
- num--;
- }
- }
- if (--num >= 0)
- *buf++ = val;
- while (--num >= 0)
- *buf++ = pad;
-}
-
-static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
-{
- __u32 pad, val;
- int i;
- const unsigned char *ucp = (const unsigned char *) msg;
-
- pad = (__u32)len | ((__u32)len << 8);
- pad |= pad << 16;
-
- val = pad;
- if (len > num*4)
- len = num * 4;
- for (i = 0; i < len; i++) {
- if ((i % 4) == 0)
- val = pad;
- val = ((int) ucp[i]) + (val << 8);
- if ((i % 4) == 3) {
- *buf++ = val;
- val = pad;
- num--;
- }
- }
- if (--num >= 0)
- *buf++ = val;
- while (--num >= 0)
- *buf++ = pad;
-}
-
-/*
- * Returns the hash of a filename. If len is 0 and name is NULL, then
- * this function can be used to test whether or not a hash version is
- * supported.
- *
- * The seed is a 4-longword (32 bits each) "secret" which can be used
- * to uniquify a hash. If the seed is all zeros, then a default seed
- * may be used.
- *
- * A particular hash version specifies whether or not the seed is
- * represented, and whether or not the returned hash is 32 bits or 64
- * bits. 32 bit hashes will return 0 for the minor hash.
- */
-int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
-{
- __u32 hash;
- __u32 minor_hash = 0;
- const char *p;
- int i;
- __u32 in[8], buf[4];
- void (*str2hashbuf)(const char *, int, __u32 *, int) =
- str2hashbuf_signed;
-
- /* Initialize the default seed for the hash checksum functions */
- buf[0] = 0x67452301;
- buf[1] = 0xefcdab89;
- buf[2] = 0x98badcfe;
- buf[3] = 0x10325476;
-
- /* Check to see if the seed is all zeros */
- if (hinfo->seed) {
- for (i = 0; i < 4; i++) {
- if (hinfo->seed[i])
- break;
- }
- if (i < 4)
- memcpy(buf, hinfo->seed, sizeof(buf));
- }
-
- switch (hinfo->hash_version) {
- case DX_HASH_LEGACY_UNSIGNED:
- hash = dx_hack_hash_unsigned(name, len);
- break;
- case DX_HASH_LEGACY:
- hash = dx_hack_hash_signed(name, len);
- break;
- case DX_HASH_HALF_MD4_UNSIGNED:
- str2hashbuf = str2hashbuf_unsigned;
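- /* fall through */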
- case DX_HASH_HALF_MD4:
- p = name;
- while (len > 0) {
- (*str2hashbuf)(p, len, in, 8);
- half_md4_transform(buf, in);
- len -= 32;
- p += 32;
- }
- minor_hash = buf[2];
- hash = buf[1];
- break;
- case DX_HASH_TEA_UNSIGNED:
- str2hashbuf = str2hashbuf_unsigned;
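- /* fall through */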
- case DX_HASH_TEA:
- p = name;
- while (len > 0) {
- (*str2hashbuf)(p, len, in, 4);
- TEA_transform(buf, in);
- len -= 16;
- p += 16;
- }
- hash = buf[0];
- minor_hash = buf[1];
- break;
- default:
- hinfo->hash = 0;
- return -1;
- }
- hash = hash & ~1;
- if (hash == (EXT4_HTREE_EOF_32BIT << 1))
- hash = (EXT4_HTREE_EOF_32BIT - 1) << 1;
- hinfo->hash = hash;
- hinfo->minor_hash = minor_hash;
- return 0;
-}
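Per the doc comment above, a caller can probe for hash-version support by passing a NULL name and zero length. A hypothetical in-kernel caller sketch (the pr_debug() message is illustrative):

	struct dx_hash_info hinfo = {
		.hash_version	= DX_HASH_HALF_MD4,
		.seed		= NULL,	/* all-zero seed -> default seed */
	};

	if (ext4fs_dirhash(NULL, 0, &hinfo) == 0)
		pr_debug("half-MD4 supported; hash=%08x\n", hinfo.hash);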
diff --git a/ANDROID_3.4.5/fs/ext4/ialloc.c b/ANDROID_3.4.5/fs/ext4/ialloc.c
deleted file mode 100644
index b4a7dd56..00000000
--- a/ANDROID_3.4.5/fs/ext4/ialloc.c
+++ /dev/null
@@ -1,1161 +0,0 @@
-/*
- * linux/fs/ext4/ialloc.c
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- * BSD ufs-inspired inode and directory allocation by
- * Stephen Tweedie (sct@redhat.com), 1993
- * Big-endian to little-endian byte-swapping/bitmaps by
- * David S. Miller (davem@caip.rutgers.edu), 1995
- */
-
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/jbd2.h>
-#include <linux/stat.h>
-#include <linux/string.h>
-#include <linux/quotaops.h>
-#include <linux/buffer_head.h>
-#include <linux/random.h>
-#include <linux/bitops.h>
-#include <linux/blkdev.h>
-#include <asm/byteorder.h>
-
-#include "ext4.h"
-#include "ext4_jbd2.h"
-#include "xattr.h"
-#include "acl.h"
-
-#include <trace/events/ext4.h>
-
-/*
- * ialloc.c contains the inode allocation and deallocation routines
- */
-
-/*
- * The free inodes are managed by bitmaps. A file system contains several
- * block groups. Each group contains 1 bitmap block for blocks, 1 bitmap
- * block for inodes, N blocks for the inode table and data blocks.
- *
- * The file system contains group descriptors which are located after the
- * super block. Each descriptor contains the number of the bitmap block and
- * the free blocks count in the block.
- */
-
-/*
- * To avoid calling the atomic setbit hundreds or thousands of times, we only
- * need to use it within a single byte (to ensure we get endianness right).
- * We can use memset for the rest of the bitmap as there are no other users.
- */
-void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
-{
- int i;
-
- if (start_bit >= end_bit)
- return;
-
- ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
- for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
- ext4_set_bit(i, bitmap);
- if (i < end_bit)
- memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
-}
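A standalone worked instance of ext4_mark_bitmap_end() for start_bit = 100 and end_bit = 32768 (one 4 KiB bitmap block); the printf calls stand in for the per-bit sets and the memset:

	#include <stdio.h>

	int main(void)
	{
		int start_bit = 100, end_bit = 32768, i;

		/* per-bit (atomic in the kernel) sets, only up to the
		 * next byte boundary: bits 100..103 */
		for (i = start_bit; i < ((start_bit + 7) & ~7); i++)
			printf("ext4_set_bit(%d)\n", i);

		/* one memset covers the rest: bytes 13..4095 */
		printf("memset(bitmap + %d, 0xff, %d)\n",
		       i >> 3, (end_bit - i) >> 3);
		return 0;
	}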
-
-/* Initializes an uninitialized inode bitmap */
-static unsigned ext4_init_inode_bitmap(struct super_block *sb,
- struct buffer_head *bh,
- ext4_group_t block_group,
- struct ext4_group_desc *gdp)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
-
- J_ASSERT_BH(bh, buffer_locked(bh));
-
- /* If the checksum is bad, mark all blocks and inodes in use to
- * prevent allocation, essentially implementing a per-group
- * read-only flag. */
- if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
- ext4_error(sb, "Checksum bad for group %u", block_group);
- ext4_free_group_clusters_set(sb, gdp, 0);
- ext4_free_inodes_set(sb, gdp, 0);
- ext4_itable_unused_set(sb, gdp, 0);
- memset(bh->b_data, 0xff, sb->s_blocksize);
- return 0;
- }
-
- memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
- ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
- bh->b_data);
-
- return EXT4_INODES_PER_GROUP(sb);
-}
-
-void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate)
-{
- if (uptodate) {
- set_buffer_uptodate(bh);
- set_bitmap_uptodate(bh);
- }
- unlock_buffer(bh);
- put_bh(bh);
-}
-
-/*
- * Read the inode allocation bitmap for a given block_group, reading
- * into the specified slot in the superblock's bitmap cache.
- *
- * Return buffer_head of bitmap on success or NULL.
- */
-static struct buffer_head *
-ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
-{
- struct ext4_group_desc *desc;
- struct buffer_head *bh = NULL;
- ext4_fsblk_t bitmap_blk;
-
- desc = ext4_get_group_desc(sb, block_group, NULL);
- if (!desc)
- return NULL;
-
- bitmap_blk = ext4_inode_bitmap(sb, desc);
- bh = sb_getblk(sb, bitmap_blk);
- if (unlikely(!bh)) {
- ext4_error(sb, "Cannot read inode bitmap - "
- "block_group = %u, inode_bitmap = %llu",
- block_group, bitmap_blk);
- return NULL;
- }
- if (bitmap_uptodate(bh))
- return bh;
-
- lock_buffer(bh);
- if (bitmap_uptodate(bh)) {
- unlock_buffer(bh);
- return bh;
- }
-
- ext4_lock_group(sb, block_group);
- if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
- ext4_init_inode_bitmap(sb, bh, block_group, desc);
- set_bitmap_uptodate(bh);
- set_buffer_uptodate(bh);
- ext4_unlock_group(sb, block_group);
- unlock_buffer(bh);
- return bh;
- }
- ext4_unlock_group(sb, block_group);
-
- if (buffer_uptodate(bh)) {
- /*
- * if not uninit: if bh is uptodate,
- * the bitmap is also uptodate
- */
- set_bitmap_uptodate(bh);
- unlock_buffer(bh);
- return bh;
- }
- /*
- * submit the buffer_head for reading
- */
- trace_ext4_load_inode_bitmap(sb, block_group);
- bh->b_end_io = ext4_end_bitmap_read;
- get_bh(bh);
- submit_bh(READ, bh);
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh)) {
- put_bh(bh);
- ext4_error(sb, "Cannot read inode bitmap - "
- "block_group = %u, inode_bitmap = %llu",
- block_group, bitmap_blk);
- return NULL;
- }
- return bh;
-}
-
-/*
- * NOTE! When we get the inode, we're the only people
- * that have access to it, and as such there are no
- * race conditions we have to worry about. The inode
- * is not on the hash-lists, and it cannot be reached
- * through the filesystem because the directory entry
- * has been deleted earlier.
- *
- * HOWEVER: we must make sure that we get no aliases,
- * which means that we have to call "clear_inode()"
- * _before_ we mark the inode not in use in the inode
- * bitmaps. Otherwise a newly created file might use
- * the same inode number (not actually the same pointer
- * though), and then we'd have two inodes sharing the
- * same inode number and space on the hard disk.
- */
-void ext4_free_inode(handle_t *handle, struct inode *inode)
-{
- struct super_block *sb = inode->i_sb;
- int is_directory;
- unsigned long ino;
- struct buffer_head *bitmap_bh = NULL;
- struct buffer_head *bh2;
- ext4_group_t block_group;
- unsigned long bit;
- struct ext4_group_desc *gdp;
- struct ext4_super_block *es;
- struct ext4_sb_info *sbi;
- int fatal = 0, err, count, cleared;
-
- if (!sb) {
- printk(KERN_ERR "EXT4-fs: %s:%d: inode on "
- "nonexistent device\n", __func__, __LINE__);
- return;
- }
- if (atomic_read(&inode->i_count) > 1) {
- ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d",
- __func__, __LINE__, inode->i_ino,
- atomic_read(&inode->i_count));
- return;
- }
- if (inode->i_nlink) {
- ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: nlink=%d\n",
- __func__, __LINE__, inode->i_ino, inode->i_nlink);
- return;
- }
- sbi = EXT4_SB(sb);
-
- ino = inode->i_ino;
- ext4_debug("freeing inode %lu\n", ino);
- trace_ext4_free_inode(inode);
-
- /*
- * Note: we must free any quota before locking the superblock,
- * as writing the quota to disk may need the lock as well.
- */
- dquot_initialize(inode);
- ext4_xattr_delete_inode(handle, inode);
- dquot_free_inode(inode);
- dquot_drop(inode);
-
- is_directory = S_ISDIR(inode->i_mode);
-
- /* Do this BEFORE marking the inode not in use or returning an error */
- ext4_clear_inode(inode);
-
- es = EXT4_SB(sb)->s_es;
- if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
- ext4_error(sb, "reserved or nonexistent inode %lu", ino);
- goto error_return;
- }
- block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
- bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
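- /*
- * A quick worked example of this mapping (hypothetical filesystem
- * with 8192 inodes per group): inode numbers are 1-based while bitmap
- * bits are 0-based, so ino 1..8192 land in group 0 at bits 0..8191,
- * and ino 8193 lands in group 1 at bit 0.
- */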
- bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
- if (!bitmap_bh)
- goto error_return;
-
- BUFFER_TRACE(bitmap_bh, "get_write_access");
- fatal = ext4_journal_get_write_access(handle, bitmap_bh);
- if (fatal)
- goto error_return;
-
- fatal = -ESRCH;
- gdp = ext4_get_group_desc(sb, block_group, &bh2);
- if (gdp) {
- BUFFER_TRACE(bh2, "get_write_access");
- fatal = ext4_journal_get_write_access(handle, bh2);
- }
- ext4_lock_group(sb, block_group);
- cleared = ext4_test_and_clear_bit(bit, bitmap_bh->b_data);
- if (fatal || !cleared) {
- ext4_unlock_group(sb, block_group);
- goto out;
- }
-
- count = ext4_free_inodes_count(sb, gdp) + 1;
- ext4_free_inodes_set(sb, gdp, count);
- if (is_directory) {
- count = ext4_used_dirs_count(sb, gdp) - 1;
- ext4_used_dirs_set(sb, gdp, count);
- percpu_counter_dec(&sbi->s_dirs_counter);
- }
- gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
- ext4_unlock_group(sb, block_group);
-
- percpu_counter_inc(&sbi->s_freeinodes_counter);
- if (sbi->s_log_groups_per_flex) {
- ext4_group_t f = ext4_flex_group(sbi, block_group);
-
- atomic_inc(&sbi->s_flex_groups[f].free_inodes);
- if (is_directory)
- atomic_dec(&sbi->s_flex_groups[f].used_dirs);
- }
- BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
- fatal = ext4_handle_dirty_metadata(handle, NULL, bh2);
-out:
- if (cleared) {
- BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
- if (!fatal)
- fatal = err;
- ext4_mark_super_dirty(sb);
- } else
- ext4_error(sb, "bit already cleared for inode %lu", ino);
-
-error_return:
- brelse(bitmap_bh);
- ext4_std_error(sb, fatal);
-}
-
-struct orlov_stats {
- __u32 free_inodes;
- __u32 free_clusters;
- __u32 used_dirs;
-};
-
-/*
- * Helper function for Orlov's allocator; returns critical information
- * for a particular block group or flex_bg. If flex_size is 1, then g
- * is a block group number; otherwise it is flex_bg number.
- */
-static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
- int flex_size, struct orlov_stats *stats)
-{
- struct ext4_group_desc *desc;
- struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
-
- if (flex_size > 1) {
- stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
- stats->free_clusters = atomic_read(&flex_group[g].free_clusters);
- stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
- return;
- }
-
- desc = ext4_get_group_desc(sb, g, NULL);
- if (desc) {
- stats->free_inodes = ext4_free_inodes_count(sb, desc);
- stats->free_clusters = ext4_free_group_clusters(sb, desc);
- stats->used_dirs = ext4_used_dirs_count(sb, desc);
- } else {
- stats->free_inodes = 0;
- stats->free_clusters = 0;
- stats->used_dirs = 0;
- }
-}
-
-/*
- * Orlov's allocator for directories.
- *
- * We always try to spread first-level directories.
- *
- * If there is a block group whose free-inode and free-block counts are
- * both no worse than average, we return the one with the smallest
- * directory count. Otherwise we simply return a random group.
- *
- * For all other directories the rules are:
- *
- * It's OK to put a directory into a group unless
- * it has too many directories already (max_dirs) or
- * it has too few free inodes left (min_inodes) or
- * it has too few free blocks left (min_blocks).
- * The parent's group is preferred; if it doesn't satisfy these
- * conditions we search cyclically through the rest. If none
- * of the groups looks good we just look for a group with more
- * free inodes than average (starting at the parent's group).
- */
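-
-/*
- * A compact user-space model of the top-level rule above.
- * pick_topdir_group() is a hypothetical helper, not part of ext4; the
- * stats array and parameters stand in for the live counters that the
- * real code below reads via get_orlov_stats():
- */
-#if 0
-static int pick_topdir_group(const struct orlov_stats *stats, int ngroups,
- __u32 avefreei, __u32 avefreec, int start,
- int inodes_per_group)
-{
- int i, g, best = -1;
- __u32 best_ndir = inodes_per_group;
-
- for (i = 0; i < ngroups; i++) {
- g = (start + i) % ngroups;
- if (!stats[g].free_inodes ||
- stats[g].used_dirs >= best_ndir ||
- stats[g].free_inodes < avefreei ||
- stats[g].free_clusters < avefreec)
- continue;
- best = g;
- best_ndir = stats[g].used_dirs;
- }
- return best; /* -1: nothing beats the averages, use the fallback */
-}
-#endif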
-
-static int find_group_orlov(struct super_block *sb, struct inode *parent,
- ext4_group_t *group, umode_t mode,
- const struct qstr *qstr)
-{
- ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_group_t real_ngroups = ext4_get_groups_count(sb);
- int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
- unsigned int freei, avefreei, grp_free;
- ext4_fsblk_t freeb, avefreec;
- unsigned int ndirs;
- int max_dirs, min_inodes;
- ext4_grpblk_t min_clusters;
- ext4_group_t i, grp, g, ngroups;
- struct ext4_group_desc *desc;
- struct orlov_stats stats;
- int flex_size = ext4_flex_bg_size(sbi);
- struct dx_hash_info hinfo;
-
- ngroups = real_ngroups;
- if (flex_size > 1) {
- ngroups = (real_ngroups + flex_size - 1) >>
- sbi->s_log_groups_per_flex;
- parent_group >>= sbi->s_log_groups_per_flex;
- }
-
- freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
- avefreei = freei / ngroups;
- freeb = EXT4_C2B(sbi,
- percpu_counter_read_positive(&sbi->s_freeclusters_counter));
- avefreec = freeb;
- do_div(avefreec, ngroups);
- ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
-
- if (S_ISDIR(mode) &&
- ((parent == sb->s_root->d_inode) ||
- (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) {
- int best_ndir = inodes_per_group;
- int ret = -1;
-
- if (qstr) {
- hinfo.hash_version = DX_HASH_HALF_MD4;
- hinfo.seed = sbi->s_hash_seed;
- ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
- grp = hinfo.hash;
- } else
- get_random_bytes(&grp, sizeof(grp));
- parent_group = (unsigned)grp % ngroups;
- for (i = 0; i < ngroups; i++) {
- g = (parent_group + i) % ngroups;
- get_orlov_stats(sb, g, flex_size, &stats);
- if (!stats.free_inodes)
- continue;
- if (stats.used_dirs >= best_ndir)
- continue;
- if (stats.free_inodes < avefreei)
- continue;
- if (stats.free_clusters < avefreec)
- continue;
- grp = g;
- ret = 0;
- best_ndir = stats.used_dirs;
- }
- if (ret)
- goto fallback;
- found_flex_bg:
- if (flex_size == 1) {
- *group = grp;
- return 0;
- }
-
- /*
- * We pack inodes at the beginning of the flexgroup's
- * inode tables. Block allocation decisions will do
- * something similar, although regular files will
- * start at 2nd block group of the flexgroup. See
- * ext4_ext_find_goal() and ext4_find_near().
- */
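- /*
- * E.g. with a hypothetical flex_size = 16: if the flex group chosen
- * above is grp = 3, the loop below scans real groups 48..63 and
- * returns the first one with a free inode.
- */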
- grp *= flex_size;
- for (i = 0; i < flex_size; i++) {
- if (grp+i >= real_ngroups)
- break;
- desc = ext4_get_group_desc(sb, grp+i, NULL);
- if (desc && ext4_free_inodes_count(sb, desc)) {
- *group = grp+i;
- return 0;
- }
- }
- goto fallback;
- }
-
- max_dirs = ndirs / ngroups + inodes_per_group / 16;
- min_inodes = avefreei - inodes_per_group*flex_size / 4;
- if (min_inodes < 1)
- min_inodes = 1;
- min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4;
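-
- /*
- * Worked example with hypothetical numbers: ngroups = 64 flex groups,
- * ndirs = 6400, inodes_per_group = 8192, flex_size = 16 gives
- * max_dirs = 6400/64 + 8192/16 = 612 and min_inodes =
- * avefreei - 32768 (clamped to at least 1 above); min_clusters is
- * derated from avefreec the same way.
- */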
-
- /*
- * Start looking in the flex group where we last allocated an
- * inode for this parent directory
- */
- if (EXT4_I(parent)->i_last_alloc_group != ~0) {
- parent_group = EXT4_I(parent)->i_last_alloc_group;
- if (flex_size > 1)
- parent_group >>= sbi->s_log_groups_per_flex;
- }
-
- for (i = 0; i < ngroups; i++) {
- grp = (parent_group + i) % ngroups;
- get_orlov_stats(sb, grp, flex_size, &stats);
- if (stats.used_dirs >= max_dirs)
- continue;
- if (stats.free_inodes < min_inodes)
- continue;
- if (stats.free_clusters < min_clusters)
- continue;
- goto found_flex_bg;
- }
-
-fallback:
- ngroups = real_ngroups;
- avefreei = freei / ngroups;
-fallback_retry:
- parent_group = EXT4_I(parent)->i_block_group;
- for (i = 0; i < ngroups; i++) {
- grp = (parent_group + i) % ngroups;
- desc = ext4_get_group_desc(sb, grp, NULL);
- if (desc) {
- grp_free = ext4_free_inodes_count(sb, desc);
- if (grp_free && grp_free >= avefreei) {
- *group = grp;
- return 0;
- }
- }
- }
-
- if (avefreei) {
- /*
- * The free-inodes counter is approximate, and for really small
- * filesystems the above test can fail to find any block groups.
- */
- avefreei = 0;
- goto fallback_retry;
- }
-
- return -1;
-}
-
-static int find_group_other(struct super_block *sb, struct inode *parent,
- ext4_group_t *group, umode_t mode)
-{
- ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
- ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
- struct ext4_group_desc *desc;
- int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
-
- /*
- * Try to place the inode in the same flex group as its
- * parent. If we can't find space, use the Orlov algorithm to
- * find another flex group, and store that information in the
- * parent directory's inode information so that future
- * allocations use that flex group.
- */
- if (flex_size > 1) {
- int retry = 0;
-
- try_again:
- parent_group &= ~(flex_size-1);
- last = parent_group + flex_size;
- if (last > ngroups)
- last = ngroups;
- for (i = parent_group; i < last; i++) {
- desc = ext4_get_group_desc(sb, i, NULL);
- if (desc && ext4_free_inodes_count(sb, desc)) {
- *group = i;
- return 0;
- }
- }
- if (!retry && EXT4_I(parent)->i_last_alloc_group != ~0) {
- retry = 1;
- parent_group = EXT4_I(parent)->i_last_alloc_group;
- goto try_again;
- }
- /*
- * If this didn't work, use the Orlov search algorithm
- * to find a new flex group; we pass in the mode to
- * avoid the topdir algorithms.
- */
- *group = parent_group + flex_size;
- if (*group > ngroups)
- *group = 0;
- return find_group_orlov(sb, parent, group, mode, NULL);
- }
-
- /*
- * Try to place the inode in its parent directory
- */
- *group = parent_group;
- desc = ext4_get_group_desc(sb, *group, NULL);
- if (desc && ext4_free_inodes_count(sb, desc) &&
- ext4_free_group_clusters(sb, desc))
- return 0;
-
- /*
- * We're going to place this inode in a different blockgroup from its
- * parent. We want to cause files in a common directory to all land in
- * the same blockgroup. But we want files which are in a different
- * directory which shares a blockgroup with our parent to land in a
- * different blockgroup.
- *
- * So add our directory's i_ino into the starting point for the hash.
- */
- *group = (*group + parent->i_ino) % ngroups;
-
- /*
- * Use a quadratic hash to find a group with a free inode and some free
- * blocks.
- */
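- /*
- * The probe below visits offsets +1, +3, +7, +15, ... (i.e. 2^k - 1)
- * from the starting group, modulo ngroups. E.g. with a hypothetical
- * ngroups = 128 and starting group 70 it tries groups 71, 73, 77,
- * 85, 101, 5 and 69.
- */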
- for (i = 1; i < ngroups; i <<= 1) {
- *group += i;
- if (*group >= ngroups)
- *group -= ngroups;
- desc = ext4_get_group_desc(sb, *group, NULL);
- if (desc && ext4_free_inodes_count(sb, desc) &&
- ext4_free_group_clusters(sb, desc))
- return 0;
- }
-
- /*
- * That failed: try linear search for a free inode, even if that group
- * has no free blocks.
- */
- *group = parent_group;
- for (i = 0; i < ngroups; i++) {
- if (++*group >= ngroups)
- *group = 0;
- desc = ext4_get_group_desc(sb, *group, NULL);
- if (desc && ext4_free_inodes_count(sb, desc))
- return 0;
- }
-
- return -1;
-}
-
-/*
- * There are two policies for allocating an inode. If the new inode is
- * a directory, then a forward search is made for a block group with both
- * free space and a low directory-to-inode ratio; if that fails, then of
- * the groups with above-average free space, the one with the fewest
- * directories is chosen.
- *
- * For other inodes, search forward from the parent directory's block
- * group to find a free inode.
- */
-struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
- const struct qstr *qstr, __u32 goal, uid_t *owner)
-{
- struct super_block *sb;
- struct buffer_head *inode_bitmap_bh = NULL;
- struct buffer_head *group_desc_bh;
- ext4_group_t ngroups, group = 0;
- unsigned long ino = 0;
- struct inode *inode;
- struct ext4_group_desc *gdp = NULL;
- struct ext4_inode_info *ei;
- struct ext4_sb_info *sbi;
- int ret2, err = 0;
- struct inode *ret;
- ext4_group_t i;
- ext4_group_t flex_group;
-
- /* Cannot create files in a deleted directory */
- if (!dir || !dir->i_nlink)
- return ERR_PTR(-EPERM);
-
- sb = dir->i_sb;
- ngroups = ext4_get_groups_count(sb);
- trace_ext4_request_inode(dir, mode);
- inode = new_inode(sb);
- if (!inode)
- return ERR_PTR(-ENOMEM);
- ei = EXT4_I(inode);
- sbi = EXT4_SB(sb);
-
- if (!goal)
- goal = sbi->s_inode_goal;
-
- if (goal && goal <= le32_to_cpu(sbi->s_es->s_inodes_count)) {
- group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
- ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
- ret2 = 0;
- goto got_group;
- }
-
- if (S_ISDIR(mode))
- ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
- else
- ret2 = find_group_other(sb, dir, &group, mode);
-
-got_group:
- EXT4_I(dir)->i_last_alloc_group = group;
- err = -ENOSPC;
- if (ret2 == -1)
- goto out;
-
- /*
- * Normally we will only go through one pass of this loop,
- * unless we get unlucky and it turns out the group we selected
- * had its last inode grabbed by someone else.
- */
- for (i = 0; i < ngroups; i++, ino = 0) {
- err = -EIO;
-
- gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
- if (!gdp)
- goto fail;
-
- brelse(inode_bitmap_bh);
- inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
- if (!inode_bitmap_bh)
- goto fail;
-
-repeat_in_this_group:
- ino = ext4_find_next_zero_bit((unsigned long *)
- inode_bitmap_bh->b_data,
- EXT4_INODES_PER_GROUP(sb), ino);
- if (ino >= EXT4_INODES_PER_GROUP(sb)) {
- if (++group == ngroups)
- group = 0;
- continue;
- }
- if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
- ext4_error(sb, "reserved inode found cleared - "
- "inode=%lu", ino + 1);
- continue;
- }
- ext4_lock_group(sb, group);
- ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
- ext4_unlock_group(sb, group);
- ino++; /* the inode bitmap is zero-based */
- if (!ret2)
- goto got; /* we grabbed the inode! */
- if (ino < EXT4_INODES_PER_GROUP(sb))
- goto repeat_in_this_group;
- }
- err = -ENOSPC;
- goto out;
-
-got:
- /* We may have to initialize the block bitmap if it isn't already */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
- gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
- struct buffer_head *block_bitmap_bh;
-
- block_bitmap_bh = ext4_read_block_bitmap(sb, group);
- BUFFER_TRACE(block_bitmap_bh, "get block bitmap access");
- err = ext4_journal_get_write_access(handle, block_bitmap_bh);
- if (err) {
- brelse(block_bitmap_bh);
- goto fail;
- }
-
- BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
- err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh);
- brelse(block_bitmap_bh);
-
- /* recheck and clear flag under lock if we still need to */
- ext4_lock_group(sb, group);
- if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
- ext4_free_group_clusters_set(sb, gdp,
- ext4_free_clusters_after_init(sb, group, gdp));
- gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
- gdp);
- }
- ext4_unlock_group(sb, group);
-
- if (err)
- goto fail;
- }
-
- BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
- if (err)
- goto fail;
-
- BUFFER_TRACE(group_desc_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, group_desc_bh);
- if (err)
- goto fail;
-
- /* Update the relevant bg descriptor fields */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
- int free;
- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-
- down_read(&grp->alloc_sem); /* protect vs itable lazyinit */
- ext4_lock_group(sb, group); /* while we modify the bg desc */
- free = EXT4_INODES_PER_GROUP(sb) -
- ext4_itable_unused_count(sb, gdp);
- if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
- free = 0;
- }
- /*
- * Check the relative inode number against the last used
- * relative inode number in this group. If it is greater,
- * we need to update the bg_itable_unused count.
- */
- if (ino > free)
- ext4_itable_unused_set(sb, gdp,
- (EXT4_INODES_PER_GROUP(sb) - ino));
- up_read(&grp->alloc_sem);
- } else {
- ext4_lock_group(sb, group);
- }
-
- ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
- if (S_ISDIR(mode)) {
- ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1);
- if (sbi->s_log_groups_per_flex) {
- ext4_group_t f = ext4_flex_group(sbi, group);
-
- atomic_inc(&sbi->s_flex_groups[f].used_dirs);
- }
- }
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
- gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
- }
- ext4_unlock_group(sb, group);
-
- BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
- if (err)
- goto fail;
-
- BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
- if (err)
- goto fail;
-
- percpu_counter_dec(&sbi->s_freeinodes_counter);
- if (S_ISDIR(mode))
- percpu_counter_inc(&sbi->s_dirs_counter);
- ext4_mark_super_dirty(sb);
-
- if (sbi->s_log_groups_per_flex) {
- flex_group = ext4_flex_group(sbi, group);
- atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
- }
- if (owner) {
- inode->i_mode = mode;
- inode->i_uid = owner[0];
- inode->i_gid = owner[1];
- } else if (test_opt(sb, GRPID)) {
- inode->i_mode = mode;
- inode->i_uid = current_fsuid();
- inode->i_gid = dir->i_gid;
- } else
- inode_init_owner(inode, dir, mode);
-
- inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
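- /*
- * This is the inverse of the split in ext4_free_inode(): with a
- * hypothetical 8192 inodes per group, bit 0 of group 1 became
- * ino = 1 after the increment above, so i_ino = 1 + 1 * 8192 = 8193.
- */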
- /* This is the optimal IO size (for stat), not the fs block size */
- inode->i_blocks = 0;
- inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
- ext4_current_time(inode);
-
- memset(ei->i_data, 0, sizeof(ei->i_data));
- ei->i_dir_start_lookup = 0;
- ei->i_disksize = 0;
-
- /* Don't inherit extent flag from directory, amongst others. */
- ei->i_flags =
- ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
- ei->i_file_acl = 0;
- ei->i_dtime = 0;
- ei->i_block_group = group;
- ei->i_last_alloc_group = ~0;
-
- ext4_set_inode_flags(inode);
- if (IS_DIRSYNC(inode))
- ext4_handle_sync(handle);
- if (insert_inode_locked(inode) < 0) {
- /*
- * Likely a bitmap corruption causing inode to be allocated
- * twice.
- */
- err = -EIO;
- goto fail;
- }
- spin_lock(&sbi->s_next_gen_lock);
- inode->i_generation = sbi->s_next_generation++;
- spin_unlock(&sbi->s_next_gen_lock);
-
- ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
- ext4_set_inode_state(inode, EXT4_STATE_NEW);
-
- ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
-
- ret = inode;
- dquot_initialize(inode);
- err = dquot_alloc_inode(inode);
- if (err)
- goto fail_drop;
-
- err = ext4_init_acl(handle, inode, dir);
- if (err)
- goto fail_free_drop;
-
- err = ext4_init_security(handle, inode, dir, qstr);
- if (err)
- goto fail_free_drop;
-
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
- /* set extent flag only for directories, files and normal symlinks */
- if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
- ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
- ext4_ext_tree_init(handle, inode);
- }
- }
-
- if (ext4_handle_valid(handle)) {
- ei->i_sync_tid = handle->h_transaction->t_tid;
- ei->i_datasync_tid = handle->h_transaction->t_tid;
- }
-
- err = ext4_mark_inode_dirty(handle, inode);
- if (err) {
- ext4_std_error(sb, err);
- goto fail_free_drop;
- }
-
- ext4_debug("allocating inode %lu\n", inode->i_ino);
- trace_ext4_allocate_inode(inode, dir, mode);
- goto really_out;
-fail:
- ext4_std_error(sb, err);
-out:
- iput(inode);
- ret = ERR_PTR(err);
-really_out:
- brelse(inode_bitmap_bh);
- return ret;
-
-fail_free_drop:
- dquot_free_inode(inode);
-
-fail_drop:
- dquot_drop(inode);
- inode->i_flags |= S_NOQUOTA;
- clear_nlink(inode);
- unlock_new_inode(inode);
- iput(inode);
- brelse(inode_bitmap_bh);
- return ERR_PTR(err);
-}
-
-/* Verify that we are loading a valid orphan from disk */
-struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
-{
- unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
- ext4_group_t block_group;
- int bit;
- struct buffer_head *bitmap_bh;
- struct inode *inode = NULL;
- long err = -EIO;
-
- /* Error cases - e2fsck has already cleaned up for us */
- if (ino > max_ino) {
- ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino);
- goto error;
- }
-
- block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
- bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
- bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
- if (!bitmap_bh) {
- ext4_warning(sb, "inode bitmap error for orphan %lu", ino);
- goto error;
- }
-
- /* Having the inode bit set should be a 100% indicator that this
- * is a valid orphan (no e2fsck run on fs). Orphans also include
- * inodes that were being truncated, so we can't check i_nlink==0.
- */
- if (!ext4_test_bit(bit, bitmap_bh->b_data))
- goto bad_orphan;
-
- inode = ext4_iget(sb, ino);
- if (IS_ERR(inode))
- goto iget_failed;
-
- /*
- * If the orphan has i_nlink > 0 then it should be able to be
- * truncated, otherwise it won't be removed from the orphan list
- * during processing and an infinite loop will result.
- */
- if (inode->i_nlink && !ext4_can_truncate(inode))
- goto bad_orphan;
-
- if (NEXT_ORPHAN(inode) > max_ino)
- goto bad_orphan;
- brelse(bitmap_bh);
- return inode;
-
-iget_failed:
- err = PTR_ERR(inode);
- inode = NULL;
-bad_orphan:
- ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino);
- printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
- bit, (unsigned long long)bitmap_bh->b_blocknr,
- ext4_test_bit(bit, bitmap_bh->b_data));
- printk(KERN_NOTICE "inode=%p\n", inode);
- if (inode) {
- printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
- is_bad_inode(inode));
- printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
- NEXT_ORPHAN(inode));
- printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
- printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
- /* Avoid freeing blocks if we got a bad deleted inode */
- if (inode->i_nlink == 0)
- inode->i_blocks = 0;
- iput(inode);
- }
- brelse(bitmap_bh);
-error:
- return ERR_PTR(err);
-}
-
-unsigned long ext4_count_free_inodes(struct super_block *sb)
-{
- unsigned long desc_count;
- struct ext4_group_desc *gdp;
- ext4_group_t i, ngroups = ext4_get_groups_count(sb);
-#ifdef EXT4FS_DEBUG
- struct ext4_super_block *es;
- unsigned long bitmap_count, x;
- struct buffer_head *bitmap_bh = NULL;
-
- es = EXT4_SB(sb)->s_es;
- desc_count = 0;
- bitmap_count = 0;
- gdp = NULL;
- for (i = 0; i < ngroups; i++) {
- gdp = ext4_get_group_desc(sb, i, NULL);
- if (!gdp)
- continue;
- desc_count += ext4_free_inodes_count(sb, gdp);
- brelse(bitmap_bh);
- bitmap_bh = ext4_read_inode_bitmap(sb, i);
- if (!bitmap_bh)
- continue;
-
- x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8);
- printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
- (unsigned long) i, ext4_free_inodes_count(sb, gdp), x);
- bitmap_count += x;
- }
- brelse(bitmap_bh);
- printk(KERN_DEBUG "ext4_count_free_inodes: "
- "stored = %u, computed = %lu, %lu\n",
- le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
- return desc_count;
-#else
- desc_count = 0;
- for (i = 0; i < ngroups; i++) {
- gdp = ext4_get_group_desc(sb, i, NULL);
- if (!gdp)
- continue;
- desc_count += ext4_free_inodes_count(sb, gdp);
- cond_resched();
- }
- return desc_count;
-#endif
-}
-
-/* Called at mount-time, super-block is locked */
-unsigned long ext4_count_dirs(struct super_block * sb)
-{
- unsigned long count = 0;
- ext4_group_t i, ngroups = ext4_get_groups_count(sb);
-
- for (i = 0; i < ngroups; i++) {
- struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
- if (!gdp)
- continue;
- count += ext4_used_dirs_count(sb, gdp);
- }
- return count;
-}
-
-/*
- * Zero out the not-yet-zeroed part of an inode table by writing zeroes
- * through the whole table. Must be called without any spinlock held. On an
- * active filesystem the only caller is the ext4lazyinit thread, so we do
- * not need any special locks; however, we have to prevent inode allocation
- * from the current group, so we take the alloc_sem lock to block
- * ext4_new_inode() until we are finished.
- */
-int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
- int barrier)
-{
- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_group_desc *gdp = NULL;
- struct buffer_head *group_desc_bh;
- handle_t *handle;
- ext4_fsblk_t blk;
- int num, ret = 0, used_blks = 0;
-
- /* This should not happen, but just to be sure check this */
- if (sb->s_flags & MS_RDONLY) {
- ret = 1;
- goto out;
- }
-
- gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
- if (!gdp)
- goto out;
-
- /*
- * We do not need to lock this, because we are the only one
- * handling this flag.
- */
- if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
- goto out;
-
- handle = ext4_journal_start_sb(sb, 1);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- goto out;
- }
-
- down_write(&grp->alloc_sem);
- /*
- * If inode bitmap was already initialized there may be some
- * used inodes so we need to skip blocks with used inodes in
- * inode table.
- */
- if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
- used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
- ext4_itable_unused_count(sb, gdp)),
- sbi->s_inodes_per_block);
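-
- /*
- * Worked example with hypothetical geometry: 4K blocks and 256-byte
- * inodes give 16 inodes per itable block; with 8192 inodes per group
- * and itable_unused = 8000, 192 inodes are in use, so used_blks =
- * DIV_ROUND_UP(192, 16) = 12 and zeroing starts 12 blocks in.
- */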
-
- if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
- ext4_error(sb, "Something is wrong with group %u: "
- "used itable blocks: %d; "
- "itable unused count: %u",
- group, used_blks,
- ext4_itable_unused_count(sb, gdp));
- ret = 1;
- goto err_out;
- }
-
- blk = ext4_inode_table(sb, gdp) + used_blks;
- num = sbi->s_itb_per_group - used_blks;
-
- BUFFER_TRACE(group_desc_bh, "get_write_access");
- ret = ext4_journal_get_write_access(handle,
- group_desc_bh);
- if (ret)
- goto err_out;
-
- /*
- * Skip zeroout if the inode table is full. But we set the ZEROED
- * flag anyway, because obviously, when it is full it does not need
- * further zeroing.
- */
- if (unlikely(num == 0))
- goto skip_zeroout;
-
- ext4_debug("going to zero out inode table in group %d\n",
- group);
- ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
- if (ret < 0)
- goto err_out;
- if (barrier)
- blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);
-
-skip_zeroout:
- ext4_lock_group(sb, group);
- gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
- gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
- ext4_unlock_group(sb, group);
-
- BUFFER_TRACE(group_desc_bh,
- "call ext4_handle_dirty_metadata");
- ret = ext4_handle_dirty_metadata(handle, NULL,
- group_desc_bh);
-
-err_out:
- up_write(&grp->alloc_sem);
- ext4_journal_stop(handle);
-out:
- return ret;
-}
diff --git a/ANDROID_3.4.5/fs/ext4/indirect.c b/ANDROID_3.4.5/fs/ext4/indirect.c
deleted file mode 100644
index 830e1b2b..00000000
--- a/ANDROID_3.4.5/fs/ext4/indirect.c
+++ /dev/null
@@ -1,1502 +0,0 @@
-/*
- * linux/fs/ext4/indirect.c
- *
- * from
- *
- * linux/fs/ext4/inode.c
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- * from
- *
- * linux/fs/minix/inode.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * Goal-directed block allocation by Stephen Tweedie
- * (sct@redhat.com), 1993, 1998
- */
-
-#include "ext4_jbd2.h"
-#include "truncate.h"
-
-#include <trace/events/ext4.h>
-
-typedef struct {
- __le32 *p;
- __le32 key;
- struct buffer_head *bh;
-} Indirect;
-
-static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
-{
- p->key = *(p->p = v);
- p->bh = bh;
-}
-
-/**
- * ext4_block_to_path - parse the block number into array of offsets
- * @inode: inode in question (we are only interested in its superblock)
- * @i_block: block number to be parsed
- * @offsets: array to store the offsets in
- * @boundary: set this non-zero if the referred-to block is likely to be
- * followed (on disk) by an indirect block.
- *
- * To store the locations of a file's data, ext4 uses a data structure
- * common to UNIX filesystems - a tree of pointers anchored in the inode, with
- * data blocks at leaves and indirect blocks in intermediate nodes.
- * This function translates the block number into path in that tree -
- * return value is the path length and @offsets[n] is the offset of
- * pointer to the (n+1)th node in the nth one. If @i_block is out of range
- * (negative or too large), a warning is printed and zero is returned.
- *
- * Note: function doesn't find node addresses, so no IO is needed. All
- * we need to know is the capacity of indirect blocks (taken from the
- * inode->i_sb).
- */
-
-/*
- * Portability note: the last comparison (check that we fit into triple
- * indirect block) is spelled differently, because otherwise on an
- * architecture with 32-bit longs and 8Kb pages we might get into trouble
- * if our filesystem had 8Kb blocks. We might use long long, but that would
- * kill us on x86. Oh, well, at least the sign propagation does not matter -
- * i_block would have to be negative in the very beginning, so we would not
- * get there at all.
- */
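-
-/*
- * A worked example, assuming 4K blocks (so ptrs = 1024 pointers per
- * block) and the usual 12 direct blocks: i_block 0..11 is direct;
- * 12..1035 goes through the single indirect block; for i_block = 5000,
- * 5000 - 12 - 1024 = 3964 < 1024^2, so the path is double indirect:
- * offsets = { EXT4_DIND_BLOCK, 3964 >> 10 = 3, 3964 & 1023 = 892 },
- * i.e. depth 3.
- */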
-
-static int ext4_block_to_path(struct inode *inode,
- ext4_lblk_t i_block,
- ext4_lblk_t offsets[4], int *boundary)
-{
- int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb);
- const long direct_blocks = EXT4_NDIR_BLOCKS,
- indirect_blocks = ptrs,
- double_blocks = (1 << (ptrs_bits * 2));
- int n = 0;
- int final = 0;
-
- if (i_block < direct_blocks) {
- offsets[n++] = i_block;
- final = direct_blocks;
- } else if ((i_block -= direct_blocks) < indirect_blocks) {
- offsets[n++] = EXT4_IND_BLOCK;
- offsets[n++] = i_block;
- final = ptrs;
- } else if ((i_block -= indirect_blocks) < double_blocks) {
- offsets[n++] = EXT4_DIND_BLOCK;
- offsets[n++] = i_block >> ptrs_bits;
- offsets[n++] = i_block & (ptrs - 1);
- final = ptrs;
- } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {
- offsets[n++] = EXT4_TIND_BLOCK;
- offsets[n++] = i_block >> (ptrs_bits * 2);
- offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
- offsets[n++] = i_block & (ptrs - 1);
- final = ptrs;
- } else {
- ext4_warning(inode->i_sb, "block %lu > max in inode %lu",
- i_block + direct_blocks +
- indirect_blocks + double_blocks, inode->i_ino);
- }
- if (boundary)
- *boundary = final - 1 - (i_block & (ptrs - 1));
- return n;
-}
-
-/**
- * ext4_get_branch - read the chain of indirect blocks leading to data
- * @inode: inode in question
- * @depth: depth of the chain (1 - direct pointer, etc.)
- * @offsets: offsets of pointers in inode/indirect blocks
- * @chain: place to store the result
- * @err: here we store the error value
- *
- * Function fills the array of triples <key, p, bh> and returns %NULL
- * if everything went OK or the pointer to the last filled triple
- * (incomplete one) otherwise. Upon the return chain[i].key contains
- * the number of (i+1)-th block in the chain (as it is stored in memory,
- * i.e. little-endian 32-bit), chain[i].p contains the address of that
- * number (it points into struct inode for i==0 and into the bh->b_data
- * for i>0) and chain[i].bh points to the buffer_head of i-th indirect
- * block for i>0 and NULL for i==0. In other words, it holds the block
- * numbers of the chain, addresses they were taken from (and where we can
- * verify that chain did not change) and buffer_heads hosting these
- * numbers.
- *
- * Function stops when it stumbles upon zero pointer (absent block)
- * (pointer to last triple returned, *@err == 0)
- * or when it gets an IO error reading an indirect block
- * (ditto, *@err == -EIO)
- * or when it reads all @depth-1 indirect blocks successfully and finds
- * the whole chain, all the way to the data (returns %NULL, *err == 0).
- *
- * Need to be called with
- * down_read(&EXT4_I(inode)->i_data_sem)
- */
-static Indirect *ext4_get_branch(struct inode *inode, int depth,
- ext4_lblk_t *offsets,
- Indirect chain[4], int *err)
-{
- struct super_block *sb = inode->i_sb;
- Indirect *p = chain;
- struct buffer_head *bh;
-
- *err = 0;
- /* i_data is not going away, no lock needed */
- add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets);
- if (!p->key)
- goto no_block;
- while (--depth) {
- bh = sb_getblk(sb, le32_to_cpu(p->key));
- if (unlikely(!bh))
- goto failure;
-
- if (!bh_uptodate_or_lock(bh)) {
- if (bh_submit_read(bh) < 0) {
- put_bh(bh);
- goto failure;
- }
- /* validate block references */
- if (ext4_check_indirect_blockref(inode, bh)) {
- put_bh(bh);
- goto failure;
- }
- }
-
- add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
- /* Reader: end */
- if (!p->key)
- goto no_block;
- }
- return NULL;
-
-failure:
- *err = -EIO;
-no_block:
- return p;
-}
-
-/**
- * ext4_find_near - find a place for allocation with sufficient locality
- * @inode: owner
- * @ind: descriptor of indirect block.
- *
- * This function returns the preferred place for block allocation.
- * It is used when heuristic for sequential allocation fails.
- * Rules are:
- * + if there is a block to the left of our position - allocate near it.
- * + if pointer will live in indirect block - allocate near that block.
- * + if pointer will live in inode - allocate in the same
- * cylinder group.
- *
- * In the latter case we colour the starting block by the caller's PID to
- * prevent it from clashing with concurrent allocations for a different inode
- * in the same block group. The PID is used here so that functionally related
- * files will be close-by on-disk.
- *
- * Caller must make sure that @ind is valid and will stay that way.
- */
-static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
-{
- struct ext4_inode_info *ei = EXT4_I(inode);
- __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
- __le32 *p;
-
- /* Try to find previous block */
- for (p = ind->p - 1; p >= start; p--) {
- if (*p)
- return le32_to_cpu(*p);
- }
-
- /* No such thing, so let's try location of indirect block */
- if (ind->bh)
- return ind->bh->b_blocknr;
-
- /*
- * It is going to be referred to from the inode itself? OK, just put it
- * into the same cylinder group then.
- */
- return ext4_inode_to_goal_block(inode);
-}
-
-/**
- * ext4_find_goal - find a preferred place for allocation.
- * @inode: owner
- * @block: block we want
- * @partial: pointer to the last triple within a chain
- *
- * Normally this function finds the preferred place for block allocation
- * and returns it.
- * Because this is only used for non-extent files, we limit the block nr
- * to 32 bits.
- */
-static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
- Indirect *partial)
-{
- ext4_fsblk_t goal;
-
- /*
- * XXX need to get goal block from mballoc's data structures
- */
-
- goal = ext4_find_near(inode, partial);
- goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
- return goal;
-}
-
-/**
- * ext4_blks_to_allocate - Look up the block map and count the number
- * of direct blocks that need to be allocated for the given branch.
- *
- * @branch: chain of indirect blocks
- * @k: number of blocks needed for indirect blocks
- * @blks: number of data blocks to be mapped.
- * @blocks_to_boundary: the offset in the indirect block
- *
- * return the total number of blocks to be allocated, including the
- * direct and indirect blocks.
- */
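-
-/*
- * E.g. (hypothetical values): if k == 0, blks = 8, blocks_to_boundary = 5,
- * and the pointers following branch[0].p in the existing indirect block
- * are all zero, the loop below stops at count = 6: the boundary caps the
- * run even though 8 blocks were asked for.
- */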
-static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
- int blocks_to_boundary)
-{
- unsigned int count = 0;
-
- /*
- * Simple case: the [t,d]indirect block(s) have not been allocated
- * yet, so clearly no blocks on that path have been allocated.
- */
- if (k > 0) {
- /* right now we don't handle cross boundary allocation */
- if (blks < blocks_to_boundary + 1)
- count += blks;
- else
- count += blocks_to_boundary + 1;
- return count;
- }
-
- count++;
- while (count < blks && count <= blocks_to_boundary &&
- le32_to_cpu(*(branch[0].p + count)) == 0) {
- count++;
- }
- return count;
-}
-
-/**
- * ext4_alloc_blocks: multiple allocate blocks needed for a branch
- * @handle: handle for this transaction
- * @inode: inode which needs allocated blocks
- * @iblock: the logical block to start allocating at
- * @goal: preferred physical block of allocation
- * @indirect_blks: the number of blocks that need to be allocated for
- * indirect blocks
- * @blks: number of desired blocks
- * @new_blocks: on return it will store the new block numbers for
- * the indirect blocks (if needed) and the first direct block
- * @err: on return it will store the error code
- *
- * This function will return the number of blocks allocated as
- * requested by the passed-in parameters.
- */
-static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
- ext4_lblk_t iblock, ext4_fsblk_t goal,
- int indirect_blks, int blks,
- ext4_fsblk_t new_blocks[4], int *err)
-{
- struct ext4_allocation_request ar;
- int target, i;
- unsigned long count = 0, blk_allocated = 0;
- int index = 0;
- ext4_fsblk_t current_block = 0;
- int ret = 0;
-
- /*
- * Here we try to allocate the requested multiple blocks at once,
- * on a best-effort basis.
- * To build a branch, we have to allocate blocks for
- * the indirect blocks (if not allocated yet), and at least
- * the first direct block of this branch. That's the
- * minimum number of blocks that must be allocated.
- */
- /* first we try to allocate the indirect blocks */
- target = indirect_blks;
- while (target > 0) {
- count = target;
- /* allocating blocks for indirect blocks and direct blocks */
- current_block = ext4_new_meta_blocks(handle, inode, goal,
- 0, &count, err);
- if (*err)
- goto failed_out;
-
- if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) {
- EXT4_ERROR_INODE(inode,
- "current_block %llu + count %lu > %d!",
- current_block, count,
- EXT4_MAX_BLOCK_FILE_PHYS);
- *err = -EIO;
- goto failed_out;
- }
-
- target -= count;
- /* allocate blocks for indirect blocks */
- while (index < indirect_blks && count) {
- new_blocks[index++] = current_block++;
- count--;
- }
- if (count > 0) {
- /*
- * save the new block number
- * for the first direct block
- */
- new_blocks[index] = current_block;
- printk(KERN_INFO "%s returned more blocks than "
- "requested\n", __func__);
- WARN_ON(1);
- break;
- }
- }
-
- target = blks - count;
- blk_allocated = count;
- if (!target)
- goto allocated;
- /* Now allocate data blocks */
- memset(&ar, 0, sizeof(ar));
- ar.inode = inode;
- ar.goal = goal;
- ar.len = target;
- ar.logical = iblock;
- if (S_ISREG(inode->i_mode))
- /* enable in-core preallocation only for regular files */
- ar.flags = EXT4_MB_HINT_DATA;
-
- current_block = ext4_mb_new_blocks(handle, &ar, err);
- if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) {
- EXT4_ERROR_INODE(inode,
- "current_block %llu + ar.len %d > %d!",
- current_block, ar.len,
- EXT4_MAX_BLOCK_FILE_PHYS);
- *err = -EIO;
- goto failed_out;
- }
-
- if (*err && (target == blks)) {
- /*
- * if the allocation failed and we didn't allocate
- * any blocks before
- */
- goto failed_out;
- }
- if (!*err) {
- if (target == blks) {
- /*
- * save the new block number
- * for the first direct block
- */
- new_blocks[index] = current_block;
- }
- blk_allocated += ar.len;
- }
-allocated:
- /* total number of blocks allocated for direct blocks */
- ret = blk_allocated;
- *err = 0;
- return ret;
-failed_out:
- for (i = 0; i < index; i++)
- ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
- return ret;
-}
-
-/**
- * ext4_alloc_branch - allocate and set up a chain of blocks.
- * @handle: handle for this transaction
- * @inode: owner
- * @indirect_blks: number of allocated indirect blocks
- * @blks: number of allocated direct blocks
- * @goal: preferred place for allocation
- * @offsets: offsets (in the blocks) to store the pointers to next.
- * @branch: place to store the chain in.
- *
- * This function allocates blocks, zeroes out all but the last one,
- * links them into chain and (if we are synchronous) writes them to disk.
- * In other words, it prepares a branch that can be spliced onto the
- * inode. It stores the information about that chain in the branch[], in
- * the same format as ext4_get_branch() would do. We are calling it after
- * we had read the existing part of chain and partial points to the last
- * triple of that (one with zero ->key). Upon the exit we have the same
- * picture as after the successful ext4_get_block(), except that in one
- * place chain is disconnected - *branch->p is still zero (we did not
- * set the last link), but branch->key contains the number that should
- * be placed into *branch->p to fill that gap.
- *
- * If allocation fails we free all blocks we've allocated (and forget
- * their buffer_heads) and return the error value from the failed
- * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
- * as described above and return 0.
- */
-static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
- ext4_lblk_t iblock, int indirect_blks,
- int *blks, ext4_fsblk_t goal,
- ext4_lblk_t *offsets, Indirect *branch)
-{
- int blocksize = inode->i_sb->s_blocksize;
- int i, n = 0;
- int err = 0;
- struct buffer_head *bh;
- int num;
- ext4_fsblk_t new_blocks[4];
- ext4_fsblk_t current_block;
-
- num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
- *blks, new_blocks, &err);
- if (err)
- return err;
-
- branch[0].key = cpu_to_le32(new_blocks[0]);
- /*
- * metadata blocks and data blocks are allocated.
- */
- for (n = 1; n <= indirect_blks; n++) {
- /*
- * Get buffer_head for parent block, zero it out
- * and set the pointer to new one, then send
- * parent to disk.
- */
- bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
- if (unlikely(!bh)) {
- err = -EIO;
- goto failed;
- }
-
- branch[n].bh = bh;
- lock_buffer(bh);
- BUFFER_TRACE(bh, "call get_create_access");
- err = ext4_journal_get_create_access(handle, bh);
- if (err) {
- /* Don't brelse(bh) here; it's done in
- * ext4_journal_forget() below */
- unlock_buffer(bh);
- goto failed;
- }
-
- memset(bh->b_data, 0, blocksize);
- branch[n].p = (__le32 *) bh->b_data + offsets[n];
- branch[n].key = cpu_to_le32(new_blocks[n]);
- *branch[n].p = branch[n].key;
- if (n == indirect_blks) {
- current_block = new_blocks[n];
- /*
- * End of chain, update the last new metablock of
- * the chain to point to the newly allocated
- * data block numbers.
- */
- for (i = 1; i < num; i++)
- *(branch[n].p + i) = cpu_to_le32(++current_block);
- }
- BUFFER_TRACE(bh, "marking uptodate");
- set_buffer_uptodate(bh);
- unlock_buffer(bh);
-
- BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, inode, bh);
- if (err)
- goto failed;
- }
- *blks = num;
- return err;
-failed:
- /* Allocation failed, free what we already allocated */
- ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
- for (i = 1; i <= n ; i++) {
- /*
- * branch[i].bh is newly allocated, so there is no
- * need to revoke the block, which is why we don't
- * need to set EXT4_FREE_BLOCKS_METADATA.
- */
- ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
- EXT4_FREE_BLOCKS_FORGET);
- }
- for (i = n+1; i < indirect_blks; i++)
- ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
-
- ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
-
- return err;
-}
-
-/**
- * ext4_splice_branch - splice the allocated branch onto inode.
- * @handle: handle for this transaction
- * @inode: owner
- * @block: (logical) number of block we are adding
- * @chain: chain of indirect blocks (with a missing link - see
- * ext4_alloc_branch)
- * @where: location of missing link
- * @num: number of indirect blocks we are adding
- * @blks: number of direct blocks we are adding
- *
- * This function fills the missing link and does all housekeeping needed in
- * inode (->i_blocks, etc.). In case of success we end up with the full
- * chain to new block and return 0.
- */
-static int ext4_splice_branch(handle_t *handle, struct inode *inode,
- ext4_lblk_t block, Indirect *where, int num,
- int blks)
-{
- int i;
- int err = 0;
- ext4_fsblk_t current_block;
-
- /*
- * If we're splicing into a [td]indirect block (as opposed to the
- * inode) then we need to get write access to the [td]indirect block
- * before the splice.
- */
- if (where->bh) {
- BUFFER_TRACE(where->bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, where->bh);
- if (err)
- goto err_out;
- }
- /* That's it */
-
- *where->p = where->key;
-
- /*
- * Update the host buffer_head or inode to point to the remaining
- * just-allocated direct blocks.
- */
- if (num == 0 && blks > 1) {
- current_block = le32_to_cpu(where->key) + 1;
- for (i = 1; i < blks; i++)
- *(where->p + i) = cpu_to_le32(current_block++);
- }
-
- /* We are done with atomic stuff, now do the rest of housekeeping */
- /* had we spliced it onto an indirect block? */
- if (where->bh) {
- /*
- * If we spliced it onto an indirect block, we haven't
- * altered the inode. Note however that if it is being spliced
- * onto an indirect block at the very end of the file (the
- * file is growing) then we *will* alter the inode to reflect
- * the new i_size. But that is not done here - it is done in
- * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
- */
- jbd_debug(5, "splicing indirect only\n");
- BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, inode, where->bh);
- if (err)
- goto err_out;
- } else {
- /*
- * OK, we spliced it into the inode itself on a direct block.
- */
- ext4_mark_inode_dirty(handle, inode);
- jbd_debug(5, "splicing direct\n");
- }
- return err;
-
-err_out:
- for (i = 1; i <= num; i++) {
- /*
- * branch[i].bh is newly allocated, so there is no
- * need to revoke the block, which is why we don't
- * need to set EXT4_FREE_BLOCKS_METADATA.
- */
- ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
- EXT4_FREE_BLOCKS_FORGET);
- }
- ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
- blks, 0);
-
- return err;
-}
-
-/*
- * The ext4_ind_map_blocks() function handles non-extent inodes
- * (i.e., using the traditional indirect/double-indirect i_blocks
- * scheme) for ext4_map_blocks().
- *
- * Allocation strategy is simple: if we have to allocate something, we will
- * have to go the whole way to leaf. So let's do it before attaching anything
- * to tree, set linkage between the newborn blocks, write them if sync is
- * required, recheck the path, free and repeat if check fails, otherwise
- * set the last missing link (that will protect us from any truncate-generated
- * removals - all blocks on the path are immune now) and possibly force the
- * write on the parent block.
- * That has a nice additional property: no special recovery from the failed
- * allocations is needed - we simply release blocks and do not touch anything
- * reachable from inode.
- *
- * `handle' can be NULL if create == 0.
- *
- * return > 0, # of blocks mapped or allocated.
- * return = 0, if plain lookup failed.
- * return < 0, error case.
- *
- * The ext4_ind_map_blocks() function should be called with
- * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem
- * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or
- * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
- * blocks.
- */
-int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
- struct ext4_map_blocks *map,
- int flags)
-{
- int err = -EIO;
- ext4_lblk_t offsets[4];
- Indirect chain[4];
- Indirect *partial;
- ext4_fsblk_t goal;
- int indirect_blks;
- int blocks_to_boundary = 0;
- int depth;
- int count = 0;
- ext4_fsblk_t first_block = 0;
-
- trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
- J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
- J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
- depth = ext4_block_to_path(inode, map->m_lblk, offsets,
- &blocks_to_boundary);
-
- if (depth == 0)
- goto out;
-
- partial = ext4_get_branch(inode, depth, offsets, chain, &err);
-
- /* Simplest case - block found, no allocation needed */
- if (!partial) {
- first_block = le32_to_cpu(chain[depth - 1].key);
- count++;
- /* map more blocks */
- while (count < map->m_len && count <= blocks_to_boundary) {
- ext4_fsblk_t blk;
-
- blk = le32_to_cpu(*(chain[depth-1].p + count));
-
- if (blk == first_block + count)
- count++;
- else
- break;
- }
- goto got_it;
- }
-
- /* Next simple case - plain lookup or failed read of indirect block */
- if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
- goto cleanup;
-
- /*
- * Okay, we need to do block allocation.
- */
- if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
- EXT4_ERROR_INODE(inode, "Can't allocate blocks for "
- "non-extent mapped inodes with bigalloc");
- return -ENOSPC;
- }
-
- goal = ext4_find_goal(inode, map->m_lblk, partial);
-
- /* the number of blocks we need to allocate for [d,t]indirect blocks */
- indirect_blks = (chain + depth) - partial - 1;
-
- /*
- * Next look up the indirect map to count the total number of
- * direct blocks to allocate for this branch.
- */
- count = ext4_blks_to_allocate(partial, indirect_blks,
- map->m_len, blocks_to_boundary);
- /*
- * Block out ext4_truncate while we alter the tree
- */
- err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
- &count, goal,
- offsets + (partial - chain), partial);
-
- /*
- * The ext4_splice_branch call will free and forget any buffers
- * on the new chain if there is a failure, but that risks using
- * up transaction credits, especially for bitmaps where the
- * credits cannot be returned. Can we handle this somehow? We
- * may need to return -EAGAIN upwards in the worst case. --sct
- */
- if (!err)
- err = ext4_splice_branch(handle, inode, map->m_lblk,
- partial, indirect_blks, count);
- if (err)
- goto cleanup;
-
- map->m_flags |= EXT4_MAP_NEW;
-
- ext4_update_inode_fsync_trans(handle, inode, 1);
-got_it:
- map->m_flags |= EXT4_MAP_MAPPED;
- map->m_pblk = le32_to_cpu(chain[depth-1].key);
- map->m_len = count;
- if (count > blocks_to_boundary)
- map->m_flags |= EXT4_MAP_BOUNDARY;
- err = count;
- /* Clean up and exit */
- partial = chain + depth - 1; /* the whole chain */
-cleanup:
- while (partial > chain) {
- BUFFER_TRACE(partial->bh, "call brelse");
- brelse(partial->bh);
- partial--;
- }
-out:
- trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
- map->m_pblk, map->m_len, err);
- return err;
-}
-
-/*
- * O_DIRECT for ext3 (or indirect map) based files
- *
- * If the O_DIRECT write will extend the file then add this inode to the
- * orphan list. So recovery will truncate it back to the original size
- * if the machine crashes during the write.
- *
- * If the O_DIRECT write is instantiating holes inside i_size and the machine
- * crashes then stale disk data _may_ be exposed inside the file. But current
- * VFS code falls back into buffered path in that case so we are safe.
- */
-ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
- const struct iovec *iov, loff_t offset,
- unsigned long nr_segs)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- struct ext4_inode_info *ei = EXT4_I(inode);
- handle_t *handle;
- ssize_t ret;
- int orphan = 0;
- size_t count = iov_length(iov, nr_segs);
- int retries = 0;
-
- if (rw == WRITE) {
- loff_t final_size = offset + count;
-
- if (final_size > inode->i_size) {
- /* Credits for sb + inode write */
- handle = ext4_journal_start(inode, 2);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- goto out;
- }
- ret = ext4_orphan_add(handle, inode);
- if (ret) {
- ext4_journal_stop(handle);
- goto out;
- }
- orphan = 1;
- ei->i_disksize = inode->i_size;
- ext4_journal_stop(handle);
- }
- }
-
-retry:
- if (rw == READ && ext4_should_dioread_nolock(inode)) {
- if (unlikely(!list_empty(&ei->i_completed_io_list))) {
- mutex_lock(&inode->i_mutex);
- ext4_flush_completed_IO(inode);
- mutex_unlock(&inode->i_mutex);
- }
- ret = __blockdev_direct_IO(rw, iocb, inode,
- inode->i_sb->s_bdev, iov,
- offset, nr_segs,
- ext4_get_block, NULL, NULL, 0);
- } else {
- ret = blockdev_direct_IO(rw, iocb, inode, iov,
- offset, nr_segs, ext4_get_block);
-
- if (unlikely((rw & WRITE) && ret < 0)) {
- loff_t isize = i_size_read(inode);
- loff_t end = offset + iov_length(iov, nr_segs);
-
- if (end > isize)
- ext4_truncate_failed_write(inode);
- }
- }
- if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
- goto retry;
-
- if (orphan) {
- int err;
-
- /* Credits for sb + inode write */
- handle = ext4_journal_start(inode, 2);
- if (IS_ERR(handle)) {
- /* This is really bad luck. We've written the data
- * but cannot extend i_size. Bail out and pretend
- * the write failed... */
- ret = PTR_ERR(handle);
- if (inode->i_nlink)
- ext4_orphan_del(NULL, inode);
-
- goto out;
- }
- if (inode->i_nlink)
- ext4_orphan_del(handle, inode);
- if (ret > 0) {
- loff_t end = offset + ret;
- if (end > inode->i_size) {
- ei->i_disksize = end;
- i_size_write(inode, end);
- /*
- * We're going to return a positive `ret'
- * here due to non-zero-length I/O, so there's
- * no way of reporting error returns from
- * ext4_mark_inode_dirty() to userspace. So
- * ignore it.
- */
- ext4_mark_inode_dirty(handle, inode);
- }
- }
- err = ext4_journal_stop(handle);
- if (ret == 0)
- ret = err;
- }
-out:
- return ret;
-}
-
-/*
- * Calculate the number of metadata blocks we need to reserve
- * to allocate a new block at @lblock for a non-extent file
- */
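-
-/*
- * E.g. assuming 4K blocks (EXT4_ADDR_PER_BLOCK_BITS = 10): for
- * lblock = 5000, lblock - 12 = 4988 and order_base_2(4988) = 13, so
- * 13 / 10 + 1 = 2 metadata blocks are reserved (one indirect plus one
- * double indirect, the worst case for a new block at that offset).
- */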
-int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock)
-{
- struct ext4_inode_info *ei = EXT4_I(inode);
- sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1);
- int blk_bits;
-
- if (lblock < EXT4_NDIR_BLOCKS)
- return 0;
-
- lblock -= EXT4_NDIR_BLOCKS;
-
- if (ei->i_da_metadata_calc_len &&
- (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
- ei->i_da_metadata_calc_len++;
- return 0;
- }
- ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
- ei->i_da_metadata_calc_len = 1;
- blk_bits = order_base_2(lblock);
- return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
-}
-
-int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk)
-{
- int indirects;
-
- /* if nrblocks are contiguous */
- if (chunk) {
- /*
- * With N contiguous data blocks, we need at most
- * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
- * 2 dindirect blocks, and 1 tindirect block
- */
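- /*
- * E.g. with hypothetical 4K blocks (1024 pointers per block),
- * nrblocks = 1000 contiguous blocks cost at most
- * DIV_ROUND_UP(1000, 1024) + 4 = 5 metadata blocks.
- */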
- return DIV_ROUND_UP(nrblocks,
- EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
- }
- /*
- * If nrblocks are not contiguous, then in the worst case each block
- * touches an indirect block, each indirect block touches a double
- * indirect block, plus a triple indirect block.
- */
- indirects = nrblocks * 2 + 1;
- return indirects;
-}
-
-/*
- * Truncate transactions can be complex and absolutely huge. So we need to
- * be able to restart the transaction at a convenient checkpoint to make
- * sure we don't overflow the journal.
- *
- * start_transaction gets us a new handle for a truncate transaction,
- * and extend_transaction tries to extend the existing one a bit. If
- * extend fails, we need to propagate the failure up and restart the
- * transaction in the top-level truncate loop. --sct
- */
-static handle_t *start_transaction(struct inode *inode)
-{
- handle_t *result;
-
- result = ext4_journal_start(inode, ext4_blocks_for_truncate(inode));
- if (!IS_ERR(result))
- return result;
-
- ext4_std_error(inode->i_sb, PTR_ERR(result));
- return result;
-}
-
-/*
- * Try to extend this transaction for the purposes of truncation.
- *
- * Returns 0 if we managed to create more room. If we can't create more
- * room, and the transaction must be restarted we return 1.
- */
-static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
-{
- if (!ext4_handle_valid(handle))
- return 0;
- if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
- return 0;
- if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode)))
- return 0;
- return 1;
-}
-
-/*
- * Probably it should be a library function... search for first non-zero word
- * or memcmp with zero_page, whatever is better for particular architecture.
- * Linus?
- */
-static inline int all_zeroes(__le32 *p, __le32 *q)
-{
- while (p < q)
- if (*p++)
- return 0;
- return 1;
-}
-
-/**
- * ext4_find_shared - find the indirect blocks for partial truncation.
- * @inode: inode in question
- * @depth: depth of the affected branch
- * @offsets: offsets of pointers in that branch (see ext4_block_to_path)
- * @chain: place to store the pointers to partial indirect blocks
- * @top: place to the (detached) top of branch
- *
- * This is a helper function used by ext4_truncate().
- *
- * When we do truncate() we may have to clean the ends of several
- * indirect blocks but leave the blocks themselves alive. A block is
- * partially truncated if some data below the new i_size is referenced
- * from it (and it is on the path to the first completely truncated
- * data block, indeed). We have to free the top of that path along
- * with everything to the right of the path. Since no allocation
- * past the truncation point is possible until ext4_truncate()
- * finishes, we may safely do the latter, but top of branch may
- * require special attention - pageout below the truncation point
- * might try to populate it.
- *
- * We atomically detach the top of branch from the tree, store the
- * block number of its root in *@top, pointers to buffer_heads of
- * partially truncated blocks - in @chain[].bh and pointers to
- * their last elements that should not be removed - in
- * @chain[].p. Return value is the pointer to last filled element
- * of @chain.
- *
- * The work is left to the caller to do the actual freeing of subtrees:
- * a) free the subtree starting from *@top
- * b) free the subtrees whose roots are stored in
- * (@chain[i].p+1 .. end of @chain[i].bh->b_data)
- * c) free the subtrees growing from the inode past the @chain[0].
- * (no partially truncated stuff there). */
-
-static Indirect *ext4_find_shared(struct inode *inode, int depth,
- ext4_lblk_t offsets[4], Indirect chain[4],
- __le32 *top)
-{
- Indirect *partial, *p;
- int k, err;
-
- *top = 0;
- /* Make k index the deepest non-null offset + 1 */
- for (k = depth; k > 1 && !offsets[k-1]; k--)
- ;
- partial = ext4_get_branch(inode, k, offsets, chain, &err);
- /* Writer: pointers */
- if (!partial)
- partial = chain + k-1;
- /*
- * If the branch acquired continuation since we've looked at it -
- * fine, it should all survive and (new) top doesn't belong to us.
- */
- if (!partial->key && *partial->p)
- /* Writer: end */
- goto no_top;
- for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--)
- ;
- /*
- * OK, we've found the last block that must survive. The rest of our
- * branch should be detached before unlocking. However, if that rest
- * of branch is all ours and does not grow immediately from the inode
- * it's easier to cheat and just decrement partial->p.
- */
- if (p == chain + k - 1 && p > chain) {
- p->p--;
- } else {
- *top = *p->p;
- /* Nope, don't do this in ext4. Must leave the tree intact */
-#if 0
- *p->p = 0;
-#endif
- }
- /* Writer: end */
-
- while (partial > p) {
- brelse(partial->bh);
- partial--;
- }
-no_top:
- return partial;
-}
-
-/*
- * Zero a number of block pointers in either an inode or an indirect block.
- * If we restart the transaction we must again get write access to the
- * indirect block for further modification.
- *
- * We release `count' blocks on disk, but (last - first) may be greater
- * than `count' because there can be holes in there.
- *
- * Return 0 on success, 1 on invalid block range
- * and < 0 on fatal error.
- */
-static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
- struct buffer_head *bh,
- ext4_fsblk_t block_to_free,
- unsigned long count, __le32 *first,
- __le32 *last)
-{
- __le32 *p;
- int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED;
- int err;
-
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
- flags |= EXT4_FREE_BLOCKS_METADATA;
-
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
- count)) {
- EXT4_ERROR_INODE(inode, "attempt to clear invalid "
- "blocks %llu len %lu",
- (unsigned long long) block_to_free, count);
- return 1;
- }
-
- if (try_to_extend_transaction(handle, inode)) {
- if (bh) {
- BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, inode, bh);
- if (unlikely(err))
- goto out_err;
- }
- err = ext4_mark_inode_dirty(handle, inode);
- if (unlikely(err))
- goto out_err;
- err = ext4_truncate_restart_trans(handle, inode,
- ext4_blocks_for_truncate(inode));
- if (unlikely(err))
- goto out_err;
- if (bh) {
- BUFFER_TRACE(bh, "retaking write access");
- err = ext4_journal_get_write_access(handle, bh);
- if (unlikely(err))
- goto out_err;
- }
- }
-
- for (p = first; p < last; p++)
- *p = 0;
-
- ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
- return 0;
-out_err:
- ext4_std_error(inode->i_sb, err);
- return err;
-}
-
-/**
- * ext4_free_data - free a list of data blocks
- * @handle: handle for this transaction
- * @inode: inode we are dealing with
- * @this_bh: indirect buffer_head which contains *@first and *@last
- * @first: array of block numbers
- * @last: points immediately past the end of array
- *
- * We are freeing all blocks referred from that array (numbers are stored as
- * little-endian 32-bit) and updating @inode->i_blocks appropriately.
- *
- * We accumulate contiguous runs of blocks to free. Conveniently, if these
- * blocks are contiguous then releasing them at one time will only affect one
- * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't
- * actually use a lot of journal space.
- *
- * @this_bh will be %NULL if @first and @last point into the inode's direct
- * block pointers.
- */
-static void ext4_free_data(handle_t *handle, struct inode *inode,
- struct buffer_head *this_bh,
- __le32 *first, __le32 *last)
-{
- ext4_fsblk_t block_to_free = 0; /* Starting block # of a run */
- unsigned long count = 0; /* Number of blocks in the run */
- __le32 *block_to_free_p = NULL; /* Pointer into inode/ind
- corresponding to
- block_to_free */
- ext4_fsblk_t nr; /* Current block # */
- __le32 *p; /* Pointer into inode/ind
- for current block */
- int err = 0;
-
- if (this_bh) { /* For indirect block */
- BUFFER_TRACE(this_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, this_bh);
- /* Important: if we can't update the indirect pointers
- * to the blocks, we can't free them. */
- if (err)
- return;
- }
-
- for (p = first; p < last; p++) {
- nr = le32_to_cpu(*p);
- if (nr) {
- /* accumulate blocks to free if they're contiguous */
- if (count == 0) {
- block_to_free = nr;
- block_to_free_p = p;
- count = 1;
- } else if (nr == block_to_free + count) {
- count++;
- } else {
- err = ext4_clear_blocks(handle, inode, this_bh,
- block_to_free, count,
- block_to_free_p, p);
- if (err)
- break;
- block_to_free = nr;
- block_to_free_p = p;
- count = 1;
- }
- }
- }
-
- if (!err && count > 0)
- err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
- count, block_to_free_p, p);
- if (err < 0)
- /* fatal error */
- return;
-
- if (this_bh) {
- BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
-
- /*
- * The buffer head should have an attached journal head at this
- * point. However, if the data is corrupted and an indirect
- * block pointed to itself, it would have been detached when
- * the block was cleared. Check for this instead of OOPSing.
- */
- if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
- ext4_handle_dirty_metadata(handle, inode, this_bh);
- else
- EXT4_ERROR_INODE(inode,
- "circular indirect block detected at "
- "block %llu",
- (unsigned long long) this_bh->b_blocknr);
- }
-}
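
The run-coalescing idea can be shown in isolation. Below is a hypothetical
userspace model: free_run() stands in for ext4_clear_blocks()/
ext4_free_blocks(), and the block numbers are invented:

#include <stdio.h>

static void free_run(unsigned long start, unsigned long count)
{
	printf("free blocks %lu..%lu\n", start, start + count - 1);
}

int main(void)
{
	unsigned long blocks[] = { 100, 101, 102, 0, 200, 201 };
	unsigned long start = 0, count = 0;
	size_t i;

	for (i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++) {
		unsigned long nr = blocks[i];

		if (!nr)
			continue;		/* a hole */
		if (count && nr == start + count) {
			count++;		/* extends the current run */
		} else {
			if (count)
				free_run(start, count);
			start = nr;		/* start a new run */
			count = 1;
		}
	}
	if (count)
		free_run(start, count);		/* flush the final run */
	return 0;
}

This prints two runs (100..102 and 200..201), matching how ext4_free_data()
batches contiguous blocks so each run costs a single free call.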
-
-/**
- * ext4_free_branches - free an array of branches
- * @handle: JBD handle for this transaction
- * @inode: inode we are dealing with
- * @parent_bh: the buffer_head which contains *@first and *@last
- * @first: array of block numbers
- * @last: pointer immediately past the end of array
- * @depth: depth of the branches to free
- *
- * We are freeing all blocks referred from these branches (numbers are
- * stored as little-endian 32-bit) and updating @inode->i_blocks
- * appropriately.
- */
-static void ext4_free_branches(handle_t *handle, struct inode *inode,
- struct buffer_head *parent_bh,
- __le32 *first, __le32 *last, int depth)
-{
- ext4_fsblk_t nr;
- __le32 *p;
-
- if (ext4_handle_is_aborted(handle))
- return;
-
- if (depth--) {
- struct buffer_head *bh;
- int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- p = last;
- while (--p >= first) {
- nr = le32_to_cpu(*p);
- if (!nr)
- continue; /* A hole */
-
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
- nr, 1)) {
- EXT4_ERROR_INODE(inode,
- "invalid indirect mapped "
- "block %lu (level %d)",
- (unsigned long) nr, depth);
- break;
- }
-
- /* Go read the buffer for the next level down */
- bh = sb_bread(inode->i_sb, nr);
-
- /*
- * A read failure? Report error and clear slot
- * (should be rare).
- */
- if (!bh) {
- EXT4_ERROR_INODE_BLOCK(inode, nr,
- "Read failure");
- continue;
- }
-
- /* This zaps the entire block. Bottom up. */
- BUFFER_TRACE(bh, "free child branches");
- ext4_free_branches(handle, inode, bh,
- (__le32 *) bh->b_data,
- (__le32 *) bh->b_data + addr_per_block,
- depth);
- brelse(bh);
-
- /*
-			 * Everything below this pointer has been
- * released. Now let this top-of-subtree go.
- *
- * We want the freeing of this indirect block to be
- * atomic in the journal with the updating of the
- * bitmap block which owns it. So make some room in
- * the journal.
- *
- * We zero the parent pointer *after* freeing its
- * pointee in the bitmaps, so if extend_transaction()
- * for some reason fails to put the bitmap changes and
- * the release into the same transaction, recovery
- * will merely complain about releasing a free block,
- * rather than leaking blocks.
- */
- if (ext4_handle_is_aborted(handle))
- return;
- if (try_to_extend_transaction(handle, inode)) {
- ext4_mark_inode_dirty(handle, inode);
- ext4_truncate_restart_trans(handle, inode,
- ext4_blocks_for_truncate(inode));
- }
-
- /*
- * The forget flag here is critical because if
- * we are journaling (and not doing data
- * journaling), we have to make sure a revoke
- * record is written to prevent the journal
- * replay from overwriting the (former)
- * indirect block if it gets reallocated as a
- * data block. This must happen in the same
- * transaction where the data blocks are
- * actually freed.
- */
- ext4_free_blocks(handle, inode, NULL, nr, 1,
- EXT4_FREE_BLOCKS_METADATA|
- EXT4_FREE_BLOCKS_FORGET);
-
- if (parent_bh) {
- /*
- * The block which we have just freed is
- * pointed to by an indirect block: journal it
- */
- BUFFER_TRACE(parent_bh, "get_write_access");
- if (!ext4_journal_get_write_access(handle,
- parent_bh)){
- *p = 0;
- BUFFER_TRACE(parent_bh,
- "call ext4_handle_dirty_metadata");
- ext4_handle_dirty_metadata(handle,
- inode,
- parent_bh);
- }
- }
- }
- } else {
- /* We have reached the bottom of the tree. */
- BUFFER_TRACE(parent_bh, "free data blocks");
- ext4_free_data(handle, inode, parent_bh, first, last);
- }
-}
-
-void ext4_ind_truncate(struct inode *inode)
-{
- handle_t *handle;
- struct ext4_inode_info *ei = EXT4_I(inode);
- __le32 *i_data = ei->i_data;
- int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- struct address_space *mapping = inode->i_mapping;
- ext4_lblk_t offsets[4];
- Indirect chain[4];
- Indirect *partial;
- __le32 nr = 0;
- int n = 0;
- ext4_lblk_t last_block, max_block;
- loff_t page_len;
- unsigned blocksize = inode->i_sb->s_blocksize;
- int err;
-
- handle = start_transaction(inode);
- if (IS_ERR(handle))
- return; /* AKPM: return what? */
-
- last_block = (inode->i_size + blocksize-1)
- >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
- max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
- >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
-
- if (inode->i_size % PAGE_CACHE_SIZE != 0) {
- page_len = PAGE_CACHE_SIZE -
- (inode->i_size & (PAGE_CACHE_SIZE - 1));
-
- err = ext4_discard_partial_page_buffers(handle,
- mapping, inode->i_size, page_len, 0);
-
- if (err)
- goto out_stop;
- }
-
- if (last_block != max_block) {
- n = ext4_block_to_path(inode, last_block, offsets, NULL);
- if (n == 0)
- goto out_stop; /* error */
- }
-
- /*
- * OK. This truncate is going to happen. We add the inode to the
- * orphan list, so that if this truncate spans multiple transactions,
- * and we crash, we will resume the truncate when the filesystem
- * recovers. It also marks the inode dirty, to catch the new size.
- *
- * Implication: the file must always be in a sane, consistent
- * truncatable state while each transaction commits.
- */
- if (ext4_orphan_add(handle, inode))
- goto out_stop;
-
- /*
- * From here we block out all ext4_get_block() callers who want to
- * modify the block allocation tree.
- */
- down_write(&ei->i_data_sem);
-
- ext4_discard_preallocations(inode);
-
- /*
- * The orphan list entry will now protect us from any crash which
- * occurs before the truncate completes, so it is now safe to propagate
- * the new, shorter inode size (held for now in i_size) into the
- * on-disk inode. We do this via i_disksize, which is the value which
- * ext4 *really* writes onto the disk inode.
- */
- ei->i_disksize = inode->i_size;
-
- if (last_block == max_block) {
- /*
- * It is unnecessary to free any data blocks if last_block is
- * equal to the indirect block limit.
- */
- goto out_unlock;
- } else if (n == 1) { /* direct blocks */
- ext4_free_data(handle, inode, NULL, i_data+offsets[0],
- i_data + EXT4_NDIR_BLOCKS);
- goto do_indirects;
- }
-
- partial = ext4_find_shared(inode, n, offsets, chain, &nr);
- /* Kill the top of shared branch (not detached) */
- if (nr) {
- if (partial == chain) {
- /* Shared branch grows from the inode */
- ext4_free_branches(handle, inode, NULL,
- &nr, &nr+1, (chain+n-1) - partial);
- *partial->p = 0;
- /*
- * We mark the inode dirty prior to restart,
- * and prior to stop. No need for it here.
- */
- } else {
- /* Shared branch grows from an indirect block */
- BUFFER_TRACE(partial->bh, "get_write_access");
- ext4_free_branches(handle, inode, partial->bh,
- partial->p,
- partial->p+1, (chain+n-1) - partial);
- }
- }
- /* Clear the ends of indirect blocks on the shared branch */
- while (partial > chain) {
- ext4_free_branches(handle, inode, partial->bh, partial->p + 1,
- (__le32*)partial->bh->b_data+addr_per_block,
- (chain+n-1) - partial);
- BUFFER_TRACE(partial->bh, "call brelse");
- brelse(partial->bh);
- partial--;
- }
-do_indirects:
- /* Kill the remaining (whole) subtrees */
- switch (offsets[0]) {
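-	/* No break statements here: each case intentionally falls through,
-	 * because everything past the truncation point - including all
-	 * deeper (double/triple indirect) trees - must be freed as well. */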
- default:
- nr = i_data[EXT4_IND_BLOCK];
- if (nr) {
- ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
- i_data[EXT4_IND_BLOCK] = 0;
- }
- case EXT4_IND_BLOCK:
- nr = i_data[EXT4_DIND_BLOCK];
- if (nr) {
- ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
- i_data[EXT4_DIND_BLOCK] = 0;
- }
- case EXT4_DIND_BLOCK:
- nr = i_data[EXT4_TIND_BLOCK];
- if (nr) {
- ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
- i_data[EXT4_TIND_BLOCK] = 0;
- }
- case EXT4_TIND_BLOCK:
- ;
- }
-
-out_unlock:
- up_write(&ei->i_data_sem);
- inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
- ext4_mark_inode_dirty(handle, inode);
-
- /*
- * In a multi-transaction truncate, we only make the final transaction
- * synchronous
- */
- if (IS_SYNC(inode))
- ext4_handle_sync(handle);
-out_stop:
- /*
- * If this was a simple ftruncate(), and the file will remain alive
- * then we need to clear up the orphan record which we created above.
- * However, if this was a real unlink then we were called by
- * ext4_delete_inode(), and we allow that function to clean up the
- * orphan info for us.
- */
- if (inode->i_nlink)
- ext4_orphan_del(handle, inode);
-
- ext4_journal_stop(handle);
- trace_ext4_truncate_exit(inode);
-}
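
For reference, the n/offsets[] values used above follow the classic
ext2/3-style indirect layout computed by ext4_block_to_path() (defined
earlier in indirect.c). A hypothetical userspace sketch of the tiering
arithmetic, assuming 4 KiB blocks with 1024 pointers per indirect block:

#include <stdio.h>

#define NDIR		12	/* direct blocks held in the inode */
#define PER_BLOCK	1024UL	/* pointers per indirect block */

/* Return the path depth needed to reach logical block @blk. */
static int block_depth(unsigned long blk)
{
	if (blk < NDIR)
		return 1;			/* direct */
	blk -= NDIR;
	if (blk < PER_BLOCK)
		return 2;			/* indirect */
	blk -= PER_BLOCK;
	if (blk < PER_BLOCK * PER_BLOCK)
		return 3;			/* double indirect */
	return 4;				/* triple indirect */
}

int main(void)
{
	/* prints: 1 2 3 4 */
	printf("%d %d %d %d\n", block_depth(5), block_depth(500),
	       block_depth(50000), block_depth(5000000));
	return 0;
}

This is why the n == 1 case above frees only direct blocks, while deeper
truncation points walk a chain of up to four levels.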
-
diff --git a/ANDROID_3.4.5/fs/ext4/inode.c b/ANDROID_3.4.5/fs/ext4/inode.c
deleted file mode 100644
index c77b0bd2..00000000
--- a/ANDROID_3.4.5/fs/ext4/inode.c
+++ /dev/null
@@ -1,4676 +0,0 @@
-/*
- * linux/fs/ext4/inode.c
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- * from
- *
- * linux/fs/minix/inode.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * 64-bit file support on 64-bit platforms by Jakub Jelinek
- * (jj@sunsite.ms.mff.cuni.cz)
- *
- * Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000
- */
-
-#include <linux/fs.h>
-#include <linux/time.h>
-#include <linux/jbd2.h>
-#include <linux/highuid.h>
-#include <linux/pagemap.h>
-#include <linux/quotaops.h>
-#include <linux/string.h>
-#include <linux/buffer_head.h>
-#include <linux/writeback.h>
-#include <linux/pagevec.h>
-#include <linux/mpage.h>
-#include <linux/namei.h>
-#include <linux/uio.h>
-#include <linux/bio.h>
-#include <linux/workqueue.h>
-#include <linux/kernel.h>
-#include <linux/printk.h>
-#include <linux/slab.h>
-#include <linux/ratelimit.h>
-
-#include "ext4_jbd2.h"
-#include "xattr.h"
-#include "acl.h"
-#include "truncate.h"
-
-#include <trace/events/ext4.h>
-
-#define MPAGE_DA_EXTENT_TAIL 0x01
-
-static inline int ext4_begin_ordered_truncate(struct inode *inode,
- loff_t new_size)
-{
- trace_ext4_begin_ordered_truncate(inode, new_size);
- /*
- * If jinode is zero, then we never opened the file for
- * writing, so there's no need to call
-	 * jbd2_journal_begin_ordered_truncate() since there are no
- * outstanding writes we need to flush.
- */
- if (!EXT4_I(inode)->jinode)
- return 0;
- return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode),
- EXT4_I(inode)->jinode,
- new_size);
-}
-
-static void ext4_invalidatepage(struct page *page, unsigned long offset);
-static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
-static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
-static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
-static int __ext4_journalled_writepage(struct page *page, unsigned int len);
-static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
-static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
- struct inode *inode, struct page *page, loff_t from,
- loff_t length, int flags);
-
-/*
- * Test whether an inode is a fast symlink.
- */
-static int ext4_inode_is_fast_symlink(struct inode *inode)
-{
- int ea_blocks = EXT4_I(inode)->i_file_acl ?
- (inode->i_sb->s_blocksize >> 9) : 0;
-
- return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
-}
-
-/*
- * Restart the transaction associated with *handle. This does a commit,
- * so before we call here everything must be consistently dirtied against
- * this transaction.
- */
-int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
- int nblocks)
-{
- int ret;
-
- /*
- * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
- * moment, get_block can be called only for blocks inside i_size since
- * page cache has been already dropped and writes are blocked by
- * i_mutex. So we can safely drop the i_data_sem here.
- */
- BUG_ON(EXT4_JOURNAL(inode) == NULL);
- jbd_debug(2, "restarting handle %p\n", handle);
- up_write(&EXT4_I(inode)->i_data_sem);
- ret = ext4_journal_restart(handle, nblocks);
- down_write(&EXT4_I(inode)->i_data_sem);
- ext4_discard_preallocations(inode);
-
- return ret;
-}
-
-/*
- * Called at the last iput() if i_nlink is zero.
- */
-void ext4_evict_inode(struct inode *inode)
-{
- handle_t *handle;
- int err;
-
- trace_ext4_evict_inode(inode);
-
- ext4_ioend_wait(inode);
-
- if (inode->i_nlink) {
- /*
- * When journalling data dirty buffers are tracked only in the
- * journal. So although mm thinks everything is clean and
- * ready for reaping the inode might still have some pages to
- * write in the running transaction or waiting to be
- * checkpointed. Thus calling jbd2_journal_invalidatepage()
- * (via truncate_inode_pages()) to discard these buffers can
- * cause data loss. Also even if we did not discard these
- * buffers, we would have no way to find them after the inode
- * is reaped and thus user could see stale data if he tries to
- * read them before the transaction is checkpointed. So be
- * careful and force everything to disk here... We use
- * ei->i_datasync_tid to store the newest transaction
- * containing inode's data.
- *
- * Note that directories do not have this problem because they
- * don't use page cache.
- */
- if (ext4_should_journal_data(inode) &&
- (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
- journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
- tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
-
- jbd2_log_start_commit(journal, commit_tid);
- jbd2_log_wait_commit(journal, commit_tid);
- filemap_write_and_wait(&inode->i_data);
- }
- truncate_inode_pages(&inode->i_data, 0);
- goto no_delete;
- }
-
- if (!is_bad_inode(inode))
- dquot_initialize(inode);
-
- if (ext4_should_order_data(inode))
- ext4_begin_ordered_truncate(inode, 0);
- truncate_inode_pages(&inode->i_data, 0);
-
- if (is_bad_inode(inode))
- goto no_delete;
-
- handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3);
- if (IS_ERR(handle)) {
- ext4_std_error(inode->i_sb, PTR_ERR(handle));
- /*
- * If we're going to skip the normal cleanup, we still need to
- * make sure that the in-core orphan linked list is properly
- * cleaned up.
- */
- ext4_orphan_del(NULL, inode);
- goto no_delete;
- }
-
- if (IS_SYNC(inode))
- ext4_handle_sync(handle);
- inode->i_size = 0;
- err = ext4_mark_inode_dirty(handle, inode);
- if (err) {
- ext4_warning(inode->i_sb,
- "couldn't mark inode dirty (err %d)", err);
- goto stop_handle;
- }
- if (inode->i_blocks)
- ext4_truncate(inode);
-
- /*
- * ext4_ext_truncate() doesn't reserve any slop when it
- * restarts journal transactions; therefore there may not be
- * enough credits left in the handle to remove the inode from
- * the orphan list and set the dtime field.
- */
- if (!ext4_handle_has_enough_credits(handle, 3)) {
- err = ext4_journal_extend(handle, 3);
- if (err > 0)
- err = ext4_journal_restart(handle, 3);
- if (err != 0) {
- ext4_warning(inode->i_sb,
- "couldn't extend journal (err %d)", err);
- stop_handle:
- ext4_journal_stop(handle);
- ext4_orphan_del(NULL, inode);
- goto no_delete;
- }
- }
-
- /*
- * Kill off the orphan record which ext4_truncate created.
- * AKPM: I think this can be inside the above `if'.
- * Note that ext4_orphan_del() has to be able to cope with the
- * deletion of a non-existent orphan - this is because we don't
- * know if ext4_truncate() actually created an orphan record.
- * (Well, we could do this if we need to, but heck - it works)
- */
- ext4_orphan_del(handle, inode);
- EXT4_I(inode)->i_dtime = get_seconds();
-
- /*
- * One subtle ordering requirement: if anything has gone wrong
- * (transaction abort, IO errors, whatever), then we can still
- * do these next steps (the fs will already have been marked as
- * having errors), but we can't free the inode if the mark_dirty
- * fails.
- */
- if (ext4_mark_inode_dirty(handle, inode))
- /* If that failed, just do the required in-core inode clear. */
- ext4_clear_inode(inode);
- else
- ext4_free_inode(handle, inode);
- ext4_journal_stop(handle);
- return;
-no_delete:
- ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
-}
-
-#ifdef CONFIG_QUOTA
-qsize_t *ext4_get_reserved_space(struct inode *inode)
-{
- return &EXT4_I(inode)->i_reserved_quota;
-}
-#endif
-
-/*
- * Calculate the number of metadata blocks that need to be reserved
- * to allocate a block located at @lblock
- */
-static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
-{
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- return ext4_ext_calc_metadata_amount(inode, lblock);
-
- return ext4_ind_calc_metadata_amount(inode, lblock);
-}
-
-/*
- * Called with i_data_sem down, which is important since we can call
- * ext4_discard_preallocations() from here.
- */
-void ext4_da_update_reserve_space(struct inode *inode,
- int used, int quota_claim)
-{
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- struct ext4_inode_info *ei = EXT4_I(inode);
-
- spin_lock(&ei->i_block_reservation_lock);
- trace_ext4_da_update_reserve_space(inode, used, quota_claim);
- if (unlikely(used > ei->i_reserved_data_blocks)) {
- ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
- "with only %d reserved data blocks",
- __func__, inode->i_ino, used,
- ei->i_reserved_data_blocks);
- WARN_ON(1);
- used = ei->i_reserved_data_blocks;
- }
-
- /* Update per-inode reservations */
- ei->i_reserved_data_blocks -= used;
- ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
- percpu_counter_sub(&sbi->s_dirtyclusters_counter,
- used + ei->i_allocated_meta_blocks);
- ei->i_allocated_meta_blocks = 0;
-
- if (ei->i_reserved_data_blocks == 0) {
- /*
- * We can release all of the reserved metadata blocks
- * only when we have written all of the delayed
- * allocation blocks.
- */
- percpu_counter_sub(&sbi->s_dirtyclusters_counter,
- ei->i_reserved_meta_blocks);
- ei->i_reserved_meta_blocks = 0;
- ei->i_da_metadata_calc_len = 0;
- }
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-
- /* Update quota subsystem for data blocks */
- if (quota_claim)
- dquot_claim_block(inode, EXT4_C2B(sbi, used));
- else {
- /*
- * We did fallocate with an offset that is already delayed
- * allocated. So on delayed allocated writeback we should
- * not re-claim the quota for fallocated blocks.
- */
- dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
- }
-
- /*
- * If we have done all the pending block allocations and if
- * there aren't any writers on the inode, we can discard the
- * inode's preallocations.
- */
- if ((ei->i_reserved_data_blocks == 0) &&
- (atomic_read(&inode->i_writecount) == 0))
- ext4_discard_preallocations(inode);
-}
-
-static int __check_block_validity(struct inode *inode, const char *func,
- unsigned int line,
- struct ext4_map_blocks *map)
-{
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
- map->m_len)) {
- ext4_error_inode(inode, func, line, map->m_pblk,
- "lblock %lu mapped to illegal pblock "
- "(length %d)", (unsigned long) map->m_lblk,
- map->m_len);
- return -EIO;
- }
- return 0;
-}
-
-#define check_block_validity(inode, map) \
- __check_block_validity((inode), __func__, __LINE__, (map))
-
-/*
- * Return the number of contiguous dirty pages in a given inode
- * starting at page frame idx.
- */
-static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
- unsigned int max_pages)
-{
- struct address_space *mapping = inode->i_mapping;
- pgoff_t index;
- struct pagevec pvec;
- pgoff_t num = 0;
- int i, nr_pages, done = 0;
-
- if (max_pages == 0)
- return 0;
- pagevec_init(&pvec, 0);
- while (!done) {
- index = idx;
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY,
- (pgoff_t)PAGEVEC_SIZE);
- if (nr_pages == 0)
- break;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
- struct buffer_head *bh, *head;
-
- lock_page(page);
- if (unlikely(page->mapping != mapping) ||
- !PageDirty(page) ||
- PageWriteback(page) ||
- page->index != idx) {
- done = 1;
- unlock_page(page);
- break;
- }
- if (page_has_buffers(page)) {
- bh = head = page_buffers(page);
- do {
- if (!buffer_delay(bh) &&
- !buffer_unwritten(bh))
- done = 1;
- bh = bh->b_this_page;
- } while (!done && (bh != head));
- }
- unlock_page(page);
- if (done)
- break;
- idx++;
- num++;
- if (num >= max_pages) {
- done = 1;
- break;
- }
- }
- pagevec_release(&pvec);
- }
- return num;
-}
-
-/*
- * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map.
- */
-static void set_buffers_da_mapped(struct inode *inode,
- struct ext4_map_blocks *map)
-{
- struct address_space *mapping = inode->i_mapping;
- struct pagevec pvec;
- int i, nr_pages;
- pgoff_t index, end;
-
- index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
- end = (map->m_lblk + map->m_len - 1) >>
- (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
- pagevec_init(&pvec, 0);
- while (index <= end) {
- nr_pages = pagevec_lookup(&pvec, mapping, index,
- min(end - index + 1,
- (pgoff_t)PAGEVEC_SIZE));
- if (nr_pages == 0)
- break;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
- struct buffer_head *bh, *head;
-
- if (unlikely(page->mapping != mapping) ||
- !PageDirty(page))
- break;
-
- if (page_has_buffers(page)) {
- bh = head = page_buffers(page);
- do {
- set_buffer_da_mapped(bh);
- bh = bh->b_this_page;
- } while (bh != head);
- }
- index++;
- }
- pagevec_release(&pvec);
- }
-}
-
-/*
- * The ext4_map_blocks() function tries to look up the requested blocks,
- * and returns if the blocks are already mapped.
- *
- * Otherwise, it takes the write lock of i_data_sem, allocates blocks,
- * stores the allocated blocks in the result buffer head and marks it
- * mapped.
- *
- * If the file is extent-based, it calls ext4_ext_map_blocks();
- * otherwise, it calls ext4_ind_map_blocks() to handle indirect-mapped
- * files.
- *
- * On success, it returns the number of blocks mapped or allocated.
- * If create == 0 and the blocks are pre-allocated and uninitialized,
- * the result buffer head is unmapped. If create == 1, it will make sure
- * the buffer head is mapped.
- *
- * It returns 0 if a plain lookup failed (blocks have not been allocated);
- * in that case, the buffer head is unmapped.
- *
- * It returns the error in case of allocation failure.
- */
-int ext4_map_blocks(handle_t *handle, struct inode *inode,
- struct ext4_map_blocks *map, int flags)
-{
- int retval;
-
- map->m_flags = 0;
- ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
- "logical block %lu\n", inode->i_ino, flags, map->m_len,
- (unsigned long) map->m_lblk);
- /*
- * Try to see if we can get the block without requesting a new
- * file system block.
- */
- down_read((&EXT4_I(inode)->i_data_sem));
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
- retval = ext4_ext_map_blocks(handle, inode, map, flags &
- EXT4_GET_BLOCKS_KEEP_SIZE);
- } else {
- retval = ext4_ind_map_blocks(handle, inode, map, flags &
- EXT4_GET_BLOCKS_KEEP_SIZE);
- }
- up_read((&EXT4_I(inode)->i_data_sem));
-
- if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
- int ret = check_block_validity(inode, map);
- if (ret != 0)
- return ret;
- }
-
-	/* If it is only a block(s) lookup */
- if ((flags & EXT4_GET_BLOCKS_CREATE) == 0)
- return retval;
-
- /*
-	 * Return if the blocks have already been allocated.
-	 *
-	 * Note that if blocks have been preallocated,
-	 * ext4_ext_get_block() returns with create = 0
-	 * and the buffer head unmapped.
- */
- if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
- return retval;
-
- /*
- * When we call get_blocks without the create flag, the
- * BH_Unwritten flag could have gotten set if the blocks
-	 * requested were part of an uninitialized extent. We need to
- * clear this flag now that we are committed to convert all or
- * part of the uninitialized extent to be an initialized
- * extent. This is because we need to avoid the combination
- * of BH_Unwritten and BH_Mapped flags being simultaneously
- * set on the buffer_head.
- */
- map->m_flags &= ~EXT4_MAP_UNWRITTEN;
-
- /*
-	 * New block allocation and/or writing to an uninitialized extent
- * will possibly result in updating i_data, so we take
- * the write lock of i_data_sem, and call get_blocks()
- * with create == 1 flag.
- */
- down_write((&EXT4_I(inode)->i_data_sem));
-
- /*
-	 * If the caller is from the delayed allocation writeout path,
-	 * we have already reserved fs blocks for allocation, so
-	 * let the underlying get_block() function know to
-	 * avoid double accounting.
- */
- if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
- ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
- /*
- * We need to check for EXT4 here because migrate
- * could have changed the inode type in between
- */
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
- retval = ext4_ext_map_blocks(handle, inode, map, flags);
- } else {
- retval = ext4_ind_map_blocks(handle, inode, map, flags);
-
- if (retval > 0 && map->m_flags & EXT4_MAP_NEW) {
- /*
- * We allocated new blocks which will result in
- * i_data's format changing. Force the migrate
- * to fail by clearing migrate flags
- */
- ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
- }
-
- /*
- * Update reserved blocks/metadata blocks after successful
- * block allocation which had been deferred till now. We don't
- * support fallocate for non extent files. So we can update
- * reserve space here.
- */
- if ((retval > 0) &&
- (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
- ext4_da_update_reserve_space(inode, retval, 1);
- }
- if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
- ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
-
- /* If we have successfully mapped the delayed allocated blocks,
-		 * set the BH_Da_Mapped bit on them. It's important to do this
- * under the protection of i_data_sem.
- */
- if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
- set_buffers_da_mapped(inode, map);
- }
-
- up_write((&EXT4_I(inode)->i_data_sem));
- if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
- int ret = check_block_validity(inode, map);
- if (ret != 0)
- return ret;
- }
- return retval;
-}
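
A hypothetical caller-side sketch of the lookup-only contract described
above (create == 0, single block; illustration only):

	struct ext4_map_blocks map;
	int ret;

	map.m_lblk = lblk;	/* logical block to look up */
	map.m_len = 1;

	ret = ext4_map_blocks(NULL, inode, &map, 0);	/* no allocation */
	if (ret > 0 && (map.m_flags & EXT4_MAP_MAPPED))
		;	/* map.m_pblk holds the physical block number */
	else if (ret == 0)
		;	/* hole, or delayed/unwritten block not yet mapped */
	else
		;	/* ret < 0: hard error, e.g. -EIO */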
-
-/* Maximum number of blocks we map for direct IO at once. */
-#define DIO_MAX_BLOCKS 4096
-
-static int _ext4_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh, int flags)
-{
- handle_t *handle = ext4_journal_current_handle();
- struct ext4_map_blocks map;
- int ret = 0, started = 0;
- int dio_credits;
-
- map.m_lblk = iblock;
- map.m_len = bh->b_size >> inode->i_blkbits;
-
- if (flags && !handle) {
- /* Direct IO write... */
- if (map.m_len > DIO_MAX_BLOCKS)
- map.m_len = DIO_MAX_BLOCKS;
- dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
- handle = ext4_journal_start(inode, dio_credits);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- return ret;
- }
- started = 1;
- }
-
- ret = ext4_map_blocks(handle, inode, &map, flags);
- if (ret > 0) {
- map_bh(bh, inode->i_sb, map.m_pblk);
- bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
- bh->b_size = inode->i_sb->s_blocksize * map.m_len;
- ret = 0;
- }
- if (started)
- ext4_journal_stop(handle);
- return ret;
-}
-
-int ext4_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh, int create)
-{
- return _ext4_get_block(inode, iblock, bh,
- create ? EXT4_GET_BLOCKS_CREATE : 0);
-}
-
-/*
- * `handle' can be NULL if create is zero
- */
-struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
- ext4_lblk_t block, int create, int *errp)
-{
- struct ext4_map_blocks map;
- struct buffer_head *bh;
- int fatal = 0, err;
-
- J_ASSERT(handle != NULL || create == 0);
-
- map.m_lblk = block;
- map.m_len = 1;
- err = ext4_map_blocks(handle, inode, &map,
- create ? EXT4_GET_BLOCKS_CREATE : 0);
-
- if (err < 0)
- *errp = err;
- if (err <= 0)
- return NULL;
- *errp = 0;
-
- bh = sb_getblk(inode->i_sb, map.m_pblk);
- if (!bh) {
- *errp = -EIO;
- return NULL;
- }
- if (map.m_flags & EXT4_MAP_NEW) {
- J_ASSERT(create != 0);
- J_ASSERT(handle != NULL);
-
- /*
- * Now that we do not always journal data, we should
- * keep in mind whether this should always journal the
- * new buffer as metadata. For now, regular file
- * writes use ext4_get_block instead, so it's not a
- * problem.
- */
- lock_buffer(bh);
- BUFFER_TRACE(bh, "call get_create_access");
- fatal = ext4_journal_get_create_access(handle, bh);
- if (!fatal && !buffer_uptodate(bh)) {
- memset(bh->b_data, 0, inode->i_sb->s_blocksize);
- set_buffer_uptodate(bh);
- }
- unlock_buffer(bh);
- BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, inode, bh);
- if (!fatal)
- fatal = err;
- } else {
- BUFFER_TRACE(bh, "not a new buffer");
- }
- if (fatal) {
- *errp = fatal;
- brelse(bh);
- bh = NULL;
- }
- return bh;
-}
-
-struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
- ext4_lblk_t block, int create, int *err)
-{
- struct buffer_head *bh;
-
- bh = ext4_getblk(handle, inode, block, create, err);
- if (!bh)
- return bh;
- if (buffer_uptodate(bh))
- return bh;
- ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
- wait_on_buffer(bh);
- if (buffer_uptodate(bh))
- return bh;
- put_bh(bh);
- *err = -EIO;
- return NULL;
-}
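
A hypothetical usage sketch for ext4_bread(). Note that err must be
initialized by the caller: a hole makes ext4_getblk() return NULL without
touching *errp, so err keeps its initial value in that case:

	int err = 0;
	struct buffer_head *bh;

	bh = ext4_bread(NULL, inode, blk, 0, &err);	/* create == 0 */
	if (!bh) {
		/* err == -EIO on read failure; still 0 for a hole */
	} else {
		/* use bh->b_data ... */
		brelse(bh);
	}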
-
-static int walk_page_buffers(handle_t *handle,
- struct buffer_head *head,
- unsigned from,
- unsigned to,
- int *partial,
- int (*fn)(handle_t *handle,
- struct buffer_head *bh))
-{
- struct buffer_head *bh;
- unsigned block_start, block_end;
- unsigned blocksize = head->b_size;
- int err, ret = 0;
- struct buffer_head *next;
-
- for (bh = head, block_start = 0;
- ret == 0 && (bh != head || !block_start);
- block_start = block_end, bh = next) {
- next = bh->b_this_page;
- block_end = block_start + blocksize;
- if (block_end <= from || block_start >= to) {
- if (partial && !buffer_uptodate(bh))
- *partial = 1;
- continue;
- }
- err = (*fn)(handle, bh);
- if (!ret)
- ret = err;
- }
- return ret;
-}
-
-/*
- * To preserve ordering, it is essential that the hole instantiation and
- * the data write be encapsulated in a single transaction. We cannot
- * close off a transaction and start a new one between the ext4_get_block()
- * and the commit_write(). So doing the jbd2_journal_start at the start of
- * prepare_write() is the right place.
- *
- * Also, this function can nest inside ext4_writepage() ->
- * block_write_full_page(). In that case, we *know* that ext4_writepage()
- * has generated enough buffer credits to do the whole page. So we won't
- * block on the journal in that case, which is good, because the caller may
- * be PF_MEMALLOC.
- *
- * By accident, ext4 can be reentered when a transaction is open via
- * quota file writes. If we were to commit the transaction while thus
- * reentered, there can be a deadlock - we would be holding a quota
- * lock, and the commit would never complete if another thread had a
- * transaction open and was blocking on the quota lock - a ranking
- * violation.
- *
- * So what we do is to rely on the fact that jbd2_journal_stop/journal_start
- * will _not_ run commit under these circumstances because handle->h_ref
- * is elevated. We'll still have enough credits for the tiny quotafile
- * write.
- */
-static int do_journal_get_write_access(handle_t *handle,
- struct buffer_head *bh)
-{
- int dirty = buffer_dirty(bh);
- int ret;
-
- if (!buffer_mapped(bh) || buffer_freed(bh))
- return 0;
- /*
- * __block_write_begin() could have dirtied some buffers. Clean
- * the dirty bit as jbd2_journal_get_write_access() could complain
- * otherwise about fs integrity issues. Setting of the dirty bit
- * by __block_write_begin() isn't a real problem here as we clear
- * the bit before releasing a page lock and thus writeback cannot
- * ever write the buffer.
- */
- if (dirty)
- clear_buffer_dirty(bh);
- ret = ext4_journal_get_write_access(handle, bh);
- if (!ret && dirty)
- ret = ext4_handle_dirty_metadata(handle, NULL, bh);
- return ret;
-}
-
-static int ext4_get_block_write(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
-static int ext4_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata)
-{
- struct inode *inode = mapping->host;
- int ret, needed_blocks;
- handle_t *handle;
- int retries = 0;
- struct page *page;
- pgoff_t index;
- unsigned from, to;
-
- trace_ext4_write_begin(inode, pos, len, flags);
- /*
- * Reserve one block more for addition to orphan list in case
- * we allocate blocks but write fails for some reason
- */
- needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
- index = pos >> PAGE_CACHE_SHIFT;
- from = pos & (PAGE_CACHE_SIZE - 1);
- to = from + len;
-
-retry:
- handle = ext4_journal_start(inode, needed_blocks);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- goto out;
- }
-
- /* We cannot recurse into the filesystem as the transaction is already
- * started */
- flags |= AOP_FLAG_NOFS;
-
- page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page) {
- ext4_journal_stop(handle);
- ret = -ENOMEM;
- goto out;
- }
- *pagep = page;
-
- if (ext4_should_dioread_nolock(inode))
- ret = __block_write_begin(page, pos, len, ext4_get_block_write);
- else
- ret = __block_write_begin(page, pos, len, ext4_get_block);
-
- if (!ret && ext4_should_journal_data(inode)) {
- ret = walk_page_buffers(handle, page_buffers(page),
- from, to, NULL, do_journal_get_write_access);
- }
-
- if (ret) {
- unlock_page(page);
- page_cache_release(page);
- /*
- * __block_write_begin may have instantiated a few blocks
- * outside i_size. Trim these off again. Don't need
- * i_size_read because we hold i_mutex.
- *
- * Add inode to orphan list in case we crash before
- * truncate finishes
- */
- if (pos + len > inode->i_size && ext4_can_truncate(inode))
- ext4_orphan_add(handle, inode);
-
- ext4_journal_stop(handle);
- if (pos + len > inode->i_size) {
- ext4_truncate_failed_write(inode);
- /*
- * If truncate failed early the inode might
- * still be on the orphan list; we need to
- * make sure the inode is removed from the
- * orphan list in that case.
- */
- if (inode->i_nlink)
- ext4_orphan_del(NULL, inode);
- }
- }
-
- if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
- goto retry;
-out:
- return ret;
-}
-
-/* For write_end() in data=journal mode */
-static int write_end_fn(handle_t *handle, struct buffer_head *bh)
-{
- if (!buffer_mapped(bh) || buffer_freed(bh))
- return 0;
- set_buffer_uptodate(bh);
- return ext4_handle_dirty_metadata(handle, NULL, bh);
-}
-
-static int ext4_generic_write_end(struct file *file,
- struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
-{
- int i_size_changed = 0;
- struct inode *inode = mapping->host;
- handle_t *handle = ext4_journal_current_handle();
-
- copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
-
- /*
- * No need to use i_size_read() here, the i_size
- * cannot change under us because we hold i_mutex.
- *
- * But it's important to update i_size while still holding page lock:
- * page writeout could otherwise come in and zero beyond i_size.
- */
- if (pos + copied > inode->i_size) {
- i_size_write(inode, pos + copied);
- i_size_changed = 1;
- }
-
- if (pos + copied > EXT4_I(inode)->i_disksize) {
-		/* We need to mark the inode dirty even if
-		 * new_i_size is less than inode->i_size
-		 * but greater than i_disksize (hint: delalloc).
- */
- ext4_update_i_disksize(inode, (pos + copied));
- i_size_changed = 1;
- }
- unlock_page(page);
- page_cache_release(page);
-
- /*
- * Don't mark the inode dirty under page lock. First, it unnecessarily
- * makes the holding time of page lock longer. Second, it forces lock
- * ordering of page lock and transaction start for journaling
- * filesystems.
- */
- if (i_size_changed)
- ext4_mark_inode_dirty(handle, inode);
-
- return copied;
-}
-
-/*
- * We need to pick up the new inode size which generic_commit_write gave us.
- * `file' can be NULL - eg, when called from page_symlink().
- *
- * ext4 never places buffers on inode->i_mapping->private_list. Metadata
- * buffers are managed internally.
- */
-static int ext4_ordered_write_end(struct file *file,
- struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
-{
- handle_t *handle = ext4_journal_current_handle();
- struct inode *inode = mapping->host;
- int ret = 0, ret2;
-
- trace_ext4_ordered_write_end(inode, pos, len, copied);
- ret = ext4_jbd2_file_inode(handle, inode);
-
- if (ret == 0) {
- ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
- page, fsdata);
- copied = ret2;
- if (pos + len > inode->i_size && ext4_can_truncate(inode))
-			/* If we have allocated more blocks and copied
-			 * less, we will have blocks allocated outside
-			 * inode->i_size, so truncate them.
- */
- ext4_orphan_add(handle, inode);
- if (ret2 < 0)
- ret = ret2;
- } else {
- unlock_page(page);
- page_cache_release(page);
- }
-
- ret2 = ext4_journal_stop(handle);
- if (!ret)
- ret = ret2;
-
- if (pos + len > inode->i_size) {
- ext4_truncate_failed_write(inode);
- /*
- * If truncate failed early the inode might still be
- * on the orphan list; we need to make sure the inode
- * is removed from the orphan list in that case.
- */
- if (inode->i_nlink)
- ext4_orphan_del(NULL, inode);
- }
-
-
- return ret ? ret : copied;
-}
-
-static int ext4_writeback_write_end(struct file *file,
- struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
-{
- handle_t *handle = ext4_journal_current_handle();
- struct inode *inode = mapping->host;
- int ret = 0, ret2;
-
- trace_ext4_writeback_write_end(inode, pos, len, copied);
- ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
- page, fsdata);
- copied = ret2;
- if (pos + len > inode->i_size && ext4_can_truncate(inode))
-		/* If we have allocated more blocks and copied
-		 * less, we will have blocks allocated outside
-		 * inode->i_size, so truncate them.
- */
- ext4_orphan_add(handle, inode);
-
- if (ret2 < 0)
- ret = ret2;
-
- ret2 = ext4_journal_stop(handle);
- if (!ret)
- ret = ret2;
-
- if (pos + len > inode->i_size) {
- ext4_truncate_failed_write(inode);
- /*
- * If truncate failed early the inode might still be
- * on the orphan list; we need to make sure the inode
- * is removed from the orphan list in that case.
- */
- if (inode->i_nlink)
- ext4_orphan_del(NULL, inode);
- }
-
- return ret ? ret : copied;
-}
-
-static int ext4_journalled_write_end(struct file *file,
- struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
-{
- handle_t *handle = ext4_journal_current_handle();
- struct inode *inode = mapping->host;
- int ret = 0, ret2;
- int partial = 0;
- unsigned from, to;
- loff_t new_i_size;
-
- trace_ext4_journalled_write_end(inode, pos, len, copied);
- from = pos & (PAGE_CACHE_SIZE - 1);
- to = from + len;
-
- BUG_ON(!ext4_handle_valid(handle));
-
- if (copied < len) {
- if (!PageUptodate(page))
- copied = 0;
- page_zero_new_buffers(page, from+copied, to);
- }
-
- ret = walk_page_buffers(handle, page_buffers(page), from,
- to, &partial, write_end_fn);
- if (!partial)
- SetPageUptodate(page);
- new_i_size = pos + copied;
- if (new_i_size > inode->i_size)
- i_size_write(inode, pos+copied);
- ext4_set_inode_state(inode, EXT4_STATE_JDATA);
- EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
- if (new_i_size > EXT4_I(inode)->i_disksize) {
- ext4_update_i_disksize(inode, new_i_size);
- ret2 = ext4_mark_inode_dirty(handle, inode);
- if (!ret)
- ret = ret2;
- }
-
- unlock_page(page);
- page_cache_release(page);
- if (pos + len > inode->i_size && ext4_can_truncate(inode))
-	/* If we have allocated more blocks and copied
-	 * less, we will have blocks allocated outside
-	 * inode->i_size, so truncate them.
- */
- ext4_orphan_add(handle, inode);
-
- ret2 = ext4_journal_stop(handle);
- if (!ret)
- ret = ret2;
- if (pos + len > inode->i_size) {
- ext4_truncate_failed_write(inode);
- /*
- * If truncate failed early the inode might still be
- * on the orphan list; we need to make sure the inode
- * is removed from the orphan list in that case.
- */
- if (inode->i_nlink)
- ext4_orphan_del(NULL, inode);
- }
-
- return ret ? ret : copied;
-}
-
-/*
- * Reserve a single cluster located at lblock
- */
-static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
-{
- int retries = 0;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- struct ext4_inode_info *ei = EXT4_I(inode);
- unsigned int md_needed;
- int ret;
-
- /*
-	 * Recalculate the number of metadata blocks to reserve
-	 * in order to allocate nrblocks;
-	 * the worst case is one extent per block.
- */
-repeat:
- spin_lock(&ei->i_block_reservation_lock);
- md_needed = EXT4_NUM_B2C(sbi,
- ext4_calc_metadata_amount(inode, lblock));
- trace_ext4_da_reserve_space(inode, md_needed);
- spin_unlock(&ei->i_block_reservation_lock);
-
- /*
- * We will charge metadata quota at writeout time; this saves
- * us from metadata over-estimation, though we may go over by
- * a small amount in the end. Here we just reserve for data.
- */
- ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
- if (ret)
- return ret;
- /*
- * We do still charge estimated metadata to the sb though;
- * we cannot afford to run out of free blocks.
- */
- if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
- dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
- if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
- yield();
- goto repeat;
- }
- return -ENOSPC;
- }
- spin_lock(&ei->i_block_reservation_lock);
- ei->i_reserved_data_blocks++;
- ei->i_reserved_meta_blocks += md_needed;
- spin_unlock(&ei->i_block_reservation_lock);
-
- return 0; /* success */
-}
-
-static void ext4_da_release_space(struct inode *inode, int to_free)
-{
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- struct ext4_inode_info *ei = EXT4_I(inode);
-
- if (!to_free)
- return; /* Nothing to release, exit */
-
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-
- trace_ext4_da_release_space(inode, to_free);
- if (unlikely(to_free > ei->i_reserved_data_blocks)) {
- /*
- * if there aren't enough reserved blocks, then the
- * counter is messed up somewhere. Since this
-		 * function is called from invalidatepage, it's
- * harmless to return without any action.
- */
- ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
- "ino %lu, to_free %d with only %d reserved "
- "data blocks", inode->i_ino, to_free,
- ei->i_reserved_data_blocks);
- WARN_ON(1);
- to_free = ei->i_reserved_data_blocks;
- }
- ei->i_reserved_data_blocks -= to_free;
-
- if (ei->i_reserved_data_blocks == 0) {
- /*
- * We can release all of the reserved metadata blocks
- * only when we have written all of the delayed
- * allocation blocks.
- * Note that in case of bigalloc, i_reserved_meta_blocks,
- * i_reserved_data_blocks, etc. refer to number of clusters.
- */
- percpu_counter_sub(&sbi->s_dirtyclusters_counter,
- ei->i_reserved_meta_blocks);
- ei->i_reserved_meta_blocks = 0;
- ei->i_da_metadata_calc_len = 0;
- }
-
- /* update fs dirty data blocks counter */
- percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
-
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-
- dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
-}
-
-static void ext4_da_page_release_reservation(struct page *page,
- unsigned long offset)
-{
- int to_release = 0;
- struct buffer_head *head, *bh;
- unsigned int curr_off = 0;
- struct inode *inode = page->mapping->host;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- int num_clusters;
-
- head = page_buffers(page);
- bh = head;
- do {
- unsigned int next_off = curr_off + bh->b_size;
-
- if ((offset <= curr_off) && (buffer_delay(bh))) {
- to_release++;
- clear_buffer_delay(bh);
- clear_buffer_da_mapped(bh);
- }
- curr_off = next_off;
- } while ((bh = bh->b_this_page) != head);
-
- /* If we have released all the blocks belonging to a cluster, then we
- * need to release the reserved space for that cluster. */
- num_clusters = EXT4_NUM_B2C(sbi, to_release);
- while (num_clusters > 0) {
- ext4_fsblk_t lblk;
- lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
- ((num_clusters - 1) << sbi->s_cluster_bits);
- if (sbi->s_cluster_ratio == 1 ||
- !ext4_find_delalloc_cluster(inode, lblk, 1))
- ext4_da_release_space(inode, 1);
-
- num_clusters--;
- }
-}
-
-/*
- * Delayed allocation stuff
- */
-
-/*
- * mpage_da_submit_io - walk through the extent of pages and try to write
- * them with the writepage() callback
- *
- * @mpd->inode: inode
- * @mpd->first_page: first page of the extent
- * @mpd->next_page: page after the last page of the extent
- *
- * By the time mpage_da_submit_io() is called we expect all blocks
- * to be allocated. This may be wrong if allocation failed.
- *
- * As pages are already locked by write_cache_pages(), we can't use it
- */
-static int mpage_da_submit_io(struct mpage_da_data *mpd,
- struct ext4_map_blocks *map)
-{
- struct pagevec pvec;
- unsigned long index, end;
- int ret = 0, err, nr_pages, i;
- struct inode *inode = mpd->inode;
- struct address_space *mapping = inode->i_mapping;
- loff_t size = i_size_read(inode);
- unsigned int len, block_start;
- struct buffer_head *bh, *page_bufs = NULL;
- int journal_data = ext4_should_journal_data(inode);
- sector_t pblock = 0, cur_logical = 0;
- struct ext4_io_submit io_submit;
-
- BUG_ON(mpd->next_page <= mpd->first_page);
- memset(&io_submit, 0, sizeof(io_submit));
- /*
- * We need to start from the first_page to the next_page - 1
- * to make sure we also write the mapped dirty buffer_heads.
- * If we look at mpd->b_blocknr we would only be looking
- * at the currently mapped buffer_heads.
- */
- index = mpd->first_page;
- end = mpd->next_page - 1;
-
- pagevec_init(&pvec, 0);
- while (index <= end) {
- nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
- if (nr_pages == 0)
- break;
- for (i = 0; i < nr_pages; i++) {
- int commit_write = 0, skip_page = 0;
- struct page *page = pvec.pages[i];
-
- index = page->index;
- if (index > end)
- break;
-
- if (index == size >> PAGE_CACHE_SHIFT)
- len = size & ~PAGE_CACHE_MASK;
- else
- len = PAGE_CACHE_SIZE;
- if (map) {
- cur_logical = index << (PAGE_CACHE_SHIFT -
- inode->i_blkbits);
- pblock = map->m_pblk + (cur_logical -
- map->m_lblk);
- }
- index++;
-
- BUG_ON(!PageLocked(page));
- BUG_ON(PageWriteback(page));
-
- /*
- * If the page does not have buffers (for
- * whatever reason), try to create them using
- * __block_write_begin. If this fails,
- * skip the page and move on.
- */
- if (!page_has_buffers(page)) {
- if (__block_write_begin(page, 0, len,
- noalloc_get_block_write)) {
- skip_page:
- unlock_page(page);
- continue;
- }
- commit_write = 1;
- }
-
- bh = page_bufs = page_buffers(page);
- block_start = 0;
- do {
- if (!bh)
- goto skip_page;
- if (map && (cur_logical >= map->m_lblk) &&
- (cur_logical <= (map->m_lblk +
- (map->m_len - 1)))) {
- if (buffer_delay(bh)) {
- clear_buffer_delay(bh);
- bh->b_blocknr = pblock;
- }
- if (buffer_da_mapped(bh))
- clear_buffer_da_mapped(bh);
- if (buffer_unwritten(bh) ||
- buffer_mapped(bh))
- BUG_ON(bh->b_blocknr != pblock);
- if (map->m_flags & EXT4_MAP_UNINIT)
- set_buffer_uninit(bh);
- clear_buffer_unwritten(bh);
- }
-
- /*
- * skip page if block allocation undone and
- * block is dirty
- */
- if (ext4_bh_delay_or_unwritten(NULL, bh))
- skip_page = 1;
- bh = bh->b_this_page;
- block_start += bh->b_size;
- cur_logical++;
- pblock++;
- } while (bh != page_bufs);
-
- if (skip_page)
- goto skip_page;
-
- if (commit_write)
- /* mark the buffer_heads as dirty & uptodate */
- block_commit_write(page, 0, len);
-
- clear_page_dirty_for_io(page);
- /*
- * Delalloc doesn't support data journalling,
- * but eventually maybe we'll lift this
- * restriction.
- */
- if (unlikely(journal_data && PageChecked(page)))
- err = __ext4_journalled_writepage(page, len);
- else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
- err = ext4_bio_write_page(&io_submit, page,
- len, mpd->wbc);
- else if (buffer_uninit(page_bufs)) {
- ext4_set_bh_endio(page_bufs, inode);
- err = block_write_full_page_endio(page,
- noalloc_get_block_write,
- mpd->wbc, ext4_end_io_buffer_write);
- } else
- err = block_write_full_page(page,
- noalloc_get_block_write, mpd->wbc);
-
- if (!err)
- mpd->pages_written++;
- /*
- * In error case, we have to continue because
- * remaining pages are still locked
- */
- if (ret == 0)
- ret = err;
- }
- pagevec_release(&pvec);
- }
- ext4_io_submit(&io_submit);
- return ret;
-}
-
-static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
-{
- int nr_pages, i;
- pgoff_t index, end;
- struct pagevec pvec;
- struct inode *inode = mpd->inode;
- struct address_space *mapping = inode->i_mapping;
-
- index = mpd->first_page;
- end = mpd->next_page - 1;
- while (index <= end) {
- nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
- if (nr_pages == 0)
- break;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
- if (page->index > end)
- break;
- BUG_ON(!PageLocked(page));
- BUG_ON(PageWriteback(page));
- block_invalidatepage(page, 0);
- ClearPageUptodate(page);
- unlock_page(page);
- }
- index = pvec.pages[nr_pages - 1]->index + 1;
- pagevec_release(&pvec);
- }
- return;
-}
-
-static void ext4_print_free_blocks(struct inode *inode)
-{
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- struct super_block *sb = inode->i_sb;
-
- ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld",
- EXT4_C2B(EXT4_SB(inode->i_sb),
- ext4_count_free_clusters(inode->i_sb)));
- ext4_msg(sb, KERN_CRIT, "Free/Dirty block details");
- ext4_msg(sb, KERN_CRIT, "free_blocks=%lld",
- (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
- percpu_counter_sum(&sbi->s_freeclusters_counter)));
- ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld",
- (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
- percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
- ext4_msg(sb, KERN_CRIT, "Block reservation details");
- ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u",
- EXT4_I(inode)->i_reserved_data_blocks);
- ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u",
- EXT4_I(inode)->i_reserved_meta_blocks);
- return;
-}
-
-/*
- * mpage_da_map_and_submit - go through the given space, map it
- * if necessary, and then submit it for I/O
- *
- * @mpd - bh describing space
- *
- * The function skips space we know is already mapped to disk blocks.
- *
- */
-static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
-{
- int err, blks, get_blocks_flags;
- struct ext4_map_blocks map, *mapp = NULL;
- sector_t next = mpd->b_blocknr;
- unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
- loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
- handle_t *handle = NULL;
-
- /*
- * If the blocks are mapped already, or we couldn't accumulate
- * any blocks, then proceed immediately to the submission stage.
- */
- if ((mpd->b_size == 0) ||
- ((mpd->b_state & (1 << BH_Mapped)) &&
- !(mpd->b_state & (1 << BH_Delay)) &&
- !(mpd->b_state & (1 << BH_Unwritten))))
- goto submit_io;
-
- handle = ext4_journal_current_handle();
- BUG_ON(!handle);
-
- /*
- * Call ext4_map_blocks() to allocate any delayed allocation
- * blocks, or to convert an uninitialized extent to be
- * initialized (in the case where we have written into
- * one or more preallocated blocks).
- *
- * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to
- * indicate that we are on the delayed allocation path. This
- * affects functions in many different parts of the allocation
- * call path. This flag exists primarily because we don't
- * want to change *many* call functions, so ext4_map_blocks()
- * will set the EXT4_STATE_DELALLOC_RESERVED flag once the
- * inode's allocation semaphore is taken.
- *
-	 * If the blocks in question were delalloc blocks, set
- * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
- * variables are updated after the blocks have been allocated.
- */
- map.m_lblk = next;
- map.m_len = max_blocks;
- get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
- if (ext4_should_dioread_nolock(mpd->inode))
- get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
- if (mpd->b_state & (1 << BH_Delay))
- get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
-
- blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
- if (blks < 0) {
- struct super_block *sb = mpd->inode->i_sb;
-
- err = blks;
- /*
-		 * If get_block returns EAGAIN or ENOSPC and there
-		 * appear to be free blocks, we will just let
- * mpage_da_submit_io() unlock all of the pages.
- */
- if (err == -EAGAIN)
- goto submit_io;
-
- if (err == -ENOSPC && ext4_count_free_clusters(sb)) {
- mpd->retval = err;
- goto submit_io;
- }
-
- /*
- * get block failure will cause us to loop in
- * writepages, because a_ops->writepage won't be able
- * to make progress. The page will be redirtied by
- * writepage and writepages will again try to write
- * the same.
- */
- if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) {
- ext4_msg(sb, KERN_CRIT,
- "delayed block allocation failed for inode %lu "
- "at logical offset %llu with max blocks %zd "
- "with error %d", mpd->inode->i_ino,
- (unsigned long long) next,
- mpd->b_size >> mpd->inode->i_blkbits, err);
- ext4_msg(sb, KERN_CRIT,
- "This should not happen!! Data will be lost\n");
- if (err == -ENOSPC)
- ext4_print_free_blocks(mpd->inode);
- }
- /* invalidate all the pages */
- ext4_da_block_invalidatepages(mpd);
-
- /* Mark this page range as having been completed */
- mpd->io_done = 1;
- return;
- }
- BUG_ON(blks == 0);
-
- mapp = &map;
- if (map.m_flags & EXT4_MAP_NEW) {
- struct block_device *bdev = mpd->inode->i_sb->s_bdev;
- int i;
-
- for (i = 0; i < map.m_len; i++)
- unmap_underlying_metadata(bdev, map.m_pblk + i);
-
- if (ext4_should_order_data(mpd->inode)) {
- err = ext4_jbd2_file_inode(handle, mpd->inode);
- if (err) {
- /* Only if the journal is aborted */
- mpd->retval = err;
- goto submit_io;
- }
- }
- }
-
- /*
- * Update on-disk size along with block allocation.
- */
- disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits;
- if (disksize > i_size_read(mpd->inode))
- disksize = i_size_read(mpd->inode);
- if (disksize > EXT4_I(mpd->inode)->i_disksize) {
- ext4_update_i_disksize(mpd->inode, disksize);
- err = ext4_mark_inode_dirty(handle, mpd->inode);
- if (err)
- ext4_error(mpd->inode->i_sb,
- "Failed to mark inode %lu dirty",
- mpd->inode->i_ino);
- }
-
-submit_io:
- mpage_da_submit_io(mpd, mapp);
- mpd->io_done = 1;
-}
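
The i_disksize update at the tail of this function is simple arithmetic: the on-disk size grows to the end of the just-mapped range, is clamped to the in-core i_size, and never shrinks. A minimal userspace sketch of that calculation (the names here are illustrative, not kernel API):

#include <stdio.h>

typedef long long loff64_t;	/* stand-in for the kernel's loff_t */

static loff64_t new_disksize(loff64_t next, loff64_t blks, unsigned blkbits,
			     loff64_t i_size, loff64_t old_disksize)
{
	loff64_t disksize = (next + blks) << blkbits;

	if (disksize > i_size)
		disksize = i_size;	/* clamp to the in-core size */
	if (disksize > old_disksize)
		return disksize;	/* grow the on-disk size */
	return old_disksize;		/* i_disksize never shrinks here */
}

int main(void)
{
	/* 4 KiB blocks: blocks 10..13 were just mapped, file is 60000 bytes */
	printf("%lld\n", new_disksize(10, 4, 12, 60000, 40960));	/* 57344 */
	return 0;
}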
-
-#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
- (1 << BH_Delay) | (1 << BH_Unwritten))
-
-/*
- * mpage_add_bh_to_extent - try to add one more block to extent of blocks
- *
- * @mpd->lbh - extent of blocks
- * @logical - logical number of the block in the file
- * @bh - bh of the block (used to access block's state)
- *
- * the function is used to collect contiguous blocks in the same state
- */
-static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
- sector_t logical, size_t b_size,
- unsigned long b_state)
-{
- sector_t next;
- int nrblocks = mpd->b_size >> mpd->inode->i_blkbits;
-
- /*
- * XXX Don't go larger than mballoc is willing to allocate
- * This is a stopgap solution. We eventually need to fold
- * mpage_da_submit_io() into this function and then call
- * ext4_map_blocks() multiple times in a loop
- */
- if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize)
- goto flush_it;
-
-	/* check if the reserved journal credits might overflow */
- if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) {
- if (nrblocks >= EXT4_MAX_TRANS_DATA) {
- /*
- * With non-extent format we are limited by the journal
- * credit available. Total credit needed to insert
- * nrblocks contiguous blocks is dependent on the
- * nrblocks. So limit nrblocks.
- */
- goto flush_it;
- } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) >
- EXT4_MAX_TRANS_DATA) {
- /*
- * Adding the new buffer_head would make it cross the
- * allowed limit for which we have journal credit
- * reserved. So limit the new bh->b_size
- */
- b_size = (EXT4_MAX_TRANS_DATA - nrblocks) <<
- mpd->inode->i_blkbits;
- /* we will do mpage_da_submit_io in the next loop */
- }
- }
- /*
- * First block in the extent
- */
- if (mpd->b_size == 0) {
- mpd->b_blocknr = logical;
- mpd->b_size = b_size;
- mpd->b_state = b_state & BH_FLAGS;
- return;
- }
-
- next = mpd->b_blocknr + nrblocks;
- /*
- * Can we merge the block to our big extent?
- */
- if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) {
- mpd->b_size += b_size;
- return;
- }
-
-flush_it:
- /*
- * We couldn't merge the block to our extent, so we
- * need to flush current extent and start new one
- */
- mpage_da_map_and_submit(mpd);
- return;
-}
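
The merge test above boils down to: a block joins the pending extent only if it is logically contiguous (logical == b_blocknr + nrblocks) and carries the same BH_FLAGS state; otherwise the pending extent is flushed first. A hedged userspace model of that rule (struct and function names invented for illustration):

#include <stdio.h>

struct extent { long long start, len; unsigned state; };

/* Returns 1 if the block was absorbed, 0 if the caller must flush the
 * pending extent first (this mirrors the flush_it path above). */
static int add_block(struct extent *e, long long logical, unsigned state)
{
	if (e->len == 0) {			/* first block in the extent */
		e->start = logical;
		e->len = 1;
		e->state = state;
		return 1;
	}
	if (logical == e->start + e->len && state == e->state) {
		e->len++;			/* contiguous and same state */
		return 1;
	}
	return 0;				/* cannot merge: flush */
}

int main(void)
{
	struct extent e = { 0, 0, 0 };

	add_block(&e, 100, 0x1);
	add_block(&e, 101, 0x1);
	printf("extent [%lld, +%lld)\n", e.start, e.len);	/* [100, +2) */
	printf("merge 200? %d\n", add_block(&e, 200, 0x1));	/* 0: flush */
	return 0;
}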
-
-static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
-{
- return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
-}
-
-/*
- * This function grabs code from the very beginning of
- * ext4_map_blocks(), but assumes that the caller is on the delayed
- * write path. This function looks up the requested blocks and sets the
- * buffer delay bit under the protection of i_data_sem.
- */
-static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
- struct ext4_map_blocks *map,
- struct buffer_head *bh)
-{
- int retval;
- sector_t invalid_block = ~((sector_t) 0xffff);
-
- if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
- invalid_block = ~0;
-
- map->m_flags = 0;
- ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
- "logical block %lu\n", inode->i_ino, map->m_len,
- (unsigned long) map->m_lblk);
- /*
- * Try to see if we can get the block without requesting a new
- * file system block.
- */
- down_read((&EXT4_I(inode)->i_data_sem));
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- retval = ext4_ext_map_blocks(NULL, inode, map, 0);
- else
- retval = ext4_ind_map_blocks(NULL, inode, map, 0);
-
- if (retval == 0) {
- /*
- * XXX: __block_prepare_write() unmaps passed block,
- * is it OK?
- */
-		/* If the block was allocated from a previously allocated
-		 * cluster, then we don't need to reserve it again. */
- if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
- retval = ext4_da_reserve_space(inode, iblock);
- if (retval)
- /* not enough space to reserve */
- goto out_unlock;
- }
-
- /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
-		 * and it should not appear in bh->b_state.
- */
- map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
-
- map_bh(bh, inode->i_sb, invalid_block);
- set_buffer_new(bh);
- set_buffer_delay(bh);
- }
-
-out_unlock:
- up_read((&EXT4_I(inode)->i_data_sem));
-
- return retval;
-}
-
-/*
- * This is a special get_blocks_t callback which is used by
- * ext4_da_write_begin(). It will either return mapped block or
- * reserve space for a single block.
- *
- * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set.
- * We also have b_blocknr = -1 and b_bdev initialized properly
- *
- * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set.
- * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
- * initialized properly.
- */
-static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
- struct buffer_head *bh, int create)
-{
- struct ext4_map_blocks map;
- int ret = 0;
-
- BUG_ON(create == 0);
- BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
-
- map.m_lblk = iblock;
- map.m_len = 1;
-
- /*
- * first, we need to know whether the block is allocated already
-	 * preallocated blocks are unmapped, but should be treated
- * the same as allocated blocks.
- */
- ret = ext4_da_map_blocks(inode, iblock, &map, bh);
- if (ret <= 0)
- return ret;
-
- map_bh(bh, inode->i_sb, map.m_pblk);
- bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
-
- if (buffer_unwritten(bh)) {
- /* A delayed write to unwritten bh should be marked
- * new and mapped. Mapped ensures that we don't do
- * get_block multiple times when we write to the same
- * offset and new ensures that we do proper zero out
- * for partial write.
- */
- set_buffer_new(bh);
- set_buffer_mapped(bh);
- }
- return 0;
-}
-
-/*
- * This function is used as a standard get_block_t callback function
- * when there is no desire to allocate any blocks. It is used as a
- * callback function for block_write_begin() and block_write_full_page().
- * These functions should only try to map a single block at a time.
- *
- * Since this function doesn't do block allocations even if the caller
- * requests it by passing in create=1, it is critically important that
- * any caller checks to make sure that any buffer heads returned
- * by this function are either all already mapped or marked for
- * delayed allocation before calling block_write_full_page(). Otherwise,
- * b_blocknr could be left uninitialized, and the page write functions will
- * be taken by surprise.
- */
-static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
- return _ext4_get_block(inode, iblock, bh_result, 0);
-}
-
-static int bget_one(handle_t *handle, struct buffer_head *bh)
-{
- get_bh(bh);
- return 0;
-}
-
-static int bput_one(handle_t *handle, struct buffer_head *bh)
-{
- put_bh(bh);
- return 0;
-}
-
-static int __ext4_journalled_writepage(struct page *page,
- unsigned int len)
-{
- struct address_space *mapping = page->mapping;
- struct inode *inode = mapping->host;
- struct buffer_head *page_bufs;
- handle_t *handle = NULL;
- int ret = 0;
- int err;
-
- ClearPageChecked(page);
- page_bufs = page_buffers(page);
- BUG_ON(!page_bufs);
- walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
- /* As soon as we unlock the page, it can go away, but we have
- * references to buffers so we are safe */
- unlock_page(page);
-
- handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- goto out;
- }
-
- BUG_ON(!ext4_handle_valid(handle));
-
- ret = walk_page_buffers(handle, page_bufs, 0, len, NULL,
- do_journal_get_write_access);
-
- err = walk_page_buffers(handle, page_bufs, 0, len, NULL,
- write_end_fn);
- if (ret == 0)
- ret = err;
- EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
- err = ext4_journal_stop(handle);
- if (!ret)
- ret = err;
-
- walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
- ext4_set_inode_state(inode, EXT4_STATE_JDATA);
-out:
- return ret;
-}
-
-static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
-static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
-
-/*
- * Note that we don't need to start a transaction unless we're journaling data
- * because we should have holes filled from ext4_page_mkwrite(). We don't even
- * need to file the inode to the transaction's list in ordered mode because if
- * we are writing back data added by write(), the inode is already there and if
- * we are writing back data modified via mmap(), no one guarantees in which
- * transaction the data will hit the disk. In case we are journaling data, we
- * cannot start transaction directly because transaction start ranks above page
- * lock so we have to do some magic.
- *
- * This function can get called via...
- * - ext4_da_writepages after taking page lock (have journal handle)
- * - journal_submit_inode_data_buffers (no journal handle)
- * - shrink_page_list via pdflush (no journal handle)
- * - grab_page_cache when doing write_begin (have journal handle)
- *
- * We don't do any block allocation in this function. If we have page with
- * multiple blocks we need to write those buffer_heads that are mapped. This
- * is important for mmap-based writes. So if we do, with blocksize 1K:
- * truncate(f, 1024);
- * a = mmap(f, 0, 4096);
- * a[0] = 'a';
- * truncate(f, 4096);
- * we have in the page first buffer_head mapped via page_mkwrite call back
- * but other buffer_heads would be unmapped but dirty (dirty done via the
- * do_wp_page). So writepage should write the first block. If we modify
- * the mmap area beyond 1024 we will again get a page_fault and the
- * page_mkwrite callback will do the block allocation and mark the
- * buffer_heads mapped.
- *
- * We redirty the page if we have any buffer_heads that are either delayed or
- * unwritten in the page.
- *
- * We can get recursively called as shown below.
- *
- * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
- * ext4_writepage()
- *
- * But since we don't do any block allocation we should not deadlock.
- * The page also has the dirty flag cleared, so we don't get a recursive page_lock.
- */
-static int ext4_writepage(struct page *page,
- struct writeback_control *wbc)
-{
- int ret = 0, commit_write = 0;
- loff_t size;
- unsigned int len;
- struct buffer_head *page_bufs = NULL;
- struct inode *inode = page->mapping->host;
-
- trace_ext4_writepage(page);
- size = i_size_read(inode);
- if (page->index == size >> PAGE_CACHE_SHIFT)
- len = size & ~PAGE_CACHE_MASK;
- else
- len = PAGE_CACHE_SIZE;
-
- /*
- * If the page does not have buffers (for whatever reason),
- * try to create them using __block_write_begin. If this
- * fails, redirty the page and move on.
- */
- if (!page_has_buffers(page)) {
- if (__block_write_begin(page, 0, len,
- noalloc_get_block_write)) {
- redirty_page:
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
- }
- commit_write = 1;
- }
- page_bufs = page_buffers(page);
- if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
- ext4_bh_delay_or_unwritten)) {
- /*
- * We don't want to do block allocation, so redirty
- * the page and return. We may reach here when we do
- * a journal commit via journal_submit_inode_data_buffers.
- * We can also reach here via shrink_page_list but it
- * should never be for direct reclaim so warn if that
- * happens
- */
- WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
- PF_MEMALLOC);
- goto redirty_page;
- }
- if (commit_write)
- /* now mark the buffer_heads as dirty and uptodate */
- block_commit_write(page, 0, len);
-
- if (PageChecked(page) && ext4_should_journal_data(inode))
- /*
- * It's mmapped pagecache. Add buffers and journal it. There
- * doesn't seem much point in redirtying the page here.
- */
- return __ext4_journalled_writepage(page, len);
-
- if (buffer_uninit(page_bufs)) {
- ext4_set_bh_endio(page_bufs, inode);
- ret = block_write_full_page_endio(page, noalloc_get_block_write,
- wbc, ext4_end_io_buffer_write);
- } else
- ret = block_write_full_page(page, noalloc_get_block_write,
- wbc);
-
- return ret;
-}
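
The mmap scenario in the comment above can be reproduced from userspace. On a filesystem with 1 KiB blocks, only the first block of the page is mapped via the page_mkwrite callback; the other three buffer_heads end up dirty but unmapped, which is exactly the case the redirty logic above handles. A sketch, with error handling omitted:

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("f", O_RDWR | O_CREAT | O_TRUNC, 0644);
	char *a;

	ftruncate(fd, 1024);			/* truncate(f, 1024) */
	a = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	a[0] = 'a';				/* page_mkwrite maps block 0 */
	ftruncate(fd, 4096);			/* truncate(f, 4096) */
	/* writepage must now write the first 1 KiB block; the remaining
	 * three blocks of the page are dirty but unmapped */
	munmap(a, 4096);
	close(fd);
	return 0;
}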
-
-/*
- * This is called via ext4_da_writepages() to
- * calculate the total number of credits to reserve to fit
- * a single extent allocation into a single transaction,
- * ext4_da_writepages() will loop calling this before
- * the block allocation.
- */
-
-static int ext4_da_writepages_trans_blocks(struct inode *inode)
-{
- int max_blocks = EXT4_I(inode)->i_reserved_data_blocks;
-
- /*
- * With non-extent format the journal credit needed to
- * insert nrblocks contiguous block is dependent on
- * number of contiguous block. So we will limit
- * number of contiguous block to a sane value
- */
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) &&
- (max_blocks > EXT4_MAX_TRANS_DATA))
- max_blocks = EXT4_MAX_TRANS_DATA;
-
- return ext4_chunk_trans_blocks(inode, max_blocks);
-}
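
A sketch of the clamp above: for non-extent (indirect-mapped) files the contiguous block count is bounded by EXT4_MAX_TRANS_DATA (64U in this tree's ext4_jbd2.h) so the resulting credit request always fits in one transaction. ext4_chunk_trans_blocks() is modeled as a pass-through here; the real function adds index and bitmap block credits.

#include <stdio.h>

#define EXT4_MAX_TRANS_DATA	64U	/* from ext4_jbd2.h */

static unsigned da_trans_blocks(unsigned reserved_data_blocks,
				int extent_mapped)
{
	unsigned max_blocks = reserved_data_blocks;

	if (!extent_mapped && max_blocks > EXT4_MAX_TRANS_DATA)
		max_blocks = EXT4_MAX_TRANS_DATA;	/* limit credits */
	return max_blocks;	/* stand-in for ext4_chunk_trans_blocks() */
}

int main(void)
{
	printf("%u\n", da_trans_blocks(1000, 0));	/* clamped to 64 */
	printf("%u\n", da_trans_blocks(1000, 1));	/* extents: 1000 */
	return 0;
}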
-
-/*
- * write_cache_pages_da - walk the list of dirty pages of the given
- * address space and accumulate pages that need writing, and call
- * mpage_da_map_and_submit to map a single contiguous memory region
- * and then write them.
- */
-static int write_cache_pages_da(struct address_space *mapping,
- struct writeback_control *wbc,
- struct mpage_da_data *mpd,
- pgoff_t *done_index)
-{
- struct buffer_head *bh, *head;
- struct inode *inode = mapping->host;
- struct pagevec pvec;
- unsigned int nr_pages;
- sector_t logical;
- pgoff_t index, end;
- long nr_to_write = wbc->nr_to_write;
- int i, tag, ret = 0;
-
- memset(mpd, 0, sizeof(struct mpage_da_data));
- mpd->wbc = wbc;
- mpd->inode = inode;
- pagevec_init(&pvec, 0);
- index = wbc->range_start >> PAGE_CACHE_SHIFT;
- end = wbc->range_end >> PAGE_CACHE_SHIFT;
-
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
- tag = PAGECACHE_TAG_TOWRITE;
- else
- tag = PAGECACHE_TAG_DIRTY;
-
- *done_index = index;
- while (index <= end) {
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
- if (nr_pages == 0)
- return 0;
-
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
-
- /*
- * At this point, the page may be truncated or
- * invalidated (changing page->mapping to NULL), or
- * even swizzled back from swapper_space to tmpfs file
- * mapping. However, page->index will not change
- * because we have a reference on the page.
- */
- if (page->index > end)
- goto out;
-
- *done_index = page->index + 1;
-
- /*
- * If we can't merge this page, and we have
-			 * accumulated a contiguous region, write it
- */
- if ((mpd->next_page != page->index) &&
- (mpd->next_page != mpd->first_page)) {
- mpage_da_map_and_submit(mpd);
- goto ret_extent_tail;
- }
-
- lock_page(page);
-
- /*
- * If the page is no longer dirty, or its
- * mapping no longer corresponds to inode we
- * are writing (which means it has been
- * truncated or invalidated), or the page is
- * already under writeback and we are not
- * doing a data integrity writeback, skip the page
- */
- if (!PageDirty(page) ||
- (PageWriteback(page) &&
- (wbc->sync_mode == WB_SYNC_NONE)) ||
- unlikely(page->mapping != mapping)) {
- unlock_page(page);
- continue;
- }
-
- wait_on_page_writeback(page);
- BUG_ON(PageWriteback(page));
-
- if (mpd->next_page != page->index)
- mpd->first_page = page->index;
- mpd->next_page = page->index + 1;
- logical = (sector_t) page->index <<
- (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
- if (!page_has_buffers(page)) {
- mpage_add_bh_to_extent(mpd, logical,
- PAGE_CACHE_SIZE,
- (1 << BH_Dirty) | (1 << BH_Uptodate));
- if (mpd->io_done)
- goto ret_extent_tail;
- } else {
- /*
- * Page with regular buffer heads,
- * just add all dirty ones
- */
- head = page_buffers(page);
- bh = head;
- do {
- BUG_ON(buffer_locked(bh));
- /*
- * We need to try to allocate
- * unmapped blocks in the same page.
- * Otherwise we won't make progress
- * with the page in ext4_writepage
- */
- if (ext4_bh_delay_or_unwritten(NULL, bh)) {
- mpage_add_bh_to_extent(mpd, logical,
- bh->b_size,
- bh->b_state);
- if (mpd->io_done)
- goto ret_extent_tail;
- } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
- /*
- * mapped dirty buffer. We need
- * to update the b_state
- * because we look at b_state
- * in mpage_da_map_blocks. We
- * don't update b_size because
- * if we find an unmapped
- * buffer_head later we need to
- * use the b_state flag of that
- * buffer_head.
- */
- if (mpd->b_size == 0)
- mpd->b_state = bh->b_state & BH_FLAGS;
- }
- logical++;
- } while ((bh = bh->b_this_page) != head);
- }
-
- if (nr_to_write > 0) {
- nr_to_write--;
- if (nr_to_write == 0 &&
- wbc->sync_mode == WB_SYNC_NONE)
- /*
- * We stop writing back only if we are
- * not doing integrity sync. In case of
- * integrity sync we have to keep going
- * because someone may be concurrently
- * dirtying pages, and we might have
- * synced a lot of newly appeared dirty
- * pages, but have not synced all of the
- * old dirty pages.
- */
- goto out;
- }
- }
- pagevec_release(&pvec);
- cond_resched();
- }
- return 0;
-ret_extent_tail:
- ret = MPAGE_DA_EXTENT_TAIL;
-out:
- pagevec_release(&pvec);
- cond_resched();
- return ret;
-}
-
-
-static int ext4_da_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
-{
- pgoff_t index;
- int range_whole = 0;
- handle_t *handle = NULL;
- struct mpage_da_data mpd;
- struct inode *inode = mapping->host;
- int pages_written = 0;
- unsigned int max_pages;
- int range_cyclic, cycled = 1, io_done = 0;
- int needed_blocks, ret = 0;
- long desired_nr_to_write, nr_to_writebump = 0;
- loff_t range_start = wbc->range_start;
- struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
- pgoff_t done_index = 0;
- pgoff_t end;
- struct blk_plug plug;
-
- trace_ext4_da_writepages(inode, wbc);
-
- /*
- * No pages to write? This is mainly a kludge to avoid starting
- * a transaction for special inodes like journal inode on last iput()
- * because that could violate lock ordering on umount
- */
- if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
- return 0;
-
- /*
- * If the filesystem has aborted, it is read-only, so return
- * right away instead of dumping stack traces later on that
- * will obscure the real source of the problem. We test
-	 * EXT4_MF_FS_ABORTED instead of sb->s_flags' MS_RDONLY because
- * the latter could be true if the filesystem is mounted
- * read-only, and in that case, ext4_da_writepages should
- * *never* be called, so if that ever happens, we would want
- * the stack trace.
- */
- if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
- return -EROFS;
-
- if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
- range_whole = 1;
-
- range_cyclic = wbc->range_cyclic;
- if (wbc->range_cyclic) {
- index = mapping->writeback_index;
- if (index)
- cycled = 0;
- wbc->range_start = index << PAGE_CACHE_SHIFT;
- wbc->range_end = LLONG_MAX;
- wbc->range_cyclic = 0;
- end = -1;
- } else {
- index = wbc->range_start >> PAGE_CACHE_SHIFT;
- end = wbc->range_end >> PAGE_CACHE_SHIFT;
- }
-
- /*
- * This works around two forms of stupidity. The first is in
- * the writeback code, which caps the maximum number of pages
- * written to be 1024 pages. This is wrong on multiple
-	 * levels; different architectures have a different page size,
- * which changes the maximum amount of data which gets
- * written. Secondly, 4 megabytes is way too small. XFS
- * forces this value to be 16 megabytes by multiplying
- * nr_to_write parameter by four, and then relies on its
- * allocator to allocate larger extents to make them
- * contiguous. Unfortunately this brings us to the second
- * stupidity, which is that ext4's mballoc code only allocates
- * at most 2048 blocks. So we force contiguous writes up to
- * the number of dirty blocks in the inode, or
-	 * sbi->s_max_writeback_mb_bump, whichever is smaller.
- */
- max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
- if (!range_cyclic && range_whole) {
- if (wbc->nr_to_write == LONG_MAX)
- desired_nr_to_write = wbc->nr_to_write;
- else
- desired_nr_to_write = wbc->nr_to_write * 8;
- } else
- desired_nr_to_write = ext4_num_dirty_pages(inode, index,
- max_pages);
- if (desired_nr_to_write > max_pages)
- desired_nr_to_write = max_pages;
-
- if (wbc->nr_to_write < desired_nr_to_write) {
- nr_to_writebump = desired_nr_to_write - wbc->nr_to_write;
- wbc->nr_to_write = desired_nr_to_write;
- }
-
-retry:
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
- tag_pages_for_writeback(mapping, index, end);
-
- blk_start_plug(&plug);
- while (!ret && wbc->nr_to_write > 0) {
-
- /*
-		 * We insert one extent at a time, so we need the
-		 * credits for a single extent allocation.
-		 * Journalled mode is currently not supported
-		 * by delalloc.
- */
- BUG_ON(ext4_should_journal_data(inode));
- needed_blocks = ext4_da_writepages_trans_blocks(inode);
-
- /* start a new transaction*/
- handle = ext4_journal_start(inode, needed_blocks);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
- "%ld pages, ino %lu; err %d", __func__,
- wbc->nr_to_write, inode->i_ino, ret);
- blk_finish_plug(&plug);
- goto out_writepages;
- }
-
- /*
- * Now call write_cache_pages_da() to find the next
- * contiguous region of logical blocks that need
- * blocks to be allocated by ext4 and submit them.
- */
- ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
- /*
- * If we have a contiguous extent of pages and we
- * haven't done the I/O yet, map the blocks and submit
- * them for I/O.
- */
- if (!mpd.io_done && mpd.next_page != mpd.first_page) {
- mpage_da_map_and_submit(&mpd);
- ret = MPAGE_DA_EXTENT_TAIL;
- }
- trace_ext4_da_write_pages(inode, &mpd);
- wbc->nr_to_write -= mpd.pages_written;
-
- ext4_journal_stop(handle);
-
- if ((mpd.retval == -ENOSPC) && sbi->s_journal) {
- /* commit the transaction which would
- * free blocks released in the transaction
- * and try again
- */
- jbd2_journal_force_commit_nested(sbi->s_journal);
- ret = 0;
- } else if (ret == MPAGE_DA_EXTENT_TAIL) {
- /*
-			 * Got one extent; now try with the rest of the pages.
- * If mpd.retval is set -EIO, journal is aborted.
- * So we don't need to write any more.
- */
- pages_written += mpd.pages_written;
- ret = mpd.retval;
- io_done = 1;
- } else if (wbc->nr_to_write)
- /*
-			 * There is no more writeout needed,
-			 * or we requested a non-blocking writeout
-			 * and found the device congested
- */
- break;
- }
- blk_finish_plug(&plug);
- if (!io_done && !cycled) {
- cycled = 1;
- index = 0;
- wbc->range_start = index << PAGE_CACHE_SHIFT;
- wbc->range_end = mapping->writeback_index - 1;
- goto retry;
- }
-
- /* Update index */
- wbc->range_cyclic = range_cyclic;
- if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
- /*
- * set the writeback_index so that range_cyclic
- * mode will write it back later
- */
- mapping->writeback_index = done_index;
-
-out_writepages:
- wbc->nr_to_write -= nr_to_writebump;
- wbc->range_start = range_start;
- trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
- return ret;
-}
-
-#define FALL_BACK_TO_NONDELALLOC 1
-static int ext4_nonda_switch(struct super_block *sb)
-{
- s64 free_blocks, dirty_blocks;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
-
- /*
-	 * Switch to non-delalloc mode if we are running low
-	 * on free blocks. The free block accounting via percpu
-	 * counters can get slightly wrong with percpu_counter_batch getting
-	 * accumulated on each CPU without updating the global counters.
-	 * Delalloc needs accurate free block accounting, so switch
-	 * to non-delalloc when we are near the error range.
- */
- free_blocks = EXT4_C2B(sbi,
- percpu_counter_read_positive(&sbi->s_freeclusters_counter));
- dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
- if (2 * free_blocks < 3 * dirty_blocks ||
- free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
- /*
-		 * free block count is less than 150% of dirty blocks,
-		 * or free blocks are less than the watermark
- */
- return 1;
- }
- /*
- * Even if we don't switch but are nearing capacity,
- * start pushing delalloc when 1/2 of free blocks are dirty.
- */
- if (free_blocks < 2 * dirty_blocks)
- writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE);
-
- return 0;
-}
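
The two thresholds above are worth seeing with numbers: the first test fires when free blocks drop below 150% of dirty blocks, the second when they drop below dirty plus a safety watermark. A userspace model (the watermark value here is invented; the kernel derives EXT4_FREECLUSTERS_WATERMARK from the per-CPU counter batch size and CPU count):

#include <stdio.h>

#define WATERMARK	1024	/* stand-in for EXT4_FREECLUSTERS_WATERMARK */

static int nonda_switch(long long free_blocks, long long dirty_blocks)
{
	/* switch when free < 150% of dirty, or free below the watermark */
	return 2 * free_blocks < 3 * dirty_blocks ||
	       free_blocks < dirty_blocks + WATERMARK;
}

int main(void)
{
	printf("%d\n", nonda_switch(10000, 8000));	/* 1: 20000 < 24000 */
	printf("%d\n", nonda_switch(10000, 2000));	/* 0: plenty free */
	return 0;
}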
-
-static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata)
-{
- int ret, retries = 0;
- struct page *page;
- pgoff_t index;
- struct inode *inode = mapping->host;
- handle_t *handle;
-
- index = pos >> PAGE_CACHE_SHIFT;
-
- if (ext4_nonda_switch(inode->i_sb)) {
- *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
- return ext4_write_begin(file, mapping, pos,
- len, flags, pagep, fsdata);
- }
- *fsdata = (void *)0;
- trace_ext4_da_write_begin(inode, pos, len, flags);
-retry:
- /*
- * With delayed allocation, we don't log the i_disksize update
- * if there is delayed block allocation. But we still need
-	 * to journal the i_disksize update for a write to the end
-	 * of file that hits an already-mapped buffer.
- */
- handle = ext4_journal_start(inode, 1);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- goto out;
- }
- /* We cannot recurse into the filesystem as the transaction is already
- * started */
- flags |= AOP_FLAG_NOFS;
-
- page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page) {
- ext4_journal_stop(handle);
- ret = -ENOMEM;
- goto out;
- }
- *pagep = page;
-
- ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
- if (ret < 0) {
- unlock_page(page);
- ext4_journal_stop(handle);
- page_cache_release(page);
- /*
- * block_write_begin may have instantiated a few blocks
- * outside i_size. Trim these off again. Don't need
- * i_size_read because we hold i_mutex.
- */
- if (pos + len > inode->i_size)
- ext4_truncate_failed_write(inode);
- }
-
- if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
- goto retry;
-out:
- return ret;
-}
-
-/*
- * Check if we should update i_disksize
- * when a write to the end of file does not require block allocation
- */
-static int ext4_da_should_update_i_disksize(struct page *page,
- unsigned long offset)
-{
- struct buffer_head *bh;
- struct inode *inode = page->mapping->host;
- unsigned int idx;
- int i;
-
- bh = page_buffers(page);
- idx = offset >> inode->i_blkbits;
-
- for (i = 0; i < idx; i++)
- bh = bh->b_this_page;
-
- if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh))
- return 0;
- return 1;
-}
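
The buffer walk above is an index computation in disguise: with a blocksize of 2^blkbits, byte offset 'offset' within the page lands in buffer_head number offset >> blkbits, and that is the bh whose mapped/delay/unwritten state decides the answer. A small model:

#include <stdio.h>

int main(void)
{
	unsigned blkbits = 10;			/* 1 KiB blocks, 4 KiB page */
	unsigned long offset = 2500;		/* byte offset within page */
	unsigned idx = offset >> blkbits;	/* buffer_head index */

	/* ext4_da_should_update_i_disksize() follows bh->b_this_page
	 * idx times, then checks mapped/delay/unwritten on that bh */
	printf("offset %lu -> buffer %u\n", offset, idx);	/* buffer 2 */
	return 0;
}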
-
-static int ext4_da_write_end(struct file *file,
- struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
-{
- struct inode *inode = mapping->host;
- int ret = 0, ret2;
- handle_t *handle = ext4_journal_current_handle();
- loff_t new_i_size;
- unsigned long start, end;
- int write_mode = (int)(unsigned long)fsdata;
-
- if (write_mode == FALL_BACK_TO_NONDELALLOC) {
- switch (ext4_inode_journal_mode(inode)) {
- case EXT4_INODE_ORDERED_DATA_MODE:
- return ext4_ordered_write_end(file, mapping, pos,
- len, copied, page, fsdata);
- case EXT4_INODE_WRITEBACK_DATA_MODE:
- return ext4_writeback_write_end(file, mapping, pos,
- len, copied, page, fsdata);
- default:
- BUG();
- }
- }
-
- trace_ext4_da_write_end(inode, pos, len, copied);
- start = pos & (PAGE_CACHE_SIZE - 1);
- end = start + copied - 1;
-
- /*
- * generic_write_end() will run mark_inode_dirty() if i_size
- * changes. So let's piggyback the i_disksize mark_inode_dirty
- * into that.
- */
-
- new_i_size = pos + copied;
- if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
- if (ext4_da_should_update_i_disksize(page, end)) {
- down_write(&EXT4_I(inode)->i_data_sem);
- if (new_i_size > EXT4_I(inode)->i_disksize) {
- /*
- * Updating i_disksize when extending file
- * without needing block allocation
- */
- if (ext4_should_order_data(inode))
- ret = ext4_jbd2_file_inode(handle,
- inode);
-
- EXT4_I(inode)->i_disksize = new_i_size;
- }
- up_write(&EXT4_I(inode)->i_data_sem);
-			/* We need to mark the inode dirty even if
-			 * new_i_size is less than inode->i_size
-			 * but greater than i_disksize (hint: delalloc)
- */
- ext4_mark_inode_dirty(handle, inode);
- }
- }
- ret2 = generic_write_end(file, mapping, pos, len, copied,
- page, fsdata);
- copied = ret2;
- if (ret2 < 0)
- ret = ret2;
- ret2 = ext4_journal_stop(handle);
- if (!ret)
- ret = ret2;
-
- return ret ? ret : copied;
-}
-
-static void ext4_da_invalidatepage(struct page *page, unsigned long offset)
-{
- /*
- * Drop reserved blocks
- */
- BUG_ON(!PageLocked(page));
- if (!page_has_buffers(page))
- goto out;
-
- ext4_da_page_release_reservation(page, offset);
-
-out:
- ext4_invalidatepage(page, offset);
-
- return;
-}
-
-/*
- * Force all delayed allocation blocks to be allocated for a given inode.
- */
-int ext4_alloc_da_blocks(struct inode *inode)
-{
- trace_ext4_alloc_da_blocks(inode);
-
- if (!EXT4_I(inode)->i_reserved_data_blocks &&
- !EXT4_I(inode)->i_reserved_meta_blocks)
- return 0;
-
- /*
- * We do something simple for now. The filemap_flush() will
- * also start triggering a write of the data blocks, which is
- * not strictly speaking necessary (and for users of
- * laptop_mode, not even desirable). However, to do otherwise
- * would require replicating code paths in:
- *
- * ext4_da_writepages() ->
- * write_cache_pages() ---> (via passed in callback function)
- * __mpage_da_writepage() -->
- * mpage_add_bh_to_extent()
- * mpage_da_map_blocks()
- *
- * The problem is that write_cache_pages(), located in
- * mm/page-writeback.c, marks pages clean in preparation for
- * doing I/O, which is not desirable if we're not planning on
- * doing I/O at all.
- *
- * We could call write_cache_pages(), and then redirty all of
- * the pages by calling redirty_page_for_writepage() but that
- * would be ugly in the extreme. So instead we would need to
- * replicate parts of the code in the above functions,
- * simplifying them because we wouldn't actually intend to
- * write out the pages, but rather only collect contiguous
- * logical block extents, call the multi-block allocator, and
- * then update the buffer heads with the block allocations.
- *
- * For now, though, we'll cheat by calling filemap_flush(),
- * which will map the blocks, and start the I/O, but not
- * actually wait for the I/O to complete.
- */
- return filemap_flush(inode->i_mapping);
-}
-
-/*
- * bmap() is special. It gets used by applications such as lilo and by
- * the swapper to find the on-disk block of a specific piece of data.
- *
- * Naturally, this is dangerous if the block concerned is still in the
- * journal. If somebody makes a swapfile on an ext4 data-journaling
- * filesystem and enables swap, then they may get a nasty shock when the
- * data getting swapped to that swapfile suddenly gets overwritten by
- * the original zeros written out previously to the journal and
- * awaiting writeback in the kernel's buffer cache.
- *
- * So, if we see any bmap calls here on a modified, data-journaled file,
- * take extra steps to flush any blocks which might be in the cache.
- */
-static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
-{
- struct inode *inode = mapping->host;
- journal_t *journal;
- int err;
-
- if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
- test_opt(inode->i_sb, DELALLOC)) {
- /*
- * With delalloc we want to sync the file
- * so that we can make sure we allocate
-		 * blocks for the file
- */
- filemap_write_and_wait(mapping);
- }
-
- if (EXT4_JOURNAL(inode) &&
- ext4_test_inode_state(inode, EXT4_STATE_JDATA)) {
- /*
- * This is a REALLY heavyweight approach, but the use of
- * bmap on dirty files is expected to be extremely rare:
- * only if we run lilo or swapon on a freshly made file
- * do we expect this to happen.
- *
- * (bmap requires CAP_SYS_RAWIO so this does not
- * represent an unprivileged user DOS attack --- we'd be
- * in trouble if mortal users could trigger this path at
- * will.)
- *
- * NB. EXT4_STATE_JDATA is not set on files other than
- * regular files. If somebody wants to bmap a directory
- * or symlink and gets confused because the buffer
- * hasn't yet been flushed to disk, they deserve
- * everything they get.
- */
-
- ext4_clear_inode_state(inode, EXT4_STATE_JDATA);
- journal = EXT4_JOURNAL(inode);
- jbd2_journal_lock_updates(journal);
- err = jbd2_journal_flush(journal);
- jbd2_journal_unlock_updates(journal);
-
- if (err)
- return 0;
- }
-
- return generic_block_bmap(mapping, block, ext4_get_block);
-}
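
The userspace face of this function is the FIBMAP ioctl, which (as the comment notes) requires CAP_SYS_RAWIO. A minimal sketch querying the physical block behind logical block 0 of a file:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
	int fd, blk = 0;	/* in: logical block index, out: physical */

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return 1;
	if (ioctl(fd, FIBMAP, &blk) < 0) {
		perror("FIBMAP");	/* EPERM without CAP_SYS_RAWIO */
		return 1;
	}
	printf("logical 0 -> physical %d\n", blk);
	close(fd);
	return 0;
}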
-
-static int ext4_readpage(struct file *file, struct page *page)
-{
- trace_ext4_readpage(page);
- return mpage_readpage(page, ext4_get_block);
-}
-
-static int
-ext4_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
-{
- return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
-}
-
-static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
-{
- struct buffer_head *head, *bh;
- unsigned int curr_off = 0;
-
- if (!page_has_buffers(page))
- return;
- head = bh = page_buffers(page);
- do {
- if (offset <= curr_off && test_clear_buffer_uninit(bh)
- && bh->b_private) {
- ext4_free_io_end(bh->b_private);
- bh->b_private = NULL;
- bh->b_end_io = NULL;
- }
- curr_off = curr_off + bh->b_size;
- bh = bh->b_this_page;
- } while (bh != head);
-}
-
-static void ext4_invalidatepage(struct page *page, unsigned long offset)
-{
- journal_t *journal = EXT4_JOURNAL(page->mapping->host);
-
- trace_ext4_invalidatepage(page, offset);
-
- /*
- * free any io_end structure allocated for buffers to be discarded
- */
- if (ext4_should_dioread_nolock(page->mapping->host))
- ext4_invalidatepage_free_endio(page, offset);
- /*
- * If it's a full truncate we just forget about the pending dirtying
- */
- if (offset == 0)
- ClearPageChecked(page);
-
- if (journal)
- jbd2_journal_invalidatepage(journal, page, offset);
- else
- block_invalidatepage(page, offset);
-}
-
-static int ext4_releasepage(struct page *page, gfp_t wait)
-{
- journal_t *journal = EXT4_JOURNAL(page->mapping->host);
-
- trace_ext4_releasepage(page);
-
- WARN_ON(PageChecked(page));
- if (!page_has_buffers(page))
- return 0;
- if (journal)
- return jbd2_journal_try_to_free_buffers(journal, page, wait);
- else
- return try_to_free_buffers(page);
-}
-
-/*
- * ext4_get_block used when preparing for a DIO write or buffer write.
- * We allocate an uninitialized extent if blocks haven't been allocated.
- * The extent will be converted to initialized after the IO is complete.
- */
-static int ext4_get_block_write(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
- inode->i_ino, create);
- return _ext4_get_block(inode, iblock, bh_result,
- EXT4_GET_BLOCKS_IO_CREATE_EXT);
-}
-
-static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
- ssize_t size, void *private, int ret,
- bool is_async)
-{
- struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
- ext4_io_end_t *io_end = iocb->private;
- struct workqueue_struct *wq;
- unsigned long flags;
- struct ext4_inode_info *ei;
-
-	/* if not async direct IO, or a DIO with a zero-byte write, just return */
- if (!io_end || !size)
- goto out;
-
- ext_debug("ext4_end_io_dio(): io_end 0x%p "
- "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
- iocb->private, io_end->inode->i_ino, iocb, offset,
- size);
-
- iocb->private = NULL;
-
- /* if not aio dio with unwritten extents, just free io and return */
- if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
- ext4_free_io_end(io_end);
-out:
- if (is_async)
- aio_complete(iocb, ret, 0);
- inode_dio_done(inode);
- return;
- }
-
- io_end->offset = offset;
- io_end->size = size;
- if (is_async) {
- io_end->iocb = iocb;
- io_end->result = ret;
- }
- wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
-
-	/* Add the io_end to the per-inode completed aio dio list */
- ei = EXT4_I(io_end->inode);
- spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- list_add_tail(&io_end->list, &ei->i_completed_io_list);
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
-
- /* queue the work to convert unwritten extents to written */
- queue_work(wq, &io_end->work);
-}
-
-static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
-{
- ext4_io_end_t *io_end = bh->b_private;
- struct workqueue_struct *wq;
- struct inode *inode;
- unsigned long flags;
-
- if (!test_clear_buffer_uninit(bh) || !io_end)
- goto out;
-
- if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
- ext4_msg(io_end->inode->i_sb, KERN_INFO,
- "sb umounted, discard end_io request for inode %lu",
- io_end->inode->i_ino);
- ext4_free_io_end(io_end);
- goto out;
- }
-
- /*
- * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now,
-	 * but being more careful is always safe for future changes.
- */
- inode = io_end->inode;
- ext4_set_io_unwritten_flag(inode, io_end);
-
-	/* Add the io_end to the per-inode completed io list */
- spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
- list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
- spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
-
- wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
- /* queue the work to convert unwritten extents to written */
- queue_work(wq, &io_end->work);
-out:
- bh->b_private = NULL;
- bh->b_end_io = NULL;
- clear_buffer_uninit(bh);
- end_buffer_async_write(bh, uptodate);
-}
-
-static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
-{
- ext4_io_end_t *io_end;
- struct page *page = bh->b_page;
- loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT;
- size_t size = bh->b_size;
-
-retry:
- io_end = ext4_init_io_end(inode, GFP_ATOMIC);
- if (!io_end) {
- pr_warn_ratelimited("%s: allocation fail\n", __func__);
- schedule();
- goto retry;
- }
- io_end->offset = offset;
- io_end->size = size;
- /*
- * We need to hold a reference to the page to make sure it
- * doesn't get evicted before ext4_end_io_work() has a chance
-	 * to convert the extent from unwritten to written.
- */
- io_end->page = page;
- get_page(io_end->page);
-
- bh->b_private = io_end;
- bh->b_end_io = ext4_end_io_buffer_write;
- return 0;
-}
-
-/*
- * For ext4 extent files, ext4 will do direct-IO writes to holes,
- * to preallocated extents, and to writes that extend the file, with no
- * need to fall back to buffered IO.
- *
- * For holes, we fallocate those blocks and mark them as uninitialized.
- * If those blocks were preallocated, we make sure they are split, but
- * still keep the range to write as uninitialized.
- *
- * The unwritten extents will be converted to written when DIO completes.
- * For async direct IO, since the IO may still be pending on return, we
- * set up an end_io callback function, which will do the conversion
- * when the async direct IO completes.
- *
- * If the O_DIRECT write will extend the file then add this inode to the
- * orphan list. So recovery will truncate it back to the original size
- * if the machine crashes during the write.
- *
- */
-static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
- const struct iovec *iov, loff_t offset,
- unsigned long nr_segs)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- ssize_t ret;
- size_t count = iov_length(iov, nr_segs);
-
- loff_t final_size = offset + count;
- if (rw == WRITE && final_size <= inode->i_size) {
- /*
- * We could direct write to holes and fallocate.
- *
- * Allocated blocks to fill the hole are marked as uninitialized
- * to prevent parallel buffered read to expose the stale data
- * before DIO complete the data IO.
- *
- * As to previously fallocated extents, ext4 get_block
- * will just simply mark the buffer mapped but still
- * keep the extents uninitialized.
- *
-		 * For the non-AIO case, we will convert those unwritten
-		 * extents to written after returning from blockdev_direct_IO.
-		 *
-		 * For async DIO, the conversion needs to be deferred until
-		 * the IO is completed. The ext4 end_io callback function
- * will be called to take care of the conversion work.
- * Here for async case, we allocate an io_end structure to
- * hook to the iocb.
- */
- iocb->private = NULL;
- EXT4_I(inode)->cur_aio_dio = NULL;
- if (!is_sync_kiocb(iocb)) {
- ext4_io_end_t *io_end =
- ext4_init_io_end(inode, GFP_NOFS);
- if (!io_end)
- return -ENOMEM;
- io_end->flag |= EXT4_IO_END_DIRECT;
- iocb->private = io_end;
- /*
-			 * We save the io structure for the current async
-			 * direct IO, so that later ext4_map_blocks()
-			 * can flag in the io structure whether there
-			 * are unwritten extents that need to be converted
-			 * when the IO is completed.
- */
- EXT4_I(inode)->cur_aio_dio = iocb->private;
- }
-
- ret = __blockdev_direct_IO(rw, iocb, inode,
- inode->i_sb->s_bdev, iov,
- offset, nr_segs,
- ext4_get_block_write,
- ext4_end_io_dio,
- NULL,
- DIO_LOCKING);
- if (iocb->private)
- EXT4_I(inode)->cur_aio_dio = NULL;
- /*
-		 * The io_end structure takes a reference to the inode;
-		 * that structure needs to be destroyed and the
-		 * reference to the inode needs to be dropped when IO
-		 * completes, even for a 0-byte write or a failure.
-		 *
-		 * In the successful AIO DIO case, the io_end structure will be
-		 * destroyed and the reference to the inode will be dropped
- * after the end_io call back function is called.
- *
- * In the case there is 0 byte write, or error case, since
- * VFS direct IO won't invoke the end_io call back function,
- * we need to free the end_io structure here.
- */
- if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
- ext4_free_io_end(iocb->private);
- iocb->private = NULL;
- } else if (ret > 0 && ext4_test_inode_state(inode,
- EXT4_STATE_DIO_UNWRITTEN)) {
- int err;
- /*
-			 * For the non-AIO case, since the IO is already
-			 * completed, we can do the conversion right here
- */
- err = ext4_convert_unwritten_extents(inode,
- offset, ret);
- if (err < 0)
- ret = err;
- ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
- }
- return ret;
- }
-
-	/* For writes past the end of file, we fall back to the old way */
- return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
-}
-
-static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
- const struct iovec *iov, loff_t offset,
- unsigned long nr_segs)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- ssize_t ret;
-
- /*
- * If we are doing data journalling we don't support O_DIRECT
- */
- if (ext4_should_journal_data(inode))
- return 0;
-
- trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
- else
- ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
- trace_ext4_direct_IO_exit(inode, offset,
- iov_length(iov, nr_segs), rw, ret);
- return ret;
-}
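
For reference, the userspace side that reaches this entry point is an O_DIRECT read or write. Alignment of the buffer, offset and length is required; 512 bytes is assumed below, though the real constraint is the device's logical block size. A hedged sketch:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fd = open("f", O_WRONLY | O_CREAT | O_DIRECT, 0644);
	void *buf;

	if (fd < 0 || posix_memalign(&buf, 512, 4096))
		return 1;
	memset(buf, 'x', 4096);
	/* ends up in ext4_direct_IO(), then ext4_ext_direct_IO() for an
	 * extent-mapped file */
	if (pwrite(fd, buf, 4096, 0) != 4096)
		return 1;
	free(buf);
	close(fd);
	return 0;
}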
-
-/*
- * Pages can be marked dirty completely asynchronously from ext4's journalling
- * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do
- * much here because ->set_page_dirty is called under VFS locks. The page is
- * not necessarily locked.
- *
- * We cannot just dirty the page and leave attached buffers clean, because the
- * buffers' dirty state is "definitive". We cannot just set the buffers dirty
- * or jbddirty because all the journalling code will explode.
- *
- * So what we do is to mark the page "pending dirty" and next time writepage
- * is called, propagate that into the buffers appropriately.
- */
-static int ext4_journalled_set_page_dirty(struct page *page)
-{
- SetPageChecked(page);
- return __set_page_dirty_nobuffers(page);
-}
-
-static const struct address_space_operations ext4_ordered_aops = {
- .readpage = ext4_readpage,
- .readpages = ext4_readpages,
- .writepage = ext4_writepage,
- .write_begin = ext4_write_begin,
- .write_end = ext4_ordered_write_end,
- .bmap = ext4_bmap,
- .invalidatepage = ext4_invalidatepage,
- .releasepage = ext4_releasepage,
- .direct_IO = ext4_direct_IO,
- .migratepage = buffer_migrate_page,
- .is_partially_uptodate = block_is_partially_uptodate,
- .error_remove_page = generic_error_remove_page,
-};
-
-static const struct address_space_operations ext4_writeback_aops = {
- .readpage = ext4_readpage,
- .readpages = ext4_readpages,
- .writepage = ext4_writepage,
- .write_begin = ext4_write_begin,
- .write_end = ext4_writeback_write_end,
- .bmap = ext4_bmap,
- .invalidatepage = ext4_invalidatepage,
- .releasepage = ext4_releasepage,
- .direct_IO = ext4_direct_IO,
- .migratepage = buffer_migrate_page,
- .is_partially_uptodate = block_is_partially_uptodate,
- .error_remove_page = generic_error_remove_page,
-};
-
-static const struct address_space_operations ext4_journalled_aops = {
- .readpage = ext4_readpage,
- .readpages = ext4_readpages,
- .writepage = ext4_writepage,
- .write_begin = ext4_write_begin,
- .write_end = ext4_journalled_write_end,
- .set_page_dirty = ext4_journalled_set_page_dirty,
- .bmap = ext4_bmap,
- .invalidatepage = ext4_invalidatepage,
- .releasepage = ext4_releasepage,
- .direct_IO = ext4_direct_IO,
- .is_partially_uptodate = block_is_partially_uptodate,
- .error_remove_page = generic_error_remove_page,
-};
-
-static const struct address_space_operations ext4_da_aops = {
- .readpage = ext4_readpage,
- .readpages = ext4_readpages,
- .writepage = ext4_writepage,
- .writepages = ext4_da_writepages,
- .write_begin = ext4_da_write_begin,
- .write_end = ext4_da_write_end,
- .bmap = ext4_bmap,
- .invalidatepage = ext4_da_invalidatepage,
- .releasepage = ext4_releasepage,
- .direct_IO = ext4_direct_IO,
- .migratepage = buffer_migrate_page,
- .is_partially_uptodate = block_is_partially_uptodate,
- .error_remove_page = generic_error_remove_page,
-};
-
-void ext4_set_aops(struct inode *inode)
-{
- switch (ext4_inode_journal_mode(inode)) {
- case EXT4_INODE_ORDERED_DATA_MODE:
- if (test_opt(inode->i_sb, DELALLOC))
- inode->i_mapping->a_ops = &ext4_da_aops;
- else
- inode->i_mapping->a_ops = &ext4_ordered_aops;
- break;
- case EXT4_INODE_WRITEBACK_DATA_MODE:
- if (test_opt(inode->i_sb, DELALLOC))
- inode->i_mapping->a_ops = &ext4_da_aops;
- else
- inode->i_mapping->a_ops = &ext4_writeback_aops;
- break;
- case EXT4_INODE_JOURNAL_DATA_MODE:
- inode->i_mapping->a_ops = &ext4_journalled_aops;
- break;
- default:
- BUG();
- }
-}
-
-
-/*
- * ext4_discard_partial_page_buffers()
- * Wrapper function for ext4_discard_partial_page_buffers_no_lock.
- * This function finds and locks the page containing the offset
- * "from" and passes it to ext4_discard_partial_page_buffers_no_lock.
- * Calling functions that already have the page locked should call
- * ext4_discard_partial_page_buffers_no_lock directly.
- */
-int ext4_discard_partial_page_buffers(handle_t *handle,
- struct address_space *mapping, loff_t from,
- loff_t length, int flags)
-{
- struct inode *inode = mapping->host;
- struct page *page;
- int err = 0;
-
- page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
- mapping_gfp_mask(mapping) & ~__GFP_FS);
- if (!page)
- return -ENOMEM;
-
- err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page,
- from, length, flags);
-
- unlock_page(page);
- page_cache_release(page);
- return err;
-}
-
-/*
- * ext4_discard_partial_page_buffers_no_lock()
- * Zeros a page range of length 'length' starting from offset 'from'.
- * Buffer heads that correspond to the block aligned regions of the
- * zeroed range will be unmapped. Non-block-aligned regions
- * will have the corresponding buffer head mapped if needed so that
- * that region of the page can be updated with the partial zero-out.
- *
- * This function assumes that the page has already been locked.
- * The range to be discarded must be contained within the given page.
- * If the specified range exceeds the end of the page it will be shortened
- * to the end of the page that corresponds to 'from'. This function is
- * appropriate for updating a page and its buffer heads to be unmapped and
- * zeroed for blocks that have been either released, or are going to be
- * released.
- *
- * handle: The journal handle
- * inode: The files inode
- * page: A locked page that contains the offset "from"
- * from: The starting byte offset (from the beginning of the file)
- * to begin discarding
- * len: The length of bytes to discard
- * flags: Optional flags that may be used:
- *
- * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED
- * Only zero the regions of the page whose buffer heads
- * have already been unmapped. This flag is appropriate
- *	for updating the contents of a page whose blocks may
- * have already been released, and we only want to zero
- * out the regions that correspond to those released blocks.
- *
- * Returns zero on success or negative on failure.
- */
-static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
- struct inode *inode, struct page *page, loff_t from,
- loff_t length, int flags)
-{
- ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
- unsigned int offset = from & (PAGE_CACHE_SIZE-1);
- unsigned int blocksize, max, pos;
- ext4_lblk_t iblock;
- struct buffer_head *bh;
- int err = 0;
-
- blocksize = inode->i_sb->s_blocksize;
- max = PAGE_CACHE_SIZE - offset;
-
- if (index != page->index)
- return -EINVAL;
-
- /*
- * correct length if it does not fall between
- * 'from' and the end of the page
- */
- if (length > max || length < 0)
- length = max;
-
- iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
-
- if (!page_has_buffers(page))
- create_empty_buffers(page, blocksize, 0);
-
- /* Find the buffer that contains "offset" */
- bh = page_buffers(page);
- pos = blocksize;
- while (offset >= pos) {
- bh = bh->b_this_page;
- iblock++;
- pos += blocksize;
- }
-
- pos = offset;
- while (pos < offset + length) {
- unsigned int end_of_block, range_to_discard;
-
- err = 0;
-
- /* The length of space left to zero and unmap */
- range_to_discard = offset + length - pos;
-
- /* The length of space until the end of the block */
- end_of_block = blocksize - (pos & (blocksize-1));
-
- /*
- * Do not unmap or zero past end of block
- * for this buffer head
- */
- if (range_to_discard > end_of_block)
- range_to_discard = end_of_block;
-
-
- /*
-		 * Skip this buffer head if we are only zeroing unmapped
- * regions of the page
- */
- if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED &&
- buffer_mapped(bh))
- goto next;
-
- /* If the range is block aligned, unmap */
- if (range_to_discard == blocksize) {
- clear_buffer_dirty(bh);
- bh->b_bdev = NULL;
- clear_buffer_mapped(bh);
- clear_buffer_req(bh);
- clear_buffer_new(bh);
- clear_buffer_delay(bh);
- clear_buffer_unwritten(bh);
- clear_buffer_uptodate(bh);
- zero_user(page, pos, range_to_discard);
- BUFFER_TRACE(bh, "Buffer discarded");
- goto next;
- }
-
- /*
- * If this block is not completely contained in the range
- * to be discarded, then it is not going to be released. Because
- * we need to keep this block, we need to make sure this part
-		 * of the page is uptodate before we modify it by writing
- * partial zeros on it.
- */
- if (!buffer_mapped(bh)) {
- /*
- * Buffer head must be mapped before we can read
- * from the block
- */
- BUFFER_TRACE(bh, "unmapped");
- ext4_get_block(inode, iblock, bh, 0);
- /* unmapped? It's a hole - nothing to do */
- if (!buffer_mapped(bh)) {
- BUFFER_TRACE(bh, "still unmapped");
- goto next;
- }
- }
-
- /* Ok, it's mapped. Make sure it's up-to-date */
- if (PageUptodate(page))
- set_buffer_uptodate(bh);
-
- if (!buffer_uptodate(bh)) {
- err = -EIO;
- ll_rw_block(READ, 1, &bh);
- wait_on_buffer(bh);
- /* Uhhuh. Read error. Complain and punt.*/
- if (!buffer_uptodate(bh))
- goto next;
- }
-
- if (ext4_should_journal_data(inode)) {
- BUFFER_TRACE(bh, "get write access");
- err = ext4_journal_get_write_access(handle, bh);
- if (err)
- goto next;
- }
-
- zero_user(page, pos, range_to_discard);
-
- err = 0;
- if (ext4_should_journal_data(inode)) {
- err = ext4_handle_dirty_metadata(handle, inode, bh);
- } else
- mark_buffer_dirty(bh);
-
- BUFFER_TRACE(bh, "Partial buffer zeroed");
-next:
- bh = bh->b_this_page;
- iblock++;
- pos += range_to_discard;
- }
-
- return err;
-}
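
The clipping logic in the loop above splits an arbitrary byte range into per-block chunks: each step covers at most the distance to the next block boundary, and only whole-block chunks take the unmap path. A userspace model of just that arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned blocksize = 1024, offset = 300, length = 2000, pos;

	pos = offset;
	while (pos < offset + length) {
		unsigned range = offset + length - pos;		/* bytes left */
		unsigned end_of_block = blocksize - (pos & (blocksize - 1));

		if (range > end_of_block)
			range = end_of_block;	/* clip at block boundary */
		printf("zero %u bytes at %u%s\n", range, pos,
		       range == blocksize ? " (whole block: unmap)" : "");
		pos += range;
	}
	return 0;
}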
-
-int ext4_can_truncate(struct inode *inode)
-{
- if (S_ISREG(inode->i_mode))
- return 1;
- if (S_ISDIR(inode->i_mode))
- return 1;
- if (S_ISLNK(inode->i_mode))
- return !ext4_inode_is_fast_symlink(inode);
- return 0;
-}
-
-/*
- * ext4_punch_hole: punches a hole in a file by releasing the blocks
- * associated with the given offset and length
- *
- * @inode: File inode
- * @offset: The offset where the hole will begin
- * @len: The length of the hole
- *
- * Returns: 0 on success or negative on failure
- */
-
-int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
-{
- struct inode *inode = file->f_path.dentry->d_inode;
- if (!S_ISREG(inode->i_mode))
- return -EOPNOTSUPP;
-
- if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
- /* TODO: Add support for non extent hole punching */
- return -EOPNOTSUPP;
- }
-
- if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
- /* TODO: Add support for bigalloc file systems */
- return -EOPNOTSUPP;
- }
-
- return ext4_ext_punch_hole(file, offset, length);
-}
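-
-/*
- * A minimal user-space sketch of how this path is reached (illustrative
- * only; 'punch' is a hypothetical helper, not part of this file). The
- * kernel requires FALLOC_FL_PUNCH_HOLE to be combined with
- * FALLOC_FL_KEEP_SIZE:
- *
- *	#define _GNU_SOURCE
- *	#include <fcntl.h>
- *	#include <linux/falloc.h>
- *
- *	static int punch(int fd, off_t offset, off_t len)
- *	{
- *		return fallocate(fd, FALLOC_FL_PUNCH_HOLE |
- *				 FALLOC_FL_KEEP_SIZE, offset, len);
- *	}
- *
- * On this version of ext4 the call fails with EOPNOTSUPP for
- * non-extent files and bigalloc filesystems, per the checks above.
- */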
-
-/*
- * ext4_truncate()
- *
- * We block out ext4_get_block() block instantiations across the entire
- * transaction, and VFS/VM ensures that ext4_truncate() cannot run
- * simultaneously on behalf of the same inode.
- *
- * As we work through the truncate and commit bits of it to the journal there
- * is one core, guiding principle: the file's tree must always be consistent on
- * disk. We must be able to restart the truncate after a crash.
- *
- * The file's tree may be transiently inconsistent in memory (although it
- * probably isn't), but whenever we close off and commit a journal transaction,
- * the contents of (the filesystem + the journal) must be consistent and
- * restartable. It's pretty simple, really: bottom up, right to left (although
- * left-to-right works OK too).
- *
- * Note that at recovery time, journal replay occurs *before* the restart of
- * truncate against the orphan inode list.
- *
- * The committed inode has the new, desired i_size (which is the same as
- * i_disksize in this case). After a crash, ext4_orphan_cleanup() will see
- * that this inode's truncate did not complete and it will again call
- * ext4_truncate() to have another go. So there will be instantiated blocks
- * to the right of the truncation point in a crashed ext4 filesystem. But
- * that's fine - as long as they are linked from the inode, the post-crash
- * ext4_truncate() run will find them and release them.
- */
-void ext4_truncate(struct inode *inode)
-{
- trace_ext4_truncate_enter(inode);
-
- if (!ext4_can_truncate(inode))
- return;
-
- ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
-
- if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
- ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
-
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- ext4_ext_truncate(inode);
- else
- ext4_ind_truncate(inode);
-
- trace_ext4_truncate_exit(inode);
-}
-
-/*
- * ext4_get_inode_loc returns with an extra refcount against the inode's
- * underlying buffer_head on success. If 'in_mem' is true, we have all
- * data in memory that is needed to recreate the on-disk version of this
- * inode.
- */
-static int __ext4_get_inode_loc(struct inode *inode,
- struct ext4_iloc *iloc, int in_mem)
-{
- struct ext4_group_desc *gdp;
- struct buffer_head *bh;
- struct super_block *sb = inode->i_sb;
- ext4_fsblk_t block;
- int inodes_per_block, inode_offset;
-
- iloc->bh = NULL;
- if (!ext4_valid_inum(sb, inode->i_ino))
- return -EIO;
-
- iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
- gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
- if (!gdp)
- return -EIO;
-
- /*
- * Figure out the offset within the block group inode table
- */
- inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
- inode_offset = ((inode->i_ino - 1) %
- EXT4_INODES_PER_GROUP(sb));
- block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
- iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
-
- bh = sb_getblk(sb, block);
- if (!bh) {
- EXT4_ERROR_INODE_BLOCK(inode, block,
- "unable to read itable block");
- return -EIO;
- }
- if (!buffer_uptodate(bh)) {
- lock_buffer(bh);
-
- /*
- * If the buffer has the write error flag, we have failed
- * to write out another inode in the same block. In this
- * case, we don't want to read the block from disk, since
- * that would bring back stale (old) inode data.
- */
- if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
- set_buffer_uptodate(bh);
-
- if (buffer_uptodate(bh)) {
- /* someone brought it uptodate while we waited */
- unlock_buffer(bh);
- goto has_buffer;
- }
-
- /*
- * If we have all information of the inode in memory and this
- * is the only valid inode in the block, we need not read the
- * block.
- */
- if (in_mem) {
- struct buffer_head *bitmap_bh;
- int i, start;
-
- start = inode_offset & ~(inodes_per_block - 1);
-
- /* Is the inode bitmap in cache? */
- bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
- if (!bitmap_bh)
- goto make_io;
-
- /*
- * If the inode bitmap isn't in cache then the
- * optimisation may end up performing two reads instead
- * of one, so skip it.
- */
- if (!buffer_uptodate(bitmap_bh)) {
- brelse(bitmap_bh);
- goto make_io;
- }
- for (i = start; i < start + inodes_per_block; i++) {
- if (i == inode_offset)
- continue;
- if (ext4_test_bit(i, bitmap_bh->b_data))
- break;
- }
- brelse(bitmap_bh);
- if (i == start + inodes_per_block) {
- /* all other inodes are free, so skip I/O */
- memset(bh->b_data, 0, bh->b_size);
- set_buffer_uptodate(bh);
- unlock_buffer(bh);
- goto has_buffer;
- }
- }
-
-make_io:
- /*
- * If we need to do any I/O, try to pre-readahead extra
- * blocks from the inode table.
- */
- if (EXT4_SB(sb)->s_inode_readahead_blks) {
- ext4_fsblk_t b, end, table;
- unsigned num;
-
- table = ext4_inode_table(sb, gdp);
- /* s_inode_readahead_blks is always a power of 2 */
- b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
- if (table > b)
- b = table;
- end = b + EXT4_SB(sb)->s_inode_readahead_blks;
- num = EXT4_INODES_PER_GROUP(sb);
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
- num -= ext4_itable_unused_count(sb, gdp);
- table += num / inodes_per_block;
- if (end > table)
- end = table;
- while (b <= end)
- sb_breadahead(sb, b++);
- }
-
- /*
- * There are other valid inodes in the buffer, this inode
- * has in-inode xattrs, or we don't have this inode in memory.
- * Read the block from disk.
- */
- trace_ext4_load_inode(inode);
- get_bh(bh);
- bh->b_end_io = end_buffer_read_sync;
- submit_bh(READ | REQ_META | REQ_PRIO, bh);
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh)) {
- EXT4_ERROR_INODE_BLOCK(inode, block,
- "unable to read itable block");
- brelse(bh);
- return -EIO;
- }
- }
-has_buffer:
- iloc->bh = bh;
- return 0;
-}
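-
-/*
- * The location arithmetic above, restated as a standalone sketch
- * (illustrative names, not kernel API). For inode number ino, with
- * ipg = inodes per group, ipb = inodes per block and isize = the
- * on-disk inode size:
- *
- *	group  = (ino - 1) / ipg;
- *	index  = (ino - 1) % ipg;
- *	block  = inode_table_block(group) + index / ipb;
- *	offset = (index % ipb) * isize;
- *
- * For example, with ipg = 8192 and ipb = 16 (4K blocks, 256-byte
- * inodes), inode 20000 is index 3615 in group 2, i.e. 225 blocks into
- * that group's inode table, at byte offset 15 * 256 = 3840.
- */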
-
-int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
-{
- /* We have all inode data except xattrs in memory here. */
- return __ext4_get_inode_loc(inode, iloc,
- !ext4_test_inode_state(inode, EXT4_STATE_XATTR));
-}
-
-void ext4_set_inode_flags(struct inode *inode)
-{
- unsigned int flags = EXT4_I(inode)->i_flags;
-
- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
- if (flags & EXT4_SYNC_FL)
- inode->i_flags |= S_SYNC;
- if (flags & EXT4_APPEND_FL)
- inode->i_flags |= S_APPEND;
- if (flags & EXT4_IMMUTABLE_FL)
- inode->i_flags |= S_IMMUTABLE;
- if (flags & EXT4_NOATIME_FL)
- inode->i_flags |= S_NOATIME;
- if (flags & EXT4_DIRSYNC_FL)
- inode->i_flags |= S_DIRSYNC;
-}
-
-/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
-void ext4_get_inode_flags(struct ext4_inode_info *ei)
-{
- unsigned int vfs_fl;
- unsigned long old_fl, new_fl;
-
- do {
- vfs_fl = ei->vfs_inode.i_flags;
- old_fl = ei->i_flags;
- new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
- EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
- EXT4_DIRSYNC_FL);
- if (vfs_fl & S_SYNC)
- new_fl |= EXT4_SYNC_FL;
- if (vfs_fl & S_APPEND)
- new_fl |= EXT4_APPEND_FL;
- if (vfs_fl & S_IMMUTABLE)
- new_fl |= EXT4_IMMUTABLE_FL;
- if (vfs_fl & S_NOATIME)
- new_fl |= EXT4_NOATIME_FL;
- if (vfs_fl & S_DIRSYNC)
- new_fl |= EXT4_DIRSYNC_FL;
- } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
-}
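-
-/*
- * The loop above is the classic lock-free read-modify-write pattern:
- * snapshot the old flags, compute the new value, and retry if another
- * CPU raced in between. A generic user-space sketch of the same
- * pattern using C11 atomics (illustrative, not kernel code):
- *
- *	#include <stdatomic.h>
- *
- *	static void set_flag_bits(_Atomic unsigned long *flags,
- *				  unsigned long bits)
- *	{
- *		unsigned long old = atomic_load(flags);
- *		while (!atomic_compare_exchange_weak(flags, &old,
- *						     old | bits))
- *			;
- *	}
- *
- * On failure, atomic_compare_exchange_weak() reloads 'old' with the
- * current value, so the loop recomputes 'old | bits' and retries.
- */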
-
-static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
- struct ext4_inode_info *ei)
-{
- blkcnt_t i_blocks;
- struct inode *inode = &(ei->vfs_inode);
- struct super_block *sb = inode->i_sb;
-
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
- /* we are using combined 48 bit field */
- i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 |
- le32_to_cpu(raw_inode->i_blocks_lo);
- if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) {
- /* i_blocks is in units of the file system block size */
- return i_blocks << (inode->i_blkbits - 9);
- } else {
- return i_blocks;
- }
- } else {
- return le32_to_cpu(raw_inode->i_blocks_lo);
- }
-}
-
-struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
-{
- struct ext4_iloc iloc;
- struct ext4_inode *raw_inode;
- struct ext4_inode_info *ei;
- struct inode *inode;
- journal_t *journal = EXT4_SB(sb)->s_journal;
- long ret;
- int block;
-
- inode = iget_locked(sb, ino);
- if (!inode)
- return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
- return inode;
-
- ei = EXT4_I(inode);
- iloc.bh = NULL;
-
- ret = __ext4_get_inode_loc(inode, &iloc, 0);
- if (ret < 0)
- goto bad_inode;
- raw_inode = ext4_raw_inode(&iloc);
- inode->i_mode = le16_to_cpu(raw_inode->i_mode);
- inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
- inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
- if (!(test_opt(inode->i_sb, NO_UID32))) {
- inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
- inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
- }
- set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
-
- ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
- ei->i_dir_start_lookup = 0;
- ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
- /* We now have enough fields to check if the inode was active or not.
- * This is needed because nfsd might try to access dead inodes;
- * the test is the same one that e2fsck uses.
- * NeilBrown 1999oct15
- */
- if (inode->i_nlink == 0) {
- if (inode->i_mode == 0 ||
- !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
- /* this inode is deleted */
- ret = -ESTALE;
- goto bad_inode;
- }
- /* The only unlinked inodes we let through here have
- * valid i_mode and are being read by the orphan
- * recovery code: that's fine, we're about to complete
- * the process of deleting those. */
- }
- ei->i_flags = le32_to_cpu(raw_inode->i_flags);
- inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
- ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
- ei->i_file_acl |=
- ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
- inode->i_size = ext4_isize(raw_inode);
- ei->i_disksize = inode->i_size;
-#ifdef CONFIG_QUOTA
- ei->i_reserved_quota = 0;
-#endif
- inode->i_generation = le32_to_cpu(raw_inode->i_generation);
- ei->i_block_group = iloc.block_group;
- ei->i_last_alloc_group = ~0;
- /*
- * NOTE! The in-memory inode i_data array is in little-endian order
- * even on big-endian machines: we do NOT byteswap the block numbers!
- */
- for (block = 0; block < EXT4_N_BLOCKS; block++)
- ei->i_data[block] = raw_inode->i_block[block];
- INIT_LIST_HEAD(&ei->i_orphan);
-
- /*
- * Set transaction id's of transactions that have to be committed
- * to finish f[data]sync. We set them to currently running transaction
- * as we cannot be sure that the inode or some of its metadata isn't
- * part of the transaction - the inode could have been reclaimed and
- * now it is reread from disk.
- */
- if (journal) {
- transaction_t *transaction;
- tid_t tid;
-
- read_lock(&journal->j_state_lock);
- if (journal->j_running_transaction)
- transaction = journal->j_running_transaction;
- else
- transaction = journal->j_committing_transaction;
- if (transaction)
- tid = transaction->t_tid;
- else
- tid = journal->j_commit_sequence;
- read_unlock(&journal->j_state_lock);
- ei->i_sync_tid = tid;
- ei->i_datasync_tid = tid;
- }
-
- if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
- ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
- if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
- EXT4_INODE_SIZE(inode->i_sb)) {
- ret = -EIO;
- goto bad_inode;
- }
- if (ei->i_extra_isize == 0) {
- /* The extra space is currently unused. Use it. */
- ei->i_extra_isize = sizeof(struct ext4_inode) -
- EXT4_GOOD_OLD_INODE_SIZE;
- } else {
- __le32 *magic = (void *)raw_inode +
- EXT4_GOOD_OLD_INODE_SIZE +
- ei->i_extra_isize;
- if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
- ext4_set_inode_state(inode, EXT4_STATE_XATTR);
- }
- } else
- ei->i_extra_isize = 0;
-
- EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
- EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
- EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
- EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
-
- inode->i_version = le32_to_cpu(raw_inode->i_disk_version);
- if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
- if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
- inode->i_version |=
- (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
- }
-
- ret = 0;
- if (ei->i_file_acl &&
- !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
- EXT4_ERROR_INODE(inode, "bad extended attribute block %llu",
- ei->i_file_acl);
- ret = -EIO;
- goto bad_inode;
- } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
- if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- (S_ISLNK(inode->i_mode) &&
- !ext4_inode_is_fast_symlink(inode)))
- /* Validate extent which is part of inode */
- ret = ext4_ext_check_inode(inode);
- } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- (S_ISLNK(inode->i_mode) &&
- !ext4_inode_is_fast_symlink(inode))) {
- /* Validate block references which are part of inode */
- ret = ext4_ind_check_inode(inode);
- }
- if (ret)
- goto bad_inode;
-
- if (S_ISREG(inode->i_mode)) {
- inode->i_op = &ext4_file_inode_operations;
- inode->i_fop = &ext4_file_operations;
- ext4_set_aops(inode);
- } else if (S_ISDIR(inode->i_mode)) {
- inode->i_op = &ext4_dir_inode_operations;
- inode->i_fop = &ext4_dir_operations;
- } else if (S_ISLNK(inode->i_mode)) {
- if (ext4_inode_is_fast_symlink(inode)) {
- inode->i_op = &ext4_fast_symlink_inode_operations;
- nd_terminate_link(ei->i_data, inode->i_size,
- sizeof(ei->i_data) - 1);
- } else {
- inode->i_op = &ext4_symlink_inode_operations;
- ext4_set_aops(inode);
- }
- } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
- S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
- inode->i_op = &ext4_special_inode_operations;
- if (raw_inode->i_block[0])
- init_special_inode(inode, inode->i_mode,
- old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
- else
- init_special_inode(inode, inode->i_mode,
- new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
- } else {
- ret = -EIO;
- EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
- goto bad_inode;
- }
- brelse(iloc.bh);
- ext4_set_inode_flags(inode);
- unlock_new_inode(inode);
- return inode;
-
-bad_inode:
- brelse(iloc.bh);
- iget_failed(inode);
- return ERR_PTR(ret);
-}
-
-static int ext4_inode_blocks_set(handle_t *handle,
- struct ext4_inode *raw_inode,
- struct ext4_inode_info *ei)
-{
- struct inode *inode = &(ei->vfs_inode);
- u64 i_blocks = inode->i_blocks;
- struct super_block *sb = inode->i_sb;
-
- if (i_blocks <= ~0U) {
- /*
- * i_blocks can be represented in a 32 bit variable
- * as multiple of 512 bytes
- */
- raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
- raw_inode->i_blocks_high = 0;
- ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
- return 0;
- }
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
- return -EFBIG;
-
- if (i_blocks <= 0xffffffffffffULL) {
- /*
- * i_blocks can be represented in a 48 bit variable
- * as multiple of 512 bytes
- */
- raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
- raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
- ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
- } else {
- ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE);
- /* i_blocks is stored in units of the file system block size */
- i_blocks = i_blocks >> (inode->i_blkbits - 9);
- raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
- raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
- }
- return 0;
-}
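-
-/*
- * Worked example of the encoding above (illustrative numbers): on a
- * 4K-block filesystem (i_blkbits = 12), i_blocks = 2^33 512-byte
- * sectors does not fit in 32 bits but fits in 48, so it is stored as
- * i_blocks_lo = 0 and i_blocks_high = 0x2 with HUGE_FILE clear. Only
- * beyond 2^48 - 1 sectors do we set EXT4_INODE_HUGE_FILE and switch
- * the unit to filesystem blocks, shifting right by i_blkbits - 9 = 3
- * (i.e. dividing by 8).
- */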
-
-/*
- * Post the struct inode info into an on-disk inode location in the
- * buffer-cache. This gobbles the caller's reference to the
- * buffer_head in the inode location struct.
- *
- * The caller must have write access to iloc->bh.
- */
-static int ext4_do_update_inode(handle_t *handle,
- struct inode *inode,
- struct ext4_iloc *iloc)
-{
- struct ext4_inode *raw_inode = ext4_raw_inode(iloc);
- struct ext4_inode_info *ei = EXT4_I(inode);
- struct buffer_head *bh = iloc->bh;
- int err = 0, rc, block;
-
- /* For fields not tracked in the in-memory inode,
- * initialise them to zero for new inodes. */
- if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
- memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
-
- ext4_get_inode_flags(ei);
- raw_inode->i_mode = cpu_to_le16(inode->i_mode);
- if (!(test_opt(inode->i_sb, NO_UID32))) {
- raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
- raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
-/*
- * Fix up interoperability with old kernels. Otherwise, old inodes get
- * re-used with the upper 16 bits of the uid/gid intact
- */
- if (!ei->i_dtime) {
- raw_inode->i_uid_high =
- cpu_to_le16(high_16_bits(inode->i_uid));
- raw_inode->i_gid_high =
- cpu_to_le16(high_16_bits(inode->i_gid));
- } else {
- raw_inode->i_uid_high = 0;
- raw_inode->i_gid_high = 0;
- }
- } else {
- raw_inode->i_uid_low =
- cpu_to_le16(fs_high2lowuid(inode->i_uid));
- raw_inode->i_gid_low =
- cpu_to_le16(fs_high2lowgid(inode->i_gid));
- raw_inode->i_uid_high = 0;
- raw_inode->i_gid_high = 0;
- }
- raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
-
- EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
- EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
- EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
- EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
-
- if (ext4_inode_blocks_set(handle, raw_inode, ei))
- goto out_brelse;
- raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
- raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
- if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
- cpu_to_le32(EXT4_OS_HURD))
- raw_inode->i_file_acl_high =
- cpu_to_le16(ei->i_file_acl >> 32);
- raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
- ext4_isize_set(raw_inode, ei->i_disksize);
- if (ei->i_disksize > 0x7fffffffULL) {
- struct super_block *sb = inode->i_sb;
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_LARGE_FILE) ||
- EXT4_SB(sb)->s_es->s_rev_level ==
- cpu_to_le32(EXT4_GOOD_OLD_REV)) {
- /* If this is the first large file
- * created, add a flag to the superblock.
- */
- err = ext4_journal_get_write_access(handle,
- EXT4_SB(sb)->s_sbh);
- if (err)
- goto out_brelse;
- ext4_update_dynamic_rev(sb);
- EXT4_SET_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
- ext4_handle_sync(handle);
- err = ext4_handle_dirty_super(handle, sb);
- }
- }
- raw_inode->i_generation = cpu_to_le32(inode->i_generation);
- if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
- if (old_valid_dev(inode->i_rdev)) {
- raw_inode->i_block[0] =
- cpu_to_le32(old_encode_dev(inode->i_rdev));
- raw_inode->i_block[1] = 0;
- } else {
- raw_inode->i_block[0] = 0;
- raw_inode->i_block[1] =
- cpu_to_le32(new_encode_dev(inode->i_rdev));
- raw_inode->i_block[2] = 0;
- }
- } else
- for (block = 0; block < EXT4_N_BLOCKS; block++)
- raw_inode->i_block[block] = ei->i_data[block];
-
- raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
- if (ei->i_extra_isize) {
- if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
- raw_inode->i_version_hi =
- cpu_to_le32(inode->i_version >> 32);
- raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
- }
-
- BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- rc = ext4_handle_dirty_metadata(handle, NULL, bh);
- if (!err)
- err = rc;
- ext4_clear_inode_state(inode, EXT4_STATE_NEW);
-
- ext4_update_inode_fsync_trans(handle, inode, 0);
-out_brelse:
- brelse(bh);
- ext4_std_error(inode->i_sb, err);
- return err;
-}
-
-/*
- * ext4_write_inode()
- *
- * We are called from a few places:
- *
- * - Within generic_file_write() for O_SYNC files.
- * Here, there will be no transaction running. We wait for any running
- * transaction to commit.
- *
- * - Within sys_sync(), kupdate and such.
- * We wait on commit, if we are told to.
- *
- * - Within prune_icache() (PF_MEMALLOC == true)
- * Here we simply return. We can't afford to block kswapd on the
- * journal commit.
- *
- * In all cases it is actually safe for us to return without doing anything,
- * because the inode has been copied into a raw inode buffer in
- * ext4_mark_inode_dirty(). This is a correctness thing for O_SYNC and for
- * knfsd.
- *
- * Note that we are absolutely dependent upon all inode dirtiers doing the
- * right thing: they *must* call mark_inode_dirty() after dirtying info in
- * which we are interested.
- *
- * It would be a bug for them to not do this. The code:
- *
- * mark_inode_dirty(inode)
- * stuff();
- * inode->i_size = expr;
- *
- * is in error because a kswapd-driven write_inode() could occur while
- * `stuff()' is running, and the new i_size will be lost. Plus the inode
- * will no longer be on the superblock's dirty inode list.
- */
-int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
-{
- int err;
-
- if (current->flags & PF_MEMALLOC)
- return 0;
-
- if (EXT4_SB(inode->i_sb)->s_journal) {
- if (ext4_journal_current_handle()) {
- jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
- dump_stack();
- return -EIO;
- }
-
- if (wbc->sync_mode != WB_SYNC_ALL)
- return 0;
-
- err = ext4_force_commit(inode->i_sb);
- } else {
- struct ext4_iloc iloc;
-
- err = __ext4_get_inode_loc(inode, &iloc, 0);
- if (err)
- return err;
- if (wbc->sync_mode == WB_SYNC_ALL)
- sync_dirty_buffer(iloc.bh);
- if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
- EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
- "IO error syncing inode");
- err = -EIO;
- }
- brelse(iloc.bh);
- }
- return err;
-}
-
-/*
- * ext4_setattr()
- *
- * Called from notify_change.
- *
- * We want to trap VFS attempts to truncate the file as soon as
- * possible. In particular, we want to make sure that when the VFS
- * shrinks i_size, we put the inode on the orphan list and modify
- * i_disksize immediately, so that during the subsequent flushing of
- * dirty pages and freeing of disk blocks, we can guarantee that any
- * commit will leave the blocks being flushed in an unused state on
- * disk. (On recovery, the inode will get truncated and the blocks will
- * be freed, so we have a strong guarantee that no future commit will
- * leave these blocks visible to the user.)
- *
- * Another thing we have to assure is that if we are in ordered mode
- * and inode is still attached to the committing transaction, we must
- * start writeout of all the dirty pages which are being truncated.
- * This way we are sure that all the data written in the previous
- * transaction are already on disk (truncate waits for pages under
- * writeback).
- *
- * Called with inode->i_mutex down.
- */
-int ext4_setattr(struct dentry *dentry, struct iattr *attr)
-{
- struct inode *inode = dentry->d_inode;
- int error, rc = 0;
- int orphan = 0;
- const unsigned int ia_valid = attr->ia_valid;
-
- error = inode_change_ok(inode, attr);
- if (error)
- return error;
-
- if (is_quota_modification(inode, attr))
- dquot_initialize(inode);
- if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
- (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
- handle_t *handle;
-
- /* (user+group)*(old+new) structure, inode write (sb,
- * inode block, ? - but truncate inode update has it) */
- handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
- EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
- if (IS_ERR(handle)) {
- error = PTR_ERR(handle);
- goto err_out;
- }
- error = dquot_transfer(inode, attr);
- if (error) {
- ext4_journal_stop(handle);
- return error;
- }
- /* Update corresponding info in inode so that everything is in
- * one transaction */
- if (attr->ia_valid & ATTR_UID)
- inode->i_uid = attr->ia_uid;
- if (attr->ia_valid & ATTR_GID)
- inode->i_gid = attr->ia_gid;
- error = ext4_mark_inode_dirty(handle, inode);
- ext4_journal_stop(handle);
- }
-
- if (attr->ia_valid & ATTR_SIZE) {
- inode_dio_wait(inode);
-
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-
- if (attr->ia_size > sbi->s_bitmap_maxbytes)
- return -EFBIG;
- }
- }
-
- if (S_ISREG(inode->i_mode) &&
- attr->ia_valid & ATTR_SIZE &&
- (attr->ia_size < inode->i_size)) {
- handle_t *handle;
-
- handle = ext4_journal_start(inode, 3);
- if (IS_ERR(handle)) {
- error = PTR_ERR(handle);
- goto err_out;
- }
- if (ext4_handle_valid(handle)) {
- error = ext4_orphan_add(handle, inode);
- orphan = 1;
- }
- EXT4_I(inode)->i_disksize = attr->ia_size;
- rc = ext4_mark_inode_dirty(handle, inode);
- if (!error)
- error = rc;
- ext4_journal_stop(handle);
-
- if (ext4_should_order_data(inode)) {
- error = ext4_begin_ordered_truncate(inode,
- attr->ia_size);
- if (error) {
- /* Do as much error cleanup as possible */
- handle = ext4_journal_start(inode, 3);
- if (IS_ERR(handle)) {
- ext4_orphan_del(NULL, inode);
- goto err_out;
- }
- ext4_orphan_del(handle, inode);
- orphan = 0;
- ext4_journal_stop(handle);
- goto err_out;
- }
- }
- }
-
- if (attr->ia_valid & ATTR_SIZE) {
- if (attr->ia_size != i_size_read(inode))
- truncate_setsize(inode, attr->ia_size);
- ext4_truncate(inode);
- }
-
- if (!rc) {
- setattr_copy(inode, attr);
- mark_inode_dirty(inode);
- }
-
- /*
- * If the call to ext4_truncate failed to get a transaction handle at
- * all, we need to clean up the in-core orphan list manually.
- */
- if (orphan && inode->i_nlink)
- ext4_orphan_del(NULL, inode);
-
- if (!rc && (ia_valid & ATTR_MODE))
- rc = ext4_acl_chmod(inode);
-
-err_out:
- ext4_std_error(inode->i_sb, error);
- if (!error)
- error = rc;
- return error;
-}
-
-int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
-{
- struct inode *inode;
- unsigned long delalloc_blocks;
-
- inode = dentry->d_inode;
- generic_fillattr(inode, stat);
-
- /*
- * We can't update i_blocks if the block allocation is delayed
- * otherwise in the case of system crash before the real block
- * allocation is done, we will have i_blocks inconsistent with
- * on-disk file blocks.
- * We always keep i_blocks updated together with real
- * allocation. But so as not to confuse userspace, stat
- * will return the blocks that include the delayed allocation
- * blocks for this file.
- */
- delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
-
- stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
- return 0;
-}
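-
-/*
- * Note on the unit conversion above: stat->blocks is measured in
- * 512-byte sectors, so (blocks << s_blocksize_bits) >> 9 equals
- * blocks << (s_blocksize_bits - 9). For example, with 4K blocks
- * (s_blocksize_bits = 12), 10 delayed-allocation blocks add
- * 10 << 3 = 80 sectors to stat->blocks.
- */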
-
-static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
-{
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- return ext4_ind_trans_blocks(inode, nrblocks, chunk);
- return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
-}
-
-/*
- * Account for index blocks, block group bitmaps and block group
- * descriptor blocks when modifying data blocks and index blocks. In
- * the worst case, the index blocks spread over different block groups.
- *
- * If the data blocks are discontiguous, they may spread over
- * different block groups too. Even if they are contiguous, with
- * flexbg they could still cross a block group boundary.
- *
- * Also account for superblock, inode, quota and xattr blocks.
- */
-static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
-{
- ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
- int gdpblocks;
- int idxblocks;
- int ret = 0;
-
- /*
- * How many index blocks do we need to touch to modify nrblocks?
- * The "chunk" flag indicates whether the nrblocks are
- * physically contiguous on disk.
- *
- * Direct IO and fallocate call get_block to allocate a
- * single extent at a time, so they can set the "chunk" flag.
- */
- idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk);
-
- ret = idxblocks;
-
- /*
- * Now let's see how many group bitmaps and group descriptors need
- * to account
- */
- groups = idxblocks;
- if (chunk)
- groups += 1;
- else
- groups += nrblocks;
-
- gdpblocks = groups;
- if (groups > ngroups)
- groups = ngroups;
- if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
- gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
-
- /* bitmaps and block group descriptor blocks */
- ret += groups + gdpblocks;
-
- /* Blocks for super block, inode, quota and xattr blocks */
- ret += EXT4_META_TRANS_BLOCKS(inode->i_sb);
-
- return ret;
-}
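-
-/*
- * Worked example (illustrative numbers, assuming the filesystem has at
- * least 4 groups): for one contiguous chunk (chunk = 1), suppose
- * ext4_index_trans_blocks() returns idxblocks = 3. Then groups =
- * 3 + 1 = 4, gdpblocks = min(4, s_gdb_count), and the result is
- * 3 + 4 + gdpblocks + EXT4_META_TRANS_BLOCKS(sb) credits.
- */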
-
-/*
- * Calculate the total number of credits to reserve to fit
- * the modification of a single page into a single transaction,
- * which may include multiple chunks of block allocations.
- *
- * This could be called via ext4_write_begin()
- *
- * We need to consider the worst case, when
- * one new block is needed per extent.
- */
-int ext4_writepage_trans_blocks(struct inode *inode)
-{
- int bpp = ext4_journal_blocks_per_page(inode);
- int ret;
-
- ret = ext4_meta_trans_blocks(inode, bpp, 0);
-
- /* Account for data blocks for journalled mode */
- if (ext4_should_journal_data(inode))
- ret += bpp;
- return ret;
-}
-
-/*
- * Calculate the journal credits for a chunk of data modification.
- *
- * This is called from DIO, fallocate, or whoever else calls
- * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks.
- *
- * journal buffers for data blocks are not included here, as DIO
- * and fallocate do not need to journal data buffers.
- */
-int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
-{
- return ext4_meta_trans_blocks(inode, nrblocks, 1);
-}
-
-/*
- * The caller must have previously called ext4_reserve_inode_write().
- * Given this, we know that the caller already has write access to iloc->bh.
- */
-int ext4_mark_iloc_dirty(handle_t *handle,
- struct inode *inode, struct ext4_iloc *iloc)
-{
- int err = 0;
-
- if (IS_I_VERSION(inode))
- inode_inc_iversion(inode);
-
- /* the do_update_inode consumes one bh->b_count */
- get_bh(iloc->bh);
-
- /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */
- err = ext4_do_update_inode(handle, inode, iloc);
- put_bh(iloc->bh);
- return err;
-}
-
-/*
- * On success, we end up with an outstanding reference count against
- * iloc->bh. This _must_ be cleaned up later.
- */
-
-int
-ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
- struct ext4_iloc *iloc)
-{
- int err;
-
- err = ext4_get_inode_loc(inode, iloc);
- if (!err) {
- BUFFER_TRACE(iloc->bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, iloc->bh);
- if (err) {
- brelse(iloc->bh);
- iloc->bh = NULL;
- }
- }
- ext4_std_error(inode->i_sb, err);
- return err;
-}
-
-/*
- * Expand an inode by new_extra_isize bytes.
- * Returns 0 on success or negative error number on failure.
- */
-static int ext4_expand_extra_isize(struct inode *inode,
- unsigned int new_extra_isize,
- struct ext4_iloc iloc,
- handle_t *handle)
-{
- struct ext4_inode *raw_inode;
- struct ext4_xattr_ibody_header *header;
-
- if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
- return 0;
-
- raw_inode = ext4_raw_inode(&iloc);
-
- header = IHDR(inode, raw_inode);
-
- /* No extended attributes present */
- if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
- header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
- memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
- new_extra_isize);
- EXT4_I(inode)->i_extra_isize = new_extra_isize;
- return 0;
- }
-
- /* try to expand with EAs present */
- return ext4_expand_extra_isize_ea(inode, new_extra_isize,
- raw_inode, handle);
-}
-
-/*
- * What we do here is to mark the in-core inode as clean with respect to inode
- * dirtiness (it may still be data-dirty).
- * This means that the in-core inode may be reaped by prune_icache
- * without having to perform any I/O. This is a very good thing,
- * because *any* task may call prune_icache - even ones which
- * have a transaction open against a different journal.
- *
- * Is this cheating? Not really. Sure, we haven't written the
- * inode out, but prune_icache isn't a user-visible syncing function.
- * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
- * we start and wait on commits.
- *
- * Is this efficient/effective? Well, we're being nice to the system
- * by cleaning up our inodes proactively so they can be reaped
- * without I/O. But we are potentially leaving up to five seconds'
- * worth of inodes floating about which prune_icache wants us to
- * write out. One way to fix that would be to get prune_icache()
- * to do a write_super() to free up some memory. It has the desired
- * effect.
- */
-int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
-{
- struct ext4_iloc iloc;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- static unsigned int mnt_count;
- int err, ret;
-
- might_sleep();
- trace_ext4_mark_inode_dirty(inode, _RET_IP_);
- err = ext4_reserve_inode_write(handle, inode, &iloc);
- if (ext4_handle_valid(handle) &&
- EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
- !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
- /*
- * We need extra buffer credits since we may write into EA block
- * with this same handle. If journal_extend fails, then it will
- * only result in a minor loss of functionality for that inode.
- * If this is felt to be critical, then e2fsck should be run to
- * force a large enough s_min_extra_isize.
- */
- if ((jbd2_journal_extend(handle,
- EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) {
- ret = ext4_expand_extra_isize(inode,
- sbi->s_want_extra_isize,
- iloc, handle);
- if (ret) {
- ext4_set_inode_state(inode,
- EXT4_STATE_NO_EXPAND);
- if (mnt_count !=
- le16_to_cpu(sbi->s_es->s_mnt_count)) {
- ext4_warning(inode->i_sb,
- "Unable to expand inode %lu. Delete"
- " some EAs or run e2fsck.",
- inode->i_ino);
- mnt_count =
- le16_to_cpu(sbi->s_es->s_mnt_count);
- }
- }
- }
- }
- if (!err)
- err = ext4_mark_iloc_dirty(handle, inode, &iloc);
- return err;
-}
-
-/*
- * ext4_dirty_inode() is called from __mark_inode_dirty()
- *
- * We're really interested in the case where a file is being extended.
- * i_size has been changed by generic_commit_write() and we thus need
- * to include the updated inode in the current transaction.
- *
- * Also, dquot_alloc_block() will always dirty the inode when blocks
- * are allocated to the file.
- *
- * If the inode is marked synchronous, we don't honour that here - doing
- * so would cause a commit on atime updates, which we don't bother doing.
- * We handle synchronous inodes at the highest possible level.
- */
-void ext4_dirty_inode(struct inode *inode, int flags)
-{
- handle_t *handle;
-
- handle = ext4_journal_start(inode, 2);
- if (IS_ERR(handle))
- goto out;
-
- ext4_mark_inode_dirty(handle, inode);
-
- ext4_journal_stop(handle);
-out:
- return;
-}
-
-#if 0
-/*
- * Bind an inode's backing buffer_head into this transaction, to prevent
- * it from being flushed to disk early. Unlike
- * ext4_reserve_inode_write, this leaves behind no bh reference and
- * returns no iloc structure, so the caller needs to repeat the iloc
- * lookup to mark the inode dirty later.
- */
-static int ext4_pin_inode(handle_t *handle, struct inode *inode)
-{
- struct ext4_iloc iloc;
-
- int err = 0;
- if (handle) {
- err = ext4_get_inode_loc(inode, &iloc);
- if (!err) {
- BUFFER_TRACE(iloc.bh, "get_write_access");
- err = jbd2_journal_get_write_access(handle, iloc.bh);
- if (!err)
- err = ext4_handle_dirty_metadata(handle,
- NULL,
- iloc.bh);
- brelse(iloc.bh);
- }
- }
- ext4_std_error(inode->i_sb, err);
- return err;
-}
-#endif
-
-int ext4_change_inode_journal_flag(struct inode *inode, int val)
-{
- journal_t *journal;
- handle_t *handle;
- int err;
-
- /*
- * We have to be very careful here: changing a data block's
- * journaling status dynamically is dangerous. If we write a
- * data block to the journal, change the status and then delete
- * that block, we risk forgetting to revoke the old log record
- * from the journal and so a subsequent replay can corrupt data.
- * So, first we make sure that the journal is empty and that
- * nobody is changing anything.
- */
-
- journal = EXT4_JOURNAL(inode);
- if (!journal)
- return 0;
- if (is_journal_aborted(journal))
- return -EROFS;
- /* We have to allocate physical blocks for delalloc blocks
- * before flushing the journal; otherwise delalloc blocks cannot
- * be allocated any more. Even worse, a truncate on delalloc blocks
- * could trigger a BUG by flushing delalloc blocks in the journal.
- * There is no delalloc block in non-journal data mode.
- */
- if (val && test_opt(inode->i_sb, DELALLOC)) {
- err = ext4_alloc_da_blocks(inode);
- if (err < 0)
- return err;
- }
-
- jbd2_journal_lock_updates(journal);
-
- /*
- * OK, there are no updates running now, and all cached data is
- * synced to disk. We are now in a completely consistent state
- * which doesn't have anything in the journal, and we know that
- * no filesystem updates are running, so it is safe to modify
- * the inode's in-core data-journaling state flag now.
- */
-
- if (val)
- ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
- else {
- jbd2_journal_flush(journal);
- ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
- }
- ext4_set_aops(inode);
-
- jbd2_journal_unlock_updates(journal);
-
- /* Finally we can mark the inode as dirty. */
-
- handle = ext4_journal_start(inode, 1);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
-
- err = ext4_mark_inode_dirty(handle, inode);
- ext4_handle_sync(handle);
- ext4_journal_stop(handle);
- ext4_std_error(inode->i_sb, err);
-
- return err;
-}
-
-static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
-{
- return !buffer_mapped(bh);
-}
-
-int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct page *page = vmf->page;
- loff_t size;
- unsigned long len;
- int ret;
- struct file *file = vma->vm_file;
- struct inode *inode = file->f_path.dentry->d_inode;
- struct address_space *mapping = inode->i_mapping;
- handle_t *handle;
- get_block_t *get_block;
- int retries = 0;
-
- /*
- * This check is racy but catches the common case. We rely on
- * __block_page_mkwrite() to do a reliable check.
- */
- vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
- /* Delalloc case is easy... */
- if (test_opt(inode->i_sb, DELALLOC) &&
- !ext4_should_journal_data(inode) &&
- !ext4_nonda_switch(inode->i_sb)) {
- do {
- ret = __block_page_mkwrite(vma, vmf,
- ext4_da_get_block_prep);
- } while (ret == -ENOSPC &&
- ext4_should_retry_alloc(inode->i_sb, &retries));
- goto out_ret;
- }
-
- lock_page(page);
- size = i_size_read(inode);
- /* Page got truncated from under us? */
- if (page->mapping != mapping || page_offset(page) > size) {
- unlock_page(page);
- ret = VM_FAULT_NOPAGE;
- goto out;
- }
-
- if (page->index == size >> PAGE_CACHE_SHIFT)
- len = size & ~PAGE_CACHE_MASK;
- else
- len = PAGE_CACHE_SIZE;
- /*
- * Return if we have all the buffers mapped. This avoids the need to do
- * journal_start/journal_stop which can block and take a long time
- */
- if (page_has_buffers(page)) {
- if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
- ext4_bh_unmapped)) {
- /* Wait so that we don't change page under IO */
- wait_on_page_writeback(page);
- ret = VM_FAULT_LOCKED;
- goto out;
- }
- }
- unlock_page(page);
- /* OK, we need to fill the hole... */
- if (ext4_should_dioread_nolock(inode))
- get_block = ext4_get_block_write;
- else
- get_block = ext4_get_block;
-retry_alloc:
- handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
- if (IS_ERR(handle)) {
- ret = VM_FAULT_SIGBUS;
- goto out;
- }
- ret = __block_page_mkwrite(vma, vmf, get_block);
- if (!ret && ext4_should_journal_data(inode)) {
- if (walk_page_buffers(handle, page_buffers(page), 0,
- PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
- unlock_page(page);
- ret = VM_FAULT_SIGBUS;
- ext4_journal_stop(handle);
- goto out;
- }
- ext4_set_inode_state(inode, EXT4_STATE_JDATA);
- }
- ext4_journal_stop(handle);
- if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
- goto retry_alloc;
-out_ret:
- ret = block_page_mkwrite_return(ret);
-out:
- return ret;
-}
diff --git a/ANDROID_3.4.5/fs/ext4/ioctl.c b/ANDROID_3.4.5/fs/ext4/ioctl.c
deleted file mode 100644
index 1365903a..00000000
--- a/ANDROID_3.4.5/fs/ext4/ioctl.c
+++ /dev/null
@@ -1,509 +0,0 @@
-/*
- * linux/fs/ext4/ioctl.c
- *
- * Copyright (C) 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- */
-
-#include <linux/fs.h>
-#include <linux/jbd2.h>
-#include <linux/capability.h>
-#include <linux/time.h>
-#include <linux/compat.h>
-#include <linux/mount.h>
-#include <linux/file.h>
-#include <asm/uaccess.h>
-#include "ext4_jbd2.h"
-#include "ext4.h"
-
-#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
-
-long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
-{
- struct inode *inode = filp->f_dentry->d_inode;
- struct super_block *sb = inode->i_sb;
- struct ext4_inode_info *ei = EXT4_I(inode);
- unsigned int flags;
-
- ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
-
- switch (cmd) {
- case EXT4_IOC_GETFLAGS:
- ext4_get_inode_flags(ei);
- flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
- return put_user(flags, (int __user *) arg);
- case EXT4_IOC_SETFLAGS: {
- handle_t *handle = NULL;
- int err, migrate = 0;
- struct ext4_iloc iloc;
- unsigned int oldflags, mask, i;
- unsigned int jflag;
-
- if (!inode_owner_or_capable(inode))
- return -EACCES;
-
- if (get_user(flags, (int __user *) arg))
- return -EFAULT;
-
- err = mnt_want_write_file(filp);
- if (err)
- return err;
-
- flags = ext4_mask_flags(inode->i_mode, flags);
-
- err = -EPERM;
- mutex_lock(&inode->i_mutex);
- /* Is it quota file? Do not allow user to mess with it */
- if (IS_NOQUOTA(inode))
- goto flags_out;
-
- oldflags = ei->i_flags;
-
- /* The JOURNAL_DATA flag is modifiable only by root */
- jflag = flags & EXT4_JOURNAL_DATA_FL;
-
- /*
- * The IMMUTABLE and APPEND_ONLY flags can only be changed by
- * the relevant capability.
- *
- * This test looks nicer. Thanks to Pauline Middelink
- */
- if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
- if (!capable(CAP_LINUX_IMMUTABLE))
- goto flags_out;
- }
-
- /*
- * The JOURNAL_DATA flag can only be changed by
- * the relevant capability.
- */
- if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
- if (!capable(CAP_SYS_RESOURCE))
- goto flags_out;
- }
- if (oldflags & EXT4_EXTENTS_FL) {
- /* We don't support clearing the extents flag */
- if (!(flags & EXT4_EXTENTS_FL)) {
- err = -EOPNOTSUPP;
- goto flags_out;
- }
- } else if (flags & EXT4_EXTENTS_FL) {
- /* migrate the file */
- migrate = 1;
- flags &= ~EXT4_EXTENTS_FL;
- }
-
- if (flags & EXT4_EOFBLOCKS_FL) {
- /* we don't support adding EOFBLOCKS flag */
- if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
- err = -EOPNOTSUPP;
- goto flags_out;
- }
- } else if (oldflags & EXT4_EOFBLOCKS_FL)
- ext4_truncate(inode);
-
- handle = ext4_journal_start(inode, 1);
- if (IS_ERR(handle)) {
- err = PTR_ERR(handle);
- goto flags_out;
- }
- if (IS_SYNC(inode))
- ext4_handle_sync(handle);
- err = ext4_reserve_inode_write(handle, inode, &iloc);
- if (err)
- goto flags_err;
-
- for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
- if (!(mask & EXT4_FL_USER_MODIFIABLE))
- continue;
- if (mask & flags)
- ext4_set_inode_flag(inode, i);
- else
- ext4_clear_inode_flag(inode, i);
- }
-
- ext4_set_inode_flags(inode);
- inode->i_ctime = ext4_current_time(inode);
-
- err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-flags_err:
- ext4_journal_stop(handle);
- if (err)
- goto flags_out;
-
- if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
- err = ext4_change_inode_journal_flag(inode, jflag);
- if (err)
- goto flags_out;
- if (migrate)
- err = ext4_ext_migrate(inode);
-flags_out:
- mutex_unlock(&inode->i_mutex);
- mnt_drop_write_file(filp);
- return err;
- }
- case EXT4_IOC_GETVERSION:
- case EXT4_IOC_GETVERSION_OLD:
- return put_user(inode->i_generation, (int __user *) arg);
- case EXT4_IOC_SETVERSION:
- case EXT4_IOC_SETVERSION_OLD: {
- handle_t *handle;
- struct ext4_iloc iloc;
- __u32 generation;
- int err;
-
- if (!inode_owner_or_capable(inode))
- return -EPERM;
-
- err = mnt_want_write_file(filp);
- if (err)
- return err;
- if (get_user(generation, (int __user *) arg)) {
- err = -EFAULT;
- goto setversion_out;
- }
-
- mutex_lock(&inode->i_mutex);
- handle = ext4_journal_start(inode, 1);
- if (IS_ERR(handle)) {
- err = PTR_ERR(handle);
- goto unlock_out;
- }
- err = ext4_reserve_inode_write(handle, inode, &iloc);
- if (err == 0) {
- inode->i_ctime = ext4_current_time(inode);
- inode->i_generation = generation;
- err = ext4_mark_iloc_dirty(handle, inode, &iloc);
- }
- ext4_journal_stop(handle);
-
-unlock_out:
- mutex_unlock(&inode->i_mutex);
-setversion_out:
- mnt_drop_write_file(filp);
- return err;
- }
- case EXT4_IOC_GROUP_EXTEND: {
- ext4_fsblk_t n_blocks_count;
- int err, err2=0;
-
- err = ext4_resize_begin(sb);
- if (err)
- return err;
-
- if (get_user(n_blocks_count, (__u32 __user *)arg)) {
- err = -EFAULT;
- goto group_extend_out;
- }
-
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
- ext4_msg(sb, KERN_ERR,
- "Online resizing not supported with bigalloc");
- err = -EOPNOTSUPP;
- goto group_extend_out;
- }
-
- err = mnt_want_write_file(filp);
- if (err)
- goto group_extend_out;
-
- err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
- if (EXT4_SB(sb)->s_journal) {
- jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
- err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
- jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
- }
- if (err == 0)
- err = err2;
- mnt_drop_write_file(filp);
-group_extend_out:
- ext4_resize_end(sb);
- return err;
- }
-
- case EXT4_IOC_MOVE_EXT: {
- struct move_extent me;
- struct file *donor_filp;
- int err;
-
- if (!(filp->f_mode & FMODE_READ) ||
- !(filp->f_mode & FMODE_WRITE))
- return -EBADF;
-
- if (copy_from_user(&me,
- (struct move_extent __user *)arg, sizeof(me)))
- return -EFAULT;
- me.moved_len = 0;
-
- donor_filp = fget(me.donor_fd);
- if (!donor_filp)
- return -EBADF;
-
- if (!(donor_filp->f_mode & FMODE_WRITE)) {
- err = -EBADF;
- goto mext_out;
- }
-
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
- ext4_msg(sb, KERN_ERR,
- "Online defrag not supported with bigalloc");
- return -EOPNOTSUPP;
- }
-
- err = mnt_want_write_file(filp);
- if (err)
- goto mext_out;
-
- err = ext4_move_extents(filp, donor_filp, me.orig_start,
- me.donor_start, me.len, &me.moved_len);
- mnt_drop_write_file(filp);
-
- if (copy_to_user((struct move_extent __user *)arg,
- &me, sizeof(me)))
- err = -EFAULT;
-mext_out:
- fput(donor_filp);
- return err;
- }
-
- case EXT4_IOC_GROUP_ADD: {
- struct ext4_new_group_data input;
- int err, err2=0;
-
- err = ext4_resize_begin(sb);
- if (err)
- return err;
-
- if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
- sizeof(input))) {
- err = -EFAULT;
- goto group_add_out;
- }
-
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
- ext4_msg(sb, KERN_ERR,
- "Online resizing not supported with bigalloc");
- err = -EOPNOTSUPP;
- goto group_add_out;
- }
-
- err = mnt_want_write_file(filp);
- if (err)
- goto group_add_out;
-
- err = ext4_group_add(sb, &input);
- if (EXT4_SB(sb)->s_journal) {
- jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
- err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
- jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
- }
- if (err == 0)
- err = err2;
- mnt_drop_write_file(filp);
-group_add_out:
- ext4_resize_end(sb);
- return err;
- }
-
- case EXT4_IOC_MIGRATE:
- {
- int err;
- if (!inode_owner_or_capable(inode))
- return -EACCES;
-
- err = mnt_want_write_file(filp);
- if (err)
- return err;
- /*
- * inode_mutex prevents write and truncate on the file.
- * Reads still go through. We take i_data_sem in
- * ext4_ext_swap_inode_data before we switch the
- * inode format to prevent reads.
- */
- mutex_lock(&(inode->i_mutex));
- err = ext4_ext_migrate(inode);
- mutex_unlock(&(inode->i_mutex));
- mnt_drop_write_file(filp);
- return err;
- }
-
- case EXT4_IOC_ALLOC_DA_BLKS:
- {
- int err;
- if (!inode_owner_or_capable(inode))
- return -EACCES;
-
- err = mnt_want_write_file(filp);
- if (err)
- return err;
- err = ext4_alloc_da_blocks(inode);
- mnt_drop_write_file(filp);
- return err;
- }
-
- case EXT4_IOC_RESIZE_FS: {
- ext4_fsblk_t n_blocks_count;
- struct super_block *sb = inode->i_sb;
- int err = 0, err2 = 0;
-
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
- ext4_msg(sb, KERN_ERR,
- "Online resizing not (yet) supported with bigalloc");
- return -EOPNOTSUPP;
- }
-
- if (EXT4_HAS_INCOMPAT_FEATURE(sb,
- EXT4_FEATURE_INCOMPAT_META_BG)) {
- ext4_msg(sb, KERN_ERR,
- "Online resizing not (yet) supported with meta_bg");
- return -EOPNOTSUPP;
- }
-
- if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
- sizeof(__u64))) {
- return -EFAULT;
- }
-
- if (n_blocks_count > MAX_32_NUM &&
- !EXT4_HAS_INCOMPAT_FEATURE(sb,
- EXT4_FEATURE_INCOMPAT_64BIT)) {
- ext4_msg(sb, KERN_ERR,
- "File system only supports 32-bit block numbers");
- return -EOPNOTSUPP;
- }
-
- err = ext4_resize_begin(sb);
- if (err)
- return err;
-
- err = mnt_want_write(filp->f_path.mnt);
- if (err)
- goto resizefs_out;
-
- err = ext4_resize_fs(sb, n_blocks_count);
- if (EXT4_SB(sb)->s_journal) {
- jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
- err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
- jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
- }
- if (err == 0)
- err = err2;
- mnt_drop_write(filp->f_path.mnt);
-resizefs_out:
- ext4_resize_end(sb);
- return err;
- }
-
- case FITRIM:
- {
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
- struct fstrim_range range;
- int ret = 0;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (!blk_queue_discard(q))
- return -EOPNOTSUPP;
-
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
- ext4_msg(sb, KERN_ERR,
- "FITRIM not supported with bigalloc");
- return -EOPNOTSUPP;
- }
-
- if (copy_from_user(&range, (struct fstrim_range __user *)arg,
- sizeof(range)))
- return -EFAULT;
-
- range.minlen = max((unsigned int)range.minlen,
- q->limits.discard_granularity);
- ret = ext4_trim_fs(sb, &range);
- if (ret < 0)
- return ret;
-
- if (copy_to_user((struct fstrim_range __user *)arg, &range,
- sizeof(range)))
- return -EFAULT;
-
- return 0;
- }
-
- default:
- return -ENOTTY;
- }
-}
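-
-/*
- * User-space sketch of the FITRIM case above (the same ioctl that
- * fstrim(8) issues; 'trim_all' is a hypothetical helper, not part of
- * this file):
- *
- *	#include <fcntl.h>
- *	#include <unistd.h>
- *	#include <sys/ioctl.h>
- *	#include <linux/fs.h>
- *
- *	static int trim_all(const char *mnt)
- *	{
- *		struct fstrim_range r = { .start = 0, .len = ~0ULL,
- *					  .minlen = 0 };
- *		int fd = open(mnt, O_RDONLY);
- *		int ret = (fd < 0) ? -1 : ioctl(fd, FITRIM, &r);
- *
- *		if (fd >= 0)
- *			close(fd);
- *		return ret;
- *	}
- *
- * On success r.len is updated to the number of bytes trimmed. As the
- * handler above shows, the caller needs CAP_SYS_ADMIN, the device must
- * support discard, and (in this tree) bigalloc is not supported.
- */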
-
-#ifdef CONFIG_COMPAT
-long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- /* These are just misnamed; they actually get/put an int from/to user space */
- switch (cmd) {
- case EXT4_IOC32_GETFLAGS:
- cmd = EXT4_IOC_GETFLAGS;
- break;
- case EXT4_IOC32_SETFLAGS:
- cmd = EXT4_IOC_SETFLAGS;
- break;
- case EXT4_IOC32_GETVERSION:
- cmd = EXT4_IOC_GETVERSION;
- break;
- case EXT4_IOC32_SETVERSION:
- cmd = EXT4_IOC_SETVERSION;
- break;
- case EXT4_IOC32_GROUP_EXTEND:
- cmd = EXT4_IOC_GROUP_EXTEND;
- break;
- case EXT4_IOC32_GETVERSION_OLD:
- cmd = EXT4_IOC_GETVERSION_OLD;
- break;
- case EXT4_IOC32_SETVERSION_OLD:
- cmd = EXT4_IOC_SETVERSION_OLD;
- break;
- case EXT4_IOC32_GETRSVSZ:
- cmd = EXT4_IOC_GETRSVSZ;
- break;
- case EXT4_IOC32_SETRSVSZ:
- cmd = EXT4_IOC_SETRSVSZ;
- break;
- case EXT4_IOC32_GROUP_ADD: {
- struct compat_ext4_new_group_input __user *uinput;
- struct ext4_new_group_input input;
- mm_segment_t old_fs;
- int err;
-
- uinput = compat_ptr(arg);
- err = get_user(input.group, &uinput->group);
- err |= get_user(input.block_bitmap, &uinput->block_bitmap);
- err |= get_user(input.inode_bitmap, &uinput->inode_bitmap);
- err |= get_user(input.inode_table, &uinput->inode_table);
- err |= get_user(input.blocks_count, &uinput->blocks_count);
- err |= get_user(input.reserved_blocks,
- &uinput->reserved_blocks);
- if (err)
- return -EFAULT;
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- err = ext4_ioctl(file, EXT4_IOC_GROUP_ADD,
- (unsigned long) &input);
- set_fs(old_fs);
- return err;
- }
- case EXT4_IOC_MOVE_EXT:
- case FITRIM:
- case EXT4_IOC_RESIZE_FS:
- break;
- default:
- return -ENOIOCTLCMD;
- }
- return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
-}
-#endif
diff --git a/ANDROID_3.4.5/fs/ext4/mballoc.c b/ANDROID_3.4.5/fs/ext4/mballoc.c
deleted file mode 100644
index 6b0a57ea..00000000
--- a/ANDROID_3.4.5/fs/ext4/mballoc.c
+++ /dev/null
@@ -1,5047 +0,0 @@
-/*
- * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
- * Written by Alex Tomas <alex@clusterfs.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
- */
-
-
-/*
- * mballoc.c contains the multiblocks allocation routines
- */
-
-#include "ext4_jbd2.h"
-#include "mballoc.h"
-#include <linux/debugfs.h>
-#include <linux/slab.h>
-#include <trace/events/ext4.h>
-
-/*
- * MUSTDO:
- * - test ext4_ext_search_left() and ext4_ext_search_right()
- * - search for metadata in few groups
- *
- * TODO v4:
- * - normalization should take into account whether file is still open
- * - discard preallocations if no free space left (policy?)
- * - don't normalize tails
- * - quota
- * - reservation for superuser
- *
- * TODO v3:
- * - bitmap read-ahead (proposed by Oleg Drokin aka green)
- * - track min/max extents in each group for better group selection
- * - mb_mark_used() may allocate chunk right after splitting buddy
- * - tree of groups sorted by number of free blocks
- * - error handling
- */
-
-/*
- * The allocation request involves a request for multiple blocks
- * near the specified goal (block) value.
- *
- * During initialization phase of the allocator we decide to use the
- * group preallocation or inode preallocation depending on the size of
- * the file. The size of the file could be the resulting file size we
- * would have after allocation, or the current file size, which ever
- * is larger. If the size is less than sbi->s_mb_stream_request we
- * select to use the group preallocation. The default value of
- * s_mb_stream_request is 16 blocks. This can also be tuned via
- * /sys/fs/ext4/<partition>/mb_stream_req. The value is represented in
- * terms of number of blocks.
- *
- * The main motivation for having small file use group preallocation is to
- * ensure that we have small files closer together on the disk.
- *
- * In the first stage, the allocator looks at the inode prealloc list,
- * ext4_inode_info->i_prealloc_list, which contains list of prealloc
- * spaces for this particular inode. The inode prealloc space is
- * represented as:
- *
- * pa_lstart -> the logical start block for this prealloc space
- * pa_pstart -> the physical start block for this prealloc space
- * pa_len -> length for this prealloc space (in clusters)
- * pa_free -> free space available in this prealloc space (in clusters)
- *
- * The inode preallocation space is used looking at the _logical_ start
- * block. Only if the logical file block falls within the range of the
- * prealloc space do we consume that particular prealloc space. This makes
- * sure that we have contiguous physical blocks representing the file blocks.
- *
- * The important thing to be noted in case of inode prealloc space is that
- * we don't modify the values associated with inode prealloc space except
- * pa_free.
- *
- * If we are not able to find blocks in the inode prealloc space and if we
- * have the group allocation flag set then we look at the locality group
- * prealloc space. These are per-CPU prealloc lists, represented as
- *
- * ext4_sb_info.s_locality_groups[smp_processor_id()]
- *
- * The reason for having a per cpu locality group is to reduce the contention
- * between CPUs. It is possible to get scheduled at this point.
- *
- * The locality group prealloc space is used looking at whether we have
- * enough free space (pa_free) within the prealloc space.
- *
- * If we can't allocate blocks via inode prealloc and/or locality group
- * prealloc then we look at the buddy cache. The buddy cache is represented
- * by ext4_sb_info.s_buddy_cache (struct inode) whose file offset gets
- * mapped to the buddy and bitmap information regarding different
- * groups. The buddy information is attached to buddy cache inode so that
- * we can access them through the page cache. The information regarding
- * each group is loaded via ext4_mb_load_buddy. The information involves
- * the block bitmap and buddy information, which are stored in the
- * inode as:
- *
- * { page }
- * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
- *
- *
- * one block each for bitmap and buddy information. So for each group we
- * take up 2 blocks. A page can contain blocks_per_page (PAGE_CACHE_SIZE /
- * blocksize) blocks. So it can have information regarding groups_per_page
- * which is blocks_per_page/2
- *
- * The buddy cache inode is not stored on disk. The inode is thrown
- * away when the filesystem is unmounted.
- *
- * We look for the requested number of blocks in the buddy cache. If we
- * were able to locate that many free blocks we return with additional
- * information regarding the rest of the contiguous physical blocks
- * available.
- *
- * Before allocating blocks via the buddy cache we normalize the request.
- * This ensures we ask for more blocks than we actually need. The extra
- * blocks that we get after allocation are added to the respective prealloc
- * list. In case of inode preallocation we follow a list of heuristics
- * based on file size. This can be found in ext4_mb_normalize_request. If
- * we are doing a group prealloc we try to normalize the request to
- * sbi->s_mb_group_prealloc. The default value of s_mb_group_prealloc is
- * dependent on the cluster size; for non-bigalloc file systems, it is
- * 512 blocks. This can be tuned via
- * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
- * terms of number of blocks. If we have mounted the file system with the
- * -o stripe=<value> option, the group prealloc request is normalized to
- * the smallest multiple of the stripe value (sbi->s_stripe) which is
- * greater than the default mb_group_prealloc.
- *
- * The regular allocator (using the buddy cache) supports a few tunables.
- *
- * /sys/fs/ext4/<partition>/mb_min_to_scan
- * /sys/fs/ext4/<partition>/mb_max_to_scan
- * /sys/fs/ext4/<partition>/mb_order2_req
- *
- * The regular allocator uses the buddy scan only if the request len is a
- * power of 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs.
- * The value of s_mb_order2_reqs can be tuned via
- * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to
- * the stripe size (sbi->s_stripe), we try to search for contiguous blocks
- * in stripe-size units. This should result in better allocation on RAID
- * setups. If not, we search in the specific group using the bitmap for
- * best extents. The tunables min_to_scan and max_to_scan control the
- * behaviour here. min_to_scan indicates how long mballoc __must__ look
- * for a best extent and max_to_scan indicates how long mballoc __can__
- * look for a best extent among the found extents. Searching for the
- * blocks starts with the group specified as the goal value in the
- * allocation context via ac_g_ex. Each group is first checked on whether
- * it can be used for allocation at all; ext4_mb_good_group explains how
- * the groups are checked.
- *
- * Both prealloc spaces are populated as described above. So the first
- * request will hit the buddy cache, which results in the prealloc
- * space getting filled. The prealloc space is then used for
- * subsequent requests.
- */
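-
-/*
- * A minimal sketch of the selection policy described above (illustrative
- * only; in this file the real decision is made by ext4_mb_group_or_file(),
- * which also consults allocation flags):
- *
- *	size = max(size after allocation, current file size);
- *	if (size < sbi->s_mb_stream_request)
- *		use the per-CPU locality group preallocation;
- *	else
- *		use the per-inode preallocation;
- */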
-
-/*
- * mballoc operates on the following data:
- * - on-disk bitmap
- * - in-core buddy (actually includes buddy and bitmap)
- * - preallocation descriptors (PAs)
- *
- * there are two types of preallocations:
- * - inode
- *   assigned to a specific inode and can be used for this inode only.
- *   it describes part of the inode's space preallocated to specific
- *   physical blocks. any block from that preallocation can be used
- *   independently. the descriptor just tracks the number of blocks left
- *   unused. so, before taking some block from the descriptor, one must
- *   make sure the corresponding logical block isn't allocated yet. this
- *   also means that freeing any block within the descriptor's range
- *   must discard all preallocated blocks.
- * - locality group
- *   assigned to a specific locality group, which does not translate to a
- *   permanent set of inodes: an inode can join and leave the group. space
- *   from this type of preallocation can be used for any inode. thus
- *   it's consumed from the beginning to the end.
- *
- * relation between them can be expressed as:
- * in-core buddy = on-disk bitmap + preallocation descriptors
- *
- * this means the blocks mballoc considers used are:
- * - allocated blocks (persistent)
- * - preallocated blocks (non-persistent)
- *
- * consistency in mballoc world means that at any time a block is either
- * free or used in ALL structures. notice: "any time" should not be read
- * literally -- time is discrete and delimited by locks.
- *
- * to keep it simple, we don't use block numbers; instead we count the
- * number of blocks: how many blocks are marked used/free in the on-disk
- * bitmap, the buddy and the PAs.
- *
- * all operations can be expressed as:
- * - init buddy: buddy = on-disk + PAs
- * - new PA: buddy += N; PA = N
- * - use inode PA: on-disk += N; PA -= N
- * - discard inode PA: buddy -= on-disk - PA; PA = 0
- * - use locality group PA: on-disk += N; PA -= N
- * - discard locality group PA: buddy -= PA; PA = 0
- * note: 'buddy -= on-disk - PA' is used to show that the on-disk bitmap
- *       is what the real operation consults, because we can't know the
- *       actually used bits from the PA, only from the on-disk bitmap
- *
- * if we follow this strict logic, then all operations above should be atomic.
- * given that some of them can block, we'd have to use something like
- * semaphores, killing performance on high-end SMP hardware. let's try
- * to relax it using the following knowledge:
- * 1) if buddy is referenced, it's already initialized
- * 2) while block is used in buddy and the buddy is referenced,
- * nobody can re-allocate that block
- * 3) we work on bitmaps and '+' actually means 'set bits'. if the on-disk
- *    bitmap has a bit set and a PA claims the same block, it's OK. IOW,
- *    one can set a bit in the on-disk bitmap if the buddy has the same
- *    bit set and/or a PA covers the corresponding block
- *
- * so, now we're building a concurrency table:
- * - init buddy vs.
- * - new PA
- * blocks for PA are allocated in the buddy, buddy must be referenced
- * until PA is linked to allocation group to avoid concurrent buddy init
- * - use inode PA
- * we need to make sure that either the on-disk bitmap or the PA has
- * uptodate data. given (3), we only care that the PA -= N operation
- * doesn't interfere with init
- * - discard inode PA
- * the simplest way would be to have buddy initialized by the discard
- * - use locality group PA
- * again PA-=N must be serialized with init
- * - discard locality group PA
- * the simplest way would be to have buddy initialized by the discard
- * - new PA vs.
- * - use inode PA
- * i_data_sem serializes them
- * - discard inode PA
- * discard process must wait until PA isn't used by another process
- * - use locality group PA
- * some mutex should serialize them
- * - discard locality group PA
- * discard process must wait until PA isn't used by another process
- * - use inode PA
- * - use inode PA
- * i_data_sem or another mutex should serialize them
- * - discard inode PA
- * discard process must wait until PA isn't used by another process
- * - use locality group PA
- * nothing wrong here -- they're different PAs covering different blocks
- * - discard locality group PA
- * discard process must wait until PA isn't used by another process
- *
- * now we're ready to draw a few conclusions:
- * - while a PA is referenced, no discard of it is possible
- * - a PA stays referenced until its blocks are marked in the on-disk bitmap
- * - a PA changes only after the on-disk bitmap does
- * - discard must not compete with init. either init is done before
- *   any discard, or they're serialized somehow
- * - buddy init, as the sum of the on-disk bitmap and PAs, is done atomically
- *
- * a special case is when we've used a PA down to emptiness. no need to
- * modify the buddy in this case, but we should care about concurrent init
- *
- */
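-
-/*
- * A worked instance of the accounting above (numbers are illustrative):
- * say a group has 100 blocks marked used on disk and no PAs.
- *
- *	init buddy:	buddy = 100 + 0
- *	new PA of 16:	buddy = 116; PA = 16
- *	write 4 blocks:	on-disk = 104; PA = 12	(buddy still 116 = 104 + 12)
- *	discard PA:	buddy = 104; PA = 0	(12 unused blocks freed)
- *
- * at every step a block is either free or used in all structures.
- */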
-
- /*
- * The logic in a few words:
- *
- * - allocation:
- * load group
- * find blocks
- * mark bits in on-disk bitmap
- * release group
- *
- * - use preallocation:
- * find proper PA (per-inode or group)
- * load group
- * mark bits in on-disk bitmap
- * release group
- * release PA
- *
- * - free:
- * load group
- * mark bits in on-disk bitmap
- * release group
- *
- * - discard preallocations in group:
- * mark PAs deleted
- * move them onto local list
- * load on-disk bitmap
- * load group
- * remove PA from object (inode or locality group)
- * mark free blocks in-core
- *
- * - discard inode's preallocations:
- */
-
-/*
- * Locking rules
- *
- * Locks:
- * - bitlock on a group (group)
- * - object (inode/locality) (object)
- * - per-pa lock (pa)
- *
- * Paths:
- * - new pa
- * object
- * group
- *
- * - find and use pa:
- * pa
- *
- * - release consumed pa:
- * pa
- * group
- * object
- *
- * - generate in-core bitmap:
- * group
- * pa
- *
- * - discard all for given object (inode, locality group):
- * object
- * pa
- * group
- *
- * - discard all for given group:
- * group
- * pa
- * group
- * object
- *
- */
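-
-/*
- * For instance, the "new pa" path above nests the group bitlock inside
- * the object lock; sketched below with placeholder lock names, since the
- * real code spells them differently:
- *
- *	spin_lock(object_lock);			(object)
- *	ext4_lock_group(sb, group);		(group)
- *	... link the new PA ...
- *	ext4_unlock_group(sb, group);
- *	spin_unlock(object_lock);
- */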
-static struct kmem_cache *ext4_pspace_cachep;
-static struct kmem_cache *ext4_ac_cachep;
-static struct kmem_cache *ext4_free_data_cachep;
-
-/* We create slab caches for groupinfo data structures based on the
- * superblock block size. There will be one per mounted filesystem for
- * each unique s_blocksize_bits */
-#define NR_GRPINFO_CACHES 8
-static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
-
-static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
- "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
- "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
- "ext4_groupinfo_64k", "ext4_groupinfo_128k"
-};
-
-static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
- ext4_group_t group);
-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group);
-static void ext4_free_data_callback(struct super_block *sb,
- struct ext4_journal_cb_entry *jce, int rc);
-
-static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
-{
-#if BITS_PER_LONG == 64
- *bit += ((unsigned long) addr & 7UL) << 3;
- addr = (void *) ((unsigned long) addr & ~7UL);
-#elif BITS_PER_LONG == 32
- *bit += ((unsigned long) addr & 3UL) << 3;
- addr = (void *) ((unsigned long) addr & ~3UL);
-#else
-#error "Unsupported BITS_PER_LONG"
-#endif
- return addr;
-}
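-
-/*
- * Worked example for mb_correct_addr_and_bit() (illustrative): on a
- * 64-bit machine, addr = 0x1003 with *bit = 5 becomes addr = 0x1000
- * with *bit = 5 + 3 * 8 = 29; the misaligned byte offset is folded
- * into the bit index so that the ext4_*_bit() helpers always see a
- * long-aligned address.
- */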
-
-static inline int mb_test_bit(int bit, void *addr)
-{
- /*
- * ext4_test_bit on architecture like powerpc
- * needs unsigned long aligned address
- */
- addr = mb_correct_addr_and_bit(&bit, addr);
- return ext4_test_bit(bit, addr);
-}
-
-static inline void mb_set_bit(int bit, void *addr)
-{
- addr = mb_correct_addr_and_bit(&bit, addr);
- ext4_set_bit(bit, addr);
-}
-
-static inline void mb_clear_bit(int bit, void *addr)
-{
- addr = mb_correct_addr_and_bit(&bit, addr);
- ext4_clear_bit(bit, addr);
-}
-
-static inline int mb_find_next_zero_bit(void *addr, int max, int start)
-{
- int fix = 0, ret, tmpmax;
- addr = mb_correct_addr_and_bit(&fix, addr);
- tmpmax = max + fix;
- start += fix;
-
- ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
- if (ret > max)
- return max;
- return ret;
-}
-
-static inline int mb_find_next_bit(void *addr, int max, int start)
-{
- int fix = 0, ret, tmpmax;
- addr = mb_correct_addr_and_bit(&fix, addr);
- tmpmax = max + fix;
- start += fix;
-
- ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
- if (ret > max)
- return max;
- return ret;
-}
-
-static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
-{
- char *bb;
-
- BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
- BUG_ON(max == NULL);
-
- if (order > e4b->bd_blkbits + 1) {
- *max = 0;
- return NULL;
- }
-
- /* at order 0 we see each particular block */
- if (order == 0) {
- *max = 1 << (e4b->bd_blkbits + 3);
- return e4b->bd_bitmap;
- }
-
- bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
- *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
-
- return bb;
-}
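-
-/*
- * Layout walked by mb_find_buddy() (illustrative, for a 4KB block
- * size): order 0 is the block bitmap itself with max = 1 << (12 + 3) =
- * 32768 bits; each higher order lives inside bd_buddy at the
- * precomputed s_mb_offsets[order] and holds half as many bits as the
- * level below it (recorded in s_mb_maxs[order]).
- */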
-
-#ifdef DOUBLE_CHECK
-static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
- int first, int count)
-{
- int i;
- struct super_block *sb = e4b->bd_sb;
-
- if (unlikely(e4b->bd_info->bb_bitmap == NULL))
- return;
- assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
- for (i = 0; i < count; i++) {
- if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
- ext4_fsblk_t blocknr;
-
- blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
- blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
- ext4_grp_locked_error(sb, e4b->bd_group,
- inode ? inode->i_ino : 0,
- blocknr,
- "freeing block already freed "
- "(bit %u)",
- first + i);
- }
- mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
- }
-}
-
-static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
-{
- int i;
-
- if (unlikely(e4b->bd_info->bb_bitmap == NULL))
- return;
- assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
- for (i = 0; i < count; i++) {
- BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
- mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
- }
-}
-
-static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
-{
- if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
- unsigned char *b1, *b2;
- int i;
- b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
- b2 = (unsigned char *) bitmap;
- for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
- if (b1[i] != b2[i]) {
- ext4_msg(e4b->bd_sb, KERN_ERR,
- "corruption in group %u "
- "at byte %u(%u): %x in copy != %x "
- "on disk/prealloc",
- e4b->bd_group, i, i * 8, b1[i], b2[i]);
- BUG();
- }
- }
- }
-}
-
-#else
-static inline void mb_free_blocks_double(struct inode *inode,
- struct ext4_buddy *e4b, int first, int count)
-{
- return;
-}
-static inline void mb_mark_used_double(struct ext4_buddy *e4b,
- int first, int count)
-{
- return;
-}
-static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
-{
- return;
-}
-#endif
-
-#ifdef AGGRESSIVE_CHECK
-
-#define MB_CHECK_ASSERT(assert) \
-do { \
- if (!(assert)) { \
- printk(KERN_EMERG \
- "Assertion failure in %s() at %s:%d: \"%s\"\n", \
- function, file, line, # assert); \
- BUG(); \
- } \
-} while (0)
-
-static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
- const char *function, int line)
-{
- struct super_block *sb = e4b->bd_sb;
- int order = e4b->bd_blkbits + 1;
- int max;
- int max2;
- int i;
- int j;
- int k;
- int count;
- struct ext4_group_info *grp;
- int fragments = 0;
- int fstart;
- struct list_head *cur;
- void *buddy;
- void *buddy2;
-
- {
- static int mb_check_counter;
- if (mb_check_counter++ % 100 != 0)
- return 0;
- }
-
- while (order > 1) {
- buddy = mb_find_buddy(e4b, order, &max);
- MB_CHECK_ASSERT(buddy);
- buddy2 = mb_find_buddy(e4b, order - 1, &max2);
- MB_CHECK_ASSERT(buddy2);
- MB_CHECK_ASSERT(buddy != buddy2);
- MB_CHECK_ASSERT(max * 2 == max2);
-
- count = 0;
- for (i = 0; i < max; i++) {
-
- if (mb_test_bit(i, buddy)) {
- /* only single bit in buddy2 may be 1 */
- if (!mb_test_bit(i << 1, buddy2)) {
- MB_CHECK_ASSERT(
- mb_test_bit((i<<1)+1, buddy2));
- } else if (!mb_test_bit((i << 1) + 1, buddy2)) {
- MB_CHECK_ASSERT(
- mb_test_bit(i << 1, buddy2));
- }
- continue;
- }
-
- /* both bits in buddy2 must be 1 */
- MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
- MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
-
- for (j = 0; j < (1 << order); j++) {
- k = (i * (1 << order)) + j;
- MB_CHECK_ASSERT(
- !mb_test_bit(k, e4b->bd_bitmap));
- }
- count++;
- }
- MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
- order--;
- }
-
- fstart = -1;
- buddy = mb_find_buddy(e4b, 0, &max);
- for (i = 0; i < max; i++) {
- if (!mb_test_bit(i, buddy)) {
- MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
- if (fstart == -1) {
- fragments++;
- fstart = i;
- }
- continue;
- }
- fstart = -1;
- /* check used bits only */
- for (j = 0; j < e4b->bd_blkbits + 1; j++) {
- buddy2 = mb_find_buddy(e4b, j, &max2);
- k = i >> j;
- MB_CHECK_ASSERT(k < max2);
- MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
- }
- }
- MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
- MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
-
- grp = ext4_get_group_info(sb, e4b->bd_group);
- list_for_each(cur, &grp->bb_prealloc_list) {
- ext4_group_t groupnr;
- struct ext4_prealloc_space *pa;
- pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
- ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
- MB_CHECK_ASSERT(groupnr == e4b->bd_group);
- for (i = 0; i < pa->pa_len; i++)
- MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
- }
- return 0;
-}
-#undef MB_CHECK_ASSERT
-#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
- __FILE__, __func__, __LINE__)
-#else
-#define mb_check_buddy(e4b)
-#endif
-
-/*
- * Divide the blocks starting at @first with length @len into
- * smaller chunks with power-of-2 sizes.
- * Clear the bits in the buddy bitmap which the blocks of the chunk(s)
- * cover, then increase bb_counters[] for the corresponding chunk size.
- */
-static void ext4_mb_mark_free_simple(struct super_block *sb,
- void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
- struct ext4_group_info *grp)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_grpblk_t min;
- ext4_grpblk_t max;
- ext4_grpblk_t chunk;
- unsigned short border;
-
- BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
-
- border = 2 << sb->s_blocksize_bits;
-
- while (len > 0) {
- /* find how many blocks can be covered since this position */
- max = ffs(first | border) - 1;
-
- /* find how many blocks of power 2 we need to mark */
- min = fls(len) - 1;
-
- if (max < min)
- min = max;
- chunk = 1 << min;
-
- /* mark multiblock chunks only */
- grp->bb_counters[min]++;
- if (min > 0)
- mb_clear_bit(first >> min,
- buddy + sbi->s_mb_offsets[min]);
-
- len -= chunk;
- first += chunk;
- }
-}
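-
-/*
- * Worked example (illustrative): for first = 5, len = 13 the loop above
- * splits the free range [5..17] on alignment and size boundaries into
- * 1 block @ 5 (order 0), 2 @ 6 (order 1), 8 @ 8 (order 3) and
- * 2 @ 16 (order 1); it bumps bb_counters[] once per chunk and clears
- * the matching bits in the order-1 and order-3 buddies.
- */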
-
-/*
- * Cache the order of the largest free extent we have available in this block
- * group.
- */
-static void
-mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
-{
- int i;
- int bits;
-
- grp->bb_largest_free_order = -1; /* uninit */
-
- bits = sb->s_blocksize_bits + 1;
- for (i = bits; i >= 0; i--) {
- if (grp->bb_counters[i] > 0) {
- grp->bb_largest_free_order = i;
- break;
- }
- }
-}
-
-static noinline_for_stack
-void ext4_mb_generate_buddy(struct super_block *sb,
- void *buddy, void *bitmap, ext4_group_t group)
-{
- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
- ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
- ext4_grpblk_t i = 0;
- ext4_grpblk_t first;
- ext4_grpblk_t len;
- unsigned free = 0;
- unsigned fragments = 0;
- unsigned long long period = get_cycles();
-
-	/* initialize buddy from the bitmap which is an aggregation
-	 * of the on-disk bitmap and preallocations */
- i = mb_find_next_zero_bit(bitmap, max, 0);
- grp->bb_first_free = i;
- while (i < max) {
- fragments++;
- first = i;
- i = mb_find_next_bit(bitmap, max, i);
- len = i - first;
- free += len;
- if (len > 1)
- ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
- else
- grp->bb_counters[0]++;
- if (i < max)
- i = mb_find_next_zero_bit(bitmap, max, i);
- }
- grp->bb_fragments = fragments;
-
- if (free != grp->bb_free) {
- ext4_grp_locked_error(sb, group, 0, 0,
- "%u clusters in bitmap, %u in gd",
- free, grp->bb_free);
-		/*
-		 * If we intend to continue, we consider the group descriptor
-		 * corrupt and update bb_free using the bitmap value
-		 */
- grp->bb_free = free;
- }
- mb_set_largest_free_order(sb, grp);
-
- clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
-
- period = get_cycles() - period;
- spin_lock(&EXT4_SB(sb)->s_bal_lock);
- EXT4_SB(sb)->s_mb_buddies_generated++;
- EXT4_SB(sb)->s_mb_generation_time += period;
- spin_unlock(&EXT4_SB(sb)->s_bal_lock);
-}
-
-/* The buddy information is attached to the buddy cache inode
- * for convenience. The information regarding each group
- * is loaded via ext4_mb_load_buddy. It involves the block
- * bitmap and the buddy information, which are stored in the
- * inode as
- *
- * { page }
- * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
- *
- *
- * one block each for bitmap and buddy information.
- * So for each group we take up 2 blocks. A page can
- * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize) blocks.
- * So it can have information regarding groups_per_page which
- * is blocks_per_page/2
- *
- * Locking note: This routine takes the block group lock of all groups
- * for this page; do not hold this lock when calling this routine!
- */
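-
-/*
- * Concretely (illustrative numbers): with PAGE_CACHE_SIZE = 4096 and a
- * 1KB block size, blocks_per_page = 4, so one page holds the bitmap
- * and buddy blocks of two groups; with a 4KB block size,
- * blocks_per_page = 1 and a group's bitmap and buddy land on two
- * separate pages.
- */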
-
-static int ext4_mb_init_cache(struct page *page, char *incore)
-{
- ext4_group_t ngroups;
- int blocksize;
- int blocks_per_page;
- int groups_per_page;
- int err = 0;
- int i;
- ext4_group_t first_group, group;
- int first_block;
- struct super_block *sb;
- struct buffer_head *bhs;
- struct buffer_head **bh;
- struct inode *inode;
- char *data;
- char *bitmap;
- struct ext4_group_info *grinfo;
-
- mb_debug(1, "init page %lu\n", page->index);
-
- inode = page->mapping->host;
- sb = inode->i_sb;
- ngroups = ext4_get_groups_count(sb);
- blocksize = 1 << inode->i_blkbits;
- blocks_per_page = PAGE_CACHE_SIZE / blocksize;
-
- groups_per_page = blocks_per_page >> 1;
- if (groups_per_page == 0)
- groups_per_page = 1;
-
- /* allocate buffer_heads to read bitmaps */
- if (groups_per_page > 1) {
- i = sizeof(struct buffer_head *) * groups_per_page;
- bh = kzalloc(i, GFP_NOFS);
- if (bh == NULL) {
- err = -ENOMEM;
- goto out;
- }
- } else
- bh = &bhs;
-
- first_group = page->index * blocks_per_page / 2;
-
- /* read all groups the page covers into the cache */
- for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
- if (group >= ngroups)
- break;
-
- grinfo = ext4_get_group_info(sb, group);
- /*
- * If page is uptodate then we came here after online resize
- * which added some new uninitialized group info structs, so
- * we must skip all initialized uptodate buddies on the page,
- * which may be currently in use by an allocating task.
- */
- if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
- bh[i] = NULL;
- continue;
- }
- if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) {
- err = -ENOMEM;
- goto out;
- }
- mb_debug(1, "read bitmap for group %u\n", group);
- }
-
- /* wait for I/O completion */
- for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
- if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) {
- err = -EIO;
- goto out;
- }
- }
-
- first_block = page->index * blocks_per_page;
- for (i = 0; i < blocks_per_page; i++) {
- int group;
-
- group = (first_block + i) >> 1;
- if (group >= ngroups)
- break;
-
- if (!bh[group - first_group])
- /* skip initialized uptodate buddy */
- continue;
-
-		/*
-		 * data carries information regarding this
-		 * particular group in the format specified
-		 * above
-		 */
- data = page_address(page) + (i * blocksize);
- bitmap = bh[group - first_group]->b_data;
-
- /*
- * We place the buddy block and bitmap block
- * close together
- */
- if ((first_block + i) & 1) {
- /* this is block of buddy */
- BUG_ON(incore == NULL);
- mb_debug(1, "put buddy for group %u in page %lu/%x\n",
- group, page->index, i * blocksize);
- trace_ext4_mb_buddy_bitmap_load(sb, group);
- grinfo = ext4_get_group_info(sb, group);
- grinfo->bb_fragments = 0;
- memset(grinfo->bb_counters, 0,
- sizeof(*grinfo->bb_counters) *
- (sb->s_blocksize_bits+2));
- /*
- * incore got set to the group block bitmap below
- */
- ext4_lock_group(sb, group);
- /* init the buddy */
- memset(data, 0xff, blocksize);
- ext4_mb_generate_buddy(sb, data, incore, group);
- ext4_unlock_group(sb, group);
- incore = NULL;
- } else {
- /* this is block of bitmap */
- BUG_ON(incore != NULL);
- mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
- group, page->index, i * blocksize);
- trace_ext4_mb_bitmap_load(sb, group);
-
- /* see comments in ext4_mb_put_pa() */
- ext4_lock_group(sb, group);
- memcpy(data, bitmap, blocksize);
-
- /* mark all preallocated blks used in in-core bitmap */
- ext4_mb_generate_from_pa(sb, data, group);
- ext4_mb_generate_from_freelist(sb, data, group);
- ext4_unlock_group(sb, group);
-
- /* set incore so that the buddy information can be
- * generated using this
- */
- incore = data;
- }
- }
- SetPageUptodate(page);
-
-out:
- if (bh) {
- for (i = 0; i < groups_per_page; i++)
- brelse(bh[i]);
- if (bh != &bhs)
- kfree(bh);
- }
- return err;
-}
-
-/*
- * Lock the buddy and bitmap pages. This makes sure that a parallel
- * init_group on the same buddy page cannot happen while we hold the buddy
- * page lock. The locked buddy and bitmap pages are returned in the e4b
- * struct. If buddy and bitmap are on the same page, e4b->bd_buddy_page is
- * NULL and the return value is 0.
- */
-static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
- ext4_group_t group, struct ext4_buddy *e4b)
-{
- struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
- int block, pnum, poff;
- int blocks_per_page;
- struct page *page;
-
- e4b->bd_buddy_page = NULL;
- e4b->bd_bitmap_page = NULL;
-
- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
- /*
- * the buddy cache inode stores the block bitmap
- * and buddy information in consecutive blocks.
- * So for each group we need two blocks.
- */
- block = group * 2;
- pnum = block / blocks_per_page;
- poff = block % blocks_per_page;
- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
- if (!page)
- return -EIO;
- BUG_ON(page->mapping != inode->i_mapping);
- e4b->bd_bitmap_page = page;
- e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
-
- if (blocks_per_page >= 2) {
- /* buddy and bitmap are on the same page */
- return 0;
- }
-
- block++;
- pnum = block / blocks_per_page;
- poff = block % blocks_per_page;
- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
- if (!page)
- return -EIO;
- BUG_ON(page->mapping != inode->i_mapping);
- e4b->bd_buddy_page = page;
- return 0;
-}
-
-static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
-{
- if (e4b->bd_bitmap_page) {
- unlock_page(e4b->bd_bitmap_page);
- page_cache_release(e4b->bd_bitmap_page);
- }
- if (e4b->bd_buddy_page) {
- unlock_page(e4b->bd_buddy_page);
- page_cache_release(e4b->bd_buddy_page);
- }
-}
-
-/*
- * Locking note: This routine calls ext4_mb_init_cache(), which takes the
- * block group lock of all groups for this page; do not hold the BG lock when
- * calling this routine!
- */
-static noinline_for_stack
-int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
-{
-
- struct ext4_group_info *this_grp;
- struct ext4_buddy e4b;
- struct page *page;
- int ret = 0;
-
- mb_debug(1, "init group %u\n", group);
- this_grp = ext4_get_group_info(sb, group);
- /*
-	 * This ensures that we don't reinit the buddy cache
-	 * pages which map to the group from which we are already
-	 * allocating. If we are looking at the buddy cache we would
-	 * have taken a reference using ext4_mb_load_buddy and that
-	 * would have pinned the buddy page in the page cache.
- */
- ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
- if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
- /*
- * somebody initialized the group
- * return without doing anything
- */
- goto err;
- }
-
- page = e4b.bd_bitmap_page;
- ret = ext4_mb_init_cache(page, NULL);
- if (ret)
- goto err;
- if (!PageUptodate(page)) {
- ret = -EIO;
- goto err;
- }
- mark_page_accessed(page);
-
- if (e4b.bd_buddy_page == NULL) {
- /*
- * If both the bitmap and buddy are in
- * the same page we don't need to force
- * init the buddy
- */
- ret = 0;
- goto err;
- }
- /* init buddy cache */
- page = e4b.bd_buddy_page;
- ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
- if (ret)
- goto err;
- if (!PageUptodate(page)) {
- ret = -EIO;
- goto err;
- }
- mark_page_accessed(page);
-err:
- ext4_mb_put_buddy_page_lock(&e4b);
- return ret;
-}
-
-/*
- * Locking note: This routine calls ext4_mb_init_cache(), which takes the
- * block group lock of all groups for this page; do not hold the BG lock when
- * calling this routine!
- */
-static noinline_for_stack int
-ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
- struct ext4_buddy *e4b)
-{
- int blocks_per_page;
- int block;
- int pnum;
- int poff;
- struct page *page;
- int ret;
- struct ext4_group_info *grp;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct inode *inode = sbi->s_buddy_cache;
-
- mb_debug(1, "load group %u\n", group);
-
- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
- grp = ext4_get_group_info(sb, group);
-
- e4b->bd_blkbits = sb->s_blocksize_bits;
- e4b->bd_info = grp;
- e4b->bd_sb = sb;
- e4b->bd_group = group;
- e4b->bd_buddy_page = NULL;
- e4b->bd_bitmap_page = NULL;
-
- if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
- /*
- * we need full data about the group
- * to make a good selection
- */
- ret = ext4_mb_init_group(sb, group);
- if (ret)
- return ret;
- }
-
- /*
- * the buddy cache inode stores the block bitmap
- * and buddy information in consecutive blocks.
- * So for each group we need two blocks.
- */
- block = group * 2;
- pnum = block / blocks_per_page;
- poff = block % blocks_per_page;
-
-	/* we could use find_or_create_page(), but it would lock the page,
-	 * which we'd like to avoid in the fast path ... */
- page = find_get_page(inode->i_mapping, pnum);
- if (page == NULL || !PageUptodate(page)) {
- if (page)
-			/*
-			 * drop the page reference and try
-			 * to get the page with the lock. If the
-			 * page is not uptodate, that implies
-			 * somebody just created the page but
-			 * has yet to initialize it. So
-			 * wait for it to be initialized.
-			 */
- page_cache_release(page);
- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
- if (page) {
- BUG_ON(page->mapping != inode->i_mapping);
- if (!PageUptodate(page)) {
- ret = ext4_mb_init_cache(page, NULL);
- if (ret) {
- unlock_page(page);
- goto err;
- }
- mb_cmp_bitmaps(e4b, page_address(page) +
- (poff * sb->s_blocksize));
- }
- unlock_page(page);
- }
- }
- if (page == NULL || !PageUptodate(page)) {
- ret = -EIO;
- goto err;
- }
- e4b->bd_bitmap_page = page;
- e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
- mark_page_accessed(page);
-
- block++;
- pnum = block / blocks_per_page;
- poff = block % blocks_per_page;
-
- page = find_get_page(inode->i_mapping, pnum);
- if (page == NULL || !PageUptodate(page)) {
- if (page)
- page_cache_release(page);
- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
- if (page) {
- BUG_ON(page->mapping != inode->i_mapping);
- if (!PageUptodate(page)) {
- ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
- if (ret) {
- unlock_page(page);
- goto err;
- }
- }
- unlock_page(page);
- }
- }
- if (page == NULL || !PageUptodate(page)) {
- ret = -EIO;
- goto err;
- }
- e4b->bd_buddy_page = page;
- e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
- mark_page_accessed(page);
-
- BUG_ON(e4b->bd_bitmap_page == NULL);
- BUG_ON(e4b->bd_buddy_page == NULL);
-
- return 0;
-
-err:
- if (page)
- page_cache_release(page);
- if (e4b->bd_bitmap_page)
- page_cache_release(e4b->bd_bitmap_page);
- if (e4b->bd_buddy_page)
- page_cache_release(e4b->bd_buddy_page);
- e4b->bd_buddy = NULL;
- e4b->bd_bitmap = NULL;
- return ret;
-}
-
-static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
-{
- if (e4b->bd_bitmap_page)
- page_cache_release(e4b->bd_bitmap_page);
- if (e4b->bd_buddy_page)
- page_cache_release(e4b->bd_buddy_page);
-}
-
-
-static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
-{
- int order = 1;
- void *bb;
-
- BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
- BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
-
- bb = e4b->bd_buddy;
- while (order <= e4b->bd_blkbits + 1) {
- block = block >> 1;
- if (!mb_test_bit(block, bb)) {
- /* this block is part of buddy of order 'order' */
- return order;
- }
- bb += 1 << (e4b->bd_blkbits - order);
- order++;
- }
- return 0;
-}
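-
-/*
- * Worked example (illustrative): if blocks 8..15 form one free order-3
- * chunk, then for block = 9 the walk above sees set bits at orders 1
- * and 2 and a clear bit at index 9 >> 3 = 1 in the order-3 buddy, so
- * it returns 3.
- */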
-
-static void mb_clear_bits(void *bm, int cur, int len)
-{
- __u32 *addr;
-
- len = cur + len;
- while (cur < len) {
- if ((cur & 31) == 0 && (len - cur) >= 32) {
- /* fast path: clear whole word at once */
- addr = bm + (cur >> 3);
- *addr = 0;
- cur += 32;
- continue;
- }
- mb_clear_bit(cur, bm);
- cur++;
- }
-}
-
-void ext4_set_bits(void *bm, int cur, int len)
-{
- __u32 *addr;
-
- len = cur + len;
- while (cur < len) {
- if ((cur & 31) == 0 && (len - cur) >= 32) {
- /* fast path: set whole word at once */
- addr = bm + (cur >> 3);
- *addr = 0xffffffff;
- cur += 32;
- continue;
- }
- mb_set_bit(cur, bm);
- cur++;
- }
-}
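-
-/*
- * Example (illustrative): ext4_set_bits(bm, 30, 70) sets bits 30..99.
- * Bits 30 and 31 go through mb_set_bit(), bits 32..95 are covered by
- * two whole-word stores at byte offsets 4 and 8, and bits 96..99 fall
- * back to mb_set_bit() again; mb_clear_bits() mirrors this with zero
- * stores.
- */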
-
-static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
- int first, int count)
-{
- int block = 0;
- int max = 0;
- int order;
- void *buddy;
- void *buddy2;
- struct super_block *sb = e4b->bd_sb;
-
- BUG_ON(first + count > (sb->s_blocksize << 3));
- assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
- mb_check_buddy(e4b);
- mb_free_blocks_double(inode, e4b, first, count);
-
- e4b->bd_info->bb_free += count;
- if (first < e4b->bd_info->bb_first_free)
- e4b->bd_info->bb_first_free = first;
-
- /* let's maintain fragments counter */
- if (first != 0)
- block = !mb_test_bit(first - 1, e4b->bd_bitmap);
- if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
- max = !mb_test_bit(first + count, e4b->bd_bitmap);
- if (block && max)
- e4b->bd_info->bb_fragments--;
- else if (!block && !max)
- e4b->bd_info->bb_fragments++;
-
- /* let's maintain buddy itself */
- while (count-- > 0) {
- block = first++;
- order = 0;
-
- if (!mb_test_bit(block, e4b->bd_bitmap)) {
- ext4_fsblk_t blocknr;
-
- blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
- blocknr += EXT4_C2B(EXT4_SB(sb), block);
- ext4_grp_locked_error(sb, e4b->bd_group,
- inode ? inode->i_ino : 0,
- blocknr,
- "freeing already freed block "
- "(bit %u)", block);
- }
- mb_clear_bit(block, e4b->bd_bitmap);
- e4b->bd_info->bb_counters[order]++;
-
- /* start of the buddy */
- buddy = mb_find_buddy(e4b, order, &max);
-
- do {
- block &= ~1UL;
- if (mb_test_bit(block, buddy) ||
- mb_test_bit(block + 1, buddy))
- break;
-
- /* both the buddies are free, try to coalesce them */
- buddy2 = mb_find_buddy(e4b, order + 1, &max);
-
- if (!buddy2)
- break;
-
-			if (order > 0) {
-				/* at order 0 the "buddy" is the block
-				 * bitmap itself, where free bits must
-				 * stay clear, so we only set bits for
-				 * real buddy orders */
-				mb_set_bit(block, buddy);
-				mb_set_bit(block + 1, buddy);
-			}
- e4b->bd_info->bb_counters[order]--;
- e4b->bd_info->bb_counters[order]--;
-
- block = block >> 1;
- order++;
- e4b->bd_info->bb_counters[order]++;
-
- mb_clear_bit(block, buddy2);
- buddy = buddy2;
- } while (1);
- }
- mb_set_largest_free_order(sb, e4b->bd_info);
- mb_check_buddy(e4b);
-}
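-
-/*
- * Worked example of the coalescing loop above (illustrative): freeing
- * block 4 while block 5 is already free clears bit 4 in the bitmap and
- * bumps bb_counters[0]; since neither bit of the pair (4,5) is now set
- * at order 0, the loop decrements bb_counters[0] twice, clears bit
- * 4 >> 1 = 2 in the order-1 buddy, bumps bb_counters[1], and retries
- * the merge one level up with the order-2 buddy.
- */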
-
-static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
- int needed, struct ext4_free_extent *ex)
-{
- int next = block;
- int max;
- void *buddy;
-
- assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
- BUG_ON(ex == NULL);
-
- buddy = mb_find_buddy(e4b, order, &max);
- BUG_ON(buddy == NULL);
- BUG_ON(block >= max);
- if (mb_test_bit(block, buddy)) {
- ex->fe_len = 0;
- ex->fe_start = 0;
- ex->fe_group = 0;
- return 0;
- }
-
-	/* FIXME: drop order completely? */
- if (likely(order == 0)) {
- /* find actual order */
- order = mb_find_order_for_block(e4b, block);
- block = block >> order;
- }
-
- ex->fe_len = 1 << order;
- ex->fe_start = block << order;
- ex->fe_group = e4b->bd_group;
-
- /* calc difference from given start */
- next = next - ex->fe_start;
- ex->fe_len -= next;
- ex->fe_start += next;
-
- while (needed > ex->fe_len &&
- (buddy = mb_find_buddy(e4b, order, &max))) {
-
- if (block + 1 >= max)
- break;
-
- next = (block + 1) * (1 << order);
- if (mb_test_bit(next, e4b->bd_bitmap))
- break;
-
- order = mb_find_order_for_block(e4b, next);
-
- block = next >> order;
- ex->fe_len += 1 << order;
- }
-
- BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3)));
- return ex->fe_len;
-}
-
-static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
-{
- int ord;
- int mlen = 0;
- int max = 0;
- int cur;
- int start = ex->fe_start;
- int len = ex->fe_len;
- unsigned ret = 0;
- int len0 = len;
- void *buddy;
-
- BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
- BUG_ON(e4b->bd_group != ex->fe_group);
- assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
- mb_check_buddy(e4b);
- mb_mark_used_double(e4b, start, len);
-
- e4b->bd_info->bb_free -= len;
- if (e4b->bd_info->bb_first_free == start)
- e4b->bd_info->bb_first_free += len;
-
- /* let's maintain fragments counter */
- if (start != 0)
- mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
- if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
- max = !mb_test_bit(start + len, e4b->bd_bitmap);
- if (mlen && max)
- e4b->bd_info->bb_fragments++;
- else if (!mlen && !max)
- e4b->bd_info->bb_fragments--;
-
- /* let's maintain buddy itself */
- while (len) {
- ord = mb_find_order_for_block(e4b, start);
-
- if (((start >> ord) << ord) == start && len >= (1 << ord)) {
- /* the whole chunk may be allocated at once! */
- mlen = 1 << ord;
- buddy = mb_find_buddy(e4b, ord, &max);
- BUG_ON((start >> ord) >= max);
- mb_set_bit(start >> ord, buddy);
- e4b->bd_info->bb_counters[ord]--;
- start += mlen;
- len -= mlen;
- BUG_ON(len < 0);
- continue;
- }
-
- /* store for history */
- if (ret == 0)
- ret = len | (ord << 16);
-
- /* we have to split large buddy */
- BUG_ON(ord <= 0);
- buddy = mb_find_buddy(e4b, ord, &max);
- mb_set_bit(start >> ord, buddy);
- e4b->bd_info->bb_counters[ord]--;
-
- ord--;
- cur = (start >> ord) & ~1U;
- buddy = mb_find_buddy(e4b, ord, &max);
- mb_clear_bit(cur, buddy);
- mb_clear_bit(cur + 1, buddy);
- e4b->bd_info->bb_counters[ord]++;
- e4b->bd_info->bb_counters[ord]++;
- }
- mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
-
- ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
- mb_check_buddy(e4b);
-
- return ret;
-}
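-
-/*
- * Worked example of the splitting above (illustrative): marking blocks
- * 8..10 used inside a free order-3 chunk 8..15 first splits 8..15 into
- * 8..11 and 12..15 (12..15 stays free at order 2), then 8..11 into
- * 8..9 and 10..11; 8..9 is taken whole at order 1, 10..11 is split so
- * that only block 11 remains free at order 0, and ext4_set_bits()
- * finally marks bits 8..10 in bd_bitmap.
- */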
-
-/*
- * Must be called under group lock!
- */
-static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
- struct ext4_buddy *e4b)
-{
- struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- int ret;
-
- BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
- BUG_ON(ac->ac_status == AC_STATUS_FOUND);
-
- ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
- ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
- ret = mb_mark_used(e4b, &ac->ac_b_ex);
-
- /* preallocation can change ac_b_ex, thus we store actually
- * allocated blocks for history */
- ac->ac_f_ex = ac->ac_b_ex;
-
- ac->ac_status = AC_STATUS_FOUND;
- ac->ac_tail = ret & 0xffff;
- ac->ac_buddy = ret >> 16;
-
-	/*
-	 * take the page reference. We want the page to be pinned
-	 * so that we don't get an ext4_mb_init_cache call for this
-	 * group until we update the bitmap. Otherwise we could
-	 * allocate blocks twice. The reference is dropped
-	 * in ext4_mb_release_context
-	 */
- ac->ac_bitmap_page = e4b->bd_bitmap_page;
- get_page(ac->ac_bitmap_page);
- ac->ac_buddy_page = e4b->bd_buddy_page;
- get_page(ac->ac_buddy_page);
- /* store last allocated for subsequent stream allocation */
- if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
- spin_lock(&sbi->s_md_lock);
- sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
- sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
- spin_unlock(&sbi->s_md_lock);
- }
-}
-
-/*
- * regular allocator, for general purposes allocation
- */
-
-static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
- struct ext4_buddy *e4b,
- int finish_group)
-{
- struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- struct ext4_free_extent *bex = &ac->ac_b_ex;
- struct ext4_free_extent *gex = &ac->ac_g_ex;
- struct ext4_free_extent ex;
- int max;
-
- if (ac->ac_status == AC_STATUS_FOUND)
- return;
- /*
- * We don't want to scan for a whole year
- */
- if (ac->ac_found > sbi->s_mb_max_to_scan &&
- !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
- ac->ac_status = AC_STATUS_BREAK;
- return;
- }
-
- /*
- * Haven't found good chunk so far, let's continue
- */
- if (bex->fe_len < gex->fe_len)
- return;
-
- if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
- && bex->fe_group == e4b->bd_group) {
- /* recheck chunk's availability - we don't know
- * when it was found (within this lock-unlock
- * period or not) */
- max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex);
- if (max >= gex->fe_len) {
- ext4_mb_use_best_found(ac, e4b);
- return;
- }
- }
-}
-
-/*
- * The routine checks whether the found extent is good enough. If it is,
- * the extent gets marked used and a flag is set in the context to stop
- * scanning. Otherwise, the extent is compared with the previously found
- * extent and, if the new one is better, it's stored in the context.
- * Later, the best found extent will be used if mballoc can't find a
- * good enough extent.
- *
- * FIXME: real allocation policy is to be designed yet!
- */
-static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
- struct ext4_free_extent *ex,
- struct ext4_buddy *e4b)
-{
- struct ext4_free_extent *bex = &ac->ac_b_ex;
- struct ext4_free_extent *gex = &ac->ac_g_ex;
-
- BUG_ON(ex->fe_len <= 0);
- BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
- BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
- BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
-
- ac->ac_found++;
-
- /*
- * The special case - take what you catch first
- */
- if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
- *bex = *ex;
- ext4_mb_use_best_found(ac, e4b);
- return;
- }
-
- /*
-	 * Let's check whether the chunk is good enough
- */
- if (ex->fe_len == gex->fe_len) {
- *bex = *ex;
- ext4_mb_use_best_found(ac, e4b);
- return;
- }
-
- /*
-	 * If this is the first found extent, just store it in the context
- */
- if (bex->fe_len == 0) {
- *bex = *ex;
- return;
- }
-
- /*
-	 * If the newly found extent is better, store it in the context
- */
- if (bex->fe_len < gex->fe_len) {
-		/* if the request isn't satisfied, any found extent
-		 * larger than the previous best one is better */
- if (ex->fe_len > bex->fe_len)
- *bex = *ex;
- } else if (ex->fe_len > gex->fe_len) {
-		/* if the request is satisfied, then we try to find
-		 * an extent that still satisfies the request, but is
-		 * smaller than the previous one */
- if (ex->fe_len < bex->fe_len)
- *bex = *ex;
- }
-
- ext4_mb_check_limits(ac, e4b, 0);
-}
-
-static noinline_for_stack
-int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
- struct ext4_buddy *e4b)
-{
- struct ext4_free_extent ex = ac->ac_b_ex;
- ext4_group_t group = ex.fe_group;
- int max;
- int err;
-
- BUG_ON(ex.fe_len <= 0);
- err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
- if (err)
- return err;
-
- ext4_lock_group(ac->ac_sb, group);
- max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);
-
- if (max > 0) {
- ac->ac_b_ex = ex;
- ext4_mb_use_best_found(ac, e4b);
- }
-
- ext4_unlock_group(ac->ac_sb, group);
- ext4_mb_unload_buddy(e4b);
-
- return 0;
-}
-
-static noinline_for_stack
-int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
- struct ext4_buddy *e4b)
-{
- ext4_group_t group = ac->ac_g_ex.fe_group;
- int max;
- int err;
- struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- struct ext4_free_extent ex;
-
- if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
- return 0;
-
- err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
- if (err)
- return err;
-
- ext4_lock_group(ac->ac_sb, group);
- max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
- ac->ac_g_ex.fe_len, &ex);
-
- if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
- ext4_fsblk_t start;
-
- start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
- ex.fe_start;
- /* use do_div to get remainder (would be 64-bit modulo) */
- if (do_div(start, sbi->s_stripe) == 0) {
- ac->ac_found++;
- ac->ac_b_ex = ex;
- ext4_mb_use_best_found(ac, e4b);
- }
- } else if (max >= ac->ac_g_ex.fe_len) {
- BUG_ON(ex.fe_len <= 0);
- BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
- BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
- ac->ac_found++;
- ac->ac_b_ex = ex;
- ext4_mb_use_best_found(ac, e4b);
- } else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
-		/* Sometimes, the caller may want to merge even a small
-		 * number of blocks into an existing extent */
- BUG_ON(ex.fe_len <= 0);
- BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
- BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
- ac->ac_found++;
- ac->ac_b_ex = ex;
- ext4_mb_use_best_found(ac, e4b);
- }
- ext4_unlock_group(ac->ac_sb, group);
- ext4_mb_unload_buddy(e4b);
-
- return 0;
-}
-
-/*
- * The routine scans the buddy structures (not the bitmap!) from the
- * given order up to the max order and tries to find a chunk big enough
- * to satisfy the request
- */
-static noinline_for_stack
-void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
- struct ext4_buddy *e4b)
-{
- struct super_block *sb = ac->ac_sb;
- struct ext4_group_info *grp = e4b->bd_info;
- void *buddy;
- int i;
- int k;
- int max;
-
- BUG_ON(ac->ac_2order <= 0);
- for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
- if (grp->bb_counters[i] == 0)
- continue;
-
- buddy = mb_find_buddy(e4b, i, &max);
- BUG_ON(buddy == NULL);
-
- k = mb_find_next_zero_bit(buddy, max, 0);
- BUG_ON(k >= max);
-
- ac->ac_found++;
-
- ac->ac_b_ex.fe_len = 1 << i;
- ac->ac_b_ex.fe_start = k << i;
- ac->ac_b_ex.fe_group = e4b->bd_group;
-
- ext4_mb_use_best_found(ac, e4b);
-
- BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);
-
- if (EXT4_SB(sb)->s_mb_stats)
- atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
-
- break;
- }
-}
-
-/*
- * The routine scans the group and measures all found extents.
- * In order to optimize scanning, the caller must pass the number of
- * free blocks in the group, so the routine knows the upper limit.
- */
-static noinline_for_stack
-void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
- struct ext4_buddy *e4b)
-{
- struct super_block *sb = ac->ac_sb;
- void *bitmap = e4b->bd_bitmap;
- struct ext4_free_extent ex;
- int i;
- int free;
-
- free = e4b->bd_info->bb_free;
- BUG_ON(free <= 0);
-
- i = e4b->bd_info->bb_first_free;
-
- while (free && ac->ac_status == AC_STATUS_CONTINUE) {
- i = mb_find_next_zero_bit(bitmap,
- EXT4_CLUSTERS_PER_GROUP(sb), i);
- if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
-			/*
-			 * If we have a corrupt bitmap, we won't find any
-			 * free blocks even though the group info says we
-			 * have free blocks
-			 */
- ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
- "%d free clusters as per "
- "group info. But bitmap says 0",
- free);
- break;
- }
-
- mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
- BUG_ON(ex.fe_len <= 0);
- if (free < ex.fe_len) {
- ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
- "%d free clusters as per "
- "group info. But got %d blocks",
- free, ex.fe_len);
- /*
- * The number of free blocks differs. This mostly
-			 * indicates that the bitmap is corrupt. So exit
- * without claiming the space.
- */
- break;
- }
-
- ext4_mb_measure_extent(ac, &ex, e4b);
-
- i += ex.fe_len;
- free -= ex.fe_len;
- }
-
- ext4_mb_check_limits(ac, e4b, 1);
-}
-
-/*
- * This is a special case for storage like RAID5:
- * we try to find stripe-aligned chunks for stripe-size-multiple requests
- */
-static noinline_for_stack
-void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
- struct ext4_buddy *e4b)
-{
- struct super_block *sb = ac->ac_sb;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- void *bitmap = e4b->bd_bitmap;
- struct ext4_free_extent ex;
- ext4_fsblk_t first_group_block;
- ext4_fsblk_t a;
- ext4_grpblk_t i;
- int max;
-
- BUG_ON(sbi->s_stripe == 0);
-
- /* find first stripe-aligned block in group */
- first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
-
- a = first_group_block + sbi->s_stripe - 1;
- do_div(a, sbi->s_stripe);
- i = (a * sbi->s_stripe) - first_group_block;
-
- while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
- if (!mb_test_bit(i, bitmap)) {
- max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
- if (max >= sbi->s_stripe) {
- ac->ac_found++;
- ac->ac_b_ex = ex;
- ext4_mb_use_best_found(ac, e4b);
- break;
- }
- }
- i += sbi->s_stripe;
- }
-}
-
-/* This is now called BEFORE we load the buddy bitmap. */
-static int ext4_mb_good_group(struct ext4_allocation_context *ac,
- ext4_group_t group, int cr)
-{
- unsigned free, fragments;
- int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
- struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
-
- BUG_ON(cr < 0 || cr >= 4);
-
- /* We only do this if the grp has never been initialized */
- if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
- int ret = ext4_mb_init_group(ac->ac_sb, group);
- if (ret)
- return 0;
- }
-
- free = grp->bb_free;
- fragments = grp->bb_fragments;
- if (free == 0)
- return 0;
- if (fragments == 0)
- return 0;
-
- switch (cr) {
- case 0:
- BUG_ON(ac->ac_2order == 0);
-
- if (grp->bb_largest_free_order < ac->ac_2order)
- return 0;
-
- /* Avoid using the first bg of a flexgroup for data files */
- if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
- (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
- ((group % flex_size) == 0))
- return 0;
-
- return 1;
- case 1:
- if ((free / fragments) >= ac->ac_g_ex.fe_len)
- return 1;
- break;
- case 2:
- if (free >= ac->ac_g_ex.fe_len)
- return 1;
- break;
- case 3:
- return 1;
- default:
- BUG();
- }
-
- return 0;
-}
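-
-/*
- * Summary of the criteria above: cr 0 accepts a group only if its
- * largest free buddy order can satisfy ac_2order, cr 1 requires the
- * average free extent (free / fragments) to cover the request, cr 2
- * merely requires enough free clusters in total, and cr 3 accepts any
- * group that has free space at all, serving as the last-resort pass.
- */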
-
-static noinline_for_stack int
-ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
-{
- ext4_group_t ngroups, group, i;
- int cr;
- int err = 0;
- struct ext4_sb_info *sbi;
- struct super_block *sb;
- struct ext4_buddy e4b;
-
- sb = ac->ac_sb;
- sbi = EXT4_SB(sb);
- ngroups = ext4_get_groups_count(sb);
- /* non-extent files are limited to low blocks/groups */
- if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
- ngroups = sbi->s_blockfile_groups;
-
- BUG_ON(ac->ac_status == AC_STATUS_FOUND);
-
- /* first, try the goal */
- err = ext4_mb_find_by_goal(ac, &e4b);
- if (err || ac->ac_status == AC_STATUS_FOUND)
- goto out;
-
- if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
- goto out;
-
-	/*
-	 * ac->ac_2order is set only if the fe_len is a power of 2;
-	 * if ac_2order is set we also set the criteria to 0 so that we
-	 * try an exact allocation using the buddy.
-	 */
- i = fls(ac->ac_g_ex.fe_len);
- ac->ac_2order = 0;
- /*
- * We search using buddy data only if the order of the request
-	 * is greater than or equal to sbi->s_mb_order2_reqs.
- * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req
- */
- if (i >= sbi->s_mb_order2_reqs) {
- /*
-		 * This should tell if fe_len is exactly a power of 2
- */
- if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
- ac->ac_2order = i - 1;
- }
-
- /* if stream allocation is enabled, use global goal */
- if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
- /* TBD: may be hot point */
- spin_lock(&sbi->s_md_lock);
- ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
- ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
- spin_unlock(&sbi->s_md_lock);
- }
-
-	/* Let's just scan groups to find more or less suitable blocks */
- cr = ac->ac_2order ? 0 : 1;
- /*
- * cr == 0 try to get exact allocation,
- * cr == 3 try to get anything
- */
-repeat:
- for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
- ac->ac_criteria = cr;
- /*
-		 * searching for the right group starts
-		 * from the goal value specified
- */
- group = ac->ac_g_ex.fe_group;
-
- for (i = 0; i < ngroups; group++, i++) {
- if (group == ngroups)
- group = 0;
-
- /* This now checks without needing the buddy page */
- if (!ext4_mb_good_group(ac, group, cr))
- continue;
-
- err = ext4_mb_load_buddy(sb, group, &e4b);
- if (err)
- goto out;
-
- ext4_lock_group(sb, group);
-
- /*
- * We need to check again after locking the
- * block group
- */
- if (!ext4_mb_good_group(ac, group, cr)) {
- ext4_unlock_group(sb, group);
- ext4_mb_unload_buddy(&e4b);
- continue;
- }
-
- ac->ac_groups_scanned++;
- if (cr == 0)
- ext4_mb_simple_scan_group(ac, &e4b);
- else if (cr == 1 && sbi->s_stripe &&
- !(ac->ac_g_ex.fe_len % sbi->s_stripe))
- ext4_mb_scan_aligned(ac, &e4b);
- else
- ext4_mb_complex_scan_group(ac, &e4b);
-
- ext4_unlock_group(sb, group);
- ext4_mb_unload_buddy(&e4b);
-
- if (ac->ac_status != AC_STATUS_CONTINUE)
- break;
- }
- }
-
- if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
- !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
- /*
- * We've been searching too long. Let's try to allocate
- * the best chunk we've found so far
- */
-
- ext4_mb_try_best_found(ac, &e4b);
- if (ac->ac_status != AC_STATUS_FOUND) {
- /*
- * Someone more lucky has already allocated it.
- * The only thing we can do is just take first
- * found block(s)
- printk(KERN_DEBUG "EXT4-fs: someone won our chunk\n");
- */
- ac->ac_b_ex.fe_group = 0;
- ac->ac_b_ex.fe_start = 0;
- ac->ac_b_ex.fe_len = 0;
- ac->ac_status = AC_STATUS_CONTINUE;
- ac->ac_flags |= EXT4_MB_HINT_FIRST;
- cr = 3;
- atomic_inc(&sbi->s_mb_lost_chunks);
- goto repeat;
- }
- }
-out:
- return err;
-}
-
-static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
-{
- struct super_block *sb = seq->private;
- ext4_group_t group;
-
- if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
- return NULL;
- group = *pos + 1;
- return (void *) ((unsigned long) group);
-}
-
-static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct super_block *sb = seq->private;
- ext4_group_t group;
-
- ++*pos;
- if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
- return NULL;
- group = *pos + 1;
- return (void *) ((unsigned long) group);
-}
-
-static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
-{
- struct super_block *sb = seq->private;
- ext4_group_t group = (ext4_group_t) ((unsigned long) v);
- int i;
- int err;
- struct ext4_buddy e4b;
- struct sg {
- struct ext4_group_info info;
- ext4_grpblk_t counters[16];
- } sg;
-
- group--;
- if (group == 0)
- seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
- "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
- "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
- "group", "free", "frags", "first",
- "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
- "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
-
- i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
- sizeof(struct ext4_group_info);
- err = ext4_mb_load_buddy(sb, group, &e4b);
- if (err) {
- seq_printf(seq, "#%-5u: I/O error\n", group);
- return 0;
- }
- ext4_lock_group(sb, group);
- memcpy(&sg, ext4_get_group_info(sb, group), i);
- ext4_unlock_group(sb, group);
- ext4_mb_unload_buddy(&e4b);
-
- seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
- sg.info.bb_fragments, sg.info.bb_first_free);
- for (i = 0; i <= 13; i++)
- seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
- sg.info.bb_counters[i] : 0);
- seq_printf(seq, " ]\n");
-
- return 0;
-}
-
-static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
-{
-}
-
-static const struct seq_operations ext4_mb_seq_groups_ops = {
- .start = ext4_mb_seq_groups_start,
- .next = ext4_mb_seq_groups_next,
- .stop = ext4_mb_seq_groups_stop,
- .show = ext4_mb_seq_groups_show,
-};
-
-static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
-{
- struct super_block *sb = PDE(inode)->data;
- int rc;
-
- rc = seq_open(file, &ext4_mb_seq_groups_ops);
- if (rc == 0) {
- struct seq_file *m = file->private_data;
- m->private = sb;
- }
- return rc;
-
-}
-
-static const struct file_operations ext4_mb_seq_groups_fops = {
- .owner = THIS_MODULE,
- .open = ext4_mb_seq_groups_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
-{
- int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
- struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
-
- BUG_ON(!cachep);
- return cachep;
-}
-
-/* Create and initialize ext4_group_info data for the given group. */
-int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
- struct ext4_group_desc *desc)
-{
- int i;
- int metalen = 0;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_group_info **meta_group_info;
- struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
-
- /*
-	 * First check if this group is the first of a descriptor block.
-	 * If so, we have to allocate a new table of pointers
-	 * to ext4_group_info structures
- */
- if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
- metalen = sizeof(*meta_group_info) <<
- EXT4_DESC_PER_BLOCK_BITS(sb);
- meta_group_info = kmalloc(metalen, GFP_KERNEL);
- if (meta_group_info == NULL) {
- ext4_msg(sb, KERN_ERR, "can't allocate mem "
- "for a buddy group");
- goto exit_meta_group_info;
- }
- sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
- meta_group_info;
- }
-
- meta_group_info =
- sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
- i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
-
- meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
- if (meta_group_info[i] == NULL) {
- ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
- goto exit_group_info;
- }
- memset(meta_group_info[i], 0, kmem_cache_size(cachep));
- set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
- &(meta_group_info[i]->bb_state));
-
- /*
- * initialize bb_free to be able to skip
- * empty groups without initialization
- */
- if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
- meta_group_info[i]->bb_free =
- ext4_free_clusters_after_init(sb, group, desc);
- } else {
- meta_group_info[i]->bb_free =
- ext4_free_group_clusters(sb, desc);
- }
-
- INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
- init_rwsem(&meta_group_info[i]->alloc_sem);
- meta_group_info[i]->bb_free_root = RB_ROOT;
- meta_group_info[i]->bb_largest_free_order = -1; /* uninit */
-
-#ifdef DOUBLE_CHECK
- {
- struct buffer_head *bh;
- meta_group_info[i]->bb_bitmap =
- kmalloc(sb->s_blocksize, GFP_KERNEL);
- BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
- bh = ext4_read_block_bitmap(sb, group);
- BUG_ON(bh == NULL);
- memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
- sb->s_blocksize);
- put_bh(bh);
- }
-#endif
-
- return 0;
-
-exit_group_info:
- /* If a meta_group_info table has been allocated, release it now */
- if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
- kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
- sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
- }
-exit_meta_group_info:
- return -ENOMEM;
-} /* ext4_mb_add_groupinfo */
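
The two-level s_group_info scheme used by this function comes down to a few lines of index arithmetic. A minimal userspace sketch, assuming 4 KiB blocks and 32-byte group descriptors (so 128 descriptors per block; both values are illustrative):

/* Illustrative userspace sketch, not kernel code. */
#include <stdio.h>

#define DESC_PER_BLOCK_BITS 7                   /* assumed: 4 KiB / 32 B */
#define DESC_PER_BLOCK (1u << DESC_PER_BLOCK_BITS)

int main(void)
{
        unsigned group;

        for (group = 0; group < 300; group += 127) {
                unsigned meta = group >> DESC_PER_BLOCK_BITS; /* outer table */
                unsigned slot = group & (DESC_PER_BLOCK - 1); /* inner slot */

                printf("group %3u -> s_group_info[%u][%u]\n",
                       group, meta, slot);
        }
        return 0;
}
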
-
-static int ext4_mb_init_backend(struct super_block *sb)
-{
- ext4_group_t ngroups = ext4_get_groups_count(sb);
- ext4_group_t i;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_super_block *es = sbi->s_es;
- int num_meta_group_infos;
- int num_meta_group_infos_max;
- int array_size;
- struct ext4_group_desc *desc;
- struct kmem_cache *cachep;
-
- /* This is the number of blocks used by GDT */
- num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
- 1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
-
- /*
- * This is the total number of blocks used by the GDT, including
- * the number of reserved GDT blocks.
- * The s_group_info array is allocated with this value
- * to allow a clean online resize without complex
- * manipulation of pointers.
- * The drawback is the unused memory when no resize
- * occurs, but it's very low in terms of pages
- * (see comments below).
- * Need to handle this properly when META_BG resizing is allowed.
- */
- num_meta_group_infos_max = num_meta_group_infos +
- le16_to_cpu(es->s_reserved_gdt_blocks);
-
- /*
- * array_size is the size of the s_group_info array. We round it
- * to the next power of two because this rounding is done
- * internally by kmalloc, so we can have some more memory
- * for free here (e.g. it may be used for a META_BG resize).
- */
- array_size = 1;
- while (array_size < sizeof(*sbi->s_group_info) *
- num_meta_group_infos_max)
- array_size = array_size << 1;
- /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
- * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
- * So a two level scheme suffices for now. */
- sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL);
- if (sbi->s_group_info == NULL) {
- ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
- return -ENOMEM;
- }
- sbi->s_buddy_cache = new_inode(sb);
- if (sbi->s_buddy_cache == NULL) {
- ext4_msg(sb, KERN_ERR, "can't get new inode");
- goto err_freesgi;
- }
- /* To avoid potentially colliding with a valid on-disk inode number,
- * use EXT4_BAD_INO for the buddy cache inode number. This inode is
- * not in the inode hash, so it should never be found by iget(), but
- * this will avoid confusion if it ever shows up during debugging. */
- sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
- EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
- for (i = 0; i < ngroups; i++) {
- desc = ext4_get_group_desc(sb, i, NULL);
- if (desc == NULL) {
- ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
- goto err_freebuddy;
- }
- if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
- goto err_freebuddy;
- }
-
- return 0;
-
-err_freebuddy:
- cachep = get_groupinfo_cache(sb->s_blocksize_bits);
- while (i-- > 0)
- kmem_cache_free(cachep, ext4_get_group_info(sb, i));
- i = num_meta_group_infos;
- while (i-- > 0)
- kfree(sbi->s_group_info[i]);
- iput(sbi->s_buddy_cache);
-err_freesgi:
- ext4_kvfree(sbi->s_group_info);
- return -ENOMEM;
-}
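
The power-of-two rounding of array_size above mirrors what kmalloc() does internally; rounding explicitly just makes the slack usable later (e.g. for a future META_BG resize). A minimal sketch of that rounding:

/* Illustrative userspace sketch, not kernel code. */
#include <stdio.h>

static unsigned long round_up_pow2(unsigned long n)
{
        unsigned long r = 1;

        while (r < n)
                r <<= 1;
        return r;
}

int main(void)
{
        unsigned long need = 300 * sizeof(void *); /* 300 meta-table pointers */

        printf("%lu bytes needed -> %lu bytes allocated\n",
               need, round_up_pow2(need));
        return 0;
}
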
-
-static void ext4_groupinfo_destroy_slabs(void)
-{
- int i;
-
- for (i = 0; i < NR_GRPINFO_CACHES; i++) {
- if (ext4_groupinfo_caches[i])
- kmem_cache_destroy(ext4_groupinfo_caches[i]);
- ext4_groupinfo_caches[i] = NULL;
- }
-}
-
-static int ext4_groupinfo_create_slab(size_t size)
-{
- static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
- int slab_size;
- int blocksize_bits = order_base_2(size);
- int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
- struct kmem_cache *cachep;
-
- if (cache_index >= NR_GRPINFO_CACHES)
- return -EINVAL;
-
- if (unlikely(cache_index < 0))
- cache_index = 0;
-
- mutex_lock(&ext4_grpinfo_slab_create_mutex);
- if (ext4_groupinfo_caches[cache_index]) {
- mutex_unlock(&ext4_grpinfo_slab_create_mutex);
- return 0; /* Already created */
- }
-
- slab_size = offsetof(struct ext4_group_info,
- bb_counters[blocksize_bits + 2]);
-
- cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
- slab_size, 0, SLAB_RECLAIM_ACCOUNT,
- NULL);
-
- ext4_groupinfo_caches[cache_index] = cachep;
-
- mutex_unlock(&ext4_grpinfo_slab_create_mutex);
- if (!cachep) {
- printk(KERN_EMERG
- "EXT4-fs: no memory for groupinfo slab cache\n");
- return -ENOMEM;
- }
-
- return 0;
-}
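
The slab_size computation above sizes the allocation so that the flexible bb_counters[] tail holds blocksize_bits + 2 entries, one per buddy order. A hedged userspace sketch of the same offsetof() trick (the struct here is a stand-in, not the kernel's ext4_group_info; GCC/clang accept a variable array index in offsetof):

/* Illustrative userspace sketch; the struct is a stand-in. */
#include <stdio.h>
#include <stddef.h>

struct group_info {
        unsigned bb_free;
        unsigned short bb_counters[];   /* one counter per buddy order */
};

int main(void)
{
        int blocksize_bits = 12;        /* 4 KiB blocks */
        size_t slab_size = offsetof(struct group_info,
                                    bb_counters[blocksize_bits + 2]);

        printf("slab size for 4 KiB blocks: %zu bytes\n", slab_size);
        return 0;
}
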
-
-int ext4_mb_init(struct super_block *sb, int needs_recovery)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- unsigned i, j;
- unsigned offset;
- unsigned max;
- int ret;
-
- i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
-
- sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
- if (sbi->s_mb_offsets == NULL) {
- ret = -ENOMEM;
- goto out;
- }
-
- i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
- sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
- if (sbi->s_mb_maxs == NULL) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = ext4_groupinfo_create_slab(sb->s_blocksize);
- if (ret < 0)
- goto out;
-
- /* order 0 is regular bitmap */
- sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
- sbi->s_mb_offsets[0] = 0;
-
- i = 1;
- offset = 0;
- max = sb->s_blocksize << 2;
- do {
- sbi->s_mb_offsets[i] = offset;
- sbi->s_mb_maxs[i] = max;
- offset += 1 << (sb->s_blocksize_bits - i);
- max = max >> 1;
- i++;
- } while (i <= sb->s_blocksize_bits + 1);
-
- spin_lock_init(&sbi->s_md_lock);
- spin_lock_init(&sbi->s_bal_lock);
-
- sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
- sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
- sbi->s_mb_stats = MB_DEFAULT_STATS;
- sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
- sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
- /*
- * The default group preallocation is 512, which for 4k block
- * sizes translates to 2 megabytes. However, for bigalloc file
- * systems this is probably too big (i.e., if the cluster size
- * is 1 megabyte, then the group preallocation size becomes half a
- * gigabyte!). As a default, we will keep a two megabyte
- * group preallocation size for cluster sizes up to 64k, and after
- * that, we will force a minimum group preallocation size of
- * 32 clusters. This translates to 8 megs when the cluster
- * size is 256k, and 32 megs when the cluster size is 1 meg,
- * which seems reasonable as a default.
- */
- sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
- sbi->s_cluster_bits, 32);
- /*
- * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
- * to the lowest multiple of s_stripe which is bigger than
- * the s_mb_group_prealloc as determined above. We want
- * the preallocation size to be an exact multiple of the
- * RAID stripe size so that preallocations don't fragment
- * the stripes.
- */
- if (sbi->s_stripe > 1) {
- sbi->s_mb_group_prealloc = roundup(
- sbi->s_mb_group_prealloc, sbi->s_stripe);
- }
-
- sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
- if (sbi->s_locality_groups == NULL) {
- ret = -ENOMEM;
- goto out_free_groupinfo_slab;
- }
- for_each_possible_cpu(i) {
- struct ext4_locality_group *lg;
- lg = per_cpu_ptr(sbi->s_locality_groups, i);
- mutex_init(&lg->lg_mutex);
- for (j = 0; j < PREALLOC_TB_SIZE; j++)
- INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
- spin_lock_init(&lg->lg_prealloc_lock);
- }
-
- /* init file for buddy data */
- ret = ext4_mb_init_backend(sb);
- if (ret != 0)
- goto out_free_locality_groups;
-
- if (sbi->s_proc)
- proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
- &ext4_mb_seq_groups_fops, sb);
-
- return 0;
-
-out_free_locality_groups:
- free_percpu(sbi->s_locality_groups);
- sbi->s_locality_groups = NULL;
-out_free_groupinfo_slab:
- ext4_groupinfo_destroy_slabs();
-out:
- kfree(sbi->s_mb_offsets);
- sbi->s_mb_offsets = NULL;
- kfree(sbi->s_mb_maxs);
- sbi->s_mb_maxs = NULL;
- return ret;
-}
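
To make the s_mb_offsets/s_mb_maxs loop above concrete, here is a userspace sketch that prints, for 4 KiB blocks (blocksize_bits == 12), where each order's buddy bitmap starts within the buddy page and how many bits it holds:

/* Illustrative userspace sketch, not kernel code. */
#include <stdio.h>

int main(void)
{
        int bits = 12;                          /* 4 KiB block size */
        unsigned offset = 0, max = 1u << (bits + 2);
        int i;

        printf("order  0: offset 0, max %u bits (plain bitmap)\n",
               1u << (bits + 3));
        for (i = 1; i <= bits + 1; i++) {
                printf("order %2d: offset %u, max %u\n", i, offset, max);
                offset += 1u << (bits - i);
                max >>= 1;
        }
        return 0;
}
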
-
-/* needs to be called with the ext4 group lock held */
-static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
-{
- struct ext4_prealloc_space *pa;
- struct list_head *cur, *tmp;
- int count = 0;
-
- list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
- pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
- list_del(&pa->pa_group_list);
- count++;
- kmem_cache_free(ext4_pspace_cachep, pa);
- }
- if (count)
- mb_debug(1, "mballoc: %u PAs left\n", count);
-}
-
-int ext4_mb_release(struct super_block *sb)
-{
- ext4_group_t ngroups = ext4_get_groups_count(sb);
- ext4_group_t i;
- int num_meta_group_infos;
- struct ext4_group_info *grinfo;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
-
- if (sbi->s_proc)
- remove_proc_entry("mb_groups", sbi->s_proc);
-
- if (sbi->s_group_info) {
- for (i = 0; i < ngroups; i++) {
- grinfo = ext4_get_group_info(sb, i);
-#ifdef DOUBLE_CHECK
- kfree(grinfo->bb_bitmap);
-#endif
- ext4_lock_group(sb, i);
- ext4_mb_cleanup_pa(grinfo);
- ext4_unlock_group(sb, i);
- kmem_cache_free(cachep, grinfo);
- }
- num_meta_group_infos = (ngroups +
- EXT4_DESC_PER_BLOCK(sb) - 1) >>
- EXT4_DESC_PER_BLOCK_BITS(sb);
- for (i = 0; i < num_meta_group_infos; i++)
- kfree(sbi->s_group_info[i]);
- ext4_kvfree(sbi->s_group_info);
- }
- kfree(sbi->s_mb_offsets);
- kfree(sbi->s_mb_maxs);
- if (sbi->s_buddy_cache)
- iput(sbi->s_buddy_cache);
- if (sbi->s_mb_stats) {
- ext4_msg(sb, KERN_INFO,
- "mballoc: %u blocks %u reqs (%u success)",
- atomic_read(&sbi->s_bal_allocated),
- atomic_read(&sbi->s_bal_reqs),
- atomic_read(&sbi->s_bal_success));
- ext4_msg(sb, KERN_INFO,
- "mballoc: %u extents scanned, %u goal hits, "
- "%u 2^N hits, %u breaks, %u lost",
- atomic_read(&sbi->s_bal_ex_scanned),
- atomic_read(&sbi->s_bal_goals),
- atomic_read(&sbi->s_bal_2orders),
- atomic_read(&sbi->s_bal_breaks),
- atomic_read(&sbi->s_mb_lost_chunks));
- ext4_msg(sb, KERN_INFO,
- "mballoc: %lu generated and it took %Lu",
- sbi->s_mb_buddies_generated,
- sbi->s_mb_generation_time);
- ext4_msg(sb, KERN_INFO,
- "mballoc: %u preallocated, %u discarded",
- atomic_read(&sbi->s_mb_preallocated),
- atomic_read(&sbi->s_mb_discarded));
- }
-
- free_percpu(sbi->s_locality_groups);
-
- return 0;
-}
-
-static inline int ext4_issue_discard(struct super_block *sb,
- ext4_group_t block_group, ext4_grpblk_t cluster, int count)
-{
- ext4_fsblk_t discard_block;
-
- discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
- ext4_group_first_block_no(sb, block_group));
- count = EXT4_C2B(EXT4_SB(sb), count);
- trace_ext4_discard_blocks(sb,
- (unsigned long long) discard_block, count);
- return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
-}
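
A minimal sketch of the cluster-to-block conversion performed above, using an illustrative bigalloc cluster ratio of 16 blocks per cluster and 32768 blocks per group (both assumptions, not values read from a real superblock):

/* Illustrative userspace sketch, not kernel code. */
#include <stdio.h>

#define CLUSTER_RATIO           16      /* assumed blocks per cluster */
#define BLOCKS_PER_GROUP        32768   /* assumed */
#define FIRST_DATA_BLOCK        0

int main(void)
{
        unsigned long long group = 3, cluster = 100, count = 4;
        unsigned long long group_first = FIRST_DATA_BLOCK +
                                         group * BLOCKS_PER_GROUP;
        unsigned long long discard_block = group_first +
                                           cluster * CLUSTER_RATIO;

        printf("discard %llu blocks starting at block %llu\n",
               count * CLUSTER_RATIO, discard_block);
        return 0;
}
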
-
-/*
- * This function is called by the jbd2 layer once the commit has finished,
- * so we know we can free the blocks that were released with that commit.
- */
-static void ext4_free_data_callback(struct super_block *sb,
- struct ext4_journal_cb_entry *jce,
- int rc)
-{
- struct ext4_free_data *entry = (struct ext4_free_data *)jce;
- struct ext4_buddy e4b;
- struct ext4_group_info *db;
- int err, count = 0, count2 = 0;
-
- mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
- entry->efd_count, entry->efd_group, entry);
-
- if (test_opt(sb, DISCARD))
- ext4_issue_discard(sb, entry->efd_group,
- entry->efd_start_cluster, entry->efd_count);
-
- err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
- /* we expect to find an existing buddy because it's pinned */
- BUG_ON(err != 0);
-
- db = e4b.bd_info;
- /* there are blocks to put in buddy to make them really free */
- count += entry->efd_count;
- count2++;
- ext4_lock_group(sb, entry->efd_group);
- /* Take it out of per group rb tree */
- rb_erase(&entry->efd_node, &(db->bb_free_root));
- mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
-
- /*
- * Clear the trimmed flag for the group so that the next
- * ext4_trim_fs can trim it.
- * If the volume is mounted with -o discard, online discard
- * is supported and the free blocks will be trimmed online.
- */
- if (!test_opt(sb, DISCARD))
- EXT4_MB_GRP_CLEAR_TRIMMED(db);
-
- if (!db->bb_free_root.rb_node) {
- /* No more items in the per group rb tree
- * balance refcounts from ext4_mb_free_metadata()
- */
- page_cache_release(e4b.bd_buddy_page);
- page_cache_release(e4b.bd_bitmap_page);
- }
- ext4_unlock_group(sb, entry->efd_group);
- kmem_cache_free(ext4_free_data_cachep, entry);
- ext4_mb_unload_buddy(&e4b);
-
- mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
-}
-
-#ifdef CONFIG_EXT4_DEBUG
-u8 mb_enable_debug __read_mostly;
-
-static struct dentry *debugfs_dir;
-static struct dentry *debugfs_debug;
-
-static void __init ext4_create_debugfs_entry(void)
-{
- debugfs_dir = debugfs_create_dir("ext4", NULL);
- if (debugfs_dir)
- debugfs_debug = debugfs_create_u8("mballoc-debug",
- S_IRUGO | S_IWUSR,
- debugfs_dir,
- &mb_enable_debug);
-}
-
-static void ext4_remove_debugfs_entry(void)
-{
- debugfs_remove(debugfs_debug);
- debugfs_remove(debugfs_dir);
-}
-
-#else
-
-static void __init ext4_create_debugfs_entry(void)
-{
-}
-
-static void ext4_remove_debugfs_entry(void)
-{
-}
-
-#endif
-
-int __init ext4_init_mballoc(void)
-{
- ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
- SLAB_RECLAIM_ACCOUNT);
- if (ext4_pspace_cachep == NULL)
- return -ENOMEM;
-
- ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
- SLAB_RECLAIM_ACCOUNT);
- if (ext4_ac_cachep == NULL) {
- kmem_cache_destroy(ext4_pspace_cachep);
- return -ENOMEM;
- }
-
- ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
- SLAB_RECLAIM_ACCOUNT);
- if (ext4_free_data_cachep == NULL) {
- kmem_cache_destroy(ext4_pspace_cachep);
- kmem_cache_destroy(ext4_ac_cachep);
- return -ENOMEM;
- }
- ext4_create_debugfs_entry();
- return 0;
-}
-
-void ext4_exit_mballoc(void)
-{
- /*
- * Wait for completion of call_rcu()'s on ext4_pspace_cachep
- * before destroying the slab cache.
- */
- rcu_barrier();
- kmem_cache_destroy(ext4_pspace_cachep);
- kmem_cache_destroy(ext4_ac_cachep);
- kmem_cache_destroy(ext4_free_data_cachep);
- ext4_groupinfo_destroy_slabs();
- ext4_remove_debugfs_entry();
-}
-
-
-/*
- * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps
- * Returns 0 on success or an error code
- */
-static noinline_for_stack int
-ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
- handle_t *handle, unsigned int reserv_clstrs)
-{
- struct buffer_head *bitmap_bh = NULL;
- struct ext4_group_desc *gdp;
- struct buffer_head *gdp_bh;
- struct ext4_sb_info *sbi;
- struct super_block *sb;
- ext4_fsblk_t block;
- int err, len;
-
- BUG_ON(ac->ac_status != AC_STATUS_FOUND);
- BUG_ON(ac->ac_b_ex.fe_len <= 0);
-
- sb = ac->ac_sb;
- sbi = EXT4_SB(sb);
-
- err = -EIO;
- bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
- if (!bitmap_bh)
- goto out_err;
-
- err = ext4_journal_get_write_access(handle, bitmap_bh);
- if (err)
- goto out_err;
-
- err = -EIO;
- gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
- if (!gdp)
- goto out_err;
-
- ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
- ext4_free_group_clusters(sb, gdp));
-
- err = ext4_journal_get_write_access(handle, gdp_bh);
- if (err)
- goto out_err;
-
- block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
-
- len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
- if (!ext4_data_block_valid(sbi, block, len)) {
- ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
- "fs metadata", block, block+len);
- /* The file system was mounted not to panic on error,
- * so fix the bitmap and repeat the block allocation.
- * We leak some of the blocks here.
- */
- ext4_lock_group(sb, ac->ac_b_ex.fe_group);
- ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
- ac->ac_b_ex.fe_len);
- ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
- err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
- if (!err)
- err = -EAGAIN;
- goto out_err;
- }
-
- ext4_lock_group(sb, ac->ac_b_ex.fe_group);
-#ifdef AGGRESSIVE_CHECK
- {
- int i;
- for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
- BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
- bitmap_bh->b_data));
- }
- }
-#endif
- ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
- ac->ac_b_ex.fe_len);
- if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
- ext4_free_group_clusters_set(sb, gdp,
- ext4_free_clusters_after_init(sb,
- ac->ac_b_ex.fe_group, gdp));
- }
- len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
- ext4_free_group_clusters_set(sb, gdp, len);
- gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
-
- ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
- percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
- /*
- * Now reduce the dirty block count also. Should not go negative
- */
- if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
- /* release all the reserved blocks if non delalloc */
- percpu_counter_sub(&sbi->s_dirtyclusters_counter,
- reserv_clstrs);
-
- if (sbi->s_log_groups_per_flex) {
- ext4_group_t flex_group = ext4_flex_group(sbi,
- ac->ac_b_ex.fe_group);
- atomic_sub(ac->ac_b_ex.fe_len,
- &sbi->s_flex_groups[flex_group].free_clusters);
- }
-
- err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
- if (err)
- goto out_err;
- err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
-
-out_err:
- ext4_mark_super_dirty(sb);
- brelse(bitmap_bh);
- return err;
-}
-
-/*
- * here we normalize request for locality group
- * Group requests are normalized to s_mb_group_prealloc, which goes to
- * s_stripe if we set the same via mount option.
- * s_mb_group_prealloc can be configured via
- * /sys/fs/ext4/<partition>/mb_group_prealloc
- *
- * XXX: should we try to preallocate more than the group has now?
- */
-static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
-{
- struct super_block *sb = ac->ac_sb;
- struct ext4_locality_group *lg = ac->ac_lg;
-
- BUG_ON(lg == NULL);
- ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
- mb_debug(1, "#%u: goal %u blocks for locality group\n",
- current->pid, ac->ac_g_ex.fe_len);
-}
-
-/*
- * Normalization means making the request better in terms of
- * size and alignment.
- */
-static noinline_for_stack void
-ext4_mb_normalize_request(struct ext4_allocation_context *ac,
- struct ext4_allocation_request *ar)
-{
- struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- int bsbits, max;
- ext4_lblk_t end;
- loff_t size, start_off;
- loff_t orig_size __maybe_unused;
- ext4_lblk_t start;
- struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
- struct ext4_prealloc_space *pa;
-
- /* only normalize data requests; metadata requests
- do not need preallocation */
- if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
- return;
-
- /* sometimes the caller may want exact blocks */
- if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
- return;
-
- /* caller may indicate that preallocation isn't
- * required (it's a tail, for example) */
- if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
- return;
-
- if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
- ext4_mb_normalize_group_request(ac);
- return;
- }
-
- bsbits = ac->ac_sb->s_blocksize_bits;
-
- /* first, let's learn the actual file size
- * assuming the current request is allocated */
- size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
- size = size << bsbits;
- if (size < i_size_read(ac->ac_inode))
- size = i_size_read(ac->ac_inode);
- orig_size = size;
-
- /* max size of free chunks */
- max = 2 << bsbits;
-
-#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
- (req <= (size) || max <= (chunk_size))
-
- /* first, try to predict filesize */
- /* XXX: should this table be tunable? */
- start_off = 0;
- if (size <= 16 * 1024) {
- size = 16 * 1024;
- } else if (size <= 32 * 1024) {
- size = 32 * 1024;
- } else if (size <= 64 * 1024) {
- size = 64 * 1024;
- } else if (size <= 128 * 1024) {
- size = 128 * 1024;
- } else if (size <= 256 * 1024) {
- size = 256 * 1024;
- } else if (size <= 512 * 1024) {
- size = 512 * 1024;
- } else if (size <= 1024 * 1024) {
- size = 1024 * 1024;
- } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
- start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
- (21 - bsbits)) << 21;
- size = 2 * 1024 * 1024;
- } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
- start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
- (22 - bsbits)) << 22;
- size = 4 * 1024 * 1024;
- } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
- (8<<20)>>bsbits, max, 8 * 1024)) {
- start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
- (23 - bsbits)) << 23;
- size = 8 * 1024 * 1024;
- } else {
- start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
- size = ac->ac_o_ex.fe_len << bsbits;
- }
- size = size >> bsbits;
- start = start_off >> bsbits;
-
- /* don't cover already allocated blocks in selected range */
- if (ar->pleft && start <= ar->lleft) {
- size -= ar->lleft + 1 - start;
- start = ar->lleft + 1;
- }
- if (ar->pright && start + size - 1 >= ar->lright)
- size -= start + size - ar->lright;
-
- end = start + size;
-
- /* check we don't cross already preallocated blocks */
- rcu_read_lock();
- list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
- ext4_lblk_t pa_end;
-
- if (pa->pa_deleted)
- continue;
- spin_lock(&pa->pa_lock);
- if (pa->pa_deleted) {
- spin_unlock(&pa->pa_lock);
- continue;
- }
-
- pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
- pa->pa_len);
-
- /* PA must not overlap original request */
- BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
- ac->ac_o_ex.fe_logical < pa->pa_lstart));
-
- /* skip PAs this normalized request doesn't overlap with */
- if (pa->pa_lstart >= end || pa_end <= start) {
- spin_unlock(&pa->pa_lock);
- continue;
- }
- BUG_ON(pa->pa_lstart <= start && pa_end >= end);
-
- /* adjust start or end to be adjacent to this pa */
- if (pa_end <= ac->ac_o_ex.fe_logical) {
- BUG_ON(pa_end < start);
- start = pa_end;
- } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
- BUG_ON(pa->pa_lstart > end);
- end = pa->pa_lstart;
- }
- spin_unlock(&pa->pa_lock);
- }
- rcu_read_unlock();
- size = end - start;
-
- /* XXX: extra loop to check we really don't overlap preallocations */
- rcu_read_lock();
- list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
- ext4_lblk_t pa_end;
-
- spin_lock(&pa->pa_lock);
- if (pa->pa_deleted == 0) {
- pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
- pa->pa_len);
- BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
- }
- spin_unlock(&pa->pa_lock);
- }
- rcu_read_unlock();
-
- if (start + size <= ac->ac_o_ex.fe_logical &&
- start > ac->ac_o_ex.fe_logical) {
- ext4_msg(ac->ac_sb, KERN_ERR,
- "start %lu, size %lu, fe_logical %lu",
- (unsigned long) start, (unsigned long) size,
- (unsigned long) ac->ac_o_ex.fe_logical);
- }
- BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
- start > ac->ac_o_ex.fe_logical);
- BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
-
- /* now prepare goal request */
-
- /* XXX: is it better to align blocks WRT to logical
- * placement or satisfy big request as is */
- ac->ac_g_ex.fe_logical = start;
- ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
-
- /* define goal start in order to merge */
- if (ar->pright && (ar->lright == (start + size))) {
- /* merge to the right */
- ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
- &ac->ac_f_ex.fe_group,
- &ac->ac_f_ex.fe_start);
- ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
- }
- if (ar->pleft && (ar->lleft + 1 == start)) {
- /* merge to the left */
- ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
- &ac->ac_f_ex.fe_group,
- &ac->ac_f_ex.fe_start);
- ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
- }
-
- mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
- (unsigned) orig_size, (unsigned) start);
-}
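
The size-prediction ladder above is easier to see stripped of the chunk-size limits. A simplified userspace sketch (it ignores the NRL_CHECK_SIZE() conditions and start_off alignment; the bucket values are taken from the table above):

/* Illustrative userspace sketch, not kernel code. */
#include <stdio.h>

static long normalize(long size)
{
        static const long buckets[] = {
                16 << 10, 32 << 10, 64 << 10, 128 << 10, 256 << 10,
                512 << 10, 1 << 20, 2 << 20, 4 << 20, 8 << 20,
        };
        unsigned i;

        for (i = 0; i < sizeof(buckets) / sizeof(buckets[0]); i++)
                if (size <= buckets[i])
                        return buckets[i];
        return size;            /* very large requests are kept as-is */
}

int main(void)
{
        printf("70 KiB -> %ld bytes\n", normalize(70L << 10));
        printf("3 MiB  -> %ld bytes\n", normalize(3L << 20));
        return 0;
}
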
-
-static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
-{
- struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
-
- if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
- atomic_inc(&sbi->s_bal_reqs);
- atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
- if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
- atomic_inc(&sbi->s_bal_success);
- atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
- if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
- ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
- atomic_inc(&sbi->s_bal_goals);
- if (ac->ac_found > sbi->s_mb_max_to_scan)
- atomic_inc(&sbi->s_bal_breaks);
- }
-
- if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
- trace_ext4_mballoc_alloc(ac);
- else
- trace_ext4_mballoc_prealloc(ac);
-}
-
-/*
- * Called on failure; free up any blocks from the inode PA for this
- * context. We don't need this for MB_GROUP_PA because we only change
- * pa_free in ext4_mb_release_context(), but on failure, we've already
- * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
- */
-static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
-{
- struct ext4_prealloc_space *pa = ac->ac_pa;
- int len;
-
- if (pa && pa->pa_type == MB_INODE_PA) {
- len = ac->ac_b_ex.fe_len;
- pa->pa_free += len;
- }
-}
-
-/*
- * use blocks preallocated to inode
- */
-static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
- struct ext4_prealloc_space *pa)
-{
- struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- ext4_fsblk_t start;
- ext4_fsblk_t end;
- int len;
-
- /* found preallocated blocks, use them */
- start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
- end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
- start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
- len = EXT4_NUM_B2C(sbi, end - start);
- ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
- &ac->ac_b_ex.fe_start);
- ac->ac_b_ex.fe_len = len;
- ac->ac_status = AC_STATUS_FOUND;
- ac->ac_pa = pa;
-
- BUG_ON(start < pa->pa_pstart);
- BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
- BUG_ON(pa->pa_free < len);
- pa->pa_free -= len;
-
- mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
-}
-
-/*
- * use blocks preallocated to locality group
- */
-static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
- struct ext4_prealloc_space *pa)
-{
- unsigned int len = ac->ac_o_ex.fe_len;
-
- ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
- &ac->ac_b_ex.fe_group,
- &ac->ac_b_ex.fe_start);
- ac->ac_b_ex.fe_len = len;
- ac->ac_status = AC_STATUS_FOUND;
- ac->ac_pa = pa;
-
- /* we don't correct pa_pstart or pa_len here to avoid a
- * possible race when the group is being loaded concurrently;
- * instead we correct the pa later, after blocks are marked
- * in the on-disk bitmap -- see ext4_mb_release_context().
- * Other CPUs are prevented from allocating from this pa by lg_mutex
- */
- mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
-}
-
-/*
- * Return the prealloc space that has the minimal distance
- * from the goal block. @cpa is the prealloc
- * space with the currently known minimal distance
- * from the goal block.
- */
-static struct ext4_prealloc_space *
-ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
- struct ext4_prealloc_space *pa,
- struct ext4_prealloc_space *cpa)
-{
- ext4_fsblk_t cur_distance, new_distance;
-
- if (cpa == NULL) {
- atomic_inc(&pa->pa_count);
- return pa;
- }
- cur_distance = abs(goal_block - cpa->pa_pstart);
- new_distance = abs(goal_block - pa->pa_pstart);
-
- if (cur_distance <= new_distance)
- return cpa;
-
- /* drop the previous reference */
- atomic_dec(&cpa->pa_count);
- atomic_inc(&pa->pa_count);
- return pa;
-}
-
-/*
- * search goal blocks in preallocated space
- */
-static noinline_for_stack int
-ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
-{
- struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- int order, i;
- struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
- struct ext4_locality_group *lg;
- struct ext4_prealloc_space *pa, *cpa = NULL;
- ext4_fsblk_t goal_block;
-
- /* only data can be preallocated */
- if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
- return 0;
-
- /* first, try per-file preallocation */
- rcu_read_lock();
- list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
-
- /* all fields in this condition don't change,
- * so we can skip locking for them */
- if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
- ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
- EXT4_C2B(sbi, pa->pa_len)))
- continue;
-
- /* non-extent files can't have physical blocks past 2^32 */
- if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
- (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
- EXT4_MAX_BLOCK_FILE_PHYS))
- continue;
-
- /* found preallocated blocks, use them */
- spin_lock(&pa->pa_lock);
- if (pa->pa_deleted == 0 && pa->pa_free) {
- atomic_inc(&pa->pa_count);
- ext4_mb_use_inode_pa(ac, pa);
- spin_unlock(&pa->pa_lock);
- ac->ac_criteria = 10;
- rcu_read_unlock();
- return 1;
- }
- spin_unlock(&pa->pa_lock);
- }
- rcu_read_unlock();
-
- /* can we use group allocation? */
- if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
- return 0;
-
- /* inode may have no locality group for some reason */
- lg = ac->ac_lg;
- if (lg == NULL)
- return 0;
- order = fls(ac->ac_o_ex.fe_len) - 1;
- if (order > PREALLOC_TB_SIZE - 1)
- /* The max size of hash table is PREALLOC_TB_SIZE */
- order = PREALLOC_TB_SIZE - 1;
-
- goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
- /*
- * search for the prealloc space that has the
- * minimal distance from the goal block.
- */
- for (i = order; i < PREALLOC_TB_SIZE; i++) {
- rcu_read_lock();
- list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
- pa_inode_list) {
- spin_lock(&pa->pa_lock);
- if (pa->pa_deleted == 0 &&
- pa->pa_free >= ac->ac_o_ex.fe_len) {
-
- cpa = ext4_mb_check_group_pa(goal_block,
- pa, cpa);
- }
- spin_unlock(&pa->pa_lock);
- }
- rcu_read_unlock();
- }
- if (cpa) {
- ext4_mb_use_group_pa(ac, cpa);
- ac->ac_criteria = 20;
- return 1;
- }
- return 0;
-}
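
A sketch of the bucket selection used above for the locality-group lists: the request length's highest set bit picks a list, capped at the table size (PREALLOC_TB_SIZE is assumed here to be 10, its value in this kernel's mballoc.h):

/* Illustrative userspace sketch, not kernel code. */
#include <stdio.h>

#define PREALLOC_TB_SIZE 10     /* assumed table size */

static int fls_(unsigned x)     /* userspace stand-in for fls() */
{
        int r = 0;

        while (x) {
                r++;
                x >>= 1;
        }
        return r;
}

static int pa_list_order(unsigned len)
{
        int order = fls_(len) - 1;

        return order > PREALLOC_TB_SIZE - 1 ? PREALLOC_TB_SIZE - 1 : order;
}

int main(void)
{
        printf("len    1 -> list %d\n", pa_list_order(1));
        printf("len  200 -> list %d\n", pa_list_order(200));
        printf("len 5000 -> list %d\n", pa_list_order(5000));
        return 0;
}
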
-
-/*
- * the function goes through all blocks freed in the group
- * but not yet committed and marks them as used in the in-core bitmap.
- * the buddy must be generated from this bitmap.
- * Needs to be called with the ext4 group lock held
- */
-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group)
-{
- struct rb_node *n;
- struct ext4_group_info *grp;
- struct ext4_free_data *entry;
-
- grp = ext4_get_group_info(sb, group);
- n = rb_first(&(grp->bb_free_root));
-
- while (n) {
- entry = rb_entry(n, struct ext4_free_data, efd_node);
- ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
- n = rb_next(n);
- }
- return;
-}
-
-/*
- * the function goes through all preallocations in this group and marks them
- * as used in the in-core bitmap. the buddy must be generated from this bitmap.
- * Needs to be called with the ext4 group lock held
- */
-static noinline_for_stack
-void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
- ext4_group_t group)
-{
- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
- struct ext4_prealloc_space *pa;
- struct list_head *cur;
- ext4_group_t groupnr;
- ext4_grpblk_t start;
- int preallocated = 0;
- int len;
-
- /* all forms of preallocation discard first load the group,
- * so the only competing code is preallocation use.
- * we don't need any locking here.
- * notice we do NOT ignore preallocations with pa_deleted;
- * otherwise we could leave used blocks available for
- * allocation in the buddy when a concurrent ext4_mb_put_pa()
- * is dropping the preallocation
- */
- list_for_each(cur, &grp->bb_prealloc_list) {
- pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
- spin_lock(&pa->pa_lock);
- ext4_get_group_no_and_offset(sb, pa->pa_pstart,
- &groupnr, &start);
- len = pa->pa_len;
- spin_unlock(&pa->pa_lock);
- if (unlikely(len == 0))
- continue;
- BUG_ON(groupnr != group);
- ext4_set_bits(bitmap, start, len);
- preallocated += len;
- }
- mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
-}
-
-static void ext4_mb_pa_callback(struct rcu_head *head)
-{
- struct ext4_prealloc_space *pa;
- pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
- kmem_cache_free(ext4_pspace_cachep, pa);
-}
-
-/*
- * drops a reference to preallocated space descriptor
- * if this was the last reference and the space is consumed
- */
-static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
- struct super_block *sb, struct ext4_prealloc_space *pa)
-{
- ext4_group_t grp;
- ext4_fsblk_t grp_blk;
-
- if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
- return;
-
- /* in this short window concurrent discard can set pa_deleted */
- spin_lock(&pa->pa_lock);
- if (pa->pa_deleted == 1) {
- spin_unlock(&pa->pa_lock);
- return;
- }
-
- pa->pa_deleted = 1;
- spin_unlock(&pa->pa_lock);
-
- grp_blk = pa->pa_pstart;
- /*
- * If doing group-based preallocation, pa_pstart may be in the
- * next group when pa is used up
- */
- if (pa->pa_type == MB_GROUP_PA)
- grp_blk--;
-
- ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
-
- /*
- * possible race:
- *
- * P1 (buddy init) P2 (regular allocation)
- * find block B in PA
- * copy on-disk bitmap to buddy
- * mark B in on-disk bitmap
- * drop PA from group
- * mark all PAs in buddy
- *
- * thus, P1 initializes buddy with B available. to prevent this
- * we make "copy" and "mark all PAs" atomic and serialize "drop PA"
- * against that pair
- */
- ext4_lock_group(sb, grp);
- list_del(&pa->pa_group_list);
- ext4_unlock_group(sb, grp);
-
- spin_lock(pa->pa_obj_lock);
- list_del_rcu(&pa->pa_inode_list);
- spin_unlock(pa->pa_obj_lock);
-
- call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
-}
-
-/*
- * creates new preallocated space for given inode
- */
-static noinline_for_stack int
-ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
-{
- struct super_block *sb = ac->ac_sb;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_prealloc_space *pa;
- struct ext4_group_info *grp;
- struct ext4_inode_info *ei;
-
- /* preallocate only when found space is larger than requested */
- BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
- BUG_ON(ac->ac_status != AC_STATUS_FOUND);
- BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
-
- pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
- if (pa == NULL)
- return -ENOMEM;
-
- if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
- int winl;
- int wins;
- int win;
- int offs;
-
- /* we can't allocate as much as the normalizer wants,
- * so the found space must get a proper lstart
- * to cover the original request */
- BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
- BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
-
- /* we're limited by the original request in that
- * the logical block must be covered anyway.
- * winl is the window we can move our chunk within */
- winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
-
- /* also, we should cover whole original request */
- wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
-
- /* the smallest one defines real window */
- win = min(winl, wins);
-
- offs = ac->ac_o_ex.fe_logical %
- EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
- if (offs && offs < win)
- win = offs;
-
- ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
- EXT4_B2C(sbi, win);
- BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
- BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
- }
-
- /* preallocation can change ac_b_ex, thus we store actually
- * allocated blocks for history */
- ac->ac_f_ex = ac->ac_b_ex;
-
- pa->pa_lstart = ac->ac_b_ex.fe_logical;
- pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
- pa->pa_len = ac->ac_b_ex.fe_len;
- pa->pa_free = pa->pa_len;
- atomic_set(&pa->pa_count, 1);
- spin_lock_init(&pa->pa_lock);
- INIT_LIST_HEAD(&pa->pa_inode_list);
- INIT_LIST_HEAD(&pa->pa_group_list);
- pa->pa_deleted = 0;
- pa->pa_type = MB_INODE_PA;
-
- mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
- pa->pa_pstart, pa->pa_len, pa->pa_lstart);
- trace_ext4_mb_new_inode_pa(ac, pa);
-
- ext4_mb_use_inode_pa(ac, pa);
- atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
-
- ei = EXT4_I(ac->ac_inode);
- grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
-
- pa->pa_obj_lock = &ei->i_prealloc_lock;
- pa->pa_inode = ac->ac_inode;
-
- ext4_lock_group(sb, ac->ac_b_ex.fe_group);
- list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
- ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
-
- spin_lock(pa->pa_obj_lock);
- list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
- spin_unlock(pa->pa_obj_lock);
-
- return 0;
-}
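
The window arithmetic above (winl/wins/offs) decides how far pa_lstart may slide left of the original request while still covering it. A userspace sketch with all values in blocks (cluster conversion omitted; the numbers are invented):

/* Illustrative userspace sketch, not kernel code; values invented. */
#include <stdio.h>

int main(void)
{
        long o_logical = 100, g_logical = 90;   /* original/goal start */
        long o_len = 4, b_len = 16;             /* requested/found length */
        long winl = o_logical - g_logical;      /* room toward the goal */
        long wins = b_len - o_len;              /* slack that still covers */
        long win = winl < wins ? winl : wins;
        long offs = o_logical % b_len;

        if (offs && offs < win)
                win = offs;                     /* keep the start aligned */
        printf("pa_lstart = %ld (window %ld)\n", o_logical - win, win);
        return 0;
}
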
-
-/*
- * creates new preallocated space for the locality group the inode belongs to
- */
-static noinline_for_stack int
-ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
-{
- struct super_block *sb = ac->ac_sb;
- struct ext4_locality_group *lg;
- struct ext4_prealloc_space *pa;
- struct ext4_group_info *grp;
-
- /* preallocate only when found space is larger than requested */
- BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
- BUG_ON(ac->ac_status != AC_STATUS_FOUND);
- BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
-
- BUG_ON(ext4_pspace_cachep == NULL);
- pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
- if (pa == NULL)
- return -ENOMEM;
-
- /* preallocation can change ac_b_ex, thus we store actually
- * allocated blocks for history */
- ac->ac_f_ex = ac->ac_b_ex;
-
- pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
- pa->pa_lstart = pa->pa_pstart;
- pa->pa_len = ac->ac_b_ex.fe_len;
- pa->pa_free = pa->pa_len;
- atomic_set(&pa->pa_count, 1);
- spin_lock_init(&pa->pa_lock);
- INIT_LIST_HEAD(&pa->pa_inode_list);
- INIT_LIST_HEAD(&pa->pa_group_list);
- pa->pa_deleted = 0;
- pa->pa_type = MB_GROUP_PA;
-
- mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
- pa->pa_pstart, pa->pa_len, pa->pa_lstart);
- trace_ext4_mb_new_group_pa(ac, pa);
-
- ext4_mb_use_group_pa(ac, pa);
- atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
-
- grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
- lg = ac->ac_lg;
- BUG_ON(lg == NULL);
-
- pa->pa_obj_lock = &lg->lg_prealloc_lock;
- pa->pa_inode = NULL;
-
- ext4_lock_group(sb, ac->ac_b_ex.fe_group);
- list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
- ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
-
- /*
- * We will later add the new pa to the right bucket
- * after updating the pa_free in ext4_mb_release_context
- */
- return 0;
-}
-
-static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
-{
- int err;
-
- if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
- err = ext4_mb_new_group_pa(ac);
- else
- err = ext4_mb_new_inode_pa(ac);
- return err;
-}
-
-/*
- * finds all unused blocks in the on-disk bitmap, frees them in
- * the in-core bitmap and buddy.
- * @pa must be unlinked from inode and group lists, so that
- * nobody else can find/use it.
- * the caller MUST hold group/inode locks.
- * TODO: optimize the case when there are no in-core structures yet
- */
-static noinline_for_stack int
-ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
- struct ext4_prealloc_space *pa)
-{
- struct super_block *sb = e4b->bd_sb;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- unsigned int end;
- unsigned int next;
- ext4_group_t group;
- ext4_grpblk_t bit;
- unsigned long long grp_blk_start;
- int err = 0;
- int free = 0;
-
- BUG_ON(pa->pa_deleted == 0);
- ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
- grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
- BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
- end = bit + pa->pa_len;
-
- while (bit < end) {
- bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
- if (bit >= end)
- break;
- next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
- mb_debug(1, " free preallocated %u/%u in group %u\n",
- (unsigned) ext4_group_first_block_no(sb, group) + bit,
- (unsigned) next - bit, (unsigned) group);
- free += next - bit;
-
- trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
- trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
- EXT4_C2B(sbi, bit)),
- next - bit);
- mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
- bit = next + 1;
- }
- if (free != pa->pa_free) {
- ext4_msg(e4b->bd_sb, KERN_CRIT,
- "pa %p: logic %lu, phys. %lu, len %lu",
- pa, (unsigned long) pa->pa_lstart,
- (unsigned long) pa->pa_pstart,
- (unsigned long) pa->pa_len);
- ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
- free, pa->pa_free);
- /*
- * pa is already deleted so we use the value obtained
- * from the bitmap and continue.
- */
- }
- atomic_add(free, &sbi->s_mb_discarded);
-
- return err;
-}
-
-static noinline_for_stack int
-ext4_mb_release_group_pa(struct ext4_buddy *e4b,
- struct ext4_prealloc_space *pa)
-{
- struct super_block *sb = e4b->bd_sb;
- ext4_group_t group;
- ext4_grpblk_t bit;
-
- trace_ext4_mb_release_group_pa(sb, pa);
- BUG_ON(pa->pa_deleted == 0);
- ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
- BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
- mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
- atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
- trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
-
- return 0;
-}
-
-/*
- * releases all preallocations in the given group
- *
- * first, we need to decide discard policy:
- * - when do we discard
- * 1) ENOSPC
- * - how many do we discard
- * 1) how many requested
- */
-static noinline_for_stack int
-ext4_mb_discard_group_preallocations(struct super_block *sb,
- ext4_group_t group, int needed)
-{
- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
- struct buffer_head *bitmap_bh = NULL;
- struct ext4_prealloc_space *pa, *tmp;
- struct list_head list;
- struct ext4_buddy e4b;
- int err;
- int busy = 0;
- int free = 0;
-
- mb_debug(1, "discard preallocation for group %u\n", group);
-
- if (list_empty(&grp->bb_prealloc_list))
- return 0;
-
- bitmap_bh = ext4_read_block_bitmap(sb, group);
- if (bitmap_bh == NULL) {
- ext4_error(sb, "Error reading block bitmap for %u", group);
- return 0;
- }
-
- err = ext4_mb_load_buddy(sb, group, &e4b);
- if (err) {
- ext4_error(sb, "Error loading buddy information for %u", group);
- put_bh(bitmap_bh);
- return 0;
- }
-
- if (needed == 0)
- needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
-
- INIT_LIST_HEAD(&list);
-repeat:
- ext4_lock_group(sb, group);
- list_for_each_entry_safe(pa, tmp,
- &grp->bb_prealloc_list, pa_group_list) {
- spin_lock(&pa->pa_lock);
- if (atomic_read(&pa->pa_count)) {
- spin_unlock(&pa->pa_lock);
- busy = 1;
- continue;
- }
- if (pa->pa_deleted) {
- spin_unlock(&pa->pa_lock);
- continue;
- }
-
- /* seems this one can be freed ... */
- pa->pa_deleted = 1;
-
- /* we can trust pa_free ... */
- free += pa->pa_free;
-
- spin_unlock(&pa->pa_lock);
-
- list_del(&pa->pa_group_list);
- list_add(&pa->u.pa_tmp_list, &list);
- }
-
- /* if we still need more blocks and some PAs were used, try again */
- if (free < needed && busy) {
- busy = 0;
- ext4_unlock_group(sb, group);
- /*
- * Yield the CPU here so that we don't get a soft lockup
- * in the non-preempt case.
- */
- yield();
- goto repeat;
- }
-
- /* found anything to free? */
- if (list_empty(&list)) {
- BUG_ON(free != 0);
- goto out;
- }
-
- /* now free all selected PAs */
- list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
-
- /* remove from object (inode or locality group) */
- spin_lock(pa->pa_obj_lock);
- list_del_rcu(&pa->pa_inode_list);
- spin_unlock(pa->pa_obj_lock);
-
- if (pa->pa_type == MB_GROUP_PA)
- ext4_mb_release_group_pa(&e4b, pa);
- else
- ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
-
- list_del(&pa->u.pa_tmp_list);
- call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
- }
-
-out:
- ext4_unlock_group(sb, group);
- ext4_mb_unload_buddy(&e4b);
- put_bh(bitmap_bh);
- return free;
-}
-
-/*
- * releases all unused preallocated blocks for the given inode
- *
- * It's important to discard preallocations under i_data_sem.
- * We don't want another block to be served from the prealloc
- * space when we are discarding the inode prealloc space.
- *
- * FIXME!! Make sure it is valid at all the call sites
- */
-void ext4_discard_preallocations(struct inode *inode)
-{
- struct ext4_inode_info *ei = EXT4_I(inode);
- struct super_block *sb = inode->i_sb;
- struct buffer_head *bitmap_bh = NULL;
- struct ext4_prealloc_space *pa, *tmp;
- ext4_group_t group = 0;
- struct list_head list;
- struct ext4_buddy e4b;
- int err;
-
- if (!S_ISREG(inode->i_mode)) {
- /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/
- return;
- }
-
- mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
- trace_ext4_discard_preallocations(inode);
-
- INIT_LIST_HEAD(&list);
-
-repeat:
- /* first, collect all pa's in the inode */
- spin_lock(&ei->i_prealloc_lock);
- while (!list_empty(&ei->i_prealloc_list)) {
- pa = list_entry(ei->i_prealloc_list.next,
- struct ext4_prealloc_space, pa_inode_list);
- BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
- spin_lock(&pa->pa_lock);
- if (atomic_read(&pa->pa_count)) {
- /* this shouldn't happen often - nobody should
- * use preallocation while we're discarding it */
- spin_unlock(&pa->pa_lock);
- spin_unlock(&ei->i_prealloc_lock);
- ext4_msg(sb, KERN_ERR,
- "uh-oh! used pa while discarding");
- WARN_ON(1);
- schedule_timeout_uninterruptible(HZ);
- goto repeat;
-
- }
- if (pa->pa_deleted == 0) {
- pa->pa_deleted = 1;
- spin_unlock(&pa->pa_lock);
- list_del_rcu(&pa->pa_inode_list);
- list_add(&pa->u.pa_tmp_list, &list);
- continue;
- }
-
- /* someone is deleting pa right now */
- spin_unlock(&pa->pa_lock);
- spin_unlock(&ei->i_prealloc_lock);
-
- /* we have to wait here because pa_deleted
- * doesn't mean the pa is already unlinked from
- * the list. as we might be called from
- * ->clear_inode(), the inode will get freed
- * and a concurrent thread which is unlinking
- * the pa from the inode's list may access already
- * freed memory, bad-bad-bad */
-
- /* XXX: if this happens too often, we can
- * add a flag to force wait only in case
- * of ->clear_inode(), but not in case of
- * regular truncate */
- schedule_timeout_uninterruptible(HZ);
- goto repeat;
- }
- spin_unlock(&ei->i_prealloc_lock);
-
- list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
- BUG_ON(pa->pa_type != MB_INODE_PA);
- ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
-
- err = ext4_mb_load_buddy(sb, group, &e4b);
- if (err) {
- ext4_error(sb, "Error loading buddy information for %u",
- group);
- continue;
- }
-
- bitmap_bh = ext4_read_block_bitmap(sb, group);
- if (bitmap_bh == NULL) {
- ext4_error(sb, "Error reading block bitmap for %u",
- group);
- ext4_mb_unload_buddy(&e4b);
- continue;
- }
-
- ext4_lock_group(sb, group);
- list_del(&pa->pa_group_list);
- ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
- ext4_unlock_group(sb, group);
-
- ext4_mb_unload_buddy(&e4b);
- put_bh(bitmap_bh);
-
- list_del(&pa->u.pa_tmp_list);
- call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
- }
-}
-
-#ifdef CONFIG_EXT4_DEBUG
-static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
-{
- struct super_block *sb = ac->ac_sb;
- ext4_group_t ngroups, i;
-
- if (!mb_enable_debug ||
- (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
- return;
-
- ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
- " Allocation context details:");
- ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
- ac->ac_status, ac->ac_flags);
- ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
- "goal %lu/%lu/%lu@%lu, "
- "best %lu/%lu/%lu@%lu cr %d",
- (unsigned long)ac->ac_o_ex.fe_group,
- (unsigned long)ac->ac_o_ex.fe_start,
- (unsigned long)ac->ac_o_ex.fe_len,
- (unsigned long)ac->ac_o_ex.fe_logical,
- (unsigned long)ac->ac_g_ex.fe_group,
- (unsigned long)ac->ac_g_ex.fe_start,
- (unsigned long)ac->ac_g_ex.fe_len,
- (unsigned long)ac->ac_g_ex.fe_logical,
- (unsigned long)ac->ac_b_ex.fe_group,
- (unsigned long)ac->ac_b_ex.fe_start,
- (unsigned long)ac->ac_b_ex.fe_len,
- (unsigned long)ac->ac_b_ex.fe_logical,
- (int)ac->ac_criteria);
- ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found",
- ac->ac_ex_scanned, ac->ac_found);
- ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
- ngroups = ext4_get_groups_count(sb);
- for (i = 0; i < ngroups; i++) {
- struct ext4_group_info *grp = ext4_get_group_info(sb, i);
- struct ext4_prealloc_space *pa;
- ext4_grpblk_t start;
- struct list_head *cur;
- ext4_lock_group(sb, i);
- list_for_each(cur, &grp->bb_prealloc_list) {
- pa = list_entry(cur, struct ext4_prealloc_space,
- pa_group_list);
- spin_lock(&pa->pa_lock);
- ext4_get_group_no_and_offset(sb, pa->pa_pstart,
- NULL, &start);
- spin_unlock(&pa->pa_lock);
- printk(KERN_ERR "PA:%u:%d:%u \n", i,
- start, pa->pa_len);
- }
- ext4_unlock_group(sb, i);
-
- if (grp->bb_free == 0)
- continue;
- printk(KERN_ERR "%u: %d/%d \n",
- i, grp->bb_free, grp->bb_fragments);
- }
- printk(KERN_ERR "\n");
-}
-#else
-static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
-{
- return;
-}
-#endif
-
-/*
- * We use locality group preallocation for small files. The size of the
- * file is determined by the current size or the resulting size after
- * allocation, whichever is larger.
- *
- * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
- */
-static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
-{
- struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- int bsbits = ac->ac_sb->s_blocksize_bits;
- loff_t size, isize;
-
- if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
- return;
-
- if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
- return;
-
- size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
- isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
- >> bsbits;
-
- if ((size == isize) &&
- !ext4_fs_is_busy(sbi) &&
- (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
- ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
- return;
- }
-
- if (sbi->s_mb_group_prealloc <= 0) {
- ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
- return;
- }
-
- /* don't use group allocation for large files */
- size = max(size, isize);
- if (size > sbi->s_mb_stream_request) {
- ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
- return;
- }
-
- BUG_ON(ac->ac_lg != NULL);
- /*
- * locality group prealloc space is per-CPU. The reason for having
- * a per-CPU locality group is to reduce the contention between block
- * requests from multiple CPUs.
- */
- ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);
-
- /* we're going to use group allocation */
- ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
-
- /* serialize all allocations in the group */
- mutex_lock(&ac->ac_lg->lg_mutex);
-}
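
A minimal sketch of the small-file policy implemented above, assuming the default stream threshold of 16 blocks (MB_DEFAULT_STREAM_THRESHOLD in this kernel): sizes at or below the threshold use the per-CPU locality group, larger ones use per-inode stream allocation:

/* Illustrative userspace sketch, not kernel code. */
#include <stdio.h>

#define STREAM_THRESHOLD 16     /* assumed default, in blocks */

int main(void)
{
        long sizes[] = { 4, 16, 17, 1024 };     /* file sizes in blocks */
        unsigned i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                printf("size %4ld blocks -> %s allocation\n", sizes[i],
                       sizes[i] > STREAM_THRESHOLD ?
                       "stream (per-inode)" : "group (per-CPU lg)");
        return 0;
}
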
-
-static noinline_for_stack int
-ext4_mb_initialize_context(struct ext4_allocation_context *ac,
- struct ext4_allocation_request *ar)
-{
- struct super_block *sb = ar->inode->i_sb;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_super_block *es = sbi->s_es;
- ext4_group_t group;
- unsigned int len;
- ext4_fsblk_t goal;
- ext4_grpblk_t block;
-
- /* we can't allocate > group size */
- len = ar->len;
-
- /* just a dirty hack to filter too big requests */
- if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10)
- len = EXT4_CLUSTERS_PER_GROUP(sb) - 10;
-
- /* start searching from the goal */
- goal = ar->goal;
- if (goal < le32_to_cpu(es->s_first_data_block) ||
- goal >= ext4_blocks_count(es))
- goal = le32_to_cpu(es->s_first_data_block);
- ext4_get_group_no_and_offset(sb, goal, &group, &block);
-
- /* set up allocation goals */
- memset(ac, 0, sizeof(struct ext4_allocation_context));
- ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
- ac->ac_status = AC_STATUS_CONTINUE;
- ac->ac_sb = sb;
- ac->ac_inode = ar->inode;
- ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
- ac->ac_o_ex.fe_group = group;
- ac->ac_o_ex.fe_start = block;
- ac->ac_o_ex.fe_len = len;
- ac->ac_g_ex = ac->ac_o_ex;
- ac->ac_flags = ar->flags;
-
- /* we have to define the context: will we work with a file or a
- * locality group. this is a policy, actually */
- ext4_mb_group_or_file(ac);
-
- mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
- "left: %u/%u, right %u/%u to %swritable\n",
- (unsigned) ar->len, (unsigned) ar->logical,
- (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
- (unsigned) ar->lleft, (unsigned) ar->pleft,
- (unsigned) ar->lright, (unsigned) ar->pright,
- atomic_read(&ar->inode->i_writecount) ? "" : "non-");
- return 0;
-}
-
-static noinline_for_stack void
-ext4_mb_discard_lg_preallocations(struct super_block *sb,
- struct ext4_locality_group *lg,
- int order, int total_entries)
-{
- ext4_group_t group = 0;
- struct ext4_buddy e4b;
- struct list_head discard_list;
- struct ext4_prealloc_space *pa, *tmp;
-
- mb_debug(1, "discard locality group preallocation\n");
-
- INIT_LIST_HEAD(&discard_list);
-
- spin_lock(&lg->lg_prealloc_lock);
- list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
- pa_inode_list) {
- spin_lock(&pa->pa_lock);
- if (atomic_read(&pa->pa_count)) {
- /*
- * This is the pa that we just used
- * for block allocation. So don't
- * free that
- */
- spin_unlock(&pa->pa_lock);
- continue;
- }
- if (pa->pa_deleted) {
- spin_unlock(&pa->pa_lock);
- continue;
- }
- /* only lg prealloc space */
- BUG_ON(pa->pa_type != MB_GROUP_PA);
-
- /* seems this one can be freed ... */
- pa->pa_deleted = 1;
- spin_unlock(&pa->pa_lock);
-
- list_del_rcu(&pa->pa_inode_list);
- list_add(&pa->u.pa_tmp_list, &discard_list);
-
- total_entries--;
- if (total_entries <= 5) {
- /*
- * we want to keep only 5 entries,
- * allowing it to grow to 8. This
- * makes sure we don't call discard
- * again soon for this list.
- */
- break;
- }
- }
- spin_unlock(&lg->lg_prealloc_lock);
-
- list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
-
- ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
- if (ext4_mb_load_buddy(sb, group, &e4b)) {
- ext4_error(sb, "Error loading buddy information for %u",
- group);
- continue;
- }
- ext4_lock_group(sb, group);
- list_del(&pa->pa_group_list);
- ext4_mb_release_group_pa(&e4b, pa);
- ext4_unlock_group(sb, group);
-
- ext4_mb_unload_buddy(&e4b);
- list_del(&pa->u.pa_tmp_list);
- call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
- }
-}
-
-/*
- * We have incremented pa_count. So it cannot be freed at this
- * point. Also we hold lg_mutex. So no parallel allocation is
- * possible from this lg. That means pa_free cannot be updated.
- *
- * A parallel ext4_mb_discard_group_preallocations is possible,
- * which can cause the lg_prealloc_list to be updated.
- */
-
-static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
-{
- int order, added = 0, lg_prealloc_count = 1;
- struct super_block *sb = ac->ac_sb;
- struct ext4_locality_group *lg = ac->ac_lg;
- struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
-
- order = fls(pa->pa_free) - 1;
- if (order > PREALLOC_TB_SIZE - 1)
- /* The max size of hash table is PREALLOC_TB_SIZE */
- order = PREALLOC_TB_SIZE - 1;
- /* Add the prealloc space to lg */
- rcu_read_lock();
- list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
- pa_inode_list) {
- spin_lock(&tmp_pa->pa_lock);
- if (tmp_pa->pa_deleted) {
- spin_unlock(&tmp_pa->pa_lock);
- continue;
- }
- if (!added && pa->pa_free < tmp_pa->pa_free) {
- /* Add to the tail of the previous entry */
- list_add_tail_rcu(&pa->pa_inode_list,
- &tmp_pa->pa_inode_list);
- added = 1;
- /*
- * we want to count the total
- * number of entries in the list
- */
- }
- spin_unlock(&tmp_pa->pa_lock);
- lg_prealloc_count++;
- }
- if (!added)
- list_add_tail_rcu(&pa->pa_inode_list,
- &lg->lg_prealloc_list[order]);
- rcu_read_unlock();
-
- /* Now trim the list to be not more than 8 elements */
- if (lg_prealloc_count > 8) {
- ext4_mb_discard_lg_preallocations(sb, lg,
- order, lg_prealloc_count);
- return;
- }
- return;
-}
-
-/*
- * release all resource we used in allocation
- */
-static int ext4_mb_release_context(struct ext4_allocation_context *ac)
-{
- struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- struct ext4_prealloc_space *pa = ac->ac_pa;
- if (pa) {
- if (pa->pa_type == MB_GROUP_PA) {
- /* see comment in ext4_mb_use_group_pa() */
- spin_lock(&pa->pa_lock);
- pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
- pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
- pa->pa_free -= ac->ac_b_ex.fe_len;
- pa->pa_len -= ac->ac_b_ex.fe_len;
- spin_unlock(&pa->pa_lock);
- }
- }
- if (pa) {
- /*
- * We want to add the pa to the right bucket.
- * Remove it from the list and while adding
- * make sure the list to which we are adding
- * doesn't grow big.
- */
- if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
- spin_lock(pa->pa_obj_lock);
- list_del_rcu(&pa->pa_inode_list);
- spin_unlock(pa->pa_obj_lock);
- ext4_mb_add_n_trim(ac);
- }
- ext4_mb_put_pa(ac, ac->ac_sb, pa);
- }
- if (ac->ac_bitmap_page)
- page_cache_release(ac->ac_bitmap_page);
- if (ac->ac_buddy_page)
- page_cache_release(ac->ac_buddy_page);
- if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
- mutex_unlock(&ac->ac_lg->lg_mutex);
- ext4_mb_collect_stats(ac);
- return 0;
-}
-
-static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
-{
- ext4_group_t i, ngroups = ext4_get_groups_count(sb);
- int ret;
- int freed = 0;
-
- trace_ext4_mb_discard_preallocations(sb, needed);
- for (i = 0; i < ngroups && needed > 0; i++) {
- ret = ext4_mb_discard_group_preallocations(sb, i, needed);
- freed += ret;
- needed -= ret;
- }
-
- return freed;
-}
-
-/*
- * Main entry point into mballoc to allocate blocks
- * it tries to use preallocation first, then falls back
- * to usual allocation
- */
-ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
- struct ext4_allocation_request *ar, int *errp)
-{
- int freed;
- struct ext4_allocation_context *ac = NULL;
- struct ext4_sb_info *sbi;
- struct super_block *sb;
- ext4_fsblk_t block = 0;
- unsigned int inquota = 0;
- unsigned int reserv_clstrs = 0;
-
- sb = ar->inode->i_sb;
- sbi = EXT4_SB(sb);
-
- trace_ext4_request_blocks(ar);
-
- /* Allow to use superuser reservation for quota file */
- if (IS_NOQUOTA(ar->inode))
- ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
-
- /*
- * For delayed allocation, we could skip the ENOSPC and
- * EDQUOT checks, as blocks and quota have already been
- * reserved when the data was copied into the pagecache.
- */
- if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
- ar->flags |= EXT4_MB_DELALLOC_RESERVED;
- else {
- /* Without delayed allocation we need to verify
- * there are enough free blocks to do the block allocation
- * and that the allocation doesn't exceed the quota limits.
- */
- while (ar->len &&
- ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
-
- /* let others free the space */
- yield();
- ar->len = ar->len >> 1;
- }
- if (!ar->len) {
- *errp = -ENOSPC;
- return 0;
- }
- reserv_clstrs = ar->len;
- if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
- dquot_alloc_block_nofail(ar->inode,
- EXT4_C2B(sbi, ar->len));
- } else {
- while (ar->len &&
- dquot_alloc_block(ar->inode,
- EXT4_C2B(sbi, ar->len))) {
-
- ar->flags |= EXT4_MB_HINT_NOPREALLOC;
- ar->len--;
- }
- }
- inquota = ar->len;
- if (ar->len == 0) {
- *errp = -EDQUOT;
- goto out;
- }
- }
-
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
- if (!ac) {
- ar->len = 0;
- *errp = -ENOMEM;
- goto out;
- }
-
- *errp = ext4_mb_initialize_context(ac, ar);
- if (*errp) {
- ar->len = 0;
- goto out;
- }
-
- ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
- if (!ext4_mb_use_preallocated(ac)) {
- ac->ac_op = EXT4_MB_HISTORY_ALLOC;
- ext4_mb_normalize_request(ac, ar);
-repeat:
- /* allocate space in core */
- *errp = ext4_mb_regular_allocator(ac);
- if (*errp)
- goto errout;
-
- /* as we've just preallocated more space than
- * the user originally requested, we store the allocated
- * space in a special descriptor */
- if (ac->ac_status == AC_STATUS_FOUND &&
- ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
- ext4_mb_new_preallocation(ac);
- }
- if (likely(ac->ac_status == AC_STATUS_FOUND)) {
- *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
- if (*errp == -EAGAIN) {
- /*
- * drop the reference that we took
- * in ext4_mb_use_best_found
- */
- ext4_mb_release_context(ac);
- ac->ac_b_ex.fe_group = 0;
- ac->ac_b_ex.fe_start = 0;
- ac->ac_b_ex.fe_len = 0;
- ac->ac_status = AC_STATUS_CONTINUE;
- goto repeat;
- } else if (*errp)
- errout:
- ext4_discard_allocated_blocks(ac);
- else {
- block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
- ar->len = ac->ac_b_ex.fe_len;
- }
- } else {
- freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
- if (freed)
- goto repeat;
- *errp = -ENOSPC;
- }
-
- if (*errp) {
- ac->ac_b_ex.fe_len = 0;
- ar->len = 0;
- ext4_mb_show_ac(ac);
- }
- ext4_mb_release_context(ac);
-out:
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
- if (inquota && ar->len < inquota)
- dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
- if (!ar->len) {
- if (!ext4_test_inode_state(ar->inode,
- EXT4_STATE_DELALLOC_RESERVED))
- /* release all the reserved blocks if non-delalloc */
- percpu_counter_sub(&sbi->s_dirtyclusters_counter,
- reserv_clstrs);
- }
-
- trace_ext4_allocate_blocks(ar, (unsigned long long)block);
-
- return block;
-}
-
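When ext4_mb_new_blocks() above cannot claim free clusters, it halves the request length and retries until the request fits or drops to zero (ENOSPC). A userspace sketch of that back-off; claim() is a toy stand-in for ext4_claim_free_clusters() and the numbers are illustrative:

#include <stdio.h>

static unsigned long avail = 100;      /* pretend free clusters */

static int claim(unsigned long len)    /* 0 on success, like the kernel */
{
	if (len <= avail) {
		avail -= len;
		return 0;
	}
	return 1;
}

int main(void)
{
	unsigned long len = 640;           /* illustrative request */

	while (len && claim(len))
		len >>= 1;                 /* let others free space, retry smaller */

	if (!len)
		printf("ENOSPC\n");
	else
		printf("claimed %lu clusters\n", len);
	return 0;
}

Here 640 shrinks to 320, 160 and finally 80, which fits in the 100 available clusters.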
-/*
- * We can merge two free data extents only if the physical blocks
- * are contiguous, AND the extents were freed by the same transaction,
- * AND the blocks are associated with the same group.
- */
-static int can_merge(struct ext4_free_data *entry1,
- struct ext4_free_data *entry2)
-{
- if ((entry1->efd_tid == entry2->efd_tid) &&
- (entry1->efd_group == entry2->efd_group) &&
- ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster))
- return 1;
- return 0;
-}
-
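The merge rule in can_merge() above (same transaction, same group, physically contiguous) can be exercised standalone. This is illustrative userspace C with simplified stand-in types, not the kernel's ext4_free_data:

#include <stdio.h>

struct free_ext { unsigned tid, group, start, count; };

static int can_merge(const struct free_ext *a, const struct free_ext *b)
{
	return a->tid == b->tid && a->group == b->group &&
	       a->start + a->count == b->start;
}

int main(void)
{
	struct free_ext a = { .tid = 7, .group = 3, .start = 100, .count = 8 };
	struct free_ext b = { .tid = 7, .group = 3, .start = 108, .count = 4 };
	struct free_ext c = { .tid = 8, .group = 3, .start = 112, .count = 2 };

	printf("a+b: %d\n", can_merge(&a, &b)); /* 1: contiguous, same tid/group */
	printf("b+c: %d\n", can_merge(&b, &c)); /* 0: different transaction */
	return 0;
}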
-static noinline_for_stack int
-ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
- struct ext4_free_data *new_entry)
-{
- ext4_group_t group = e4b->bd_group;
- ext4_grpblk_t cluster;
- struct ext4_free_data *entry;
- struct ext4_group_info *db = e4b->bd_info;
- struct super_block *sb = e4b->bd_sb;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct rb_node **n = &db->bb_free_root.rb_node, *node;
- struct rb_node *parent = NULL, *new_node;
-
- BUG_ON(!ext4_handle_valid(handle));
- BUG_ON(e4b->bd_bitmap_page == NULL);
- BUG_ON(e4b->bd_buddy_page == NULL);
-
- new_node = &new_entry->efd_node;
- cluster = new_entry->efd_start_cluster;
-
- if (!*n) {
- /* first free block extent. We need to
- * protect the buddy cache from being freed,
- * otherwise we'll refresh it from the
- * on-disk bitmap and lose not-yet-available
- * blocks */
- page_cache_get(e4b->bd_buddy_page);
- page_cache_get(e4b->bd_bitmap_page);
- }
- while (*n) {
- parent = *n;
- entry = rb_entry(parent, struct ext4_free_data, efd_node);
- if (cluster < entry->efd_start_cluster)
- n = &(*n)->rb_left;
- else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
- n = &(*n)->rb_right;
- else {
- ext4_grp_locked_error(sb, group, 0,
- ext4_group_first_block_no(sb, group) +
- EXT4_C2B(sbi, cluster),
- "Block already on to-be-freed list");
- return 0;
- }
- }
-
- rb_link_node(new_node, parent, n);
- rb_insert_color(new_node, &db->bb_free_root);
-
- /* Now see if the extent can be merged to the left and right */
- node = rb_prev(new_node);
- if (node) {
- entry = rb_entry(node, struct ext4_free_data, efd_node);
- if (can_merge(entry, new_entry)) {
- new_entry->efd_start_cluster = entry->efd_start_cluster;
- new_entry->efd_count += entry->efd_count;
- rb_erase(node, &(db->bb_free_root));
- ext4_journal_callback_del(handle, &entry->efd_jce);
- kmem_cache_free(ext4_free_data_cachep, entry);
- }
- }
-
- node = rb_next(new_node);
- if (node) {
- entry = rb_entry(node, struct ext4_free_data, efd_node);
- if (can_merge(new_entry, entry)) {
- new_entry->efd_count += entry->efd_count;
- rb_erase(node, &(db->bb_free_root));
- ext4_journal_callback_del(handle, &entry->efd_jce);
- kmem_cache_free(ext4_free_data_cachep, entry);
- }
- }
- /* Add the extent to transaction's private list */
- ext4_journal_callback_add(handle, ext4_free_data_callback,
- &new_entry->efd_jce);
- return 0;
-}
-
-/**
- * ext4_free_blocks() -- Free given blocks and update quota
- * @handle: handle for this transaction
- * @inode: inode
- * @bh: optional buffer head of the single block being freed
- * @block: start physical block to free
- * @count: number of blocks to free
- * @flags: flags used by ext4_free_blocks
- */
-void ext4_free_blocks(handle_t *handle, struct inode *inode,
- struct buffer_head *bh, ext4_fsblk_t block,
- unsigned long count, int flags)
-{
- struct buffer_head *bitmap_bh = NULL;
- struct super_block *sb = inode->i_sb;
- struct ext4_group_desc *gdp;
- unsigned long freed = 0;
- unsigned int overflow;
- ext4_grpblk_t bit;
- struct buffer_head *gd_bh;
- ext4_group_t block_group;
- struct ext4_sb_info *sbi;
- struct ext4_buddy e4b;
- unsigned int count_clusters;
- int err = 0;
- int ret;
-
- if (bh) {
- if (block)
- BUG_ON(block != bh->b_blocknr);
- else
- block = bh->b_blocknr;
- }
-
- sbi = EXT4_SB(sb);
- if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
- !ext4_data_block_valid(sbi, block, count)) {
- ext4_error(sb, "Freeing blocks not in datazone - "
- "block = %llu, count = %lu", block, count);
- goto error_return;
- }
-
- ext4_debug("freeing block %llu\n", block);
- trace_ext4_free_blocks(inode, block, count, flags);
-
- if (flags & EXT4_FREE_BLOCKS_FORGET) {
- struct buffer_head *tbh = bh;
- int i;
-
- BUG_ON(bh && (count > 1));
-
- for (i = 0; i < count; i++) {
- if (!bh)
- tbh = sb_find_get_block(inode->i_sb,
- block + i);
- if (unlikely(!tbh))
- continue;
- ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
- inode, tbh, block + i);
- }
- }
-
- /*
- * We need to make sure we don't reuse the freed block until
- * after the transaction is committed, which we can do by
- * treating the block as metadata, below. We make an
- * exception if the inode is to be written in writeback mode
- * since writeback mode has weak data consistency guarantees.
- */
- if (!ext4_should_writeback_data(inode))
- flags |= EXT4_FREE_BLOCKS_METADATA;
-
- /*
- * If the extent to be freed does not begin on a cluster
- * boundary, we need to deal with partial clusters at the
- * beginning and end of the extent. Normally we will free
- * blocks at the beginning or the end unless we are explicitly
- * requested to avoid doing so.
- */
- overflow = block & (sbi->s_cluster_ratio - 1);
- if (overflow) {
- if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
- overflow = sbi->s_cluster_ratio - overflow;
- block += overflow;
- if (count > overflow)
- count -= overflow;
- else
- return;
- } else {
- block -= overflow;
- count += overflow;
- }
- }
- overflow = count & (sbi->s_cluster_ratio - 1);
- if (overflow) {
- if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
- if (count > overflow)
- count -= overflow;
- else
- return;
- } else
- count += sbi->s_cluster_ratio - overflow;
- }
-
-do_more:
- overflow = 0;
- ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
-
- /*
- * Check to see if we are freeing blocks across a group
- * boundary.
- */
- if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
- overflow = EXT4_C2B(sbi, bit) + count -
- EXT4_BLOCKS_PER_GROUP(sb);
- count -= overflow;
- }
- count_clusters = EXT4_B2C(sbi, count);
- bitmap_bh = ext4_read_block_bitmap(sb, block_group);
- if (!bitmap_bh) {
- err = -EIO;
- goto error_return;
- }
- gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
- if (!gdp) {
- err = -EIO;
- goto error_return;
- }
-
- if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
- in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
- in_range(block, ext4_inode_table(sb, gdp),
- EXT4_SB(sb)->s_itb_per_group) ||
- in_range(block + count - 1, ext4_inode_table(sb, gdp),
- EXT4_SB(sb)->s_itb_per_group)) {
-
- ext4_error(sb, "Freeing blocks in system zone - "
- "Block = %llu, count = %lu", block, count);
- /* err = 0. ext4_std_error should be a no op */
- goto error_return;
- }
-
- BUFFER_TRACE(bitmap_bh, "getting write access");
- err = ext4_journal_get_write_access(handle, bitmap_bh);
- if (err)
- goto error_return;
-
- /*
- * We are about to modify some metadata. Call the journal APIs
- * to unshare ->b_data if a currently-committing transaction is
- * using it
- */
- BUFFER_TRACE(gd_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, gd_bh);
- if (err)
- goto error_return;
-#ifdef AGGRESSIVE_CHECK
- {
- int i;
- for (i = 0; i < count_clusters; i++)
- BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
- }
-#endif
- trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
-
- err = ext4_mb_load_buddy(sb, block_group, &e4b);
- if (err)
- goto error_return;
-
- if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
- struct ext4_free_data *new_entry;
- /*
- * blocks being freed are metadata. These blocks shouldn't
- * be used until this transaction is committed
- */
- new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
- if (!new_entry) {
- ext4_mb_unload_buddy(&e4b);
- err = -ENOMEM;
- goto error_return;
- }
- new_entry->efd_start_cluster = bit;
- new_entry->efd_group = block_group;
- new_entry->efd_count = count_clusters;
- new_entry->efd_tid = handle->h_transaction->t_tid;
-
- ext4_lock_group(sb, block_group);
- mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
- ext4_mb_free_metadata(handle, &e4b, new_entry);
- } else {
- /* need to update group_info->bb_free and bitmap
- * with the group lock held; generate_buddy looks at
- * them with the group lock held
- */
- ext4_lock_group(sb, block_group);
- mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
- mb_free_blocks(inode, &e4b, bit, count_clusters);
- }
-
- ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
- ext4_free_group_clusters_set(sb, gdp, ret);
- gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
- ext4_unlock_group(sb, block_group);
- percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
-
- if (sbi->s_log_groups_per_flex) {
- ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
- atomic_add(count_clusters,
- &sbi->s_flex_groups[flex_group].free_clusters);
- }
-
- ext4_mb_unload_buddy(&e4b);
-
- freed += count;
-
- if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
- dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
-
- /* We dirtied the bitmap block */
- BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
- err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
-
- /* And the group descriptor block */
- BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
- ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
- if (!err)
- err = ret;
-
- if (overflow && !err) {
- block += count;
- count = overflow;
- put_bh(bitmap_bh);
- goto do_more;
- }
- ext4_mark_super_dirty(sb);
-error_return:
- brelse(bitmap_bh);
- ext4_std_error(sb, err);
- return;
-}
-
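The partial-cluster rounding near the top of ext4_free_blocks() relies on s_cluster_ratio being a power of two, so block & (ratio - 1) is the offset of a block within its cluster. A userspace sketch of that arithmetic with made-up numbers shows a misaligned range being widened to whole clusters:

#include <stdio.h>

int main(void)
{
	unsigned long long block = 1001;   /* illustrative start block */
	unsigned long count = 30;          /* illustrative length      */
	unsigned ratio = 16;               /* blocks per cluster       */

	unsigned overflow = block & (ratio - 1);
	if (overflow) {                    /* round the start down */
		block -= overflow;
		count += overflow;
	}
	overflow = count & (ratio - 1);
	if (overflow)                      /* round the length up */
		count += ratio - overflow;

	printf("freeing blocks %llu..%llu\n", block, block + count - 1);
	/* prints: freeing blocks 992..1039, i.e. three whole clusters */
	return 0;
}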
-/**
- * ext4_group_add_blocks() -- Add given blocks to an existing group
- * @handle: handle to this transaction
- * @sb: super block
- * @block: start physical block to add to the block group
- * @count: number of blocks to add
- *
- * This marks the blocks as free in the bitmap and buddy.
- */
-int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
- ext4_fsblk_t block, unsigned long count)
-{
- struct buffer_head *bitmap_bh = NULL;
- struct buffer_head *gd_bh;
- ext4_group_t block_group;
- ext4_grpblk_t bit;
- unsigned int i;
- struct ext4_group_desc *desc;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_buddy e4b;
- int err = 0, ret, blk_free_count;
- ext4_grpblk_t blocks_freed;
-
- ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
-
- if (count == 0)
- return 0;
-
- ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
- /*
- * Check to see if we are adding blocks across a group
- * boundary.
- */
- if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
- ext4_warning(sb, "too much blocks added to group %u\n",
- block_group);
- err = -EINVAL;
- goto error_return;
- }
-
- bitmap_bh = ext4_read_block_bitmap(sb, block_group);
- if (!bitmap_bh) {
- err = -EIO;
- goto error_return;
- }
-
- desc = ext4_get_group_desc(sb, block_group, &gd_bh);
- if (!desc) {
- err = -EIO;
- goto error_return;
- }
-
- if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
- in_range(ext4_inode_bitmap(sb, desc), block, count) ||
- in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
- in_range(block + count - 1, ext4_inode_table(sb, desc),
- sbi->s_itb_per_group)) {
- ext4_error(sb, "Adding blocks in system zones - "
- "Block = %llu, count = %lu",
- block, count);
- err = -EINVAL;
- goto error_return;
- }
-
- BUFFER_TRACE(bitmap_bh, "getting write access");
- err = ext4_journal_get_write_access(handle, bitmap_bh);
- if (err)
- goto error_return;
-
- /*
- * We are about to modify some metadata. Call the journal APIs
- * to unshare ->b_data if a currently-committing transaction is
- * using it
- */
- BUFFER_TRACE(gd_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, gd_bh);
- if (err)
- goto error_return;
-
- for (i = 0, blocks_freed = 0; i < count; i++) {
- BUFFER_TRACE(bitmap_bh, "clear bit");
- if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
- ext4_error(sb, "bit already cleared for block %llu",
- (ext4_fsblk_t)(block + i));
- BUFFER_TRACE(bitmap_bh, "bit already cleared");
- } else {
- blocks_freed++;
- }
- }
-
- err = ext4_mb_load_buddy(sb, block_group, &e4b);
- if (err)
- goto error_return;
-
- /*
- * need to update group_info->bb_free and bitmap
- * with the group lock held; generate_buddy looks at
- * them with the group lock held
- */
- ext4_lock_group(sb, block_group);
- mb_clear_bits(bitmap_bh->b_data, bit, count);
- mb_free_blocks(NULL, &e4b, bit, count);
- blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
- ext4_free_group_clusters_set(sb, desc, blk_free_count);
- desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
- ext4_unlock_group(sb, block_group);
- percpu_counter_add(&sbi->s_freeclusters_counter,
- EXT4_B2C(sbi, blocks_freed));
-
- if (sbi->s_log_groups_per_flex) {
- ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
- atomic_add(EXT4_B2C(sbi, blocks_freed),
- &sbi->s_flex_groups[flex_group].free_clusters);
- }
-
- ext4_mb_unload_buddy(&e4b);
-
- /* We dirtied the bitmap block */
- BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
- err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
-
- /* And the group descriptor block */
- BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
- ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
- if (!err)
- err = ret;
-
-error_return:
- brelse(bitmap_bh);
- ext4_std_error(sb, err);
- return err;
-}
-
-/**
- * ext4_trim_extent -- function to TRIM one single free extent in the group
- * @sb: super block for the file system
- * @start: starting block of the free extent in the alloc. group
- * @count: number of blocks to TRIM
- * @group: alloc. group we are working with
- * @e4b: ext4 buddy for the group
- *
- * Trim "count" blocks starting at "start" in the "group". To assure that no
- * one will allocate those blocks, mark it as used in buddy bitmap. This must
- * be called with under the group lock.
- */
-static void ext4_trim_extent(struct super_block *sb, int start, int count,
- ext4_group_t group, struct ext4_buddy *e4b)
-{
- struct ext4_free_extent ex;
-
- trace_ext4_trim_extent(sb, group, start, count);
-
- assert_spin_locked(ext4_group_lock_ptr(sb, group));
-
- ex.fe_start = start;
- ex.fe_group = group;
- ex.fe_len = count;
-
- /*
- * Mark blocks used, so no one can reuse them while
- * being trimmed.
- */
- mb_mark_used(e4b, &ex);
- ext4_unlock_group(sb, group);
- ext4_issue_discard(sb, group, start, count);
- ext4_lock_group(sb, group);
- mb_free_blocks(NULL, e4b, start, ex.fe_len);
-}
-
-/**
- * ext4_trim_all_free -- function to trim all free space in alloc. group
- * @sb: super block for file system
- * @group: group to be trimmed
- * @start: first group block to examine
- * @max: last group block to examine
- * @minblocks: minimum extent block count
- *
- * ext4_trim_all_free walks through the group's block bitmap searching for
- * free extents. When a free extent is found, it is marked as used in the
- * group buddy bitmap, a TRIM command is issued on the extent via
- * ext4_trim_extent, and the extent is then freed again in the buddy bitmap.
- * This is repeated until the whole group is scanned.
- */
-static ext4_grpblk_t
-ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
- ext4_grpblk_t start, ext4_grpblk_t max,
- ext4_grpblk_t minblocks)
-{
- void *bitmap;
- ext4_grpblk_t next, count = 0, free_count = 0;
- struct ext4_buddy e4b;
- int ret;
-
- trace_ext4_trim_all_free(sb, group, start, max);
-
- ret = ext4_mb_load_buddy(sb, group, &e4b);
- if (ret) {
- ext4_error(sb, "Error in loading buddy "
- "information for %u", group);
- return ret;
- }
- bitmap = e4b.bd_bitmap;
-
- ext4_lock_group(sb, group);
- if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
- minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
- goto out;
-
- start = (e4b.bd_info->bb_first_free > start) ?
- e4b.bd_info->bb_first_free : start;
-
- while (start <= max) {
- start = mb_find_next_zero_bit(bitmap, max + 1, start);
- if (start > max)
- break;
- next = mb_find_next_bit(bitmap, max + 1, start);
-
- if ((next - start) >= minblocks) {
- ext4_trim_extent(sb, start,
- next - start, group, &e4b);
- count += next - start;
- }
- free_count += next - start;
- start = next + 1;
-
- if (fatal_signal_pending(current)) {
- count = -ERESTARTSYS;
- break;
- }
-
- if (need_resched()) {
- ext4_unlock_group(sb, group);
- cond_resched();
- ext4_lock_group(sb, group);
- }
-
- if ((e4b.bd_info->bb_free - free_count) < minblocks)
- break;
- }
-
- if (!ret)
- EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
-out:
- ext4_unlock_group(sb, group);
- ext4_mb_unload_buddy(&e4b);
-
- ext4_debug("trimmed %d blocks in the group %d\n",
- count, group);
-
- return count;
-}
-
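The scan loop in ext4_trim_all_free() alternates between find-next-zero-bit and find-next-bit to carve the bitmap into free runs, trimming only runs of at least minblocks. A userspace sketch of the same loop, using a plain character array instead of the kernel's packed bitmap:

#include <stdio.h>

int main(void)
{
	char bitmap[] = "1100000111000001"; /* '0' = free block */
	int max = 15, minblocks = 4, count = 0;

	int start = 0;
	while (start <= max) {
		while (start <= max && bitmap[start] == '1') /* next zero bit */
			start++;
		if (start > max)
			break;
		int next = start;
		while (next <= max && bitmap[next] == '0')   /* next set bit  */
			next++;
		if (next - start >= minblocks) {             /* long enough   */
			printf("trim extent [%d, %d)\n", start, next);
			count += next - start;
		}
		start = next + 1;
	}
	printf("trimmed %d blocks\n", count);
	return 0;
}

With this bitmap the two five-block runs at offsets 2 and 10 both clear the minblocks threshold, so 10 blocks are trimmed.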
-/**
- * ext4_trim_fs() -- trim ioctl handler function
- * @sb: superblock for the filesystem
- * @range: fstrim_range structure
- *
- * start: first byte to trim
- * len: number of bytes to trim from start
- * minlen: minimum extent length in bytes
- * ext4_trim_fs goes through all allocation groups containing bytes from
- * start to start+len. For each such group the ext4_trim_all_free function
- * is invoked to trim all free space.
- */
-int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
-{
- struct ext4_group_info *grp;
- ext4_group_t group, first_group, last_group;
- ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
- uint64_t start, end, minlen, trimmed = 0;
- ext4_fsblk_t first_data_blk =
- le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
- ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
- int ret = 0;
-
- start = range->start >> sb->s_blocksize_bits;
- end = start + (range->len >> sb->s_blocksize_bits) - 1;
- minlen = range->minlen >> sb->s_blocksize_bits;
-
- if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) ||
- unlikely(start >= max_blks))
- return -EINVAL;
- if (end >= max_blks)
- end = max_blks - 1;
- if (end <= first_data_blk)
- goto out;
- if (start < first_data_blk)
- start = first_data_blk;
-
- /* Determine first and last group to examine based on start and end */
- ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
- &first_group, &first_cluster);
- ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
- &last_group, &last_cluster);
-
- /* end now represents the last cluster to discard in this group */
- end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
-
- for (group = first_group; group <= last_group; group++) {
- grp = ext4_get_group_info(sb, group);
- /* We only do this if the grp has never been initialized */
- if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
- ret = ext4_mb_init_group(sb, group);
- if (ret)
- break;
- }
-
- /*
- * For all the groups except the last one, the last cluster will
- * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to
- * change it for the last group; note that last_cluster is
- * already computed earlier by ext4_get_group_no_and_offset()
- */
- if (group == last_group)
- end = last_cluster;
-
- if (grp->bb_free >= minlen) {
- cnt = ext4_trim_all_free(sb, group, first_cluster,
- end, minlen);
- if (cnt < 0) {
- ret = cnt;
- break;
- }
- trimmed += cnt;
- }
-
- /*
- * For every group except the first one, we are sure
- * that the first cluster to discard will be cluster #0.
- */
- first_cluster = 0;
- }
-
- if (!ret)
- atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
-
-out:
- range->len = trimmed * sb->s_blocksize;
- return ret;
-}
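ext4_trim_fs() converts the byte-based fstrim_range into block numbers with simple shifts by s_blocksize_bits. A userspace sketch of that conversion with illustrative values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned blocksize_bits = 12;            /* 4 KiB blocks */
	uint64_t range_start = 8ULL << 20;       /* trim from 8 MiB ...   */
	uint64_t range_len = 4ULL << 20;         /* ... for 4 MiB         */
	uint64_t range_minlen = 64 << 10;        /* skip extents < 64 KiB */

	uint64_t start = range_start >> blocksize_bits;
	uint64_t end = start + (range_len >> blocksize_bits) - 1;
	uint64_t minlen = range_minlen >> blocksize_bits;

	printf("blocks %llu..%llu, minlen %llu blocks\n",
	       (unsigned long long)start, (unsigned long long)end,
	       (unsigned long long)minlen);
	/* prints: blocks 2048..3071, minlen 16 blocks */
	return 0;
}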
diff --git a/ANDROID_3.4.5/fs/ext4/mballoc.h b/ANDROID_3.4.5/fs/ext4/mballoc.h
deleted file mode 100644
index c070618c..00000000
--- a/ANDROID_3.4.5/fs/ext4/mballoc.h
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- * fs/ext4/mballoc.h
- *
- * Written by: Alex Tomas <alex@clusterfs.com>
- *
- */
-#ifndef _EXT4_MBALLOC_H
-#define _EXT4_MBALLOC_H
-
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/namei.h>
-#include <linux/quotaops.h>
-#include <linux/buffer_head.h>
-#include <linux/module.h>
-#include <linux/swap.h>
-#include <linux/proc_fs.h>
-#include <linux/pagemap.h>
-#include <linux/seq_file.h>
-#include <linux/blkdev.h>
-#include <linux/mutex.h>
-#include "ext4_jbd2.h"
-#include "ext4.h"
-
-/*
- * with AGGRESSIVE_CHECK the allocator runs consistency checks over
- * its structures. These checks slow things down a lot
- */
-#define AGGRESSIVE_CHECK__
-
-/*
- * with DOUBLE_CHECK defined, mballoc creates persistent in-core
- * bitmaps and maintains and uses them to check for double allocations
- */
-#define DOUBLE_CHECK__
-
-/*
- */
-#ifdef CONFIG_EXT4_DEBUG
-extern u8 mb_enable_debug;
-
-#define mb_debug(n, fmt, a...) \
- do { \
- if ((n) <= mb_enable_debug) { \
- printk(KERN_DEBUG "(%s, %d): %s: ", \
- __FILE__, __LINE__, __func__); \
- printk(fmt, ## a); \
- } \
- } while (0)
-#else
-#define mb_debug(n, fmt, a...)
-#endif
-
-#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
-#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
-
-/*
- * How long mballoc can look for a best extent (in found extents)
- */
-#define MB_DEFAULT_MAX_TO_SCAN 200
-
-/*
- * How long mballoc must look for a best extent
- */
-#define MB_DEFAULT_MIN_TO_SCAN 10
-
-/*
- * How many groups mballoc will scan looking for the best chunk
- */
-#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
-
-/*
- * with 'ext4_mb_stats' the allocator will collect stats that will be
- * shown at umount. The collecting has a cost, though!
- */
-#define MB_DEFAULT_STATS 0
-
-/*
- * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
- * by the stream allocator, which purpose is to pack requests
- * as close each to other as possible to produce smooth I/O traffic
- * We use locality group prealloc space for stream request.
- * We can tune the same via /proc/fs/ext4/<parition>/stream_req
- */
-#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
-
-/*
- * requests of at least this order use the 2^N search using buddies
- */
-#define MB_DEFAULT_ORDER2_REQS 2
-
-/*
- * default group prealloc size 512 blocks
- */
-#define MB_DEFAULT_GROUP_PREALLOC 512
-
-
-struct ext4_free_data {
- /* MUST be the first member */
- struct ext4_journal_cb_entry efd_jce;
-
- /* ext4_free_data private data starts from here */
-
- /* this links the free block information from group_info */
- struct rb_node efd_node;
-
- /* group which free block extent belongs */
- ext4_group_t efd_group;
-
- /* free block extent */
- ext4_grpblk_t efd_start_cluster;
- ext4_grpblk_t efd_count;
-
- /* transaction which freed this extent */
- tid_t efd_tid;
-};
-
-struct ext4_prealloc_space {
- struct list_head pa_inode_list;
- struct list_head pa_group_list;
- union {
- struct list_head pa_tmp_list;
- struct rcu_head pa_rcu;
- } u;
- spinlock_t pa_lock;
- atomic_t pa_count;
- unsigned pa_deleted;
- ext4_fsblk_t pa_pstart; /* phys. block */
- ext4_lblk_t pa_lstart; /* log. block */
- ext4_grpblk_t pa_len; /* len of preallocated chunk */
- ext4_grpblk_t pa_free; /* how many blocks are free */
- unsigned short pa_type; /* pa type. inode or group */
- spinlock_t *pa_obj_lock;
- struct inode *pa_inode; /* hack, for history only */
-};
-
-enum {
- MB_INODE_PA = 0,
- MB_GROUP_PA = 1
-};
-
-struct ext4_free_extent {
- ext4_lblk_t fe_logical;
- ext4_grpblk_t fe_start; /* In cluster units */
- ext4_group_t fe_group;
- ext4_grpblk_t fe_len; /* In cluster units */
-};
-
-/*
- * Locality group:
- * we try to group all related changes together
- * so that writeback can flush/allocate them together as well
- * Size of the lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC
- * (512). We store prealloc space into the hash based on the order of the
- * pa_free block count, i.e. fls(pa_free) - 1.
- */
-#define PREALLOC_TB_SIZE 10
-struct ext4_locality_group {
- /* for allocator */
- /* to serialize allocates */
- struct mutex lg_mutex;
- /* list of preallocations */
- struct list_head lg_prealloc_list[PREALLOC_TB_SIZE];
- spinlock_t lg_prealloc_lock;
-};
-
-struct ext4_allocation_context {
- struct inode *ac_inode;
- struct super_block *ac_sb;
-
- /* original request */
- struct ext4_free_extent ac_o_ex;
-
- /* goal request (normalized ac_o_ex) */
- struct ext4_free_extent ac_g_ex;
-
- /* the best found extent */
- struct ext4_free_extent ac_b_ex;
-
- /* copy of the best found extent taken before preallocation efforts */
- struct ext4_free_extent ac_f_ex;
-
- /* number of iterations done. we have to track to limit searching */
- unsigned long ac_ex_scanned;
- __u16 ac_groups_scanned;
- __u16 ac_found;
- __u16 ac_tail;
- __u16 ac_buddy;
- __u16 ac_flags; /* allocation hints */
- __u8 ac_status;
- __u8 ac_criteria;
- __u8 ac_2order; /* if request is to allocate 2^N blocks and
- * N > 0, the field stores N, otherwise 0 */
- __u8 ac_op; /* operation, for history only */
- struct page *ac_bitmap_page;
- struct page *ac_buddy_page;
- struct ext4_prealloc_space *ac_pa;
- struct ext4_locality_group *ac_lg;
-};
-
-#define AC_STATUS_CONTINUE 1
-#define AC_STATUS_FOUND 2
-#define AC_STATUS_BREAK 3
-
-struct ext4_buddy {
- struct page *bd_buddy_page;
- void *bd_buddy;
- struct page *bd_bitmap_page;
- void *bd_bitmap;
- struct ext4_group_info *bd_info;
- struct super_block *bd_sb;
- __u16 bd_blkbits;
- ext4_group_t bd_group;
-};
-
-static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
- struct ext4_free_extent *fex)
-{
- return ext4_group_first_block_no(sb, fex->fe_group) +
- (fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
-}
-#endif
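The arithmetic in ext4_grp_offs_to_block() above maps a (group, cluster offset) pair to a physical block number. A userspace sketch with made-up geometry; the field and variable names are simplified stand-ins for the kernel's:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t blocks_per_group = 32768;
	uint64_t first_data_block = 0;  /* 0 for 4 KiB-block filesystems */
	unsigned cluster_bits = 4;      /* 16 blocks per cluster (bigalloc) */

	unsigned fe_group = 3, fe_start = 100; /* sample free extent */

	uint64_t group_first = first_data_block +
			       (uint64_t)fe_group * blocks_per_group;
	uint64_t block = group_first + ((uint64_t)fe_start << cluster_bits);

	printf("group %u, cluster %u -> block %llu\n",
	       fe_group, fe_start, (unsigned long long)block);
	/* prints: group 3, cluster 100 -> block 99904 */
	return 0;
}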
diff --git a/ANDROID_3.4.5/fs/ext4/migrate.c b/ANDROID_3.4.5/fs/ext4/migrate.c
deleted file mode 100644
index f39f80f8..00000000
--- a/ANDROID_3.4.5/fs/ext4/migrate.c
+++ /dev/null
@@ -1,604 +0,0 @@
-/*
- * Copyright IBM Corporation, 2007
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- */
-
-#include <linux/slab.h>
-#include "ext4_jbd2.h"
-
-/*
- * Details of the contiguous blocks which can be
- * represented by a single extent
- */
-struct migrate_struct {
- ext4_lblk_t first_block, last_block, curr_block;
- ext4_fsblk_t first_pblock, last_pblock;
-};
-
-static int finish_range(handle_t *handle, struct inode *inode,
- struct migrate_struct *lb)
-
-{
- int retval = 0, needed;
- struct ext4_extent newext;
- struct ext4_ext_path *path;
- if (lb->first_pblock == 0)
- return 0;
-
- /* Add the extent to temp inode*/
- newext.ee_block = cpu_to_le32(lb->first_block);
- newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
- ext4_ext_store_pblock(&newext, lb->first_pblock);
- path = ext4_ext_find_extent(inode, lb->first_block, NULL);
-
- if (IS_ERR(path)) {
- retval = PTR_ERR(path);
- path = NULL;
- goto err_out;
- }
-
- /*
- * Calculate the credits needed to insert this extent.
- * Since we are doing this in a loop we may accumulate extra
- * credits, but below we try not to accumulate too many
- * of them by restarting the journal.
- */
- needed = ext4_ext_calc_credits_for_single_extent(inode,
- lb->last_block - lb->first_block + 1, path);
-
- /*
- * Make sure the credits we accumulated are not too high
- */
- if (needed && ext4_handle_has_enough_credits(handle,
- EXT4_RESERVE_TRANS_BLOCKS)) {
- retval = ext4_journal_restart(handle, needed);
- if (retval)
- goto err_out;
- } else if (needed) {
- retval = ext4_journal_extend(handle, needed);
- if (retval) {
- /*
- * If we are not able to extend the journal, restart it
- */
- retval = ext4_journal_restart(handle, needed);
- if (retval)
- goto err_out;
- }
- }
- retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
-err_out:
- if (path) {
- ext4_ext_drop_refs(path);
- kfree(path);
- }
- lb->first_pblock = 0;
- return retval;
-}
-
-static int update_extent_range(handle_t *handle, struct inode *inode,
- ext4_fsblk_t pblock, struct migrate_struct *lb)
-{
- int retval;
- /*
- * See if we can add on to the existing range (if it exists)
- */
- if (lb->first_pblock &&
- (lb->last_pblock+1 == pblock) &&
- (lb->last_block+1 == lb->curr_block)) {
- lb->last_pblock = pblock;
- lb->last_block = lb->curr_block;
- lb->curr_block++;
- return 0;
- }
- /*
- * Start a new range.
- */
- retval = finish_range(handle, inode, lb);
- lb->first_pblock = lb->last_pblock = pblock;
- lb->first_block = lb->last_block = lb->curr_block;
- lb->curr_block++;
- return retval;
-}
-
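update_extent_range() above extends the pending range only when both the physical and logical positions are contiguous; otherwise it finishes the old range and starts a new one. A userspace sketch of that merge test, with simplified types and sample physical blocks; the kernel version also writes the finished extent out via finish_range():

#include <stdio.h>

struct range { unsigned long long first_p, last_p;
	       unsigned first_l, last_l, curr_l; };

static int extend_or_start(struct range *lb, unsigned long long pblock)
{
	if (lb->first_p && lb->last_p + 1 == pblock &&
	    lb->last_l + 1 == lb->curr_l) {
		lb->last_p = pblock;         /* contiguous: extend */
		lb->last_l = lb->curr_l++;
		return 0;
	}
	/* discontiguous: here the kernel would emit the finished extent */
	lb->first_p = lb->last_p = pblock;
	lb->first_l = lb->last_l = lb->curr_l++;
	return 1;
}

int main(void)
{
	struct range lb = { 0 };
	unsigned long long pblocks[] = { 500, 501, 502, 900 };
	for (int i = 0; i < 4; i++)
		printf("pblock %llu: %s\n", pblocks[i],
		       extend_or_start(&lb, pblocks[i]) ? "new extent"
							: "extended");
	return 0;
}

Blocks 500..502 collapse into one pending extent; 900 breaks contiguity and starts a new one.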
-static int update_ind_extent_range(handle_t *handle, struct inode *inode,
- ext4_fsblk_t pblock,
- struct migrate_struct *lb)
-{
- struct buffer_head *bh;
- __le32 *i_data;
- int i, retval = 0;
- unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
-
- bh = sb_bread(inode->i_sb, pblock);
- if (!bh)
- return -EIO;
-
- i_data = (__le32 *)bh->b_data;
- for (i = 0; i < max_entries; i++) {
- if (i_data[i]) {
- retval = update_extent_range(handle, inode,
- le32_to_cpu(i_data[i]), lb);
- if (retval)
- break;
- } else {
- lb->curr_block++;
- }
- }
- put_bh(bh);
- return retval;
-
-}
-
-static int update_dind_extent_range(handle_t *handle, struct inode *inode,
- ext4_fsblk_t pblock,
- struct migrate_struct *lb)
-{
- struct buffer_head *bh;
- __le32 *i_data;
- int i, retval = 0;
- unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
-
- bh = sb_bread(inode->i_sb, pblock);
- if (!bh)
- return -EIO;
-
- i_data = (__le32 *)bh->b_data;
- for (i = 0; i < max_entries; i++) {
- if (i_data[i]) {
- retval = update_ind_extent_range(handle, inode,
- le32_to_cpu(i_data[i]), lb);
- if (retval)
- break;
- } else {
- /* Only update the file block number */
- lb->curr_block += max_entries;
- }
- }
- put_bh(bh);
- return retval;
-
-}
-
-static int update_tind_extent_range(handle_t *handle, struct inode *inode,
- ext4_fsblk_t pblock,
- struct migrate_struct *lb)
-{
- struct buffer_head *bh;
- __le32 *i_data;
- int i, retval = 0;
- unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
-
- bh = sb_bread(inode->i_sb, pblock);
- if (!bh)
- return -EIO;
-
- i_data = (__le32 *)bh->b_data;
- for (i = 0; i < max_entries; i++) {
- if (i_data[i]) {
- retval = update_dind_extent_range(handle, inode,
- le32_to_cpu(i_data[i]), lb);
- if (retval)
- break;
- } else {
- /* Only update the file block number */
- lb->curr_block += max_entries * max_entries;
- }
- }
- put_bh(bh);
- return retval;
-
-}
-
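When an indirect entry is absent, the walkers above advance lb->curr_block by the fan-out of the missing subtree: max_entries for an IND block, max_entries squared for a DIND block, and so on, where max_entries = blocksize >> 2 because block numbers are 32-bit. A sketch of that arithmetic for 4 KiB blocks:

#include <stdio.h>

int main(void)
{
	unsigned long blocksize = 4096;
	unsigned long max_entries = blocksize >> 2;  /* 1024 pointers/block */

	printf("IND  covers %lu blocks\n", max_entries);
	printf("DIND covers %lu blocks\n", max_entries * max_entries);
	printf("TIND covers %lu blocks\n",
	       max_entries * max_entries * max_entries);
	/* 1024, 1048576 and 1073741824 file blocks respectively */
	return 0;
}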
-static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
-{
- int retval = 0, needed;
-
- if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
- return 0;
- /*
- * We are freeing blocks. While doing so we touch the
- * superblock, group descriptor and block bitmap,
- * so allocate 3 credits. We may also update
- * quota (user and group).
- */
- needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
-
- if (ext4_journal_extend(handle, needed) != 0)
- retval = ext4_journal_restart(handle, needed);
-
- return retval;
-}
-
-static int free_dind_blocks(handle_t *handle,
- struct inode *inode, __le32 i_data)
-{
- int i;
- __le32 *tmp_idata;
- struct buffer_head *bh;
- unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
-
- bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
- if (!bh)
- return -EIO;
-
- tmp_idata = (__le32 *)bh->b_data;
- for (i = 0; i < max_entries; i++) {
- if (tmp_idata[i]) {
- extend_credit_for_blkdel(handle, inode);
- ext4_free_blocks(handle, inode, NULL,
- le32_to_cpu(tmp_idata[i]), 1,
- EXT4_FREE_BLOCKS_METADATA |
- EXT4_FREE_BLOCKS_FORGET);
- }
- }
- put_bh(bh);
- extend_credit_for_blkdel(handle, inode);
- ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
- EXT4_FREE_BLOCKS_METADATA |
- EXT4_FREE_BLOCKS_FORGET);
- return 0;
-}
-
-static int free_tind_blocks(handle_t *handle,
- struct inode *inode, __le32 i_data)
-{
- int i, retval = 0;
- __le32 *tmp_idata;
- struct buffer_head *bh;
- unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
-
- bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
- if (!bh)
- return -EIO;
-
- tmp_idata = (__le32 *)bh->b_data;
- for (i = 0; i < max_entries; i++) {
- if (tmp_idata[i]) {
- retval = free_dind_blocks(handle,
- inode, tmp_idata[i]);
- if (retval) {
- put_bh(bh);
- return retval;
- }
- }
- }
- put_bh(bh);
- extend_credit_for_blkdel(handle, inode);
- ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
- EXT4_FREE_BLOCKS_METADATA |
- EXT4_FREE_BLOCKS_FORGET);
- return 0;
-}
-
-static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
-{
- int retval;
-
- /* ei->i_data[EXT4_IND_BLOCK] */
- if (i_data[0]) {
- extend_credit_for_blkdel(handle, inode);
- ext4_free_blocks(handle, inode, NULL,
- le32_to_cpu(i_data[0]), 1,
- EXT4_FREE_BLOCKS_METADATA |
- EXT4_FREE_BLOCKS_FORGET);
- }
-
- /* ei->i_data[EXT4_DIND_BLOCK] */
- if (i_data[1]) {
- retval = free_dind_blocks(handle, inode, i_data[1]);
- if (retval)
- return retval;
- }
-
- /* ei->i_data[EXT4_TIND_BLOCK] */
- if (i_data[2]) {
- retval = free_tind_blocks(handle, inode, i_data[2]);
- if (retval)
- return retval;
- }
- return 0;
-}
-
-static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
- struct inode *tmp_inode)
-{
- int retval;
- __le32 i_data[3];
- struct ext4_inode_info *ei = EXT4_I(inode);
- struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
-
- /*
- * One credit accounted for writing the
- * i_data field of the original inode
- */
- retval = ext4_journal_extend(handle, 1);
- if (retval) {
- retval = ext4_journal_restart(handle, 1);
- if (retval)
- goto err_out;
- }
-
- i_data[0] = ei->i_data[EXT4_IND_BLOCK];
- i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
- i_data[2] = ei->i_data[EXT4_TIND_BLOCK];
-
- down_write(&EXT4_I(inode)->i_data_sem);
- /*
- * if EXT4_STATE_EXT_MIGRATE is cleared, a block allocation
- * happened after we started the migrate. We need to
- * fail the migrate.
- */
- if (!ext4_test_inode_state(inode, EXT4_STATE_EXT_MIGRATE)) {
- retval = -EAGAIN;
- up_write(&EXT4_I(inode)->i_data_sem);
- goto err_out;
- } else
- ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
- /*
- * We have the extent map built with the tmp inode.
- * Now copy the i_data across
- */
- ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
- memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
-
- /*
- * Update i_blocks with the new blocks that got
- * allocated while adding extents for extent index
- * blocks.
- *
- * While converting to extents we need not
- * update the original inode's i_blocks for extent blocks
- * via the quota APIs; the quota update already happened via tmp_inode.
- */
- spin_lock(&inode->i_lock);
- inode->i_blocks += tmp_inode->i_blocks;
- spin_unlock(&inode->i_lock);
- up_write(&EXT4_I(inode)->i_data_sem);
-
- /*
- * We mark the inode dirty after, because we decrement the
- * i_blocks when freeing the indirect meta-data blocks
- */
- retval = free_ind_block(handle, inode, i_data);
- ext4_mark_inode_dirty(handle, inode);
-
-err_out:
- return retval;
-}
-
-static int free_ext_idx(handle_t *handle, struct inode *inode,
- struct ext4_extent_idx *ix)
-{
- int i, retval = 0;
- ext4_fsblk_t block;
- struct buffer_head *bh;
- struct ext4_extent_header *eh;
-
- block = ext4_idx_pblock(ix);
- bh = sb_bread(inode->i_sb, block);
- if (!bh)
- return -EIO;
-
- eh = (struct ext4_extent_header *)bh->b_data;
- if (eh->eh_depth != 0) {
- ix = EXT_FIRST_INDEX(eh);
- for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
- retval = free_ext_idx(handle, inode, ix);
- if (retval)
- break;
- }
- }
- put_bh(bh);
- extend_credit_for_blkdel(handle, inode);
- ext4_free_blocks(handle, inode, NULL, block, 1,
- EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
- return retval;
-}
-
-/*
- * Free the extent meta data blocks only
- */
-static int free_ext_block(handle_t *handle, struct inode *inode)
-{
- int i, retval = 0;
- struct ext4_inode_info *ei = EXT4_I(inode);
- struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data;
- struct ext4_extent_idx *ix;
- if (eh->eh_depth == 0)
- /*
- * No extra blocks allocated for extent meta data
- */
- return 0;
- ix = EXT_FIRST_INDEX(eh);
- for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
- retval = free_ext_idx(handle, inode, ix);
- if (retval)
- return retval;
- }
- return retval;
-
-}
-
-int ext4_ext_migrate(struct inode *inode)
-{
- handle_t *handle;
- int retval = 0, i;
- __le32 *i_data;
- struct ext4_inode_info *ei;
- struct inode *tmp_inode = NULL;
- struct migrate_struct lb;
- unsigned long max_entries;
- __u32 goal;
- uid_t owner[2];
-
- /*
- * If the filesystem does not support extents, or the inode
- * already is extent-based, error out.
- */
- if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_INCOMPAT_EXTENTS) ||
- (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- return -EINVAL;
-
- if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
- /*
- * don't migrate fast symlink
- */
- return retval;
-
- handle = ext4_journal_start(inode,
- EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
- EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
- + 1);
- if (IS_ERR(handle)) {
- retval = PTR_ERR(handle);
- return retval;
- }
- goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
- EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
- owner[0] = inode->i_uid;
- owner[1] = inode->i_gid;
- tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
- S_IFREG, NULL, goal, owner);
- if (IS_ERR(tmp_inode)) {
- retval = PTR_ERR(tmp_inode);
- ext4_journal_stop(handle);
- return retval;
- }
- i_size_write(tmp_inode, i_size_read(inode));
- /*
- * Set the i_nlink to zero so it will be deleted later
- * when we drop the inode reference.
- */
- clear_nlink(tmp_inode);
-
- ext4_ext_tree_init(handle, tmp_inode);
- ext4_orphan_add(handle, tmp_inode);
- ext4_journal_stop(handle);
-
- /*
- * start with one credit accounted for
- * superblock modification.
- *
- * For the tmp_inode we already have committed the
- * transaction that created the inode. Later, as and
- * when we add extents, we extend the journal.
- */
- /*
- * Even though we take i_mutex we can still cause block
- * allocation via mmap write to holes. If we have allocated
- * new blocks we fail the migrate. A new block allocation will
- * clear the EXT4_STATE_EXT_MIGRATE flag. The flag is updated
- * with i_data_sem held to prevent racing with block
- * allocation.
- */
- down_read((&EXT4_I(inode)->i_data_sem));
- ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
- up_read((&EXT4_I(inode)->i_data_sem));
-
- handle = ext4_journal_start(inode, 1);
- if (IS_ERR(handle)) {
- /*
- * It is impossible to update on-disk structures without
- * a handle, so just roll back the in-core changes and leave the
- * rest of the work to orphan_list_cleanup()
- */
- ext4_orphan_del(NULL, tmp_inode);
- retval = PTR_ERR(handle);
- goto out;
- }
-
- ei = EXT4_I(inode);
- i_data = ei->i_data;
- memset(&lb, 0, sizeof(lb));
-
- /* 32 bit block address 4 bytes */
- max_entries = inode->i_sb->s_blocksize >> 2;
- for (i = 0; i < EXT4_NDIR_BLOCKS; i++) {
- if (i_data[i]) {
- retval = update_extent_range(handle, tmp_inode,
- le32_to_cpu(i_data[i]), &lb);
- if (retval)
- goto err_out;
- } else
- lb.curr_block++;
- }
- if (i_data[EXT4_IND_BLOCK]) {
- retval = update_ind_extent_range(handle, tmp_inode,
- le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb);
- if (retval)
- goto err_out;
- } else
- lb.curr_block += max_entries;
- if (i_data[EXT4_DIND_BLOCK]) {
- retval = update_dind_extent_range(handle, tmp_inode,
- le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb);
- if (retval)
- goto err_out;
- } else
- lb.curr_block += max_entries * max_entries;
- if (i_data[EXT4_TIND_BLOCK]) {
- retval = update_tind_extent_range(handle, tmp_inode,
- le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb);
- if (retval)
- goto err_out;
- }
- /*
- * Build the last extent
- */
- retval = finish_range(handle, tmp_inode, &lb);
-err_out:
- if (retval)
- /*
- * In the failure case, delete the extent information built in the
- * tmp_inode
- */
- free_ext_block(handle, tmp_inode);
- else {
- retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
- if (retval)
- /*
- * if we fail to swap the inode data, free the extent
- * details of the tmp inode
- */
- free_ext_block(handle, tmp_inode);
- }
-
- /* We mark the tmp_inode dirty via ext4_ext_tree_init. */
- if (ext4_journal_extend(handle, 1) != 0)
- ext4_journal_restart(handle, 1);
-
- /*
- * Mark the tmp_inode as having size zero
- */
- i_size_write(tmp_inode, 0);
-
- /*
- * set the i_blocks count to zero
- * so that ext4_delete_inode does the
- * right thing
- *
- * We don't need to take the i_lock because
- * the inode is not visible to user space.
- */
- tmp_inode->i_blocks = 0;
-
- /* Reset the extent details */
- ext4_ext_tree_init(handle, tmp_inode);
- ext4_journal_stop(handle);
-out:
- unlock_new_inode(tmp_inode);
- iput(tmp_inode);
-
- return retval;
-}
diff --git a/ANDROID_3.4.5/fs/ext4/mmp.c b/ANDROID_3.4.5/fs/ext4/mmp.c
deleted file mode 100644
index ed6548d8..00000000
--- a/ANDROID_3.4.5/fs/ext4/mmp.c
+++ /dev/null
@@ -1,353 +0,0 @@
-#include <linux/fs.h>
-#include <linux/random.h>
-#include <linux/buffer_head.h>
-#include <linux/utsname.h>
-#include <linux/kthread.h>
-
-#include "ext4.h"
-
-/*
- * Write the MMP block using WRITE_SYNC to try to get the block on-disk
- * faster.
- */
-static int write_mmp_block(struct buffer_head *bh)
-{
- mark_buffer_dirty(bh);
- lock_buffer(bh);
- bh->b_end_io = end_buffer_write_sync;
- get_bh(bh);
- submit_bh(WRITE_SYNC, bh);
- wait_on_buffer(bh);
- if (unlikely(!buffer_uptodate(bh)))
- return 1;
-
- return 0;
-}
-
-/*
- * Read the MMP block. It _must_ be read from disk and hence we clear the
- * uptodate flag on the buffer.
- */
-static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
- ext4_fsblk_t mmp_block)
-{
- struct mmp_struct *mmp;
-
- if (*bh)
- clear_buffer_uptodate(*bh);
-
- /* This would be sb_bread(sb, mmp_block), except we need to be sure
- * that the MD RAID device cache has been bypassed, and that the read
- * is not blocked in the elevator. */
- if (!*bh)
- *bh = sb_getblk(sb, mmp_block);
- if (*bh) {
- get_bh(*bh);
- lock_buffer(*bh);
- (*bh)->b_end_io = end_buffer_read_sync;
- submit_bh(READ_SYNC, *bh);
- wait_on_buffer(*bh);
- if (!buffer_uptodate(*bh)) {
- brelse(*bh);
- *bh = NULL;
- }
- }
- if (!*bh) {
- ext4_warning(sb, "Error while reading MMP block %llu",
- mmp_block);
- return -EIO;
- }
-
- mmp = (struct mmp_struct *)((*bh)->b_data);
- if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
- return -EINVAL;
-
- return 0;
-}
-
-/*
- * Dump as much information as possible to help the admin.
- */
-void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
- const char *function, unsigned int line, const char *msg)
-{
- __ext4_warning(sb, function, line, msg);
- __ext4_warning(sb, function, line,
- "MMP failure info: last update time: %llu, last update "
- "node: %s, last update device: %s\n",
- (long long unsigned int) le64_to_cpu(mmp->mmp_time),
- mmp->mmp_nodename, mmp->mmp_bdevname);
-}
-
-/*
- * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
- */
-static int kmmpd(void *data)
-{
- struct super_block *sb = ((struct mmpd_data *) data)->sb;
- struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- struct mmp_struct *mmp;
- ext4_fsblk_t mmp_block;
- u32 seq = 0;
- unsigned long failed_writes = 0;
- int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
- unsigned mmp_check_interval;
- unsigned long last_update_time;
- unsigned long diff;
- int retval;
-
- mmp_block = le64_to_cpu(es->s_mmp_block);
- mmp = (struct mmp_struct *)(bh->b_data);
- mmp->mmp_time = cpu_to_le64(get_seconds());
- /*
- * Start with the higher mmp_check_interval and reduce it if
- * the MMP block is being updated on time.
- */
- mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
- EXT4_MMP_MIN_CHECK_INTERVAL);
- mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
- bdevname(bh->b_bdev, mmp->mmp_bdevname);
-
- memcpy(mmp->mmp_nodename, init_utsname()->nodename,
- sizeof(mmp->mmp_nodename));
-
- while (!kthread_should_stop()) {
- if (++seq > EXT4_MMP_SEQ_MAX)
- seq = 1;
-
- mmp->mmp_seq = cpu_to_le32(seq);
- mmp->mmp_time = cpu_to_le64(get_seconds());
- last_update_time = jiffies;
-
- retval = write_mmp_block(bh);
- /*
- * Don't spew too many error messages. Print one every
- * (s_mmp_update_interval * 60) seconds.
- */
- if (retval) {
- if ((failed_writes % 60) == 0)
- ext4_error(sb, "Error writing to MMP block");
- failed_writes++;
- }
-
- if (!(le32_to_cpu(es->s_feature_incompat) &
- EXT4_FEATURE_INCOMPAT_MMP)) {
- ext4_warning(sb, "kmmpd being stopped since MMP feature"
- " has been disabled.");
- EXT4_SB(sb)->s_mmp_tsk = NULL;
- goto failed;
- }
-
- if (sb->s_flags & MS_RDONLY) {
- ext4_warning(sb, "kmmpd being stopped since filesystem "
- "has been remounted as readonly.");
- EXT4_SB(sb)->s_mmp_tsk = NULL;
- goto failed;
- }
-
- diff = jiffies - last_update_time;
- if (diff < mmp_update_interval * HZ)
- schedule_timeout_interruptible(mmp_update_interval *
- HZ - diff);
-
- /*
- * We need to make sure that more than mmp_check_interval
- * seconds have not passed since writing. If that has happened
- * we need to check if the MMP block is as we left it.
- */
- diff = jiffies - last_update_time;
- if (diff > mmp_check_interval * HZ) {
- struct buffer_head *bh_check = NULL;
- struct mmp_struct *mmp_check;
-
- retval = read_mmp_block(sb, &bh_check, mmp_block);
- if (retval) {
- ext4_error(sb, "error reading MMP data: %d",
- retval);
-
- EXT4_SB(sb)->s_mmp_tsk = NULL;
- goto failed;
- }
-
- mmp_check = (struct mmp_struct *)(bh_check->b_data);
- if (mmp->mmp_seq != mmp_check->mmp_seq ||
- memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
- sizeof(mmp->mmp_nodename))) {
- dump_mmp_msg(sb, mmp_check,
- "Error while updating MMP info. "
- "The filesystem seems to have been"
- " multiply mounted.");
- ext4_error(sb, "abort");
- goto failed;
- }
- put_bh(bh_check);
- }
-
- /*
- * Adjust the mmp_check_interval depending on how much time
- * it took for the MMP block to be written.
- */
- mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
- EXT4_MMP_MAX_CHECK_INTERVAL),
- EXT4_MMP_MIN_CHECK_INTERVAL);
- mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
- }
-
- /*
- * Unmount seems to be clean.
- */
- mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
- mmp->mmp_time = cpu_to_le64(get_seconds());
-
- retval = write_mmp_block(bh);
-
-failed:
- kfree(data);
- brelse(bh);
- return retval;
-}
-
-/*
- * Get a random new sequence number but make sure it is not greater than
- * EXT4_MMP_SEQ_MAX.
- */
-static unsigned int mmp_new_seq(void)
-{
- u32 new_seq;
-
- do {
- get_random_bytes(&new_seq, sizeof(u32));
- } while (new_seq > EXT4_MMP_SEQ_MAX);
-
- return new_seq;
-}
-
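mmp_new_seq() above is plain rejection sampling: keep drawing random 32-bit values until one falls inside the allowed range, which keeps the result uniform over that range. A userspace sketch with rand() standing in for get_random_bytes() and an illustrative cap in place of EXT4_MMP_SEQ_MAX:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define SEQ_MAX 0x00ffffffU   /* illustrative cap */

static uint32_t new_seq(void)
{
	uint32_t seq;
	do {
		/* build a 32-bit draw from two rand() calls */
		seq = ((uint32_t)rand() << 16) ^ (uint32_t)rand();
	} while (seq > SEQ_MAX);  /* reject out-of-range draws */
	return seq;
}

int main(void)
{
	srand(42);
	printf("seq = %u\n", new_seq());
	return 0;
}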
-/*
- * Protect the filesystem from being mounted more than once.
- */
-int ext4_multi_mount_protect(struct super_block *sb,
- ext4_fsblk_t mmp_block)
-{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- struct buffer_head *bh = NULL;
- struct mmp_struct *mmp = NULL;
- struct mmpd_data *mmpd_data;
- u32 seq;
- unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
- unsigned int wait_time = 0;
- int retval;
-
- if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
- mmp_block >= ext4_blocks_count(es)) {
- ext4_warning(sb, "Invalid MMP block in superblock");
- goto failed;
- }
-
- retval = read_mmp_block(sb, &bh, mmp_block);
- if (retval)
- goto failed;
-
- mmp = (struct mmp_struct *)(bh->b_data);
-
- if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
- mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
-
- /*
- * If check_interval in MMP block is larger, use that instead of
- * update_interval from the superblock.
- */
- if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
- mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
-
- seq = le32_to_cpu(mmp->mmp_seq);
- if (seq == EXT4_MMP_SEQ_CLEAN)
- goto skip;
-
- if (seq == EXT4_MMP_SEQ_FSCK) {
- dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
- goto failed;
- }
-
- wait_time = min(mmp_check_interval * 2 + 1,
- mmp_check_interval + 60);
-
- /* Print MMP interval if more than 20 secs. */
- if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
- ext4_warning(sb, "MMP interval %u higher than expected, please"
- " wait.\n", wait_time * 2);
-
- if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
- ext4_warning(sb, "MMP startup interrupted, failing mount\n");
- goto failed;
- }
-
- retval = read_mmp_block(sb, &bh, mmp_block);
- if (retval)
- goto failed;
- mmp = (struct mmp_struct *)(bh->b_data);
- if (seq != le32_to_cpu(mmp->mmp_seq)) {
- dump_mmp_msg(sb, mmp,
- "Device is already active on another node.");
- goto failed;
- }
-
-skip:
- /*
- * write a new random sequence number.
- */
- seq = mmp_new_seq();
- mmp->mmp_seq = cpu_to_le32(seq);
-
- retval = write_mmp_block(bh);
- if (retval)
- goto failed;
-
- /*
- * wait for MMP interval and check mmp_seq.
- */
- if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
- ext4_warning(sb, "MMP startup interrupted, failing mount\n");
- goto failed;
- }
-
- retval = read_mmp_block(sb, &bh, mmp_block);
- if (retval)
- goto failed;
- mmp = (struct mmp_struct *)(bh->b_data);
- if (seq != le32_to_cpu(mmp->mmp_seq)) {
- dump_mmp_msg(sb, mmp,
- "Device is already active on another node.");
- goto failed;
- }
-
- mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
- if (!mmpd_data) {
- ext4_warning(sb, "not enough memory for mmpd_data");
- goto failed;
- }
- mmpd_data->sb = sb;
- mmpd_data->bh = bh;
-
- /*
- * Start a kernel thread to update the MMP block periodically.
- */
- EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
- bdevname(bh->b_bdev,
- mmp->mmp_bdevname));
- if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
- EXT4_SB(sb)->s_mmp_tsk = NULL;
- kfree(mmpd_data);
- ext4_warning(sb, "Unable to create kmmpd thread for %s.",
- sb->s_id);
- goto failed;
- }
-
- return 0;
-
-failed:
- brelse(bh);
- return 1;
-}
-
-
diff --git a/ANDROID_3.4.5/fs/ext4/move_extent.c b/ANDROID_3.4.5/fs/ext4/move_extent.c
deleted file mode 100644
index c5826c62..00000000
--- a/ANDROID_3.4.5/fs/ext4/move_extent.c
+++ /dev/null
@@ -1,1423 +0,0 @@
-/*
- * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
- * Written by Takashi Sato <t-sato@yk.jp.nec.com>
- * Akira Fujita <a-fujita@rs.jp.nec.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#include <linux/fs.h>
-#include <linux/quotaops.h>
-#include <linux/slab.h>
-#include "ext4_jbd2.h"
-#include "ext4.h"
-
-/**
- * get_ext_path - Find an extent path for designated logical block number.
- *
- * @inode: an inode which is searched
- * @lblock: logical block number to find an extent path
- * @path: pointer to an extent path pointer (for output)
- *
- * ext4_ext_find_extent wrapper. Return 0 on success, or a negative error value
- * on failure.
- */
-static inline int
-get_ext_path(struct inode *inode, ext4_lblk_t lblock,
- struct ext4_ext_path **path)
-{
- int ret = 0;
-
- *path = ext4_ext_find_extent(inode, lblock, *path);
- if (IS_ERR(*path)) {
- ret = PTR_ERR(*path);
- *path = NULL;
- } else if ((*path)[ext_depth(inode)].p_ext == NULL)
- ret = -ENODATA;
-
- return ret;
-}
-
-/**
- * copy_extent_status - Copy the extent's initialization status
- *
- * @src: the extent to read the initialization status from
- * @dest: the extent whose status is to be set
- */
-static void
-copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
-{
- if (ext4_ext_is_uninitialized(src))
- ext4_ext_mark_uninitialized(dest);
- else
- dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));
-}
-
-/**
- * mext_next_extent - Search for the next extent and set it to "extent"
- *
- * @inode: inode which is searched
- * @path: this will obtain data for the next extent
- * @extent: pointer to the next extent we have just gotten
- *
- * Search for the next extent in the array of ext4_ext_path structures (@path)
- * and set it in the ext4_extent structure (@extent). In addition, the member
- * of @path (->p_ext) also points to the next extent. Return 0 on success, 1 if the
- * ext4_ext_path structure refers to the last extent, or a negative error
- * value on failure.
- */
-static int
-mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
- struct ext4_extent **extent)
-{
- struct ext4_extent_header *eh;
- int ppos, leaf_ppos = path->p_depth;
-
- ppos = leaf_ppos;
- if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
- /* leaf block */
- *extent = ++path[ppos].p_ext;
- path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
- return 0;
- }
-
- while (--ppos >= 0) {
- if (EXT_LAST_INDEX(path[ppos].p_hdr) >
- path[ppos].p_idx) {
- int cur_ppos = ppos;
-
- /* index block */
- path[ppos].p_idx++;
- path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
- if (path[ppos+1].p_bh)
- brelse(path[ppos+1].p_bh);
- path[ppos+1].p_bh =
- sb_bread(inode->i_sb, path[ppos].p_block);
- if (!path[ppos+1].p_bh)
- return -EIO;
- path[ppos+1].p_hdr =
- ext_block_hdr(path[ppos+1].p_bh);
-
- /* Halfway index block */
- while (++cur_ppos < leaf_ppos) {
- path[cur_ppos].p_idx =
- EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
- path[cur_ppos].p_block =
- ext4_idx_pblock(path[cur_ppos].p_idx);
- if (path[cur_ppos+1].p_bh)
- brelse(path[cur_ppos+1].p_bh);
- path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
- path[cur_ppos].p_block);
- if (!path[cur_ppos+1].p_bh)
- return -EIO;
- path[cur_ppos+1].p_hdr =
- ext_block_hdr(path[cur_ppos+1].p_bh);
- }
-
- path[leaf_ppos].p_ext = *extent = NULL;
-
- eh = path[leaf_ppos].p_hdr;
- if (le16_to_cpu(eh->eh_entries) == 0)
- /* empty leaf is found */
- return -ENODATA;
-
- /* leaf block */
- path[leaf_ppos].p_ext = *extent =
- EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
- path[leaf_ppos].p_block =
- ext4_ext_pblock(path[leaf_ppos].p_ext);
- return 0;
- }
- }
- /* We found the last extent */
- return 1;
-}
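A hypothetical caller sketch (walk_extents() is invented for illustration) showing the iteration idiom this return convention supports:

/* Sketch: walk every remaining extent of an inode, assuming "extent"
 * already points at a valid extent (e.g. from get_ext_path()).
 * 0 advances the cursor, 1 means the last extent was reached,
 * negative is an I/O or corruption error. */
static int walk_extents(struct inode *inode, struct ext4_ext_path *path,
			struct ext4_extent *extent)
{
	int last = 0;

	while (last == 0) {
		/* ... consume "extent" here ... */
		last = mext_next_extent(inode, path, &extent);
	}
	return last < 0 ? last : 0;
}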
-
-/**
- * mext_check_null_inode - NULL check for two inodes
- *
- * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
- */
-static int
-mext_check_null_inode(struct inode *inode1, struct inode *inode2,
- const char *function, unsigned int line)
-{
- int ret = 0;
-
- if (inode1 == NULL) {
- __ext4_error(inode2->i_sb, function, line,
- "Both inodes should not be NULL: "
- "inode1 NULL inode2 %lu", inode2->i_ino);
- ret = -EIO;
- } else if (inode2 == NULL) {
- __ext4_error(inode1->i_sb, function, line,
- "Both inodes should not be NULL: "
- "inode1 %lu inode2 NULL", inode1->i_ino);
- ret = -EIO;
- }
- return ret;
-}
-
-/**
- * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
- *
- * @orig_inode: original inode structure
- * @donor_inode: donor inode structure
- * Acquire the write locks of i_data_sem of the two inodes (orig and donor)
- * in i_ino order.
- */
-static void
-double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
-{
- struct inode *first = orig_inode, *second = donor_inode;
-
- /*
- * Use the inode number to provide the stable locking order instead
- * of its address, because the C language doesn't guarantee you can
- * compare pointers that don't come from the same array.
- */
- if (donor_inode->i_ino < orig_inode->i_ino) {
- first = donor_inode;
- second = orig_inode;
- }
-
- down_write(&EXT4_I(first)->i_data_sem);
- down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
-}
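The same deadlock-avoidance pattern in a self-contained userspace analogue (illustrative only; the struct and its id field are invented, and the kernel's lockdep annotation is omitted):

#include <pthread.h>

struct obj {
	unsigned long id;		/* stable ordering key, like i_ino */
	pthread_mutex_t lock;
};

/* Taking both locks in a globally consistent (id) order means two
 * threads locking the same pair can never deadlock ABBA-style. */
static void lock_pair(struct obj *a, struct obj *b)
{
	struct obj *first = a, *second = b;

	if (b->id < a->id) {
		first = b;
		second = a;
	}
	pthread_mutex_lock(&first->lock);
	pthread_mutex_lock(&second->lock);
}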
-
-/**
- * double_up_write_data_sem - Release two inodes' write lock of i_data_sem
- *
- * @orig_inode: original inode structure whose lock is released first
- * @donor_inode: donor inode structure whose lock is released second
- * Release write lock of i_data_sem of two inodes (orig and donor).
- */
-static void
-double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
-{
- up_write(&EXT4_I(orig_inode)->i_data_sem);
- up_write(&EXT4_I(donor_inode)->i_data_sem);
-}
-
-/**
- * mext_insert_across_blocks - Insert extents across leaf block
- *
- * @handle: journal handle
- * @orig_inode: original inode
- * @o_start: first original extent to be changed
- * @o_end: last original extent to be changed
- * @start_ext: first new extent to be inserted
- * @new_ext: middle of new extent to be inserted
- * @end_ext: last new extent to be inserted
- *
- * Allocate a new leaf block and insert extents into it. Return 0 on success,
- * or a negative error value on failure.
- */
-static int
-mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
- struct ext4_extent *o_start, struct ext4_extent *o_end,
- struct ext4_extent *start_ext, struct ext4_extent *new_ext,
- struct ext4_extent *end_ext)
-{
- struct ext4_ext_path *orig_path = NULL;
- ext4_lblk_t eblock = 0;
- int new_flag = 0;
- int end_flag = 0;
- int err = 0;
-
- if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) {
- if (o_start == o_end) {
-
- /* start_ext new_ext end_ext
- * donor |---------|-----------|--------|
- * orig |------------------------------|
- */
- end_flag = 1;
- } else {
-
- /* start_ext new_ext end_ext
- * donor |---------|----------|---------|
- * orig |---------------|--------------|
- */
- o_end->ee_block = end_ext->ee_block;
- o_end->ee_len = end_ext->ee_len;
- ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
- }
-
- o_start->ee_len = start_ext->ee_len;
- eblock = le32_to_cpu(start_ext->ee_block);
- new_flag = 1;
-
- } else if (start_ext->ee_len && new_ext->ee_len &&
- !end_ext->ee_len && o_start == o_end) {
-
- /* start_ext new_ext
- * donor |--------------|---------------|
- * orig |------------------------------|
- */
- o_start->ee_len = start_ext->ee_len;
- eblock = le32_to_cpu(start_ext->ee_block);
- new_flag = 1;
-
- } else if (!start_ext->ee_len && new_ext->ee_len &&
- end_ext->ee_len && o_start == o_end) {
-
- /* new_ext end_ext
- * donor |--------------|---------------|
- * orig |------------------------------|
- */
- o_end->ee_block = end_ext->ee_block;
- o_end->ee_len = end_ext->ee_len;
- ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
-
- /*
-		 * eblock stays 0 if new_ext starts at the
-		 * first logical block.
- */
- if (new_ext->ee_block)
- eblock = le32_to_cpu(new_ext->ee_block);
-
- new_flag = 1;
- } else {
- ext4_debug("ext4 move extent: Unexpected insert case\n");
- return -EIO;
- }
-
- if (new_flag) {
- err = get_ext_path(orig_inode, eblock, &orig_path);
- if (err)
- goto out;
-
- if (ext4_ext_insert_extent(handle, orig_inode,
- orig_path, new_ext, 0))
- goto out;
- }
-
- if (end_flag) {
- err = get_ext_path(orig_inode,
- le32_to_cpu(end_ext->ee_block) - 1, &orig_path);
- if (err)
- goto out;
-
- if (ext4_ext_insert_extent(handle, orig_inode,
- orig_path, end_ext, 0))
- goto out;
- }
-out:
- if (orig_path) {
- ext4_ext_drop_refs(orig_path);
- kfree(orig_path);
- }
-
- return err;
-
-}
-
-/**
- * mext_insert_inside_block - Insert new extent to the extent block
- *
- * @o_start: first original extent to be moved
- * @o_end: last original extent to be moved
- * @start_ext: first new extent to be inserted
- * @new_ext: middle of new extent to be inserted
- * @end_ext: last new extent to be inserted
- * @eh: extent header of target leaf block
- * @range_to_move: used to decide how to insert extent
- *
- * Insert extents into the leaf block. The extent (@o_start) is overwritten
- * by inserted extents.
- */
-static void
-mext_insert_inside_block(struct ext4_extent *o_start,
- struct ext4_extent *o_end,
- struct ext4_extent *start_ext,
- struct ext4_extent *new_ext,
- struct ext4_extent *end_ext,
- struct ext4_extent_header *eh,
- int range_to_move)
-{
- int i = 0;
- unsigned long len;
-
- /* Move the existing extents */
- if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
- len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) -
- (unsigned long)(o_end + 1);
- memmove(o_end + 1 + range_to_move, o_end + 1, len);
- }
-
- /* Insert start entry */
- if (start_ext->ee_len)
- o_start[i++].ee_len = start_ext->ee_len;
-
- /* Insert new entry */
- if (new_ext->ee_len) {
- o_start[i] = *new_ext;
- ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
- }
-
- /* Insert end entry */
- if (end_ext->ee_len)
- o_start[i] = *end_ext;
-
- /* Increment the total entries counter on the extent block */
- le16_add_cpu(&eh->eh_entries, range_to_move);
-}
-
-/**
- * mext_insert_extents - Insert new extent
- *
- * @handle: journal handle
- * @orig_inode: original inode
- * @orig_path: path indicates first extent to be changed
- * @o_start: first original extent to be changed
- * @o_end: last original extent to be changed
- * @start_ext: first new extent to be inserted
- * @new_ext: middle of new extent to be inserted
- * @end_ext: last new extent to be inserted
- *
- * Call the function to insert extents. If we cannot add more extents into
- * the leaf block, we call mext_insert_across_blocks() to create a
- * new leaf block. Otherwise call mext_insert_inside_block(). Return 0
- * on success, or a negative error value on failure.
- */
-static int
-mext_insert_extents(handle_t *handle, struct inode *orig_inode,
- struct ext4_ext_path *orig_path,
- struct ext4_extent *o_start,
- struct ext4_extent *o_end,
- struct ext4_extent *start_ext,
- struct ext4_extent *new_ext,
- struct ext4_extent *end_ext)
-{
- struct ext4_extent_header *eh;
- unsigned long need_slots, slots_range;
- int range_to_move, depth, ret;
-
- /*
-	 * The extents to be inserted:
-	 * start_extent + new_extent + end_extent.
- */
- need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) +
- (new_ext->ee_len ? 1 : 0);
-
- /* The number of slots between start and end */
- slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1)
- / sizeof(struct ext4_extent);
-
- /* Range to move the end of extent */
- range_to_move = need_slots - slots_range;
- depth = orig_path->p_depth;
- orig_path += depth;
- eh = orig_path->p_hdr;
-
- if (depth) {
- /* Register to journal */
- ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
- if (ret)
- return ret;
- }
-
- /* Expansion */
- if (range_to_move > 0 &&
- (range_to_move > le16_to_cpu(eh->eh_max)
- - le16_to_cpu(eh->eh_entries))) {
-
- ret = mext_insert_across_blocks(handle, orig_inode, o_start,
- o_end, start_ext, new_ext, end_ext);
- if (ret < 0)
- return ret;
- } else
- mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
- end_ext, eh, range_to_move);
-
- if (depth) {
- ret = ext4_handle_dirty_metadata(handle, orig_inode,
- orig_path->p_bh);
- if (ret)
- return ret;
- } else {
- ret = ext4_mark_inode_dirty(handle, orig_inode);
- if (ret < 0)
- return ret;
- }
-
- return 0;
-}
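A worked instance of the slot arithmetic above, with hypothetical values:

/*
 * Suppose o_start == o_end (a single original extent is being split)
 * and start_ext, new_ext and end_ext are all non-empty:
 *
 *   need_slots    = 1 + 1 + 1 = 3
 *   slots_range   = ((o_end + 1) - o_start + 1) / sizeof(ext4_extent)
 *                 = (12 + 1) / 12 = 1        (one existing slot)
 *   range_to_move = 3 - 1 = 2                (tail shifts by two slots)
 *
 * If the leaf has fewer than two free slots (eh_max - eh_entries < 2),
 * the insert spills over via mext_insert_across_blocks() instead.
 */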
-
-/**
- * mext_leaf_block - Move one leaf extent block into the inode.
- *
- * @handle: journal handle
- * @orig_inode: original inode
- * @orig_path: path indicates first extent to be changed
- * @dext: donor extent
- * @from: start offset on the target file
- *
- * In order to insert extents into the leaf block, we must divide the
- * extent in the leaf block into three extents: the one that is replaced
- * by the inserted extents, and the others located around it.
- *
- * Therefore, this function creates structures to save extents of the leaf
- * block, and inserts extents by calling mext_insert_extents() with
- * created extents. Return 0 on success, or a negative error value on failure.
- */
-static int
-mext_leaf_block(handle_t *handle, struct inode *orig_inode,
- struct ext4_ext_path *orig_path, struct ext4_extent *dext,
- ext4_lblk_t *from)
-{
- struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
- struct ext4_extent new_ext, start_ext, end_ext;
- ext4_lblk_t new_ext_end;
- int oext_alen, new_ext_alen, end_ext_alen;
- int depth = ext_depth(orig_inode);
- int ret;
-
- start_ext.ee_block = end_ext.ee_block = 0;
- o_start = o_end = oext = orig_path[depth].p_ext;
- oext_alen = ext4_ext_get_actual_len(oext);
- start_ext.ee_len = end_ext.ee_len = 0;
-
- new_ext.ee_block = cpu_to_le32(*from);
- ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
- new_ext.ee_len = dext->ee_len;
- new_ext_alen = ext4_ext_get_actual_len(&new_ext);
- new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
-
- /*
- * Case: original extent is first
- * oext |--------|
- * new_ext |--|
- * start_ext |--|
- */
- if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) &&
- le32_to_cpu(new_ext.ee_block) <
- le32_to_cpu(oext->ee_block) + oext_alen) {
- start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
- le32_to_cpu(oext->ee_block));
- start_ext.ee_block = oext->ee_block;
- copy_extent_status(oext, &start_ext);
- } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
- prev_ext = oext - 1;
- /*
- * We can merge new_ext into previous extent,
- * if these are contiguous and same extent type.
- */
- if (ext4_can_extents_be_merged(orig_inode, prev_ext,
- &new_ext)) {
- o_start = prev_ext;
- start_ext.ee_len = cpu_to_le16(
- ext4_ext_get_actual_len(prev_ext) +
- new_ext_alen);
- start_ext.ee_block = oext->ee_block;
- copy_extent_status(prev_ext, &start_ext);
- new_ext.ee_len = 0;
- }
- }
-
- /*
- * Case: new_ext_end must be less than oext
- * oext |-----------|
- * new_ext |-------|
- */
- if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
- EXT4_ERROR_INODE(orig_inode,
- "new_ext_end(%u) should be less than or equal to "
- "oext->ee_block(%u) + oext_alen(%d) - 1",
- new_ext_end, le32_to_cpu(oext->ee_block),
- oext_alen);
- ret = -EIO;
- goto out;
- }
-
- /*
- * Case: new_ext is smaller than original extent
- * oext |---------------|
- * new_ext |-----------|
- * end_ext |---|
- */
- if (le32_to_cpu(oext->ee_block) <= new_ext_end &&
- new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) {
- end_ext.ee_len =
- cpu_to_le16(le32_to_cpu(oext->ee_block) +
- oext_alen - 1 - new_ext_end);
- copy_extent_status(oext, &end_ext);
- end_ext_alen = ext4_ext_get_actual_len(&end_ext);
- ext4_ext_store_pblock(&end_ext,
- (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
- end_ext.ee_block =
- cpu_to_le32(le32_to_cpu(o_end->ee_block) +
- oext_alen - end_ext_alen);
- }
-
- ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
- o_end, &start_ext, &new_ext, &end_ext);
-out:
- return ret;
-}
-
-/**
- * mext_calc_swap_extents - Calculate extents for extent swapping.
- *
- * @tmp_dext: the extent that will belong to the original inode
- * @tmp_oext: the extent that will belong to the donor inode
- * @orig_off: block offset of original inode
- * @donor_off: block offset of donor inode
- * @max_count: the maximum length of extents
- *
- * Return 0 on success, or a negative error value on failure.
- */
-static int
-mext_calc_swap_extents(struct ext4_extent *tmp_dext,
- struct ext4_extent *tmp_oext,
- ext4_lblk_t orig_off, ext4_lblk_t donor_off,
- ext4_lblk_t max_count)
-{
- ext4_lblk_t diff, orig_diff;
- struct ext4_extent dext_old, oext_old;
-
- BUG_ON(orig_off != donor_off);
-
- /* original and donor extents have to cover the same block offset */
- if (orig_off < le32_to_cpu(tmp_oext->ee_block) ||
- le32_to_cpu(tmp_oext->ee_block) +
- ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off)
- return -ENODATA;
-
- if (orig_off < le32_to_cpu(tmp_dext->ee_block) ||
- le32_to_cpu(tmp_dext->ee_block) +
- ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off)
- return -ENODATA;
-
- dext_old = *tmp_dext;
- oext_old = *tmp_oext;
-
- /* When tmp_dext is too large, pick up the target range. */
- diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
-
- ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
- tmp_dext->ee_block =
- cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
- tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
-
- if (max_count < ext4_ext_get_actual_len(tmp_dext))
- tmp_dext->ee_len = cpu_to_le16(max_count);
-
- orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
- ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff);
-
- /* Adjust extent length if donor extent is larger than orig */
- if (ext4_ext_get_actual_len(tmp_dext) >
- ext4_ext_get_actual_len(tmp_oext) - orig_diff)
- tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) -
- orig_diff);
-
- tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext));
-
- copy_extent_status(&oext_old, tmp_dext);
- copy_extent_status(&dext_old, tmp_oext);
-
- return 0;
-}
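A worked example of the trimming above, with hypothetical extents:

/*
 * Donor extent: ee_block = 90,  len = 100  -> covers [90..189]
 * Orig  extent: ee_block = 100, len = 120  -> covers [100..219]
 * orig_off == donor_off == 120, max_count == 16.
 *
 *   diff      = 120 - 90  = 30  -> tmp_dext becomes [120..189], len 70
 *   len cap   = min(70, 16)     -> tmp_dext becomes [120..135], len 16
 *   orig_diff = 120 - 100 = 20  -> tmp_oext's pblock shifts by 20
 *   tmp_oext->ee_len = actual_len(tmp_dext) = 16
 *
 * Both temporary extents now describe the same 16-block window starting
 * at logical block 120, ready to be swapped leaf by leaf.
 */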
-
-/**
- * mext_replace_branches - Replace original extents with new extents
- *
- * @handle: journal handle
- * @orig_inode: original inode
- * @donor_inode: donor inode
- * @from: block offset of orig_inode
- * @count: block count to be replaced
- * @err: pointer to save return value
- *
- * Replace original inode extents and donor inode extents page by page.
- * We implement this replacement in the following three steps:
- * 1. Save the block information of original and donor inodes into
- * dummy extents.
- * 2. Change the block information of original inode to point at the
- * donor inode blocks.
- * 3. Change the block information of donor inode to point at the saved
- * original inode blocks in the dummy extents.
- *
- * Return replaced block count.
- */
-static int
-mext_replace_branches(handle_t *handle, struct inode *orig_inode,
- struct inode *donor_inode, ext4_lblk_t from,
- ext4_lblk_t count, int *err)
-{
- struct ext4_ext_path *orig_path = NULL;
- struct ext4_ext_path *donor_path = NULL;
- struct ext4_extent *oext, *dext;
- struct ext4_extent tmp_dext, tmp_oext;
- ext4_lblk_t orig_off = from, donor_off = from;
- int depth;
- int replaced_count = 0;
- int dext_alen;
-
- /* Protect extent trees against block allocations via delalloc */
- double_down_write_data_sem(orig_inode, donor_inode);
-
- /* Get the original extent for the block "orig_off" */
- *err = get_ext_path(orig_inode, orig_off, &orig_path);
- if (*err)
- goto out;
-
- /* Get the donor extent for the head */
- *err = get_ext_path(donor_inode, donor_off, &donor_path);
- if (*err)
- goto out;
- depth = ext_depth(orig_inode);
- oext = orig_path[depth].p_ext;
- tmp_oext = *oext;
-
- depth = ext_depth(donor_inode);
- dext = donor_path[depth].p_ext;
- tmp_dext = *dext;
-
- *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
- donor_off, count);
- if (*err)
- goto out;
-
- /* Loop for the donor extents */
- while (1) {
- /* The extent for donor must be found. */
- if (!dext) {
- EXT4_ERROR_INODE(donor_inode,
- "The extent for donor must be found");
- *err = -EIO;
- goto out;
- } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
- EXT4_ERROR_INODE(donor_inode,
- "Donor offset(%u) and the first block of donor "
- "extent(%u) should be equal",
- donor_off,
- le32_to_cpu(tmp_dext.ee_block));
- *err = -EIO;
- goto out;
- }
-
- /* Set donor extent to orig extent */
- *err = mext_leaf_block(handle, orig_inode,
- orig_path, &tmp_dext, &orig_off);
- if (*err)
- goto out;
-
- /* Set orig extent to donor extent */
- *err = mext_leaf_block(handle, donor_inode,
- donor_path, &tmp_oext, &donor_off);
- if (*err)
- goto out;
-
- dext_alen = ext4_ext_get_actual_len(&tmp_dext);
- replaced_count += dext_alen;
- donor_off += dext_alen;
- orig_off += dext_alen;
-
- /* Already moved the expected blocks */
- if (replaced_count >= count)
- break;
-
- if (orig_path)
- ext4_ext_drop_refs(orig_path);
- *err = get_ext_path(orig_inode, orig_off, &orig_path);
- if (*err)
- goto out;
- depth = ext_depth(orig_inode);
- oext = orig_path[depth].p_ext;
- tmp_oext = *oext;
-
- if (donor_path)
- ext4_ext_drop_refs(donor_path);
- *err = get_ext_path(donor_inode, donor_off, &donor_path);
- if (*err)
- goto out;
- depth = ext_depth(donor_inode);
- dext = donor_path[depth].p_ext;
- tmp_dext = *dext;
-
- *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
- donor_off, count - replaced_count);
- if (*err)
- goto out;
- }
-
-out:
- if (orig_path) {
- ext4_ext_drop_refs(orig_path);
- kfree(orig_path);
- }
- if (donor_path) {
- ext4_ext_drop_refs(donor_path);
- kfree(donor_path);
- }
-
- ext4_ext_invalidate_cache(orig_inode);
- ext4_ext_invalidate_cache(donor_inode);
-
- double_up_write_data_sem(orig_inode, donor_inode);
-
- return replaced_count;
-}
-
-/**
- * move_extent_per_page - Move extent data per page
- *
- * @o_filp: file structure of original file
- * @donor_inode: donor inode
- * @orig_page_offset: page index on original file
- * @data_offset_in_page: block index where data swapping starts
- * @block_len_in_page: the number of blocks to be swapped
- * @uninit: orig extent is uninitialized or not
- * @err: pointer to save return value
- *
- * Save the data in original inode blocks and replace original inode extents
- * with donor inode extents by calling mext_replace_branches().
- * Finally, write out the saved data in new original inode blocks. Return
- * replaced block count.
- */
-static int
-move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
- pgoff_t orig_page_offset, int data_offset_in_page,
- int block_len_in_page, int uninit, int *err)
-{
- struct inode *orig_inode = o_filp->f_dentry->d_inode;
- struct address_space *mapping = orig_inode->i_mapping;
- struct buffer_head *bh;
- struct page *page = NULL;
- const struct address_space_operations *a_ops = mapping->a_ops;
- handle_t *handle;
- ext4_lblk_t orig_blk_offset;
- long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
- unsigned long blocksize = orig_inode->i_sb->s_blocksize;
- unsigned int w_flags = 0;
- unsigned int tmp_data_size, data_size, replaced_size;
- void *fsdata;
- int i, jblocks;
- int err2 = 0;
- int replaced_count = 0;
- int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
-
- /*
-	 * We need twice the ordinary number of journal buffers because
-	 * inode and donor_inode may each change different metadata blocks.
- */
- jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
- handle = ext4_journal_start(orig_inode, jblocks);
- if (IS_ERR(handle)) {
- *err = PTR_ERR(handle);
- return 0;
- }
-
- if (segment_eq(get_fs(), KERNEL_DS))
- w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
-
- orig_blk_offset = orig_page_offset * blocks_per_page +
- data_offset_in_page;
-
- /*
-	 * If the orig extent is uninitialized, there is no need to
-	 * force the page into memory and then force it to be
-	 * written out again.
-	 * Just swap the data blocks between orig and donor.
- */
- if (uninit) {
- replaced_count = mext_replace_branches(handle, orig_inode,
- donor_inode, orig_blk_offset,
- block_len_in_page, err);
- goto out2;
- }
-
- offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
-
- /* Calculate data_size */
- if ((orig_blk_offset + block_len_in_page - 1) ==
- ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
- /* Replace the last block */
- tmp_data_size = orig_inode->i_size & (blocksize - 1);
- /*
-		 * If tmp_data_size is zero, the file size is a multiple
-		 * of the blocksize, so use a full block instead.
- */
- if (tmp_data_size == 0)
- tmp_data_size = blocksize;
-
- data_size = tmp_data_size +
- ((block_len_in_page - 1) << orig_inode->i_blkbits);
- } else
- data_size = block_len_in_page << orig_inode->i_blkbits;
-
- replaced_size = data_size;
-
- *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags,
- &page, &fsdata);
- if (unlikely(*err < 0))
- goto out;
-
- if (!PageUptodate(page)) {
- mapping->a_ops->readpage(o_filp, page);
- lock_page(page);
- }
-
- /*
-	 * try_to_release_page() doesn't call releasepage while the page
-	 * is under writeback. Since multiple move-extent processes may
-	 * write to the same file, their ordering must be preserved, so
-	 * call wait_on_page_writeback() to wait for the writeback of
-	 * the page to finish.
- */
- wait_on_page_writeback(page);
-
- /* Release old bh and drop refs */
- try_to_release_page(page, 0);
-
- replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
- orig_blk_offset, block_len_in_page,
- &err2);
- if (err2) {
- if (replaced_count) {
- block_len_in_page = replaced_count;
- replaced_size =
- block_len_in_page << orig_inode->i_blkbits;
- } else
- goto out;
- }
-
- if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
-
- bh = page_buffers(page);
- for (i = 0; i < data_offset_in_page; i++)
- bh = bh->b_this_page;
-
- for (i = 0; i < block_len_in_page; i++) {
- *err = ext4_get_block(orig_inode,
- (sector_t)(orig_blk_offset + i), bh, 0);
- if (*err < 0)
- goto out;
-
- if (bh->b_this_page != NULL)
- bh = bh->b_this_page;
- }
-
- *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
- page, fsdata);
- page = NULL;
-
-out:
- if (unlikely(page)) {
- if (PageLocked(page))
- unlock_page(page);
- page_cache_release(page);
- ext4_journal_stop(handle);
- }
-out2:
- ext4_journal_stop(handle);
-
- if (err2)
- *err = err2;
-
- return replaced_count;
-}
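For the journal sizing at the top of this function, a hypothetical credit calculation:

/*
 * Example (invented numbers): if ext4_writepage_trans_blocks()
 * estimates 11 credits for one inode, the handle is started with
 * jblocks = 22, reserving room for the original inode and the donor
 * inode to each dirty a different set of metadata blocks within the
 * same transaction.
 */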
-
-/**
- * mext_check_arguments - Check whether move extent can be done
- *
- * @orig_inode: original inode
- * @donor_inode: donor inode
- * @orig_start: logical start offset in block for orig
- * @donor_start: logical start offset in block for donor
- * @len: the number of blocks to be moved
- *
- * Check the arguments of ext4_move_extents() to decide whether the files
- * can be exchanged with each other.
- * Return 0 on success, or a negative error value on failure.
- */
-static int
-mext_check_arguments(struct inode *orig_inode,
- struct inode *donor_inode, __u64 orig_start,
- __u64 donor_start, __u64 *len)
-{
- ext4_lblk_t orig_blocks, donor_blocks;
- unsigned int blkbits = orig_inode->i_blkbits;
- unsigned int blocksize = 1 << blkbits;
-
- if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
- ext4_debug("ext4 move extent: suid or sgid is set"
- " to donor file [ino:orig %lu, donor %lu]\n",
- orig_inode->i_ino, donor_inode->i_ino);
- return -EINVAL;
- }
-
- if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
- return -EPERM;
-
- /* Ext4 move extent does not support swapfile */
- if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
- ext4_debug("ext4 move extent: The argument files should "
- "not be swapfile [ino:orig %lu, donor %lu]\n",
- orig_inode->i_ino, donor_inode->i_ino);
- return -EINVAL;
- }
-
- /* Files should be in the same ext4 FS */
- if (orig_inode->i_sb != donor_inode->i_sb) {
- ext4_debug("ext4 move extent: The argument files "
- "should be in same FS [ino:orig %lu, donor %lu]\n",
- orig_inode->i_ino, donor_inode->i_ino);
- return -EINVAL;
- }
-
-	/* Ext4 move extent supports only extent-based files */
- if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
- ext4_debug("ext4 move extent: orig file is not extents "
- "based file [ino:orig %lu]\n", orig_inode->i_ino);
- return -EOPNOTSUPP;
- } else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) {
- ext4_debug("ext4 move extent: donor file is not extents "
- "based file [ino:donor %lu]\n", donor_inode->i_ino);
- return -EOPNOTSUPP;
- }
-
- if ((!orig_inode->i_size) || (!donor_inode->i_size)) {
- ext4_debug("ext4 move extent: File size is 0 byte\n");
- return -EINVAL;
- }
-
-	/* Start offsets should be the same */
- if (orig_start != donor_start) {
- ext4_debug("ext4 move extent: orig and donor's start "
- "offset are not same [ino:orig %lu, donor %lu]\n",
- orig_inode->i_ino, donor_inode->i_ino);
- return -EINVAL;
- }
-
- if ((orig_start >= EXT_MAX_BLOCKS) ||
- (donor_start >= EXT_MAX_BLOCKS) ||
- (*len > EXT_MAX_BLOCKS) ||
- (orig_start + *len >= EXT_MAX_BLOCKS)) {
- ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
- "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
- orig_inode->i_ino, donor_inode->i_ino);
- return -EINVAL;
- }
-
- if (orig_inode->i_size > donor_inode->i_size) {
- donor_blocks = (donor_inode->i_size + blocksize - 1) >> blkbits;
- /* TODO: eliminate this artificial restriction */
- if (orig_start >= donor_blocks) {
- ext4_debug("ext4 move extent: orig start offset "
- "[%llu] should be less than donor file blocks "
- "[%u] [ino:orig %lu, donor %lu]\n",
- orig_start, donor_blocks,
- orig_inode->i_ino, donor_inode->i_ino);
- return -EINVAL;
- }
-
- /* TODO: eliminate this artificial restriction */
- if (orig_start + *len > donor_blocks) {
- ext4_debug("ext4 move extent: End offset [%llu] should "
- "be less than donor file blocks [%u]."
- "So adjust length from %llu to %llu "
- "[ino:orig %lu, donor %lu]\n",
- orig_start + *len, donor_blocks,
- *len, donor_blocks - orig_start,
- orig_inode->i_ino, donor_inode->i_ino);
- *len = donor_blocks - orig_start;
- }
- } else {
- orig_blocks = (orig_inode->i_size + blocksize - 1) >> blkbits;
- if (orig_start >= orig_blocks) {
- ext4_debug("ext4 move extent: start offset [%llu] "
- "should be less than original file blocks "
- "[%u] [ino:orig %lu, donor %lu]\n",
- orig_start, orig_blocks,
- orig_inode->i_ino, donor_inode->i_ino);
- return -EINVAL;
- }
-
- if (orig_start + *len > orig_blocks) {
- ext4_debug("ext4 move extent: Adjust length "
- "from %llu to %llu. Because it should be "
- "less than original file blocks "
- "[ino:orig %lu, donor %lu]\n",
- *len, orig_blocks - orig_start,
- orig_inode->i_ino, donor_inode->i_ino);
- *len = orig_blocks - orig_start;
- }
- }
-
- if (!*len) {
- ext4_debug("ext4 move extent: len should not be 0 "
- "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
- donor_inode->i_ino);
- return -EINVAL;
- }
-
- return 0;
-}
-
-/**
- * mext_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
- *
- * @inode1: the inode structure
- * @inode2: the inode structure
- *
- * Lock two inodes' i_mutex in i_ino order.
- * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
- */
-static int
-mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
-{
- int ret = 0;
-
- BUG_ON(inode1 == NULL && inode2 == NULL);
-
- ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
- if (ret < 0)
- goto out;
-
- if (inode1 == inode2) {
- mutex_lock(&inode1->i_mutex);
- goto out;
- }
-
- if (inode1->i_ino < inode2->i_ino) {
- mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
- } else {
- mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
- }
-
-out:
- return ret;
-}
-
-/**
- * mext_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
- *
- * @inode1: the inode that is released first
- * @inode2: the inode that is released second
- *
- * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
- */
-
-static int
-mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
-{
- int ret = 0;
-
- BUG_ON(inode1 == NULL && inode2 == NULL);
-
- ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
- if (ret < 0)
- goto out;
-
- if (inode1)
- mutex_unlock(&inode1->i_mutex);
-
- if (inode2 && inode2 != inode1)
- mutex_unlock(&inode2->i_mutex);
-
-out:
- return ret;
-}
-
-/**
- * ext4_move_extents - Exchange the specified range of a file
- *
- * @o_filp: file structure of the original file
- * @d_filp: file structure of the donor file
- * @orig_start: start offset in block for orig
- * @donor_start: start offset in block for donor
- * @len: the number of blocks to be moved
- * @moved_len: moved block length
- *
- * This function returns 0 and sets the moved block length in moved_len
- * on success; otherwise it returns an error value.
- *
- * Note: ext4_move_extents() proceeds in the following order.
- * 1:ext4_move_extents() calculates the last block number of the extent
- *   to be moved from the start block number (orig_start) and the number
- *   of blocks to be moved (len) given as arguments.
- *   If {orig, donor}_start points into a hole, the extent start offset
- *   pointed to by ext_cur (the current extent), holecheck_path and
- *   orig_path is set to just after the hole.
- * 2:Repeat steps 3 to 5 until holecheck_path points to the last extent
- *   or ext_cur exceeds block_end, which is the last logical block number.
- * 3:To get the length of the contiguous area, call mext_next_extent()
- *   repeatedly on ext_cur (whose initial value comes from holecheck_path)
- *   until a non-contiguous extent is found, the start logical block
- *   number exceeds block_end, or the last extent is reached.
- * 4:Exchange the original inode data with the donor inode data from
- *   orig_page_offset to seq_end_page.
- *   The start indexes of the data are given as arguments.
- *   That of the original inode is orig_page_offset, and that of the
- *   donor inode is also orig_page_offset
- *   (to easily handle the blocksize != pagesize case, the offset for
- *   the donor inode is in block units).
- * 5:Update holecheck_path and orig_path to point to the next extent to
- *   process, then return to step 2.
- * 6:Release holecheck_path and orig_path, and set moved_len to the
- *   number of moved blocks, which the caller can use to calculate the
- *   file offset at which to start the next move-extent ioctl.
- * 7:Return 0 on success, or a negative error value on failure.
- */
-int
-ext4_move_extents(struct file *o_filp, struct file *d_filp,
- __u64 orig_start, __u64 donor_start, __u64 len,
- __u64 *moved_len)
-{
- struct inode *orig_inode = o_filp->f_dentry->d_inode;
- struct inode *donor_inode = d_filp->f_dentry->d_inode;
- struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;
- struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
- ext4_lblk_t block_start = orig_start;
- ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
- ext4_lblk_t rest_blocks;
- pgoff_t orig_page_offset = 0, seq_end_page;
- int ret1, ret2, depth, last_extent = 0;
- int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
- int data_offset_in_page;
- int block_len_in_page;
- int uninit;
-
-	/* orig and donor should be different files */
- if (orig_inode->i_ino == donor_inode->i_ino) {
- ext4_debug("ext4 move extent: The argument files should not "
- "be same file [ino:orig %lu, donor %lu]\n",
- orig_inode->i_ino, donor_inode->i_ino);
- return -EINVAL;
- }
-
- /* Regular file check */
- if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
- ext4_debug("ext4 move extent: The argument files should be "
- "regular file [ino:orig %lu, donor %lu]\n",
- orig_inode->i_ino, donor_inode->i_ino);
- return -EINVAL;
- }
-
- /* Protect orig and donor inodes against a truncate */
- ret1 = mext_inode_double_lock(orig_inode, donor_inode);
- if (ret1 < 0)
- return ret1;
-
- /* Protect extent tree against block allocations via delalloc */
- double_down_write_data_sem(orig_inode, donor_inode);
-	/* Check whether move_extent can be done in this filesystem */
- ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
- donor_start, &len);
- if (ret1)
- goto out;
-
- file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
- block_end = block_start + len - 1;
- if (file_end < block_end)
- len -= block_end - file_end;
-
- ret1 = get_ext_path(orig_inode, block_start, &orig_path);
- if (ret1)
- goto out;
-
- /* Get path structure to check the hole */
- ret1 = get_ext_path(orig_inode, block_start, &holecheck_path);
- if (ret1)
- goto out;
-
- depth = ext_depth(orig_inode);
- ext_cur = holecheck_path[depth].p_ext;
-
- /*
- * Get proper starting location of block replacement if block_start was
- * within the hole.
- */
- if (le32_to_cpu(ext_cur->ee_block) +
- ext4_ext_get_actual_len(ext_cur) - 1 < block_start) {
- /*
-		 * The hole exists between extents or at the tail of the
-		 * original file.
- */
- last_extent = mext_next_extent(orig_inode,
- holecheck_path, &ext_cur);
- if (last_extent < 0) {
- ret1 = last_extent;
- goto out;
- }
- last_extent = mext_next_extent(orig_inode, orig_path,
- &ext_dummy);
- if (last_extent < 0) {
- ret1 = last_extent;
- goto out;
- }
- seq_start = le32_to_cpu(ext_cur->ee_block);
- } else if (le32_to_cpu(ext_cur->ee_block) > block_start)
- /* The hole exists at the beginning of original file. */
- seq_start = le32_to_cpu(ext_cur->ee_block);
- else
- seq_start = block_start;
-
- /* No blocks within the specified range. */
- if (le32_to_cpu(ext_cur->ee_block) > block_end) {
-		ext4_debug("ext4 move extent: The specified range of the file "
-			"may be a hole\n");
- ret1 = -EINVAL;
- goto out;
- }
-
- /* Adjust start blocks */
- add_blocks = min(le32_to_cpu(ext_cur->ee_block) +
- ext4_ext_get_actual_len(ext_cur), block_end + 1) -
- max(le32_to_cpu(ext_cur->ee_block), block_start);
-
- while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
- seq_blocks += add_blocks;
-
- /* Adjust tail blocks */
- if (seq_start + seq_blocks - 1 > block_end)
- seq_blocks = block_end - seq_start + 1;
-
- ext_prev = ext_cur;
- last_extent = mext_next_extent(orig_inode, holecheck_path,
- &ext_cur);
- if (last_extent < 0) {
- ret1 = last_extent;
- break;
- }
- add_blocks = ext4_ext_get_actual_len(ext_cur);
-
- /*
- * Extend the length of contiguous block (seq_blocks)
- * if extents are contiguous.
- */
- if (ext4_can_extents_be_merged(orig_inode,
- ext_prev, ext_cur) &&
- block_end >= le32_to_cpu(ext_cur->ee_block) &&
- !last_extent)
- continue;
-
-		/* Is the original extent uninitialized? */
- uninit = ext4_ext_is_uninitialized(ext_prev);
-
- data_offset_in_page = seq_start % blocks_per_page;
-
- /*
-		 * Calculate the number of data blocks to be swapped
-		 * in the first page.
- */
- if (data_offset_in_page + seq_blocks > blocks_per_page) {
- /* Swapped blocks are across pages */
- block_len_in_page =
- blocks_per_page - data_offset_in_page;
- } else {
- /* Swapped blocks are in a page */
- block_len_in_page = seq_blocks;
- }
-
- orig_page_offset = seq_start >>
- (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
- seq_end_page = (seq_start + seq_blocks - 1) >>
- (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
- seq_start = le32_to_cpu(ext_cur->ee_block);
- rest_blocks = seq_blocks;
-
- /*
-		 * Up the semaphores to avoid the following problems:
- * a. transaction deadlock among ext4_journal_start,
- * ->write_begin via pagefault, and jbd2_journal_commit
- * b. racing with ->readpage, ->write_begin, and ext4_get_block
- * in move_extent_per_page
- */
- double_up_write_data_sem(orig_inode, donor_inode);
-
- while (orig_page_offset <= seq_end_page) {
-
- /* Swap original branches with new branches */
- block_len_in_page = move_extent_per_page(
- o_filp, donor_inode,
- orig_page_offset,
- data_offset_in_page,
- block_len_in_page, uninit,
- &ret1);
-
- /* Count how many blocks we have exchanged */
- *moved_len += block_len_in_page;
- if (ret1 < 0)
- break;
- if (*moved_len > len) {
- EXT4_ERROR_INODE(orig_inode,
-					"We replaced too many blocks! "
- "sum of replaced: %llu requested: %llu",
- *moved_len, len);
- ret1 = -EIO;
- break;
- }
-
- orig_page_offset++;
- data_offset_in_page = 0;
- rest_blocks -= block_len_in_page;
- if (rest_blocks > blocks_per_page)
- block_len_in_page = blocks_per_page;
- else
- block_len_in_page = rest_blocks;
- }
-
- double_down_write_data_sem(orig_inode, donor_inode);
- if (ret1 < 0)
- break;
-
- /* Decrease buffer counter */
- if (holecheck_path)
- ext4_ext_drop_refs(holecheck_path);
- ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path);
- if (ret1)
- break;
- depth = holecheck_path->p_depth;
-
- /* Decrease buffer counter */
- if (orig_path)
- ext4_ext_drop_refs(orig_path);
- ret1 = get_ext_path(orig_inode, seq_start, &orig_path);
- if (ret1)
- break;
-
- ext_cur = holecheck_path[depth].p_ext;
- add_blocks = ext4_ext_get_actual_len(ext_cur);
- seq_blocks = 0;
-
- }
-out:
- if (*moved_len) {
- ext4_discard_preallocations(orig_inode);
- ext4_discard_preallocations(donor_inode);
- }
-
- if (orig_path) {
- ext4_ext_drop_refs(orig_path);
- kfree(orig_path);
- }
- if (holecheck_path) {
- ext4_ext_drop_refs(holecheck_path);
- kfree(holecheck_path);
- }
- double_up_write_data_sem(orig_inode, donor_inode);
- ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
-
- if (ret1)
- return ret1;
- else if (ret2)
- return ret2;
-
- return 0;
-}
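For context, ext4_move_extents() is reached from userspace through the EXT4_IOC_MOVE_EXT ioctl (this is how e4defrag drives it); a minimal caller sketch, assuming the struct layout and ioctl number from ext4.h, with defrag_range() invented for illustration:

#include <sys/ioctl.h>
#include <linux/types.h>

struct move_extent {
	__u32 reserved;		/* should be zero */
	__u32 donor_fd;		/* donor file descriptor */
	__u64 orig_start;	/* logical start block of the original file */
	__u64 donor_start;	/* logical start block of the donor file */
	__u64 len;		/* number of blocks to move */
	__u64 moved_len;	/* out: blocks actually moved */
};
#define EXT4_IOC_MOVE_EXT	_IOWR('f', 15, struct move_extent)

/* Swap "len" blocks starting at "start" between orig_fd and a
 * pre-allocated donor file. Error handling trimmed for brevity. */
static int defrag_range(int orig_fd, int donor_fd, __u64 start, __u64 len)
{
	struct move_extent me = {
		.donor_fd    = donor_fd,
		.orig_start  = start,
		.donor_start = start,	/* offsets must match, see checks above */
		.len         = len,
	};

	return ioctl(orig_fd, EXT4_IOC_MOVE_EXT, &me);
}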
diff --git a/ANDROID_3.4.5/fs/ext4/namei.c b/ANDROID_3.4.5/fs/ext4/namei.c
deleted file mode 100644
index 0a94cbbe..00000000
--- a/ANDROID_3.4.5/fs/ext4/namei.c
+++ /dev/null
@@ -1,2607 +0,0 @@
-/*
- * linux/fs/ext4/namei.c
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- * from
- *
- * linux/fs/minix/namei.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * Big-endian to little-endian byte-swapping/bitmaps by
- * David S. Miller (davem@caip.rutgers.edu), 1995
- * Directory entry file type support and forward compatibility hooks
- * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
- * Hash Tree Directory indexing (c)
- * Daniel Phillips, 2001
- * Hash Tree Directory indexing porting
- * Christopher Li, 2002
- * Hash Tree Directory indexing cleanup
- * Theodore Ts'o, 2002
- */
-
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-#include <linux/jbd2.h>
-#include <linux/time.h>
-#include <linux/fcntl.h>
-#include <linux/stat.h>
-#include <linux/string.h>
-#include <linux/quotaops.h>
-#include <linux/buffer_head.h>
-#include <linux/bio.h>
-#include "ext4.h"
-#include "ext4_jbd2.h"
-
-#include "xattr.h"
-#include "acl.h"
-
-#include <trace/events/ext4.h>
-/*
- * define how far ahead to read directories while searching them.
- */
-#define NAMEI_RA_CHUNKS 2
-#define NAMEI_RA_BLOCKS 4
-#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
-#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
-
-static struct buffer_head *ext4_append(handle_t *handle,
- struct inode *inode,
- ext4_lblk_t *block, int *err)
-{
- struct buffer_head *bh;
-
- *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
-
- bh = ext4_bread(handle, inode, *block, 1, err);
- if (bh) {
- inode->i_size += inode->i_sb->s_blocksize;
- EXT4_I(inode)->i_disksize = inode->i_size;
- *err = ext4_journal_get_write_access(handle, bh);
- if (*err) {
- brelse(bh);
- bh = NULL;
- }
- }
- return bh;
-}
-
-#ifndef assert
-#define assert(test) J_ASSERT(test)
-#endif
-
-#ifdef DX_DEBUG
-#define dxtrace(command) command
-#else
-#define dxtrace(command)
-#endif
-
-struct fake_dirent
-{
- __le32 inode;
- __le16 rec_len;
- u8 name_len;
- u8 file_type;
-};
-
-struct dx_countlimit
-{
- __le16 limit;
- __le16 count;
-};
-
-struct dx_entry
-{
- __le32 hash;
- __le32 block;
-};
-
-/*
- * dx_root_info is laid out so that if it should somehow get overlaid by a
- * dirent the two low bits of the hash version will be zero. Therefore, the
- * hash version mod 4 should never be 0. Sincerely, the paranoia department.
- */
-
-struct dx_root
-{
- struct fake_dirent dot;
- char dot_name[4];
- struct fake_dirent dotdot;
- char dotdot_name[4];
- struct dx_root_info
- {
- __le32 reserved_zero;
- u8 hash_version;
- u8 info_length; /* 8 */
- u8 indirect_levels;
- u8 unused_flags;
- }
- info;
- struct dx_entry entries[0];
-};
-
-struct dx_node
-{
- struct fake_dirent fake;
- struct dx_entry entries[0];
-};
-
-
-struct dx_frame
-{
- struct buffer_head *bh;
- struct dx_entry *entries;
- struct dx_entry *at;
-};
-
-struct dx_map_entry
-{
- u32 hash;
- u16 offs;
- u16 size;
-};
-
-static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
-static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
-static inline unsigned dx_get_hash(struct dx_entry *entry);
-static void dx_set_hash(struct dx_entry *entry, unsigned value);
-static unsigned dx_get_count(struct dx_entry *entries);
-static unsigned dx_get_limit(struct dx_entry *entries);
-static void dx_set_count(struct dx_entry *entries, unsigned value);
-static void dx_set_limit(struct dx_entry *entries, unsigned value);
-static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
-static unsigned dx_node_limit(struct inode *dir);
-static struct dx_frame *dx_probe(const struct qstr *d_name,
- struct inode *dir,
- struct dx_hash_info *hinfo,
- struct dx_frame *frame,
- int *err);
-static void dx_release(struct dx_frame *frames);
-static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
- struct dx_hash_info *hinfo, struct dx_map_entry map[]);
-static void dx_sort_map(struct dx_map_entry *map, unsigned count);
-static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
- struct dx_map_entry *offsets, int count, unsigned blocksize);
-static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
-static void dx_insert_block(struct dx_frame *frame,
- u32 hash, ext4_lblk_t block);
-static int ext4_htree_next_block(struct inode *dir, __u32 hash,
- struct dx_frame *frame,
- struct dx_frame *frames,
- __u32 *start_hash);
-static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
- const struct qstr *d_name,
- struct ext4_dir_entry_2 **res_dir,
- int *err);
-static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
- struct inode *inode);
-
-/*
- * p is at least 6 bytes before the end of page
- */
-static inline struct ext4_dir_entry_2 *
-ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
-{
- return (struct ext4_dir_entry_2 *)((char *)p +
- ext4_rec_len_from_disk(p->rec_len, blocksize));
-}
-
-/*
- * Future: use high four bits of block for coalesce-on-delete flags
- * Mask them off for now.
- */
-
-static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
-{
- return le32_to_cpu(entry->block) & 0x00ffffff;
-}
-
-static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
-{
- entry->block = cpu_to_le32(value);
-}
-
-static inline unsigned dx_get_hash(struct dx_entry *entry)
-{
- return le32_to_cpu(entry->hash);
-}
-
-static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
-{
- entry->hash = cpu_to_le32(value);
-}
-
-static inline unsigned dx_get_count(struct dx_entry *entries)
-{
- return le16_to_cpu(((struct dx_countlimit *) entries)->count);
-}
-
-static inline unsigned dx_get_limit(struct dx_entry *entries)
-{
- return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
-}
-
-static inline void dx_set_count(struct dx_entry *entries, unsigned value)
-{
- ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
-}
-
-static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
-{
- ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
-}
-
-static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
-{
- unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
- EXT4_DIR_REC_LEN(2) - infosize;
- return entry_space / sizeof(struct dx_entry);
-}
-
-static inline unsigned dx_node_limit(struct inode *dir)
-{
- unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
- return entry_space / sizeof(struct dx_entry);
-}
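Worked numbers for the two limits above, assuming a 4096-byte block and the usual EXT4_DIR_REC_LEN rounding:

/*
 * EXT4_DIR_REC_LEN(len) = (8 + len + 3) & ~3, so REC_LEN(1) = 12,
 * REC_LEN(2) = 12 and REC_LEN(0) = 8. With info_length = 8:
 *
 *   dx_root_limit = (4096 - 12 - 12 - 8) / 8 = 508 entries
 *   dx_node_limit = (4096 - 8) / 8          = 511 entries
 *
 * A root plus one level of index nodes can therefore address about
 * 508 * 511 leaf blocks of directory entries.
 */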
-
-/*
- * Debug
- */
-#ifdef DX_DEBUG
-static void dx_show_index(char * label, struct dx_entry *entries)
-{
- int i, n = dx_get_count (entries);
- printk(KERN_DEBUG "%s index ", label);
- for (i = 0; i < n; i++) {
- printk("%x->%lu ", i ? dx_get_hash(entries + i) :
- 0, (unsigned long)dx_get_block(entries + i));
- }
- printk("\n");
-}
-
-struct stats
-{
- unsigned names;
- unsigned space;
- unsigned bcount;
-};
-
-static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de,
- int size, int show_names)
-{
- unsigned names = 0, space = 0;
- char *base = (char *) de;
- struct dx_hash_info h = *hinfo;
-
- printk("names: ");
- while ((char *) de < base + size)
- {
- if (de->inode)
- {
- if (show_names)
- {
- int len = de->name_len;
- char *name = de->name;
- while (len--) printk("%c", *name++);
- ext4fs_dirhash(de->name, de->name_len, &h);
- printk(":%x.%u ", h.hash,
- (unsigned) ((char *) de - base));
- }
- space += EXT4_DIR_REC_LEN(de->name_len);
- names++;
- }
- de = ext4_next_entry(de, size);
- }
- printk("(%i)\n", names);
- return (struct stats) { names, space, 1 };
-}
-
-struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
- struct dx_entry *entries, int levels)
-{
- unsigned blocksize = dir->i_sb->s_blocksize;
- unsigned count = dx_get_count(entries), names = 0, space = 0, i;
- unsigned bcount = 0;
- struct buffer_head *bh;
- int err;
- printk("%i indexed blocks...\n", count);
- for (i = 0; i < count; i++, entries++)
- {
- ext4_lblk_t block = dx_get_block(entries);
- ext4_lblk_t hash = i ? dx_get_hash(entries): 0;
- u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
- struct stats stats;
- printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range);
- if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue;
- stats = levels?
- dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
- dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
- names += stats.names;
- space += stats.space;
- bcount += stats.bcount;
- brelse(bh);
- }
- if (bcount)
- printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
- levels ? "" : " ", names, space/bcount,
- (space/bcount)*100/blocksize);
- return (struct stats) { names, space, bcount};
-}
-#endif /* DX_DEBUG */
-
-/*
- * Probe for a directory leaf block to search.
- *
- * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
- * error in the directory index, and the caller should fall back to
- * searching the directory normally. The callers of dx_probe **MUST**
- * check for this error code, and make sure it never gets reflected
- * back to userspace.
- */
-static struct dx_frame *
-dx_probe(const struct qstr *d_name, struct inode *dir,
- struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
-{
- unsigned count, indirect;
- struct dx_entry *at, *entries, *p, *q, *m;
- struct dx_root *root;
- struct buffer_head *bh;
- struct dx_frame *frame = frame_in;
- u32 hash;
-
- frame->bh = NULL;
- if (!(bh = ext4_bread (NULL,dir, 0, 0, err)))
- goto fail;
- root = (struct dx_root *) bh->b_data;
- if (root->info.hash_version != DX_HASH_TEA &&
- root->info.hash_version != DX_HASH_HALF_MD4 &&
- root->info.hash_version != DX_HASH_LEGACY) {
- ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
- root->info.hash_version);
- brelse(bh);
- *err = ERR_BAD_DX_DIR;
- goto fail;
- }
- hinfo->hash_version = root->info.hash_version;
- if (hinfo->hash_version <= DX_HASH_TEA)
- hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
- hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
- if (d_name)
- ext4fs_dirhash(d_name->name, d_name->len, hinfo);
- hash = hinfo->hash;
-
- if (root->info.unused_flags & 1) {
- ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
- root->info.unused_flags);
- brelse(bh);
- *err = ERR_BAD_DX_DIR;
- goto fail;
- }
-
- if ((indirect = root->info.indirect_levels) > 1) {
- ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
- root->info.indirect_levels);
- brelse(bh);
- *err = ERR_BAD_DX_DIR;
- goto fail;
- }
-
- entries = (struct dx_entry *) (((char *)&root->info) +
- root->info.info_length);
-
- if (dx_get_limit(entries) != dx_root_limit(dir,
- root->info.info_length)) {
- ext4_warning(dir->i_sb, "dx entry: limit != root limit");
- brelse(bh);
- *err = ERR_BAD_DX_DIR;
- goto fail;
- }
-
- dxtrace(printk("Look up %x", hash));
- while (1)
- {
- count = dx_get_count(entries);
- if (!count || count > dx_get_limit(entries)) {
- ext4_warning(dir->i_sb,
- "dx entry: no count or count > limit");
- brelse(bh);
- *err = ERR_BAD_DX_DIR;
- goto fail2;
- }
-
- p = entries + 1;
- q = entries + count - 1;
- while (p <= q)
- {
- m = p + (q - p)/2;
- dxtrace(printk("."));
- if (dx_get_hash(m) > hash)
- q = m - 1;
- else
- p = m + 1;
- }
-
- if (0) // linear search cross check
- {
- unsigned n = count - 1;
- at = entries;
- while (n--)
- {
- dxtrace(printk(","));
- if (dx_get_hash(++at) > hash)
- {
- at--;
- break;
- }
- }
- assert (at == p - 1);
- }
-
- at = p - 1;
- dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
- frame->bh = bh;
- frame->entries = entries;
- frame->at = at;
- if (!indirect--) return frame;
- if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
- goto fail2;
- at = entries = ((struct dx_node *) bh->b_data)->entries;
- if (dx_get_limit(entries) != dx_node_limit (dir)) {
- ext4_warning(dir->i_sb,
- "dx entry: limit != node limit");
- brelse(bh);
- *err = ERR_BAD_DX_DIR;
- goto fail2;
- }
- frame++;
- frame->bh = NULL;
- }
-fail2:
- while (frame >= frame_in) {
- brelse(frame->bh);
- frame--;
- }
-fail:
- if (*err == ERR_BAD_DX_DIR)
- ext4_warning(dir->i_sb,
- "Corrupt dir inode %lu, running e2fsck is "
- "recommended.", dir->i_ino);
- return NULL;
-}
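The contract described in the comment above looks like this on the caller's side (a sketch only; the real fallback lives in ext4_find_entry()):

/*
 *	frame = dx_probe(d_name, dir, &hinfo, frames, &err);
 *	if (!frame) {
 *		if (err == ERR_BAD_DX_DIR)
 *			fall back to the linear directory search
 *			instead of surfacing the error;
 *		else
 *			propagate err to the caller;
 *	}
 */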
-
-static void dx_release (struct dx_frame *frames)
-{
- if (frames[0].bh == NULL)
- return;
-
- if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
- brelse(frames[1].bh);
- brelse(frames[0].bh);
-}
-
-/*
- * This function increments the frame pointer to search the next leaf
- * block, and reads in the necessary intervening nodes if the search
- * should continue.  Whether or not the search should continue is
- * controlled by the hash parameter. If the hash value is even, then
- * the search is only continued if the next block starts with that
- * hash value. This is used if we are searching for a specific file.
- *
- * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
- *
- * This function returns 1 if the caller should continue to search,
- * or 0 if it should not. If there is an error reading one of the
- * index blocks, it will return a negative error code.
- *
- * If start_hash is non-null, it will be filled in with the starting
- * hash of the next page.
- */
-static int ext4_htree_next_block(struct inode *dir, __u32 hash,
- struct dx_frame *frame,
- struct dx_frame *frames,
- __u32 *start_hash)
-{
- struct dx_frame *p;
- struct buffer_head *bh;
- int err, num_frames = 0;
- __u32 bhash;
-
- p = frame;
- /*
- * Find the next leaf page by incrementing the frame pointer.
- * If we run out of entries in the interior node, loop around and
- * increment pointer in the parent node. When we break out of
- * this loop, num_frames indicates the number of interior
- * nodes that need to be read.
- */
- while (1) {
- if (++(p->at) < p->entries + dx_get_count(p->entries))
- break;
- if (p == frames)
- return 0;
- num_frames++;
- p--;
- }
-
- /*
- * If the hash is 1, then continue only if the next page has a
- * continuation hash of any value. This is used for readdir
- * handling. Otherwise, check to see if the hash matches the
- * desired continuation hash.  If it doesn't, return, since
- * there's no point in reading the successive index pages.
- */
- bhash = dx_get_hash(p->at);
- if (start_hash)
- *start_hash = bhash;
- if ((hash & 1) == 0) {
- if ((bhash & ~1) != hash)
- return 0;
- }
- /*
- * If the hash is HASH_NB_ALWAYS, we always go to the next
- * block so no check is necessary
- */
- while (num_frames--) {
- if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at),
- 0, &err)))
- return err; /* Failure */
- p++;
- brelse(p->bh);
- p->bh = bh;
- p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
- }
- return 1;
-}
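A worked example of the even/odd hash convention used above (hypothetical hash values):

/*
 * Searching for hash 0x4a2c10b6 (even, i.e. a specific file):
 *   next leaf starts at 0x4a2c10b6 -> continue (same bucket)
 *   next leaf starts at 0x4a2c10b7 -> continue ((bhash & ~1) matches)
 *   next leaf starts at 0x4a2c2000 -> stop, no further match possible
 *
 * With hash == HASH_NB_ALWAYS (readdir), every leaf is visited and the
 * check is skipped entirely.
 */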
-
-
-/*
- * This function fills a red-black tree with information from a
- * directory block. It returns the number of directory entries loaded
- * into the tree. If there is an error, it is returned in err.
- */
-static int htree_dirblock_to_tree(struct file *dir_file,
- struct inode *dir, ext4_lblk_t block,
- struct dx_hash_info *hinfo,
- __u32 start_hash, __u32 start_minor_hash)
-{
- struct buffer_head *bh;
- struct ext4_dir_entry_2 *de, *top;
- int err, count = 0;
-
- dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
- (unsigned long)block));
- if (!(bh = ext4_bread (NULL, dir, block, 0, &err)))
- return err;
-
- de = (struct ext4_dir_entry_2 *) bh->b_data;
- top = (struct ext4_dir_entry_2 *) ((char *) de +
- dir->i_sb->s_blocksize -
- EXT4_DIR_REC_LEN(0));
- for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
- if (ext4_check_dir_entry(dir, NULL, de, bh,
- (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
- + ((char *)de - bh->b_data))) {
- /* On error, skip the f_pos to the next block. */
- dir_file->f_pos = (dir_file->f_pos |
- (dir->i_sb->s_blocksize - 1)) + 1;
- brelse(bh);
- return count;
- }
- ext4fs_dirhash(de->name, de->name_len, hinfo);
- if ((hinfo->hash < start_hash) ||
- ((hinfo->hash == start_hash) &&
- (hinfo->minor_hash < start_minor_hash)))
- continue;
- if (de->inode == 0)
- continue;
- if ((err = ext4_htree_store_dirent(dir_file,
- hinfo->hash, hinfo->minor_hash, de)) != 0) {
- brelse(bh);
- return err;
- }
- count++;
- }
- brelse(bh);
- return count;
-}
-
-
-/*
- * This function fills a red-black tree with information from a
- * directory. We start scanning the directory in hash order, starting
- * at start_hash and start_minor_hash.
- *
- * This function returns the number of entries inserted into the tree,
- * or a negative error code.
- */
-int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
- __u32 start_minor_hash, __u32 *next_hash)
-{
- struct dx_hash_info hinfo;
- struct ext4_dir_entry_2 *de;
- struct dx_frame frames[2], *frame;
- struct inode *dir;
- ext4_lblk_t block;
- int count = 0;
- int ret, err;
- __u32 hashval;
-
- dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
- start_hash, start_minor_hash));
- dir = dir_file->f_path.dentry->d_inode;
- if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
- hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
- if (hinfo.hash_version <= DX_HASH_TEA)
- hinfo.hash_version +=
- EXT4_SB(dir->i_sb)->s_hash_unsigned;
- hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
- count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
- start_hash, start_minor_hash);
- *next_hash = ~0;
- return count;
- }
- hinfo.hash = start_hash;
- hinfo.minor_hash = 0;
- frame = dx_probe(NULL, dir, &hinfo, frames, &err);
- if (!frame)
- return err;
-
- /* Add '.' and '..' from the htree header */
- if (!start_hash && !start_minor_hash) {
- de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
- if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0)
- goto errout;
- count++;
- }
- if (start_hash < 2 || (start_hash == 2 && start_minor_hash == 0)) {
- de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
- de = ext4_next_entry(de, dir->i_sb->s_blocksize);
- if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0)
- goto errout;
- count++;
- }
-
- while (1) {
- block = dx_get_block(frame->at);
- ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
- start_hash, start_minor_hash);
- if (ret < 0) {
- err = ret;
- goto errout;
- }
- count += ret;
- hashval = ~0;
- ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
- frame, frames, &hashval);
- *next_hash = hashval;
- if (ret < 0) {
- err = ret;
- goto errout;
- }
- /*
- * Stop if: (a) there are no more entries, or
- * (b) we have inserted at least one entry and the
- * next hash value is not a continuation
- */
- if ((ret == 0) ||
- (count && ((hashval & 1) == 0)))
- break;
- }
- dx_release(frames);
- dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
- "next hash: %x\n", count, *next_hash));
- return count;
-errout:
- dx_release(frames);
- return (err);
-}
-
-
-/*
- * Directory block splitting, compacting
- */
-
-/*
- * Create map of hash values, offsets, and sizes, stored at end of block.
- * Returns number of entries mapped.
- */
-static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
- struct dx_hash_info *hinfo,
- struct dx_map_entry *map_tail)
-{
- int count = 0;
- char *base = (char *) de;
- struct dx_hash_info h = *hinfo;
-
- while ((char *) de < base + blocksize) {
- if (de->name_len && de->inode) {
- ext4fs_dirhash(de->name, de->name_len, &h);
- map_tail--;
- map_tail->hash = h.hash;
- map_tail->offs = ((char *) de - base)>>2;
- map_tail->size = le16_to_cpu(de->rec_len);
- count++;
- cond_resched();
- }
- /* XXX: do we need to check rec_len == 0 case? -Chris */
- de = ext4_next_entry(de, blocksize);
- }
- return count;
-}
-
-/* Sort map by hash value */
-static void dx_sort_map (struct dx_map_entry *map, unsigned count)
-{
- struct dx_map_entry *p, *q, *top = map + count - 1;
- int more;
- /* Combsort until bubble sort doesn't suck */
- while (count > 2) {
- count = count*10/13;
- if (count - 9 < 2) /* 9, 10 -> 11 */
- count = 11;
- for (p = top, q = p - count; q >= map; p--, q--)
- if (p->hash < q->hash)
- swap(*p, *q);
- }
- /* Garden variety bubble sort */
- do {
- more = 0;
- q = top;
- while (q-- > map) {
- if (q[1].hash >= q[0].hash)
- continue;
- swap(*(q+1), *q);
- more = 1;
- }
- } while(more);
-}
-
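-/*
- * Minimal userspace sketch (not from the original source) of the
- * comb-sort pass above, run over plain integers instead of struct
- * dx_map_entry; the 10/13 gap shrink factor and the 9,10 -> 11
- * adjustment are copied from dx_sort_map().
- */
-#include <stdio.h>
-
-static void swap_u32(unsigned *a, unsigned *b)
-{
-	unsigned t = *a; *a = *b; *b = t;
-}
-
-static void comb_sort(unsigned *map, unsigned count)
-{
-	unsigned *p, *q, *top = map + count - 1;
-	int more;
-
-	while (count > 2) {
-		count = count * 10 / 13;
-		if (count - 9 < 2)	/* 9, 10 -> 11 */
-			count = 11;
-		for (p = top, q = p - count; q >= map; p--, q--)
-			if (*p < *q)
-				swap_u32(p, q);
-	}
-	do {				/* bubble-sort finish */
-		more = 0;
-		q = top;
-		while (q-- > map)
-			if (q[1] < q[0]) {
-				swap_u32(q + 1, q);
-				more = 1;
-			}
-	} while (more);
-}
-
-int main(void)
-{
-	unsigned v[] = { 5, 3, 9, 1, 7, 2, 8, 4, 6, 0 };
-	unsigned i;
-
-	comb_sort(v, 10);
-	for (i = 0; i < 10; i++)
-		printf("%u ", v[i]);
-	printf("\n");
-	return 0;
-}
-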
-static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
-{
- struct dx_entry *entries = frame->entries;
- struct dx_entry *old = frame->at, *new = old + 1;
- int count = dx_get_count(entries);
-
- assert(count < dx_get_limit(entries));
- assert(old < entries + count);
- memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
- dx_set_hash(new, hash);
- dx_set_block(new, block);
- dx_set_count(entries, count + 1);
-}
-
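-/*
- * Userspace sketch (not from the original source) of the sorted insert
- * above: memmove the tail of the entry array up one slot and drop the
- * new (hash, block) pair in just after the probe position. struct
- * entry is a simplified stand-in for struct dx_entry.
- */
-#include <stdio.h>
-#include <string.h>
-
-struct entry { unsigned hash, block; };
-
-static void insert_after(struct entry *entries, unsigned *count,
-			 struct entry *at, unsigned hash, unsigned block)
-{
-	struct entry *new = at + 1;
-
-	memmove(new + 1, new, (char *)(entries + *count) - (char *)new);
-	new->hash = hash;
-	new->block = block;
-	(*count)++;
-}
-
-int main(void)
-{
-	struct entry e[8] = { { 0, 1 }, { 100, 2 }, { 300, 3 } };
-	unsigned count = 3, i;
-
-	insert_after(e, &count, &e[1], 200, 4);	/* probe stopped at hash 100 */
-	for (i = 0; i < count; i++)
-		printf("hash %u -> block %u\n", e[i].hash, e[i].block);
-	return 0;
-}
-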
-static void ext4_update_dx_flag(struct inode *inode)
-{
- if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_COMPAT_DIR_INDEX))
- ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
-}
-
-/*
- * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure.
- *
- * `len <= EXT4_NAME_LEN' is guaranteed by caller.
- * `de != NULL' is guaranteed by caller.
- */
-static inline int ext4_match (int len, const char * const name,
- struct ext4_dir_entry_2 * de)
-{
- if (len != de->name_len)
- return 0;
- if (!de->inode)
- return 0;
- return !memcmp(name, de->name, len);
-}
-
-/*
- * Returns 0 if not found, -1 on failure, and 1 on success
- */
-static inline int search_dirblock(struct buffer_head *bh,
- struct inode *dir,
- const struct qstr *d_name,
- unsigned int offset,
- struct ext4_dir_entry_2 ** res_dir)
-{
- struct ext4_dir_entry_2 * de;
- char * dlimit;
- int de_len;
- const char *name = d_name->name;
- int namelen = d_name->len;
-
- de = (struct ext4_dir_entry_2 *) bh->b_data;
- dlimit = bh->b_data + dir->i_sb->s_blocksize;
- while ((char *) de < dlimit) {
- /* this code is executed quadratically often */
- /* do minimal checking `by hand' */
-
- if ((char *) de + namelen <= dlimit &&
- ext4_match (namelen, name, de)) {
- /* found a match - just to be sure, do a full check */
- if (ext4_check_dir_entry(dir, NULL, de, bh, offset))
- return -1;
- *res_dir = de;
- return 1;
- }
- /* prevent looping on a bad block */
- de_len = ext4_rec_len_from_disk(de->rec_len,
- dir->i_sb->s_blocksize);
- if (de_len <= 0)
- return -1;
- offset += de_len;
- de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
- }
- return 0;
-}
-
-
-/*
- * ext4_find_entry()
- *
- * finds an entry in the specified directory with the wanted name. It
- * returns the cache buffer in which the entry was found, and the entry
- * itself (as a parameter - res_dir). It does NOT read the inode of the
- * entry - you'll have to do that yourself if you want to.
- *
- * The returned buffer_head has ->b_count elevated. The caller is expected
- * to brelse() it when appropriate.
- */
-static struct buffer_head * ext4_find_entry (struct inode *dir,
- const struct qstr *d_name,
- struct ext4_dir_entry_2 ** res_dir)
-{
- struct super_block *sb;
- struct buffer_head *bh_use[NAMEI_RA_SIZE];
- struct buffer_head *bh, *ret = NULL;
- ext4_lblk_t start, block, b;
- const u8 *name = d_name->name;
- int ra_max = 0; /* Number of bh's in the readahead
- buffer, bh_use[] */
- int ra_ptr = 0; /* Current index into readahead
- buffer */
- int num = 0;
- ext4_lblk_t nblocks;
- int i, err;
- int namelen;
-
- *res_dir = NULL;
- sb = dir->i_sb;
- namelen = d_name->len;
- if (namelen > EXT4_NAME_LEN)
- return NULL;
- if ((namelen <= 2) && (name[0] == '.') &&
- (name[1] == '.' || name[1] == '\0')) {
- /*
- * "." or ".." will only be in the first block
- * NFS may look up ".."; "." should be handled by the VFS
- */
- block = start = 0;
- nblocks = 1;
- goto restart;
- }
- if (is_dx(dir)) {
- bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
- /*
- * On success, or if the error was file not found,
- * return. Otherwise, fall back to doing a search the
- * old fashioned way.
- */
- if (bh || (err != ERR_BAD_DX_DIR))
- return bh;
- dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
- "falling back\n"));
- }
- nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
- start = EXT4_I(dir)->i_dir_start_lookup;
- if (start >= nblocks)
- start = 0;
- block = start;
-restart:
- do {
- /*
- * We deal with the read-ahead logic here.
- */
- if (ra_ptr >= ra_max) {
- /* Refill the readahead buffer */
- ra_ptr = 0;
- b = block;
- for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
- /*
- * Terminate if we reach the end of the
- * directory and must wrap, or if our
- * search has finished at this block.
- */
- if (b >= nblocks || (num && block == start)) {
- bh_use[ra_max] = NULL;
- break;
- }
- num++;
- bh = ext4_getblk(NULL, dir, b++, 0, &err);
- bh_use[ra_max] = bh;
- if (bh)
- ll_rw_block(READ | REQ_META | REQ_PRIO,
- 1, &bh);
- }
- }
- if ((bh = bh_use[ra_ptr++]) == NULL)
- goto next;
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh)) {
- /* read error, skip block & hope for the best */
- EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
- (unsigned long) block);
- brelse(bh);
- goto next;
- }
- i = search_dirblock(bh, dir, d_name,
- block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
- if (i == 1) {
- EXT4_I(dir)->i_dir_start_lookup = block;
- ret = bh;
- goto cleanup_and_exit;
- } else {
- brelse(bh);
- if (i < 0)
- goto cleanup_and_exit;
- }
- next:
- if (++block >= nblocks)
- block = 0;
- } while (block != start);
-
- /*
- * If the directory has grown while we were searching, then
- * search the last part of the directory before giving up.
- */
- block = nblocks;
- nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
- if (block < nblocks) {
- start = 0;
- goto restart;
- }
-
-cleanup_and_exit:
- /* Clean up the read-ahead blocks */
- for (; ra_ptr < ra_max; ra_ptr++)
- brelse(bh_use[ra_ptr]);
- return ret;
-}
-
-static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
- struct ext4_dir_entry_2 **res_dir, int *err)
-{
- struct super_block * sb = dir->i_sb;
- struct dx_hash_info hinfo;
- struct dx_frame frames[2], *frame;
- struct buffer_head *bh;
- ext4_lblk_t block;
- int retval;
-
- if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
- return NULL;
- do {
- block = dx_get_block(frame->at);
- if (!(bh = ext4_bread(NULL, dir, block, 0, err)))
- goto errout;
-
- retval = search_dirblock(bh, dir, d_name,
- block << EXT4_BLOCK_SIZE_BITS(sb),
- res_dir);
- if (retval == 1) { /* Success! */
- dx_release(frames);
- return bh;
- }
- brelse(bh);
- if (retval == -1) {
- *err = ERR_BAD_DX_DIR;
- goto errout;
- }
-
- /* Check to see if we should continue to search */
- retval = ext4_htree_next_block(dir, hinfo.hash, frame,
- frames, NULL);
- if (retval < 0) {
- ext4_warning(sb,
- "error reading index page in directory #%lu",
- dir->i_ino);
- *err = retval;
- goto errout;
- }
- } while (retval == 1);
-
- *err = -ENOENT;
-errout:
- dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
- dx_release (frames);
- return NULL;
-}
-
-static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
-{
- struct inode *inode;
- struct ext4_dir_entry_2 *de;
- struct buffer_head *bh;
-
- if (dentry->d_name.len > EXT4_NAME_LEN)
- return ERR_PTR(-ENAMETOOLONG);
-
- bh = ext4_find_entry(dir, &dentry->d_name, &de);
- inode = NULL;
- if (bh) {
- __u32 ino = le32_to_cpu(de->inode);
- brelse(bh);
- if (!ext4_valid_inum(dir->i_sb, ino)) {
- EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
- return ERR_PTR(-EIO);
- }
- if (unlikely(ino == dir->i_ino)) {
- EXT4_ERROR_INODE(dir, "'%.*s' linked to parent dir",
- dentry->d_name.len,
- dentry->d_name.name);
- return ERR_PTR(-EIO);
- }
- inode = ext4_iget(dir->i_sb, ino);
- if (inode == ERR_PTR(-ESTALE)) {
- EXT4_ERROR_INODE(dir,
- "deleted inode referenced: %u",
- ino);
- return ERR_PTR(-EIO);
- }
- }
- return d_splice_alias(inode, dentry);
-}
-
-
-struct dentry *ext4_get_parent(struct dentry *child)
-{
- __u32 ino;
- static const struct qstr dotdot = {
- .name = "..",
- .len = 2,
- };
- struct ext4_dir_entry_2 * de;
- struct buffer_head *bh;
-
- bh = ext4_find_entry(child->d_inode, &dotdot, &de);
- if (!bh)
- return ERR_PTR(-ENOENT);
- ino = le32_to_cpu(de->inode);
- brelse(bh);
-
- if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
- EXT4_ERROR_INODE(child->d_inode,
- "bad parent inode number: %u", ino);
- return ERR_PTR(-EIO);
- }
-
- return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
-}
-
-#define S_SHIFT 12
-static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = {
- [S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE,
- [S_IFDIR >> S_SHIFT] = EXT4_FT_DIR,
- [S_IFCHR >> S_SHIFT] = EXT4_FT_CHRDEV,
- [S_IFBLK >> S_SHIFT] = EXT4_FT_BLKDEV,
- [S_IFIFO >> S_SHIFT] = EXT4_FT_FIFO,
- [S_IFSOCK >> S_SHIFT] = EXT4_FT_SOCK,
- [S_IFLNK >> S_SHIFT] = EXT4_FT_SYMLINK,
-};
-
-static inline void ext4_set_de_type(struct super_block *sb,
- struct ext4_dir_entry_2 *de,
- umode_t mode) {
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE))
- de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
-}
-
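-/*
- * Userspace sketch (not from the original source) of the mode-to-file-
- * type mapping above; the mode constants come from <sys/stat.h> and
- * the FT_* byte codes follow the ext4 on-disk values (1 = regular
- * file, 2 = directory, 7 = symlink).
- */
-#include <stdio.h>
-#include <sys/stat.h>
-
-#define FT_REG_FILE 1
-#define FT_DIR      2
-#define FT_SYMLINK  7
-
-#define S_SHIFT 12
-static const unsigned char type_by_mode[S_IFMT >> S_SHIFT] = {
-	[S_IFREG >> S_SHIFT] = FT_REG_FILE,
-	[S_IFDIR >> S_SHIFT] = FT_DIR,
-	[S_IFLNK >> S_SHIFT] = FT_SYMLINK,
-};
-
-int main(void)
-{
-	printf("S_IFDIR -> %u\n", type_by_mode[(S_IFDIR & S_IFMT) >> S_SHIFT]);
-	printf("S_IFLNK -> %u\n", type_by_mode[(S_IFLNK & S_IFMT) >> S_SHIFT]);
-	return 0;
-}
-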
-/*
- * Move count entries from end of map between two memory locations.
- * Returns pointer to last entry moved.
- */
-static struct ext4_dir_entry_2 *
-dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
- unsigned blocksize)
-{
- unsigned rec_len = 0;
-
- while (count--) {
- struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
- (from + (map->offs<<2));
- rec_len = EXT4_DIR_REC_LEN(de->name_len);
- memcpy (to, de, rec_len);
- ((struct ext4_dir_entry_2 *) to)->rec_len =
- ext4_rec_len_to_disk(rec_len, blocksize);
- de->inode = 0;
- map++;
- to += rec_len;
- }
- return (struct ext4_dir_entry_2 *) (to - rec_len);
-}
-
-/*
- * Compact each dir entry in the range to the minimal rec_len.
- * Returns pointer to last entry in range.
- */
-static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
-{
- struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
- unsigned rec_len = 0;
-
- prev = to = de;
- while ((char*)de < base + blocksize) {
- next = ext4_next_entry(de, blocksize);
- if (de->inode && de->name_len) {
- rec_len = EXT4_DIR_REC_LEN(de->name_len);
- if (de > to)
- memmove(to, de, rec_len);
- to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
- prev = to;
- to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
- }
- de = next;
- }
- return prev;
-}
-
-/*
- * Split a full leaf block to make room for a new dir entry.
- * Allocate a new block, and move entries so that they are approx. equally full.
- * Returns pointer to de in block into which the new entry will be inserted.
- */
-static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
- struct buffer_head **bh,struct dx_frame *frame,
- struct dx_hash_info *hinfo, int *error)
-{
- unsigned blocksize = dir->i_sb->s_blocksize;
- unsigned count, continued;
- struct buffer_head *bh2;
- ext4_lblk_t newblock;
- u32 hash2;
- struct dx_map_entry *map;
- char *data1 = (*bh)->b_data, *data2;
- unsigned split, move, size;
- struct ext4_dir_entry_2 *de = NULL, *de2;
- int err = 0, i;
-
- bh2 = ext4_append (handle, dir, &newblock, &err);
- if (!(bh2)) {
- brelse(*bh);
- *bh = NULL;
- goto errout;
- }
-
- BUFFER_TRACE(*bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, *bh);
- if (err)
- goto journal_error;
-
- BUFFER_TRACE(frame->bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, frame->bh);
- if (err)
- goto journal_error;
-
- data2 = bh2->b_data;
-
- /* create map in the end of data2 block */
- map = (struct dx_map_entry *) (data2 + blocksize);
- count = dx_make_map((struct ext4_dir_entry_2 *) data1,
- blocksize, hinfo, map);
- map -= count;
- dx_sort_map(map, count);
- /* Split the existing block in the middle, size-wise */
- size = 0;
- move = 0;
- for (i = count-1; i >= 0; i--) {
- /* is more than half of this entry in 2nd half of the block? */
- if (size + map[i].size/2 > blocksize/2)
- break;
- size += map[i].size;
- move++;
- }
- /* map index at which we will split */
- split = count - move;
- hash2 = map[split].hash;
- continued = hash2 == map[split - 1].hash;
- dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
- (unsigned long)dx_get_block(frame->at),
- hash2, split, count-split));
-
- /* Fancy dance to stay within two buffers */
- de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize);
- de = dx_pack_dirents(data1, blocksize);
- de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de,
- blocksize);
- de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2,
- blocksize);
- dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
- dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
-
- /* Which block gets the new entry? */
- if (hinfo->hash >= hash2) {
- swap(*bh, bh2);
- de = de2;
- }
- dx_insert_block(frame, hash2 + continued, newblock);
- err = ext4_handle_dirty_metadata(handle, dir, bh2);
- if (err)
- goto journal_error;
- err = ext4_handle_dirty_metadata(handle, dir, frame->bh);
- if (err)
- goto journal_error;
- brelse(bh2);
- dxtrace(dx_show_index("frame", frame->entries));
- return de;
-
-journal_error:
- brelse(*bh);
- brelse(bh2);
- *bh = NULL;
- ext4_std_error(dir->i_sb, err);
-errout:
- *error = err;
- return NULL;
-}
-
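-/*
- * Userspace sketch (not from the original source) of the split-point
- * selection in do_split() above: walk the hash-sorted map from the
- * tail, accumulating record sizes, and stop once moving another entry
- * would push past half a block. sizes[] is a made-up stand-in for the
- * map[i].size fields.
- */
-#include <stdio.h>
-
-int main(void)
-{
-	unsigned sizes[] = { 16, 24, 40, 12, 20, 28, 16, 32 };
-	unsigned count = 8, blocksize = 128;
-	unsigned size = 0, move = 0;
-	int i;
-
-	for (i = count - 1; i >= 0; i--) {
-		/* is more than half of this entry in the 2nd half? */
-		if (size + sizes[i] / 2 > blocksize / 2)
-			break;
-		size += sizes[i];
-		move++;
-	}
-	printf("split at index %u: move %u entries (%u bytes)\n",
-	       count - move, move, size);
-	return 0;
-}
-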
-/*
- * Add a new entry into a directory (leaf) block. If de is non-NULL,
- * it points to a directory entry which is guaranteed to be large
- * enough for the new directory entry. If de is NULL, then
- * add_dirent_to_buf will search the directory block for free
- * space. It returns -ENOSPC if no space is available, -EIO if the
- * block is corrupted, and -EEXIST if the directory entry already exists.
- */
-static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
- struct inode *inode, struct ext4_dir_entry_2 *de,
- struct buffer_head *bh)
-{
- struct inode *dir = dentry->d_parent->d_inode;
- const char *name = dentry->d_name.name;
- int namelen = dentry->d_name.len;
- unsigned int offset = 0;
- unsigned int blocksize = dir->i_sb->s_blocksize;
- unsigned short reclen;
- int nlen, rlen, err;
- char *top;
-
- reclen = EXT4_DIR_REC_LEN(namelen);
- if (!de) {
- de = (struct ext4_dir_entry_2 *)bh->b_data;
- top = bh->b_data + blocksize - reclen;
- while ((char *) de <= top) {
- if (ext4_check_dir_entry(dir, NULL, de, bh, offset))
- return -EIO;
- if (ext4_match(namelen, name, de))
- return -EEXIST;
- nlen = EXT4_DIR_REC_LEN(de->name_len);
- rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
- if ((de->inode? rlen - nlen: rlen) >= reclen)
- break;
- de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
- offset += rlen;
- }
- if ((char *) de > top)
- return -ENOSPC;
- }
- BUFFER_TRACE(bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, bh);
- if (err) {
- ext4_std_error(dir->i_sb, err);
- return err;
- }
-
- /* By now the buffer is marked for journaling */
- nlen = EXT4_DIR_REC_LEN(de->name_len);
- rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
- if (de->inode) {
- struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
- de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, blocksize);
- de->rec_len = ext4_rec_len_to_disk(nlen, blocksize);
- de = de1;
- }
- de->file_type = EXT4_FT_UNKNOWN;
- if (inode) {
- de->inode = cpu_to_le32(inode->i_ino);
- ext4_set_de_type(dir->i_sb, de, inode->i_mode);
- } else
- de->inode = 0;
- de->name_len = namelen;
- memcpy(de->name, name, namelen);
- /*
- * XXX shouldn't update any times until successful
- * completion of syscall, but too many callers depend
- * on this.
- *
- * XXX similarly, too many callers depend on
- * ext4_new_inode() setting the times, but error
- * recovery deletes the inode, so the worst that can
- * happen is that the times are slightly out of date
- * and/or different from the directory change time.
- */
- dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
- ext4_update_dx_flag(dir);
- dir->i_version++;
- ext4_mark_inode_dirty(handle, dir);
- BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, dir, bh);
- if (err)
- ext4_std_error(dir->i_sb, err);
- return 0;
-}
-
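-/*
- * Userspace sketch (not from the original source) of the fit test in
- * add_dirent_to_buf() above: a live entry can only donate its slack
- * (rec_len minus the minimal record length for its own name), while a
- * deleted entry (inode == 0) donates its whole record. REC_LEN mirrors
- * EXT4_DIR_REC_LEN(): an 8-byte header plus the name, rounded up to 4.
- */
-#include <stdio.h>
-
-#define REC_LEN(name_len) (((name_len) + 8 + 3) & ~3)
-
-static int fits(unsigned inode, unsigned rec_len, unsigned name_len,
-		unsigned needed)
-{
-	unsigned nlen = REC_LEN(name_len);
-
-	return (inode ? rec_len - nlen : rec_len) >= needed;
-}
-
-int main(void)
-{
-	unsigned needed = REC_LEN(9);	/* a 9-char name needs 20 bytes */
-
-	printf("%d\n", fits(42, 40, 3, needed));	/* 1: 28 bytes slack */
-	printf("%d\n", fits(42, 24, 3, needed));	/* 0: 12 bytes slack */
-	printf("%d\n", fits(0, 20, 3, needed));		/* 1: record is free */
-	return 0;
-}
-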
-/*
- * This converts a one block unindexed directory to a 3 block indexed
- * directory, and adds the dentry to the indexed directory.
- */
-static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
- struct inode *inode, struct buffer_head *bh)
-{
- struct inode *dir = dentry->d_parent->d_inode;
- const char *name = dentry->d_name.name;
- int namelen = dentry->d_name.len;
- struct buffer_head *bh2;
- struct dx_root *root;
- struct dx_frame frames[2], *frame;
- struct dx_entry *entries;
- struct ext4_dir_entry_2 *de, *de2;
- char *data1, *top;
- unsigned len;
- int retval;
- unsigned blocksize;
- struct dx_hash_info hinfo;
- ext4_lblk_t block;
- struct fake_dirent *fde;
-
- blocksize = dir->i_sb->s_blocksize;
- dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
- retval = ext4_journal_get_write_access(handle, bh);
- if (retval) {
- ext4_std_error(dir->i_sb, retval);
- brelse(bh);
- return retval;
- }
- root = (struct dx_root *) bh->b_data;
-
- /* The 0th block becomes the root, move the dirents out */
- fde = &root->dotdot;
- de = (struct ext4_dir_entry_2 *)((char *)fde +
- ext4_rec_len_from_disk(fde->rec_len, blocksize));
- if ((char *) de >= (((char *) root) + blocksize)) {
- EXT4_ERROR_INODE(dir, "invalid rec_len for '..'");
- brelse(bh);
- return -EIO;
- }
- len = ((char *) root) + blocksize - (char *) de;
-
- /* Allocate new block for the 0th block's dirents */
- bh2 = ext4_append(handle, dir, &block, &retval);
- if (!(bh2)) {
- brelse(bh);
- return retval;
- }
- ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
- data1 = bh2->b_data;
-
- memcpy (data1, de, len);
- de = (struct ext4_dir_entry_2 *) data1;
- top = data1 + len;
- while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
- de = de2;
- de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de,
- blocksize);
- /* Initialize the root; the dot dirents already exist */
- de = (struct ext4_dir_entry_2 *) (&root->dotdot);
- de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2),
- blocksize);
- memset (&root->info, 0, sizeof(root->info));
- root->info.info_length = sizeof(root->info);
- root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
- entries = root->entries;
- dx_set_block(entries, 1);
- dx_set_count(entries, 1);
- dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
-
- /* Initialize as for dx_probe */
- hinfo.hash_version = root->info.hash_version;
- if (hinfo.hash_version <= DX_HASH_TEA)
- hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
- hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
- ext4fs_dirhash(name, namelen, &hinfo);
- frame = frames;
- frame->entries = entries;
- frame->at = entries;
- frame->bh = bh;
- bh = bh2;
-
- ext4_handle_dirty_metadata(handle, dir, frame->bh);
- ext4_handle_dirty_metadata(handle, dir, bh);
-
- de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
- if (!de) {
- /*
- * Even if the block split failed, we have to properly write
- * out all the changes we made so far. Otherwise we can end up
- * with a corrupted filesystem.
- */
- ext4_mark_inode_dirty(handle, dir);
- dx_release(frames);
- return retval;
- }
- dx_release(frames);
-
- retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
- brelse(bh);
- return retval;
-}
-
-/*
- * ext4_add_entry()
- *
- * adds a file entry to the specified directory, using the same
- * semantics as ext4_find_entry(). It returns 0 on success or a
- * negative error code on failure.
- *
- * NOTE!! The inode part of 'de' is left at 0 - which means you
- * may not sleep between calling this and putting something into
- * the entry, as someone else might have used it while you slept.
- */
-static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
- struct inode *inode)
-{
- struct inode *dir = dentry->d_parent->d_inode;
- struct buffer_head *bh;
- struct ext4_dir_entry_2 *de;
- struct super_block *sb;
- int retval;
- int dx_fallback=0;
- unsigned blocksize;
- ext4_lblk_t block, blocks;
-
- sb = dir->i_sb;
- blocksize = sb->s_blocksize;
- if (!dentry->d_name.len)
- return -EINVAL;
- if (is_dx(dir)) {
- retval = ext4_dx_add_entry(handle, dentry, inode);
- if (!retval || (retval != ERR_BAD_DX_DIR))
- return retval;
- ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
- dx_fallback++;
- ext4_mark_inode_dirty(handle, dir);
- }
- blocks = dir->i_size >> sb->s_blocksize_bits;
- for (block = 0; block < blocks; block++) {
- bh = ext4_bread(handle, dir, block, 0, &retval);
- if (!bh)
- return retval;
- retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
- if (retval != -ENOSPC) {
- brelse(bh);
- return retval;
- }
-
- if (blocks == 1 && !dx_fallback &&
- EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
- return make_indexed_dir(handle, dentry, inode, bh);
- brelse(bh);
- }
- bh = ext4_append(handle, dir, &block, &retval);
- if (!bh)
- return retval;
- de = (struct ext4_dir_entry_2 *) bh->b_data;
- de->inode = 0;
- de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
- retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
- brelse(bh);
- if (retval == 0)
- ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
- return retval;
-}
-
-/*
- * Returns 0 for success, or a negative error value
- */
-static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
- struct inode *inode)
-{
- struct dx_frame frames[2], *frame;
- struct dx_entry *entries, *at;
- struct dx_hash_info hinfo;
- struct buffer_head *bh;
- struct inode *dir = dentry->d_parent->d_inode;
- struct super_block *sb = dir->i_sb;
- struct ext4_dir_entry_2 *de;
- int err;
-
- frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
- if (!frame)
- return err;
- entries = frame->entries;
- at = frame->at;
-
- if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
- goto cleanup;
-
- BUFFER_TRACE(bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, bh);
- if (err)
- goto journal_error;
-
- err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
- if (err != -ENOSPC)
- goto cleanup;
-
- /* Block full, should compress but for now just split */
- dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
- dx_get_count(entries), dx_get_limit(entries)));
- /* Need to split index? */
- if (dx_get_count(entries) == dx_get_limit(entries)) {
- ext4_lblk_t newblock;
- unsigned icount = dx_get_count(entries);
- int levels = frame - frames;
- struct dx_entry *entries2;
- struct dx_node *node2;
- struct buffer_head *bh2;
-
- if (levels && (dx_get_count(frames->entries) ==
- dx_get_limit(frames->entries))) {
- ext4_warning(sb, "Directory index full!");
- err = -ENOSPC;
- goto cleanup;
- }
- bh2 = ext4_append (handle, dir, &newblock, &err);
- if (!(bh2))
- goto cleanup;
- node2 = (struct dx_node *)(bh2->b_data);
- entries2 = node2->entries;
- memset(&node2->fake, 0, sizeof(struct fake_dirent));
- node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
- sb->s_blocksize);
- BUFFER_TRACE(frame->bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, frame->bh);
- if (err)
- goto journal_error;
- if (levels) {
- unsigned icount1 = icount/2, icount2 = icount - icount1;
- unsigned hash2 = dx_get_hash(entries + icount1);
- dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
- icount1, icount2));
-
- BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
- err = ext4_journal_get_write_access(handle,
- frames[0].bh);
- if (err)
- goto journal_error;
-
- memcpy((char *) entries2, (char *) (entries + icount1),
- icount2 * sizeof(struct dx_entry));
- dx_set_count(entries, icount1);
- dx_set_count(entries2, icount2);
- dx_set_limit(entries2, dx_node_limit(dir));
-
- /* Which index block gets the new entry? */
- if (at - entries >= icount1) {
- frame->at = at = at - entries - icount1 + entries2;
- frame->entries = entries = entries2;
- swap(frame->bh, bh2);
- }
- dx_insert_block(frames + 0, hash2, newblock);
- dxtrace(dx_show_index("node", frames[1].entries));
- dxtrace(dx_show_index("node",
- ((struct dx_node *) bh2->b_data)->entries));
- err = ext4_handle_dirty_metadata(handle, dir, bh2);
- if (err)
- goto journal_error;
- brelse (bh2);
- } else {
- dxtrace(printk(KERN_DEBUG
- "Creating second level index...\n"));
- memcpy((char *) entries2, (char *) entries,
- icount * sizeof(struct dx_entry));
- dx_set_limit(entries2, dx_node_limit(dir));
-
- /* Set up root */
- dx_set_count(entries, 1);
- dx_set_block(entries + 0, newblock);
- ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
-
- /* Add new access path frame */
- frame = frames + 1;
- frame->at = at = at - entries + entries2;
- frame->entries = entries = entries2;
- frame->bh = bh2;
- err = ext4_journal_get_write_access(handle,
- frame->bh);
- if (err)
- goto journal_error;
- }
- err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh);
- if (err) {
- ext4_std_error(inode->i_sb, err);
- goto cleanup;
- }
- }
- de = do_split(handle, dir, &bh, frame, &hinfo, &err);
- if (!de)
- goto cleanup;
- err = add_dirent_to_buf(handle, dentry, inode, de, bh);
- goto cleanup;
-
-journal_error:
- ext4_std_error(dir->i_sb, err);
-cleanup:
- if (bh)
- brelse(bh);
- dx_release(frames);
- return err;
-}
-
-/*
- * ext4_delete_entry deletes a directory entry by merging it with the
- * previous entry
- */
-static int ext4_delete_entry(handle_t *handle,
- struct inode *dir,
- struct ext4_dir_entry_2 *de_del,
- struct buffer_head *bh)
-{
- struct ext4_dir_entry_2 *de, *pde;
- unsigned int blocksize = dir->i_sb->s_blocksize;
- int i, err;
-
- i = 0;
- pde = NULL;
- de = (struct ext4_dir_entry_2 *) bh->b_data;
- while (i < bh->b_size) {
- if (ext4_check_dir_entry(dir, NULL, de, bh, i))
- return -EIO;
- if (de == de_del) {
- BUFFER_TRACE(bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, bh);
- if (unlikely(err)) {
- ext4_std_error(dir->i_sb, err);
- return err;
- }
- if (pde)
- pde->rec_len = ext4_rec_len_to_disk(
- ext4_rec_len_from_disk(pde->rec_len,
- blocksize) +
- ext4_rec_len_from_disk(de->rec_len,
- blocksize),
- blocksize);
- else
- de->inode = 0;
- dir->i_version++;
- BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, dir, bh);
- if (unlikely(err)) {
- ext4_std_error(dir->i_sb, err);
- return err;
- }
- return 0;
- }
- i += ext4_rec_len_from_disk(de->rec_len, blocksize);
- pde = de;
- de = ext4_next_entry(de, blocksize);
- }
- return -ENOENT;
-}
-
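-/*
- * Userspace sketch (not from the original source) of the delete-by-
- * merge step above: a victim entry is never moved out of the block;
- * its record length is simply folded into the preceding entry, so any
- * rec_len walk skips it. struct dent is a simplified stand-in for
- * struct ext4_dir_entry_2, with rec_len kept in host byte order.
- */
-#include <stdio.h>
-
-struct dent {
-	unsigned inode;
-	unsigned short rec_len;
-	unsigned char name_len;
-	char name[13];
-};
-
-static struct dent *next(struct dent *de)
-{
-	return (struct dent *)((char *)de + de->rec_len);
-}
-
-int main(void)
-{
-	struct dent block[3];
-	struct dent *victim, *de;
-
-	block[0] = (struct dent){ 11, sizeof(struct dent), 1, "x" };
-	block[1] = (struct dent){ 22, sizeof(struct dent), 1, "y" };
-	block[2] = (struct dent){ 33, sizeof(struct dent), 1, "z" };
-
-	victim = next(block);			/* entry "y" */
-	block[0].rec_len += victim->rec_len;	/* merge into "x" */
-
-	for (de = block; (char *)de < (char *)block + sizeof(block);
-	     de = next(de))
-		printf("inode %u rec_len %u\n", de->inode, de->rec_len);
-	return 0;
-}
-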
-/*
- * The DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX, or 2) nlinks ==
- * 2, since a count of 2 after an increment indicates that the nlink count
- * was previously pinned at 1.
- */
-static void ext4_inc_count(handle_t *handle, struct inode *inode)
-{
- inc_nlink(inode);
- if (is_dx(inode) && inode->i_nlink > 1) {
- /* limit is 16-bit i_links_count */
- if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) {
- set_nlink(inode, 1);
- EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_DIR_NLINK);
- }
- }
-}
-
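-/*
- * Userspace sketch (not from the original source) of the DIR_NLINK
- * saturation above: once a directory's 16-bit link count would hit
- * the limit, it is pinned at 1, which means "no longer tracked". The
- * is_dx() restriction in the kernel code is dropped here for brevity.
- */
-#include <stdio.h>
-
-#define LINK_MAX 65000
-
-static unsigned inc_dir_nlink(unsigned nlink)
-{
-	nlink++;
-	if (nlink >= LINK_MAX || nlink == 2)
-		return 1;	/* 2 means it was already pinned at 1 */
-	return nlink;
-}
-
-int main(void)
-{
-	printf("%u\n", inc_dir_nlink(64999));	/* 1: hits the ceiling */
-	printf("%u\n", inc_dir_nlink(1));	/* 1: stays saturated */
-	printf("%u\n", inc_dir_nlink(5));	/* 6: normal increment */
-	return 0;
-}
-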
-/*
- * If a directory had nlink == 1, then we should let it be 1. This indicates
- * that the directory has more than EXT4_LINK_MAX subdirectories.
- */
-static void ext4_dec_count(handle_t *handle, struct inode *inode)
-{
- if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
- drop_nlink(inode);
-}
-
-
-static int ext4_add_nondir(handle_t *handle,
- struct dentry *dentry, struct inode *inode)
-{
- int err = ext4_add_entry(handle, dentry, inode);
- if (!err) {
- ext4_mark_inode_dirty(handle, inode);
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
- return 0;
- }
- drop_nlink(inode);
- unlock_new_inode(inode);
- iput(inode);
- return err;
-}
-
-/*
- * By the time this is called, we already have created
- * the directory cache entry for the new file, but it
- * is so far negative - it has no inode.
- *
- * If the create succeeds, we fill in the inode information
- * with d_instantiate().
- */
-static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- struct nameidata *nd)
-{
- handle_t *handle;
- struct inode *inode;
- int err, retries = 0;
-
- dquot_initialize(dir);
-
-retry:
- handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
- EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
- if (IS_ERR(handle))
- return PTR_ERR(handle);
-
- if (IS_DIRSYNC(dir))
- ext4_handle_sync(handle);
-
- inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
- err = PTR_ERR(inode);
- if (!IS_ERR(inode)) {
- inode->i_op = &ext4_file_inode_operations;
- inode->i_fop = &ext4_file_operations;
- ext4_set_aops(inode);
- err = ext4_add_nondir(handle, dentry, inode);
- }
- ext4_journal_stop(handle);
- if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
- goto retry;
- return err;
-}
-
-static int ext4_mknod(struct inode *dir, struct dentry *dentry,
- umode_t mode, dev_t rdev)
-{
- handle_t *handle;
- struct inode *inode;
- int err, retries = 0;
-
- if (!new_valid_dev(rdev))
- return -EINVAL;
-
- dquot_initialize(dir);
-
-retry:
- handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
- EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
- if (IS_ERR(handle))
- return PTR_ERR(handle);
-
- if (IS_DIRSYNC(dir))
- ext4_handle_sync(handle);
-
- inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
- err = PTR_ERR(inode);
- if (!IS_ERR(inode)) {
- init_special_inode(inode, inode->i_mode, rdev);
-#ifdef CONFIG_EXT4_FS_XATTR
- inode->i_op = &ext4_special_inode_operations;
-#endif
- err = ext4_add_nondir(handle, dentry, inode);
- }
- ext4_journal_stop(handle);
- if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
- goto retry;
- return err;
-}
-
-static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
-{
- handle_t *handle;
- struct inode *inode;
- struct buffer_head *dir_block = NULL;
- struct ext4_dir_entry_2 *de;
- unsigned int blocksize = dir->i_sb->s_blocksize;
- int err, retries = 0;
-
- if (EXT4_DIR_LINK_MAX(dir))
- return -EMLINK;
-
- dquot_initialize(dir);
-
-retry:
- handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
- EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
- if (IS_ERR(handle))
- return PTR_ERR(handle);
-
- if (IS_DIRSYNC(dir))
- ext4_handle_sync(handle);
-
- inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
- &dentry->d_name, 0, NULL);
- err = PTR_ERR(inode);
- if (IS_ERR(inode))
- goto out_stop;
-
- inode->i_op = &ext4_dir_inode_operations;
- inode->i_fop = &ext4_dir_operations;
- inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
- dir_block = ext4_bread(handle, inode, 0, 1, &err);
- if (!dir_block)
- goto out_clear_inode;
- BUFFER_TRACE(dir_block, "get_write_access");
- err = ext4_journal_get_write_access(handle, dir_block);
- if (err)
- goto out_clear_inode;
- de = (struct ext4_dir_entry_2 *) dir_block->b_data;
- de->inode = cpu_to_le32(inode->i_ino);
- de->name_len = 1;
- de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
- blocksize);
- strcpy(de->name, ".");
- ext4_set_de_type(dir->i_sb, de, S_IFDIR);
- de = ext4_next_entry(de, blocksize);
- de->inode = cpu_to_le32(dir->i_ino);
- de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(1),
- blocksize);
- de->name_len = 2;
- strcpy(de->name, "..");
- ext4_set_de_type(dir->i_sb, de, S_IFDIR);
- set_nlink(inode, 2);
- BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, inode, dir_block);
- if (err)
- goto out_clear_inode;
- err = ext4_mark_inode_dirty(handle, inode);
- if (!err)
- err = ext4_add_entry(handle, dentry, inode);
- if (err) {
-out_clear_inode:
- clear_nlink(inode);
- unlock_new_inode(inode);
- ext4_mark_inode_dirty(handle, inode);
- iput(inode);
- goto out_stop;
- }
- ext4_inc_count(handle, dir);
- ext4_update_dx_flag(dir);
- err = ext4_mark_inode_dirty(handle, dir);
- if (err)
- goto out_clear_inode;
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
-out_stop:
- brelse(dir_block);
- ext4_journal_stop(handle);
- if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
- goto retry;
- return err;
-}
-
-/*
- * routine to check that the specified directory is empty (for rmdir)
- */
-static int empty_dir(struct inode *inode)
-{
- unsigned int offset;
- struct buffer_head *bh;
- struct ext4_dir_entry_2 *de, *de1;
- struct super_block *sb;
- int err = 0;
-
- sb = inode->i_sb;
- if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
- !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
- if (err)
- EXT4_ERROR_INODE(inode,
- "error %d reading directory lblock 0", err);
- else
- ext4_warning(inode->i_sb,
- "bad directory (dir #%lu) - no data block",
- inode->i_ino);
- return 1;
- }
- de = (struct ext4_dir_entry_2 *) bh->b_data;
- de1 = ext4_next_entry(de, sb->s_blocksize);
- if (le32_to_cpu(de->inode) != inode->i_ino ||
- !le32_to_cpu(de1->inode) ||
- strcmp(".", de->name) ||
- strcmp("..", de1->name)) {
- ext4_warning(inode->i_sb,
- "bad directory (dir #%lu) - no `.' or `..'",
- inode->i_ino);
- brelse(bh);
- return 1;
- }
- offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) +
- ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize);
- de = ext4_next_entry(de1, sb->s_blocksize);
- while (offset < inode->i_size) {
- if (!bh ||
- (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
- unsigned int lblock;
- err = 0;
- brelse(bh);
- lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
- bh = ext4_bread(NULL, inode, lblock, 0, &err);
- if (!bh) {
- if (err)
- EXT4_ERROR_INODE(inode,
- "error %d reading directory "
- "lblock %u", err, lblock);
- offset += sb->s_blocksize;
- continue;
- }
- de = (struct ext4_dir_entry_2 *) bh->b_data;
- }
- if (ext4_check_dir_entry(inode, NULL, de, bh, offset)) {
- de = (struct ext4_dir_entry_2 *)(bh->b_data +
- sb->s_blocksize);
- offset = (offset | (sb->s_blocksize - 1)) + 1;
- continue;
- }
- if (le32_to_cpu(de->inode)) {
- brelse(bh);
- return 0;
- }
- offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
- de = ext4_next_entry(de, sb->s_blocksize);
- }
- brelse(bh);
- return 1;
-}
-
-/* ext4_orphan_add() links an unlinked or truncated inode into a list of
- * such inodes, starting at the superblock, in case we crash before the
- * file is closed/deleted, or in case the inode truncate spans multiple
- * transactions and the last transaction is not recovered after a crash.
- *
- * At filesystem recovery time, we walk this list deleting unlinked
- * inodes and truncating linked inodes in ext4_orphan_cleanup().
- */
-int ext4_orphan_add(handle_t *handle, struct inode *inode)
-{
- struct super_block *sb = inode->i_sb;
- struct ext4_iloc iloc;
- int err = 0, rc;
-
- if (!ext4_handle_valid(handle))
- return 0;
-
- mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
- if (!list_empty(&EXT4_I(inode)->i_orphan))
- goto out_unlock;
-
- /*
- * Orphan handling is only valid for files with data blocks
- * being truncated, or files being unlinked. Note that we either
- * hold i_mutex, or the inode can not be referenced from outside,
- * so i_nlink should not be bumped due to race
- */
- J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
-
- BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
- err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
- if (err)
- goto out_unlock;
-
- err = ext4_reserve_inode_write(handle, inode, &iloc);
- if (err)
- goto out_unlock;
- /*
- * Due to previous errors, the inode may already be a part of the
- * on-disk orphan list. If so, skip the on-disk list modification.
- */
- if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <=
- (le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)))
- goto mem_insert;
-
- /* Insert this inode at the head of the on-disk orphan list... */
- NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
- EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
- err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
- rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
- if (!err)
- err = rc;
-
- /* Only add to the head of the in-memory list if all the
- * previous operations succeeded. If the orphan_add is going to
- * fail (possibly taking the journal offline), we can't risk
- * leaving the inode on the orphan list: stray orphan-list
- * entries can cause panics at unmount time.
- *
- * This is safe: on error we're going to ignore the orphan list
- * anyway on the next recovery. */
-mem_insert:
- if (!err)
- list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
-
- jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
- jbd_debug(4, "orphan inode %lu will point to %d\n",
- inode->i_ino, NEXT_ORPHAN(inode));
-out_unlock:
- mutex_unlock(&EXT4_SB(sb)->s_orphan_lock);
- ext4_std_error(inode->i_sb, err);
- return err;
-}
-
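-/*
- * Userspace sketch (not from the original source) of the on-disk
- * orphan chain manipulated above: the superblock's s_last_orphan holds
- * the head inode number, and each orphan's NEXT_ORPHAN() field (kept
- * in i_dtime on disk) names the next orphan. Insertion is a plain
- * head insert; the arrays here are made-up stand-ins.
- */
-#include <stdio.h>
-
-#define NINODES 16
-
-static unsigned next_orphan[NINODES];	/* per-inode next pointer */
-static unsigned last_orphan;		/* superblock head, 0 = empty */
-
-static void orphan_add(unsigned ino)
-{
-	next_orphan[ino] = last_orphan;	/* inode points at the old head */
-	last_orphan = ino;		/* superblock points at the inode */
-}
-
-int main(void)
-{
-	unsigned ino;
-
-	orphan_add(5);
-	orphan_add(3);
-	orphan_add(7);
-	for (ino = last_orphan; ino; ino = next_orphan[ino])
-		printf("orphan inode %u\n", ino);	/* 7, 3, 5 */
-	return 0;
-}
-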
-/*
- * ext4_orphan_del() removes an unlinked or truncated inode from the list
- * of such inodes stored on disk, because it is finally being cleaned up.
- */
-int ext4_orphan_del(handle_t *handle, struct inode *inode)
-{
- struct list_head *prev;
- struct ext4_inode_info *ei = EXT4_I(inode);
- struct ext4_sb_info *sbi;
- __u32 ino_next;
- struct ext4_iloc iloc;
- int err = 0;
-
- /* ext4_handle_valid() assumes a valid handle_t pointer */
- if (handle && !ext4_handle_valid(handle))
- return 0;
-
- mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
- if (list_empty(&ei->i_orphan))
- goto out;
-
- ino_next = NEXT_ORPHAN(inode);
- prev = ei->i_orphan.prev;
- sbi = EXT4_SB(inode->i_sb);
-
- jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
-
- list_del_init(&ei->i_orphan);
-
- /* If we're on an error path, we may not have a valid
- * transaction handle with which to update the orphan list on
- * disk, but we still need to remove the inode from the linked
- * list in memory. */
- if (sbi->s_journal && !handle)
- goto out;
-
- err = ext4_reserve_inode_write(handle, inode, &iloc);
- if (err)
- goto out_err;
-
- if (prev == &sbi->s_orphan) {
- jbd_debug(4, "superblock will point to %u\n", ino_next);
- BUFFER_TRACE(sbi->s_sbh, "get_write_access");
- err = ext4_journal_get_write_access(handle, sbi->s_sbh);
- if (err)
- goto out_brelse;
- sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
- err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
- } else {
- struct ext4_iloc iloc2;
- struct inode *i_prev =
- &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
-
- jbd_debug(4, "orphan inode %lu will point to %u\n",
- i_prev->i_ino, ino_next);
- err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
- if (err)
- goto out_brelse;
- NEXT_ORPHAN(i_prev) = ino_next;
- err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
- }
- if (err)
- goto out_brelse;
- NEXT_ORPHAN(inode) = 0;
- err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-
-out_err:
- ext4_std_error(inode->i_sb, err);
-out:
- mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
- return err;
-
-out_brelse:
- brelse(iloc.bh);
- goto out_err;
-}
-
-static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
-{
- int retval;
- struct inode *inode;
- struct buffer_head *bh;
- struct ext4_dir_entry_2 *de;
- handle_t *handle;
-
- /* Initialize quotas early so that eventual writes go in a
- * separate transaction */
- dquot_initialize(dir);
- dquot_initialize(dentry->d_inode);
-
- handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
- if (IS_ERR(handle))
- return PTR_ERR(handle);
-
- retval = -ENOENT;
- bh = ext4_find_entry(dir, &dentry->d_name, &de);
- if (!bh)
- goto end_rmdir;
-
- if (IS_DIRSYNC(dir))
- ext4_handle_sync(handle);
-
- inode = dentry->d_inode;
-
- retval = -EIO;
- if (le32_to_cpu(de->inode) != inode->i_ino)
- goto end_rmdir;
-
- retval = -ENOTEMPTY;
- if (!empty_dir(inode))
- goto end_rmdir;
-
- retval = ext4_delete_entry(handle, dir, de, bh);
- if (retval)
- goto end_rmdir;
- if (!EXT4_DIR_LINK_EMPTY(inode))
- ext4_warning(inode->i_sb,
- "empty directory has too many links (%d)",
- inode->i_nlink);
- inode->i_version++;
- clear_nlink(inode);
- /* There's no need to set i_disksize: the fact that i_nlink is
- * zero will ensure that the right thing happens during any
- * recovery. */
- inode->i_size = 0;
- ext4_orphan_add(handle, inode);
- inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode);
- ext4_mark_inode_dirty(handle, inode);
- ext4_dec_count(handle, dir);
- ext4_update_dx_flag(dir);
- ext4_mark_inode_dirty(handle, dir);
-
-end_rmdir:
- ext4_journal_stop(handle);
- brelse(bh);
- return retval;
-}
-
-static int ext4_unlink(struct inode *dir, struct dentry *dentry)
-{
- int retval;
- struct inode *inode;
- struct buffer_head *bh;
- struct ext4_dir_entry_2 *de;
- handle_t *handle;
-
- trace_ext4_unlink_enter(dir, dentry);
- /* Initialize quotas early so that eventual writes go
- * in a separate transaction */
- dquot_initialize(dir);
- dquot_initialize(dentry->d_inode);
-
- handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
- if (IS_ERR(handle))
- return PTR_ERR(handle);
-
- if (IS_DIRSYNC(dir))
- ext4_handle_sync(handle);
-
- retval = -ENOENT;
- bh = ext4_find_entry(dir, &dentry->d_name, &de);
- if (!bh)
- goto end_unlink;
-
- inode = dentry->d_inode;
-
- retval = -EIO;
- if (le32_to_cpu(de->inode) != inode->i_ino)
- goto end_unlink;
-
- if (!inode->i_nlink) {
- ext4_warning(inode->i_sb,
- "Deleting nonexistent file (%lu), %d",
- inode->i_ino, inode->i_nlink);
- set_nlink(inode, 1);
- }
- retval = ext4_delete_entry(handle, dir, de, bh);
- if (retval)
- goto end_unlink;
- dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
- ext4_update_dx_flag(dir);
- ext4_mark_inode_dirty(handle, dir);
- drop_nlink(inode);
- if (!inode->i_nlink)
- ext4_orphan_add(handle, inode);
- inode->i_ctime = ext4_current_time(inode);
- ext4_mark_inode_dirty(handle, inode);
- retval = 0;
-
-end_unlink:
- ext4_journal_stop(handle);
- brelse(bh);
- trace_ext4_unlink_exit(dentry, retval);
- return retval;
-}
-
-static int ext4_symlink(struct inode *dir,
- struct dentry *dentry, const char *symname)
-{
- handle_t *handle;
- struct inode *inode;
- int l, err, retries = 0;
- int credits;
-
- l = strlen(symname)+1;
- if (l > dir->i_sb->s_blocksize)
- return -ENAMETOOLONG;
-
- dquot_initialize(dir);
-
- if (l > EXT4_N_BLOCKS * 4) {
- /*
- * For non-fast symlinks, we just allocate inode and put it on
- * orphan list in the first transaction => we need bitmap,
- * group descriptor, sb, inode block, quota blocks, and
- * possibly selinux xattr blocks.
- */
- credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
- EXT4_XATTR_TRANS_BLOCKS;
- } else {
- /*
- * Fast symlink. We have to add entry to directory
- * (EXT4_DATA_TRANS_BLOCKS + EXT4_INDEX_EXTRA_TRANS_BLOCKS),
- * allocate new inode (bitmap, group descriptor, inode block,
- * quota blocks, sb is already counted in previous macros).
- */
- credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
- EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
- }
-retry:
- handle = ext4_journal_start(dir, credits);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
-
- if (IS_DIRSYNC(dir))
- ext4_handle_sync(handle);
-
- inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
- &dentry->d_name, 0, NULL);
- err = PTR_ERR(inode);
- if (IS_ERR(inode))
- goto out_stop;
-
- if (l > EXT4_N_BLOCKS * 4) {
- inode->i_op = &ext4_symlink_inode_operations;
- ext4_set_aops(inode);
- /*
- * We cannot call page_symlink() with the transaction started,
- * because it calls into ext4_write_begin(), which can wait for a
- * transaction commit if we are running out of space and thus
- * deadlock. So we have to stop the transaction now and restart
- * it once the symlink contents have been written.
- *
- * To keep the fs consistent in case of a crash, we have to put
- * the inode on the orphan list in the meantime.
- */
- drop_nlink(inode);
- err = ext4_orphan_add(handle, inode);
- ext4_journal_stop(handle);
- if (err)
- goto err_drop_inode;
- err = __page_symlink(inode, symname, l, 1);
- if (err)
- goto err_drop_inode;
- /*
- * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS
- * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
- */
- handle = ext4_journal_start(dir,
- EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
- if (IS_ERR(handle)) {
- err = PTR_ERR(handle);
- goto err_drop_inode;
- }
- set_nlink(inode, 1);
- err = ext4_orphan_del(handle, inode);
- if (err) {
- ext4_journal_stop(handle);
- clear_nlink(inode);
- goto err_drop_inode;
- }
- } else {
- /* clear the extent format for fast symlink */
- ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
- inode->i_op = &ext4_fast_symlink_inode_operations;
- memcpy((char *)&EXT4_I(inode)->i_data, symname, l);
- inode->i_size = l-1;
- }
- EXT4_I(inode)->i_disksize = inode->i_size;
- err = ext4_add_nondir(handle, dentry, inode);
-out_stop:
- ext4_journal_stop(handle);
- if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
- goto retry;
- return err;
-err_drop_inode:
- unlock_new_inode(inode);
- iput(inode);
- return err;
-}
-
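-/*
- * Userspace sketch (not from the original source) of the fast-symlink
- * threshold used in ext4_symlink() above: EXT4_N_BLOCKS (15) block
- * pointers of 4 bytes each give 60 bytes of in-inode storage, so a
- * target of up to 59 characters plus its NUL is stored inline and
- * needs no data block.
- */
-#include <stdio.h>
-#include <string.h>
-
-#define EXT4_N_BLOCKS 15
-
-int main(void)
-{
-	const char *target = "/a/fairly/short/destination";
-	size_t l = strlen(target) + 1;
-
-	if (l > EXT4_N_BLOCKS * 4)
-		printf("slow symlink: needs a data block\n");
-	else
-		printf("fast symlink: %zu bytes fit in i_data\n", l);
-	return 0;
-}
-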
-static int ext4_link(struct dentry *old_dentry,
- struct inode *dir, struct dentry *dentry)
-{
- handle_t *handle;
- struct inode *inode = old_dentry->d_inode;
- int err, retries = 0;
-
- if (inode->i_nlink >= EXT4_LINK_MAX)
- return -EMLINK;
-
- dquot_initialize(dir);
-
-retry:
- handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
-
- if (IS_DIRSYNC(dir))
- ext4_handle_sync(handle);
-
- inode->i_ctime = ext4_current_time(inode);
- ext4_inc_count(handle, inode);
- ihold(inode);
-
- err = ext4_add_entry(handle, dentry, inode);
- if (!err) {
- ext4_mark_inode_dirty(handle, inode);
- d_instantiate(dentry, inode);
- } else {
- drop_nlink(inode);
- iput(inode);
- }
- ext4_journal_stop(handle);
- if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
- goto retry;
- return err;
-}
-
-#define PARENT_INO(buffer, size) \
- (ext4_next_entry((struct ext4_dir_entry_2 *)(buffer), size)->inode)
-
-/*
- * Anybody can rename anything with this: the permission checks are left to the
- * higher-level routines.
- */
-static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
-{
- handle_t *handle;
- struct inode *old_inode, *new_inode;
- struct buffer_head *old_bh, *new_bh, *dir_bh;
- struct ext4_dir_entry_2 *old_de, *new_de;
- int retval, force_da_alloc = 0;
-
- dquot_initialize(old_dir);
- dquot_initialize(new_dir);
-
- old_bh = new_bh = dir_bh = NULL;
-
- /* Initialize quotas early so that eventual writes go
- * in a separate transaction */
- if (new_dentry->d_inode)
- dquot_initialize(new_dentry->d_inode);
- handle = ext4_journal_start(old_dir, 2 *
- EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
-
- if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
- ext4_handle_sync(handle);
-
- old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de);
- /*
- * The check of the inode number is _not_ due to possible IO errors.
- * We might rmdir the source, keep it as pwd of some process
- * and merrily kill the link to whatever was created under the
- * same name. Goodbye sticky bit ;-<
- */
- old_inode = old_dentry->d_inode;
- retval = -ENOENT;
- if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino)
- goto end_rename;
-
- new_inode = new_dentry->d_inode;
- new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de);
- if (new_bh) {
- if (!new_inode) {
- brelse(new_bh);
- new_bh = NULL;
- }
- }
- if (S_ISDIR(old_inode->i_mode)) {
- if (new_inode) {
- retval = -ENOTEMPTY;
- if (!empty_dir(new_inode))
- goto end_rename;
- }
- retval = -EIO;
- dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval);
- if (!dir_bh)
- goto end_rename;
- if (le32_to_cpu(PARENT_INO(dir_bh->b_data,
- old_dir->i_sb->s_blocksize)) != old_dir->i_ino)
- goto end_rename;
- retval = -EMLINK;
- if (!new_inode && new_dir != old_dir &&
- EXT4_DIR_LINK_MAX(new_dir))
- goto end_rename;
- BUFFER_TRACE(dir_bh, "get_write_access");
- retval = ext4_journal_get_write_access(handle, dir_bh);
- if (retval)
- goto end_rename;
- }
- if (!new_bh) {
- retval = ext4_add_entry(handle, new_dentry, old_inode);
- if (retval)
- goto end_rename;
- } else {
- BUFFER_TRACE(new_bh, "get write access");
- retval = ext4_journal_get_write_access(handle, new_bh);
- if (retval)
- goto end_rename;
- new_de->inode = cpu_to_le32(old_inode->i_ino);
- if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
- EXT4_FEATURE_INCOMPAT_FILETYPE))
- new_de->file_type = old_de->file_type;
- new_dir->i_version++;
- new_dir->i_ctime = new_dir->i_mtime =
- ext4_current_time(new_dir);
- ext4_mark_inode_dirty(handle, new_dir);
- BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
- retval = ext4_handle_dirty_metadata(handle, new_dir, new_bh);
- if (unlikely(retval)) {
- ext4_std_error(new_dir->i_sb, retval);
- goto end_rename;
- }
- brelse(new_bh);
- new_bh = NULL;
- }
-
- /*
- * Like most other Unix systems, set the ctime for inodes on a
- * rename.
- */
- old_inode->i_ctime = ext4_current_time(old_inode);
- ext4_mark_inode_dirty(handle, old_inode);
-
- /*
- * ok, that's it
- */
- if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
- old_de->name_len != old_dentry->d_name.len ||
- strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
- (retval = ext4_delete_entry(handle, old_dir,
- old_de, old_bh)) == -ENOENT) {
- /* old_de could have moved from under us during htree split, so
- * make sure that we are deleting the right entry. We might
- * also be pointing to a stale entry in the unused part of
- * old_bh so just checking inum and the name isn't enough. */
- struct buffer_head *old_bh2;
- struct ext4_dir_entry_2 *old_de2;
-
- old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2);
- if (old_bh2) {
- retval = ext4_delete_entry(handle, old_dir,
- old_de2, old_bh2);
- brelse(old_bh2);
- }
- }
- if (retval) {
- ext4_warning(old_dir->i_sb,
- "Deleting old file (%lu), %d, error=%d",
- old_dir->i_ino, old_dir->i_nlink, retval);
- }
-
- if (new_inode) {
- ext4_dec_count(handle, new_inode);
- new_inode->i_ctime = ext4_current_time(new_inode);
- }
- old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
- ext4_update_dx_flag(old_dir);
- if (dir_bh) {
- PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
- cpu_to_le32(new_dir->i_ino);
- BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
- retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh);
- if (retval) {
- ext4_std_error(old_dir->i_sb, retval);
- goto end_rename;
- }
- ext4_dec_count(handle, old_dir);
- if (new_inode) {
- /* checked empty_dir above, can't have another parent,
- * ext4_dec_count() won't work for many-linked dirs */
- clear_nlink(new_inode);
- } else {
- ext4_inc_count(handle, new_dir);
- ext4_update_dx_flag(new_dir);
- ext4_mark_inode_dirty(handle, new_dir);
- }
- }
- ext4_mark_inode_dirty(handle, old_dir);
- if (new_inode) {
- ext4_mark_inode_dirty(handle, new_inode);
- if (!new_inode->i_nlink)
- ext4_orphan_add(handle, new_inode);
- if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
- force_da_alloc = 1;
- }
- retval = 0;
-
-end_rename:
- brelse(dir_bh);
- brelse(old_bh);
- brelse(new_bh);
- ext4_journal_stop(handle);
- if (retval == 0 && force_da_alloc)
- ext4_alloc_da_blocks(old_inode);
- return retval;
-}
-
-/*
- * directories can handle most operations...
- */
-const struct inode_operations ext4_dir_inode_operations = {
- .create = ext4_create,
- .lookup = ext4_lookup,
- .link = ext4_link,
- .unlink = ext4_unlink,
- .symlink = ext4_symlink,
- .mkdir = ext4_mkdir,
- .rmdir = ext4_rmdir,
- .mknod = ext4_mknod,
- .rename = ext4_rename,
- .setattr = ext4_setattr,
-#ifdef CONFIG_EXT4_FS_XATTR
- .setxattr = generic_setxattr,
- .getxattr = generic_getxattr,
- .listxattr = ext4_listxattr,
- .removexattr = generic_removexattr,
-#endif
- .get_acl = ext4_get_acl,
- .fiemap = ext4_fiemap,
-};
-
-const struct inode_operations ext4_special_inode_operations = {
- .setattr = ext4_setattr,
-#ifdef CONFIG_EXT4_FS_XATTR
- .setxattr = generic_setxattr,
- .getxattr = generic_getxattr,
- .listxattr = ext4_listxattr,
- .removexattr = generic_removexattr,
-#endif
- .get_acl = ext4_get_acl,
-};
diff --git a/ANDROID_3.4.5/fs/ext4/page-io.c b/ANDROID_3.4.5/fs/ext4/page-io.c
deleted file mode 100644
index dcdeef16..00000000
--- a/ANDROID_3.4.5/fs/ext4/page-io.c
+++ /dev/null
@@ -1,433 +0,0 @@
-/*
- * linux/fs/ext4/page-io.c
- *
- * This contains the new page_io functions for ext4
- *
- * Written by Theodore Ts'o, 2010.
- */
-
-#include <linux/fs.h>
-#include <linux/time.h>
-#include <linux/jbd2.h>
-#include <linux/highuid.h>
-#include <linux/pagemap.h>
-#include <linux/quotaops.h>
-#include <linux/string.h>
-#include <linux/buffer_head.h>
-#include <linux/writeback.h>
-#include <linux/pagevec.h>
-#include <linux/mpage.h>
-#include <linux/namei.h>
-#include <linux/uio.h>
-#include <linux/bio.h>
-#include <linux/workqueue.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-
-#include "ext4_jbd2.h"
-#include "xattr.h"
-#include "acl.h"
-#include "ext4_extents.h"
-
-static struct kmem_cache *io_page_cachep, *io_end_cachep;
-
-int __init ext4_init_pageio(void)
-{
- io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
- if (io_page_cachep == NULL)
- return -ENOMEM;
- io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
- if (io_end_cachep == NULL) {
- kmem_cache_destroy(io_page_cachep);
- return -ENOMEM;
- }
- return 0;
-}
-
-void ext4_exit_pageio(void)
-{
- kmem_cache_destroy(io_end_cachep);
- kmem_cache_destroy(io_page_cachep);
-}
-
-void ext4_ioend_wait(struct inode *inode)
-{
- wait_queue_head_t *wq = ext4_ioend_wq(inode);
-
- wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
-}
-
-static void put_io_page(struct ext4_io_page *io_page)
-{
- if (atomic_dec_and_test(&io_page->p_count)) {
- end_page_writeback(io_page->p_page);
- put_page(io_page->p_page);
- kmem_cache_free(io_page_cachep, io_page);
- }
-}
-
-void ext4_free_io_end(ext4_io_end_t *io)
-{
- int i;
-
- BUG_ON(!io);
- if (io->page)
- put_page(io->page);
- for (i = 0; i < io->num_io_pages; i++)
- put_io_page(io->pages[i]);
- io->num_io_pages = 0;
- if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
- wake_up_all(ext4_ioend_wq(io->inode));
- kmem_cache_free(io_end_cachep, io);
-}
-
-/*
- * check a range of space and convert unwritten extents to written.
- *
- * Called with inode->i_mutex; we depend on this when we manipulate
- * io->flag, since we could otherwise race with ext4_flush_completed_IO()
- */
-int ext4_end_io_nolock(ext4_io_end_t *io)
-{
- struct inode *inode = io->inode;
- loff_t offset = io->offset;
- ssize_t size = io->size;
- int ret = 0;
-
- ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
- "list->prev 0x%p\n",
- io, inode->i_ino, io->list.next, io->list.prev);
-
- ret = ext4_convert_unwritten_extents(inode, offset, size);
- if (ret < 0) {
- ext4_msg(inode->i_sb, KERN_EMERG,
- "failed to convert unwritten extents to written "
- "extents -- potential data loss! "
- "(inode %lu, offset %llu, size %zd, error %d)",
- inode->i_ino, offset, size, ret);
- }
-
- if (io->iocb)
- aio_complete(io->iocb, io->result, 0);
-
- if (io->flag & EXT4_IO_END_DIRECT)
- inode_dio_done(inode);
- /* Wake up anyone waiting on unwritten extent conversion */
- if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten))
- wake_up_all(ext4_ioend_wq(io->inode));
- return ret;
-}
-
-/*
- * work on completed aio dio IO, to convert unwritten extents to extents
- */
-static void ext4_end_io_work(struct work_struct *work)
-{
- ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
- struct inode *inode = io->inode;
- struct ext4_inode_info *ei = EXT4_I(inode);
- unsigned long flags;
-
- spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- if (io->flag & EXT4_IO_END_IN_FSYNC)
- goto requeue;
- if (list_empty(&io->list)) {
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- goto free;
- }
-
- if (!mutex_trylock(&inode->i_mutex)) {
- bool was_queued;
-requeue:
- was_queued = !!(io->flag & EXT4_IO_END_QUEUED);
- io->flag |= EXT4_IO_END_QUEUED;
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- /*
- * Requeue the work instead of waiting so that the work
- * items queued after this can be processed.
- */
- queue_work(EXT4_SB(inode->i_sb)->dio_unwritten_wq, &io->work);
- /*
- * To prevent the ext4-dio-unwritten thread from keeping
- * requeueing end_io requests and occupying cpu for too long,
- * yield the cpu if it sees an end_io request that has already
- * been requeued.
- */
- if (was_queued)
- yield();
- return;
- }
- list_del_init(&io->list);
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- (void) ext4_end_io_nolock(io);
- mutex_unlock(&inode->i_mutex);
-free:
- ext4_free_io_end(io);
-}
-
-ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
-{
- ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
- if (io) {
- atomic_inc(&EXT4_I(inode)->i_ioend_count);
- io->inode = inode;
- INIT_WORK(&io->work, ext4_end_io_work);
- INIT_LIST_HEAD(&io->list);
- }
- return io;
-}
-
-/*
- * Print a buffer I/O error compatible with fs/buffer.c.  This
- * provides compatibility with dmesg scrapers that look for a specific
- * buffer I/O error message. We really need a unified error reporting
- * structure to userspace ala Digital Unix's uerf system, but it's
- * probably not going to happen in my lifetime, due to LKML politics...
- */
-static void buffer_io_error(struct buffer_head *bh)
-{
- char b[BDEVNAME_SIZE];
- printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
- bdevname(bh->b_bdev, b),
- (unsigned long long)bh->b_blocknr);
-}
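
With illustrative values plugged into the format string above, the emitted line matches the classic fs/buffer.c message that log scrapers expect:

	Buffer I/O error on device sda1, logical block 1234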
-
-static void ext4_end_bio(struct bio *bio, int error)
-{
- ext4_io_end_t *io_end = bio->bi_private;
- struct workqueue_struct *wq;
- struct inode *inode;
- unsigned long flags;
- int i;
- sector_t bi_sector = bio->bi_sector;
-
- BUG_ON(!io_end);
- bio->bi_private = NULL;
- bio->bi_end_io = NULL;
- if (test_bit(BIO_UPTODATE, &bio->bi_flags))
- error = 0;
- bio_put(bio);
-
- for (i = 0; i < io_end->num_io_pages; i++) {
- struct page *page = io_end->pages[i]->p_page;
- struct buffer_head *bh, *head;
- loff_t offset;
- loff_t io_end_offset;
-
- if (error) {
- SetPageError(page);
- set_bit(AS_EIO, &page->mapping->flags);
- head = page_buffers(page);
- BUG_ON(!head);
-
- io_end_offset = io_end->offset + io_end->size;
-
- offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
- bh = head;
- do {
- if ((offset >= io_end->offset) &&
- (offset+bh->b_size <= io_end_offset))
- buffer_io_error(bh);
-
- offset += bh->b_size;
- bh = bh->b_this_page;
- } while (bh != head);
- }
-
- put_io_page(io_end->pages[i]);
- }
- io_end->num_io_pages = 0;
- inode = io_end->inode;
-
- if (error) {
- io_end->flag |= EXT4_IO_END_ERROR;
- ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
- "(offset %llu size %ld starting block %llu)",
- inode->i_ino,
- (unsigned long long) io_end->offset,
- (long) io_end->size,
- (unsigned long long)
- bi_sector >> (inode->i_blkbits - 9));
- }
-
- if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
- ext4_free_io_end(io_end);
- return;
- }
-
- /* Add the io_end to the per-inode completed io list */
- spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
- list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
- spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
-
- wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
- /* queue the work to convert unwritten extents to written */
- queue_work(wq, &io_end->work);
-}
-
-void ext4_io_submit(struct ext4_io_submit *io)
-{
- struct bio *bio = io->io_bio;
-
- if (bio) {
- bio_get(io->io_bio);
- submit_bio(io->io_op, io->io_bio);
- BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
- bio_put(io->io_bio);
- }
- io->io_bio = NULL;
- io->io_op = 0;
- io->io_end = NULL;
-}
-
-static int io_submit_init(struct ext4_io_submit *io,
- struct inode *inode,
- struct writeback_control *wbc,
- struct buffer_head *bh)
-{
- ext4_io_end_t *io_end;
- struct page *page = bh->b_page;
- int nvecs = bio_get_nr_vecs(bh->b_bdev);
- struct bio *bio;
-
- io_end = ext4_init_io_end(inode, GFP_NOFS);
- if (!io_end)
- return -ENOMEM;
- bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
- bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio->bi_bdev = bh->b_bdev;
- bio->bi_private = io->io_end = io_end;
- bio->bi_end_io = ext4_end_bio;
-
- /* cast before shifting: pgoff_t is only 32 bits on 32-bit systems */
- io_end->offset = ((loff_t)page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
-
- io->io_bio = bio;
- io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
- io->io_next_block = bh->b_blocknr;
- return 0;
-}
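
The bi_sector assignment above converts a filesystem block number into 512-byte sectors. A worked example under the assumption of 4 KiB blocks:

/* 4 KiB blocks are 8 sectors each, so block 1000 starts at sector 8000. */
sector_t sector = (sector_t)1000 * (4096 >> 9);	/* 1000 * 8 = 8000 */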
-
-static int io_submit_add_bh(struct ext4_io_submit *io,
- struct ext4_io_page *io_page,
- struct inode *inode,
- struct writeback_control *wbc,
- struct buffer_head *bh)
-{
- ext4_io_end_t *io_end;
- int ret;
-
- if (buffer_new(bh)) {
- clear_buffer_new(bh);
- unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
- }
-
- if (!buffer_mapped(bh) || buffer_delay(bh)) {
- if (!buffer_mapped(bh))
- clear_buffer_dirty(bh);
- if (io->io_bio)
- ext4_io_submit(io);
- return 0;
- }
-
- if (io->io_bio && bh->b_blocknr != io->io_next_block) {
-submit_and_retry:
- ext4_io_submit(io);
- }
- if (io->io_bio == NULL) {
- ret = io_submit_init(io, inode, wbc, bh);
- if (ret)
- return ret;
- }
- io_end = io->io_end;
- if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
- (io_end->pages[io_end->num_io_pages-1] != io_page))
- goto submit_and_retry;
- if (buffer_uninit(bh))
- ext4_set_io_unwritten_flag(inode, io_end);
- io->io_end->size += bh->b_size;
- io->io_next_block++;
- ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
- if (ret != bh->b_size)
- goto submit_and_retry;
- if ((io_end->num_io_pages == 0) ||
- (io_end->pages[io_end->num_io_pages-1] != io_page)) {
- io_end->pages[io_end->num_io_pages++] = io_page;
- atomic_inc(&io_page->p_count);
- }
- return 0;
-}
-
-int ext4_bio_write_page(struct ext4_io_submit *io,
- struct page *page,
- int len,
- struct writeback_control *wbc)
-{
- struct inode *inode = page->mapping->host;
- unsigned block_start, block_end, blocksize;
- struct ext4_io_page *io_page;
- struct buffer_head *bh, *head;
- int ret = 0;
-
- blocksize = 1 << inode->i_blkbits;
-
- BUG_ON(!PageLocked(page));
- BUG_ON(PageWriteback(page));
-
- io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
- if (!io_page) {
- set_page_dirty(page);
- unlock_page(page);
- return -ENOMEM;
- }
- io_page->p_page = page;
- atomic_set(&io_page->p_count, 1);
- get_page(page);
- set_page_writeback(page);
- ClearPageError(page);
-
- for (bh = head = page_buffers(page), block_start = 0;
- bh != head || !block_start;
- block_start = block_end, bh = bh->b_this_page) {
-
- block_end = block_start + blocksize;
- if (block_start >= len) {
- /*
- * Comments copied from block_write_full_page_endio:
- *
- * The page straddles i_size. It must be zeroed out on
- * each and every writepage invocation because it may
- * be mmapped. "A file is mapped in multiples of the
- * page size. For a file that is not a multiple of
- * the page size, the remaining memory is zeroed when
- * mapped, and writes to that region are not written
- * out to the file."
- */
- zero_user_segment(page, block_start, block_end);
- clear_buffer_dirty(bh);
- set_buffer_uptodate(bh);
- continue;
- }
- clear_buffer_dirty(bh);
- ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
- if (ret) {
- /*
- * We only get here on ENOMEM. Not much else
- * we can do but mark the page as dirty, and
- * better luck next time.
- */
- set_page_dirty(page);
- break;
- }
- }
- unlock_page(page);
- /*
- * If the page was truncated before we could do the writeback,
- * or we had a memory allocation error while trying to write
- * the first buffer head, we won't have submitted any pages for
- * I/O. In that case we need to make sure we've cleared the
- * PageWriteback bit from the page to prevent the system from
- * wedging later on.
- */
- put_io_page(io_page);
- return ret;
-}
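
The @len argument is what drives the i_size-straddling zeroing above. A hedged sketch of how a caller derives it (modeled on the writeback paths in inode.c; the variable names are illustrative):

/* Sketch: how many bytes of @page lie inside i_size. */
loff_t size = i_size_read(inode);
unsigned len;

if (page->index == size >> PAGE_CACHE_SHIFT)
	len = size & ~PAGE_CACHE_MASK;	/* partial last page */
else
	len = PAGE_CACHE_SIZE;		/* page is fully inside i_size */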
diff --git a/ANDROID_3.4.5/fs/ext4/resize.c b/ANDROID_3.4.5/fs/ext4/resize.c
deleted file mode 100644
index 53589ff8..00000000
--- a/ANDROID_3.4.5/fs/ext4/resize.c
+++ /dev/null
@@ -1,1689 +0,0 @@
-/*
- * linux/fs/ext4/resize.c
- *
- * Support for resizing an ext4 filesystem while it is mounted.
- *
- * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com>
- *
- * This could probably be made into a module, because it is not often in use.
- */
-
-
-#define EXT4FS_DEBUG
-
-#include <linux/errno.h>
-#include <linux/slab.h>
-
-#include "ext4_jbd2.h"
-
-int ext4_resize_begin(struct super_block *sb)
-{
- int ret = 0;
-
- if (!capable(CAP_SYS_RESOURCE))
- return -EPERM;
-
- /*
- * We are not allowed to do online-resizing on a filesystem mounted
- * with error, because it can destroy the filesystem easily.
- */
- if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
- ext4_warning(sb, "There are errors in the filesystem, "
- "so online resizing is not allowed\n");
- return -EPERM;
- }
-
- if (test_and_set_bit_lock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags))
- ret = -EBUSY;
-
- return ret;
-}
-
-void ext4_resize_end(struct super_block *sb)
-{
- clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags);
- smp_mb__after_clear_bit();
-}
-
-#define outside(b, first, last) ((b) < (first) || (b) >= (last))
-#define inside(b, first, last) ((b) >= (first) && (b) < (last))
-
-static int verify_group_input(struct super_block *sb,
- struct ext4_new_group_data *input)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_super_block *es = sbi->s_es;
- ext4_fsblk_t start = ext4_blocks_count(es);
- ext4_fsblk_t end = start + input->blocks_count;
- ext4_group_t group = input->group;
- ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
- unsigned overhead = ext4_bg_has_super(sb, group) ?
- (1 + ext4_bg_num_gdb(sb, group) +
- le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
- ext4_fsblk_t metaend = start + overhead;
- struct buffer_head *bh = NULL;
- ext4_grpblk_t free_blocks_count, offset;
- int err = -EINVAL;
-
- input->free_blocks_count = free_blocks_count =
- input->blocks_count - 2 - overhead - sbi->s_itb_per_group;
-
- if (test_opt(sb, DEBUG))
- printk(KERN_DEBUG "EXT4-fs: adding %s group %u: %u blocks "
- "(%d free, %u reserved)\n",
- ext4_bg_has_super(sb, input->group) ? "normal" :
- "no-super", input->group, input->blocks_count,
- free_blocks_count, input->reserved_blocks);
-
- ext4_get_group_no_and_offset(sb, start, NULL, &offset);
- if (group != sbi->s_groups_count)
- ext4_warning(sb, "Cannot add at group %u (only %u groups)",
- input->group, sbi->s_groups_count);
- else if (offset != 0)
- ext4_warning(sb, "Last group not full");
- else if (input->reserved_blocks > input->blocks_count / 5)
- ext4_warning(sb, "Reserved blocks too high (%u)",
- input->reserved_blocks);
- else if (free_blocks_count < 0)
- ext4_warning(sb, "Bad blocks count %u",
- input->blocks_count);
- else if (!(bh = sb_bread(sb, end - 1)))
- ext4_warning(sb, "Cannot read last block (%llu)",
- end - 1);
- else if (outside(input->block_bitmap, start, end))
- ext4_warning(sb, "Block bitmap not in group (block %llu)",
- (unsigned long long)input->block_bitmap);
- else if (outside(input->inode_bitmap, start, end))
- ext4_warning(sb, "Inode bitmap not in group (block %llu)",
- (unsigned long long)input->inode_bitmap);
- else if (outside(input->inode_table, start, end) ||
- outside(itend - 1, start, end))
- ext4_warning(sb, "Inode table not in group (blocks %llu-%llu)",
- (unsigned long long)input->inode_table, itend - 1);
- else if (input->inode_bitmap == input->block_bitmap)
- ext4_warning(sb, "Block bitmap same as inode bitmap (%llu)",
- (unsigned long long)input->block_bitmap);
- else if (inside(input->block_bitmap, input->inode_table, itend))
- ext4_warning(sb, "Block bitmap (%llu) in inode table "
- "(%llu-%llu)",
- (unsigned long long)input->block_bitmap,
- (unsigned long long)input->inode_table, itend - 1);
- else if (inside(input->inode_bitmap, input->inode_table, itend))
- ext4_warning(sb, "Inode bitmap (%llu) in inode table "
- "(%llu-%llu)",
- (unsigned long long)input->inode_bitmap,
- (unsigned long long)input->inode_table, itend - 1);
- else if (inside(input->block_bitmap, start, metaend))
- ext4_warning(sb, "Block bitmap (%llu) in GDT table (%llu-%llu)",
- (unsigned long long)input->block_bitmap,
- start, metaend - 1);
- else if (inside(input->inode_bitmap, start, metaend))
- ext4_warning(sb, "Inode bitmap (%llu) in GDT table (%llu-%llu)",
- (unsigned long long)input->inode_bitmap,
- start, metaend - 1);
- else if (inside(input->inode_table, start, metaend) ||
- inside(itend - 1, start, metaend))
- ext4_warning(sb, "Inode table (%llu-%llu) overlaps GDT table "
- "(%llu-%llu)",
- (unsigned long long)input->inode_table,
- itend - 1, start, metaend - 1);
- else
- err = 0;
- brelse(bh);
-
- return err;
-}
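
The overhead and free-blocks arithmetic at the top of this function works out as follows for one illustrative geometry (the numbers are assumptions, not taken from this file):

/*
 * 4 KiB blocks, 32768 blocks per group, 1 GDT block, 256 reserved GDT
 * blocks, 512 inode-table blocks per group, group carries a super:
 *
 *	overhead = 1 (super) + 1 (GDT) + 256 (reserved GDT) = 258
 *	free     = 32768 - 2 - 258 - 512 = 31996
 *
 * where the "- 2" accounts for the block and inode bitmaps.
 */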
-
-/*
- * ext4_new_flex_group_data is used by the 64bit-resize interface to add
- * one flex group at a time.
- */
-struct ext4_new_flex_group_data {
- struct ext4_new_group_data *groups; /* new_group_data for groups
- in the flex group */
- __u16 *bg_flags; /* block group flags of groups
- in @groups */
- ext4_group_t count; /* number of groups in @groups
- */
-};
-
-/*
- * alloc_flex_gd() allocates an ext4_new_flex_group_data sized for
- * @flexbg_size groups.
- *
- * Returns NULL on failure, otherwise the address of the allocated structure.
- */
-static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
-{
- struct ext4_new_flex_group_data *flex_gd;
-
- flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS);
- if (flex_gd == NULL)
- goto out3;
-
- /* guard the groups[] kmalloc below against multiplication overflow */
- if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data))
- goto out2;
- flex_gd->count = flexbg_size;
-
- flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) *
- flexbg_size, GFP_NOFS);
- if (flex_gd->groups == NULL)
- goto out2;
-
- flex_gd->bg_flags = kmalloc(flexbg_size * sizeof(__u16), GFP_NOFS);
- if (flex_gd->bg_flags == NULL)
- goto out1;
-
- return flex_gd;
-
-out1:
- kfree(flex_gd->groups);
-out2:
- kfree(flex_gd);
-out3:
- return NULL;
-}
-
-static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
-{
- kfree(flex_gd->bg_flags);
- kfree(flex_gd->groups);
- kfree(flex_gd);
-}
-
-/*
- * ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps
- * and inode tables for a flex group.
- *
- * This function is used by 64bit-resize.  Note that it allocates group
- * tables from the 1st group of the groups contained by @flex_gd, which
- * may be only part of a flex group.
- *
- * @sb: super block of fs to which the groups belongs
- */
-static void ext4_alloc_group_tables(struct super_block *sb,
- struct ext4_new_flex_group_data *flex_gd,
- int flexbg_size)
-{
- struct ext4_new_group_data *group_data = flex_gd->groups;
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- ext4_fsblk_t start_blk;
- ext4_fsblk_t last_blk;
- ext4_group_t src_group;
- ext4_group_t bb_index = 0;
- ext4_group_t ib_index = 0;
- ext4_group_t it_index = 0;
- ext4_group_t group;
- ext4_group_t last_group;
- unsigned overhead;
-
- BUG_ON(flex_gd->count == 0 || group_data == NULL);
-
- src_group = group_data[0].group;
- last_group = src_group + flex_gd->count - 1;
-
- BUG_ON((flexbg_size > 1) && ((src_group & ~(flexbg_size - 1)) !=
- (last_group & ~(flexbg_size - 1))));
-next_group:
- group = group_data[0].group;
- start_blk = ext4_group_first_block_no(sb, src_group);
- last_blk = start_blk + group_data[src_group - group].blocks_count;
-
- overhead = ext4_bg_has_super(sb, src_group) ?
- (1 + ext4_bg_num_gdb(sb, src_group) +
- le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
-
- start_blk += overhead;
-
- BUG_ON(src_group >= group_data[0].group + flex_gd->count);
- /* We collect contiguous blocks as much as possible. */
- src_group++;
- for (; src_group <= last_group; src_group++)
- if (!ext4_bg_has_super(sb, src_group))
- last_blk += group_data[src_group - group].blocks_count;
- else
- break;
-
- /* Allocate block bitmaps */
- for (; bb_index < flex_gd->count; bb_index++) {
- if (start_blk >= last_blk)
- goto next_group;
- group_data[bb_index].block_bitmap = start_blk++;
- ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
- group -= group_data[0].group;
- group_data[group].free_blocks_count--;
- if (flexbg_size > 1)
- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
- }
-
- /* Allocate inode bitmaps */
- for (; ib_index < flex_gd->count; ib_index++) {
- if (start_blk >= last_blk)
- goto next_group;
- group_data[ib_index].inode_bitmap = start_blk++;
- ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
- group -= group_data[0].group;
- group_data[group].free_blocks_count--;
- if (flexbg_size > 1)
- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
- }
-
- /* Allocate inode tables */
- for (; it_index < flex_gd->count; it_index++) {
- if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk)
- goto next_group;
- group_data[it_index].inode_table = start_blk;
- ext4_get_group_no_and_offset(sb, start_blk, &group, NULL);
- group -= group_data[0].group;
- group_data[group].free_blocks_count -=
- EXT4_SB(sb)->s_itb_per_group;
- if (flexbg_size > 1)
- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
-
- start_blk += EXT4_SB(sb)->s_itb_per_group;
- }
-
- if (test_opt(sb, DEBUG)) {
- int i;
- group = group_data[0].group;
-
- printk(KERN_DEBUG "EXT4-fs: adding a flex group with "
- "%d groups, flexbg size is %d:\n", flex_gd->count,
- flexbg_size);
-
- for (i = 0; i < flex_gd->count; i++) {
- printk(KERN_DEBUG "adding %s group %u: %u "
- "blocks (%d free)\n",
- ext4_bg_has_super(sb, group + i) ? "normal" :
- "no-super", group + i,
- group_data[i].blocks_count,
- group_data[i].free_blocks_count);
- }
- }
-}
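
For a flex group of four member groups, the three allocation loops above pack the metadata at the front of the flex group, roughly like this (a sketch; exact sizes depend on geometry):

/*
 * | sb+GDT (g0) | BB g0..g3 | IB g0..g3 | IT g0 | IT g1 | IT g2 | IT g3 | data ... |
 *
 * BB = block bitmap, IB = inode bitmap, IT = inode table.
 */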
-
-static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
- ext4_fsblk_t blk)
-{
- struct buffer_head *bh;
- int err;
-
- bh = sb_getblk(sb, blk);
- if (!bh)
- return ERR_PTR(-EIO);
- if ((err = ext4_journal_get_write_access(handle, bh))) {
- brelse(bh);
- bh = ERR_PTR(err);
- } else {
- memset(bh->b_data, 0, sb->s_blocksize);
- set_buffer_uptodate(bh);
- }
-
- return bh;
-}
-
-/*
- * If we have fewer than thresh credits, extend by EXT4_MAX_TRANS_DATA.
- * If that fails, restart the transaction & regain write access for the
- * buffer head which is used for block_bitmap modifications.
- */
-static int extend_or_restart_transaction(handle_t *handle, int thresh)
-{
- int err;
-
- if (ext4_handle_has_enough_credits(handle, thresh))
- return 0;
-
- err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
- if (err < 0)
- return err;
- if (err) {
- err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA);
- if (err)
- return err;
- }
-
- return 0;
-}
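
Callers use this as a guard immediately before dirtying metadata. A minimal sketch of the idiom (the bh here is illustrative):

/* Ensure one credit is available, then take write access as usual. */
err = extend_or_restart_transaction(handle, 1);
if (err)
	return err;
err = ext4_journal_get_write_access(handle, bh);
if (err)
	return err;
/* ... modify bh->b_data, then ext4_handle_dirty_metadata() ... */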
-
-/*
- * set_flexbg_block_bitmap() marks @count blocks starting from @block as used.
- *
- * Helper function for setup_new_flex_group_blocks() which sets up the
- * block bitmaps of a new flex group.
- *
- * @sb: super block
- * @handle: journal handle
- * @flex_gd: flex group data
- */
-static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
- struct ext4_new_flex_group_data *flex_gd,
- ext4_fsblk_t block, ext4_group_t count)
-{
- ext4_group_t count2;
-
- ext4_debug("mark blocks [%llu/%u] used\n", block, count);
- for (count2 = count; count > 0; count -= count2, block += count2) {
- ext4_fsblk_t start;
- struct buffer_head *bh;
- ext4_group_t group;
- int err;
-
- ext4_get_group_no_and_offset(sb, block, &group, NULL);
- start = ext4_group_first_block_no(sb, group);
- group -= flex_gd->groups[0].group;
-
- count2 = sb->s_blocksize * 8 - (block - start);
- if (count2 > count)
- count2 = count;
-
- if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) {
- BUG_ON(flex_gd->count > 1);
- continue;
- }
-
- err = extend_or_restart_transaction(handle, 1);
- if (err)
- return err;
-
- bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
- if (!bh)
- return -EIO;
-
- err = ext4_journal_get_write_access(handle, bh);
- if (err)
- return err;
- ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block,
- block - start, count2);
- ext4_set_bits(bh->b_data, block - start, count2);
-
- err = ext4_handle_dirty_metadata(handle, NULL, bh);
- if (unlikely(err))
- return err;
- brelse(bh);
- }
-
- return 0;
-}
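
The count2 computation splits the range at block-bitmap boundaries. With 4 KiB blocks each bitmap covers 32768 blocks, so (a worked example under that assumption):

/*
 * block starts 100 blocks into a group, count = 40000:
 *	pass 1: count2 = 32768 - 100   = 32668 bits set in this bitmap
 *	pass 2: count2 = 40000 - 32668 =  7332 bits set in the next one
 */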
-
-/*
- * Set up the block and inode bitmaps, and the inode table for the new groups.
- * This doesn't need to be part of the main transaction, since we are only
- * changing blocks outside the actual filesystem. We still do journaling to
- * ensure the recovery is correct in case of a failure just after resize.
- * If any part of this fails, we simply abort the resize.
- *
- * setup_new_flex_group_blocks handles a flex group as follows:
- * 1. copy super block and GDT, and initialize group tables if necessary.
- * In this step, we only set bits in blocks bitmaps for blocks taken by
- * super block and GDT.
- * 2. allocate group tables in block bitmaps, that is, set bits in block
- * bitmap for blocks taken by group tables.
- */
-static int setup_new_flex_group_blocks(struct super_block *sb,
- struct ext4_new_flex_group_data *flex_gd)
-{
- int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group};
- ext4_fsblk_t start;
- ext4_fsblk_t block;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_super_block *es = sbi->s_es;
- struct ext4_new_group_data *group_data = flex_gd->groups;
- __u16 *bg_flags = flex_gd->bg_flags;
- handle_t *handle;
- ext4_group_t group, count;
- struct buffer_head *bh = NULL;
- int reserved_gdb, i, j, err = 0, err2;
-
- BUG_ON(!flex_gd->count || !group_data ||
- group_data[0].group != sbi->s_groups_count);
-
- reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
-
- /* This transaction may be extended/restarted along the way */
- handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
-
- group = group_data[0].group;
- for (i = 0; i < flex_gd->count; i++, group++) {
- unsigned long gdblocks;
-
- gdblocks = ext4_bg_num_gdb(sb, group);
- start = ext4_group_first_block_no(sb, group);
-
- /* Copy all of the GDT blocks into the backup in this group */
- for (j = 0, block = start + 1; j < gdblocks; j++, block++) {
- struct buffer_head *gdb;
-
- ext4_debug("update backup group %#04llx\n", block);
- err = extend_or_restart_transaction(handle, 1);
- if (err)
- goto out;
-
- gdb = sb_getblk(sb, block);
- if (!gdb) {
- err = -EIO;
- goto out;
- }
-
- err = ext4_journal_get_write_access(handle, gdb);
- if (err) {
- brelse(gdb);
- goto out;
- }
- memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data,
- gdb->b_size);
- set_buffer_uptodate(gdb);
-
- err = ext4_handle_dirty_metadata(handle, NULL, gdb);
- if (unlikely(err)) {
- brelse(gdb);
- goto out;
- }
- brelse(gdb);
- }
-
- /* Zero out all of the reserved backup group descriptor
- * table blocks
- */
- if (ext4_bg_has_super(sb, group)) {
- err = sb_issue_zeroout(sb, gdblocks + start + 1,
- reserved_gdb, GFP_NOFS);
- if (err)
- goto out;
- }
-
- /* Initialize the group tables of group @group */
- if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED))
- goto handle_bb;
-
- /* Zero out all of the inode table blocks */
- block = group_data[i].inode_table;
- ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
- block, sbi->s_itb_per_group);
- err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group,
- GFP_NOFS);
- if (err)
- goto out;
-
-handle_bb:
- if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT)
- goto handle_ib;
-
- /* Initialize block bitmap of the @group */
- block = group_data[i].block_bitmap;
- err = extend_or_restart_transaction(handle, 1);
- if (err)
- goto out;
-
- bh = bclean(handle, sb, block);
- if (IS_ERR(bh)) {
- err = PTR_ERR(bh);
- goto out;
- }
- if (ext4_bg_has_super(sb, group)) {
- ext4_debug("mark backup superblock %#04llx (+0)\n",
- start);
- ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb +
- 1);
- }
- ext4_mark_bitmap_end(group_data[i].blocks_count,
- sb->s_blocksize * 8, bh->b_data);
- err = ext4_handle_dirty_metadata(handle, NULL, bh);
- if (err)
- goto out;
- brelse(bh);
-
-handle_ib:
- if (bg_flags[i] & EXT4_BG_INODE_UNINIT)
- continue;
-
- /* Initialize inode bitmap of the @group */
- block = group_data[i].inode_bitmap;
- err = extend_or_restart_transaction(handle, 1);
- if (err)
- goto out;
- /* Mark unused entries in inode bitmap used */
- bh = bclean(handle, sb, block);
- if (IS_ERR(bh)) {
- err = PTR_ERR(bh);
- goto out;
- }
-
- ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
- sb->s_blocksize * 8, bh->b_data);
- err = ext4_handle_dirty_metadata(handle, NULL, bh);
- if (err)
- goto out;
- brelse(bh);
- }
- bh = NULL;
-
- /* Mark group tables in block bitmap */
- for (j = 0; j < GROUP_TABLE_COUNT; j++) {
- count = group_table_count[j];
- start = (&group_data[0].block_bitmap)[j];
- block = start;
- for (i = 1; i < flex_gd->count; i++) {
- block += group_table_count[j];
- if (block == (&group_data[i].block_bitmap)[j]) {
- count += group_table_count[j];
- continue;
- }
- err = set_flexbg_block_bitmap(sb, handle,
- flex_gd, start, count);
- if (err)
- goto out;
- count = group_table_count[j];
- start = group_data[i].block_bitmap;
- block = start;
- }
-
- if (count) {
- err = set_flexbg_block_bitmap(sb, handle,
- flex_gd, start, count);
- if (err)
- goto out;
- }
- }
-
-out:
- brelse(bh);
- err2 = ext4_journal_stop(handle);
- if (err2 && !err)
- err = err2;
-
- return err;
-}
-
-/*
- * Iterate through the groups which hold BACKUP superblock/GDT copies in an
- * ext4 filesystem. The counters should be initialized to 1, 5, and 7 before
- * calling this for the first time. In a sparse filesystem it will be the
- * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
- * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
- */
-static unsigned ext4_list_backups(struct super_block *sb, unsigned *three,
- unsigned *five, unsigned *seven)
-{
- unsigned *min = three;
- int mult = 3;
- unsigned ret;
-
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
- ret = *min;
- *min += 1;
- return ret;
- }
-
- if (*five < *min) {
- min = five;
- mult = 5;
- }
- if (*seven < *min) {
- min = seven;
- mult = 7;
- }
-
- ret = *min;
- *min *= mult;
-
- return ret;
-}
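
For a sparse filesystem the three counters merge the powers of 3, 5 and 7 in ascending order. A stand-alone user-space sketch of the same enumeration (plain C for illustration, not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned three = 1, five = 5, seven = 7;
	int i;

	for (i = 0; i < 8; i++) {
		unsigned *min = &three, mult = 3;

		if (five < *min)  { min = &five;  mult = 5; }
		if (seven < *min) { min = &seven; mult = 7; }
		printf("%u ", *min);	/* prints: 1 3 5 7 9 25 27 49 */
		*min *= mult;
	}
	printf("\n");
	return 0;
}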
-
-/*
- * Check that all of the backup GDT blocks are held in the primary GDT block.
- * It is assumed that they are stored in group order. Returns the number of
- * groups in the current filesystem that have BACKUPS, or a -ve error code.
- */
-static int verify_reserved_gdb(struct super_block *sb,
- ext4_group_t end,
- struct buffer_head *primary)
-{
- const ext4_fsblk_t blk = primary->b_blocknr;
- unsigned three = 1;
- unsigned five = 5;
- unsigned seven = 7;
- unsigned grp;
- __le32 *p = (__le32 *)primary->b_data;
- int gdbackups = 0;
-
- while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
- if (le32_to_cpu(*p++) !=
- grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
- ext4_warning(sb, "reserved GDT %llu"
- " missing grp %d (%llu)",
- blk, grp,
- grp *
- (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
- blk);
- return -EINVAL;
- }
- if (++gdbackups > EXT4_ADDR_PER_BLOCK(sb))
- return -EFBIG;
- }
-
- return gdbackups;
-}
-
-/*
- * Called when we need to bring a reserved group descriptor table block into
- * use from the resize inode. The primary copy of the new GDT block currently
- * is an indirect block (under the double indirect block in the resize inode).
- * The new backup GDT blocks will be stored as leaf blocks in this indirect
- * block, in group order. Even though we know all the block numbers we need,
- * we check to ensure that the resize inode has actually reserved these blocks.
- *
- * We don't need to update the block bitmaps because the blocks are still in use.
- *
- * We get all of the error cases out of the way, so that we are sure to not
- * fail once we start modifying the data on disk, because JBD has no rollback.
- */
-static int add_new_gdb(handle_t *handle, struct inode *inode,
- ext4_group_t group)
-{
- struct super_block *sb = inode->i_sb;
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
- ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
- struct buffer_head **o_group_desc, **n_group_desc;
- struct buffer_head *dind;
- struct buffer_head *gdb_bh;
- int gdbackups;
- struct ext4_iloc iloc;
- __le32 *data;
- int err;
-
- if (test_opt(sb, DEBUG))
- printk(KERN_DEBUG
- "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
- gdb_num);
-
- /*
- * If we are not using the primary superblock/GDT copy don't resize,
- * because the user tools have no way of handling this. Probably a
- * bad time to do it anyway.
- */
- if (EXT4_SB(sb)->s_sbh->b_blocknr !=
- le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
- ext4_warning(sb, "won't resize using backup superblock at %llu",
- (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
- return -EPERM;
- }
-
- gdb_bh = sb_bread(sb, gdblock);
- if (!gdb_bh)
- return -EIO;
-
- gdbackups = verify_reserved_gdb(sb, group, gdb_bh);
- if (gdbackups < 0) {
- err = gdbackups;
- goto exit_bh;
- }
-
- data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
- dind = sb_bread(sb, le32_to_cpu(*data));
- if (!dind) {
- err = -EIO;
- goto exit_bh;
- }
-
- data = (__le32 *)dind->b_data;
- if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
- ext4_warning(sb, "new group %u GDT block %llu not reserved",
- group, gdblock);
- err = -EINVAL;
- goto exit_dind;
- }
-
- err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
- if (unlikely(err))
- goto exit_dind;
-
- err = ext4_journal_get_write_access(handle, gdb_bh);
- if (unlikely(err))
- goto exit_sbh;
-
- err = ext4_journal_get_write_access(handle, dind);
- if (unlikely(err))
- ext4_std_error(sb, err);
-
- /* ext4_reserve_inode_write() gets a reference on the iloc */
- err = ext4_reserve_inode_write(handle, inode, &iloc);
- if (unlikely(err))
- goto exit_dindj;
-
- n_group_desc = ext4_kvmalloc((gdb_num + 1) *
- sizeof(struct buffer_head *),
- GFP_NOFS);
- if (!n_group_desc) {
- err = -ENOMEM;
- ext4_warning(sb, "not enough memory for %lu groups",
- gdb_num + 1);
- goto exit_inode;
- }
-
- /*
- * Finally, we have all of the possible failures behind us...
- *
- * Remove new GDT block from inode double-indirect block and clear out
- * the new GDT block for use (which also "frees" the backup GDT blocks
- * from the reserved inode). We don't need to change the bitmaps for
- * these blocks, because they are marked as in-use from being in the
- * reserved inode, and will become GDT blocks (primary and backup).
- */
- data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
- err = ext4_handle_dirty_metadata(handle, NULL, dind);
- if (unlikely(err)) {
- ext4_std_error(sb, err);
- goto exit_inode;
- }
- inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
- ext4_mark_iloc_dirty(handle, inode, &iloc);
- memset(gdb_bh->b_data, 0, sb->s_blocksize);
- err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
- if (unlikely(err)) {
- ext4_std_error(sb, err);
- goto exit_inode;
- }
- brelse(dind);
-
- o_group_desc = EXT4_SB(sb)->s_group_desc;
- memcpy(n_group_desc, o_group_desc,
- EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
- n_group_desc[gdb_num] = gdb_bh;
- EXT4_SB(sb)->s_group_desc = n_group_desc;
- EXT4_SB(sb)->s_gdb_count++;
- ext4_kvfree(o_group_desc);
-
- le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
- err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
- if (err)
- ext4_std_error(sb, err);
-
- return err;
-
-exit_inode:
- ext4_kvfree(n_group_desc);
- /* ext4_handle_release_buffer(handle, iloc.bh); */
- brelse(iloc.bh);
-exit_dindj:
- /* ext4_handle_release_buffer(handle, dind); */
-exit_sbh:
- /* ext4_handle_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
-exit_dind:
- brelse(dind);
-exit_bh:
- brelse(gdb_bh);
-
- ext4_debug("leaving with error %d\n", err);
- return err;
-}
-
-/*
- * Called when we are adding a new group which has a backup copy of each of
- * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
- * We need to add these reserved backup GDT blocks to the resize inode, so
- * that they are kept for future resizing and not allocated to files.
- *
- * Each reserved backup GDT block will go into a different indirect block.
- * The indirect blocks are actually the primary reserved GDT blocks,
- * so we know in advance what their block numbers are. We only get the
- * double-indirect block to verify it is pointing to the primary reserved
- * GDT blocks so we don't overwrite a data block by accident. The reserved
- * backup GDT blocks are stored in their reserved primary GDT block.
- */
-static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
- ext4_group_t group)
-{
- struct super_block *sb = inode->i_sb;
- int reserved_gdb = le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
- struct buffer_head **primary;
- struct buffer_head *dind;
- struct ext4_iloc iloc;
- ext4_fsblk_t blk;
- __le32 *data, *end;
- int gdbackups = 0;
- int res, i;
- int err;
-
- primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
- if (!primary)
- return -ENOMEM;
-
- data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
- dind = sb_bread(sb, le32_to_cpu(*data));
- if (!dind) {
- err = -EIO;
- goto exit_free;
- }
-
- blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count;
- data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count %
- EXT4_ADDR_PER_BLOCK(sb));
- end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb);
-
- /* Get each reserved primary GDT block and verify it holds backups */
- for (res = 0; res < reserved_gdb; res++, blk++) {
- if (le32_to_cpu(*data) != blk) {
- ext4_warning(sb, "reserved block %llu"
- " not at offset %ld",
- blk,
- (long)(data - (__le32 *)dind->b_data));
- err = -EINVAL;
- goto exit_bh;
- }
- primary[res] = sb_bread(sb, blk);
- if (!primary[res]) {
- err = -EIO;
- goto exit_bh;
- }
- gdbackups = verify_reserved_gdb(sb, group, primary[res]);
- if (gdbackups < 0) {
- brelse(primary[res]);
- err = gdbackups;
- goto exit_bh;
- }
- if (++data >= end)
- data = (__le32 *)dind->b_data;
- }
-
- for (i = 0; i < reserved_gdb; i++) {
- if ((err = ext4_journal_get_write_access(handle, primary[i]))) {
- /*
- int j;
- for (j = 0; j < i; j++)
- ext4_handle_release_buffer(handle, primary[j]);
- */
- goto exit_bh;
- }
- }
-
- if ((err = ext4_reserve_inode_write(handle, inode, &iloc)))
- goto exit_bh;
-
- /*
- * Finally we can add each of the reserved backup GDT blocks from
- * the new group to its reserved primary GDT block.
- */
- blk = group * EXT4_BLOCKS_PER_GROUP(sb);
- for (i = 0; i < reserved_gdb; i++) {
- int err2;
- data = (__le32 *)primary[i]->b_data;
- /* printk("reserving backup %lu[%u] = %lu\n",
- primary[i]->b_blocknr, gdbackups,
- blk + primary[i]->b_blocknr); */
- data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
- err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]);
- if (!err)
- err = err2;
- }
- inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9;
- ext4_mark_iloc_dirty(handle, inode, &iloc);
-
-exit_bh:
- while (--res >= 0)
- brelse(primary[res]);
- brelse(dind);
-
-exit_free:
- kfree(primary);
-
- return err;
-}
-
-/*
- * Update the backup copies of the ext4 metadata. These don't need to be part
- * of the main resize transaction, because e2fsck will re-write them if there
- * is a problem (basically only OOM will cause a problem). However, we
- * _should_ update the backups if possible, in case the primary gets trashed
- * for some reason and we need to run e2fsck from a backup superblock. The
- * important part is that the new block and inode counts are in the backup
- * superblocks, and the location of the new group metadata in the GDT backups.
- *
- * We do not need to take the s_resize_lock for this, because these
- * blocks are not otherwise touched by the filesystem code when it is
- * mounted. We don't need to worry about last changing from
- * sbi->s_groups_count, because the worst that can happen is that we
- * do not copy the full number of backups at this time. The resize
- * which changed s_groups_count will backup again.
- */
-static void update_backups(struct super_block *sb,
- int blk_off, char *data, int size)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- const ext4_group_t last = sbi->s_groups_count;
- const int bpg = EXT4_BLOCKS_PER_GROUP(sb);
- unsigned three = 1;
- unsigned five = 5;
- unsigned seven = 7;
- ext4_group_t group;
- int rest = sb->s_blocksize - size;
- handle_t *handle;
- int err = 0, err2;
-
- handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
- if (IS_ERR(handle)) {
- group = 1;
- err = PTR_ERR(handle);
- goto exit_err;
- }
-
- while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) {
- struct buffer_head *bh;
-
- /* Out of journal space, and can't get more - abort - so sad */
- if (ext4_handle_valid(handle) &&
- handle->h_buffer_credits == 0 &&
- ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) &&
- (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
- break;
-
- bh = sb_getblk(sb, group * bpg + blk_off);
- if (!bh) {
- err = -EIO;
- break;
- }
- ext4_debug("update metadata backup %#04lx\n",
- (unsigned long)bh->b_blocknr);
- if ((err = ext4_journal_get_write_access(handle, bh)))
- break;
- lock_buffer(bh);
- memcpy(bh->b_data, data, size);
- if (rest)
- memset(bh->b_data + size, 0, rest);
- set_buffer_uptodate(bh);
- unlock_buffer(bh);
- err = ext4_handle_dirty_metadata(handle, NULL, bh);
- if (unlikely(err))
- ext4_std_error(sb, err);
- brelse(bh);
- }
- if ((err2 = ext4_journal_stop(handle)) && !err)
- err = err2;
-
- /*
- * Ugh! Need to have e2fsck write the backup copies. It is too
- * late to revert the resize, we shouldn't fail just because of
- * the backup copies (they are only needed in case of corruption).
- *
- * However, if we got here we have a journal problem too, so we
- * can't really start a transaction to mark the superblock.
- * Chicken out and just set the flag in the hope that it will be written
- * to disk, and if not - we will simply wait until next fsck.
- */
-exit_err:
- if (err) {
- ext4_warning(sb, "can't update backup for group %u (err %d), "
- "forcing fsck on next reboot", group, err);
- sbi->s_mount_state &= ~EXT4_VALID_FS;
- sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
- mark_buffer_dirty(sbi->s_sbh);
- }
-}
-
-/*
- * ext4_add_new_descs() adds @count group descriptor of groups
- * starting at @group
- *
- * @handle: journal handle
- * @sb: super block
- * @group: the group no. of the first group desc to be added
- * @resize_inode: the resize inode
- * @count: number of group descriptors to be added
- */
-static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
- ext4_group_t group, struct inode *resize_inode,
- ext4_group_t count)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_super_block *es = sbi->s_es;
- struct buffer_head *gdb_bh;
- int i, gdb_off, gdb_num, err = 0;
-
- for (i = 0; i < count; i++, group++) {
- int reserved_gdb = ext4_bg_has_super(sb, group) ?
- le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
-
- gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
- gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
-
- /*
- * We will only either add reserved group blocks to a backup group
- * or remove reserved blocks for the first group in a new group block.
- * Doing both would mean more complex code, and sane people don't
- * use non-sparse filesystems anymore. This is already checked above.
- */
- if (gdb_off) {
- gdb_bh = sbi->s_group_desc[gdb_num];
- err = ext4_journal_get_write_access(handle, gdb_bh);
-
- if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group))
- err = reserve_backup_gdb(handle, resize_inode, group);
- } else
- err = add_new_gdb(handle, resize_inode, group);
- if (err)
- break;
- }
- return err;
-}
-
-/*
- * ext4_setup_new_descs() will set up the group descriptors of a flex bg
- */
-static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
- struct ext4_new_flex_group_data *flex_gd)
-{
- struct ext4_new_group_data *group_data = flex_gd->groups;
- struct ext4_group_desc *gdp;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct buffer_head *gdb_bh;
- ext4_group_t group;
- __u16 *bg_flags = flex_gd->bg_flags;
- int i, gdb_off, gdb_num, err = 0;
-
-
- for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) {
- group = group_data->group;
-
- gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
- gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
-
- /*
- * get_write_access() has been called on gdb_bh by ext4_add_new_descs().
- */
- gdb_bh = sbi->s_group_desc[gdb_num];
- /* Update group descriptor block for new group */
- gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data +
- gdb_off * EXT4_DESC_SIZE(sb));
-
- memset(gdp, 0, EXT4_DESC_SIZE(sb));
- ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap);
- ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap);
- ext4_inode_table_set(sb, gdp, group_data->inode_table);
- ext4_free_group_clusters_set(sb, gdp,
- EXT4_B2C(sbi, group_data->free_blocks_count));
- ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
- gdp->bg_flags = cpu_to_le16(*bg_flags);
- gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
-
- err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
- if (unlikely(err)) {
- ext4_std_error(sb, err);
- break;
- }
-
- /*
- * We can allocate memory for mb_alloc based on the new group
- * descriptor
- */
- err = ext4_mb_add_groupinfo(sb, group, gdp);
- if (err)
- break;
- }
- return err;
-}
-
-/*
- * ext4_update_super() updates the super block so that the newly added
- * groups can be seen by the filesystem.
- *
- * @sb: super block
- * @flex_gd: new added groups
- */
-static void ext4_update_super(struct super_block *sb,
- struct ext4_new_flex_group_data *flex_gd)
-{
- ext4_fsblk_t blocks_count = 0;
- ext4_fsblk_t free_blocks = 0;
- ext4_fsblk_t reserved_blocks = 0;
- struct ext4_new_group_data *group_data = flex_gd->groups;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_super_block *es = sbi->s_es;
- int i;
-
- BUG_ON(flex_gd->count == 0 || group_data == NULL);
- /*
- * Make the new blocks and inodes valid next. We do this before
- * increasing the group count so that once the group is enabled,
- * all of its blocks and inodes are already valid.
- *
- * We always allocate group-by-group, then block-by-block or
- * inode-by-inode within a group, so enabling these
- * blocks/inodes before the group is live won't actually let us
- * allocate the new space yet.
- */
- for (i = 0; i < flex_gd->count; i++) {
- blocks_count += group_data[i].blocks_count;
- free_blocks += group_data[i].free_blocks_count;
- }
-
- reserved_blocks = ext4_r_blocks_count(es) * 100;
- do_div(reserved_blocks, ext4_blocks_count(es));
- reserved_blocks *= blocks_count;
- do_div(reserved_blocks, 100);
-
- ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count);
- ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks);
- le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) *
- flex_gd->count);
- le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) *
- flex_gd->count);
-
- /*
- * We need to protect s_groups_count against other CPUs seeing
- * inconsistent state in the superblock.
- *
- * The precise rules we use are:
- *
- * * Writers must perform a smp_wmb() after updating all
- * dependent data and before modifying the groups count
- *
- * * Readers must perform an smp_rmb() after reading the groups
- * count and before reading any dependent data.
- *
- * NB. These rules can be relaxed when checking the group count
- * while freeing data, as we can only allocate from a block
- * group after serialising against the group count, and we can
- * only then free after serialising in turn against that
- * allocation.
- */
- smp_wmb();
-
- /* Update the global fs size fields */
- sbi->s_groups_count += flex_gd->count;
-
- /* Update the reserved block counts only once the new group is
- * active. */
- ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
- reserved_blocks);
-
- /* Update the free space counts */
- percpu_counter_add(&sbi->s_freeclusters_counter,
- EXT4_B2C(sbi, free_blocks));
- percpu_counter_add(&sbi->s_freeinodes_counter,
- EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
-
- if (EXT4_HAS_INCOMPAT_FEATURE(sb,
- EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
- sbi->s_log_groups_per_flex) {
- ext4_group_t flex_group;
- flex_group = ext4_flex_group(sbi, group_data[0].group);
- atomic_add(EXT4_B2C(sbi, free_blocks),
- &sbi->s_flex_groups[flex_group].free_clusters);
- atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
- &sbi->s_flex_groups[flex_group].free_inodes);
- }
-
- if (test_opt(sb, DEBUG))
- printk(KERN_DEBUG "EXT4-fs: added %u groups: "
- "%llu blocks (%llu free, %llu reserved)\n", flex_gd->count,
- blocks_count, free_blocks, reserved_blocks);
-}
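
The smp_wmb() above pairs with an smp_rmb() on the reader side; ext4.h carries a helper of this shape (reproduced here as a sketch, not verbatim):

static inline ext4_group_t ext4_get_groups_count(struct super_block *sb)
{
	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;

	smp_rmb();	/* pairs with smp_wmb() in ext4_update_super() */
	return ngroups;
}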
-
-/* Add a flex group to an fs. Ensure we handle all possible error conditions
- * _before_ we start modifying the filesystem, because we cannot abort the
- * transaction and not have it write the data to disk.
- */
-static int ext4_flex_group_add(struct super_block *sb,
- struct inode *resize_inode,
- struct ext4_new_flex_group_data *flex_gd)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_super_block *es = sbi->s_es;
- ext4_fsblk_t o_blocks_count;
- ext4_grpblk_t last;
- ext4_group_t group;
- handle_t *handle;
- unsigned reserved_gdb;
- int err = 0, err2 = 0, credit;
-
- BUG_ON(!flex_gd->count || !flex_gd->groups || !flex_gd->bg_flags);
-
- reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
- o_blocks_count = ext4_blocks_count(es);
- ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
- BUG_ON(last);
-
- err = setup_new_flex_group_blocks(sb, flex_gd);
- if (err)
- goto exit;
- /*
- * We will always be modifying at least the superblock and GDT
- * block. If we are adding a group past the last current GDT block,
- * we will also modify the inode and the dindirect block. If we
- * are adding a group with superblock/GDT backups we will also
- * modify each of the reserved GDT dindirect blocks.
- */
- credit = flex_gd->count * 4 + reserved_gdb;
- handle = ext4_journal_start_sb(sb, credit);
- if (IS_ERR(handle)) {
- err = PTR_ERR(handle);
- goto exit;
- }
-
- err = ext4_journal_get_write_access(handle, sbi->s_sbh);
- if (err)
- goto exit_journal;
-
- group = flex_gd->groups[0].group;
- BUG_ON(group != EXT4_SB(sb)->s_groups_count);
- err = ext4_add_new_descs(handle, sb, group,
- resize_inode, flex_gd->count);
- if (err)
- goto exit_journal;
-
- err = ext4_setup_new_descs(handle, sb, flex_gd);
- if (err)
- goto exit_journal;
-
- ext4_update_super(sb, flex_gd);
-
- err = ext4_handle_dirty_super(handle, sb);
-
-exit_journal:
- err2 = ext4_journal_stop(handle);
- if (!err)
- err = err2;
-
- if (!err) {
- int i;
- update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
- sizeof(struct ext4_super_block));
- for (i = 0; i < flex_gd->count; i++, group++) {
- struct buffer_head *gdb_bh;
- int gdb_num;
- /* s_group_desc[] is indexed by descriptor block, not by group */
- gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
- gdb_bh = sbi->s_group_desc[gdb_num];
- update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
- gdb_bh->b_size);
- }
- }
-exit:
- return err;
-}
-
-static int ext4_setup_next_flex_gd(struct super_block *sb,
- struct ext4_new_flex_group_data *flex_gd,
- ext4_fsblk_t n_blocks_count,
- unsigned long flexbg_size)
-{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- struct ext4_new_group_data *group_data = flex_gd->groups;
- ext4_fsblk_t o_blocks_count;
- ext4_group_t n_group;
- ext4_group_t group;
- ext4_group_t last_group;
- ext4_grpblk_t last;
- ext4_grpblk_t blocks_per_group;
- unsigned long i;
-
- blocks_per_group = EXT4_BLOCKS_PER_GROUP(sb);
-
- o_blocks_count = ext4_blocks_count(es);
-
- if (o_blocks_count == n_blocks_count)
- return 0;
-
- ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
- BUG_ON(last);
- ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last);
-
- last_group = group | (flexbg_size - 1);
- if (last_group > n_group)
- last_group = n_group;
-
- flex_gd->count = last_group - group + 1;
-
- for (i = 0; i < flex_gd->count; i++) {
- int overhead;
-
- group_data[i].group = group + i;
- group_data[i].blocks_count = blocks_per_group;
- overhead = ext4_bg_has_super(sb, group + i) ?
- (1 + ext4_bg_num_gdb(sb, group + i) +
- le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
- group_data[i].free_blocks_count = blocks_per_group - overhead;
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
- flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
- EXT4_BG_INODE_UNINIT;
- else
- flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED;
- }
-
- if (last_group == n_group &&
- EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
- /* We need to initialize the block bitmap of the last group. */
- flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT;
-
- if ((last_group == n_group) && (last != blocks_per_group - 1)) {
- group_data[i - 1].blocks_count = last + 1;
- group_data[i - 1].free_blocks_count -= blocks_per_group-
- last - 1;
- }
-
- return 1;
-}
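
Because flexbg_size is a power of two, the bitwise OR rounds group up to the last member of its flex group. A worked example with an assumed flexbg_size of 16:

/*
 * group = 35, flexbg_size = 16:
 *	last_group = 35 | (16 - 1) = 0b100011 | 0b001111 = 47
 * so the flex group spans groups 32..47; if the filesystem ends
 * earlier (n_group < 47), last_group is clamped down to n_group.
 */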
-
-/* Add group descriptor data to an existing or new group descriptor block.
- * Ensure we handle all possible error conditions _before_ we start modifying
- * the filesystem, because we cannot abort the transaction and not have it
- * write the data to disk.
- *
- * If we are on a GDT block boundary, we need to get the reserved GDT block.
- * Otherwise, we may need to add backup GDT blocks for a sparse group.
- *
- * We only need to hold the superblock lock while we are actually adding
- * in the new group's counts to the superblock. Prior to that we have
- * not really "added" the group at all. We re-check that we are still
- * adding in the last group in case things have changed since verifying.
- */
-int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
-{
- struct ext4_new_flex_group_data flex_gd;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_super_block *es = sbi->s_es;
- int reserved_gdb = ext4_bg_has_super(sb, input->group) ?
- le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
- struct inode *inode = NULL;
- int gdb_off, gdb_num;
- int err;
- __u16 bg_flags = 0;
-
- gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
- gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb);
-
- if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
- ext4_warning(sb, "Can't resize non-sparse filesystem further");
- return -EPERM;
- }
-
- if (ext4_blocks_count(es) + input->blocks_count <
- ext4_blocks_count(es)) {
- ext4_warning(sb, "blocks_count overflow");
- return -EINVAL;
- }
-
- if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
- le32_to_cpu(es->s_inodes_count)) {
- ext4_warning(sb, "inodes_count overflow");
- return -EINVAL;
- }
-
- if (reserved_gdb || gdb_off == 0) {
- if (!EXT4_HAS_COMPAT_FEATURE(sb,
- EXT4_FEATURE_COMPAT_RESIZE_INODE)
- || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
- ext4_warning(sb,
- "No reserved GDT blocks, can't resize");
- return -EPERM;
- }
- inode = ext4_iget(sb, EXT4_RESIZE_INO);
- if (IS_ERR(inode)) {
- ext4_warning(sb, "Error opening resize inode");
- return PTR_ERR(inode);
- }
- }
-
-
- err = verify_group_input(sb, input);
- if (err)
- goto out;
-
- flex_gd.count = 1;
- flex_gd.groups = input;
- flex_gd.bg_flags = &bg_flags;
- err = ext4_flex_group_add(sb, inode, &flex_gd);
-out:
- iput(inode);
- return err;
-} /* ext4_group_add */
-
-/*
- * Extend a group without checking, assuming that checking has already been done.
- */
-static int ext4_group_extend_no_check(struct super_block *sb,
- ext4_fsblk_t o_blocks_count, ext4_grpblk_t add)
-{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- handle_t *handle;
- int err = 0, err2;
-
- /* We will update the superblock, one block bitmap, and
- * one group descriptor via ext4_group_add_blocks().
- */
- handle = ext4_journal_start_sb(sb, 3);
- if (IS_ERR(handle)) {
- err = PTR_ERR(handle);
- ext4_warning(sb, "error %d on journal start", err);
- return err;
- }
-
- err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
- if (err) {
- ext4_warning(sb, "error %d on journal write access", err);
- goto errout;
- }
-
- ext4_blocks_count_set(es, o_blocks_count + add);
- ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add);
- ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
- o_blocks_count + add);
- /* We add the blocks to the bitmap and set the group need init bit */
- err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
- if (err)
- goto errout;
- ext4_handle_dirty_super(handle, sb);
- ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
- o_blocks_count + add);
-errout:
- err2 = ext4_journal_stop(handle);
- if (err2 && !err)
- err = err2;
-
- if (!err) {
- if (test_opt(sb, DEBUG))
- printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
- "blocks\n", ext4_blocks_count(es));
- update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
- sizeof(struct ext4_super_block));
- }
- return err;
-}
-
-/*
- * Extend the filesystem to the new number of blocks specified. This entry
- * point is only used to extend the current filesystem to the end of the last
- * existing group. It can be accessed via ioctl, or by "remount,resize=<size>"
- * for emergencies (because it has no dependencies on reserved blocks).
- *
- * If we _really_ wanted, we could use default values to call ext4_group_add()
- * to allow the "remount" trick to work for arbitrary resizing, assuming enough
- * GDT blocks are reserved to grow to the desired size.
- */
-int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
- ext4_fsblk_t n_blocks_count)
-{
- ext4_fsblk_t o_blocks_count;
- ext4_grpblk_t last;
- ext4_grpblk_t add;
- struct buffer_head *bh;
- int err;
- ext4_group_t group;
-
- o_blocks_count = ext4_blocks_count(es);
-
- if (test_opt(sb, DEBUG))
- ext4_msg(sb, KERN_DEBUG,
- "extending last group from %llu to %llu blocks",
- o_blocks_count, n_blocks_count);
-
- if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
- return 0;
-
- if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
- ext4_msg(sb, KERN_ERR,
- "filesystem too large to resize to %llu blocks safely",
- n_blocks_count);
- if (sizeof(sector_t) < 8)
- ext4_warning(sb, "CONFIG_LBDAF not enabled");
- return -EINVAL;
- }
-
- if (n_blocks_count < o_blocks_count) {
- ext4_warning(sb, "can't shrink FS - resize aborted");
- return -EINVAL;
- }
-
- /* Handle the remaining blocks in the last group only. */
- ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
-
- if (last == 0) {
- ext4_warning(sb, "need to use ext2online to resize further");
- return -EPERM;
- }
-
- add = EXT4_BLOCKS_PER_GROUP(sb) - last;
-
- if (o_blocks_count + add < o_blocks_count) {
- ext4_warning(sb, "blocks_count overflow");
- return -EINVAL;
- }
-
- if (o_blocks_count + add > n_blocks_count)
- add = n_blocks_count - o_blocks_count;
-
- if (o_blocks_count + add < n_blocks_count)
- ext4_warning(sb, "will only finish group (%llu blocks, %u new)",
- o_blocks_count + add, add);
-
- /* See if the device is actually as big as what was requested */
- bh = sb_bread(sb, o_blocks_count + add - 1);
- if (!bh) {
- ext4_warning(sb, "can't read last block, resize aborted");
- return -ENOSPC;
- }
- brelse(bh);
-
- err = ext4_group_extend_no_check(sb, o_blocks_count, add);
- return err;
-} /* ext4_group_extend */
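The "filesystem too large" check above is a plain sector_t width guard: with 4 KiB blocks (s_blocksize_bits == 12) and a 32-bit sector_t, the limit is (2^32 - 1) >> 3 blocks, roughly 2 TiB. In practice this function is reached through the EXT4_IOC_GROUP_EXTEND ioctl; a minimal user-space sketch follows, where the mount point and target size are placeholders and the kernel side reads a 32-bit block count:

/*
 * Hedged sketch: extend the last group via EXT4_IOC_GROUP_EXTEND.
 * "/mnt/ext4" and the block count are placeholders.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ioctl.h>

#ifndef EXT4_IOC_GROUP_EXTEND
#define EXT4_IOC_GROUP_EXTEND	_IOW('f', 7, unsigned long)
#endif

int main(void)
{
	unsigned int n_blocks_count = 262144;	/* placeholder new total size */
	int fd = open("/mnt/ext4", O_RDONLY);	/* any file on the filesystem */

	if (fd < 0)
		return 1;
	if (ioctl(fd, EXT4_IOC_GROUP_EXTEND, &n_blocks_count) < 0)
		perror("EXT4_IOC_GROUP_EXTEND");
	close(fd);
	return 0;
}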
-
-/*
- * ext4_resize_fs() resizes the fs to the new size specified by @n_blocks_count
- *
- * @sb: super block of the fs to be resized
- * @n_blocks_count: the number of blocks in the resized fs
- */
-int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
-{
- struct ext4_new_flex_group_data *flex_gd = NULL;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_super_block *es = sbi->s_es;
- struct buffer_head *bh;
- struct inode *resize_inode;
- ext4_fsblk_t o_blocks_count;
- ext4_group_t o_group;
- ext4_group_t n_group;
- ext4_grpblk_t offset, add;
- unsigned long n_desc_blocks;
- unsigned long o_desc_blocks;
- unsigned long desc_blocks;
- int err = 0, flexbg_size = 1;
-
- o_blocks_count = ext4_blocks_count(es);
-
- if (test_opt(sb, DEBUG))
- ext4_msg(sb, KERN_DEBUG, "resizing filesystem from %llu "
- "to %llu blocks", o_blocks_count, n_blocks_count);
-
- if (n_blocks_count < o_blocks_count) {
- /* On-line shrinking not supported */
- ext4_warning(sb, "can't shrink FS - resize aborted");
- return -EINVAL;
- }
-
- if (n_blocks_count == o_blocks_count)
-		/* Nothing to do */
- return 0;
-
- ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset);
- ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset);
-
- n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) /
- EXT4_DESC_PER_BLOCK(sb);
- o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
- EXT4_DESC_PER_BLOCK(sb);
- desc_blocks = n_desc_blocks - o_desc_blocks;
-
- if (desc_blocks &&
- (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) ||
- le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) {
- ext4_warning(sb, "No reserved GDT blocks, can't resize");
- return -EPERM;
- }
-
- resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
- if (IS_ERR(resize_inode)) {
- ext4_warning(sb, "Error opening resize inode");
- return PTR_ERR(resize_inode);
- }
-
- /* See if the device is actually as big as what was requested */
- bh = sb_bread(sb, n_blocks_count - 1);
- if (!bh) {
-		ext4_warning(sb, "can't read last block, resize aborted");
-		iput(resize_inode);
-		return -ENOSPC;
- }
- brelse(bh);
-
- /* extend the last group */
- if (n_group == o_group)
- add = n_blocks_count - o_blocks_count;
- else
- add = EXT4_BLOCKS_PER_GROUP(sb) - (offset + 1);
- if (add > 0) {
- err = ext4_group_extend_no_check(sb, o_blocks_count, add);
- if (err)
- goto out;
- }
-
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
- es->s_log_groups_per_flex)
- flexbg_size = 1 << es->s_log_groups_per_flex;
-
- o_blocks_count = ext4_blocks_count(es);
- if (o_blocks_count == n_blocks_count)
- goto out;
-
- flex_gd = alloc_flex_gd(flexbg_size);
- if (flex_gd == NULL) {
- err = -ENOMEM;
- goto out;
- }
-
- /* Add flex groups. Note that a regular group is a
- * flex group with 1 group.
- */
- while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
- flexbg_size)) {
- ext4_alloc_group_tables(sb, flex_gd, flexbg_size);
- err = ext4_flex_group_add(sb, resize_inode, flex_gd);
- if (unlikely(err))
- break;
- }
-
-out:
- if (flex_gd)
- free_flex_gd(flex_gd);
-
- iput(resize_inode);
- if (test_opt(sb, DEBUG))
- ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu "
- "upto %llu blocks", o_blocks_count, n_blocks_count);
- return err;
-}
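ext4_resize_fs() is the backend of the EXT4_IOC_RESIZE_FS ioctl, which takes the target size as a 64-bit block count. Note how the loop above adds one flex group per iteration: with s_log_groups_per_flex == 4, flexbg_size is 1 << 4 == 16, so each ext4_flex_group_add() call brings in up to 16 block groups at once. A minimal user-space sketch, with path and size as placeholders:

/* Hedged sketch: full online resize via EXT4_IOC_RESIZE_FS (64-bit count). */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ioctl.h>
#include <linux/types.h>

#ifndef EXT4_IOC_RESIZE_FS
#define EXT4_IOC_RESIZE_FS	_IOW('f', 16, __u64)
#endif

int main(void)
{
	__u64 n_blocks_count = 1048576;		/* placeholder target size */
	int fd = open("/mnt/ext4", O_RDONLY);	/* placeholder mount point */

	if (fd < 0)
		return 1;
	if (ioctl(fd, EXT4_IOC_RESIZE_FS, &n_blocks_count) < 0)
		perror("EXT4_IOC_RESIZE_FS");
	close(fd);
	return 0;
}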
diff --git a/ANDROID_3.4.5/fs/ext4/super.c b/ANDROID_3.4.5/fs/ext4/super.c
deleted file mode 100644
index a68703a5..00000000
--- a/ANDROID_3.4.5/fs/ext4/super.c
+++ /dev/null
@@ -1,4980 +0,0 @@
-/*
- * linux/fs/ext4/super.c
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- * from
- *
- * linux/fs/minix/inode.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * Big-endian to little-endian byte-swapping/bitmaps by
- * David S. Miller (davem@caip.rutgers.edu), 1995
- */
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/time.h>
-#include <linux/vmalloc.h>
-#include <linux/jbd2.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/blkdev.h>
-#include <linux/parser.h>
-#include <linux/buffer_head.h>
-#include <linux/exportfs.h>
-#include <linux/vfs.h>
-#include <linux/random.h>
-#include <linux/mount.h>
-#include <linux/namei.h>
-#include <linux/quotaops.h>
-#include <linux/seq_file.h>
-#include <linux/proc_fs.h>
-#include <linux/ctype.h>
-#include <linux/log2.h>
-#include <linux/crc16.h>
-#include <linux/cleancache.h>
-#include <asm/uaccess.h>
-
-#include <linux/kthread.h>
-#include <linux/freezer.h>
-
-#include "ext4.h"
-#include "ext4_extents.h"
-#include "ext4_jbd2.h"
-#include "xattr.h"
-#include "acl.h"
-#include "mballoc.h"
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/ext4.h>
-
-static struct proc_dir_entry *ext4_proc_root;
-static struct kset *ext4_kset;
-static struct ext4_lazy_init *ext4_li_info;
-static struct mutex ext4_li_mtx;
-static struct ext4_features *ext4_feat;
-
-static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
- unsigned long journal_devnum);
-static int ext4_show_options(struct seq_file *seq, struct dentry *root);
-static int ext4_commit_super(struct super_block *sb, int sync);
-static void ext4_mark_recovery_complete(struct super_block *sb,
- struct ext4_super_block *es);
-static void ext4_clear_journal_err(struct super_block *sb,
- struct ext4_super_block *es);
-static int ext4_sync_fs(struct super_block *sb, int wait);
-static const char *ext4_decode_error(struct super_block *sb, int errno,
- char nbuf[16]);
-static int ext4_remount(struct super_block *sb, int *flags, char *data);
-static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
-static int ext4_unfreeze(struct super_block *sb);
-static void ext4_write_super(struct super_block *sb);
-static int ext4_freeze(struct super_block *sb);
-static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data);
-static inline int ext2_feature_set_ok(struct super_block *sb);
-static inline int ext3_feature_set_ok(struct super_block *sb);
-static int ext4_feature_set_ok(struct super_block *sb, int readonly);
-static void ext4_destroy_lazyinit_thread(void);
-static void ext4_unregister_li_request(struct super_block *sb);
-static void ext4_clear_request_list(void);
-
-#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
-static struct file_system_type ext2_fs_type = {
- .owner = THIS_MODULE,
- .name = "ext2",
- .mount = ext4_mount,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
-};
-#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
-#else
-#define IS_EXT2_SB(sb) (0)
-#endif
-
-
-#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
-static struct file_system_type ext3_fs_type = {
- .owner = THIS_MODULE,
- .name = "ext3",
- .mount = ext4_mount,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
-};
-#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
-#else
-#define IS_EXT3_SB(sb) (0)
-#endif
-
-void *ext4_kvmalloc(size_t size, gfp_t flags)
-{
- void *ret;
-
- ret = kmalloc(size, flags);
- if (!ret)
- ret = __vmalloc(size, flags, PAGE_KERNEL);
- return ret;
-}
-
-void *ext4_kvzalloc(size_t size, gfp_t flags)
-{
- void *ret;
-
- ret = kzalloc(size, flags);
- if (!ret)
- ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
- return ret;
-}
-
-void ext4_kvfree(void *ptr)
-{
- if (is_vmalloc_addr(ptr))
- vfree(ptr);
- else
- kfree(ptr);
-}
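These helpers implement the usual "try kmalloc, fall back to vmalloc" pattern for allocations that are occasionally too big for the slab allocator, with ext4_kvfree() choosing the matching free via is_vmalloc_addr(). This is roughly how ext4_fill_super() later in this file allocates the group-descriptor array (sketch, error handling trimmed):

	sbi->s_group_desc = ext4_kvmalloc(db_count *
					  sizeof(struct buffer_head *),
					  GFP_KERNEL);
	if (sbi->s_group_desc == NULL)
		return -ENOMEM;
	/* ... and on unmount or failure: */
	ext4_kvfree(sbi->s_group_desc);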
-
-ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
- struct ext4_group_desc *bg)
-{
- return le32_to_cpu(bg->bg_block_bitmap_lo) |
- (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
- (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
-}
-
-ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
- struct ext4_group_desc *bg)
-{
- return le32_to_cpu(bg->bg_inode_bitmap_lo) |
- (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
- (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
-}
-
-ext4_fsblk_t ext4_inode_table(struct super_block *sb,
- struct ext4_group_desc *bg)
-{
- return le32_to_cpu(bg->bg_inode_table_lo) |
- (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
- (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
-}
-
-__u32 ext4_free_group_clusters(struct super_block *sb,
- struct ext4_group_desc *bg)
-{
- return le16_to_cpu(bg->bg_free_blocks_count_lo) |
- (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
- (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
-}
-
-__u32 ext4_free_inodes_count(struct super_block *sb,
- struct ext4_group_desc *bg)
-{
- return le16_to_cpu(bg->bg_free_inodes_count_lo) |
- (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
- (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
-}
-
-__u32 ext4_used_dirs_count(struct super_block *sb,
- struct ext4_group_desc *bg)
-{
- return le16_to_cpu(bg->bg_used_dirs_count_lo) |
- (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
- (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
-}
-
-__u32 ext4_itable_unused_count(struct super_block *sb,
- struct ext4_group_desc *bg)
-{
- return le16_to_cpu(bg->bg_itable_unused_lo) |
- (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
- (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
-}
-
-void ext4_block_bitmap_set(struct super_block *sb,
- struct ext4_group_desc *bg, ext4_fsblk_t blk)
-{
- bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
- if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
- bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
-}
-
-void ext4_inode_bitmap_set(struct super_block *sb,
- struct ext4_group_desc *bg, ext4_fsblk_t blk)
-{
- bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);
- if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
- bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
-}
-
-void ext4_inode_table_set(struct super_block *sb,
- struct ext4_group_desc *bg, ext4_fsblk_t blk)
-{
- bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
- if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
- bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
-}
-
-void ext4_free_group_clusters_set(struct super_block *sb,
- struct ext4_group_desc *bg, __u32 count)
-{
- bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
- if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
- bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
-}
-
-void ext4_free_inodes_set(struct super_block *sb,
- struct ext4_group_desc *bg, __u32 count)
-{
- bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
- if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
- bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
-}
-
-void ext4_used_dirs_set(struct super_block *sb,
- struct ext4_group_desc *bg, __u32 count)
-{
- bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
- if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
- bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
-}
-
-void ext4_itable_unused_set(struct super_block *sb,
- struct ext4_group_desc *bg, __u32 count)
-{
- bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
- if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
- bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
-}
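All of the accessors above share one pattern: a wide value is split across _lo and _hi on-disk fields, and the _hi half is only used when the group descriptor is at least EXT4_MIN_DESC_SIZE_64BIT. For example, block number 0x123456789 is stored as lo = 0x23456789 and hi = 0x1, then read back as lo | (hi << 32). A standalone sketch of the split and recombination:

#include <stdio.h>
#include <stdint.h>

/* Standalone illustration of the _lo/_hi split used by the accessors above. */
int main(void)
{
	uint64_t blk = 0x123456789ULL;
	uint32_t lo = (uint32_t)blk;			/* bg_*_lo */
	uint32_t hi = (uint32_t)(blk >> 32);		/* bg_*_hi */
	uint64_t back = (uint64_t)lo | ((uint64_t)hi << 32);

	printf("lo=%#x hi=%#x recombined=%#llx\n",
	       lo, hi, (unsigned long long)back);
	return 0;
}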
-
-
-/* Just increment the non-pointer handle value */
-static handle_t *ext4_get_nojournal(void)
-{
- handle_t *handle = current->journal_info;
- unsigned long ref_cnt = (unsigned long)handle;
-
- BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT);
-
- ref_cnt++;
- handle = (handle_t *)ref_cnt;
-
- current->journal_info = handle;
- return handle;
-}
-
-
-/* Decrement the non-pointer handle value */
-static void ext4_put_nojournal(handle_t *handle)
-{
- unsigned long ref_cnt = (unsigned long)handle;
-
- BUG_ON(ref_cnt == 0);
-
- ref_cnt--;
- handle = (handle_t *)ref_cnt;
-
- current->journal_info = handle;
-}
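In no-journal mode there is no handle object at all: current->journal_info holds a bare nesting count cast to handle_t *, which is what lets ext4_handle_valid() distinguish it from a real jbd2 handle (small integer values are never valid pointers). A standalone sketch of the same cast trick:

#include <assert.h>
#include <stdio.h>

typedef struct handle_s handle_t;	/* opaque, as in jbd2 */

/* Standalone sketch of the pointer-as-nesting-count trick above. */
int main(void)
{
	handle_t *h = NULL;			/* plays current->journal_info */

	h = (handle_t *)((unsigned long)h + 1);	/* ext4_get_nojournal() */
	h = (handle_t *)((unsigned long)h + 1);	/* nested start */
	assert((unsigned long)h == 2);
	h = (handle_t *)((unsigned long)h - 1);	/* ext4_put_nojournal() */
	printf("nesting depth now %lu\n", (unsigned long)h);
	return 0;
}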
-
-/*
- * Wrappers for jbd2_journal_start/end.
- *
- * The only special thing we need to do here is to make sure that all
- * journal_end calls result in the superblock being marked dirty, so
- * that sync() will call the filesystem's write_super callback if
- * appropriate.
- *
- * To avoid j_barrier hold in userspace when a user calls freeze(),
- * ext4 prevents a new handle from being started by s_frozen, which
- * is in an upper layer.
- */
-handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
-{
- journal_t *journal;
- handle_t *handle;
-
- trace_ext4_journal_start(sb, nblocks, _RET_IP_);
- if (sb->s_flags & MS_RDONLY)
- return ERR_PTR(-EROFS);
-
- journal = EXT4_SB(sb)->s_journal;
- handle = ext4_journal_current_handle();
-
- /*
- * If a handle has been started, it should be allowed to
-	 * finish; otherwise a deadlock could occur between freeze
-	 * and other operations (e.g. truncate) due to the restart of
-	 * the journal handle if the filesystem is frozen and active
-	 * handles are not stopped.
- */
- if (!handle)
- vfs_check_frozen(sb, SB_FREEZE_TRANS);
-
- if (!journal)
- return ext4_get_nojournal();
- /*
- * Special case here: if the journal has aborted behind our
- * backs (eg. EIO in the commit thread), then we still need to
- * take the FS itself readonly cleanly.
- */
- if (is_journal_aborted(journal)) {
- ext4_abort(sb, "Detected aborted journal");
- return ERR_PTR(-EROFS);
- }
- return jbd2_journal_start(journal, nblocks);
-}
-
-/*
- * The only special thing we need to do here is to make sure that all
- * jbd2_journal_stop calls result in the superblock being marked dirty, so
- * that sync() will call the filesystem's write_super callback if
- * appropriate.
- */
-int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
-{
- struct super_block *sb;
- int err;
- int rc;
-
- if (!ext4_handle_valid(handle)) {
- ext4_put_nojournal(handle);
- return 0;
- }
- sb = handle->h_transaction->t_journal->j_private;
- err = handle->h_err;
- rc = jbd2_journal_stop(handle);
-
- if (!err)
- err = rc;
- if (err)
- __ext4_std_error(sb, where, line, err);
- return err;
-}
-
-void ext4_journal_abort_handle(const char *caller, unsigned int line,
- const char *err_fn, struct buffer_head *bh,
- handle_t *handle, int err)
-{
- char nbuf[16];
- const char *errstr = ext4_decode_error(NULL, err, nbuf);
-
- BUG_ON(!ext4_handle_valid(handle));
-
- if (bh)
- BUFFER_TRACE(bh, "abort");
-
- if (!handle->h_err)
- handle->h_err = err;
-
- if (is_handle_aborted(handle))
- return;
-
- printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n",
- caller, line, errstr, err_fn);
-
- jbd2_journal_abort_handle(handle);
-}
-
-static void __save_error_info(struct super_block *sb, const char *func,
- unsigned int line)
-{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
-
- EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
- es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
- es->s_last_error_time = cpu_to_le32(get_seconds());
- strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
- es->s_last_error_line = cpu_to_le32(line);
- if (!es->s_first_error_time) {
- es->s_first_error_time = es->s_last_error_time;
- strncpy(es->s_first_error_func, func,
- sizeof(es->s_first_error_func));
- es->s_first_error_line = cpu_to_le32(line);
- es->s_first_error_ino = es->s_last_error_ino;
- es->s_first_error_block = es->s_last_error_block;
- }
- /*
- * Start the daily error reporting function if it hasn't been
- * started already
- */
- if (!es->s_error_count)
- mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
- es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1);
-}
-
-static void save_error_info(struct super_block *sb, const char *func,
- unsigned int line)
-{
- __save_error_info(sb, func, line);
- ext4_commit_super(sb, 1);
-}
-
-/*
- * The del_gendisk() function uninitializes the disk-specific data
- * structures, including the bdi structure, without telling anyone
- * else. Once this happens, any attempt to call mark_buffer_dirty()
- * (for example, by ext4_commit_super), will cause a kernel OOPS.
- * This is a kludge to prevent these oops until we can put in a proper
- * hook in del_gendisk() to inform the VFS and file system layers.
- */
-static int block_device_ejected(struct super_block *sb)
-{
- struct inode *bd_inode = sb->s_bdev->bd_inode;
- struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;
-
- return bdi->dev == NULL;
-}
-
-static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
-{
- struct super_block *sb = journal->j_private;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- int error = is_journal_aborted(journal);
- struct ext4_journal_cb_entry *jce, *tmp;
-
- spin_lock(&sbi->s_md_lock);
- list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
- list_del_init(&jce->jce_list);
- spin_unlock(&sbi->s_md_lock);
- jce->jce_func(sb, jce, error);
- spin_lock(&sbi->s_md_lock);
- }
- spin_unlock(&sbi->s_md_lock);
-}
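Note the locking pattern above: each entry is unhooked with list_del_init() while s_md_lock is held, and the lock is dropped only around the jce_func() callback, which may block or re-add entries. A generic pthreads analogue of that drop-the-lock-around-the-callback loop, for illustration only:

#include <pthread.h>

struct node {
	struct node *next;
	void (*fn)(struct node *);
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *head;

static void run_callbacks(void)
{
	pthread_mutex_lock(&lock);
	while (head) {
		struct node *n = head;

		head = n->next;			/* unhook under the lock */
		pthread_mutex_unlock(&lock);	/* callback may sleep */
		n->fn(n);
		pthread_mutex_lock(&lock);
	}
	pthread_mutex_unlock(&lock);
}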
-
-/* Deal with the reporting of failure conditions on a filesystem such as
- * inconsistencies detected or read IO failures.
- *
- * On ext2, we can store the error state of the filesystem in the
- * superblock. That is not possible on ext4, because we may have other
- * write ordering constraints on the superblock which prevent us from
- * writing it out straight away; and given that the journal is about to
- * be aborted, we can't rely on the current, or future, transactions to
- * write out the superblock safely.
- *
- * We'll just use the jbd2_journal_abort() error code to record an error in
- * the journal instead. On recovery, the journal will complain about
- * that error until we've noted it down and cleared it.
- */
-
-static void ext4_handle_error(struct super_block *sb)
-{
- if (sb->s_flags & MS_RDONLY)
- return;
-
- if (!test_opt(sb, ERRORS_CONT)) {
- journal_t *journal = EXT4_SB(sb)->s_journal;
-
- EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
- if (journal)
- jbd2_journal_abort(journal, -EIO);
- }
- if (test_opt(sb, ERRORS_RO)) {
- ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
- sb->s_flags |= MS_RDONLY;
- }
- if (test_opt(sb, ERRORS_PANIC))
- panic("EXT4-fs (device %s): panic forced after error\n",
- sb->s_id);
-}
-
-void __ext4_error(struct super_block *sb, const char *function,
- unsigned int line, const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
- sb->s_id, function, line, current->comm, &vaf);
- va_end(args);
- save_error_info(sb, function, line);
-
- ext4_handle_error(sb);
-}
-
-void ext4_error_inode(struct inode *inode, const char *function,
- unsigned int line, ext4_fsblk_t block,
- const char *fmt, ...)
-{
- va_list args;
- struct va_format vaf;
- struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
-
- es->s_last_error_ino = cpu_to_le32(inode->i_ino);
- es->s_last_error_block = cpu_to_le64(block);
- save_error_info(inode->i_sb, function, line);
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- if (block)
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
- "inode #%lu: block %llu: comm %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- block, current->comm, &vaf);
- else
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
- "inode #%lu: comm %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- current->comm, &vaf);
- va_end(args);
-
- ext4_handle_error(inode->i_sb);
-}
-
-void ext4_error_file(struct file *file, const char *function,
- unsigned int line, ext4_fsblk_t block,
- const char *fmt, ...)
-{
- va_list args;
- struct va_format vaf;
- struct ext4_super_block *es;
- struct inode *inode = file->f_dentry->d_inode;
- char pathname[80], *path;
-
- es = EXT4_SB(inode->i_sb)->s_es;
- es->s_last_error_ino = cpu_to_le32(inode->i_ino);
- save_error_info(inode->i_sb, function, line);
- path = d_path(&(file->f_path), pathname, sizeof(pathname));
- if (IS_ERR(path))
- path = "(unknown)";
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- if (block)
- printk(KERN_CRIT
- "EXT4-fs error (device %s): %s:%d: inode #%lu: "
- "block %llu: comm %s: path %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- block, current->comm, path, &vaf);
- else
- printk(KERN_CRIT
- "EXT4-fs error (device %s): %s:%d: inode #%lu: "
- "comm %s: path %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- current->comm, path, &vaf);
- va_end(args);
-
- ext4_handle_error(inode->i_sb);
-}
-
-static const char *ext4_decode_error(struct super_block *sb, int errno,
- char nbuf[16])
-{
- char *errstr = NULL;
-
- switch (errno) {
- case -EIO:
- errstr = "IO failure";
- break;
- case -ENOMEM:
- errstr = "Out of memory";
- break;
- case -EROFS:
- if (!sb || (EXT4_SB(sb)->s_journal &&
- EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
- errstr = "Journal has aborted";
- else
- errstr = "Readonly filesystem";
- break;
- default:
- /* If the caller passed in an extra buffer for unknown
- * errors, textualise them now. Else we just return
- * NULL. */
- if (nbuf) {
- /* Check for truncated error codes... */
- if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
- errstr = nbuf;
- }
- break;
- }
-
- return errstr;
-}
-
-/* __ext4_std_error decodes expected errors from journaling functions
- * automatically and invokes the appropriate error response. */
-
-void __ext4_std_error(struct super_block *sb, const char *function,
- unsigned int line, int errno)
-{
- char nbuf[16];
- const char *errstr;
-
- /* Special case: if the error is EROFS, and we're not already
- * inside a transaction, then there's really no point in logging
- * an error. */
- if (errno == -EROFS && journal_current_handle() == NULL &&
- (sb->s_flags & MS_RDONLY))
- return;
-
- errstr = ext4_decode_error(sb, errno, nbuf);
- printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
- sb->s_id, function, line, errstr);
- save_error_info(sb, function, line);
-
- ext4_handle_error(sb);
-}
-
-/*
- * ext4_abort is a much stronger failure handler than ext4_error. The
- * abort function may be used to deal with unrecoverable failures such
- * as journal IO errors or ENOMEM at a critical moment in log management.
- *
- * We unconditionally force the filesystem into an ABORT|READONLY state,
- * unless the error response on the fs has been set to panic in which
- * case we take the easy way out and panic immediately.
- */
-
-void __ext4_abort(struct super_block *sb, const char *function,
- unsigned int line, const char *fmt, ...)
-{
- va_list args;
-
- save_error_info(sb, function, line);
- va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
- function, line);
- vprintk(fmt, args);
- printk("\n");
- va_end(args);
-
- if ((sb->s_flags & MS_RDONLY) == 0) {
- ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
- sb->s_flags |= MS_RDONLY;
- EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
- if (EXT4_SB(sb)->s_journal)
- jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
- save_error_info(sb, function, line);
- }
- if (test_opt(sb, ERRORS_PANIC))
- panic("EXT4-fs panic from previous error\n");
-}
-
-void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
- va_end(args);
-}
-
-void __ext4_warning(struct super_block *sb, const char *function,
- unsigned int line, const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
- sb->s_id, function, line, &vaf);
- va_end(args);
-}
-
-void __ext4_grp_locked_error(const char *function, unsigned int line,
- struct super_block *sb, ext4_group_t grp,
- unsigned long ino, ext4_fsblk_t block,
- const char *fmt, ...)
-__releases(bitlock)
-__acquires(bitlock)
-{
- struct va_format vaf;
- va_list args;
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
-
- es->s_last_error_ino = cpu_to_le32(ino);
- es->s_last_error_block = cpu_to_le64(block);
- __save_error_info(sb, function, line);
-
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
- sb->s_id, function, line, grp);
- if (ino)
- printk(KERN_CONT "inode %lu: ", ino);
- if (block)
- printk(KERN_CONT "block %llu:", (unsigned long long) block);
- printk(KERN_CONT "%pV\n", &vaf);
- va_end(args);
-
- if (test_opt(sb, ERRORS_CONT)) {
- ext4_commit_super(sb, 0);
- return;
- }
-
- ext4_unlock_group(sb, grp);
- ext4_handle_error(sb);
- /*
- * We only get here in the ERRORS_RO case; relocking the group
- * may be dangerous, but nothing bad will happen since the
-	 * filesystem will have already been marked read-only and the
-	 * journal has been aborted.  We return 1 as a hint to callers
-	 * who might want to use the return value from
- * ext4_grp_locked_error() to distinguish between the
- * ERRORS_CONT and ERRORS_RO case, and perhaps return more
- * aggressively from the ext4 function in question, with a
- * more appropriate error code.
- */
- ext4_lock_group(sb, grp);
- return;
-}
-
-void ext4_update_dynamic_rev(struct super_block *sb)
-{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
-
- if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
- return;
-
- ext4_warning(sb,
- "updating to rev %d because of new feature flag, "
- "running e2fsck is recommended",
- EXT4_DYNAMIC_REV);
-
- es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
- es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
- es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
- /* leave es->s_feature_*compat flags alone */
- /* es->s_uuid will be set by e2fsck if empty */
-
- /*
- * The rest of the superblock fields should be zero, and if not it
- * means they are likely already in use, so leave them alone. We
- * can leave it up to e2fsck to clean up any inconsistencies there.
- */
-}
-
-/*
- * Open the external journal device
- */
-static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
-{
- struct block_device *bdev;
- char b[BDEVNAME_SIZE];
-
- bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
- if (IS_ERR(bdev))
- goto fail;
- return bdev;
-
-fail:
- ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
- __bdevname(dev, b), PTR_ERR(bdev));
- return NULL;
-}
-
-/*
- * Release the journal device
- */
-static int ext4_blkdev_put(struct block_device *bdev)
-{
- return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-}
-
-static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
-{
- struct block_device *bdev;
- int ret = -ENODEV;
-
- bdev = sbi->journal_bdev;
- if (bdev) {
- ret = ext4_blkdev_put(bdev);
- sbi->journal_bdev = NULL;
- }
- return ret;
-}
-
-static inline struct inode *orphan_list_entry(struct list_head *l)
-{
- return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
-}
-
-static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
-{
- struct list_head *l;
-
- ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
- le32_to_cpu(sbi->s_es->s_last_orphan));
-
- printk(KERN_ERR "sb_info orphan list:\n");
- list_for_each(l, &sbi->s_orphan) {
- struct inode *inode = orphan_list_entry(l);
- printk(KERN_ERR " "
- "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
- inode->i_sb->s_id, inode->i_ino, inode,
- inode->i_mode, inode->i_nlink,
- NEXT_ORPHAN(inode));
- }
-}
-
-static void ext4_put_super(struct super_block *sb)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_super_block *es = sbi->s_es;
- int i, err;
-
- ext4_unregister_li_request(sb);
- dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
-
- flush_workqueue(sbi->dio_unwritten_wq);
- destroy_workqueue(sbi->dio_unwritten_wq);
-
- lock_super(sb);
- if (sbi->s_journal) {
- err = jbd2_journal_destroy(sbi->s_journal);
- sbi->s_journal = NULL;
- if (err < 0)
- ext4_abort(sb, "Couldn't clean up the journal");
- }
-
- del_timer(&sbi->s_err_report);
- ext4_release_system_zone(sb);
- ext4_mb_release(sb);
- ext4_ext_release(sb);
- ext4_xattr_put_super(sb);
-
- if (!(sb->s_flags & MS_RDONLY)) {
- EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
- es->s_state = cpu_to_le16(sbi->s_mount_state);
- }
- if (sb->s_dirt || !(sb->s_flags & MS_RDONLY))
- ext4_commit_super(sb, 1);
-
- if (sbi->s_proc) {
- remove_proc_entry("options", sbi->s_proc);
- remove_proc_entry(sb->s_id, ext4_proc_root);
- }
- kobject_del(&sbi->s_kobj);
-
- for (i = 0; i < sbi->s_gdb_count; i++)
- brelse(sbi->s_group_desc[i]);
- ext4_kvfree(sbi->s_group_desc);
- ext4_kvfree(sbi->s_flex_groups);
- percpu_counter_destroy(&sbi->s_freeclusters_counter);
- percpu_counter_destroy(&sbi->s_freeinodes_counter);
- percpu_counter_destroy(&sbi->s_dirs_counter);
- percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
- brelse(sbi->s_sbh);
-#ifdef CONFIG_QUOTA
- for (i = 0; i < MAXQUOTAS; i++)
- kfree(sbi->s_qf_names[i]);
-#endif
-
- /* Debugging code just in case the in-memory inode orphan list
- * isn't empty. The on-disk one can be non-empty if we've
- * detected an error and taken the fs readonly, but the
- * in-memory list had better be clean by this point. */
- if (!list_empty(&sbi->s_orphan))
- dump_orphan_list(sb, sbi);
- J_ASSERT(list_empty(&sbi->s_orphan));
-
- invalidate_bdev(sb->s_bdev);
- if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
- /*
- * Invalidate the journal device's buffers. We don't want them
- * floating about in memory - the physical journal device may
-		 * be hotswapped, and it breaks the `ro-after' testing code.
- */
- sync_blockdev(sbi->journal_bdev);
- invalidate_bdev(sbi->journal_bdev);
- ext4_blkdev_remove(sbi);
- }
- if (sbi->s_mmp_tsk)
- kthread_stop(sbi->s_mmp_tsk);
- sb->s_fs_info = NULL;
- /*
- * Now that we are completely done shutting down the
- * superblock, we need to actually destroy the kobject.
- */
- unlock_super(sb);
- kobject_put(&sbi->s_kobj);
- wait_for_completion(&sbi->s_kobj_unregister);
- kfree(sbi->s_blockgroup_lock);
- kfree(sbi);
-}
-
-static struct kmem_cache *ext4_inode_cachep;
-
-/*
- * Called inside transaction, so use GFP_NOFS
- */
-static struct inode *ext4_alloc_inode(struct super_block *sb)
-{
- struct ext4_inode_info *ei;
-
- ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
- if (!ei)
- return NULL;
-
- ei->vfs_inode.i_version = 1;
- ei->vfs_inode.i_data.writeback_index = 0;
- memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
- INIT_LIST_HEAD(&ei->i_prealloc_list);
- spin_lock_init(&ei->i_prealloc_lock);
- ei->i_reserved_data_blocks = 0;
- ei->i_reserved_meta_blocks = 0;
- ei->i_allocated_meta_blocks = 0;
- ei->i_da_metadata_calc_len = 0;
- spin_lock_init(&(ei->i_block_reservation_lock));
-#ifdef CONFIG_QUOTA
- ei->i_reserved_quota = 0;
-#endif
- ei->jinode = NULL;
- INIT_LIST_HEAD(&ei->i_completed_io_list);
- spin_lock_init(&ei->i_completed_io_lock);
- ei->cur_aio_dio = NULL;
- ei->i_sync_tid = 0;
- ei->i_datasync_tid = 0;
- atomic_set(&ei->i_ioend_count, 0);
- atomic_set(&ei->i_aiodio_unwritten, 0);
-
- return &ei->vfs_inode;
-}
-
-static int ext4_drop_inode(struct inode *inode)
-{
- int drop = generic_drop_inode(inode);
-
- trace_ext4_drop_inode(inode, drop);
- return drop;
-}
-
-static void ext4_i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
-}
-
-static void ext4_destroy_inode(struct inode *inode)
-{
- if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
- ext4_msg(inode->i_sb, KERN_ERR,
- "Inode %lu (%p): orphan list check failed!",
- inode->i_ino, EXT4_I(inode));
- print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
- EXT4_I(inode), sizeof(struct ext4_inode_info),
- true);
- dump_stack();
- }
- call_rcu(&inode->i_rcu, ext4_i_callback);
-}
-
-static void init_once(void *foo)
-{
- struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
-
- INIT_LIST_HEAD(&ei->i_orphan);
-#ifdef CONFIG_EXT4_FS_XATTR
- init_rwsem(&ei->xattr_sem);
-#endif
- init_rwsem(&ei->i_data_sem);
- inode_init_once(&ei->vfs_inode);
-}
-
-static int init_inodecache(void)
-{
- ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
- sizeof(struct ext4_inode_info),
- 0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
- init_once);
- if (ext4_inode_cachep == NULL)
- return -ENOMEM;
- return 0;
-}
-
-static void destroy_inodecache(void)
-{
- kmem_cache_destroy(ext4_inode_cachep);
-}
-
-void ext4_clear_inode(struct inode *inode)
-{
- invalidate_inode_buffers(inode);
- end_writeback(inode);
- dquot_drop(inode);
- ext4_discard_preallocations(inode);
- if (EXT4_I(inode)->jinode) {
- jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
- EXT4_I(inode)->jinode);
- jbd2_free_inode(EXT4_I(inode)->jinode);
- EXT4_I(inode)->jinode = NULL;
- }
-}
-
-static struct inode *ext4_nfs_get_inode(struct super_block *sb,
- u64 ino, u32 generation)
-{
- struct inode *inode;
-
- if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
- return ERR_PTR(-ESTALE);
- if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
- return ERR_PTR(-ESTALE);
-
- /* iget isn't really right if the inode is currently unallocated!!
- *
- * ext4_read_inode will return a bad_inode if the inode had been
- * deleted, so we should be safe.
- *
- * Currently we don't know the generation for parent directory, so
- * a generation of 0 means "accept any"
- */
- inode = ext4_iget(sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
- if (generation && inode->i_generation != generation) {
- iput(inode);
- return ERR_PTR(-ESTALE);
- }
-
- return inode;
-}
-
-static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type)
-{
- return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
- ext4_nfs_get_inode);
-}
-
-static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type)
-{
- return generic_fh_to_parent(sb, fid, fh_len, fh_type,
- ext4_nfs_get_inode);
-}
-
-/*
- * Try to release metadata pages (indirect blocks, directories) which are
- * mapped via the block device. Since these pages could have journal heads
- * which would prevent try_to_free_buffers() from freeing them, we must use
- * jbd2 layer's try_to_free_buffers() function to release them.
- */
-static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
- gfp_t wait)
-{
- journal_t *journal = EXT4_SB(sb)->s_journal;
-
- WARN_ON(PageChecked(page));
- if (!page_has_buffers(page))
- return 0;
- if (journal)
- return jbd2_journal_try_to_free_buffers(journal, page,
- wait & ~__GFP_WAIT);
- return try_to_free_buffers(page);
-}
-
-#ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
-#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
-
-static int ext4_write_dquot(struct dquot *dquot);
-static int ext4_acquire_dquot(struct dquot *dquot);
-static int ext4_release_dquot(struct dquot *dquot);
-static int ext4_mark_dquot_dirty(struct dquot *dquot);
-static int ext4_write_info(struct super_block *sb, int type);
-static int ext4_quota_on(struct super_block *sb, int type, int format_id,
- struct path *path);
-static int ext4_quota_off(struct super_block *sb, int type);
-static int ext4_quota_on_mount(struct super_block *sb, int type);
-static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
- size_t len, loff_t off);
-static ssize_t ext4_quota_write(struct super_block *sb, int type,
- const char *data, size_t len, loff_t off);
-
-static const struct dquot_operations ext4_quota_operations = {
- .get_reserved_space = ext4_get_reserved_space,
- .write_dquot = ext4_write_dquot,
- .acquire_dquot = ext4_acquire_dquot,
- .release_dquot = ext4_release_dquot,
- .mark_dirty = ext4_mark_dquot_dirty,
- .write_info = ext4_write_info,
- .alloc_dquot = dquot_alloc,
- .destroy_dquot = dquot_destroy,
-};
-
-static const struct quotactl_ops ext4_qctl_operations = {
- .quota_on = ext4_quota_on,
- .quota_off = ext4_quota_off,
- .quota_sync = dquot_quota_sync,
- .get_info = dquot_get_dqinfo,
- .set_info = dquot_set_dqinfo,
- .get_dqblk = dquot_get_dqblk,
- .set_dqblk = dquot_set_dqblk
-};
-#endif
-
-static const struct super_operations ext4_sops = {
- .alloc_inode = ext4_alloc_inode,
- .destroy_inode = ext4_destroy_inode,
- .write_inode = ext4_write_inode,
- .dirty_inode = ext4_dirty_inode,
- .drop_inode = ext4_drop_inode,
- .evict_inode = ext4_evict_inode,
- .put_super = ext4_put_super,
- .sync_fs = ext4_sync_fs,
- .freeze_fs = ext4_freeze,
- .unfreeze_fs = ext4_unfreeze,
- .statfs = ext4_statfs,
- .remount_fs = ext4_remount,
- .show_options = ext4_show_options,
-#ifdef CONFIG_QUOTA
- .quota_read = ext4_quota_read,
- .quota_write = ext4_quota_write,
-#endif
- .bdev_try_to_free_page = bdev_try_to_free_page,
-};
-
-static const struct super_operations ext4_nojournal_sops = {
- .alloc_inode = ext4_alloc_inode,
- .destroy_inode = ext4_destroy_inode,
- .write_inode = ext4_write_inode,
- .dirty_inode = ext4_dirty_inode,
- .drop_inode = ext4_drop_inode,
- .evict_inode = ext4_evict_inode,
- .write_super = ext4_write_super,
- .put_super = ext4_put_super,
- .statfs = ext4_statfs,
- .remount_fs = ext4_remount,
- .show_options = ext4_show_options,
-#ifdef CONFIG_QUOTA
- .quota_read = ext4_quota_read,
- .quota_write = ext4_quota_write,
-#endif
- .bdev_try_to_free_page = bdev_try_to_free_page,
-};
-
-static const struct export_operations ext4_export_ops = {
- .fh_to_dentry = ext4_fh_to_dentry,
- .fh_to_parent = ext4_fh_to_parent,
- .get_parent = ext4_get_parent,
-};
-
-enum {
- Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
- Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
- Opt_nouid32, Opt_debug, Opt_removed,
- Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
- Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
- Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
- Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit,
- Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
- Opt_data_err_abort, Opt_data_err_ignore,
- Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
- Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
- Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
- Opt_usrquota, Opt_grpquota, Opt_i_version,
- Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
- Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
- Opt_inode_readahead_blks, Opt_journal_ioprio,
- Opt_dioread_nolock, Opt_dioread_lock,
- Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
-};
-
-static const match_table_t tokens = {
- {Opt_bsd_df, "bsddf"},
- {Opt_minix_df, "minixdf"},
- {Opt_grpid, "grpid"},
- {Opt_grpid, "bsdgroups"},
- {Opt_nogrpid, "nogrpid"},
- {Opt_nogrpid, "sysvgroups"},
- {Opt_resgid, "resgid=%u"},
- {Opt_resuid, "resuid=%u"},
- {Opt_sb, "sb=%u"},
- {Opt_err_cont, "errors=continue"},
- {Opt_err_panic, "errors=panic"},
- {Opt_err_ro, "errors=remount-ro"},
- {Opt_nouid32, "nouid32"},
- {Opt_debug, "debug"},
- {Opt_removed, "oldalloc"},
- {Opt_removed, "orlov"},
- {Opt_user_xattr, "user_xattr"},
- {Opt_nouser_xattr, "nouser_xattr"},
- {Opt_acl, "acl"},
- {Opt_noacl, "noacl"},
- {Opt_noload, "norecovery"},
- {Opt_noload, "noload"},
- {Opt_removed, "nobh"},
- {Opt_removed, "bh"},
- {Opt_commit, "commit=%u"},
- {Opt_min_batch_time, "min_batch_time=%u"},
- {Opt_max_batch_time, "max_batch_time=%u"},
- {Opt_journal_dev, "journal_dev=%u"},
- {Opt_journal_checksum, "journal_checksum"},
- {Opt_journal_async_commit, "journal_async_commit"},
- {Opt_abort, "abort"},
- {Opt_data_journal, "data=journal"},
- {Opt_data_ordered, "data=ordered"},
- {Opt_data_writeback, "data=writeback"},
- {Opt_data_err_abort, "data_err=abort"},
- {Opt_data_err_ignore, "data_err=ignore"},
- {Opt_offusrjquota, "usrjquota="},
- {Opt_usrjquota, "usrjquota=%s"},
- {Opt_offgrpjquota, "grpjquota="},
- {Opt_grpjquota, "grpjquota=%s"},
- {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
- {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
- {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
- {Opt_grpquota, "grpquota"},
- {Opt_noquota, "noquota"},
- {Opt_quota, "quota"},
- {Opt_usrquota, "usrquota"},
- {Opt_barrier, "barrier=%u"},
- {Opt_barrier, "barrier"},
- {Opt_nobarrier, "nobarrier"},
- {Opt_i_version, "i_version"},
- {Opt_stripe, "stripe=%u"},
- {Opt_delalloc, "delalloc"},
- {Opt_nodelalloc, "nodelalloc"},
- {Opt_mblk_io_submit, "mblk_io_submit"},
- {Opt_nomblk_io_submit, "nomblk_io_submit"},
- {Opt_block_validity, "block_validity"},
- {Opt_noblock_validity, "noblock_validity"},
- {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
- {Opt_journal_ioprio, "journal_ioprio=%u"},
- {Opt_auto_da_alloc, "auto_da_alloc=%u"},
- {Opt_auto_da_alloc, "auto_da_alloc"},
- {Opt_noauto_da_alloc, "noauto_da_alloc"},
- {Opt_dioread_nolock, "dioread_nolock"},
- {Opt_dioread_lock, "dioread_lock"},
- {Opt_discard, "discard"},
- {Opt_nodiscard, "nodiscard"},
- {Opt_init_itable, "init_itable=%u"},
- {Opt_init_itable, "init_itable"},
- {Opt_noinit_itable, "noinit_itable"},
- {Opt_removed, "check=none"}, /* mount option from ext2/3 */
- {Opt_removed, "nocheck"}, /* mount option from ext2/3 */
- {Opt_removed, "reservation"}, /* mount option from ext2/3 */
- {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
- {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */
- {Opt_err, NULL},
-};
-
-static ext4_fsblk_t get_sb_block(void **data)
-{
- ext4_fsblk_t sb_block;
- char *options = (char *) *data;
-
- if (!options || strncmp(options, "sb=", 3) != 0)
- return 1; /* Default location */
-
- options += 3;
- /* TODO: use simple_strtoll with >32bit ext4 */
- sb_block = simple_strtoul(options, &options, 0);
- if (*options && *options != ',') {
- printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
- (char *) *data);
- return 1;
- }
- if (*options == ',')
- options++;
- *data = (void *) options;
-
- return sb_block;
-}
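get_sb_block() peels a leading "sb=<block>" off the raw option string before the normal parser runs, which is how a backup superblock is selected, e.g. mount -o sb=98304. A standalone user-space sketch of the same consume-and-advance parsing, with strtoul standing in for simple_strtoul and a local helper name:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Standalone sketch of get_sb_block(): consume "sb=<n>" and advance *data. */
static unsigned long sb_block_option(char **data)
{
	char *options = *data;
	unsigned long sb_block;

	if (!options || strncmp(options, "sb=", 3) != 0)
		return 1;			/* default location */
	options += 3;
	sb_block = strtoul(options, &options, 0);
	if (*options && *options != ',')
		return 1;			/* malformed specification */
	if (*options == ',')
		options++;
	*data = options;
	return sb_block;
}

int main(void)
{
	char opts[] = "sb=98304,errors=remount-ro";
	char *p = opts;

	printf("sb=%lu rest=\"%s\"\n", sb_block_option(&p), p);
	return 0;
}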
-
-#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
-static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n"
- "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
-
-#ifdef CONFIG_QUOTA
-static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- char *qname;
-
- if (sb_any_quota_loaded(sb) &&
- !sbi->s_qf_names[qtype]) {
- ext4_msg(sb, KERN_ERR,
- "Cannot change journaled "
- "quota options when quota turned on");
- return -1;
- }
- qname = match_strdup(args);
- if (!qname) {
- ext4_msg(sb, KERN_ERR,
- "Not enough memory for storing quotafile name");
- return -1;
- }
- if (sbi->s_qf_names[qtype] &&
- strcmp(sbi->s_qf_names[qtype], qname)) {
- ext4_msg(sb, KERN_ERR,
- "%s quota file already specified", QTYPE2NAME(qtype));
- kfree(qname);
- return -1;
- }
- sbi->s_qf_names[qtype] = qname;
- if (strchr(sbi->s_qf_names[qtype], '/')) {
- ext4_msg(sb, KERN_ERR,
- "quotafile must be on filesystem root");
- kfree(sbi->s_qf_names[qtype]);
- sbi->s_qf_names[qtype] = NULL;
- return -1;
- }
- set_opt(sb, QUOTA);
- return 1;
-}
-
-static int clear_qf_name(struct super_block *sb, int qtype)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
-
- if (sb_any_quota_loaded(sb) &&
- sbi->s_qf_names[qtype]) {
- ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
- " when quota turned on");
- return -1;
- }
- /*
- * The space will be released later when all options are confirmed
- * to be correct
- */
- sbi->s_qf_names[qtype] = NULL;
- return 1;
-}
-#endif
-
-#define MOPT_SET 0x0001
-#define MOPT_CLEAR 0x0002
-#define MOPT_NOSUPPORT 0x0004
-#define MOPT_EXPLICIT 0x0008
-#define MOPT_CLEAR_ERR 0x0010
-#define MOPT_GTE0 0x0020
-#ifdef CONFIG_QUOTA
-#define MOPT_Q 0
-#define MOPT_QFMT 0x0040
-#else
-#define MOPT_Q MOPT_NOSUPPORT
-#define MOPT_QFMT MOPT_NOSUPPORT
-#endif
-#define MOPT_DATAJ 0x0080
-
-static const struct mount_opts {
- int token;
- int mount_opt;
- int flags;
-} ext4_mount_opts[] = {
- {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
- {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
- {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
- {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
- {Opt_mblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_SET},
- {Opt_nomblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_CLEAR},
- {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
- {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
- {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_SET},
- {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_CLEAR},
- {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
- {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
- {Opt_delalloc, EXT4_MOUNT_DELALLOC, MOPT_SET | MOPT_EXPLICIT},
- {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_CLEAR | MOPT_EXPLICIT},
- {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_SET},
- {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
- EXT4_MOUNT_JOURNAL_CHECKSUM), MOPT_SET},
- {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_SET},
- {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
- {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
- {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
- {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_SET},
- {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_CLEAR},
- {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
- {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
- {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
- {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
- {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
- {Opt_commit, 0, MOPT_GTE0},
- {Opt_max_batch_time, 0, MOPT_GTE0},
- {Opt_min_batch_time, 0, MOPT_GTE0},
- {Opt_inode_readahead_blks, 0, MOPT_GTE0},
- {Opt_init_itable, 0, MOPT_GTE0},
- {Opt_stripe, 0, MOPT_GTE0},
- {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_DATAJ},
- {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ},
- {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ},
-#ifdef CONFIG_EXT4_FS_XATTR
- {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
- {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
-#else
- {Opt_user_xattr, 0, MOPT_NOSUPPORT},
- {Opt_nouser_xattr, 0, MOPT_NOSUPPORT},
-#endif
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
- {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
- {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
-#else
- {Opt_acl, 0, MOPT_NOSUPPORT},
- {Opt_noacl, 0, MOPT_NOSUPPORT},
-#endif
- {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
- {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
- {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
- {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
- MOPT_SET | MOPT_Q},
- {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
- MOPT_SET | MOPT_Q},
- {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
- EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
- {Opt_usrjquota, 0, MOPT_Q},
- {Opt_grpjquota, 0, MOPT_Q},
- {Opt_offusrjquota, 0, MOPT_Q},
- {Opt_offgrpjquota, 0, MOPT_Q},
- {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
- {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
- {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
- {Opt_err, 0, 0}
-};
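This table drives the generic branch at the bottom of handle_mount_opt(): MOPT_SET entries turn their bit on, and MOPT_CLEAR entries invert the argument's sense, so "nobarrier" and "barrier=0" both clear EXT4_MOUNT_BARRIER. A standalone sketch of that flag logic, with the bit value a stand-in:

#include <stdio.h>

#define MOPT_SET	0x0001
#define MOPT_CLEAR	0x0002
#define MNT_BARRIER	0x0100	/* stand-in for EXT4_MOUNT_BARRIER */

struct opt_entry { int flags; unsigned int mount_opt; };

/* Mirrors the generic set/clear branch of handle_mount_opt(). */
static unsigned int apply_opt(unsigned int opts,
			      const struct opt_entry *m, int arg)
{
	if (m->flags & MOPT_CLEAR)
		arg = !arg;
	return arg ? (opts | m->mount_opt) : (opts & ~m->mount_opt);
}

int main(void)
{
	struct opt_entry barrier   = { MOPT_SET,   MNT_BARRIER };
	struct opt_entry nobarrier = { MOPT_CLEAR, MNT_BARRIER };
	unsigned int opts = 0;

	opts = apply_opt(opts, &barrier, 1);	/* "barrier"   -> bit set */
	opts = apply_opt(opts, &nobarrier, 1);	/* "nobarrier" -> bit cleared */
	printf("opts=%#x\n", opts);
	return 0;
}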
-
-static int handle_mount_opt(struct super_block *sb, char *opt, int token,
- substring_t *args, unsigned long *journal_devnum,
- unsigned int *journal_ioprio, int is_remount)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- const struct mount_opts *m;
- int arg = 0;
-
-#ifdef CONFIG_QUOTA
- if (token == Opt_usrjquota)
- return set_qf_name(sb, USRQUOTA, &args[0]);
- else if (token == Opt_grpjquota)
- return set_qf_name(sb, GRPQUOTA, &args[0]);
- else if (token == Opt_offusrjquota)
- return clear_qf_name(sb, USRQUOTA);
- else if (token == Opt_offgrpjquota)
- return clear_qf_name(sb, GRPQUOTA);
-#endif
- if (args->from && match_int(args, &arg))
- return -1;
- switch (token) {
- case Opt_noacl:
- case Opt_nouser_xattr:
- ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
- break;
- case Opt_sb:
- return 1; /* handled by get_sb_block() */
- case Opt_removed:
- ext4_msg(sb, KERN_WARNING,
- "Ignoring removed %s option", opt);
- return 1;
- case Opt_resuid:
- sbi->s_resuid = arg;
- return 1;
- case Opt_resgid:
- sbi->s_resgid = arg;
- return 1;
- case Opt_abort:
- sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
- return 1;
- case Opt_i_version:
- sb->s_flags |= MS_I_VERSION;
- return 1;
- case Opt_journal_dev:
- if (is_remount) {
- ext4_msg(sb, KERN_ERR,
- "Cannot specify journal on remount");
- return -1;
- }
- *journal_devnum = arg;
- return 1;
- case Opt_journal_ioprio:
- if (arg < 0 || arg > 7)
- return -1;
- *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
- return 1;
- }
-
- for (m = ext4_mount_opts; m->token != Opt_err; m++) {
- if (token != m->token)
- continue;
- if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
- return -1;
- if (m->flags & MOPT_EXPLICIT)
- set_opt2(sb, EXPLICIT_DELALLOC);
- if (m->flags & MOPT_CLEAR_ERR)
- clear_opt(sb, ERRORS_MASK);
- if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
- ext4_msg(sb, KERN_ERR, "Cannot change quota "
- "options when quota turned on");
- return -1;
- }
-
- if (m->flags & MOPT_NOSUPPORT) {
- ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
- } else if (token == Opt_commit) {
- if (arg == 0)
- arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
- sbi->s_commit_interval = HZ * arg;
- } else if (token == Opt_max_batch_time) {
- if (arg == 0)
- arg = EXT4_DEF_MAX_BATCH_TIME;
- sbi->s_max_batch_time = arg;
- } else if (token == Opt_min_batch_time) {
- sbi->s_min_batch_time = arg;
- } else if (token == Opt_inode_readahead_blks) {
- if (arg > (1 << 30))
- return -1;
- if (arg && !is_power_of_2(arg)) {
- ext4_msg(sb, KERN_ERR,
- "EXT4-fs: inode_readahead_blks"
- " must be a power of 2");
- return -1;
- }
- sbi->s_inode_readahead_blks = arg;
- } else if (token == Opt_init_itable) {
- set_opt(sb, INIT_INODE_TABLE);
- if (!args->from)
- arg = EXT4_DEF_LI_WAIT_MULT;
- sbi->s_li_wait_mult = arg;
- } else if (token == Opt_stripe) {
- sbi->s_stripe = arg;
- } else if (m->flags & MOPT_DATAJ) {
- if (is_remount) {
- if (!sbi->s_journal)
- ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
- else if (test_opt(sb, DATA_FLAGS) !=
- m->mount_opt) {
- ext4_msg(sb, KERN_ERR,
- "Cannot change data mode on remount");
- return -1;
- }
- } else {
- clear_opt(sb, DATA_FLAGS);
- sbi->s_mount_opt |= m->mount_opt;
- }
-#ifdef CONFIG_QUOTA
- } else if (m->flags & MOPT_QFMT) {
- if (sb_any_quota_loaded(sb) &&
- sbi->s_jquota_fmt != m->mount_opt) {
- ext4_msg(sb, KERN_ERR, "Cannot "
- "change journaled quota options "
- "when quota turned on");
- return -1;
- }
- sbi->s_jquota_fmt = m->mount_opt;
-#endif
- } else {
- if (!args->from)
- arg = 1;
- if (m->flags & MOPT_CLEAR)
- arg = !arg;
- else if (unlikely(!(m->flags & MOPT_SET))) {
- ext4_msg(sb, KERN_WARNING,
- "buggy handling of option %s", opt);
- WARN_ON(1);
- return -1;
- }
- if (arg != 0)
- sbi->s_mount_opt |= m->mount_opt;
- else
- sbi->s_mount_opt &= ~m->mount_opt;
- }
- return 1;
- }
- ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
- "or missing value", opt);
- return -1;
-}
-
-static int parse_options(char *options, struct super_block *sb,
- unsigned long *journal_devnum,
- unsigned int *journal_ioprio,
- int is_remount)
-{
-#ifdef CONFIG_QUOTA
- struct ext4_sb_info *sbi = EXT4_SB(sb);
-#endif
- char *p;
- substring_t args[MAX_OPT_ARGS];
- int token;
-
- if (!options)
- return 1;
-
- while ((p = strsep(&options, ",")) != NULL) {
- if (!*p)
- continue;
- /*
- * Initialize args struct so we know whether arg was
- * found; some options take optional arguments.
- */
- args[0].to = args[0].from = 0;
- token = match_token(p, tokens, args);
- if (handle_mount_opt(sb, p, token, args, journal_devnum,
- journal_ioprio, is_remount) < 0)
- return 0;
- }
-#ifdef CONFIG_QUOTA
- if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
- if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
- clear_opt(sb, USRQUOTA);
-
- if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
- clear_opt(sb, GRPQUOTA);
-
- if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
- ext4_msg(sb, KERN_ERR, "old and new quota "
- "format mixing");
- return 0;
- }
-
- if (!sbi->s_jquota_fmt) {
- ext4_msg(sb, KERN_ERR, "journaled quota format "
- "not specified");
- return 0;
- }
- } else {
- if (sbi->s_jquota_fmt) {
- ext4_msg(sb, KERN_ERR, "journaled quota format "
- "specified with no journaling "
- "enabled");
- return 0;
- }
- }
-#endif
- return 1;
-}
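The option string itself is split with a plain strsep() loop, skipping empty tokens; each token then goes through match_token() and handle_mount_opt(). The tokenizing step in standalone form (glibc strsep):

#include <stdio.h>
#include <string.h>

/* Standalone form of the strsep() tokenizing loop in parse_options(). */
int main(void)
{
	char opts[] = "errors=remount-ro,,barrier=1,noauto_da_alloc";
	char *s = opts, *p;

	while ((p = strsep(&s, ",")) != NULL) {
		if (!*p)
			continue;	/* skip empty tokens */
		printf("option token: %s\n", p);
	}
	return 0;
}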
-
-static inline void ext4_show_quota_options(struct seq_file *seq,
- struct super_block *sb)
-{
-#if defined(CONFIG_QUOTA)
- struct ext4_sb_info *sbi = EXT4_SB(sb);
-
- if (sbi->s_jquota_fmt) {
- char *fmtname = "";
-
- switch (sbi->s_jquota_fmt) {
- case QFMT_VFS_OLD:
- fmtname = "vfsold";
- break;
- case QFMT_VFS_V0:
- fmtname = "vfsv0";
- break;
- case QFMT_VFS_V1:
- fmtname = "vfsv1";
- break;
- }
- seq_printf(seq, ",jqfmt=%s", fmtname);
- }
-
- if (sbi->s_qf_names[USRQUOTA])
- seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
-
- if (sbi->s_qf_names[GRPQUOTA])
- seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
-
- if (test_opt(sb, USRQUOTA))
- seq_puts(seq, ",usrquota");
-
- if (test_opt(sb, GRPQUOTA))
- seq_puts(seq, ",grpquota");
-#endif
-}
-
-static const char *token2str(int token)
-{
- static const struct match_token *t;
-
- for (t = tokens; t->token != Opt_err; t++)
- if (t->token == token && !strchr(t->pattern, '='))
- break;
- return t->pattern;
-}
-
-/*
- * Show an option if
- * - it's set to a non-default value OR
- * - if the per-sb default is different from the global default
- */
-static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
- int nodefs)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_super_block *es = sbi->s_es;
- int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
- const struct mount_opts *m;
- char sep = nodefs ? '\n' : ',';
-
-#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
-#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
-
- if (sbi->s_sb_block != 1)
- SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
-
- for (m = ext4_mount_opts; m->token != Opt_err; m++) {
- int want_set = m->flags & MOPT_SET;
- if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
- (m->flags & MOPT_CLEAR_ERR))
- continue;
- if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
- continue; /* skip if same as the default */
- if ((want_set &&
- (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
- (!want_set && (sbi->s_mount_opt & m->mount_opt)))
- continue; /* select Opt_noFoo vs Opt_Foo */
- SEQ_OPTS_PRINT("%s", token2str(m->token));
- }
-
- if (nodefs || sbi->s_resuid != EXT4_DEF_RESUID ||
- le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
- SEQ_OPTS_PRINT("resuid=%u", sbi->s_resuid);
- if (nodefs || sbi->s_resgid != EXT4_DEF_RESGID ||
- le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
- SEQ_OPTS_PRINT("resgid=%u", sbi->s_resgid);
- def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
- if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
- SEQ_OPTS_PUTS("errors=remount-ro");
- if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
- SEQ_OPTS_PUTS("errors=continue");
- if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
- SEQ_OPTS_PUTS("errors=panic");
- if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
- SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
- if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
- SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
- if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
- SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
- if (sb->s_flags & MS_I_VERSION)
- SEQ_OPTS_PUTS("i_version");
- if (nodefs || sbi->s_stripe)
- SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
- if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
- if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
- SEQ_OPTS_PUTS("data=journal");
- else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
- SEQ_OPTS_PUTS("data=ordered");
- else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
- SEQ_OPTS_PUTS("data=writeback");
- }
- if (nodefs ||
- sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
- SEQ_OPTS_PRINT("inode_readahead_blks=%u",
- sbi->s_inode_readahead_blks);
-
- if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
- (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
- SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
-
- ext4_show_quota_options(seq, sb);
- return 0;
-}
-
-static int ext4_show_options(struct seq_file *seq, struct dentry *root)
-{
- return _ext4_show_options(seq, root->d_sb, 0);
-}
-
-static int options_seq_show(struct seq_file *seq, void *offset)
-{
- struct super_block *sb = seq->private;
- int rc;
-
- seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw");
- rc = _ext4_show_options(seq, sb, 1);
- seq_puts(seq, "\n");
- return rc;
-}
-
-static int options_open_fs(struct inode *inode, struct file *file)
-{
- return single_open(file, options_seq_show, PDE(inode)->data);
-}
-
-static const struct file_operations ext4_seq_options_fops = {
- .owner = THIS_MODULE,
- .open = options_open_fs,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
- int read_only)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- int res = 0;
-
- if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
- ext4_msg(sb, KERN_ERR, "revision level too high, "
- "forcing read-only mode");
- res = MS_RDONLY;
- }
- if (read_only)
- goto done;
- if (!(sbi->s_mount_state & EXT4_VALID_FS))
- ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
- "running e2fsck is recommended");
- else if ((sbi->s_mount_state & EXT4_ERROR_FS))
- ext4_msg(sb, KERN_WARNING,
- "warning: mounting fs with errors, "
- "running e2fsck is recommended");
- else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
- le16_to_cpu(es->s_mnt_count) >=
- (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
- ext4_msg(sb, KERN_WARNING,
- "warning: maximal mount count reached, "
- "running e2fsck is recommended");
- else if (le32_to_cpu(es->s_checkinterval) &&
- (le32_to_cpu(es->s_lastcheck) +
- le32_to_cpu(es->s_checkinterval) <= get_seconds()))
- ext4_msg(sb, KERN_WARNING,
- "warning: checktime reached, "
- "running e2fsck is recommended");
- if (!sbi->s_journal)
- es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
- if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
- es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
- le16_add_cpu(&es->s_mnt_count, 1);
- es->s_mtime = cpu_to_le32(get_seconds());
- ext4_update_dynamic_rev(sb);
- if (sbi->s_journal)
- EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
-
- ext4_commit_super(sb, 1);
-done:
- if (test_opt(sb, DEBUG))
- printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
- "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
- sb->s_blocksize,
- sbi->s_groups_count,
- EXT4_BLOCKS_PER_GROUP(sb),
- EXT4_INODES_PER_GROUP(sb),
- sbi->s_mount_opt, sbi->s_mount_opt2);
-
- cleancache_init_fs(sb);
- return res;
-}
-
-static int ext4_fill_flex_info(struct super_block *sb)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_group_desc *gdp = NULL;
- ext4_group_t flex_group_count;
- ext4_group_t flex_group;
- unsigned int groups_per_flex = 0;
- size_t size;
- int i;
-
- sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
- if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
- sbi->s_log_groups_per_flex = 0;
- return 1;
- }
- groups_per_flex = 1 << sbi->s_log_groups_per_flex;
-
- /* We allocate both existing and potentially added groups */
- flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
- ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
- EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
- size = flex_group_count * sizeof(struct flex_groups);
- sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL);
- if (sbi->s_flex_groups == NULL) {
- ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups",
- flex_group_count);
- goto failed;
- }
-
- for (i = 0; i < sbi->s_groups_count; i++) {
- gdp = ext4_get_group_desc(sb, i, NULL);
-
- flex_group = ext4_flex_group(sbi, i);
- atomic_add(ext4_free_inodes_count(sb, gdp),
- &sbi->s_flex_groups[flex_group].free_inodes);
- atomic_add(ext4_free_group_clusters(sb, gdp),
- &sbi->s_flex_groups[flex_group].free_clusters);
- atomic_add(ext4_used_dirs_count(sb, gdp),
- &sbi->s_flex_groups[flex_group].used_dirs);
- }
-
- return 1;
-failed:
- return 0;
-}
-
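-/*
- * Group descriptor checksum: CRC16 over the filesystem UUID, the
- * little-endian group number, and the descriptor itself with the
- * bg_checksum field skipped (the tail beyond it is included only on
- * filesystems with the 64BIT feature and a larger descriptor size).
- */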
-__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
- struct ext4_group_desc *gdp)
-{
- __u16 crc = 0;
-
- if (sbi->s_es->s_feature_ro_compat &
- cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
- int offset = offsetof(struct ext4_group_desc, bg_checksum);
- __le32 le_group = cpu_to_le32(block_group);
-
- crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
- crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
- crc = crc16(crc, (__u8 *)gdp, offset);
- offset += sizeof(gdp->bg_checksum); /* skip checksum */
- /* for checksum of struct ext4_group_desc do the rest...*/
- if ((sbi->s_es->s_feature_incompat &
- cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
- offset < le16_to_cpu(sbi->s_es->s_desc_size))
- crc = crc16(crc, (__u8 *)gdp + offset,
- le16_to_cpu(sbi->s_es->s_desc_size) -
- offset);
- }
-
- return cpu_to_le16(crc);
-}
-
-int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
- struct ext4_group_desc *gdp)
-{
- if ((sbi->s_es->s_feature_ro_compat &
- cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
- (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
- return 0;
-
- return 1;
-}
-
-/* Called at mount-time, super-block is locked */
-static int ext4_check_descriptors(struct super_block *sb,
- ext4_group_t *first_not_zeroed)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
- ext4_fsblk_t last_block;
- ext4_fsblk_t block_bitmap;
- ext4_fsblk_t inode_bitmap;
- ext4_fsblk_t inode_table;
- int flexbg_flag = 0;
- ext4_group_t i, grp = sbi->s_groups_count;
-
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
- flexbg_flag = 1;
-
- ext4_debug("Checking group descriptors");
-
- for (i = 0; i < sbi->s_groups_count; i++) {
- struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
-
- if (i == sbi->s_groups_count - 1 || flexbg_flag)
- last_block = ext4_blocks_count(sbi->s_es) - 1;
- else
- last_block = first_block +
- (EXT4_BLOCKS_PER_GROUP(sb) - 1);
-
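- /*
- * Remember the first group whose inode table has not been
- * zeroed, for the lazy itable init thread.
- */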
- if ((grp == sbi->s_groups_count) &&
- !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
- grp = i;
-
- block_bitmap = ext4_block_bitmap(sb, gdp);
- if (block_bitmap < first_block || block_bitmap > last_block) {
- ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
- "Block bitmap for group %u not in group "
- "(block %llu)!", i, block_bitmap);
- return 0;
- }
- inode_bitmap = ext4_inode_bitmap(sb, gdp);
- if (inode_bitmap < first_block || inode_bitmap > last_block) {
- ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
- "Inode bitmap for group %u not in group "
- "(block %llu)!", i, inode_bitmap);
- return 0;
- }
- inode_table = ext4_inode_table(sb, gdp);
- if (inode_table < first_block ||
- inode_table + sbi->s_itb_per_group - 1 > last_block) {
- ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
- "Inode table for group %u not in group "
- "(block %llu)!", i, inode_table);
- return 0;
- }
- ext4_lock_group(sb, i);
- if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
- ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
- "Checksum for group %u failed (%u!=%u)",
- i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
- gdp)), le16_to_cpu(gdp->bg_checksum));
- if (!(sb->s_flags & MS_RDONLY)) {
- ext4_unlock_group(sb, i);
- return 0;
- }
- }
- ext4_unlock_group(sb, i);
- if (!flexbg_flag)
- first_block += EXT4_BLOCKS_PER_GROUP(sb);
- }
- if (NULL != first_not_zeroed)
- *first_not_zeroed = grp;
-
- ext4_free_blocks_count_set(sbi->s_es,
- EXT4_C2B(sbi, ext4_count_free_clusters(sb)));
- sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
- return 1;
-}
-
-/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
- * the superblock) which were deleted from all directories, but held open by
- * a process at the time of a crash. We walk the list and try to delete these
- * inodes at recovery time (only with a read-write filesystem).
- *
- * In order to keep the orphan inode chain consistent during traversal (in
- * case of crash during recovery), we link each inode into the superblock
- * orphan list_head and handle it the same way as an inode deletion during
- * normal operation (which journals the operations for us).
- *
- * We only do an iget() and an iput() on each inode, which is very safe if we
- * accidentally point at an in-use or already deleted inode. The worst that
- * can happen in this case is that we get a "bit already cleared" message from
- * ext4_free_inode(). The only reason we would point at a wrong inode is if
- * e2fsck was run on this filesystem, and it must have already done the orphan
- * inode cleanup for us, so we can safely abort without any further action.
- */
-static void ext4_orphan_cleanup(struct super_block *sb,
- struct ext4_super_block *es)
-{
- unsigned int s_flags = sb->s_flags;
- int nr_orphans = 0, nr_truncates = 0;
-#ifdef CONFIG_QUOTA
- int i;
-#endif
- if (!es->s_last_orphan) {
- jbd_debug(4, "no orphan inodes to clean up\n");
- return;
- }
-
- if (bdev_read_only(sb->s_bdev)) {
- ext4_msg(sb, KERN_ERR, "write access "
- "unavailable, skipping orphan cleanup");
- return;
- }
-
- /* Check if feature set would not allow a r/w mount */
- if (!ext4_feature_set_ok(sb, 0)) {
- ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
- "unknown ROCOMPAT features");
- return;
- }
-
- if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
- if (es->s_last_orphan)
- jbd_debug(1, "Errors on filesystem, "
- "clearing orphan list.\n");
- es->s_last_orphan = 0;
- jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
- return;
- }
-
- if (s_flags & MS_RDONLY) {
- ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
- sb->s_flags &= ~MS_RDONLY;
- }
-#ifdef CONFIG_QUOTA
- /* Needed for iput() to work correctly and not trash data */
- sb->s_flags |= MS_ACTIVE;
- /* Turn on quotas so that they are updated correctly */
- for (i = 0; i < MAXQUOTAS; i++) {
- if (EXT4_SB(sb)->s_qf_names[i]) {
- int ret = ext4_quota_on_mount(sb, i);
- if (ret < 0)
- ext4_msg(sb, KERN_ERR,
- "Cannot turn on journaled "
- "quota: error %d", ret);
- }
- }
-#endif
-
- while (es->s_last_orphan) {
- struct inode *inode;
-
- inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
- if (IS_ERR(inode)) {
- es->s_last_orphan = 0;
- break;
- }
-
- list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
- dquot_initialize(inode);
- if (inode->i_nlink) {
- ext4_msg(sb, KERN_DEBUG,
- "%s: truncating inode %lu to %lld bytes",
- __func__, inode->i_ino, inode->i_size);
- jbd_debug(2, "truncating inode %lu to %lld bytes\n",
- inode->i_ino, inode->i_size);
- ext4_truncate(inode);
- nr_truncates++;
- } else {
- ext4_msg(sb, KERN_DEBUG,
- "%s: deleting unreferenced inode %lu",
- __func__, inode->i_ino);
- jbd_debug(2, "deleting unreferenced inode %lu\n",
- inode->i_ino);
- nr_orphans++;
- }
- iput(inode); /* The delete magic happens here! */
- }
-
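-/* Expand a count into the count plus an "s" suffix when it isn't 1 */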
-#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
-
- if (nr_orphans)
- ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
- PLURAL(nr_orphans));
- if (nr_truncates)
- ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
- PLURAL(nr_truncates));
-#ifdef CONFIG_QUOTA
- /* Turn quotas off */
- for (i = 0; i < MAXQUOTAS; i++) {
- if (sb_dqopt(sb)->files[i])
- dquot_quota_off(sb, i);
- }
-#endif
- sb->s_flags = s_flags; /* Restore MS_RDONLY status */
-}
-
-/*
- * Maximal extent format file size.
- * Resulting logical blkno at s_maxbytes must fit in our on-disk
- * extent format containers, within a sector_t, and within i_blocks
- * in the vfs. ext4 inode has 48 bits of i_block in fsblock units,
- * so that won't be a limiting factor.
- *
- * However, there is another limiting factor: we store extents as a
- * starting block and a length, so the length of the extent covering the
- * maximum file size must also fit into the on-disk format containers.
- * Since the stored length is always one unit bigger than the largest
- * representable value (because we count 0 as well), we have to lower
- * s_maxbytes by one fs block.
- *
- * Note, this does *not* consider any metadata overhead for vfs i_blocks.
- */
-static loff_t ext4_max_size(int blkbits, int has_huge_files)
-{
- loff_t res;
- loff_t upper_limit = MAX_LFS_FILESIZE;
-
- /* small i_blocks in vfs inode? */
- if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
- /*
- * !has_huge_files or CONFIG_LBDAF not enabled implies that
- * the inode i_blocks field represents the total number of
- * 512-byte sectors (32 == size of vfs inode i_blocks * 8)
- */
- upper_limit = (1LL << 32) - 1;
-
- /* total blocks in file system block size */
- upper_limit >>= (blkbits - 9);
- upper_limit <<= blkbits;
- }
-
- /*
- * 32-bit extent-start container, ee_block. We lower the maxbytes
- * by one fs block, so ee_len can cover the extent of maximum file
- * size
- */
- res = (1LL << 32) - 1;
- res <<= blkbits;
-
- /* Sanity check against vm- & vfs- imposed limits */
- if (res > upper_limit)
- res = upper_limit;
-
- return res;
-}
-
-/*
- * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect
- * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
- * We need to be 1 filesystem block less than the 2^48 sector limit.
- */
-static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
-{
- loff_t res = EXT4_NDIR_BLOCKS;
- int meta_blocks;
- loff_t upper_limit;
- /* This is calculated to be the largest file size for a dense, block
- * mapped file such that the file's total number of 512-byte sectors,
- * including data and all indirect blocks, does not exceed (2^48 - 1).
- *
- * __u32 i_blocks_lo and __u16 i_blocks_high represent the total
- * number of 512-byte sectors of the file.
- */
-
- if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
- /*
- * !has_huge_files or CONFIG_LBDAF not enabled implies that
- * the inode i_block field represents total file blocks in
- * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
- */
- upper_limit = (1LL << 32) - 1;
-
- /* total blocks in file system block size */
- upper_limit >>= (bits - 9);
-
- } else {
- /*
- * We use 48 bit ext4_inode i_blocks
- * With EXT4_HUGE_FILE_FL set the i_blocks
- * represent total number of blocks in
- * file system block size
- */
- upper_limit = (1LL << 48) - 1;
-
- }
-
- /* indirect blocks */
- meta_blocks = 1;
- /* double indirect blocks */
- meta_blocks += 1 + (1LL << (bits-2));
- /* triple indirect blocks */
- meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
-
- upper_limit -= meta_blocks;
- upper_limit <<= bits;
-
- res += 1LL << (bits-2);
- res += 1LL << (2*(bits-2));
- res += 1LL << (3*(bits-2));
- res <<= bits;
- if (res > upper_limit)
- res = upper_limit;
-
- if (res > MAX_LFS_FILESIZE)
- res = MAX_LFS_FILESIZE;
-
- return res;
-}
-
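-/*
- * Locate group descriptor block number 'nr'. Without META_BG (or
- * below s_first_meta_bg) the descriptors simply follow the superblock;
- * with META_BG each descriptor block sits at the start of its own meta
- * block group, after any backup superblock located there.
- */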
-static ext4_fsblk_t descriptor_loc(struct super_block *sb,
- ext4_fsblk_t logical_sb_block, int nr)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_group_t bg, first_meta_bg;
- int has_super = 0;
-
- first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
-
- if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
- nr < first_meta_bg)
- return logical_sb_block + nr + 1;
- bg = sbi->s_desc_per_block * nr;
- if (ext4_bg_has_super(sb, bg))
- has_super = 1;
-
- return (has_super + ext4_group_first_block_no(sb, bg));
-}
-
-/**
- * ext4_get_stripe_size: Get the stripe size.
- * @sbi: In memory super block info
- *
- * If a stripe size was specified via a mount option and is no larger
- * than the blocks per group, use it. Otherwise fall back to the
- * superblock values: the RAID stripe width first, then the RAID stride.
- * If those also exceed the blocks per group, return 0; the allocator
- * needs the stripe to be less than the blocks per group.
- *
- */
-static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
-{
- unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
- unsigned long stripe_width =
- le32_to_cpu(sbi->s_es->s_raid_stripe_width);
- int ret;
-
- if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
- ret = sbi->s_stripe;
- else if (stripe_width <= sbi->s_blocks_per_group)
- ret = stripe_width;
- else if (stride <= sbi->s_blocks_per_group)
- ret = stride;
- else
- ret = 0;
-
- /*
- * If the stripe width is 1, this makes no sense and
- * we set it to 0 to turn off stripe handling code.
- */
- if (ret <= 1)
- ret = 0;
-
- return ret;
-}
-
-/* sysfs support */
-
-struct ext4_attr {
- struct attribute attr;
- ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
- ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
- const char *, size_t);
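- /* byte offset of the backing field inside struct ext4_sb_info */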
- int offset;
-};
-
-static int parse_strtoul(const char *buf,
- unsigned long max, unsigned long *value)
-{
- char *endp;
-
- *value = simple_strtoul(skip_spaces(buf), &endp, 0);
- endp = skip_spaces(endp);
- if (*endp || *value > max)
- return -EINVAL;
-
- return 0;
-}
-
-static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
- struct ext4_sb_info *sbi,
- char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (s64) EXT4_C2B(sbi,
- percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
-}
-
-static ssize_t session_write_kbytes_show(struct ext4_attr *a,
- struct ext4_sb_info *sbi, char *buf)
-{
- struct super_block *sb = sbi->s_buddy_cache->i_sb;
-
- if (!sb->s_bdev->bd_part)
- return snprintf(buf, PAGE_SIZE, "0\n");
- return snprintf(buf, PAGE_SIZE, "%lu\n",
- (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
- sbi->s_sectors_written_start) >> 1);
-}
-
-static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
- struct ext4_sb_info *sbi, char *buf)
-{
- struct super_block *sb = sbi->s_buddy_cache->i_sb;
-
- if (!sb->s_bdev->bd_part)
- return snprintf(buf, PAGE_SIZE, "0\n");
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)(sbi->s_kbytes_written +
- ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
- EXT4_SB(sb)->s_sectors_written_start) >> 1)));
-}
-
-static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
- struct ext4_sb_info *sbi,
- const char *buf, size_t count)
-{
- unsigned long t;
-
- if (parse_strtoul(buf, 0x40000000, &t))
- return -EINVAL;
-
- if (t && !is_power_of_2(t))
- return -EINVAL;
-
- sbi->s_inode_readahead_blks = t;
- return count;
-}
-
-static ssize_t sbi_ui_show(struct ext4_attr *a,
- struct ext4_sb_info *sbi, char *buf)
-{
- unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
-
- return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
-}
-
-static ssize_t sbi_ui_store(struct ext4_attr *a,
- struct ext4_sb_info *sbi,
- const char *buf, size_t count)
-{
- unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
- unsigned long t;
-
- if (parse_strtoul(buf, 0xffffffff, &t))
- return -EINVAL;
- *ui = t;
- return count;
-}
-
-#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
-static struct ext4_attr ext4_attr_##_name = { \
- .attr = {.name = __stringify(_name), .mode = _mode }, \
- .show = _show, \
- .store = _store, \
- .offset = offsetof(struct ext4_sb_info, _elname), \
-}
-#define EXT4_ATTR(name, mode, show, store) \
-static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
-
-#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
-#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
-#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
-#define EXT4_RW_ATTR_SBI_UI(name, elname) \
- EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)
-#define ATTR_LIST(name) &ext4_attr_##name.attr
-
-EXT4_RO_ATTR(delayed_allocation_blocks);
-EXT4_RO_ATTR(session_write_kbytes);
-EXT4_RO_ATTR(lifetime_write_kbytes);
-EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
- inode_readahead_blks_store, s_inode_readahead_blks);
-EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
-EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
-EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
-EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
-EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
-EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
-EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
-EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);
-
-static struct attribute *ext4_attrs[] = {
- ATTR_LIST(delayed_allocation_blocks),
- ATTR_LIST(session_write_kbytes),
- ATTR_LIST(lifetime_write_kbytes),
- ATTR_LIST(inode_readahead_blks),
- ATTR_LIST(inode_goal),
- ATTR_LIST(mb_stats),
- ATTR_LIST(mb_max_to_scan),
- ATTR_LIST(mb_min_to_scan),
- ATTR_LIST(mb_order2_req),
- ATTR_LIST(mb_stream_req),
- ATTR_LIST(mb_group_prealloc),
- ATTR_LIST(max_writeback_mb_bump),
- NULL,
-};
-
-/* Features this copy of ext4 supports */
-EXT4_INFO_ATTR(lazy_itable_init);
-EXT4_INFO_ATTR(batched_discard);
-
-static struct attribute *ext4_feat_attrs[] = {
- ATTR_LIST(lazy_itable_init),
- ATTR_LIST(batched_discard),
- NULL,
-};
-
-static ssize_t ext4_attr_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
- s_kobj);
- struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
-
- return a->show ? a->show(a, sbi, buf) : 0;
-}
-
-static ssize_t ext4_attr_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buf, size_t len)
-{
- struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
- s_kobj);
- struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
-
- return a->store ? a->store(a, sbi, buf, len) : 0;
-}
-
-static void ext4_sb_release(struct kobject *kobj)
-{
- struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
- s_kobj);
- complete(&sbi->s_kobj_unregister);
-}
-
-static const struct sysfs_ops ext4_attr_ops = {
- .show = ext4_attr_show,
- .store = ext4_attr_store,
-};
-
-static struct kobj_type ext4_ktype = {
- .default_attrs = ext4_attrs,
- .sysfs_ops = &ext4_attr_ops,
- .release = ext4_sb_release,
-};
-
-static void ext4_feat_release(struct kobject *kobj)
-{
- complete(&ext4_feat->f_kobj_unregister);
-}
-
-static struct kobj_type ext4_feat_ktype = {
- .default_attrs = ext4_feat_attrs,
- .sysfs_ops = &ext4_attr_ops,
- .release = ext4_feat_release,
-};
-
-/*
- * Check whether this filesystem can be mounted based on
- * the features present and the RDONLY/RDWR mount requested.
- * Returns 1 if this filesystem can be mounted as requested,
- * 0 if it cannot be.
- */
-static int ext4_feature_set_ok(struct super_block *sb, int readonly)
-{
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) {
- ext4_msg(sb, KERN_ERR,
- "Couldn't mount because of "
- "unsupported optional features (%x)",
- (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
- ~EXT4_FEATURE_INCOMPAT_SUPP));
- return 0;
- }
-
- if (readonly)
- return 1;
-
- /* Check that feature set is OK for a read-write mount */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) {
- ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
- "unsupported optional features (%x)",
- (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
- ~EXT4_FEATURE_RO_COMPAT_SUPP));
- return 0;
- }
- /*
- * A filesystem with large files enabled can only be mounted
- * read-write on 32-bit systems if the kernel is built with CONFIG_LBDAF
- */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
- if (sizeof(blkcnt_t) < sizeof(u64)) {
- ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
- "cannot be mounted RDWR without "
- "CONFIG_LBDAF");
- return 0;
- }
- }
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) &&
- !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
- ext4_msg(sb, KERN_ERR,
- "Can't support bigalloc feature without "
- "extents feature\n");
- return 0;
- }
- return 1;
-}
-
-/*
- * This function is called once a day if we have errors logged
- * on the file system
- */
-static void print_daily_error_info(unsigned long arg)
-{
- struct super_block *sb = (struct super_block *) arg;
- struct ext4_sb_info *sbi;
- struct ext4_super_block *es;
-
- sbi = EXT4_SB(sb);
- es = sbi->s_es;
-
- if (es->s_error_count)
- ext4_msg(sb, KERN_NOTICE, "error count: %u",
- le32_to_cpu(es->s_error_count));
- if (es->s_first_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
- sb->s_id, le32_to_cpu(es->s_first_error_time),
- (int) sizeof(es->s_first_error_func),
- es->s_first_error_func,
- le32_to_cpu(es->s_first_error_line));
- if (es->s_first_error_ino)
- printk(": inode %u",
- le32_to_cpu(es->s_first_error_ino));
- if (es->s_first_error_block)
- printk(": block %llu", (unsigned long long)
- le64_to_cpu(es->s_first_error_block));
- printk("\n");
- }
- if (es->s_last_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
- sb->s_id, le32_to_cpu(es->s_last_error_time),
- (int) sizeof(es->s_last_error_func),
- es->s_last_error_func,
- le32_to_cpu(es->s_last_error_line));
- if (es->s_last_error_ino)
- printk(": inode %u",
- le32_to_cpu(es->s_last_error_ino));
- if (es->s_last_error_block)
- printk(": block %llu", (unsigned long long)
- le64_to_cpu(es->s_last_error_block));
- printk("\n");
- }
- mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
-}
-
-/* Find next suitable group and run ext4_init_inode_table */
-static int ext4_run_li_request(struct ext4_li_request *elr)
-{
- struct ext4_group_desc *gdp = NULL;
- ext4_group_t group, ngroups;
- struct super_block *sb;
- unsigned long timeout = 0;
- int ret = 0;
-
- sb = elr->lr_super;
- ngroups = EXT4_SB(sb)->s_groups_count;
-
- for (group = elr->lr_next_group; group < ngroups; group++) {
- gdp = ext4_get_group_desc(sb, group, NULL);
- if (!gdp) {
- ret = 1;
- break;
- }
-
- if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
- break;
- }
-
- if (group == ngroups)
- ret = 1;
-
- if (!ret) {
- timeout = jiffies;
- ret = ext4_init_inode_table(sb, group,
- elr->lr_timeout ? 0 : 1);
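- /*
- * On the first pass, measure how long initializing one group
- * takes and scale it by s_li_wait_mult to pace later groups.
- */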
- if (elr->lr_timeout == 0) {
- timeout = (jiffies - timeout) *
- elr->lr_sbi->s_li_wait_mult;
- elr->lr_timeout = timeout;
- }
- elr->lr_next_sched = jiffies + elr->lr_timeout;
- elr->lr_next_group = group + 1;
- }
-
- return ret;
-}
-
-/*
- * Remove lr_request from the request list and free the
- * request structure. Should be called with li_list_mtx held.
- */
-static void ext4_remove_li_request(struct ext4_li_request *elr)
-{
- struct ext4_sb_info *sbi;
-
- if (!elr)
- return;
-
- sbi = elr->lr_sbi;
-
- list_del(&elr->lr_request);
- sbi->s_li_request = NULL;
- kfree(elr);
-}
-
-static void ext4_unregister_li_request(struct super_block *sb)
-{
- mutex_lock(&ext4_li_mtx);
- if (!ext4_li_info) {
- mutex_unlock(&ext4_li_mtx);
- return;
- }
-
- mutex_lock(&ext4_li_info->li_list_mtx);
- ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
- mutex_unlock(&ext4_li_info->li_list_mtx);
- mutex_unlock(&ext4_li_mtx);
-}
-
-static struct task_struct *ext4_lazyinit_task;
-
-/*
- * This is the function where the ext4lazyinit thread lives. It walks
- * through the request list searching for the next scheduled filesystem.
- * When one is found, it runs the lazy initialization request
- * (ext4_run_li_request) and keeps track of the time spent in this
- * function. Based on that time we compute the next schedule time for
- * the request. When the walk through the list is complete, it computes
- * the next wakeup time and puts itself to sleep.
- */
-static int ext4_lazyinit_thread(void *arg)
-{
- struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
- struct list_head *pos, *n;
- struct ext4_li_request *elr;
- unsigned long next_wakeup, cur;
-
- BUG_ON(NULL == eli);
-
-cont_thread:
- while (true) {
- next_wakeup = MAX_JIFFY_OFFSET;
-
- mutex_lock(&eli->li_list_mtx);
- if (list_empty(&eli->li_request_list)) {
- mutex_unlock(&eli->li_list_mtx);
- goto exit_thread;
- }
-
- list_for_each_safe(pos, n, &eli->li_request_list) {
- elr = list_entry(pos, struct ext4_li_request,
- lr_request);
-
- if (time_after_eq(jiffies, elr->lr_next_sched)) {
- if (ext4_run_li_request(elr) != 0) {
- /* error, remove the lazy_init job */
- ext4_remove_li_request(elr);
- continue;
- }
- }
-
- if (time_before(elr->lr_next_sched, next_wakeup))
- next_wakeup = elr->lr_next_sched;
- }
- mutex_unlock(&eli->li_list_mtx);
-
- try_to_freeze();
-
- cur = jiffies;
- if ((time_after_eq(cur, next_wakeup)) ||
- (MAX_JIFFY_OFFSET == next_wakeup)) {
- cond_resched();
- continue;
- }
-
- schedule_timeout_interruptible(next_wakeup - cur);
-
- if (kthread_should_stop()) {
- ext4_clear_request_list();
- goto exit_thread;
- }
- }
-
-exit_thread:
- /*
- * It looks like the request list is empty, but we need
- * to check it under the li_list_mtx lock, to prevent any
- * additions into it, and of course we should lock ext4_li_mtx
- * to atomically free the list and ext4_li_info, because at
- * this point another ext4 filesystem could be registering a
- * new one.
- */
- mutex_lock(&ext4_li_mtx);
- mutex_lock(&eli->li_list_mtx);
- if (!list_empty(&eli->li_request_list)) {
- mutex_unlock(&eli->li_list_mtx);
- mutex_unlock(&ext4_li_mtx);
- goto cont_thread;
- }
- mutex_unlock(&eli->li_list_mtx);
- kfree(ext4_li_info);
- ext4_li_info = NULL;
- mutex_unlock(&ext4_li_mtx);
-
- return 0;
-}
-
-static void ext4_clear_request_list(void)
-{
- struct list_head *pos, *n;
- struct ext4_li_request *elr;
-
- mutex_lock(&ext4_li_info->li_list_mtx);
- list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
- elr = list_entry(pos, struct ext4_li_request,
- lr_request);
- ext4_remove_li_request(elr);
- }
- mutex_unlock(&ext4_li_info->li_list_mtx);
-}
-
-static int ext4_run_lazyinit_thread(void)
-{
- ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
- ext4_li_info, "ext4lazyinit");
- if (IS_ERR(ext4_lazyinit_task)) {
- int err = PTR_ERR(ext4_lazyinit_task);
- ext4_clear_request_list();
- kfree(ext4_li_info);
- ext4_li_info = NULL;
- printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
- "initialization thread\n",
- err);
- return err;
- }
- ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
- return 0;
-}
-
-/*
- * Check whether it makes sense to run the itable init thread.
- * If there is at least one uninitialized inode table, return the
- * corresponding group number; otherwise the loop goes through all
- * groups and returns the total number of groups.
- */
-static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
-{
- ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
- struct ext4_group_desc *gdp = NULL;
-
- for (group = 0; group < ngroups; group++) {
- gdp = ext4_get_group_desc(sb, group, NULL);
- if (!gdp)
- continue;
-
- if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
- break;
- }
-
- return group;
-}
-
-static int ext4_li_info_new(void)
-{
- struct ext4_lazy_init *eli = NULL;
-
- eli = kzalloc(sizeof(*eli), GFP_KERNEL);
- if (!eli)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&eli->li_request_list);
- mutex_init(&eli->li_list_mtx);
-
- eli->li_state |= EXT4_LAZYINIT_QUIT;
-
- ext4_li_info = eli;
-
- return 0;
-}
-
-static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
- ext4_group_t start)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_li_request *elr;
- unsigned long rnd;
-
- elr = kzalloc(sizeof(*elr), GFP_KERNEL);
- if (!elr)
- return NULL;
-
- elr->lr_super = sb;
- elr->lr_sbi = sbi;
- elr->lr_next_group = start;
-
- /*
- * Randomize first schedule time of the request to
- * spread the inode table initialization requests
- * better.
- */
- get_random_bytes(&rnd, sizeof(rnd));
- elr->lr_next_sched = jiffies + (unsigned long)rnd %
- (EXT4_DEF_LI_MAX_START_DELAY * HZ);
-
- return elr;
-}
-
-static int ext4_register_li_request(struct super_block *sb,
- ext4_group_t first_not_zeroed)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_li_request *elr;
- ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
- int ret = 0;
-
- if (sbi->s_li_request != NULL) {
- /*
- * Reset timeout so it can be computed again, because
- * s_li_wait_mult might have changed.
- */
- sbi->s_li_request->lr_timeout = 0;
- return 0;
- }
-
- if (first_not_zeroed == ngroups ||
- (sb->s_flags & MS_RDONLY) ||
- !test_opt(sb, INIT_INODE_TABLE))
- return 0;
-
- elr = ext4_li_request_new(sb, first_not_zeroed);
- if (!elr)
- return -ENOMEM;
-
- mutex_lock(&ext4_li_mtx);
-
- if (NULL == ext4_li_info) {
- ret = ext4_li_info_new();
- if (ret)
- goto out;
- }
-
- mutex_lock(&ext4_li_info->li_list_mtx);
- list_add(&elr->lr_request, &ext4_li_info->li_request_list);
- mutex_unlock(&ext4_li_info->li_list_mtx);
-
- sbi->s_li_request = elr;
- /*
- * Set elr to NULL here since it has been inserted into
- * the request_list; its removal and freeing are handled
- * by ext4_clear_request_list from now on.
- */
- elr = NULL;
-
- if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
- ret = ext4_run_lazyinit_thread();
- if (ret)
- goto out;
- }
-out:
- mutex_unlock(&ext4_li_mtx);
- if (ret)
- kfree(elr);
- return ret;
-}
-
-/*
- * We do not need to lock anything since this is called on
- * module unload.
- */
-static void ext4_destroy_lazyinit_thread(void)
-{
- /*
- * If the thread exited earlier,
- * there's nothing to be done.
- */
- if (!ext4_li_info || !ext4_lazyinit_task)
- return;
-
- kthread_stop(ext4_lazyinit_task);
-}
-
-static int ext4_fill_super(struct super_block *sb, void *data, int silent)
-{
- char *orig_data = kstrdup(data, GFP_KERNEL);
- struct buffer_head *bh;
- struct ext4_super_block *es = NULL;
- struct ext4_sb_info *sbi;
- ext4_fsblk_t block;
- ext4_fsblk_t sb_block = get_sb_block(&data);
- ext4_fsblk_t logical_sb_block;
- unsigned long offset = 0;
- unsigned long journal_devnum = 0;
- unsigned long def_mount_opts;
- struct inode *root;
- char *cp;
- const char *descr;
- int ret = -ENOMEM;
- int blocksize, clustersize;
- unsigned int db_count;
- unsigned int i;
- int needs_recovery, has_huge_files, has_bigalloc;
- __u64 blocks_count;
- int err;
- unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
- ext4_group_t first_not_zeroed;
-
- sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
- if (!sbi)
- goto out_free_orig;
-
- sbi->s_blockgroup_lock =
- kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
- if (!sbi->s_blockgroup_lock) {
- kfree(sbi);
- goto out_free_orig;
- }
- sb->s_fs_info = sbi;
- sbi->s_mount_opt = 0;
- sbi->s_resuid = EXT4_DEF_RESUID;
- sbi->s_resgid = EXT4_DEF_RESGID;
- sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
- sbi->s_sb_block = sb_block;
- if (sb->s_bdev->bd_part)
- sbi->s_sectors_written_start =
- part_stat_read(sb->s_bdev->bd_part, sectors[1]);
-
- /* Cleanup superblock name */
- for (cp = sb->s_id; (cp = strchr(cp, '/'));)
- *cp = '!';
-
- ret = -EINVAL;
- blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
- if (!blocksize) {
- ext4_msg(sb, KERN_ERR, "unable to set blocksize");
- goto out_fail;
- }
-
- /*
- * The ext4 superblock will not be buffer-aligned for block sizes
- * other than 1kB. We need to calculate the offset from buffer start.
- */
- if (blocksize != EXT4_MIN_BLOCK_SIZE) {
- logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
- offset = do_div(logical_sb_block, blocksize);
- } else {
- logical_sb_block = sb_block;
- }
-
- if (!(bh = sb_bread(sb, logical_sb_block))) {
- ext4_msg(sb, KERN_ERR, "unable to read superblock");
- goto out_fail;
- }
- /*
- * Note: s_es must be initialized as soon as possible because
- * some ext4 macros depend on its value
- */
- es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
- sbi->s_es = es;
- sb->s_magic = le16_to_cpu(es->s_magic);
- if (sb->s_magic != EXT4_SUPER_MAGIC)
- goto cantfind_ext4;
- sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
-
- /* Set defaults before we parse the mount options */
- def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
- set_opt(sb, INIT_INODE_TABLE);
- if (def_mount_opts & EXT4_DEFM_DEBUG)
- set_opt(sb, DEBUG);
- if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
- set_opt(sb, GRPID);
- if (def_mount_opts & EXT4_DEFM_UID16)
- set_opt(sb, NO_UID32);
- /* xattr user namespace & acls are now defaulted on */
-#ifdef CONFIG_EXT4_FS_XATTR
- set_opt(sb, XATTR_USER);
-#endif
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
- set_opt(sb, POSIX_ACL);
-#endif
- set_opt(sb, MBLK_IO_SUBMIT);
- if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
- set_opt(sb, JOURNAL_DATA);
- else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
- set_opt(sb, ORDERED_DATA);
- else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
- set_opt(sb, WRITEBACK_DATA);
-
- if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
- set_opt(sb, ERRORS_PANIC);
- else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
- set_opt(sb, ERRORS_CONT);
- else
- set_opt(sb, ERRORS_RO);
- if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
- set_opt(sb, BLOCK_VALIDITY);
- if (def_mount_opts & EXT4_DEFM_DISCARD)
- set_opt(sb, DISCARD);
-
- sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
- sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
- sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
- sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
- sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
-
- if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
- set_opt(sb, BARRIER);
-
- /*
- * enable delayed allocation by default
- * Use -o nodelalloc to turn it off
- */
- if (!IS_EXT3_SB(sb) &&
- ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
- set_opt(sb, DELALLOC);
-
- /*
- * Set the default s_li_wait_mult for lazyinit, in case no
- * mount option is specified.
- */
- sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
-
- if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
- &journal_devnum, &journal_ioprio, 0)) {
- ext4_msg(sb, KERN_WARNING,
- "failed to parse options in superblock: %s",
- sbi->s_es->s_mount_opts);
- }
- sbi->s_def_mount_opt = sbi->s_mount_opt;
- if (!parse_options((char *) data, sb, &journal_devnum,
- &journal_ioprio, 0))
- goto failed_mount;
-
- if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
- printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
- "with data=journal disables delayed "
- "allocation and O_DIRECT support!\n");
- if (test_opt2(sb, EXPLICIT_DELALLOC)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "both data=journal and delalloc");
- goto failed_mount;
- }
- if (test_opt(sb, DIOREAD_NOLOCK)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "both data=journal and delalloc");
- goto failed_mount;
- }
- if (test_opt(sb, DELALLOC))
- clear_opt(sb, DELALLOC);
- }
-
- blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
- if (test_opt(sb, DIOREAD_NOLOCK)) {
- if (blocksize < PAGE_SIZE) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "dioread_nolock if block size != PAGE_SIZE");
- goto failed_mount;
- }
- }
-
- sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
- (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
-
- if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
- (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
- EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
- EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
- ext4_msg(sb, KERN_WARNING,
- "feature flags set on rev 0 fs, "
- "running e2fsck is recommended");
-
- if (IS_EXT2_SB(sb)) {
- if (ext2_feature_set_ok(sb))
- ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
- "using the ext4 subsystem");
- else {
- ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
- "to feature incompatibilities");
- goto failed_mount;
- }
- }
-
- if (IS_EXT3_SB(sb)) {
- if (ext3_feature_set_ok(sb))
- ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
- "using the ext4 subsystem");
- else {
- ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
- "to feature incompatibilities");
- goto failed_mount;
- }
- }
-
- /*
- * Check feature flags regardless of the revision level, since we
- * previously didn't change the revision level when setting the flags,
- * so there is a chance incompat flags are set on a rev 0 filesystem.
- */
- if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
- goto failed_mount;
-
- if (blocksize < EXT4_MIN_BLOCK_SIZE ||
- blocksize > EXT4_MAX_BLOCK_SIZE) {
- ext4_msg(sb, KERN_ERR,
- "Unsupported filesystem blocksize %d", blocksize);
- goto failed_mount;
- }
-
- if (sb->s_blocksize != blocksize) {
- /* Validate the filesystem blocksize */
- if (!sb_set_blocksize(sb, blocksize)) {
- ext4_msg(sb, KERN_ERR, "bad block size %d",
- blocksize);
- goto failed_mount;
- }
-
- brelse(bh);
- logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
- offset = do_div(logical_sb_block, blocksize);
- bh = sb_bread(sb, logical_sb_block);
- if (!bh) {
- ext4_msg(sb, KERN_ERR,
- "Can't read superblock on 2nd try");
- goto failed_mount;
- }
- es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
- sbi->s_es = es;
- if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
- ext4_msg(sb, KERN_ERR,
- "Magic mismatch, very weird!");
- goto failed_mount;
- }
- }
-
- has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
- sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
- has_huge_files);
- sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
-
- if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
- sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
- sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
- } else {
- sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
- sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
- if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
- (!is_power_of_2(sbi->s_inode_size)) ||
- (sbi->s_inode_size > blocksize)) {
- ext4_msg(sb, KERN_ERR,
- "unsupported inode size: %d",
- sbi->s_inode_size);
- goto failed_mount;
- }
- if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
- sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
- }
-
- sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
- if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
- sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
- !is_power_of_2(sbi->s_desc_size)) {
- ext4_msg(sb, KERN_ERR,
- "unsupported descriptor size %lu",
- sbi->s_desc_size);
- goto failed_mount;
- }
- } else
- sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
-
- sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
- sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
- if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
- goto cantfind_ext4;
-
- sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
- if (sbi->s_inodes_per_block == 0)
- goto cantfind_ext4;
- sbi->s_itb_per_group = sbi->s_inodes_per_group /
- sbi->s_inodes_per_block;
- sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
- sbi->s_sbh = bh;
- sbi->s_mount_state = le16_to_cpu(es->s_state);
- sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
- sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
-
- for (i = 0; i < 4; i++)
- sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
- sbi->s_def_hash_version = es->s_def_hash_version;
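- /*
- * Honour the on-disk hash-signedness flags so directory hashes
- * stay stable across platforms where plain char differs in
- * signedness.
- */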
- i = le32_to_cpu(es->s_flags);
- if (i & EXT2_FLAGS_UNSIGNED_HASH)
- sbi->s_hash_unsigned = 3;
- else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
-#ifdef __CHAR_UNSIGNED__
- es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
- sbi->s_hash_unsigned = 3;
-#else
- es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
-#endif
- }
-
- /* Handle clustersize */
- clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
- has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_BIGALLOC);
- if (has_bigalloc) {
- if (clustersize < blocksize) {
- ext4_msg(sb, KERN_ERR,
- "cluster size (%d) smaller than "
- "block size (%d)", clustersize, blocksize);
- goto failed_mount;
- }
- sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
- le32_to_cpu(es->s_log_block_size);
- sbi->s_clusters_per_group =
- le32_to_cpu(es->s_clusters_per_group);
- if (sbi->s_clusters_per_group > blocksize * 8) {
- ext4_msg(sb, KERN_ERR,
- "#clusters per group too big: %lu",
- sbi->s_clusters_per_group);
- goto failed_mount;
- }
- if (sbi->s_blocks_per_group !=
- (sbi->s_clusters_per_group * (clustersize / blocksize))) {
- ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
- "clusters per group (%lu) inconsistent",
- sbi->s_blocks_per_group,
- sbi->s_clusters_per_group);
- goto failed_mount;
- }
- } else {
- if (clustersize != blocksize) {
- ext4_warning(sb, "fragment/cluster size (%d) != "
- "block size (%d)", clustersize,
- blocksize);
- clustersize = blocksize;
- }
- if (sbi->s_blocks_per_group > blocksize * 8) {
- ext4_msg(sb, KERN_ERR,
- "#blocks per group too big: %lu",
- sbi->s_blocks_per_group);
- goto failed_mount;
- }
- sbi->s_clusters_per_group = sbi->s_blocks_per_group;
- sbi->s_cluster_bits = 0;
- }
- sbi->s_cluster_ratio = clustersize / blocksize;
-
- if (sbi->s_inodes_per_group > blocksize * 8) {
- ext4_msg(sb, KERN_ERR,
- "#inodes per group too big: %lu",
- sbi->s_inodes_per_group);
- goto failed_mount;
- }
-
- /*
- * Test whether we have more sectors than will fit in sector_t,
- * and whether the max offset is addressable by the page cache.
- */
- err = generic_check_addressable(sb->s_blocksize_bits,
- ext4_blocks_count(es));
- if (err) {
- ext4_msg(sb, KERN_ERR, "filesystem"
- " too large to mount safely on this system");
- if (sizeof(sector_t) < 8)
- ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
- ret = err;
- goto failed_mount;
- }
-
- if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
- goto cantfind_ext4;
-
- /* check blocks count against device size */
- blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
- if (blocks_count && ext4_blocks_count(es) > blocks_count) {
- ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
- "exceeds size of device (%llu blocks)",
- ext4_blocks_count(es), blocks_count);
- goto failed_mount;
- }
-
- /*
- * It makes no sense for the first data block to be beyond the end
- * of the filesystem.
- */
- if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
- ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
- "block %u is beyond end of filesystem (%llu)",
- le32_to_cpu(es->s_first_data_block),
- ext4_blocks_count(es));
- goto failed_mount;
- }
- blocks_count = (ext4_blocks_count(es) -
- le32_to_cpu(es->s_first_data_block) +
- EXT4_BLOCKS_PER_GROUP(sb) - 1);
- do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
- if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
- ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
- "(block count %llu, first data block %u, "
- "blocks per group %lu)", sbi->s_groups_count,
- ext4_blocks_count(es),
- le32_to_cpu(es->s_first_data_block),
- EXT4_BLOCKS_PER_GROUP(sb));
- goto failed_mount;
- }
- sbi->s_groups_count = blocks_count;
- sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
- (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
- db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
- EXT4_DESC_PER_BLOCK(sb);
- sbi->s_group_desc = ext4_kvmalloc(db_count *
- sizeof(struct buffer_head *),
- GFP_KERNEL);
- if (sbi->s_group_desc == NULL) {
- ext4_msg(sb, KERN_ERR, "not enough memory");
- goto failed_mount;
- }
-
- if (ext4_proc_root)
- sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
-
- if (sbi->s_proc)
- proc_create_data("options", S_IRUGO, sbi->s_proc,
- &ext4_seq_options_fops, sb);
-
- bgl_lock_init(sbi->s_blockgroup_lock);
-
- for (i = 0; i < db_count; i++) {
- block = descriptor_loc(sb, logical_sb_block, i);
- sbi->s_group_desc[i] = sb_bread(sb, block);
- if (!sbi->s_group_desc[i]) {
- ext4_msg(sb, KERN_ERR,
- "can't read group descriptor %d", i);
- db_count = i;
- goto failed_mount2;
- }
- }
- if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
- ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
- goto failed_mount2;
- }
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
- if (!ext4_fill_flex_info(sb)) {
- ext4_msg(sb, KERN_ERR,
- "unable to initialize "
- "flex_bg meta info!");
- goto failed_mount2;
- }
-
- sbi->s_gdb_count = db_count;
- get_random_bytes(&sbi->s_next_generation, sizeof(u32));
- spin_lock_init(&sbi->s_next_gen_lock);
-
- init_timer(&sbi->s_err_report);
- sbi->s_err_report.function = print_daily_error_info;
- sbi->s_err_report.data = (unsigned long) sb;
-
- err = percpu_counter_init(&sbi->s_freeclusters_counter,
- ext4_count_free_clusters(sb));
- if (!err) {
- err = percpu_counter_init(&sbi->s_freeinodes_counter,
- ext4_count_free_inodes(sb));
- }
- if (!err) {
- err = percpu_counter_init(&sbi->s_dirs_counter,
- ext4_count_dirs(sb));
- }
- if (!err) {
- err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
- }
- if (err) {
- ext4_msg(sb, KERN_ERR, "insufficient memory");
- goto failed_mount3;
- }
-
- sbi->s_stripe = ext4_get_stripe_size(sbi);
- sbi->s_max_writeback_mb_bump = 128;
-
- /*
- * set up enough so that it can read an inode
- */
- if (!test_opt(sb, NOLOAD) &&
- EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
- sb->s_op = &ext4_sops;
- else
- sb->s_op = &ext4_nojournal_sops;
- sb->s_export_op = &ext4_export_ops;
- sb->s_xattr = ext4_xattr_handlers;
-#ifdef CONFIG_QUOTA
- sb->s_qcop = &ext4_qctl_operations;
- sb->dq_op = &ext4_quota_operations;
-#endif
- memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
-
- INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
- mutex_init(&sbi->s_orphan_lock);
- sbi->s_resize_flags = 0;
-
- sb->s_root = NULL;
-
- needs_recovery = (es->s_last_orphan != 0 ||
- EXT4_HAS_INCOMPAT_FEATURE(sb,
- EXT4_FEATURE_INCOMPAT_RECOVER));
-
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
- !(sb->s_flags & MS_RDONLY))
- if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
- goto failed_mount3;
-
- /*
- * The first inode we look at is the journal inode. Don't try
- * root first: it may be modified in the journal!
- */
- if (!test_opt(sb, NOLOAD) &&
- EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
- if (ext4_load_journal(sb, es, journal_devnum))
- goto failed_mount3;
- } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
- EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
- ext4_msg(sb, KERN_ERR, "required journal recovery "
- "suppressed and not mounted read-only");
- goto failed_mount_wq;
- } else {
- clear_opt(sb, DATA_FLAGS);
- sbi->s_journal = NULL;
- needs_recovery = 0;
- goto no_journal;
- }
-
- if (ext4_blocks_count(es) > 0xffffffffULL &&
- !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
- JBD2_FEATURE_INCOMPAT_64BIT)) {
- ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
- goto failed_mount_wq;
- }
-
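- /*
- * Apply the journal checksum / async-commit features requested
- * by the mount options before validating the data journaling
- * mode below.
- */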
- if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
- jbd2_journal_set_features(sbi->s_journal,
- JBD2_FEATURE_COMPAT_CHECKSUM, 0,
- JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
- } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
- jbd2_journal_set_features(sbi->s_journal,
- JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
- jbd2_journal_clear_features(sbi->s_journal, 0, 0,
- JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
- } else {
- jbd2_journal_clear_features(sbi->s_journal,
- JBD2_FEATURE_COMPAT_CHECKSUM, 0,
- JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
- }
-
- /* We have now updated the journal if required, so we can
- * validate the data journaling mode. */
- switch (test_opt(sb, DATA_FLAGS)) {
- case 0:
- /* No mode set, assume a default based on the journal
- * capabilities: ORDERED_DATA if the journal can
- * cope, else JOURNAL_DATA
- */
- if (jbd2_journal_check_available_features
- (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
- set_opt(sb, ORDERED_DATA);
- else
- set_opt(sb, JOURNAL_DATA);
- break;
-
- case EXT4_MOUNT_ORDERED_DATA:
- case EXT4_MOUNT_WRITEBACK_DATA:
- if (!jbd2_journal_check_available_features
- (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
- ext4_msg(sb, KERN_ERR, "Journal does not support "
- "requested data journaling mode");
- goto failed_mount_wq;
- }
- default:
- break;
- }
- set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
-
- sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
-
- /*
- * The journal may have updated the bg summary counts, so we
- * need to update the global counters.
- */
- percpu_counter_set(&sbi->s_freeclusters_counter,
- ext4_count_free_clusters(sb));
- percpu_counter_set(&sbi->s_freeinodes_counter,
- ext4_count_free_inodes(sb));
- percpu_counter_set(&sbi->s_dirs_counter,
- ext4_count_dirs(sb));
- percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
-
-no_journal:
- /*
- * The maximum number of concurrent works can be high and
- * concurrency isn't really necessary. Limit it to 1.
- */
- EXT4_SB(sb)->dio_unwritten_wq =
- alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
- if (!EXT4_SB(sb)->dio_unwritten_wq) {
- printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
- goto failed_mount_wq;
- }
-
- /*
- * The jbd2_journal_load will have done any necessary log recovery,
- * so we can safely mount the rest of the filesystem now.
- */
-
- root = ext4_iget(sb, EXT4_ROOT_INO);
- if (IS_ERR(root)) {
- ext4_msg(sb, KERN_ERR, "get root inode failed");
- ret = PTR_ERR(root);
- root = NULL;
- goto failed_mount4;
- }
- if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
- ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
- iput(root);
- goto failed_mount4;
- }
- sb->s_root = d_make_root(root);
- if (!sb->s_root) {
- ext4_msg(sb, KERN_ERR, "get root dentry failed");
- ret = -ENOMEM;
- goto failed_mount4;
- }
-
- if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
- sb->s_flags |= MS_RDONLY;
-
- /* determine the minimum size of new large inodes, if present */
- if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
- sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
- EXT4_GOOD_OLD_INODE_SIZE;
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
- if (sbi->s_want_extra_isize <
- le16_to_cpu(es->s_want_extra_isize))
- sbi->s_want_extra_isize =
- le16_to_cpu(es->s_want_extra_isize);
- if (sbi->s_want_extra_isize <
- le16_to_cpu(es->s_min_extra_isize))
- sbi->s_want_extra_isize =
- le16_to_cpu(es->s_min_extra_isize);
- }
- }
- /* Check if enough inode space is available */
- if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
- sbi->s_inode_size) {
- sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
- EXT4_GOOD_OLD_INODE_SIZE;
- ext4_msg(sb, KERN_INFO, "required extra inode space not"
- "available");
- }
-
- err = ext4_setup_system_zone(sb);
- if (err) {
- ext4_msg(sb, KERN_ERR, "failed to initialize system "
- "zone (%d)", err);
- goto failed_mount4a;
- }
-
- ext4_ext_init(sb);
- err = ext4_mb_init(sb, needs_recovery);
- if (err) {
- ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
- err);
- goto failed_mount5;
- }
-
- err = ext4_register_li_request(sb, first_not_zeroed);
- if (err)
- goto failed_mount6;
-
- sbi->s_kobj.kset = ext4_kset;
- init_completion(&sbi->s_kobj_unregister);
- err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
- "%s", sb->s_id);
- if (err)
- goto failed_mount7;
-
- EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
- ext4_orphan_cleanup(sb, es);
- EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
- if (needs_recovery) {
- ext4_msg(sb, KERN_INFO, "recovery complete");
- ext4_mark_recovery_complete(sb, es);
- }
- if (EXT4_SB(sb)->s_journal) {
- if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
- descr = " journalled data mode";
- else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
- descr = " ordered data mode";
- else
- descr = " writeback data mode";
- } else
- descr = "out journal";
-
- ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
- "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
- *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
-
- if (es->s_error_count)
- mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
-
- kfree(orig_data);
- return 0;
-
-cantfind_ext4:
- if (!silent)
- ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
- goto failed_mount;
-
-failed_mount7:
- ext4_unregister_li_request(sb);
-failed_mount6:
- ext4_mb_release(sb);
-failed_mount5:
- ext4_ext_release(sb);
- ext4_release_system_zone(sb);
-failed_mount4a:
- dput(sb->s_root);
- sb->s_root = NULL;
-failed_mount4:
- ext4_msg(sb, KERN_ERR, "mount failed");
- destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
-failed_mount_wq:
- if (sbi->s_journal) {
- jbd2_journal_destroy(sbi->s_journal);
- sbi->s_journal = NULL;
- }
-failed_mount3:
- del_timer(&sbi->s_err_report);
- if (sbi->s_flex_groups)
- ext4_kvfree(sbi->s_flex_groups);
- percpu_counter_destroy(&sbi->s_freeclusters_counter);
- percpu_counter_destroy(&sbi->s_freeinodes_counter);
- percpu_counter_destroy(&sbi->s_dirs_counter);
- percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
- if (sbi->s_mmp_tsk)
- kthread_stop(sbi->s_mmp_tsk);
-failed_mount2:
- for (i = 0; i < db_count; i++)
- brelse(sbi->s_group_desc[i]);
- ext4_kvfree(sbi->s_group_desc);
-failed_mount:
- if (sbi->s_proc) {
- remove_proc_entry("options", sbi->s_proc);
- remove_proc_entry(sb->s_id, ext4_proc_root);
- }
-#ifdef CONFIG_QUOTA
- for (i = 0; i < MAXQUOTAS; i++)
- kfree(sbi->s_qf_names[i]);
-#endif
- ext4_blkdev_remove(sbi);
- brelse(bh);
-out_fail:
- sb->s_fs_info = NULL;
- kfree(sbi->s_blockgroup_lock);
- kfree(sbi);
-out_free_orig:
- kfree(orig_data);
- return ret;
-}
-
-/*
- * Setup any per-fs journal parameters now. We'll do this both on
- * initial mount, once the journal has been initialised but before we've
- * done any recovery; and again on any subsequent remount.
- */
-static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
-
- journal->j_commit_interval = sbi->s_commit_interval;
- journal->j_min_batch_time = sbi->s_min_batch_time;
- journal->j_max_batch_time = sbi->s_max_batch_time;
-
- write_lock(&journal->j_state_lock);
- if (test_opt(sb, BARRIER))
- journal->j_flags |= JBD2_BARRIER;
- else
- journal->j_flags &= ~JBD2_BARRIER;
- if (test_opt(sb, DATA_ERR_ABORT))
- journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
- else
- journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
- write_unlock(&journal->j_state_lock);
-}
-
-static journal_t *ext4_get_journal(struct super_block *sb,
- unsigned int journal_inum)
-{
- struct inode *journal_inode;
- journal_t *journal;
-
- BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
-
- /* First, test for the existence of a valid inode on disk. Bad
- * things happen if we iget() an unused inode, as the subsequent
- * iput() will try to delete it. */
-
- journal_inode = ext4_iget(sb, journal_inum);
- if (IS_ERR(journal_inode)) {
- ext4_msg(sb, KERN_ERR, "no journal found");
- return NULL;
- }
- if (!journal_inode->i_nlink) {
- make_bad_inode(journal_inode);
- iput(journal_inode);
- ext4_msg(sb, KERN_ERR, "journal inode is deleted");
- return NULL;
- }
-
- jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
- journal_inode, journal_inode->i_size);
- if (!S_ISREG(journal_inode->i_mode)) {
- ext4_msg(sb, KERN_ERR, "invalid journal inode");
- iput(journal_inode);
- return NULL;
- }
-
- journal = jbd2_journal_init_inode(journal_inode);
- if (!journal) {
- ext4_msg(sb, KERN_ERR, "Could not load journal inode");
- iput(journal_inode);
- return NULL;
- }
- journal->j_private = sb;
- ext4_init_journal_params(sb, journal);
- return journal;
-}
-
-static journal_t *ext4_get_dev_journal(struct super_block *sb,
- dev_t j_dev)
-{
- struct buffer_head *bh;
- journal_t *journal;
- ext4_fsblk_t start;
- ext4_fsblk_t len;
- int hblock, blocksize;
- ext4_fsblk_t sb_block;
- unsigned long offset;
- struct ext4_super_block *es;
- struct block_device *bdev;
-
- BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
-
- bdev = ext4_blkdev_get(j_dev, sb);
- if (bdev == NULL)
- return NULL;
-
- blocksize = sb->s_blocksize;
- hblock = bdev_logical_block_size(bdev);
- if (blocksize < hblock) {
- ext4_msg(sb, KERN_ERR,
- "blocksize too small for journal device");
- goto out_bdev;
- }
-
- sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
- offset = EXT4_MIN_BLOCK_SIZE % blocksize;
- set_blocksize(bdev, blocksize);
- if (!(bh = __bread(bdev, sb_block, blocksize))) {
- ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
- "external journal");
- goto out_bdev;
- }
-
- es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
- if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
- !(le32_to_cpu(es->s_feature_incompat) &
- EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
- ext4_msg(sb, KERN_ERR, "external journal has "
- "bad superblock");
- brelse(bh);
- goto out_bdev;
- }
-
- if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
- ext4_msg(sb, KERN_ERR, "journal UUID does not match");
- brelse(bh);
- goto out_bdev;
- }
-
- len = ext4_blocks_count(es);
- start = sb_block + 1;
- brelse(bh); /* we're done with the superblock */
-
- journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
- start, len, blocksize);
- if (!journal) {
- ext4_msg(sb, KERN_ERR, "failed to create device journal");
- goto out_bdev;
- }
- journal->j_private = sb;
- ll_rw_block(READ, 1, &journal->j_sb_buffer);
- wait_on_buffer(journal->j_sb_buffer);
- if (!buffer_uptodate(journal->j_sb_buffer)) {
- ext4_msg(sb, KERN_ERR, "I/O error on journal device");
- goto out_journal;
- }
- if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
- ext4_msg(sb, KERN_ERR, "External journal has more than one "
- "user (unsupported) - %d",
- be32_to_cpu(journal->j_superblock->s_nr_users));
- goto out_journal;
- }
- EXT4_SB(sb)->journal_bdev = bdev;
- ext4_init_journal_params(sb, journal);
- return journal;
-
-out_journal:
- jbd2_journal_destroy(journal);
-out_bdev:
- ext4_blkdev_put(bdev);
- return NULL;
-}
-
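The sb_block/offset arithmetic in ext4_get_dev_journal() reflects the fact that an ext4 superblock always begins EXT4_MIN_BLOCK_SIZE (1024) bytes into the device, so it has to be located within whatever device block contains that byte. A standalone worked example of the same arithmetic (a sketch, not kernel code):

    #include <stdio.h>

    int main(void)
    {
        const unsigned long MIN_BLOCK_SIZE = 1024;  /* EXT4_MIN_BLOCK_SIZE */
        unsigned long sizes[] = { 1024, 2048, 4096 };

        for (int i = 0; i < 3; i++) {
            unsigned long bs = sizes[i];
            /* which block holds byte 1024, and where within that block */
            printf("blocksize %4lu: sb_block=%lu offset=%lu\n",
                   bs, MIN_BLOCK_SIZE / bs, MIN_BLOCK_SIZE % bs);
        }
        return 0;   /* prints sb_block/offset of 1/0, 0/1024, 0/1024 */
    }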
-static int ext4_load_journal(struct super_block *sb,
- struct ext4_super_block *es,
- unsigned long journal_devnum)
-{
- journal_t *journal;
- unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
- dev_t journal_dev;
- int err = 0;
- int really_read_only;
-
- BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
-
- if (journal_devnum &&
- journal_devnum != le32_to_cpu(es->s_journal_dev)) {
- ext4_msg(sb, KERN_INFO, "external journal device major/minor "
- "numbers have changed");
- journal_dev = new_decode_dev(journal_devnum);
- } else
- journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
-
- really_read_only = bdev_read_only(sb->s_bdev);
-
- /*
- * Are we loading a blank journal or performing recovery after a
- * crash? For recovery, we need to check in advance whether we
- * can get read-write access to the device.
- */
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
- if (sb->s_flags & MS_RDONLY) {
-			ext4_msg(sb, KERN_INFO, "recovery "
-				"required on readonly filesystem");
- if (really_read_only) {
- ext4_msg(sb, KERN_ERR, "write access "
- "unavailable, cannot proceed");
- return -EROFS;
- }
- ext4_msg(sb, KERN_INFO, "write access will "
- "be enabled during recovery");
- }
- }
-
- if (journal_inum && journal_dev) {
-		ext4_msg(sb, KERN_ERR, "filesystem has both a journal "
-		       "inode and a journal device!");
- return -EINVAL;
- }
-
- if (journal_inum) {
- if (!(journal = ext4_get_journal(sb, journal_inum)))
- return -EINVAL;
- } else {
- if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
- return -EINVAL;
- }
-
- if (!(journal->j_flags & JBD2_BARRIER))
- ext4_msg(sb, KERN_INFO, "barriers disabled");
-
- if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
- err = jbd2_journal_wipe(journal, !really_read_only);
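-	/*
-	 * Journal replay may rewrite the on-disk superblock, so preserve
-	 * the saved error information across jbd2_journal_load() below.
-	 */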
- if (!err) {
- char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
- if (save)
- memcpy(save, ((char *) es) +
- EXT4_S_ERR_START, EXT4_S_ERR_LEN);
- err = jbd2_journal_load(journal);
- if (save)
- memcpy(((char *) es) + EXT4_S_ERR_START,
- save, EXT4_S_ERR_LEN);
- kfree(save);
- }
-
- if (err) {
- ext4_msg(sb, KERN_ERR, "error loading journal");
- jbd2_journal_destroy(journal);
- return err;
- }
-
- EXT4_SB(sb)->s_journal = journal;
- ext4_clear_journal_err(sb, es);
-
- if (!really_read_only && journal_devnum &&
- journal_devnum != le32_to_cpu(es->s_journal_dev)) {
- es->s_journal_dev = cpu_to_le32(journal_devnum);
-
- /* Make sure we flush the recovery flag to disk. */
- ext4_commit_super(sb, 1);
- }
-
- return 0;
-}
-
-static int ext4_commit_super(struct super_block *sb, int sync)
-{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
- int error = 0;
-
- if (!sbh || block_device_ejected(sb))
- return error;
- if (buffer_write_io_error(sbh)) {
- /*
- * Oh, dear. A previous attempt to write the
- * superblock failed. This could happen because the
- * USB device was yanked out. Or it could happen to
- * be a transient write error and maybe the block will
-		 * be remapped.  Nothing we can do but retry the
- * write and hope for the best.
- */
- ext4_msg(sb, KERN_ERR, "previous I/O error to "
- "superblock detected");
- clear_buffer_write_io_error(sbh);
- set_buffer_uptodate(sbh);
- }
- /*
- * If the file system is mounted read-only, don't update the
- * superblock write time. This avoids updating the superblock
- * write time when we are mounting the root file system
- * read/only but we need to replay the journal; at that point,
- * for people who are east of GMT and who make their clock
- * tick in localtime for Windows bug-for-bug compatibility,
- * the clock is set in the future, and this will cause e2fsck
- * to complain and force a full file system check.
- */
- if (!(sb->s_flags & MS_RDONLY))
- es->s_wtime = cpu_to_le32(get_seconds());
- if (sb->s_bdev->bd_part)
- es->s_kbytes_written =
- cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
- ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
- EXT4_SB(sb)->s_sectors_written_start) >> 1));
- else
- es->s_kbytes_written =
- cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
- ext4_free_blocks_count_set(es,
- EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
- &EXT4_SB(sb)->s_freeclusters_counter)));
- es->s_free_inodes_count =
- cpu_to_le32(percpu_counter_sum_positive(
- &EXT4_SB(sb)->s_freeinodes_counter));
- sb->s_dirt = 0;
- BUFFER_TRACE(sbh, "marking dirty");
- mark_buffer_dirty(sbh);
- if (sync) {
- error = sync_dirty_buffer(sbh);
- if (error)
- return error;
-
- error = buffer_write_io_error(sbh);
- if (error) {
- ext4_msg(sb, KERN_ERR, "I/O error while writing "
- "superblock");
- clear_buffer_write_io_error(sbh);
- set_buffer_uptodate(sbh);
- }
- }
- return error;
-}
-
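In ext4_commit_super() above, part_stat_read() yields 512-byte sectors, so the >> 1 converts the sector delta since mount into kibibytes before adding it to the lifetime total. A minimal illustration of that bookkeeping; the variable names mirror the fields used above, but the values are hypothetical:

    #include <stdio.h>

    int main(void)
    {
        unsigned long long s_kbytes_written = 123456; /* lifetime KiB so far        */
        unsigned long long sectors_at_mount = 1000;   /* s_sectors_written_start    */
        unsigned long long sectors_now = 9000;        /* part_stat_read(sectors[1]) */

        /* 512-byte sectors >> 1 == KiB */
        unsigned long long total_kb = s_kbytes_written +
                ((sectors_now - sectors_at_mount) >> 1);
        printf("%llu KiB written over the filesystem lifetime\n", total_kb);
        return 0;   /* 123456 + 4000 = 127456 */
    }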
-/*
- * Have we just finished recovery? If so, and if we are mounting (or
- * remounting) the filesystem readonly, then we will end up with a
- * consistent fs on disk. Record that fact.
- */
-static void ext4_mark_recovery_complete(struct super_block *sb,
- struct ext4_super_block *es)
-{
- journal_t *journal = EXT4_SB(sb)->s_journal;
-
- if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
- BUG_ON(journal != NULL);
- return;
- }
- jbd2_journal_lock_updates(journal);
- if (jbd2_journal_flush(journal) < 0)
- goto out;
-
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
- sb->s_flags & MS_RDONLY) {
- EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
- ext4_commit_super(sb, 1);
- }
-
-out:
- jbd2_journal_unlock_updates(journal);
-}
-
-/*
- * If we are mounting (or read-write remounting) a filesystem whose journal
- * has recorded an error from a previous lifetime, move that error to the
- * main filesystem now.
- */
-static void ext4_clear_journal_err(struct super_block *sb,
- struct ext4_super_block *es)
-{
- journal_t *journal;
- int j_errno;
- const char *errstr;
-
- BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
-
- journal = EXT4_SB(sb)->s_journal;
-
- /*
- * Now check for any error status which may have been recorded in the
- * journal by a prior ext4_error() or ext4_abort()
- */
-
- j_errno = jbd2_journal_errno(journal);
- if (j_errno) {
- char nbuf[16];
-
- errstr = ext4_decode_error(sb, j_errno, nbuf);
- ext4_warning(sb, "Filesystem error recorded "
- "from previous mount: %s", errstr);
- ext4_warning(sb, "Marking fs in need of filesystem check.");
-
- EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
- es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
- ext4_commit_super(sb, 1);
-
- jbd2_journal_clear_err(journal);
- }
-}
-
-/*
- * Force the running and committing transactions to commit,
- * and wait on the commit.
- */
-int ext4_force_commit(struct super_block *sb)
-{
- journal_t *journal;
- int ret = 0;
-
- if (sb->s_flags & MS_RDONLY)
- return 0;
-
- journal = EXT4_SB(sb)->s_journal;
- if (journal) {
- vfs_check_frozen(sb, SB_FREEZE_TRANS);
- ret = ext4_journal_force_commit(journal);
- }
-
- return ret;
-}
-
-static void ext4_write_super(struct super_block *sb)
-{
- lock_super(sb);
- ext4_commit_super(sb, 1);
- unlock_super(sb);
-}
-
-static int ext4_sync_fs(struct super_block *sb, int wait)
-{
- int ret = 0;
- tid_t target;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
-
- trace_ext4_sync_fs(sb, wait);
- flush_workqueue(sbi->dio_unwritten_wq);
- if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
- if (wait)
- jbd2_log_wait_commit(sbi->s_journal, target);
- }
- return ret;
-}
-
-/*
- * LVM calls this function before a (read-only) snapshot is created. This
- * gives us a chance to flush the journal completely and mark the fs clean.
- *
- * Note that this function alone cannot bring a filesystem to a clean
- * state: ext4 prevents a new handle from being started via @sb->s_frozen,
- * which is managed by an upper layer, so that layer's help is required.
- */
-static int ext4_freeze(struct super_block *sb)
-{
- int error = 0;
- journal_t *journal;
-
- if (sb->s_flags & MS_RDONLY)
- return 0;
-
- journal = EXT4_SB(sb)->s_journal;
-
- /* Now we set up the journal barrier. */
- jbd2_journal_lock_updates(journal);
-
- /*
- * Don't clear the needs_recovery flag if we failed to flush
- * the journal.
- */
- error = jbd2_journal_flush(journal);
- if (error < 0)
- goto out;
-
- /* Journal blocked and flushed, clear needs_recovery flag. */
- EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
- error = ext4_commit_super(sb, 1);
-out:
- /* we rely on s_frozen to stop further updates */
- jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
- return error;
-}
-
-/*
- * Called by LVM after the snapshot is done. We need to reset the RECOVER
- * flag here, even though the filesystem is not technically dirty yet.
- */
-static int ext4_unfreeze(struct super_block *sb)
-{
- if (sb->s_flags & MS_RDONLY)
- return 0;
-
- lock_super(sb);
- /* Reset the needs_recovery flag before the fs is unlocked. */
- EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
- ext4_commit_super(sb, 1);
- unlock_super(sb);
- return 0;
-}
-
-/*
- * Structure to save mount options for ext4_remount's benefit
- */
-struct ext4_mount_options {
- unsigned long s_mount_opt;
- unsigned long s_mount_opt2;
- uid_t s_resuid;
- gid_t s_resgid;
- unsigned long s_commit_interval;
- u32 s_min_batch_time, s_max_batch_time;
-#ifdef CONFIG_QUOTA
- int s_jquota_fmt;
- char *s_qf_names[MAXQUOTAS];
-#endif
-};
-
-static int ext4_remount(struct super_block *sb, int *flags, char *data)
-{
- struct ext4_super_block *es;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- unsigned long old_sb_flags;
- struct ext4_mount_options old_opts;
- int enable_quota = 0;
- ext4_group_t g;
- unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
- int err = 0;
-#ifdef CONFIG_QUOTA
- int i;
-#endif
- char *orig_data = kstrdup(data, GFP_KERNEL);
-
- /* Store the original options */
- lock_super(sb);
- old_sb_flags = sb->s_flags;
- old_opts.s_mount_opt = sbi->s_mount_opt;
- old_opts.s_mount_opt2 = sbi->s_mount_opt2;
- old_opts.s_resuid = sbi->s_resuid;
- old_opts.s_resgid = sbi->s_resgid;
- old_opts.s_commit_interval = sbi->s_commit_interval;
- old_opts.s_min_batch_time = sbi->s_min_batch_time;
- old_opts.s_max_batch_time = sbi->s_max_batch_time;
-#ifdef CONFIG_QUOTA
- old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
- for (i = 0; i < MAXQUOTAS; i++)
- old_opts.s_qf_names[i] = sbi->s_qf_names[i];
-#endif
- if (sbi->s_journal && sbi->s_journal->j_task->io_context)
- journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
-
- /*
- * Allow the "check" option to be passed as a remount option.
- */
- if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
- err = -EINVAL;
- goto restore_opts;
- }
-
- if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
- ext4_abort(sb, "Abort forced by user");
-
- sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
- (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
-
- es = sbi->s_es;
-
- if (sbi->s_journal) {
- ext4_init_journal_params(sb, sbi->s_journal);
- set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
- }
-
- if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
- if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
- err = -EROFS;
- goto restore_opts;
- }
-
- if (*flags & MS_RDONLY) {
- err = dquot_suspend(sb, -1);
- if (err < 0)
- goto restore_opts;
-
- /*
- * First of all, the unconditional stuff we have to do
- * to disable replay of the journal when we next remount
- */
- sb->s_flags |= MS_RDONLY;
-
- /*
- * OK, test if we are remounting a valid rw partition
- * readonly, and if so set the rdonly flag and then
- * mark the partition as valid again.
- */
- if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
- (sbi->s_mount_state & EXT4_VALID_FS))
- es->s_state = cpu_to_le16(sbi->s_mount_state);
-
- if (sbi->s_journal)
- ext4_mark_recovery_complete(sb, es);
- } else {
- /* Make sure we can mount this feature set readwrite */
- if (!ext4_feature_set_ok(sb, 0)) {
- err = -EROFS;
- goto restore_opts;
- }
- /*
- * Make sure the group descriptor checksums
- * are sane. If they aren't, refuse to remount r/w.
- */
- for (g = 0; g < sbi->s_groups_count; g++) {
- struct ext4_group_desc *gdp =
- ext4_get_group_desc(sb, g, NULL);
-
- if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
- ext4_msg(sb, KERN_ERR,
- "ext4_remount: Checksum for group %u failed (%u!=%u)",
- g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
- le16_to_cpu(gdp->bg_checksum));
- err = -EINVAL;
- goto restore_opts;
- }
- }
-
- /*
- * If we have an unprocessed orphan list hanging
- * around from a previously readonly bdev mount,
- * require a full umount/remount for now.
- */
- if (es->s_last_orphan) {
- ext4_msg(sb, KERN_WARNING, "Couldn't "
- "remount RDWR because of unprocessed "
- "orphan inode list. Please "
- "umount/remount instead");
- err = -EINVAL;
- goto restore_opts;
- }
-
- /*
- * Mounting a RDONLY partition read-write, so reread
- * and store the current valid flag. (It may have
- * been changed by e2fsck since we originally mounted
- * the partition.)
- */
- if (sbi->s_journal)
- ext4_clear_journal_err(sb, es);
- sbi->s_mount_state = le16_to_cpu(es->s_state);
- if (!ext4_setup_super(sb, es, 0))
- sb->s_flags &= ~MS_RDONLY;
- if (EXT4_HAS_INCOMPAT_FEATURE(sb,
- EXT4_FEATURE_INCOMPAT_MMP))
- if (ext4_multi_mount_protect(sb,
- le64_to_cpu(es->s_mmp_block))) {
- err = -EROFS;
- goto restore_opts;
- }
- enable_quota = 1;
- }
- }
-
- /*
- * Reinitialize lazy itable initialization thread based on
- * current settings
- */
- if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
- ext4_unregister_li_request(sb);
- else {
- ext4_group_t first_not_zeroed;
- first_not_zeroed = ext4_has_uninit_itable(sb);
- ext4_register_li_request(sb, first_not_zeroed);
- }
-
- ext4_setup_system_zone(sb);
- if (sbi->s_journal == NULL)
- ext4_commit_super(sb, 1);
-
-#ifdef CONFIG_QUOTA
- /* Release old quota file names */
- for (i = 0; i < MAXQUOTAS; i++)
- if (old_opts.s_qf_names[i] &&
- old_opts.s_qf_names[i] != sbi->s_qf_names[i])
- kfree(old_opts.s_qf_names[i]);
-#endif
- unlock_super(sb);
- if (enable_quota)
- dquot_resume(sb, -1);
-
- ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
- kfree(orig_data);
- return 0;
-
-restore_opts:
- sb->s_flags = old_sb_flags;
- sbi->s_mount_opt = old_opts.s_mount_opt;
- sbi->s_mount_opt2 = old_opts.s_mount_opt2;
- sbi->s_resuid = old_opts.s_resuid;
- sbi->s_resgid = old_opts.s_resgid;
- sbi->s_commit_interval = old_opts.s_commit_interval;
- sbi->s_min_batch_time = old_opts.s_min_batch_time;
- sbi->s_max_batch_time = old_opts.s_max_batch_time;
-#ifdef CONFIG_QUOTA
- sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
- for (i = 0; i < MAXQUOTAS; i++) {
- if (sbi->s_qf_names[i] &&
- old_opts.s_qf_names[i] != sbi->s_qf_names[i])
- kfree(sbi->s_qf_names[i]);
- sbi->s_qf_names[i] = old_opts.s_qf_names[i];
- }
-#endif
- unlock_super(sb);
- kfree(orig_data);
- return err;
-}
-
-/*
- * Note: calculating the overhead so we can be compatible with
- * historical BSD practice is quite difficult in the face of
- * clusters/bigalloc. This is because multiple metadata blocks from
- * different block groups can end up in the same allocation cluster.
- * Calculating the exact overhead in the face of clustered allocation
- * requires either O(all block bitmaps) in memory or O(number of block
- * groups**2) in time.  We will still calculate the overhead for
- * older file systems --- and if we come across a bigalloc file
- * system with zero in s_overhead_clusters the estimate will be close to
- * correct especially for very large cluster sizes --- but for newer
- * file systems, it's better to calculate this figure once at mkfs
- * time, and store it in the superblock. If the superblock value is
- * present (even for non-bigalloc file systems), we will use it.
- */
-static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
-{
- struct super_block *sb = dentry->d_sb;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_super_block *es = sbi->s_es;
- struct ext4_group_desc *gdp;
- u64 fsid;
- s64 bfree;
-
- if (test_opt(sb, MINIX_DF)) {
- sbi->s_overhead_last = 0;
- } else if (es->s_overhead_clusters) {
- sbi->s_overhead_last = le32_to_cpu(es->s_overhead_clusters);
- } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
- ext4_group_t i, ngroups = ext4_get_groups_count(sb);
- ext4_fsblk_t overhead = 0;
-
- /*
- * Compute the overhead (FS structures). This is constant
- * for a given filesystem unless the number of block groups
- * changes so we cache the previous value until it does.
- */
-
- /*
- * All of the blocks before first_data_block are
- * overhead
- */
- overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
-
- /*
- * Add the overhead found in each block group
- */
- for (i = 0; i < ngroups; i++) {
- gdp = ext4_get_group_desc(sb, i, NULL);
- overhead += ext4_num_overhead_clusters(sb, i, gdp);
- cond_resched();
- }
- sbi->s_overhead_last = overhead;
- smp_wmb();
- sbi->s_blocks_last = ext4_blocks_count(es);
- }
-
- buf->f_type = EXT4_SUPER_MAGIC;
- buf->f_bsize = sb->s_blocksize;
- buf->f_blocks = (ext4_blocks_count(es) -
- EXT4_C2B(sbi, sbi->s_overhead_last));
- bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
- percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
-	/* prevent underflow in case little free space is available */
- buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
- buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
- if (buf->f_bfree < ext4_r_blocks_count(es))
- buf->f_bavail = 0;
- buf->f_files = le32_to_cpu(es->s_inodes_count);
- buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
- buf->f_namelen = EXT4_NAME_LEN;
- fsid = le64_to_cpup((void *)es->s_uuid) ^
- le64_to_cpup((void *)es->s_uuid + sizeof(u64));
- buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
- buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
-
- return 0;
-}
-
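ext4_statfs() above derives f_fsid by XOR-folding the 16-byte volume UUID into 64 bits and splitting the result into the two 32-bit val[] words. A freestanding sketch of the same folding, with made-up UUID bytes (memcpy stands in for le64_to_cpup on a little-endian machine):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
        uint8_t uuid[16] = {        /* hypothetical s_uuid contents */
            0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,
            0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff };
        uint64_t lo, hi;

        memcpy(&lo, uuid, 8);       /* first half of the UUID  */
        memcpy(&hi, uuid + 8, 8);   /* second half of the UUID */
        uint64_t fsid = lo ^ hi;

        printf("val[0]=0x%08x val[1]=0x%08x\n",
               (uint32_t)(fsid & 0xFFFFFFFFUL), (uint32_t)(fsid >> 32));
        return 0;
    }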
-/* Helper function for writing quotas on sync - we need to start a transaction
- * before the quota file is locked for write. Otherwise there are possible deadlocks:
- * Process 1 Process 2
- * ext4_create() quota_sync()
- * jbd2_journal_start() write_dquot()
- * dquot_initialize() down(dqio_mutex)
- * down(dqio_mutex) jbd2_journal_start()
- *
- */
-
-#ifdef CONFIG_QUOTA
-
-static inline struct inode *dquot_to_inode(struct dquot *dquot)
-{
- return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
-}
-
-static int ext4_write_dquot(struct dquot *dquot)
-{
- int ret, err;
- handle_t *handle;
- struct inode *inode;
-
- inode = dquot_to_inode(dquot);
- handle = ext4_journal_start(inode,
- EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
- if (IS_ERR(handle))
- return PTR_ERR(handle);
- ret = dquot_commit(dquot);
- err = ext4_journal_stop(handle);
- if (!ret)
- ret = err;
- return ret;
-}
-
-static int ext4_acquire_dquot(struct dquot *dquot)
-{
- int ret, err;
- handle_t *handle;
-
- handle = ext4_journal_start(dquot_to_inode(dquot),
- EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
- if (IS_ERR(handle))
- return PTR_ERR(handle);
- ret = dquot_acquire(dquot);
- err = ext4_journal_stop(handle);
- if (!ret)
- ret = err;
- return ret;
-}
-
-static int ext4_release_dquot(struct dquot *dquot)
-{
- int ret, err;
- handle_t *handle;
-
- handle = ext4_journal_start(dquot_to_inode(dquot),
- EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
- if (IS_ERR(handle)) {
- /* Release dquot anyway to avoid endless cycle in dqput() */
- dquot_release(dquot);
- return PTR_ERR(handle);
- }
- ret = dquot_release(dquot);
- err = ext4_journal_stop(handle);
- if (!ret)
- ret = err;
- return ret;
-}
-
-static int ext4_mark_dquot_dirty(struct dquot *dquot)
-{
- /* Are we journaling quotas? */
- if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
- EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
- dquot_mark_dquot_dirty(dquot);
- return ext4_write_dquot(dquot);
- } else {
- return dquot_mark_dquot_dirty(dquot);
- }
-}
-
-static int ext4_write_info(struct super_block *sb, int type)
-{
- int ret, err;
- handle_t *handle;
-
- /* Data block + inode block */
- handle = ext4_journal_start(sb->s_root->d_inode, 2);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
- ret = dquot_commit_info(sb, type);
- err = ext4_journal_stop(handle);
- if (!ret)
- ret = err;
- return ret;
-}
-
-/*
- * Turn on quotas at mount time - we need to find
- * the quota file and such...
- */
-static int ext4_quota_on_mount(struct super_block *sb, int type)
-{
- return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
- EXT4_SB(sb)->s_jquota_fmt, type);
-}
-
-/*
- * Standard function to be called on quota_on
- */
-static int ext4_quota_on(struct super_block *sb, int type, int format_id,
- struct path *path)
-{
- int err;
-
- if (!test_opt(sb, QUOTA))
- return -EINVAL;
-
- /* Quotafile not on the same filesystem? */
- if (path->dentry->d_sb != sb)
- return -EXDEV;
- /* Journaling quota? */
- if (EXT4_SB(sb)->s_qf_names[type]) {
- /* Quotafile not in fs root? */
- if (path->dentry->d_parent != sb->s_root)
- ext4_msg(sb, KERN_WARNING,
- "Quota file not on filesystem root. "
- "Journaled quota will not work");
- }
-
- /*
-	 * When we journal data on the quota file, we have to flush the journal
-	 * to see all updates to the file when we bypass the pagecache...
- */
- if (EXT4_SB(sb)->s_journal &&
- ext4_should_journal_data(path->dentry->d_inode)) {
- /*
- * We don't need to lock updates but journal_flush() could
- * otherwise be livelocked...
- */
- jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
- err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
- jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
- if (err)
- return err;
- }
-
- return dquot_quota_on(sb, type, format_id, path);
-}
-
-static int ext4_quota_off(struct super_block *sb, int type)
-{
- struct inode *inode = sb_dqopt(sb)->files[type];
- handle_t *handle;
-
- /* Force all delayed allocation blocks to be allocated.
- * Caller already holds s_umount sem */
- if (test_opt(sb, DELALLOC))
- sync_filesystem(sb);
-
- if (!inode)
- goto out;
-
- /* Update modification times of quota files when userspace can
- * start looking at them */
- handle = ext4_journal_start(inode, 1);
- if (IS_ERR(handle))
- goto out;
- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- ext4_mark_inode_dirty(handle, inode);
- ext4_journal_stop(handle);
-
-out:
- return dquot_quota_off(sb, type);
-}
-
-/* Read data from the quotafile - avoid the pagecache and such because we cannot
- * afford acquiring the locks... As quota files are never truncated and the quota
- * code itself serializes the operations (and no one else should touch the files),
- * we don't have to be afraid of races */
-static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
- size_t len, loff_t off)
-{
- struct inode *inode = sb_dqopt(sb)->files[type];
- ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
- int err = 0;
- int offset = off & (sb->s_blocksize - 1);
- int tocopy;
- size_t toread;
- struct buffer_head *bh;
- loff_t i_size = i_size_read(inode);
-
- if (off > i_size)
- return 0;
- if (off+len > i_size)
- len = i_size-off;
- toread = len;
- while (toread > 0) {
- tocopy = sb->s_blocksize - offset < toread ?
- sb->s_blocksize - offset : toread;
- bh = ext4_bread(NULL, inode, blk, 0, &err);
- if (err)
- return err;
- if (!bh) /* A hole? */
- memset(data, 0, tocopy);
- else
- memcpy(data, bh->b_data+offset, tocopy);
- brelse(bh);
- offset = 0;
- toread -= tocopy;
- data += tocopy;
- blk++;
- }
- return len;
-}
-
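ext4_quota_read() above is a block-at-a-time copy loop: each pass copies min(blocksize - offset, remaining) bytes, zero-fills holes, and only the first block can start at a non-zero offset. A userspace analogue of the same loop shape (read_block() is a hypothetical stand-in for ext4_bread()):

    #include <stddef.h>
    #include <string.h>

    /* hypothetical: returns a block's data, or NULL for a hole */
    extern const char *read_block(unsigned long blk);

    static size_t blockwise_read(char *data, size_t len, unsigned long blk,
                                 size_t offset, size_t blocksize)
    {
        size_t toread = len;

        while (toread > 0) {
            size_t tocopy = blocksize - offset < toread ?
                            blocksize - offset : toread;
            const char *b = read_block(blk);

            if (!b)                         /* a hole reads as zeroes */
                memset(data, 0, tocopy);
            else
                memcpy(data, b + offset, tocopy);
            offset = 0;                     /* only the first block is unaligned */
            toread -= tocopy;
            data += tocopy;
            blk++;
        }
        return len;
    }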
-/* Write to quotafile (we know the transaction is already started and has
- * enough credits) */
-static ssize_t ext4_quota_write(struct super_block *sb, int type,
- const char *data, size_t len, loff_t off)
-{
- struct inode *inode = sb_dqopt(sb)->files[type];
- ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
- int err = 0;
- int offset = off & (sb->s_blocksize - 1);
- struct buffer_head *bh;
- handle_t *handle = journal_current_handle();
-
- if (EXT4_SB(sb)->s_journal && !handle) {
- ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
- " cancelled because transaction is not started",
- (unsigned long long)off, (unsigned long long)len);
- return -EIO;
- }
- /*
-	 * Since we account for only one data block in the transaction credits,
-	 * it is impossible to cross a block boundary.
- */
- if (sb->s_blocksize - offset < len) {
- ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
- " cancelled because not block aligned",
- (unsigned long long)off, (unsigned long long)len);
- return -EIO;
- }
-
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
- bh = ext4_bread(handle, inode, blk, 1, &err);
- if (!bh)
- goto out;
- err = ext4_journal_get_write_access(handle, bh);
- if (err) {
- brelse(bh);
- goto out;
- }
- lock_buffer(bh);
- memcpy(bh->b_data+offset, data, len);
- flush_dcache_page(bh->b_page);
- unlock_buffer(bh);
- err = ext4_handle_dirty_metadata(handle, NULL, bh);
- brelse(bh);
-out:
- if (err) {
- mutex_unlock(&inode->i_mutex);
- return err;
- }
- if (inode->i_size < off + len) {
- i_size_write(inode, off + len);
- EXT4_I(inode)->i_disksize = inode->i_size;
- ext4_mark_inode_dirty(handle, inode);
- }
- mutex_unlock(&inode->i_mutex);
- return len;
-}
-
-#endif
-
-static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data)
-{
- return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
-}
-
-#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
-static inline void register_as_ext2(void)
-{
- int err = register_filesystem(&ext2_fs_type);
- if (err)
- printk(KERN_WARNING
- "EXT4-fs: Unable to register as ext2 (%d)\n", err);
-}
-
-static inline void unregister_as_ext2(void)
-{
- unregister_filesystem(&ext2_fs_type);
-}
-
-static inline int ext2_feature_set_ok(struct super_block *sb)
-{
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))
- return 0;
- if (sb->s_flags & MS_RDONLY)
- return 1;
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))
- return 0;
- return 1;
-}
-MODULE_ALIAS("ext2");
-#else
-static inline void register_as_ext2(void) { }
-static inline void unregister_as_ext2(void) { }
-static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
-#endif
-
-#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
-static inline void register_as_ext3(void)
-{
- int err = register_filesystem(&ext3_fs_type);
- if (err)
- printk(KERN_WARNING
- "EXT4-fs: Unable to register as ext3 (%d)\n", err);
-}
-
-static inline void unregister_as_ext3(void)
-{
- unregister_filesystem(&ext3_fs_type);
-}
-
-static inline int ext3_feature_set_ok(struct super_block *sb)
-{
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP))
- return 0;
- if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
- return 0;
- if (sb->s_flags & MS_RDONLY)
- return 1;
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP))
- return 0;
- return 1;
-}
-MODULE_ALIAS("ext3");
-#else
-static inline void register_as_ext3(void) { }
-static inline void unregister_as_ext3(void) { }
-static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; }
-#endif
-
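The ext2/ext3 compatibility checks above test feature bits against a negated mask: HAS_INCOMPAT_FEATURE(sb, ~SUPP) is non-zero exactly when the on-disk filesystem sets an incompat bit outside the set that driver supports. A tiny bitmask illustration with made-up flag values:

    #include <stdio.h>

    #define SUPP    0x0007u         /* assumed supported-feature mask  */
    #define EXTENTS 0x0040u         /* assumed unsupported feature bit */

    int main(void)
    {
        unsigned int incompat = 0x0002u | EXTENTS;  /* on-disk feature bits */

        if (incompat & ~SUPP)       /* any bit outside the supported set? */
            printf("refuse: unsupported incompat bits 0x%x\n",
                   incompat & ~SUPP);
        return 0;
    }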
-static struct file_system_type ext4_fs_type = {
- .owner = THIS_MODULE,
- .name = "ext4",
- .mount = ext4_mount,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
-};
-
-static int __init ext4_init_feat_adverts(void)
-{
- struct ext4_features *ef;
- int ret = -ENOMEM;
-
- ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
- if (!ef)
- goto out;
-
- ef->f_kobj.kset = ext4_kset;
- init_completion(&ef->f_kobj_unregister);
- ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL,
- "features");
- if (ret) {
- kfree(ef);
- goto out;
- }
-
- ext4_feat = ef;
- ret = 0;
-out:
- return ret;
-}
-
-static void ext4_exit_feat_adverts(void)
-{
- kobject_put(&ext4_feat->f_kobj);
- wait_for_completion(&ext4_feat->f_kobj_unregister);
- kfree(ext4_feat);
-}
-
-/* Shared across all ext4 file systems */
-wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
-
-static int __init ext4_init_fs(void)
-{
- int i, err;
-
- ext4_li_info = NULL;
- mutex_init(&ext4_li_mtx);
-
- ext4_check_flag_values();
-
- for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
- mutex_init(&ext4__aio_mutex[i]);
- init_waitqueue_head(&ext4__ioend_wq[i]);
- }
-
- err = ext4_init_pageio();
- if (err)
- return err;
- err = ext4_init_system_zone();
- if (err)
- goto out6;
- ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
- if (!ext4_kset)
- goto out5;
- ext4_proc_root = proc_mkdir("fs/ext4", NULL);
-
- err = ext4_init_feat_adverts();
- if (err)
- goto out4;
-
- err = ext4_init_mballoc();
- if (err)
- goto out3;
-
- err = ext4_init_xattr();
- if (err)
- goto out2;
- err = init_inodecache();
- if (err)
- goto out1;
- register_as_ext3();
- register_as_ext2();
- err = register_filesystem(&ext4_fs_type);
- if (err)
- goto out;
-
- return 0;
-out:
- unregister_as_ext2();
- unregister_as_ext3();
- destroy_inodecache();
-out1:
- ext4_exit_xattr();
-out2:
- ext4_exit_mballoc();
-out3:
- ext4_exit_feat_adverts();
-out4:
- if (ext4_proc_root)
- remove_proc_entry("fs/ext4", NULL);
- kset_unregister(ext4_kset);
-out5:
- ext4_exit_system_zone();
-out6:
- ext4_exit_pageio();
- return err;
-}
-
-static void __exit ext4_exit_fs(void)
-{
- ext4_destroy_lazyinit_thread();
- unregister_as_ext2();
- unregister_as_ext3();
- unregister_filesystem(&ext4_fs_type);
- destroy_inodecache();
- ext4_exit_xattr();
- ext4_exit_mballoc();
- ext4_exit_feat_adverts();
- remove_proc_entry("fs/ext4", NULL);
- kset_unregister(ext4_kset);
- ext4_exit_system_zone();
- ext4_exit_pageio();
-}
-
-MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
-MODULE_DESCRIPTION("Fourth Extended Filesystem");
-MODULE_LICENSE("GPL");
-module_init(ext4_init_fs)
-module_exit(ext4_exit_fs)
diff --git a/ANDROID_3.4.5/fs/ext4/symlink.c b/ANDROID_3.4.5/fs/ext4/symlink.c
deleted file mode 100644
index ed9354af..00000000
--- a/ANDROID_3.4.5/fs/ext4/symlink.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * linux/fs/ext4/symlink.c
- *
- * Only fast symlinks left here - the rest is done by generic code. AV, 1999
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- * from
- *
- * linux/fs/minix/symlink.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * ext4 symlink handling code
- */
-
-#include <linux/fs.h>
-#include <linux/jbd2.h>
-#include <linux/namei.h>
-#include "ext4.h"
-#include "xattr.h"
-
-static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- struct ext4_inode_info *ei = EXT4_I(dentry->d_inode);
- nd_set_link(nd, (char *) ei->i_data);
- return NULL;
-}
-
-const struct inode_operations ext4_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
- .setattr = ext4_setattr,
-#ifdef CONFIG_EXT4_FS_XATTR
- .setxattr = generic_setxattr,
- .getxattr = generic_getxattr,
- .listxattr = ext4_listxattr,
- .removexattr = generic_removexattr,
-#endif
-};
-
-const struct inode_operations ext4_fast_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = ext4_follow_link,
- .setattr = ext4_setattr,
-#ifdef CONFIG_EXT4_FS_XATTR
- .setxattr = generic_setxattr,
- .getxattr = generic_getxattr,
- .listxattr = ext4_listxattr,
- .removexattr = generic_removexattr,
-#endif
-};
diff --git a/ANDROID_3.4.5/fs/ext4/truncate.h b/ANDROID_3.4.5/fs/ext4/truncate.h
deleted file mode 100644
index 011ba667..00000000
--- a/ANDROID_3.4.5/fs/ext4/truncate.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * linux/fs/ext4/truncate.h
- *
- * Common inline functions needed for truncate support
- */
-
-/*
- * Truncate blocks that were not used by write. We have to truncate the
- * pagecache as well so that corresponding buffers get properly unmapped.
- */
-static inline void ext4_truncate_failed_write(struct inode *inode)
-{
- truncate_inode_pages(inode->i_mapping, inode->i_size);
- ext4_truncate(inode);
-}
-
-/*
- * Work out how many blocks we need to proceed with the next chunk of a
- * truncate transaction.
- */
-static inline unsigned long ext4_blocks_for_truncate(struct inode *inode)
-{
- ext4_lblk_t needed;
-
- needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);
-
- /* Give ourselves just enough room to cope with inodes in which
- * i_blocks is corrupt: we've seen disk corruptions in the past
- * which resulted in random data in an inode which looked enough
- * like a regular file for ext4 to try to delete it. Things
- * will go a bit crazy if that happens, but at least we should
- * try not to panic the whole kernel. */
- if (needed < 2)
- needed = 2;
-
- /* But we need to bound the transaction so we don't overflow the
- * journal. */
- if (needed > EXT4_MAX_TRANS_DATA)
- needed = EXT4_MAX_TRANS_DATA;
-
- return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
-}
-
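To make the shift in ext4_blocks_for_truncate() concrete: i_blocks counts 512-byte sectors, so shifting right by (s_blocksize_bits - 9) converts sectors into filesystem blocks. A worked example with assumed values:

    #include <stdio.h>

    int main(void)
    {
        unsigned long i_blocks = 2048;     /* 512-byte sectors: 1 MiB of data */
        unsigned int blocksize_bits = 12;  /* 4 KiB filesystem blocks         */

        unsigned long needed = i_blocks >> (blocksize_bits - 9);
        if (needed < 2)
            needed = 2;                    /* same floor as the helper above */
        printf("%lu sectors -> %lu blocks of headroom\n",
               i_blocks, needed);          /* prints 2048 -> 256 */
        return 0;
    }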
diff --git a/ANDROID_3.4.5/fs/ext4/xattr.c b/ANDROID_3.4.5/fs/ext4/xattr.c
deleted file mode 100644
index e88748e5..00000000
--- a/ANDROID_3.4.5/fs/ext4/xattr.c
+++ /dev/null
@@ -1,1608 +0,0 @@
-/*
- * linux/fs/ext4/xattr.c
- *
- * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
- *
- * Fix by Harrison Xing <harrison@mountainviewdata.com>.
- * Ext4 code with a lot of help from Eric Jarman <ejarman@acm.org>.
- * Extended attributes for symlinks and special files added per
- * suggestion of Luka Renko <luka.renko@hermes.si>.
- * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
- * Red Hat Inc.
- * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz
- * and Andreas Gruenbacher <agruen@suse.de>.
- */
-
-/*
- * Extended attributes are stored directly in inodes (on file systems with
- * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl
- * field contains the block number if an inode uses an additional block. All
- * attributes must fit in the inode and one additional block. Blocks that
- * contain the identical set of attributes may be shared among several inodes.
- * Identical blocks are detected by keeping a cache of blocks that have
- * recently been accessed.
- *
- * The attributes in inodes and on blocks have a different header; the entries
- * are stored in the same format:
- *
- * +------------------+
- * | header |
- * | entry 1 | |
- * | entry 2 | | growing downwards
- * | entry 3 | v
- * | four null bytes |
- * | . . . |
- * | value 1 | ^
- * | value 3 | | growing upwards
- * | value 2 | |
- * +------------------+
- *
- * The header is followed by multiple entry descriptors. In disk blocks, the
- * entry descriptors are kept sorted. In inodes, they are unsorted. The
- * attribute values are aligned to the end of the block in no specific order.
- *
- * Locking strategy
- * ----------------
- * EXT4_I(inode)->i_file_acl is protected by EXT4_I(inode)->xattr_sem.
- * EA blocks are only changed if they are exclusive to an inode, so
- * holding xattr_sem also means that nothing but the EA block's reference
- * count can change. Multiple writers to the same block are synchronized
- * by the buffer lock.
- */
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/mbcache.h>
-#include <linux/quotaops.h>
-#include <linux/rwsem.h>
-#include "ext4_jbd2.h"
-#include "ext4.h"
-#include "xattr.h"
-#include "acl.h"
-
-#define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data))
-#define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr))
-#define BFIRST(bh) ENTRY(BHDR(bh)+1)
-#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
-
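Given the layout in the header comment above, walking the entry table means stepping over variable-length descriptors until the four-null-byte terminator. A minimal sketch of that walk, using IS_LAST_ENTRY() from just above and the EXT4_XATTR_NEXT() helper from xattr.h (not kernel code, same loop shape as ext4_xattr_check_names() below):

    /* sketch: count the entries in an EA block or inode body */
    static int count_entries(struct ext4_xattr_entry *entry)
    {
        int n = 0;

        while (!IS_LAST_ENTRY(entry)) {     /* terminator: four null bytes  */
            n++;
            entry = EXT4_XATTR_NEXT(entry); /* skip the variable-length name */
        }
        return n;
    }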
-#ifdef EXT4_XATTR_DEBUG
-# define ea_idebug(inode, f...) do { \
- printk(KERN_DEBUG "inode %s:%lu: ", \
- inode->i_sb->s_id, inode->i_ino); \
- printk(f); \
- printk("\n"); \
- } while (0)
-# define ea_bdebug(bh, f...) do { \
- char b[BDEVNAME_SIZE]; \
- printk(KERN_DEBUG "block %s:%lu: ", \
- bdevname(bh->b_bdev, b), \
- (unsigned long) bh->b_blocknr); \
- printk(f); \
- printk("\n"); \
- } while (0)
-#else
-# define ea_idebug(inode, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
-# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
-#endif
-
-static void ext4_xattr_cache_insert(struct buffer_head *);
-static struct buffer_head *ext4_xattr_cache_find(struct inode *,
- struct ext4_xattr_header *,
- struct mb_cache_entry **);
-static void ext4_xattr_rehash(struct ext4_xattr_header *,
- struct ext4_xattr_entry *);
-static int ext4_xattr_list(struct dentry *dentry, char *buffer,
- size_t buffer_size);
-
-static struct mb_cache *ext4_xattr_cache;
-
-static const struct xattr_handler *ext4_xattr_handler_map[] = {
- [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler,
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
- [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler,
- [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler,
-#endif
- [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler,
-#ifdef CONFIG_EXT4_FS_SECURITY
- [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler,
-#endif
-};
-
-const struct xattr_handler *ext4_xattr_handlers[] = {
- &ext4_xattr_user_handler,
- &ext4_xattr_trusted_handler,
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
- &ext4_xattr_acl_access_handler,
- &ext4_xattr_acl_default_handler,
-#endif
-#ifdef CONFIG_EXT4_FS_SECURITY
- &ext4_xattr_security_handler,
-#endif
- NULL
-};
-
-static inline const struct xattr_handler *
-ext4_xattr_handler(int name_index)
-{
- const struct xattr_handler *handler = NULL;
-
- if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map))
- handler = ext4_xattr_handler_map[name_index];
- return handler;
-}
-
-/*
- * Inode operation listxattr()
- *
- * dentry->d_inode->i_mutex: don't care
- */
-ssize_t
-ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
-{
- return ext4_xattr_list(dentry, buffer, size);
-}
-
-static int
-ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end)
-{
- while (!IS_LAST_ENTRY(entry)) {
- struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry);
- if ((void *)next >= end)
- return -EIO;
- entry = next;
- }
- return 0;
-}
-
-static inline int
-ext4_xattr_check_block(struct buffer_head *bh)
-{
- if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
- BHDR(bh)->h_blocks != cpu_to_le32(1))
- return -EIO;
- return ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
-}
-
-static inline int
-ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size)
-{
- size_t value_size = le32_to_cpu(entry->e_value_size);
-
- if (entry->e_value_block != 0 || value_size > size ||
- le16_to_cpu(entry->e_value_offs) + value_size > size)
- return -EIO;
- return 0;
-}
-
-static int
-ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
- const char *name, size_t size, int sorted)
-{
- struct ext4_xattr_entry *entry;
- size_t name_len;
- int cmp = 1;
-
- if (name == NULL)
- return -EINVAL;
- name_len = strlen(name);
- entry = *pentry;
- for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
- cmp = name_index - entry->e_name_index;
- if (!cmp)
- cmp = name_len - entry->e_name_len;
- if (!cmp)
- cmp = memcmp(name, entry->e_name, name_len);
- if (cmp <= 0 && (sorted || cmp == 0))
- break;
- }
- *pentry = entry;
- if (!cmp && ext4_xattr_check_entry(entry, size))
- return -EIO;
- return cmp ? -ENODATA : 0;
-}
-
-static int
-ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
- void *buffer, size_t buffer_size)
-{
- struct buffer_head *bh = NULL;
- struct ext4_xattr_entry *entry;
- size_t size;
- int error;
-
- ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
- name_index, name, buffer, (long)buffer_size);
-
- error = -ENODATA;
- if (!EXT4_I(inode)->i_file_acl)
- goto cleanup;
- ea_idebug(inode, "reading block %llu",
- (unsigned long long)EXT4_I(inode)->i_file_acl);
- bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
- if (!bh)
- goto cleanup;
- ea_bdebug(bh, "b_count=%d, refcount=%d",
- atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
- if (ext4_xattr_check_block(bh)) {
-bad_block:
- EXT4_ERROR_INODE(inode, "bad block %llu",
- EXT4_I(inode)->i_file_acl);
- error = -EIO;
- goto cleanup;
- }
- ext4_xattr_cache_insert(bh);
- entry = BFIRST(bh);
- error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
- if (error == -EIO)
- goto bad_block;
- if (error)
- goto cleanup;
- size = le32_to_cpu(entry->e_value_size);
- if (buffer) {
- error = -ERANGE;
- if (size > buffer_size)
- goto cleanup;
- memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
- size);
- }
- error = size;
-
-cleanup:
- brelse(bh);
- return error;
-}
-
-static int
-ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
- void *buffer, size_t buffer_size)
-{
- struct ext4_xattr_ibody_header *header;
- struct ext4_xattr_entry *entry;
- struct ext4_inode *raw_inode;
- struct ext4_iloc iloc;
- size_t size;
- void *end;
- int error;
-
- if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
- return -ENODATA;
- error = ext4_get_inode_loc(inode, &iloc);
- if (error)
- return error;
- raw_inode = ext4_raw_inode(&iloc);
- header = IHDR(inode, raw_inode);
- entry = IFIRST(header);
- end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
- error = ext4_xattr_check_names(entry, end);
- if (error)
- goto cleanup;
- error = ext4_xattr_find_entry(&entry, name_index, name,
- end - (void *)entry, 0);
- if (error)
- goto cleanup;
- size = le32_to_cpu(entry->e_value_size);
- if (buffer) {
- error = -ERANGE;
- if (size > buffer_size)
- goto cleanup;
- memcpy(buffer, (void *)IFIRST(header) +
- le16_to_cpu(entry->e_value_offs), size);
- }
- error = size;
-
-cleanup:
- brelse(iloc.bh);
- return error;
-}
-
-/*
- * ext4_xattr_get()
- *
- * Copy an extended attribute into the buffer
- * provided, or compute the buffer size required.
- * Buffer is NULL to compute the size of the buffer required.
- *
- * Returns a negative error number on failure, or the number of bytes
- * used / required on success.
- */
-int
-ext4_xattr_get(struct inode *inode, int name_index, const char *name,
- void *buffer, size_t buffer_size)
-{
- int error;
-
- down_read(&EXT4_I(inode)->xattr_sem);
- error = ext4_xattr_ibody_get(inode, name_index, name, buffer,
- buffer_size);
- if (error == -ENODATA)
- error = ext4_xattr_block_get(inode, name_index, name, buffer,
- buffer_size);
- up_read(&EXT4_I(inode)->xattr_sem);
- return error;
-}
-
-static int
-ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
- char *buffer, size_t buffer_size)
-{
- size_t rest = buffer_size;
-
- for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
- const struct xattr_handler *handler =
- ext4_xattr_handler(entry->e_name_index);
-
- if (handler) {
- size_t size = handler->list(dentry, buffer, rest,
- entry->e_name,
- entry->e_name_len,
- handler->flags);
- if (buffer) {
- if (size > rest)
- return -ERANGE;
- buffer += size;
- }
- rest -= size;
- }
- }
- return buffer_size - rest;
-}
-
-static int
-ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
-{
- struct inode *inode = dentry->d_inode;
- struct buffer_head *bh = NULL;
- int error;
-
- ea_idebug(inode, "buffer=%p, buffer_size=%ld",
- buffer, (long)buffer_size);
-
- error = 0;
- if (!EXT4_I(inode)->i_file_acl)
- goto cleanup;
- ea_idebug(inode, "reading block %llu",
- (unsigned long long)EXT4_I(inode)->i_file_acl);
- bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
- error = -EIO;
- if (!bh)
- goto cleanup;
- ea_bdebug(bh, "b_count=%d, refcount=%d",
- atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
- if (ext4_xattr_check_block(bh)) {
- EXT4_ERROR_INODE(inode, "bad block %llu",
- EXT4_I(inode)->i_file_acl);
- error = -EIO;
- goto cleanup;
- }
- ext4_xattr_cache_insert(bh);
- error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
-
-cleanup:
- brelse(bh);
-
- return error;
-}
-
-static int
-ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
-{
- struct inode *inode = dentry->d_inode;
- struct ext4_xattr_ibody_header *header;
- struct ext4_inode *raw_inode;
- struct ext4_iloc iloc;
- void *end;
- int error;
-
- if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
- return 0;
- error = ext4_get_inode_loc(inode, &iloc);
- if (error)
- return error;
- raw_inode = ext4_raw_inode(&iloc);
- header = IHDR(inode, raw_inode);
- end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
- error = ext4_xattr_check_names(IFIRST(header), end);
- if (error)
- goto cleanup;
- error = ext4_xattr_list_entries(dentry, IFIRST(header),
- buffer, buffer_size);
-
-cleanup:
- brelse(iloc.bh);
- return error;
-}
-
-/*
- * ext4_xattr_list()
- *
- * Copy a list of attribute names into the buffer
- * provided, or compute the buffer size required.
- * Buffer is NULL to compute the size of the buffer required.
- *
- * Returns a negative error number on failure, or the number of bytes
- * used / required on success.
- */
-static int
-ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
-{
- int ret, ret2;
-
- down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
- ret = ret2 = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
- if (ret < 0)
- goto errout;
- if (buffer) {
- buffer += ret;
- buffer_size -= ret;
- }
- ret = ext4_xattr_block_list(dentry, buffer, buffer_size);
- if (ret < 0)
- goto errout;
- ret += ret2;
-errout:
- up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
- return ret;
-}
-
-/*
- * If the EXT4_FEATURE_COMPAT_EXT_ATTR feature of this file system is
- * not set, set it.
- */
-static void ext4_xattr_update_super_block(handle_t *handle,
- struct super_block *sb)
-{
- if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR))
- return;
-
- if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
- EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);
- ext4_handle_dirty_super(handle, sb);
- }
-}
-
-/*
- * Release the xattr block BH: If the reference count is > 1, decrement
- * it; otherwise free the block.
- */
-static void
-ext4_xattr_release_block(handle_t *handle, struct inode *inode,
- struct buffer_head *bh)
-{
- struct mb_cache_entry *ce = NULL;
- int error = 0;
-
- ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr);
- error = ext4_journal_get_write_access(handle, bh);
- if (error)
- goto out;
-
- lock_buffer(bh);
- if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
- ea_bdebug(bh, "refcount now=0; freeing");
- if (ce)
- mb_cache_entry_free(ce);
- get_bh(bh);
- ext4_free_blocks(handle, inode, bh, 0, 1,
- EXT4_FREE_BLOCKS_METADATA |
- EXT4_FREE_BLOCKS_FORGET);
- unlock_buffer(bh);
- } else {
- le32_add_cpu(&BHDR(bh)->h_refcount, -1);
- if (ce)
- mb_cache_entry_release(ce);
- unlock_buffer(bh);
- error = ext4_handle_dirty_metadata(handle, inode, bh);
- if (IS_SYNC(inode))
- ext4_handle_sync(handle);
- dquot_free_block(inode, 1);
- ea_bdebug(bh, "refcount now=%d; releasing",
- le32_to_cpu(BHDR(bh)->h_refcount));
- }
-out:
- ext4_std_error(inode->i_sb, error);
- return;
-}
-
-/*
- * Compute the free space available for EAs. This also accumulates, via
- * *total, the number of bytes used by EA entries.
- */
-static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
- size_t *min_offs, void *base, int *total)
-{
- for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- *total += EXT4_XATTR_LEN(last->e_name_len);
- if (!last->e_value_block && last->e_value_size) {
- size_t offs = le16_to_cpu(last->e_value_offs);
- if (offs < *min_offs)
- *min_offs = offs;
- }
- }
- return (*min_offs - ((void *)last - base) - sizeof(__u32));
-}
-
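The value ext4_xattr_free_space() returns is the gap between the end of the entry descriptors (plus the 4-byte terminator) and the lowest value offset, since values pack downward from the end of the buffer. A worked example with assumed offsets:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        unsigned long entries_end = 200; /* (void *)last - base, assumed      */
        unsigned long min_offs = 3000;   /* lowest e_value_offs seen, assumed */

        /* gap between descriptors (+ 4-byte terminator) and packed values */
        unsigned long free_bytes = min_offs - entries_end - sizeof(uint32_t);
        printf("free: %lu bytes\n", free_bytes);  /* prints 2796 */
        return 0;
    }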
-struct ext4_xattr_info {
- int name_index;
- const char *name;
- const void *value;
- size_t value_len;
-};
-
-struct ext4_xattr_search {
- struct ext4_xattr_entry *first;
- void *base;
- void *end;
- struct ext4_xattr_entry *here;
- int not_found;
-};
-
-static int
-ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
-{
- struct ext4_xattr_entry *last;
- size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);
-
- /* Compute min_offs and last. */
- last = s->first;
- for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (!last->e_value_block && last->e_value_size) {
- size_t offs = le16_to_cpu(last->e_value_offs);
- if (offs < min_offs)
- min_offs = offs;
- }
- }
- free = min_offs - ((void *)last - s->base) - sizeof(__u32);
- if (!s->not_found) {
- if (!s->here->e_value_block && s->here->e_value_size) {
- size_t size = le32_to_cpu(s->here->e_value_size);
- free += EXT4_XATTR_SIZE(size);
- }
- free += EXT4_XATTR_LEN(name_len);
- }
- if (i->value) {
- if (free < EXT4_XATTR_SIZE(i->value_len) ||
- free < EXT4_XATTR_LEN(name_len) +
- EXT4_XATTR_SIZE(i->value_len))
- return -ENOSPC;
- }
-
- if (i->value && s->not_found) {
- /* Insert the new name. */
- size_t size = EXT4_XATTR_LEN(name_len);
- size_t rest = (void *)last - (void *)s->here + sizeof(__u32);
- memmove((void *)s->here + size, s->here, rest);
- memset(s->here, 0, size);
- s->here->e_name_index = i->name_index;
- s->here->e_name_len = name_len;
- memcpy(s->here->e_name, i->name, name_len);
- } else {
- if (!s->here->e_value_block && s->here->e_value_size) {
- void *first_val = s->base + min_offs;
- size_t offs = le16_to_cpu(s->here->e_value_offs);
- void *val = s->base + offs;
- size_t size = EXT4_XATTR_SIZE(
- le32_to_cpu(s->here->e_value_size));
-
- if (i->value && size == EXT4_XATTR_SIZE(i->value_len)) {
- /* The old and the new value have the same
- size. Just replace. */
- s->here->e_value_size =
- cpu_to_le32(i->value_len);
- memset(val + size - EXT4_XATTR_PAD, 0,
- EXT4_XATTR_PAD); /* Clear pad bytes. */
- memcpy(val, i->value, i->value_len);
- return 0;
- }
-
- /* Remove the old value. */
- memmove(first_val + size, first_val, val - first_val);
- memset(first_val, 0, size);
- s->here->e_value_size = 0;
- s->here->e_value_offs = 0;
- min_offs += size;
-
- /* Adjust all value offsets. */
- last = s->first;
- while (!IS_LAST_ENTRY(last)) {
- size_t o = le16_to_cpu(last->e_value_offs);
- if (!last->e_value_block &&
- last->e_value_size && o < offs)
- last->e_value_offs =
- cpu_to_le16(o + size);
- last = EXT4_XATTR_NEXT(last);
- }
- }
- if (!i->value) {
- /* Remove the old name. */
- size_t size = EXT4_XATTR_LEN(name_len);
- last = ENTRY((void *)last - size);
- memmove(s->here, (void *)s->here + size,
- (void *)last - (void *)s->here + sizeof(__u32));
- memset(last, 0, size);
- }
- }
-
- if (i->value) {
- /* Insert the new value. */
- s->here->e_value_size = cpu_to_le32(i->value_len);
- if (i->value_len) {
- size_t size = EXT4_XATTR_SIZE(i->value_len);
- void *val = s->base + min_offs - size;
- s->here->e_value_offs = cpu_to_le16(min_offs - size);
- memset(val + size - EXT4_XATTR_PAD, 0,
- EXT4_XATTR_PAD); /* Clear the pad bytes. */
- memcpy(val, i->value, i->value_len);
- }
- }
- return 0;
-}
-
-struct ext4_xattr_block_find {
- struct ext4_xattr_search s;
- struct buffer_head *bh;
-};
-
-static int
-ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
- struct ext4_xattr_block_find *bs)
-{
- struct super_block *sb = inode->i_sb;
- int error;
-
- ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
- i->name_index, i->name, i->value, (long)i->value_len);
-
- if (EXT4_I(inode)->i_file_acl) {
- /* The inode already has an extended attribute block. */
- bs->bh = sb_bread(sb, EXT4_I(inode)->i_file_acl);
- error = -EIO;
- if (!bs->bh)
- goto cleanup;
- ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
- atomic_read(&(bs->bh->b_count)),
- le32_to_cpu(BHDR(bs->bh)->h_refcount));
- if (ext4_xattr_check_block(bs->bh)) {
- EXT4_ERROR_INODE(inode, "bad block %llu",
- EXT4_I(inode)->i_file_acl);
- error = -EIO;
- goto cleanup;
- }
- /* Find the named attribute. */
- bs->s.base = BHDR(bs->bh);
- bs->s.first = BFIRST(bs->bh);
- bs->s.end = bs->bh->b_data + bs->bh->b_size;
- bs->s.here = bs->s.first;
- error = ext4_xattr_find_entry(&bs->s.here, i->name_index,
- i->name, bs->bh->b_size, 1);
- if (error && error != -ENODATA)
- goto cleanup;
- bs->s.not_found = error;
- }
- error = 0;
-
-cleanup:
- return error;
-}
-
-static int
-ext4_xattr_block_set(handle_t *handle, struct inode *inode,
- struct ext4_xattr_info *i,
- struct ext4_xattr_block_find *bs)
-{
- struct super_block *sb = inode->i_sb;
- struct buffer_head *new_bh = NULL;
- struct ext4_xattr_search *s = &bs->s;
- struct mb_cache_entry *ce = NULL;
- int error = 0;
-
-#define header(x) ((struct ext4_xattr_header *)(x))
-
- if (i->value && i->value_len > sb->s_blocksize)
- return -ENOSPC;
- if (s->base) {
- ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev,
- bs->bh->b_blocknr);
- error = ext4_journal_get_write_access(handle, bs->bh);
- if (error)
- goto cleanup;
- lock_buffer(bs->bh);
-
- if (header(s->base)->h_refcount == cpu_to_le32(1)) {
- if (ce) {
- mb_cache_entry_free(ce);
- ce = NULL;
- }
- ea_bdebug(bs->bh, "modifying in-place");
- error = ext4_xattr_set_entry(i, s);
- if (!error) {
- if (!IS_LAST_ENTRY(s->first))
- ext4_xattr_rehash(header(s->base),
- s->here);
- ext4_xattr_cache_insert(bs->bh);
- }
- unlock_buffer(bs->bh);
- if (error == -EIO)
- goto bad_block;
- if (!error)
- error = ext4_handle_dirty_metadata(handle,
- inode,
- bs->bh);
- if (error)
- goto cleanup;
- goto inserted;
- } else {
- int offset = (char *)s->here - bs->bh->b_data;
-
- unlock_buffer(bs->bh);
- ext4_handle_release_buffer(handle, bs->bh);
- if (ce) {
- mb_cache_entry_release(ce);
- ce = NULL;
- }
- ea_bdebug(bs->bh, "cloning");
- s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
- error = -ENOMEM;
- if (s->base == NULL)
- goto cleanup;
- memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
- s->first = ENTRY(header(s->base)+1);
- header(s->base)->h_refcount = cpu_to_le32(1);
- s->here = ENTRY(s->base + offset);
- s->end = s->base + bs->bh->b_size;
- }
- } else {
- /* Allocate a buffer where we construct the new block. */
- s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
- /* assert(header == s->base) */
- error = -ENOMEM;
- if (s->base == NULL)
- goto cleanup;
- header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
- header(s->base)->h_blocks = cpu_to_le32(1);
- header(s->base)->h_refcount = cpu_to_le32(1);
- s->first = ENTRY(header(s->base)+1);
- s->here = ENTRY(header(s->base)+1);
- s->end = s->base + sb->s_blocksize;
- }
-
- error = ext4_xattr_set_entry(i, s);
- if (error == -EIO)
- goto bad_block;
- if (error)
- goto cleanup;
- if (!IS_LAST_ENTRY(s->first))
- ext4_xattr_rehash(header(s->base), s->here);
-
-inserted:
- if (!IS_LAST_ENTRY(s->first)) {
- new_bh = ext4_xattr_cache_find(inode, header(s->base), &ce);
- if (new_bh) {
- /* We found an identical block in the cache. */
- if (new_bh == bs->bh)
- ea_bdebug(new_bh, "keeping");
- else {
- /* The old block is released after updating
- the inode. */
- error = dquot_alloc_block(inode, 1);
- if (error)
- goto cleanup;
- error = ext4_journal_get_write_access(handle,
- new_bh);
- if (error)
- goto cleanup_dquot;
- lock_buffer(new_bh);
- le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
- ea_bdebug(new_bh, "reusing; refcount now=%d",
- le32_to_cpu(BHDR(new_bh)->h_refcount));
- unlock_buffer(new_bh);
- error = ext4_handle_dirty_metadata(handle,
- inode,
- new_bh);
- if (error)
- goto cleanup_dquot;
- }
- mb_cache_entry_release(ce);
- ce = NULL;
- } else if (bs->bh && s->base == bs->bh->b_data) {
- /* We were modifying this block in-place. */
- ea_bdebug(bs->bh, "keeping this block");
- new_bh = bs->bh;
- get_bh(new_bh);
- } else {
- /* We need to allocate a new block */
- ext4_fsblk_t goal, block;
-
- goal = ext4_group_first_block_no(sb,
- EXT4_I(inode)->i_block_group);
-
- /* non-extent files can't have physical blocks past 2^32 */
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
-
- /*
- * take i_data_sem because we will test
- * i_delalloc_reserved_flag in ext4_mb_new_blocks
- */
- down_read((&EXT4_I(inode)->i_data_sem));
- block = ext4_new_meta_blocks(handle, inode, goal, 0,
- NULL, &error);
- up_read((&EXT4_I(inode)->i_data_sem));
- if (error)
- goto cleanup;
-
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
-
- ea_idebug(inode, "creating block %llu",
- (unsigned long long)block);
-
- new_bh = sb_getblk(sb, block);
- if (!new_bh) {
-getblk_failed:
- ext4_free_blocks(handle, inode, NULL, block, 1,
- EXT4_FREE_BLOCKS_METADATA);
- error = -EIO;
- goto cleanup;
- }
- lock_buffer(new_bh);
- error = ext4_journal_get_create_access(handle, new_bh);
- if (error) {
- unlock_buffer(new_bh);
- goto getblk_failed;
- }
- memcpy(new_bh->b_data, s->base, new_bh->b_size);
- set_buffer_uptodate(new_bh);
- unlock_buffer(new_bh);
- ext4_xattr_cache_insert(new_bh);
- error = ext4_handle_dirty_metadata(handle,
- inode, new_bh);
- if (error)
- goto cleanup;
- }
- }
-
- /* Update the inode. */
- EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
-
- /* Drop the previous xattr block. */
- if (bs->bh && bs->bh != new_bh)
- ext4_xattr_release_block(handle, inode, bs->bh);
- error = 0;
-
-cleanup:
- if (ce)
- mb_cache_entry_release(ce);
- brelse(new_bh);
- if (!(bs->bh && s->base == bs->bh->b_data))
- kfree(s->base);
-
- return error;
-
-cleanup_dquot:
- dquot_free_block(inode, 1);
- goto cleanup;
-
-bad_block:
- EXT4_ERROR_INODE(inode, "bad block %llu",
- EXT4_I(inode)->i_file_acl);
- goto cleanup;
-
-#undef header
-}
-
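The h_refcount == 1 branch above modifies the block in place; a shared block
is cloned first and the clone edited, copy-on-write style. A toy user-space
model of that decision (struct xblock and xblock_modify are invented for
illustration and stand in for the buffer_head/refcount handling):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct xblock {
	int refcount;
	char data[64];
};

static struct xblock *xblock_modify(struct xblock *b, const char *newdata)
{
	if (b->refcount > 1) {			/* shared: clone first */
		struct xblock *copy = malloc(sizeof(*copy));

		if (!copy)
			return NULL;
		memcpy(copy, b, sizeof(*copy));
		copy->refcount = 1;
		b->refcount--;			/* drop one ref on the old block */
		b = copy;
	}
	snprintf(b->data, sizeof(b->data), "%s", newdata);
	return b;
}

int main(void)
{
	struct xblock shared = { .refcount = 2, .data = "old" };
	struct xblock *mine = xblock_modify(&shared, "new");

	printf("shared=\"%s\" (ref %d), mine=\"%s\" (ref %d)\n",
	       shared.data, shared.refcount, mine->data, mine->refcount);
	free(mine);
	return 0;
}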
-struct ext4_xattr_ibody_find {
- struct ext4_xattr_search s;
- struct ext4_iloc iloc;
-};
-
-static int
-ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
- struct ext4_xattr_ibody_find *is)
-{
- struct ext4_xattr_ibody_header *header;
- struct ext4_inode *raw_inode;
- int error;
-
- if (EXT4_I(inode)->i_extra_isize == 0)
- return 0;
- raw_inode = ext4_raw_inode(&is->iloc);
- header = IHDR(inode, raw_inode);
- is->s.base = is->s.first = IFIRST(header);
- is->s.here = is->s.first;
- is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
- if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
- error = ext4_xattr_check_names(IFIRST(header), is->s.end);
- if (error)
- return error;
- /* Find the named attribute. */
- error = ext4_xattr_find_entry(&is->s.here, i->name_index,
- i->name, is->s.end -
- (void *)is->s.base, 0);
- if (error && error != -ENODATA)
- return error;
- is->s.not_found = error;
- }
- return 0;
-}
-
-static int
-ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
- struct ext4_xattr_info *i,
- struct ext4_xattr_ibody_find *is)
-{
- struct ext4_xattr_ibody_header *header;
- struct ext4_xattr_search *s = &is->s;
- int error;
-
- if (EXT4_I(inode)->i_extra_isize == 0)
- return -ENOSPC;
- error = ext4_xattr_set_entry(i, s);
- if (error)
- return error;
- header = IHDR(inode, ext4_raw_inode(&is->iloc));
- if (!IS_LAST_ENTRY(s->first)) {
- header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
- ext4_set_inode_state(inode, EXT4_STATE_XATTR);
- } else {
- header->h_magic = cpu_to_le32(0);
- ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
- }
- return 0;
-}
-
-/*
- * ext4_xattr_set_handle()
- *
- * Create, replace or remove an extended attribute for this inode. Value
- * is NULL to remove an existing extended attribute, and non-NULL to
- * either replace an existing extended attribute, or create a new extended
- * attribute. The flags XATTR_REPLACE and XATTR_CREATE specify that the
- * extended attribute must already exist, or must not yet exist, prior to
- * the call, respectively.
- *
- * Returns 0, or a negative error number on failure.
- */
-int
-ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
- const char *name, const void *value, size_t value_len,
- int flags)
-{
- struct ext4_xattr_info i = {
- .name_index = name_index,
- .name = name,
- .value = value,
- .value_len = value_len,
- };
- struct ext4_xattr_ibody_find is = {
- .s = { .not_found = -ENODATA, },
- };
- struct ext4_xattr_block_find bs = {
- .s = { .not_found = -ENODATA, },
- };
- unsigned long no_expand;
- int error;
-
- if (!name)
- return -EINVAL;
- if (strlen(name) > 255)
- return -ERANGE;
- down_write(&EXT4_I(inode)->xattr_sem);
- no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
- ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
-
- error = ext4_reserve_inode_write(handle, inode, &is.iloc);
- if (error)
- goto cleanup;
-
- if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) {
- struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
- memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
- ext4_clear_inode_state(inode, EXT4_STATE_NEW);
- }
-
- error = ext4_xattr_ibody_find(inode, &i, &is);
- if (error)
- goto cleanup;
- if (is.s.not_found)
- error = ext4_xattr_block_find(inode, &i, &bs);
- if (error)
- goto cleanup;
- if (is.s.not_found && bs.s.not_found) {
- error = -ENODATA;
- if (flags & XATTR_REPLACE)
- goto cleanup;
- error = 0;
- if (!value)
- goto cleanup;
- } else {
- error = -EEXIST;
- if (flags & XATTR_CREATE)
- goto cleanup;
- }
- if (!value) {
- if (!is.s.not_found)
- error = ext4_xattr_ibody_set(handle, inode, &i, &is);
- else if (!bs.s.not_found)
- error = ext4_xattr_block_set(handle, inode, &i, &bs);
- } else {
- error = ext4_xattr_ibody_set(handle, inode, &i, &is);
- if (!error && !bs.s.not_found) {
- i.value = NULL;
- error = ext4_xattr_block_set(handle, inode, &i, &bs);
- } else if (error == -ENOSPC) {
- if (EXT4_I(inode)->i_file_acl && !bs.s.base) {
- error = ext4_xattr_block_find(inode, &i, &bs);
- if (error)
- goto cleanup;
- }
- error = ext4_xattr_block_set(handle, inode, &i, &bs);
- if (error)
- goto cleanup;
- if (!is.s.not_found) {
- i.value = NULL;
- error = ext4_xattr_ibody_set(handle, inode, &i,
- &is);
- }
- }
- }
- if (!error) {
- ext4_xattr_update_super_block(handle, inode->i_sb);
- inode->i_ctime = ext4_current_time(inode);
- if (!value)
- ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
- error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
- /*
- * The bh is consumed by ext4_mark_iloc_dirty, even with
- * error != 0.
- */
- is.iloc.bh = NULL;
- if (IS_SYNC(inode))
- ext4_handle_sync(handle);
- }
-
-cleanup:
- brelse(is.iloc.bh);
- brelse(bs.bh);
- if (no_expand == 0)
- ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
- up_write(&EXT4_I(inode)->xattr_sem);
- return error;
-}
-
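Seen from user space, the XATTR_CREATE/XATTR_REPLACE semantics implemented
above surface through setxattr(2), and the -EEXIST/-ENODATA returns taken in
this function come back as errno values. A sketch (the path and attribute
name are hypothetical):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(void)
{
	const char *path = "/tmp/example";	/* hypothetical test file */

	/* XATTR_CREATE fails with EEXIST if the attribute already exists. */
	if (setxattr(path, "user.demo", "v1", 2, XATTR_CREATE) != 0)
		printf("create: %s\n", strerror(errno));

	/* XATTR_REPLACE fails with ENODATA if it does not exist yet. */
	if (setxattr(path, "user.demo", "v2", 2, XATTR_REPLACE) != 0)
		printf("replace: %s\n", strerror(errno));

	return 0;
}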
-/*
- * ext4_xattr_set()
- *
- * Like ext4_xattr_set_handle, but start from an inode. This extended
- * attribute modification is a filesystem transaction by itself.
- *
- * Returns 0, or a negative error number on failure.
- */
-int
-ext4_xattr_set(struct inode *inode, int name_index, const char *name,
- const void *value, size_t value_len, int flags)
-{
- handle_t *handle;
- int error, retries = 0;
-
-retry:
- handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
- if (IS_ERR(handle)) {
- error = PTR_ERR(handle);
- } else {
- int error2;
-
- error = ext4_xattr_set_handle(handle, inode, name_index, name,
- value, value_len, flags);
- error2 = ext4_journal_stop(handle);
- if (error == -ENOSPC &&
- ext4_should_retry_alloc(inode->i_sb, &retries))
- goto retry;
- if (error == 0)
- error = error2;
- }
-
- return error;
-}
-
-/*
- * Shift the EA entries in the inode to create space for the increased
- * i_extra_isize.
- */
-static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
- int value_offs_shift, void *to,
- void *from, size_t n, int blocksize)
-{
- struct ext4_xattr_entry *last = entry;
- int new_offs;
-
- /* Adjust the value offsets of the entries */
- for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (!last->e_value_block && last->e_value_size) {
- new_offs = le16_to_cpu(last->e_value_offs) +
- value_offs_shift;
- BUG_ON(new_offs + le32_to_cpu(last->e_value_size)
- > blocksize);
- last->e_value_offs = cpu_to_le16(new_offs);
- }
- }
- /* Shift the entries by n bytes */
- memmove(to, from, n);
-}
-
-/*
- * Expand an inode by new_extra_isize bytes when EAs are present.
- * Returns 0 on success or negative error number on failure.
- */
-int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
- struct ext4_inode *raw_inode, handle_t *handle)
-{
- struct ext4_xattr_ibody_header *header;
- struct ext4_xattr_entry *entry, *last, *first;
- struct buffer_head *bh = NULL;
- struct ext4_xattr_ibody_find *is = NULL;
- struct ext4_xattr_block_find *bs = NULL;
- char *buffer = NULL, *b_entry_name = NULL;
- size_t min_offs, free;
-	int total_ino, total_blk = 0;
- void *base, *start, *end;
- int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
- int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
-
- down_write(&EXT4_I(inode)->xattr_sem);
-retry:
- if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) {
- up_write(&EXT4_I(inode)->xattr_sem);
- return 0;
- }
-
- header = IHDR(inode, raw_inode);
- entry = IFIRST(header);
-
- /*
- * Check if enough free space is available in the inode to shift the
- * entries ahead by new_extra_isize.
- */
-
- base = start = entry;
- end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
- min_offs = end - base;
- last = entry;
- total_ino = sizeof(struct ext4_xattr_ibody_header);
-
- free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
- if (free >= new_extra_isize) {
- entry = IFIRST(header);
- ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize
- - new_extra_isize, (void *)raw_inode +
- EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
- (void *)header, total_ino,
- inode->i_sb->s_blocksize);
- EXT4_I(inode)->i_extra_isize = new_extra_isize;
- error = 0;
- goto cleanup;
- }
-
- /*
- * Enough free space isn't available in the inode, check if
- * EA block can hold new_extra_isize bytes.
- */
- if (EXT4_I(inode)->i_file_acl) {
- bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
- error = -EIO;
- if (!bh)
- goto cleanup;
- if (ext4_xattr_check_block(bh)) {
- EXT4_ERROR_INODE(inode, "bad block %llu",
- EXT4_I(inode)->i_file_acl);
- error = -EIO;
- goto cleanup;
- }
- base = BHDR(bh);
- first = BFIRST(bh);
- end = bh->b_data + bh->b_size;
- min_offs = end - base;
- free = ext4_xattr_free_space(first, &min_offs, base,
- &total_blk);
- if (free < new_extra_isize) {
- if (!tried_min_extra_isize && s_min_extra_isize) {
- tried_min_extra_isize++;
- new_extra_isize = s_min_extra_isize;
-				brelse(bh);
-				bh = NULL;
-				goto retry;
- }
-			error = -ENOSPC;
- goto cleanup;
- }
- } else {
- free = inode->i_sb->s_blocksize;
- }
-
- while (new_extra_isize > 0) {
- size_t offs, size, entry_size;
- struct ext4_xattr_entry *small_entry = NULL;
- struct ext4_xattr_info i = {
- .value = NULL,
- .value_len = 0,
- };
- unsigned int total_size; /* EA entry size + value size */
-		unsigned int shift_bytes; /* No. of bytes to shift EAs by */
- unsigned int min_total_size = ~0U;
-
- is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
- bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
- if (!is || !bs) {
- error = -ENOMEM;
- goto cleanup;
- }
-
- is->s.not_found = -ENODATA;
- bs->s.not_found = -ENODATA;
- is->iloc.bh = NULL;
- bs->bh = NULL;
-
- last = IFIRST(header);
- /* Find the entry best suited to be pushed into EA block */
- entry = NULL;
- for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- total_size =
- EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
- EXT4_XATTR_LEN(last->e_name_len);
- if (total_size <= free && total_size < min_total_size) {
- if (total_size < new_extra_isize) {
- small_entry = last;
- } else {
- entry = last;
- min_total_size = total_size;
- }
- }
- }
-
- if (entry == NULL) {
- if (small_entry) {
- entry = small_entry;
- } else {
- if (!tried_min_extra_isize &&
- s_min_extra_isize) {
- tried_min_extra_isize++;
- new_extra_isize = s_min_extra_isize;
- goto retry;
- }
-				error = -ENOSPC;
- goto cleanup;
- }
- }
- offs = le16_to_cpu(entry->e_value_offs);
- size = le32_to_cpu(entry->e_value_size);
- entry_size = EXT4_XATTR_LEN(entry->e_name_len);
-		i.name_index = entry->e_name_index;
- buffer = kmalloc(EXT4_XATTR_SIZE(size), GFP_NOFS);
- b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
- if (!buffer || !b_entry_name) {
- error = -ENOMEM;
- goto cleanup;
- }
- /* Save the entry name and the entry value */
- memcpy(buffer, (void *)IFIRST(header) + offs,
- EXT4_XATTR_SIZE(size));
- memcpy(b_entry_name, entry->e_name, entry->e_name_len);
- b_entry_name[entry->e_name_len] = '\0';
- i.name = b_entry_name;
-
- error = ext4_get_inode_loc(inode, &is->iloc);
- if (error)
- goto cleanup;
-
- error = ext4_xattr_ibody_find(inode, &i, is);
- if (error)
- goto cleanup;
-
- /* Remove the chosen entry from the inode */
- error = ext4_xattr_ibody_set(handle, inode, &i, is);
- if (error)
- goto cleanup;
-
- entry = IFIRST(header);
-		if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
-			shift_bytes = new_extra_isize;
-		else
-			shift_bytes = entry_size + EXT4_XATTR_SIZE(size);
- /* Adjust the offsets and shift the remaining entries ahead */
- ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize -
- shift_bytes, (void *)raw_inode +
- EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes,
- (void *)header, total_ino - entry_size,
- inode->i_sb->s_blocksize);
-
- extra_isize += shift_bytes;
- new_extra_isize -= shift_bytes;
- EXT4_I(inode)->i_extra_isize = extra_isize;
-
- i.name = b_entry_name;
- i.value = buffer;
- i.value_len = size;
- error = ext4_xattr_block_find(inode, &i, bs);
- if (error)
- goto cleanup;
-
- /* Add entry which was removed from the inode into the block */
- error = ext4_xattr_block_set(handle, inode, &i, bs);
- if (error)
- goto cleanup;
- kfree(b_entry_name);
- kfree(buffer);
- b_entry_name = NULL;
- buffer = NULL;
- brelse(is->iloc.bh);
- kfree(is);
- kfree(bs);
- }
- brelse(bh);
- up_write(&EXT4_I(inode)->xattr_sem);
- return 0;
-
-cleanup:
- kfree(b_entry_name);
- kfree(buffer);
- if (is)
- brelse(is->iloc.bh);
- kfree(is);
- kfree(bs);
- brelse(bh);
- up_write(&EXT4_I(inode)->xattr_sem);
- return error;
-}
-
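The loop above evicts attributes from the inode one at a time, preferring the
smallest entry whose size alone covers the bytes still needed and keeping any
smaller entry as a fallback. A toy rerun of that selection (the sizes and
limits below are made up):

#include <limits.h>
#include <stdio.h>

int main(void)
{
	int sizes[] = { 24, 96, 48, 200 };	/* hypothetical entry sizes */
	int need = 40, free_in_block = 150;
	int best = -1, fallback = -1, best_size = INT_MAX;

	for (int i = 0; i < 4; i++) {
		if (sizes[i] > free_in_block || sizes[i] >= best_size)
			continue;
		if (sizes[i] < need)
			fallback = i;	/* too small on its own */
		else {
			best = i;	/* smallest sufficient entry so far */
			best_size = sizes[i];
		}
	}
	printf("move entry %d\n", best >= 0 ? best : fallback);	/* 2 */
	return 0;
}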
-/*
- * ext4_xattr_delete_inode()
- *
- * Free extended attribute resources associated with this inode. This
- * is called immediately before an inode is freed. We have exclusive
- * access to the inode.
- */
-void
-ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
-{
- struct buffer_head *bh = NULL;
-
- if (!EXT4_I(inode)->i_file_acl)
- goto cleanup;
- bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
- if (!bh) {
- EXT4_ERROR_INODE(inode, "block %llu read error",
- EXT4_I(inode)->i_file_acl);
- goto cleanup;
- }
- if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
- BHDR(bh)->h_blocks != cpu_to_le32(1)) {
- EXT4_ERROR_INODE(inode, "bad block %llu",
- EXT4_I(inode)->i_file_acl);
- goto cleanup;
- }
- ext4_xattr_release_block(handle, inode, bh);
- EXT4_I(inode)->i_file_acl = 0;
-
-cleanup:
- brelse(bh);
-}
-
-/*
- * ext4_xattr_put_super()
- *
- * This is called when a file system is unmounted.
- */
-void
-ext4_xattr_put_super(struct super_block *sb)
-{
- mb_cache_shrink(sb->s_bdev);
-}
-
-/*
- * ext4_xattr_cache_insert()
- *
- * Create a new entry in the extended attribute cache, and insert
- * it unless such an entry is already in the cache.
- */
-static void
-ext4_xattr_cache_insert(struct buffer_head *bh)
-{
- __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
- struct mb_cache_entry *ce;
- int error;
-
- ce = mb_cache_entry_alloc(ext4_xattr_cache, GFP_NOFS);
- if (!ce) {
- ea_bdebug(bh, "out of memory");
- return;
- }
- error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
- if (error) {
- mb_cache_entry_free(ce);
- if (error == -EBUSY) {
- ea_bdebug(bh, "already in cache");
- error = 0;
- }
- } else {
- ea_bdebug(bh, "inserting [%x]", (int)hash);
- mb_cache_entry_release(ce);
- }
-}
-
-/*
- * ext4_xattr_cmp()
- *
- * Compare two extended attribute blocks for equality.
- *
- * Returns 0 if the blocks are equal, 1 if they differ, and
- * a negative error number on errors.
- */
-static int
-ext4_xattr_cmp(struct ext4_xattr_header *header1,
- struct ext4_xattr_header *header2)
-{
- struct ext4_xattr_entry *entry1, *entry2;
-
- entry1 = ENTRY(header1+1);
- entry2 = ENTRY(header2+1);
- while (!IS_LAST_ENTRY(entry1)) {
- if (IS_LAST_ENTRY(entry2))
- return 1;
- if (entry1->e_hash != entry2->e_hash ||
- entry1->e_name_index != entry2->e_name_index ||
- entry1->e_name_len != entry2->e_name_len ||
- entry1->e_value_size != entry2->e_value_size ||
- memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
- return 1;
- if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
- return -EIO;
- if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
- (char *)header2 + le16_to_cpu(entry2->e_value_offs),
- le32_to_cpu(entry1->e_value_size)))
- return 1;
-
- entry1 = EXT4_XATTR_NEXT(entry1);
- entry2 = EXT4_XATTR_NEXT(entry2);
- }
- if (!IS_LAST_ENTRY(entry2))
- return 1;
- return 0;
-}
-
-/*
- * ext4_xattr_cache_find()
- *
- * Find an identical extended attribute block.
- *
- * Returns a pointer to the block found, or NULL if such a block was
- * not found or an error occurred.
- */
-static struct buffer_head *
-ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
- struct mb_cache_entry **pce)
-{
- __u32 hash = le32_to_cpu(header->h_hash);
- struct mb_cache_entry *ce;
-
- if (!header->h_hash)
- return NULL; /* never share */
- ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
-again:
- ce = mb_cache_entry_find_first(ext4_xattr_cache, inode->i_sb->s_bdev,
- hash);
- while (ce) {
- struct buffer_head *bh;
-
- if (IS_ERR(ce)) {
- if (PTR_ERR(ce) == -EAGAIN)
- goto again;
- break;
- }
- bh = sb_bread(inode->i_sb, ce->e_block);
- if (!bh) {
- EXT4_ERROR_INODE(inode, "block %lu read error",
- (unsigned long) ce->e_block);
- } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
- EXT4_XATTR_REFCOUNT_MAX) {
- ea_idebug(inode, "block %lu refcount %d>=%d",
- (unsigned long) ce->e_block,
- le32_to_cpu(BHDR(bh)->h_refcount),
- EXT4_XATTR_REFCOUNT_MAX);
- } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
- *pce = ce;
- return bh;
- }
- brelse(bh);
- ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
- }
- return NULL;
-}
-
-#define NAME_HASH_SHIFT 5
-#define VALUE_HASH_SHIFT 16
-
-/*
- * ext4_xattr_hash_entry()
- *
- * Compute the hash of an extended attribute.
- */
-static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
- struct ext4_xattr_entry *entry)
-{
- __u32 hash = 0;
- char *name = entry->e_name;
- int n;
-
- for (n = 0; n < entry->e_name_len; n++) {
- hash = (hash << NAME_HASH_SHIFT) ^
- (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
- *name++;
- }
-
- if (entry->e_value_block == 0 && entry->e_value_size != 0) {
- __le32 *value = (__le32 *)((char *)header +
- le16_to_cpu(entry->e_value_offs));
- for (n = (le32_to_cpu(entry->e_value_size) +
- EXT4_XATTR_ROUND) >> EXT4_XATTR_PAD_BITS; n; n--) {
- hash = (hash << VALUE_HASH_SHIFT) ^
- (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
- le32_to_cpu(*value++);
- }
- }
- entry->e_hash = cpu_to_le32(hash);
-}
-
-#undef NAME_HASH_SHIFT
-#undef VALUE_HASH_SHIFT
-
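The name half of ext4_xattr_hash_entry() above is a 32-bit rotate-and-xor
over the name bytes; the value half applies the same rotation to 32-bit
little-endian words. A stand-alone sketch of the name part (the kernel walks
e_name as plain char; unsigned bytes are used here for a well-defined mix):

#include <stdint.h>
#include <stdio.h>

#define NAME_HASH_SHIFT 5

static uint32_t xattr_name_hash(const char *name, int len)
{
	uint32_t hash = 0;

	for (int n = 0; n < len; n++)
		hash = (hash << NAME_HASH_SHIFT) ^
		       (hash >> (32 - NAME_HASH_SHIFT)) ^
		       (uint8_t)name[n];
	return hash;
}

int main(void)
{
	printf("hash(\"demo\") = %08x\n", xattr_name_hash("demo", 4));
	return 0;
}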
-#define BLOCK_HASH_SHIFT 16
-
-/*
- * ext4_xattr_rehash()
- *
- * Re-compute the extended attribute hash value after an entry has changed.
- */
-static void ext4_xattr_rehash(struct ext4_xattr_header *header,
- struct ext4_xattr_entry *entry)
-{
- struct ext4_xattr_entry *here;
- __u32 hash = 0;
-
- ext4_xattr_hash_entry(header, entry);
- here = ENTRY(header+1);
- while (!IS_LAST_ENTRY(here)) {
- if (!here->e_hash) {
- /* Block is not shared if an entry's hash value == 0 */
- hash = 0;
- break;
- }
- hash = (hash << BLOCK_HASH_SHIFT) ^
- (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
- le32_to_cpu(here->e_hash);
- here = EXT4_XATTR_NEXT(here);
- }
- header->h_hash = cpu_to_le32(hash);
-}
-
-#undef BLOCK_HASH_SHIFT
-
-int __init
-ext4_init_xattr(void)
-{
- ext4_xattr_cache = mb_cache_create("ext4_xattr", 6);
- if (!ext4_xattr_cache)
- return -ENOMEM;
- return 0;
-}
-
-void
-ext4_exit_xattr(void)
-{
- if (ext4_xattr_cache)
- mb_cache_destroy(ext4_xattr_cache);
- ext4_xattr_cache = NULL;
-}
diff --git a/ANDROID_3.4.5/fs/ext4/xattr.h b/ANDROID_3.4.5/fs/ext4/xattr.h
deleted file mode 100644
index 25b7387f..00000000
--- a/ANDROID_3.4.5/fs/ext4/xattr.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- File: fs/ext4/xattr.h
-
- On-disk format of extended attributes for the ext4 filesystem.
-
- (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
-*/
-
-#include <linux/xattr.h>
-
-/* Magic value in attribute blocks */
-#define EXT4_XATTR_MAGIC 0xEA020000
-
-/* Maximum number of references to one attribute block */
-#define EXT4_XATTR_REFCOUNT_MAX 1024
-
-/* Name indexes */
-#define EXT4_XATTR_INDEX_USER 1
-#define EXT4_XATTR_INDEX_POSIX_ACL_ACCESS 2
-#define EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT 3
-#define EXT4_XATTR_INDEX_TRUSTED 4
-#define EXT4_XATTR_INDEX_LUSTRE 5
-#define EXT4_XATTR_INDEX_SECURITY 6
-
-struct ext4_xattr_header {
- __le32 h_magic; /* magic number for identification */
- __le32 h_refcount; /* reference count */
- __le32 h_blocks; /* number of disk blocks used */
- __le32 h_hash; /* hash value of all attributes */
- __u32 h_reserved[4]; /* zero right now */
-};
-
-struct ext4_xattr_ibody_header {
- __le32 h_magic; /* magic number for identification */
-};
-
-struct ext4_xattr_entry {
- __u8 e_name_len; /* length of name */
- __u8 e_name_index; /* attribute name index */
- __le16 e_value_offs; /* offset in disk block of value */
-	__le32	e_value_block;	/* disk block attribute is stored on (not implemented) */
- __le32 e_value_size; /* size of attribute value */
- __le32 e_hash; /* hash value of name and value */
- char e_name[0]; /* attribute name */
-};
-
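The fixed part of this entry is exactly 16 bytes, which is the constant that
EXT4_XATTR_LEN below folds into its rounding. A user-space mirror that checks
the layout (fixed-width stand-ins for the __le types; C11 static_assert):

#include <assert.h>
#include <stdint.h>

struct xattr_entry_mirror {
	uint8_t  e_name_len;
	uint8_t  e_name_index;
	uint16_t e_value_offs;
	uint32_t e_value_block;
	uint32_t e_value_size;
	uint32_t e_hash;
	char     e_name[];	/* flexible array, not counted by sizeof */
};

static_assert(sizeof(struct xattr_entry_mirror) == 16,
	      "fixed entry header must be 16 bytes");

int main(void)
{
	return 0;
}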
-#define EXT4_XATTR_PAD_BITS 2
-#define EXT4_XATTR_PAD (1<<EXT4_XATTR_PAD_BITS)
-#define EXT4_XATTR_ROUND (EXT4_XATTR_PAD-1)
-#define EXT4_XATTR_LEN(name_len) \
- (((name_len) + EXT4_XATTR_ROUND + \
- sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND)
-#define EXT4_XATTR_NEXT(entry) \
- ((struct ext4_xattr_entry *)( \
- (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)))
-#define EXT4_XATTR_SIZE(size) \
- (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)
-
-#define IHDR(inode, raw_inode) \
- ((struct ext4_xattr_ibody_header *) \
- ((void *)raw_inode + \
- EXT4_GOOD_OLD_INODE_SIZE + \
- EXT4_I(inode)->i_extra_isize))
-#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
-
-# ifdef CONFIG_EXT4_FS_XATTR
-
-extern const struct xattr_handler ext4_xattr_user_handler;
-extern const struct xattr_handler ext4_xattr_trusted_handler;
-extern const struct xattr_handler ext4_xattr_acl_access_handler;
-extern const struct xattr_handler ext4_xattr_acl_default_handler;
-extern const struct xattr_handler ext4_xattr_security_handler;
-
-extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
-
-extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
-extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
-extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
-
-extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
-extern void ext4_xattr_put_super(struct super_block *);
-
-extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
- struct ext4_inode *raw_inode, handle_t *handle);
-
-extern int __init ext4_init_xattr(void);
-extern void ext4_exit_xattr(void);
-
-extern const struct xattr_handler *ext4_xattr_handlers[];
-
-# else /* CONFIG_EXT4_FS_XATTR */
-
-static inline int
-ext4_xattr_get(struct inode *inode, int name_index, const char *name,
- void *buffer, size_t size, int flags)
-{
- return -EOPNOTSUPP;
-}
-
-static inline int
-ext4_xattr_set(struct inode *inode, int name_index, const char *name,
- const void *value, size_t size, int flags)
-{
- return -EOPNOTSUPP;
-}
-
-static inline int
-ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
- const char *name, const void *value, size_t size, int flags)
-{
- return -EOPNOTSUPP;
-}
-
-static inline void
-ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
-{
-}
-
-static inline void
-ext4_xattr_put_super(struct super_block *sb)
-{
-}
-
-static __init inline int
-ext4_init_xattr(void)
-{
- return 0;
-}
-
-static inline void
-ext4_exit_xattr(void)
-{
-}
-
-static inline int
-ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
- struct ext4_inode *raw_inode, handle_t *handle)
-{
- return -EOPNOTSUPP;
-}
-
-#define ext4_xattr_handlers NULL
-
-# endif /* CONFIG_EXT4_FS_XATTR */
-
-#ifdef CONFIG_EXT4_FS_SECURITY
-extern int ext4_init_security(handle_t *handle, struct inode *inode,
- struct inode *dir, const struct qstr *qstr);
-#else
-static inline int ext4_init_security(handle_t *handle, struct inode *inode,
- struct inode *dir, const struct qstr *qstr)
-{
- return 0;
-}
-#endif
diff --git a/ANDROID_3.4.5/fs/ext4/xattr_security.c b/ANDROID_3.4.5/fs/ext4/xattr_security.c
deleted file mode 100644
index d2a20062..00000000
--- a/ANDROID_3.4.5/fs/ext4/xattr_security.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * linux/fs/ext4/xattr_security.c
- * Handler for storing security labels as extended attributes.
- */
-
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/security.h>
-#include <linux/slab.h>
-#include "ext4_jbd2.h"
-#include "ext4.h"
-#include "xattr.h"
-
-static size_t
-ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len, int type)
-{
- const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
- const size_t total_len = prefix_len + name_len + 1;
-
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
-static int
-ext4_xattr_security_get(struct dentry *dentry, const char *name,
- void *buffer, size_t size, int type)
-{
- if (strcmp(name, "") == 0)
- return -EINVAL;
- return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
- name, buffer, size);
-}
-
-static int
-ext4_xattr_security_set(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags, int type)
-{
- if (strcmp(name, "") == 0)
- return -EINVAL;
- return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
- name, value, size, flags);
-}
-
-static int
-ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array,
- void *fs_info)
-{
- const struct xattr *xattr;
- handle_t *handle = fs_info;
- int err = 0;
-
- for (xattr = xattr_array; xattr->name != NULL; xattr++) {
- err = ext4_xattr_set_handle(handle, inode,
- EXT4_XATTR_INDEX_SECURITY,
- xattr->name, xattr->value,
- xattr->value_len, 0);
- if (err < 0)
- break;
- }
- return err;
-}
-
-int
-ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
- const struct qstr *qstr)
-{
- return security_inode_init_security(inode, dir, qstr,
- &ext4_initxattrs, handle);
-}
-
-const struct xattr_handler ext4_xattr_security_handler = {
- .prefix = XATTR_SECURITY_PREFIX,
- .list = ext4_xattr_security_list,
- .get = ext4_xattr_security_get,
- .set = ext4_xattr_security_set,
-};
diff --git a/ANDROID_3.4.5/fs/ext4/xattr_trusted.c b/ANDROID_3.4.5/fs/ext4/xattr_trusted.c
deleted file mode 100644
index 95f1f4ab..00000000
--- a/ANDROID_3.4.5/fs/ext4/xattr_trusted.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * linux/fs/ext4/xattr_trusted.c
- * Handler for trusted extended attributes.
- *
- * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
- */
-
-#include <linux/string.h>
-#include <linux/capability.h>
-#include <linux/fs.h>
-#include "ext4_jbd2.h"
-#include "ext4.h"
-#include "xattr.h"
-
-static size_t
-ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len, int type)
-{
- const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (!capable(CAP_SYS_ADMIN))
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
-static int
-ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer,
- size_t size, int type)
-{
- if (strcmp(name, "") == 0)
- return -EINVAL;
- return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
- name, buffer, size);
-}
-
-static int
-ext4_xattr_trusted_set(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags, int type)
-{
- if (strcmp(name, "") == 0)
- return -EINVAL;
- return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
- name, value, size, flags);
-}
-
-const struct xattr_handler ext4_xattr_trusted_handler = {
- .prefix = XATTR_TRUSTED_PREFIX,
- .list = ext4_xattr_trusted_list,
- .get = ext4_xattr_trusted_get,
- .set = ext4_xattr_trusted_set,
-};
diff --git a/ANDROID_3.4.5/fs/ext4/xattr_user.c b/ANDROID_3.4.5/fs/ext4/xattr_user.c
deleted file mode 100644
index 0edb7611..00000000
--- a/ANDROID_3.4.5/fs/ext4/xattr_user.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * linux/fs/ext4/xattr_user.c
- * Handler for extended user attributes.
- *
- * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
- */
-
-#include <linux/string.h>
-#include <linux/fs.h>
-#include "ext4_jbd2.h"
-#include "ext4.h"
-#include "xattr.h"
-
-static size_t
-ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len, int type)
-{
- const size_t prefix_len = XATTR_USER_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (!test_opt(dentry->d_sb, XATTR_USER))
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_USER_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
-static int
-ext4_xattr_user_get(struct dentry *dentry, const char *name,
- void *buffer, size_t size, int type)
-{
- if (strcmp(name, "") == 0)
- return -EINVAL;
- if (!test_opt(dentry->d_sb, XATTR_USER))
- return -EOPNOTSUPP;
- return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_USER,
- name, buffer, size);
-}
-
-static int
-ext4_xattr_user_set(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags, int type)
-{
- if (strcmp(name, "") == 0)
- return -EINVAL;
- if (!test_opt(dentry->d_sb, XATTR_USER))
- return -EOPNOTSUPP;
- return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_USER,
- name, value, size, flags);
-}
-
-const struct xattr_handler ext4_xattr_user_handler = {
- .prefix = XATTR_USER_PREFIX,
- .list = ext4_xattr_user_list,
- .get = ext4_xattr_user_get,
- .set = ext4_xattr_user_set,
-};
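All three ->list handlers above produce the same shape of output: the
handler's prefix ("security.", "trusted." or "user.") glued to the attribute
name, NUL-terminated. From user space the concatenated result comes back
through listxattr(2); a sketch (the path is hypothetical):

#include <stdio.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(void)
{
	char buf[1024];
	ssize_t len = listxattr("/tmp/example", buf, sizeof(buf));

	if (len < 0) {
		perror("listxattr");
		return 1;
	}
	/* entries are packed back to back, each terminated by a NUL */
	for (ssize_t off = 0; off < len; off++) {
		printf("%s\n", buf + off);
		while (off < len && buf[off] != '\0')
			off++;
	}
	return 0;
}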