diff options
Diffstat (limited to 'ANDROID_3.4.5/fs/jfs')
46 files changed, 0 insertions, 32779 deletions
diff --git a/ANDROID_3.4.5/fs/jfs/Kconfig b/ANDROID_3.4.5/fs/jfs/Kconfig deleted file mode 100644 index 57cef199..00000000 --- a/ANDROID_3.4.5/fs/jfs/Kconfig +++ /dev/null @@ -1,50 +0,0 @@ -config JFS_FS - tristate "JFS filesystem support" - select NLS - select CRC32 - help - This is a port of IBM's Journaled Filesystem . More information is - available in the file <file:Documentation/filesystems/jfs.txt>. - - If you do not intend to use the JFS filesystem, say N. - -config JFS_POSIX_ACL - bool "JFS POSIX Access Control Lists" - depends on JFS_FS - select FS_POSIX_ACL - help - Posix Access Control Lists (ACLs) support permissions for users and - groups beyond the owner/group/world scheme. - - To learn more about Access Control Lists, visit the Posix ACLs for - Linux website <http://acl.bestbits.at/>. - - If you don't know what Access Control Lists are, say N - -config JFS_SECURITY - bool "JFS Security Labels" - depends on JFS_FS - help - Security labels support alternative access control models - implemented by security modules like SELinux. This option - enables an extended attribute handler for file security - labels in the jfs filesystem. - - If you are not using a security module that requires using - extended attributes for file security labels, say N. - -config JFS_DEBUG - bool "JFS debugging" - depends on JFS_FS - help - If you are experiencing any problems with the JFS filesystem, say - Y here. This will result in additional debugging messages to be - written to the system log. Under normal circumstances, this - results in very little overhead. - -config JFS_STATISTICS - bool "JFS statistics" - depends on JFS_FS - help - Enabling this option will cause statistics from the JFS file system - to be made available to the user in the /proc/fs/jfs/ directory. 
diff --git a/ANDROID_3.4.5/fs/jfs/Makefile b/ANDROID_3.4.5/fs/jfs/Makefile deleted file mode 100644 index a58fa72d..00000000 --- a/ANDROID_3.4.5/fs/jfs/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -# -# Makefile for the Linux JFS filesystem routines. -# - -obj-$(CONFIG_JFS_FS) += jfs.o - -jfs-y := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \ - jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \ - jfs_unicode.o jfs_dtree.o jfs_inode.o \ - jfs_extent.o symlink.o jfs_metapage.o \ - jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o \ - resize.o xattr.o ioctl.o - -jfs-$(CONFIG_JFS_POSIX_ACL) += acl.o - -ccflags-y := -D_JFS_4K diff --git a/ANDROID_3.4.5/fs/jfs/acl.c b/ANDROID_3.4.5/fs/jfs/acl.c deleted file mode 100644 index 45559dc3..00000000 --- a/ANDROID_3.4.5/fs/jfs/acl.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2002-2004 - * Copyright (C) Andreas Gruenbacher, 2001 - * Copyright (C) Linus Torvalds, 1991, 1992 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/fs.h> -#include <linux/posix_acl_xattr.h> -#include "jfs_incore.h" -#include "jfs_txnmgr.h" -#include "jfs_xattr.h" -#include "jfs_acl.h" - -struct posix_acl *jfs_get_acl(struct inode *inode, int type) -{ - struct posix_acl *acl; - char *ea_name; - int size; - char *value = NULL; - - acl = get_cached_acl(inode, type); - if (acl != ACL_NOT_CACHED) - return acl; - - switch(type) { - case ACL_TYPE_ACCESS: - ea_name = POSIX_ACL_XATTR_ACCESS; - break; - case ACL_TYPE_DEFAULT: - ea_name = POSIX_ACL_XATTR_DEFAULT; - break; - default: - return ERR_PTR(-EINVAL); - } - - size = __jfs_getxattr(inode, ea_name, NULL, 0); - - if (size > 0) { - value = kmalloc(size, GFP_KERNEL); - if (!value) - return ERR_PTR(-ENOMEM); - size = __jfs_getxattr(inode, ea_name, value, size); - } - - if (size < 0) { - if (size == -ENODATA) - acl = NULL; - else - acl = ERR_PTR(size); - } else { - acl = posix_acl_from_xattr(value, size); - } - kfree(value); - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); - return acl; -} - -static int jfs_set_acl(tid_t tid, struct inode *inode, int type, - struct posix_acl *acl) -{ - char *ea_name; - int rc; - int size = 0; - char *value = NULL; - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - switch(type) { - case ACL_TYPE_ACCESS: - ea_name = POSIX_ACL_XATTR_ACCESS; - break; - case ACL_TYPE_DEFAULT: - ea_name = POSIX_ACL_XATTR_DEFAULT; - if (!S_ISDIR(inode->i_mode)) - return acl ? 
-EACCES : 0; - break; - default: - return -EINVAL; - } - if (acl) { - size = posix_acl_xattr_size(acl->a_count); - value = kmalloc(size, GFP_KERNEL); - if (!value) - return -ENOMEM; - rc = posix_acl_to_xattr(acl, value, size); - if (rc < 0) - goto out; - } - rc = __jfs_setxattr(tid, inode, ea_name, value, size, 0); -out: - kfree(value); - - if (!rc) - set_cached_acl(inode, type, acl); - - return rc; -} - -int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) -{ - struct posix_acl *acl = NULL; - int rc = 0; - - if (S_ISLNK(inode->i_mode)) - return 0; - - acl = jfs_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(acl)) - return PTR_ERR(acl); - - if (acl) { - if (S_ISDIR(inode->i_mode)) { - rc = jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, acl); - if (rc) - goto cleanup; - } - rc = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); - if (rc < 0) - goto cleanup; /* posix_acl_release(NULL) is no-op */ - if (rc > 0) - rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, acl); -cleanup: - posix_acl_release(acl); - } else - inode->i_mode &= ~current_umask(); - - JFS_IP(inode)->mode2 = (JFS_IP(inode)->mode2 & 0xffff0000) | - inode->i_mode; - - return rc; -} - -int jfs_acl_chmod(struct inode *inode) -{ - struct posix_acl *acl; - int rc; - tid_t tid; - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl) || !acl) - return PTR_ERR(acl); - - rc = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); - if (rc) - return rc; - - tid = txBegin(inode->i_sb, 0); - mutex_lock(&JFS_IP(inode)->commit_mutex); - rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, acl); - if (!rc) - rc = txCommit(tid, 1, &inode, 0); - txEnd(tid); - mutex_unlock(&JFS_IP(inode)->commit_mutex); - - posix_acl_release(acl); - return rc; -} diff --git a/ANDROID_3.4.5/fs/jfs/endian24.h b/ANDROID_3.4.5/fs/jfs/endian24.h deleted file mode 100644 index fa92f7f1..00000000 --- a/ANDROID_3.4.5/fs/jfs/endian24.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (C) 
International Business Machines Corp., 2001 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_ENDIAN24 -#define _H_ENDIAN24 - -/* - * endian24.h: - * - * Endian conversion for 24-byte data - * - */ -#define __swab24(x) \ -({ \ - __u32 __x = (x); \ - ((__u32)( \ - ((__x & (__u32)0x000000ffUL) << 16) | \ - (__x & (__u32)0x0000ff00UL) | \ - ((__x & (__u32)0x00ff0000UL) >> 16) )); \ -}) - -#if (defined(__KERNEL__) && defined(__LITTLE_ENDIAN)) || (defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN)) - #define __cpu_to_le24(x) ((__u32)(x)) - #define __le24_to_cpu(x) ((__u32)(x)) -#else - #define __cpu_to_le24(x) __swab24(x) - #define __le24_to_cpu(x) __swab24(x) -#endif - -#ifdef __KERNEL__ - #define cpu_to_le24 __cpu_to_le24 - #define le24_to_cpu __le24_to_cpu -#endif - -#endif /* !_H_ENDIAN24 */ diff --git a/ANDROID_3.4.5/fs/jfs/file.c b/ANDROID_3.4.5/fs/jfs/file.c deleted file mode 100644 index 844f9460..00000000 --- a/ANDROID_3.4.5/fs/jfs/file.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2002 - * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software 
Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/quotaops.h> -#include "jfs_incore.h" -#include "jfs_inode.h" -#include "jfs_dmap.h" -#include "jfs_txnmgr.h" -#include "jfs_xattr.h" -#include "jfs_acl.h" -#include "jfs_debug.h" - -int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) -{ - struct inode *inode = file->f_mapping->host; - int rc = 0; - - rc = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (rc) - return rc; - - mutex_lock(&inode->i_mutex); - if (!(inode->i_state & I_DIRTY) || - (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) { - /* Make sure committed changes hit the disk */ - jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1); - mutex_unlock(&inode->i_mutex); - return rc; - } - - rc |= jfs_commit_inode(inode, 1); - mutex_unlock(&inode->i_mutex); - - return rc ? -EIO : 0; -} - -static int jfs_open(struct inode *inode, struct file *file) -{ - int rc; - - if ((rc = dquot_file_open(inode, file))) - return rc; - - /* - * We attempt to allow only one "active" file open per aggregate - * group. Otherwise, appending to files in parallel can cause - * fragmentation within the files. - * - * If the file is empty, it was probably just created and going - * to be written to. If it has a size, we'll hold off until the - * file is actually grown. 
- */ - if (S_ISREG(inode->i_mode) && file->f_mode & FMODE_WRITE && - (inode->i_size == 0)) { - struct jfs_inode_info *ji = JFS_IP(inode); - spin_lock_irq(&ji->ag_lock); - if (ji->active_ag == -1) { - struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb); - ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb); - atomic_inc( &jfs_sb->bmap->db_active[ji->active_ag]); - } - spin_unlock_irq(&ji->ag_lock); - } - - return 0; -} -static int jfs_release(struct inode *inode, struct file *file) -{ - struct jfs_inode_info *ji = JFS_IP(inode); - - spin_lock_irq(&ji->ag_lock); - if (ji->active_ag != -1) { - struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap; - atomic_dec(&bmap->db_active[ji->active_ag]); - ji->active_ag = -1; - } - spin_unlock_irq(&ji->ag_lock); - - return 0; -} - -int jfs_setattr(struct dentry *dentry, struct iattr *iattr) -{ - struct inode *inode = dentry->d_inode; - int rc; - - rc = inode_change_ok(inode, iattr); - if (rc) - return rc; - - if (is_quota_modification(inode, iattr)) - dquot_initialize(inode); - if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || - (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { - rc = dquot_transfer(inode, iattr); - if (rc) - return rc; - } - - if ((iattr->ia_valid & ATTR_SIZE) && - iattr->ia_size != i_size_read(inode)) { - inode_dio_wait(inode); - - rc = vmtruncate(inode, iattr->ia_size); - if (rc) - return rc; - } - - setattr_copy(inode, iattr); - mark_inode_dirty(inode); - - if (iattr->ia_valid & ATTR_MODE) - rc = jfs_acl_chmod(inode); - return rc; -} - -const struct inode_operations jfs_file_inode_operations = { - .truncate = jfs_truncate, - .setxattr = jfs_setxattr, - .getxattr = jfs_getxattr, - .listxattr = jfs_listxattr, - .removexattr = jfs_removexattr, - .setattr = jfs_setattr, -#ifdef CONFIG_JFS_POSIX_ACL - .get_acl = jfs_get_acl, -#endif -}; - -const struct file_operations jfs_file_operations = { - .open = jfs_open, - .llseek = generic_file_llseek, - .write = do_sync_write, - .read = 
do_sync_read, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, - .mmap = generic_file_mmap, - .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, - .fsync = jfs_fsync, - .release = jfs_release, - .unlocked_ioctl = jfs_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = jfs_compat_ioctl, -#endif -}; diff --git a/ANDROID_3.4.5/fs/jfs/inode.c b/ANDROID_3.4.5/fs/jfs/inode.c deleted file mode 100644 index 77b69b27..00000000 --- a/ANDROID_3.4.5/fs/jfs/inode.c +++ /dev/null @@ -1,414 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/fs.h> -#include <linux/mpage.h> -#include <linux/buffer_head.h> -#include <linux/pagemap.h> -#include <linux/quotaops.h> -#include <linux/writeback.h> -#include "jfs_incore.h" -#include "jfs_inode.h" -#include "jfs_filsys.h" -#include "jfs_imap.h" -#include "jfs_extent.h" -#include "jfs_unicode.h" -#include "jfs_debug.h" - - -struct inode *jfs_iget(struct super_block *sb, unsigned long ino) -{ - struct inode *inode; - int ret; - - inode = iget_locked(sb, ino); - if (!inode) - return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) - return inode; - - ret = diRead(inode); - if (ret < 0) { - iget_failed(inode); - return ERR_PTR(ret); - } - - if (S_ISREG(inode->i_mode)) { - inode->i_op = &jfs_file_inode_operations; - inode->i_fop = &jfs_file_operations; - inode->i_mapping->a_ops = &jfs_aops; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &jfs_dir_inode_operations; - inode->i_fop = &jfs_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { - if (inode->i_size >= IDATASIZE) { - inode->i_op = &page_symlink_inode_operations; - inode->i_mapping->a_ops = &jfs_aops; - } else { - inode->i_op = &jfs_fast_symlink_inode_operations; - /* - * The inline data should be null-terminated, but - * don't let on-disk corruption crash the kernel - */ - JFS_IP(inode)->i_inline[inode->i_size] = '\0'; - } - } else { - inode->i_op = &jfs_file_inode_operations; - init_special_inode(inode, inode->i_mode, inode->i_rdev); - } - unlock_new_inode(inode); - return inode; -} - -/* - * Workhorse of both fsync & write_inode - */ -int jfs_commit_inode(struct inode *inode, int wait) -{ - int rc = 0; - tid_t tid; - static int noisy = 5; - - jfs_info("In jfs_commit_inode, inode = 0x%p", inode); - - /* - * Don't commit if inode has been committed since last being 
- * marked dirty, or if it has been deleted. - */ - if (inode->i_nlink == 0 || !test_cflag(COMMIT_Dirty, inode)) - return 0; - - if (isReadOnly(inode)) { - /* kernel allows writes to devices on read-only - * partitions and may think inode is dirty - */ - if (!special_file(inode->i_mode) && noisy) { - jfs_err("jfs_commit_inode(0x%p) called on " - "read-only volume", inode); - jfs_err("Is remount racy?"); - noisy--; - } - return 0; - } - - tid = txBegin(inode->i_sb, COMMIT_INODE); - mutex_lock(&JFS_IP(inode)->commit_mutex); - - /* - * Retest inode state after taking commit_mutex - */ - if (inode->i_nlink && test_cflag(COMMIT_Dirty, inode)) - rc = txCommit(tid, 1, &inode, wait ? COMMIT_SYNC : 0); - - txEnd(tid); - mutex_unlock(&JFS_IP(inode)->commit_mutex); - return rc; -} - -int jfs_write_inode(struct inode *inode, struct writeback_control *wbc) -{ - int wait = wbc->sync_mode == WB_SYNC_ALL; - - if (test_cflag(COMMIT_Nolink, inode)) - return 0; - /* - * If COMMIT_DIRTY is not set, the inode isn't really dirty. - * It has been committed since the last change, but was still - * on the dirty inode list. - */ - if (!test_cflag(COMMIT_Dirty, inode)) { - /* Make sure committed changes hit the disk */ - jfs_flush_journal(JFS_SBI(inode->i_sb)->log, wait); - return 0; - } - - if (jfs_commit_inode(inode, wait)) { - jfs_err("jfs_write_inode: jfs_commit_inode failed!"); - return -EIO; - } else - return 0; -} - -void jfs_evict_inode(struct inode *inode) -{ - jfs_info("In jfs_evict_inode, inode = 0x%p", inode); - - if (!inode->i_nlink && !is_bad_inode(inode)) { - dquot_initialize(inode); - - if (JFS_IP(inode)->fileset == FILESYSTEM_I) { - truncate_inode_pages(&inode->i_data, 0); - - if (test_cflag(COMMIT_Freewmap, inode)) - jfs_free_zero_link(inode); - - diFree(inode); - - /* - * Free the inode from the quota allocation. 
- */ - dquot_initialize(inode); - dquot_free_inode(inode); - } - } else { - truncate_inode_pages(&inode->i_data, 0); - } - end_writeback(inode); - dquot_drop(inode); -} - -void jfs_dirty_inode(struct inode *inode, int flags) -{ - static int noisy = 5; - - if (isReadOnly(inode)) { - if (!special_file(inode->i_mode) && noisy) { - /* kernel allows writes to devices on read-only - * partitions and may try to mark inode dirty - */ - jfs_err("jfs_dirty_inode called on read-only volume"); - jfs_err("Is remount racy?"); - noisy--; - } - return; - } - - set_cflag(COMMIT_Dirty, inode); -} - -int jfs_get_block(struct inode *ip, sector_t lblock, - struct buffer_head *bh_result, int create) -{ - s64 lblock64 = lblock; - int rc = 0; - xad_t xad; - s64 xaddr; - int xflag; - s32 xlen = bh_result->b_size >> ip->i_blkbits; - - /* - * Take appropriate lock on inode - */ - if (create) - IWRITE_LOCK(ip, RDWRLOCK_NORMAL); - else - IREAD_LOCK(ip, RDWRLOCK_NORMAL); - - if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) && - (!xtLookup(ip, lblock64, xlen, &xflag, &xaddr, &xlen, 0)) && - xaddr) { - if (xflag & XAD_NOTRECORDED) { - if (!create) - /* - * Allocated but not recorded, read treats - * this as a hole - */ - goto unlock; -#ifdef _JFS_4K - XADoffset(&xad, lblock64); - XADlength(&xad, xlen); - XADaddress(&xad, xaddr); -#else /* _JFS_4K */ - /* - * As long as block size = 4K, this isn't a problem. - * We should mark the whole page not ABNR, but how - * will we know to mark the other blocks BH_New? 
- */ - BUG(); -#endif /* _JFS_4K */ - rc = extRecord(ip, &xad); - if (rc) - goto unlock; - set_buffer_new(bh_result); - } - - map_bh(bh_result, ip->i_sb, xaddr); - bh_result->b_size = xlen << ip->i_blkbits; - goto unlock; - } - if (!create) - goto unlock; - - /* - * Allocate a new block - */ -#ifdef _JFS_4K - if ((rc = extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad))) - goto unlock; - rc = extAlloc(ip, xlen, lblock64, &xad, false); - if (rc) - goto unlock; - - set_buffer_new(bh_result); - map_bh(bh_result, ip->i_sb, addressXAD(&xad)); - bh_result->b_size = lengthXAD(&xad) << ip->i_blkbits; - -#else /* _JFS_4K */ - /* - * We need to do whatever it takes to keep all but the last buffers - * in 4K pages - see jfs_write.c - */ - BUG(); -#endif /* _JFS_4K */ - - unlock: - /* - * Release lock on inode - */ - if (create) - IWRITE_UNLOCK(ip); - else - IREAD_UNLOCK(ip); - return rc; -} - -static int jfs_writepage(struct page *page, struct writeback_control *wbc) -{ - return block_write_full_page(page, jfs_get_block, wbc); -} - -static int jfs_writepages(struct address_space *mapping, - struct writeback_control *wbc) -{ - return mpage_writepages(mapping, wbc, jfs_get_block); -} - -static int jfs_readpage(struct file *file, struct page *page) -{ - return mpage_readpage(page, jfs_get_block); -} - -static int jfs_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) -{ - return mpage_readpages(mapping, pages, nr_pages, jfs_get_block); -} - -static int jfs_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) -{ - int ret; - - ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata, - jfs_get_block); - if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } - - return ret; -} - -static sector_t jfs_bmap(struct address_space *mapping, sector_t block) -{ - 
return generic_block_bmap(mapping, block, jfs_get_block); -} - -static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, unsigned long nr_segs) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - ssize_t ret; - - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - jfs_get_block); - - /* - * In case of error extending write may have instantiated a few - * blocks outside i_size. Trim these off again. - */ - if (unlikely((rw & WRITE) && ret < 0)) { - loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); - - if (end > isize) - vmtruncate(inode, isize); - } - - return ret; -} - -const struct address_space_operations jfs_aops = { - .readpage = jfs_readpage, - .readpages = jfs_readpages, - .writepage = jfs_writepage, - .writepages = jfs_writepages, - .write_begin = jfs_write_begin, - .write_end = nobh_write_end, - .bmap = jfs_bmap, - .direct_IO = jfs_direct_IO, -}; - -/* - * Guts of jfs_truncate. Called with locks already held. Can be called - * with directory for truncating directory index table. - */ -void jfs_truncate_nolock(struct inode *ip, loff_t length) -{ - loff_t newsize; - tid_t tid; - - ASSERT(length >= 0); - - if (test_cflag(COMMIT_Nolink, ip)) { - xtTruncate(0, ip, length, COMMIT_WMAP); - return; - } - - do { - tid = txBegin(ip->i_sb, 0); - - /* - * The commit_mutex cannot be taken before txBegin. 
- * txBegin may block and there is a chance the inode - * could be marked dirty and need to be committed - * before txBegin unblocks - */ - mutex_lock(&JFS_IP(ip)->commit_mutex); - - newsize = xtTruncate(tid, ip, length, - COMMIT_TRUNCATE | COMMIT_PWMAP); - if (newsize < 0) { - txEnd(tid); - mutex_unlock(&JFS_IP(ip)->commit_mutex); - break; - } - - ip->i_mtime = ip->i_ctime = CURRENT_TIME; - mark_inode_dirty(ip); - - txCommit(tid, 1, &ip, 0); - txEnd(tid); - mutex_unlock(&JFS_IP(ip)->commit_mutex); - } while (newsize > length); /* Truncate isn't always atomic */ -} - -void jfs_truncate(struct inode *ip) -{ - jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size); - - nobh_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block); - - IWRITE_LOCK(ip, RDWRLOCK_NORMAL); - jfs_truncate_nolock(ip, ip->i_size); - IWRITE_UNLOCK(ip); -} diff --git a/ANDROID_3.4.5/fs/jfs/ioctl.c b/ANDROID_3.4.5/fs/jfs/ioctl.c deleted file mode 100644 index f19d1e04..00000000 --- a/ANDROID_3.4.5/fs/jfs/ioctl.c +++ /dev/null @@ -1,148 +0,0 @@ -/* - * linux/fs/jfs/ioctl.c - * - * Copyright (C) 2006 Herbert Poetzl - * adapted from Remy Card's ext2/ioctl.c - */ - -#include <linux/fs.h> -#include <linux/ctype.h> -#include <linux/capability.h> -#include <linux/mount.h> -#include <linux/time.h> -#include <linux/sched.h> -#include <asm/current.h> -#include <asm/uaccess.h> - -#include "jfs_incore.h" -#include "jfs_dinode.h" -#include "jfs_inode.h" - - -static struct { - long jfs_flag; - long ext2_flag; -} jfs_map[] = { - {JFS_NOATIME_FL, FS_NOATIME_FL}, - {JFS_DIRSYNC_FL, FS_DIRSYNC_FL}, - {JFS_SYNC_FL, FS_SYNC_FL}, - {JFS_SECRM_FL, FS_SECRM_FL}, - {JFS_UNRM_FL, FS_UNRM_FL}, - {JFS_APPEND_FL, FS_APPEND_FL}, - {JFS_IMMUTABLE_FL, FS_IMMUTABLE_FL}, - {0, 0}, -}; - -static long jfs_map_ext2(unsigned long flags, int from) -{ - int index=0; - long mapped=0; - - while (jfs_map[index].jfs_flag) { - if (from) { - if (jfs_map[index].ext2_flag & flags) - mapped |= jfs_map[index].jfs_flag; - } else { - if 
(jfs_map[index].jfs_flag & flags) - mapped |= jfs_map[index].ext2_flag; - } - index++; - } - return mapped; -} - - -long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = filp->f_dentry->d_inode; - struct jfs_inode_info *jfs_inode = JFS_IP(inode); - unsigned int flags; - - switch (cmd) { - case JFS_IOC_GETFLAGS: - jfs_get_inode_flags(jfs_inode); - flags = jfs_inode->mode2 & JFS_FL_USER_VISIBLE; - flags = jfs_map_ext2(flags, 0); - return put_user(flags, (int __user *) arg); - case JFS_IOC_SETFLAGS: { - unsigned int oldflags; - int err; - - err = mnt_want_write_file(filp); - if (err) - return err; - - if (!inode_owner_or_capable(inode)) { - err = -EACCES; - goto setflags_out; - } - if (get_user(flags, (int __user *) arg)) { - err = -EFAULT; - goto setflags_out; - } - - flags = jfs_map_ext2(flags, 1); - if (!S_ISDIR(inode->i_mode)) - flags &= ~JFS_DIRSYNC_FL; - - /* Is it quota file? Do not allow user to mess with it */ - if (IS_NOQUOTA(inode)) { - err = -EPERM; - goto setflags_out; - } - - /* Lock against other parallel changes of flags */ - mutex_lock(&inode->i_mutex); - - jfs_get_inode_flags(jfs_inode); - oldflags = jfs_inode->mode2; - - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by - * the relevant capability. 
- */ - if ((oldflags & JFS_IMMUTABLE_FL) || - ((flags ^ oldflags) & - (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - mutex_unlock(&inode->i_mutex); - err = -EPERM; - goto setflags_out; - } - } - - flags = flags & JFS_FL_USER_MODIFIABLE; - flags |= oldflags & ~JFS_FL_USER_MODIFIABLE; - jfs_inode->mode2 = flags; - - jfs_set_inode_flags(inode); - mutex_unlock(&inode->i_mutex); - inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); -setflags_out: - mnt_drop_write_file(filp); - return err; - } - default: - return -ENOTTY; - } -} - -#ifdef CONFIG_COMPAT -long jfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - /* While these ioctl numbers defined with 'long' and have different - * numbers than the 64bit ABI, - * the actual implementation only deals with ints and is compatible. - */ - switch (cmd) { - case JFS_IOC_GETFLAGS32: - cmd = JFS_IOC_GETFLAGS; - break; - case JFS_IOC_SETFLAGS32: - cmd = JFS_IOC_SETFLAGS; - break; - } - return jfs_ioctl(filp, cmd, arg); -} -#endif diff --git a/ANDROID_3.4.5/fs/jfs/jfs_acl.h b/ANDROID_3.4.5/fs/jfs/jfs_acl.h deleted file mode 100644 index ad84fe50..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_acl.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_ACL -#define _H_JFS_ACL - -#ifdef CONFIG_JFS_POSIX_ACL - -struct posix_acl *jfs_get_acl(struct inode *inode, int type); -int jfs_init_acl(tid_t, struct inode *, struct inode *); -int jfs_acl_chmod(struct inode *inode); - -#else - -static inline int jfs_init_acl(tid_t tid, struct inode *inode, - struct inode *dir) -{ - return 0; -} - -static inline int jfs_acl_chmod(struct inode *inode) -{ - return 0; -} - -#endif -#endif /* _H_JFS_ACL */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_btree.h b/ANDROID_3.4.5/fs/jfs/jfs_btree.h deleted file mode 100644 index 79c61805..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_btree.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_BTREE -#define _H_JFS_BTREE - -/* - * jfs_btree.h: B+-tree - * - * JFS B+-tree (dtree and xtree) common definitions - */ - -/* - * basic btree page - btpage - * -struct btpage { - s64 next; right sibling bn - s64 prev; left sibling bn - - u8 flag; - u8 rsrvd[7]; type specific - s64 self; self address - - u8 entry[4064]; -}; */ - -/* btpaget_t flag */ -#define BT_TYPE 0x07 /* B+-tree index */ -#define BT_ROOT 0x01 /* root page */ -#define BT_LEAF 0x02 /* leaf page */ -#define BT_INTERNAL 0x04 /* internal page */ -#define BT_RIGHTMOST 0x10 /* rightmost page */ -#define BT_LEFTMOST 0x20 /* leftmost page */ -#define BT_SWAPPED 0x80 /* used by fsck for endian swapping */ - -/* btorder (in inode) */ -#define BT_RANDOM 0x0000 -#define BT_SEQUENTIAL 0x0001 -#define BT_LOOKUP 0x0010 -#define BT_INSERT 0x0020 -#define BT_DELETE 0x0040 - -/* - * btree page buffer cache access - */ -#define BT_IS_ROOT(MP) (((MP)->xflag & COMMIT_PAGE) == 0) - -/* get page from buffer page */ -#define BT_PAGE(IP, MP, TYPE, ROOT)\ - (BT_IS_ROOT(MP) ? (TYPE *)&JFS_IP(IP)->ROOT : (TYPE *)(MP)->data) - -/* get the page buffer and the page for specified block address */ -#define BT_GETPAGE(IP, BN, MP, TYPE, SIZE, P, RC, ROOT)\ -{\ - if ((BN) == 0)\ - {\ - MP = (struct metapage *)&JFS_IP(IP)->bxflag;\ - P = (TYPE *)&JFS_IP(IP)->ROOT;\ - RC = 0;\ - }\ - else\ - {\ - MP = read_metapage((IP), BN, SIZE, 1);\ - if (MP) {\ - RC = 0;\ - P = (MP)->data;\ - } else {\ - P = NULL;\ - jfs_err("bread failed!");\ - RC = -EIO;\ - }\ - }\ -} - -#define BT_MARK_DIRTY(MP, IP)\ -{\ - if (BT_IS_ROOT(MP))\ - mark_inode_dirty(IP);\ - else\ - mark_metapage_dirty(MP);\ -} - -/* put the page buffer */ -#define BT_PUTPAGE(MP)\ -{\ - if (! 
BT_IS_ROOT(MP)) \ - release_metapage(MP); \ -} - - -/* - * btree traversal stack - * - * record the path traversed during the search; - * top frame record the leaf page/entry selected. - */ -struct btframe { /* stack frame */ - s64 bn; /* 8: */ - s16 index; /* 2: */ - s16 lastindex; /* 2: unused */ - struct metapage *mp; /* 4/8: */ -}; /* (16/24) */ - -struct btstack { - struct btframe *top; - int nsplit; - struct btframe stack[MAXTREEHEIGHT]; -}; - -#define BT_CLR(btstack)\ - (btstack)->top = (btstack)->stack - -#define BT_STACK_FULL(btstack)\ - ( (btstack)->top == &((btstack)->stack[MAXTREEHEIGHT-1])) - -#define BT_PUSH(BTSTACK, BN, INDEX)\ -{\ - assert(!BT_STACK_FULL(BTSTACK));\ - (BTSTACK)->top->bn = BN;\ - (BTSTACK)->top->index = INDEX;\ - ++(BTSTACK)->top;\ -} - -#define BT_POP(btstack)\ - ( (btstack)->top == (btstack)->stack ? NULL : --(btstack)->top ) - -#define BT_STACK(btstack)\ - ( (btstack)->top == (btstack)->stack ? NULL : (btstack)->top ) - -static inline void BT_STACK_DUMP(struct btstack *btstack) -{ - int i; - printk("btstack dump:\n"); - for (i = 0; i < MAXTREEHEIGHT; i++) - printk(KERN_ERR "bn = %Lx, index = %d\n", - (long long)btstack->stack[i].bn, - btstack->stack[i].index); -} - -/* retrieve search results */ -#define BT_GETSEARCH(IP, LEAF, BN, MP, TYPE, P, INDEX, ROOT)\ -{\ - BN = (LEAF)->bn;\ - MP = (LEAF)->mp;\ - if (BN)\ - P = (TYPE *)MP->data;\ - else\ - P = (TYPE *)&JFS_IP(IP)->ROOT;\ - INDEX = (LEAF)->index;\ -} - -/* put the page buffer of search */ -#define BT_PUTSEARCH(BTSTACK)\ -{\ - if (! 
BT_IS_ROOT((BTSTACK)->top->mp))\ - release_metapage((BTSTACK)->top->mp);\ -} -#endif /* _H_JFS_BTREE */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_debug.c b/ANDROID_3.4.5/fs/jfs/jfs_debug.c deleted file mode 100644 index dd824d9b..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_debug.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/fs.h> -#include <linux/ctype.h> -#include <linux/module.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <asm/uaccess.h> -#include "jfs_incore.h" -#include "jfs_filsys.h" -#include "jfs_debug.h" - -#ifdef PROC_FS_JFS /* see jfs_debug.h */ - -static struct proc_dir_entry *base; -#ifdef CONFIG_JFS_DEBUG -static int jfs_loglevel_proc_show(struct seq_file *m, void *v) -{ - seq_printf(m, "%d\n", jfsloglevel); - return 0; -} - -static int jfs_loglevel_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, jfs_loglevel_proc_show, NULL); -} - -static ssize_t jfs_loglevel_proc_write(struct file *file, - const char __user *buffer, size_t count, loff_t *ppos) -{ - char c; - - if (get_user(c, buffer)) - return -EFAULT; - - /* yes, I know this is an ASCIIism. 
--hch */ - if (c < '0' || c > '9') - return -EINVAL; - jfsloglevel = c - '0'; - return count; -} - -static const struct file_operations jfs_loglevel_proc_fops = { - .owner = THIS_MODULE, - .open = jfs_loglevel_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = jfs_loglevel_proc_write, -}; -#endif - -static struct { - const char *name; - const struct file_operations *proc_fops; -} Entries[] = { -#ifdef CONFIG_JFS_STATISTICS - { "lmstats", &jfs_lmstats_proc_fops, }, - { "txstats", &jfs_txstats_proc_fops, }, - { "xtstat", &jfs_xtstat_proc_fops, }, - { "mpstat", &jfs_mpstat_proc_fops, }, -#endif -#ifdef CONFIG_JFS_DEBUG - { "TxAnchor", &jfs_txanchor_proc_fops, }, - { "loglevel", &jfs_loglevel_proc_fops } -#endif -}; -#define NPROCENT ARRAY_SIZE(Entries) - -void jfs_proc_init(void) -{ - int i; - - if (!(base = proc_mkdir("fs/jfs", NULL))) - return; - - for (i = 0; i < NPROCENT; i++) - proc_create(Entries[i].name, 0, base, Entries[i].proc_fops); -} - -void jfs_proc_clean(void) -{ - int i; - - if (base) { - for (i = 0; i < NPROCENT; i++) - remove_proc_entry(Entries[i].name, base); - remove_proc_entry("fs/jfs", NULL); - } -} - -#endif /* PROC_FS_JFS */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_debug.h b/ANDROID_3.4.5/fs/jfs/jfs_debug.h deleted file mode 100644 index eafd1300..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_debug.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2002 - * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_DEBUG -#define _H_JFS_DEBUG - -/* - * jfs_debug.h - * - * global debug message, data structure/macro definitions - * under control of CONFIG_JFS_DEBUG, CONFIG_JFS_STATISTICS; - */ - -/* - * Create /proc/fs/jfs if procfs is enabled andeither - * CONFIG_JFS_DEBUG or CONFIG_JFS_STATISTICS is defined - */ -#if defined(CONFIG_PROC_FS) && (defined(CONFIG_JFS_DEBUG) || defined(CONFIG_JFS_STATISTICS)) -#define PROC_FS_JFS -extern void jfs_proc_init(void); -extern void jfs_proc_clean(void); -#endif - -/* - * assert with traditional printf/panic - */ -#define assert(p) do { \ - if (!(p)) { \ - printk(KERN_CRIT "BUG at %s:%d assert(%s)\n", \ - __FILE__, __LINE__, #p); \ - BUG(); \ - } \ -} while (0) - -/* - * debug ON - * -------- - */ -#ifdef CONFIG_JFS_DEBUG -#define ASSERT(p) assert(p) - -/* printk verbosity */ -#define JFS_LOGLEVEL_ERR 1 -#define JFS_LOGLEVEL_WARN 2 -#define JFS_LOGLEVEL_DEBUG 3 -#define JFS_LOGLEVEL_INFO 4 - -extern int jfsloglevel; - -extern const struct file_operations jfs_txanchor_proc_fops; - -/* information message: e.g., configuration, major event */ -#define jfs_info(fmt, arg...) do { \ - if (jfsloglevel >= JFS_LOGLEVEL_INFO) \ - printk(KERN_INFO fmt "\n", ## arg); \ -} while (0) - -/* debug message: ad hoc */ -#define jfs_debug(fmt, arg...) do { \ - if (jfsloglevel >= JFS_LOGLEVEL_DEBUG) \ - printk(KERN_DEBUG fmt "\n", ## arg); \ -} while (0) - -/* warn message: */ -#define jfs_warn(fmt, arg...) do { \ - if (jfsloglevel >= JFS_LOGLEVEL_WARN) \ - printk(KERN_WARNING fmt "\n", ## arg); \ -} while (0) - -/* error event message: e.g., i/o error */ -#define jfs_err(fmt, arg...) 
do { \ - if (jfsloglevel >= JFS_LOGLEVEL_ERR) \ - printk(KERN_ERR fmt "\n", ## arg); \ -} while (0) - -/* - * debug OFF - * --------- - */ -#else /* CONFIG_JFS_DEBUG */ -#define ASSERT(p) do {} while (0) -#define jfs_info(fmt, arg...) do {} while (0) -#define jfs_debug(fmt, arg...) do {} while (0) -#define jfs_warn(fmt, arg...) do {} while (0) -#define jfs_err(fmt, arg...) do {} while (0) -#endif /* CONFIG_JFS_DEBUG */ - -/* - * statistics - * ---------- - */ -#ifdef CONFIG_JFS_STATISTICS -extern const struct file_operations jfs_lmstats_proc_fops; -extern const struct file_operations jfs_txstats_proc_fops; -extern const struct file_operations jfs_mpstat_proc_fops; -extern const struct file_operations jfs_xtstat_proc_fops; - -#define INCREMENT(x) ((x)++) -#define DECREMENT(x) ((x)--) -#define HIGHWATERMARK(x,y) ((x) = max((x), (y))) -#else -#define INCREMENT(x) -#define DECREMENT(x) -#define HIGHWATERMARK(x,y) -#endif /* CONFIG_JFS_STATISTICS */ - -#endif /* _H_JFS_DEBUG */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_dinode.h b/ANDROID_3.4.5/fs/jfs/jfs_dinode.h deleted file mode 100644 index 395c4c0d..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_dinode.h +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2001 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_DINODE -#define _H_JFS_DINODE - -/* - * jfs_dinode.h: on-disk inode manager - */ - -#define INODESLOTSIZE 128 -#define L2INODESLOTSIZE 7 -#define log2INODESIZE 9 /* log2(bytes per dinode) */ - - -/* - * on-disk inode : 512 bytes - * - * note: align 64-bit fields on 8-byte boundary. - */ -struct dinode { - /* - * I. base area (128 bytes) - * ------------------------ - * - * define generic/POSIX attributes - */ - __le32 di_inostamp; /* 4: stamp to show inode belongs to fileset */ - __le32 di_fileset; /* 4: fileset number */ - __le32 di_number; /* 4: inode number, aka file serial number */ - __le32 di_gen; /* 4: inode generation number */ - - pxd_t di_ixpxd; /* 8: inode extent descriptor */ - - __le64 di_size; /* 8: size */ - __le64 di_nblocks; /* 8: number of blocks allocated */ - - __le32 di_nlink; /* 4: number of links to the object */ - - __le32 di_uid; /* 4: user id of owner */ - __le32 di_gid; /* 4: group id of owner */ - - __le32 di_mode; /* 4: attribute, format and permission */ - - struct timestruc_t di_atime; /* 8: time last data accessed */ - struct timestruc_t di_ctime; /* 8: time last status changed */ - struct timestruc_t di_mtime; /* 8: time last data modified */ - struct timestruc_t di_otime; /* 8: time created */ - - dxd_t di_acl; /* 16: acl descriptor */ - - dxd_t di_ea; /* 16: ea descriptor */ - - __le32 di_next_index; /* 4: Next available dir_table index */ - - __le32 di_acltype; /* 4: Type of ACL */ - - /* - * Extension Areas. - * - * Historically, the inode was partitioned into 4 128-byte areas, - * the last 3 being defined as unions which could have multiple - * uses. The first 96 bytes had been completely unused until - * an index table was added to the directory. 
It is now more - * useful to describe the last 3/4 of the inode as a single - * union. We would probably be better off redesigning the - * entire structure from scratch, but we don't want to break - * commonality with OS/2's JFS at this time. - */ - union { - struct { - /* - * This table contains the information needed to - * find a directory entry from a 32-bit index. - * If the index is small enough, the table is inline, - * otherwise, an x-tree root overlays this table - */ - struct dir_table_slot _table[12]; /* 96: inline */ - - dtroot_t _dtroot; /* 288: dtree root */ - } _dir; /* (384) */ -#define di_dirtable u._dir._table -#define di_dtroot u._dir._dtroot -#define di_parent di_dtroot.header.idotdot -#define di_DASD di_dtroot.header.DASD - - struct { - union { - u8 _data[96]; /* 96: unused */ - struct { - void *_imap; /* 4: unused */ - __le32 _gengen; /* 4: generator */ - } _imap; - } _u1; /* 96: */ -#define di_gengen u._file._u1._imap._gengen - - union { - xtpage_t _xtroot; - struct { - u8 unused[16]; /* 16: */ - dxd_t _dxd; /* 16: */ - union { - __le32 _rdev; /* 4: */ - u8 _fastsymlink[128]; - } _u; - u8 _inlineea[128]; - } _special; - } _u2; - } _file; -#define di_xtroot u._file._u2._xtroot -#define di_dxd u._file._u2._special._dxd -#define di_btroot di_xtroot -#define di_inlinedata u._file._u2._special._u -#define di_rdev u._file._u2._special._u._rdev -#define di_fastsymlink u._file._u2._special._u._fastsymlink -#define di_inlineea u._file._u2._special._inlineea - } u; -}; - -/* extended mode bits (on-disk inode di_mode) */ -#define IFJOURNAL 0x00010000 /* journalled file */ -#define ISPARSE 0x00020000 /* sparse file enabled */ -#define INLINEEA 0x00040000 /* inline EA area free */ -#define ISWAPFILE 0x00800000 /* file open for pager swap space */ - -/* more extended mode bits: attributes for OS/2 */ -#define IREADONLY 0x02000000 /* no write access to file */ -#define IHIDDEN 0x04000000 /* hidden file */ -#define ISYSTEM 0x08000000 /* system file */ - 
-#define IDIRECTORY 0x20000000 /* directory (shadow of real bit) */ -#define IARCHIVE 0x40000000 /* file archive bit */ -#define INEWNAME 0x80000000 /* non-8.3 filename format */ - -#define IRASH 0x4E000000 /* mask for changeable attributes */ -#define ATTRSHIFT 25 /* bits to shift to move attribute - specification to mode position */ - -/* extended attributes for Linux */ - -#define JFS_NOATIME_FL 0x00080000 /* do not update atime */ - -#define JFS_DIRSYNC_FL 0x00100000 /* dirsync behaviour */ -#define JFS_SYNC_FL 0x00200000 /* Synchronous updates */ -#define JFS_SECRM_FL 0x00400000 /* Secure deletion */ -#define JFS_UNRM_FL 0x00800000 /* allow for undelete */ - -#define JFS_APPEND_FL 0x01000000 /* writes to file may only append */ -#define JFS_IMMUTABLE_FL 0x02000000 /* Immutable file */ - -#define JFS_FL_USER_VISIBLE 0x03F80000 -#define JFS_FL_USER_MODIFIABLE 0x03F80000 -#define JFS_FL_INHERIT 0x03C80000 - -/* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */ -#define JFS_IOC_GETFLAGS _IOR('f', 1, long) -#define JFS_IOC_SETFLAGS _IOW('f', 2, long) - -#define JFS_IOC_GETFLAGS32 _IOR('f', 1, int) -#define JFS_IOC_SETFLAGS32 _IOW('f', 2, int) - -#endif /*_H_JFS_DINODE */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_dmap.c b/ANDROID_3.4.5/fs/jfs/jfs_dmap.c deleted file mode 100644 index 9cbd11a3..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_dmap.c +++ /dev/null @@ -1,3989 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/fs.h> -#include <linux/slab.h> -#include "jfs_incore.h" -#include "jfs_superblock.h" -#include "jfs_dmap.h" -#include "jfs_imap.h" -#include "jfs_lock.h" -#include "jfs_metapage.h" -#include "jfs_debug.h" - -/* - * SERIALIZATION of the Block Allocation Map. - * - * the working state of the block allocation map is accessed in - * two directions: - * - * 1) allocation and free requests that start at the dmap - * level and move up through the dmap control pages (i.e. - * the vast majority of requests). - * - * 2) allocation requests that start at dmap control page - * level and work down towards the dmaps. - * - * the serialization scheme used here is as follows. - * - * requests which start at the bottom are serialized against each - * other through buffers and each requests holds onto its buffers - * as it works it way up from a single dmap to the required level - * of dmap control page. - * requests that start at the top are serialized against each other - * and request that start from the bottom by the multiple read/single - * write inode lock of the bmap inode. requests starting at the top - * take this lock in write mode while request starting at the bottom - * take the lock in read mode. a single top-down request may proceed - * exclusively while multiple bottoms-up requests may proceed - * simultaneously (under the protection of busy buffers). - * - * in addition to information found in dmaps and dmap control pages, - * the working state of the block allocation map also includes read/ - * write information maintained in the bmap descriptor (i.e. total - * free block count, allocation group level free block counts). 
- * a single exclusive lock (BMAP_LOCK) is used to guard this information - * in the face of multiple-bottoms up requests. - * (lock ordering: IREAD_LOCK, BMAP_LOCK); - * - * accesses to the persistent state of the block allocation map (limited - * to the persistent bitmaps in dmaps) is guarded by (busy) buffers. - */ - -#define BMAP_LOCK_INIT(bmp) mutex_init(&bmp->db_bmaplock) -#define BMAP_LOCK(bmp) mutex_lock(&bmp->db_bmaplock) -#define BMAP_UNLOCK(bmp) mutex_unlock(&bmp->db_bmaplock) - -/* - * forward references - */ -static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, - int nblocks); -static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval); -static int dbBackSplit(dmtree_t * tp, int leafno); -static int dbJoin(dmtree_t * tp, int leafno, int newval); -static void dbAdjTree(dmtree_t * tp, int leafno, int newval); -static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, - int level); -static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results); -static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, - int nblocks); -static int dbAllocNear(struct bmap * bmp, struct dmap * dp, s64 blkno, - int nblocks, - int l2nb, s64 * results); -static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, - int nblocks); -static int dbAllocDmapLev(struct bmap * bmp, struct dmap * dp, int nblocks, - int l2nb, - s64 * results); -static int dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, - s64 * results); -static int dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, - s64 * results); -static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks); -static int dbFindBits(u32 word, int l2nb); -static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno); -static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx); -static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, - int nblocks); -static int dbFreeDmap(struct 
bmap * bmp, struct dmap * dp, s64 blkno, - int nblocks); -static int dbMaxBud(u8 * cp); -s64 dbMapFileSizeToMapSize(struct inode *ipbmap); -static int blkstol2(s64 nb); - -static int cntlz(u32 value); -static int cnttz(u32 word); - -static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, - int nblocks); -static int dbInitDmap(struct dmap * dp, s64 blkno, int nblocks); -static int dbInitDmapTree(struct dmap * dp); -static int dbInitTree(struct dmaptree * dtp); -static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i); -static int dbGetL2AGSize(s64 nblocks); - -/* - * buddy table - * - * table used for determining buddy sizes within characters of - * dmap bitmap words. the characters themselves serve as indexes - * into the table, with the table elements yielding the maximum - * binary buddy of free bits within the character. - */ -static const s8 budtab[256] = { - 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, - 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, - 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, - 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, - 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, - 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, - 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, - 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, - 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1 -}; - - -/* - * NAME: dbMount() - * - * FUNCTION: initializate the block allocation map. - * - * memory is allocated for the in-core bmap descriptor and - * the in-core descriptor is initialized from disk. - * - * PARAMETERS: - * ipbmap - pointer to in-core inode for the block map. 
- * - * RETURN VALUES: - * 0 - success - * -ENOMEM - insufficient memory - * -EIO - i/o error - */ -int dbMount(struct inode *ipbmap) -{ - struct bmap *bmp; - struct dbmap_disk *dbmp_le; - struct metapage *mp; - int i; - - /* - * allocate/initialize the in-memory bmap descriptor - */ - /* allocate memory for the in-memory bmap descriptor */ - bmp = kmalloc(sizeof(struct bmap), GFP_KERNEL); - if (bmp == NULL) - return -ENOMEM; - - /* read the on-disk bmap descriptor. */ - mp = read_metapage(ipbmap, - BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, - PSIZE, 0); - if (mp == NULL) { - kfree(bmp); - return -EIO; - } - - /* copy the on-disk bmap descriptor to its in-memory version. */ - dbmp_le = (struct dbmap_disk *) mp->data; - bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize); - bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree); - bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); - bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); - bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel); - bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); - bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); - bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); - bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight); - bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); - bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); - bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); - for (i = 0; i < MAXAG; i++) - bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]); - bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize); - bmp->db_maxfreebud = dbmp_le->dn_maxfreebud; - - /* release the buffer. */ - release_metapage(mp); - - /* bind the bmap inode and the bmap descriptor to each other. 
*/ - bmp->db_ipbmap = ipbmap; - JFS_SBI(ipbmap->i_sb)->bmap = bmp; - - memset(bmp->db_active, 0, sizeof(bmp->db_active)); - - /* - * allocate/initialize the bmap lock - */ - BMAP_LOCK_INIT(bmp); - - return (0); -} - - -/* - * NAME: dbUnmount() - * - * FUNCTION: terminate the block allocation map in preparation for - * file system unmount. - * - * the in-core bmap descriptor is written to disk and - * the memory for this descriptor is freed. - * - * PARAMETERS: - * ipbmap - pointer to in-core inode for the block map. - * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error - */ -int dbUnmount(struct inode *ipbmap, int mounterror) -{ - struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; - - if (!(mounterror || isReadOnly(ipbmap))) - dbSync(ipbmap); - - /* - * Invalidate the page cache buffers - */ - truncate_inode_pages(ipbmap->i_mapping, 0); - - /* free the memory for the in-memory bmap. */ - kfree(bmp); - - return (0); -} - -/* - * dbSync() - */ -int dbSync(struct inode *ipbmap) -{ - struct dbmap_disk *dbmp_le; - struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; - struct metapage *mp; - int i; - - /* - * write bmap global control page - */ - /* get the buffer for the on-disk bmap descriptor. 
*/ - mp = read_metapage(ipbmap, - BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, - PSIZE, 0); - if (mp == NULL) { - jfs_err("dbSync: read_metapage failed!"); - return -EIO; - } - /* copy the in-memory version of the bmap to the on-disk version */ - dbmp_le = (struct dbmap_disk *) mp->data; - dbmp_le->dn_mapsize = cpu_to_le64(bmp->db_mapsize); - dbmp_le->dn_nfree = cpu_to_le64(bmp->db_nfree); - dbmp_le->dn_l2nbperpage = cpu_to_le32(bmp->db_l2nbperpage); - dbmp_le->dn_numag = cpu_to_le32(bmp->db_numag); - dbmp_le->dn_maxlevel = cpu_to_le32(bmp->db_maxlevel); - dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); - dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); - dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); - dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight); - dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); - dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); - dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); - for (i = 0; i < MAXAG; i++) - dbmp_le->dn_agfree[i] = cpu_to_le64(bmp->db_agfree[i]); - dbmp_le->dn_agsize = cpu_to_le64(bmp->db_agsize); - dbmp_le->dn_maxfreebud = bmp->db_maxfreebud; - - /* write the buffer */ - write_metapage(mp); - - /* - * write out dirty pages of bmap - */ - filemap_write_and_wait(ipbmap->i_mapping); - - diWriteSpecial(ipbmap, 0); - - return (0); -} - - -/* - * NAME: dbFree() - * - * FUNCTION: free the specified block range from the working block - * allocation map. - * - * the blocks will be free from the working map one dmap - * at a time. - * - * PARAMETERS: - * ip - pointer to in-core inode; - * blkno - starting block number to be freed. - * nblocks - number of blocks to be freed. 
- * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error - */ -int dbFree(struct inode *ip, s64 blkno, s64 nblocks) -{ - struct metapage *mp; - struct dmap *dp; - int nb, rc; - s64 lblkno, rem; - struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; - struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; - - IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); - - /* block to be freed better be within the mapsize. */ - if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) { - IREAD_UNLOCK(ipbmap); - printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", - (unsigned long long) blkno, - (unsigned long long) nblocks); - jfs_error(ip->i_sb, - "dbFree: block to be freed is outside the map"); - return -EIO; - } - - /* - * free the blocks a dmap at a time. - */ - mp = NULL; - for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) { - /* release previous dmap if any */ - if (mp) { - write_metapage(mp); - } - - /* get the buffer for the current dmap. */ - lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); - mp = read_metapage(ipbmap, lblkno, PSIZE, 0); - if (mp == NULL) { - IREAD_UNLOCK(ipbmap); - return -EIO; - } - dp = (struct dmap *) mp->data; - - /* determine the number of blocks to be freed from - * this dmap. - */ - nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1))); - - /* free the blocks. */ - if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) { - jfs_error(ip->i_sb, "dbFree: error in block map\n"); - release_metapage(mp); - IREAD_UNLOCK(ipbmap); - return (rc); - } - } - - /* write the last buffer. */ - write_metapage(mp); - - IREAD_UNLOCK(ipbmap); - - return (0); -} - - -/* - * NAME: dbUpdatePMap() - * - * FUNCTION: update the allocation state (free or allocate) of the - * specified block range in the persistent block allocation map. - * - * the blocks will be updated in the persistent map one - * dmap at a time. - * - * PARAMETERS: - * ipbmap - pointer to in-core inode for the block map. - * free - 'true' if block range is to be freed from the persistent - * map; 'false' if it is to be allocated. 
- * blkno - starting block number of the range. - * nblocks - number of contiguous blocks in the range. - * tblk - transaction block; - * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error - */ -int -dbUpdatePMap(struct inode *ipbmap, - int free, s64 blkno, s64 nblocks, struct tblock * tblk) -{ - int nblks, dbitno, wbitno, rbits; - int word, nbits, nwords; - struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; - s64 lblkno, rem, lastlblkno; - u32 mask; - struct dmap *dp; - struct metapage *mp; - struct jfs_log *log; - int lsn, difft, diffp; - unsigned long flags; - - /* the blocks better be within the mapsize. */ - if (blkno + nblocks > bmp->db_mapsize) { - printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", - (unsigned long long) blkno, - (unsigned long long) nblocks); - jfs_error(ipbmap->i_sb, - "dbUpdatePMap: blocks are outside the map"); - return -EIO; - } - - /* compute delta of transaction lsn from log syncpt */ - lsn = tblk->lsn; - log = (struct jfs_log *) JFS_SBI(tblk->sb)->log; - logdiff(difft, lsn, log); - - /* - * update the block state a dmap at a time. - */ - mp = NULL; - lastlblkno = 0; - for (rem = nblocks; rem > 0; rem -= nblks, blkno += nblks) { - /* get the buffer for the current dmap. */ - lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); - if (lblkno != lastlblkno) { - if (mp) { - write_metapage(mp); - } - - mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, - 0); - if (mp == NULL) - return -EIO; - metapage_wait_for_io(mp); - } - dp = (struct dmap *) mp->data; - - /* determine the bit number and word within the dmap of - * the starting block. also determine how many blocks - * are to be updated within this dmap. - */ - dbitno = blkno & (BPERDMAP - 1); - word = dbitno >> L2DBWORD; - nblks = min(rem, (s64)BPERDMAP - dbitno); - - /* update the bits of the dmap words. the first and last - * words may only have a subset of their bits updated. if - * this is the case, we'll work against that word (i.e. - * partial first and/or last) only in a single pass. 
a - * single pass will also be used to update all words that - * are to have all their bits updated. - */ - for (rbits = nblks; rbits > 0; - rbits -= nbits, dbitno += nbits) { - /* determine the bit number within the word and - * the number of bits within the word. - */ - wbitno = dbitno & (DBWORD - 1); - nbits = min(rbits, DBWORD - wbitno); - - /* check if only part of the word is to be updated. */ - if (nbits < DBWORD) { - /* update (free or allocate) the bits - * in this word. - */ - mask = - (ONES << (DBWORD - nbits) >> wbitno); - if (free) - dp->pmap[word] &= - cpu_to_le32(~mask); - else - dp->pmap[word] |= - cpu_to_le32(mask); - - word += 1; - } else { - /* one or more words are to have all - * their bits updated. determine how - * many words and how many bits. - */ - nwords = rbits >> L2DBWORD; - nbits = nwords << L2DBWORD; - - /* update (free or allocate) the bits - * in these words. - */ - if (free) - memset(&dp->pmap[word], 0, - nwords * 4); - else - memset(&dp->pmap[word], (int) ONES, - nwords * 4); - - word += nwords; - } - } - - /* - * update dmap lsn - */ - if (lblkno == lastlblkno) - continue; - - lastlblkno = lblkno; - - LOGSYNC_LOCK(log, flags); - if (mp->lsn != 0) { - /* inherit older/smaller lsn */ - logdiff(diffp, mp->lsn, log); - if (difft < diffp) { - mp->lsn = lsn; - - /* move bp after tblock in logsync list */ - list_move(&mp->synclist, &tblk->synclist); - } - - /* inherit younger/larger clsn */ - logdiff(difft, tblk->clsn, log); - logdiff(diffp, mp->clsn, log); - if (difft > diffp) - mp->clsn = tblk->clsn; - } else { - mp->log = log; - mp->lsn = lsn; - - /* insert bp after tblock in logsync list */ - log->count++; - list_add(&mp->synclist, &tblk->synclist); - - mp->clsn = tblk->clsn; - } - LOGSYNC_UNLOCK(log, flags); - } - - /* write the last buffer. */ - if (mp) { - write_metapage(mp); - } - - return (0); -} - - -/* - * NAME: dbNextAG() - * - * FUNCTION: find the preferred allocation group for new allocations. 
- * - * Within the allocation groups, we maintain a preferred - * allocation group which consists of a group with at least - * average free space. It is the preferred group that we target - * new inode allocation towards. The tie-in between inode - * allocation and block allocation occurs as we allocate the - * first (data) block of an inode and specify the inode (block) - * as the allocation hint for this block. - * - * We try to avoid having more than one open file growing in - * an allocation group, as this will lead to fragmentation. - * This differs from the old OS/2 method of trying to keep - * empty ags around for large allocations. - * - * PARAMETERS: - * ipbmap - pointer to in-core inode for the block map. - * - * RETURN VALUES: - * the preferred allocation group number. - */ -int dbNextAG(struct inode *ipbmap) -{ - s64 avgfree; - int agpref; - s64 hwm = 0; - int i; - int next_best = -1; - struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; - - BMAP_LOCK(bmp); - - /* determine the average number of free blocks within the ags. */ - avgfree = (u32)bmp->db_nfree / bmp->db_numag; - - /* - * if the current preferred ag does not have an active allocator - * and has at least average freespace, return it - */ - agpref = bmp->db_agpref; - if ((atomic_read(&bmp->db_active[agpref]) == 0) && - (bmp->db_agfree[agpref] >= avgfree)) - goto unlock; - - /* From the last preferred ag, find the next one with at least - * average free space. - */ - for (i = 0 ; i < bmp->db_numag; i++, agpref++) { - if (agpref == bmp->db_numag) - agpref = 0; - - if (atomic_read(&bmp->db_active[agpref])) - /* open file is currently growing in this ag */ - continue; - if (bmp->db_agfree[agpref] >= avgfree) { - /* Return this one */ - bmp->db_agpref = agpref; - goto unlock; - } else if (bmp->db_agfree[agpref] > hwm) { - /* Less than avg. 
freespace, but best so far */ - hwm = bmp->db_agfree[agpref]; - next_best = agpref; - } - } - - /* - * If no inactive ag was found with average freespace, use the - * next best - */ - if (next_best != -1) - bmp->db_agpref = next_best; - /* else leave db_agpref unchanged */ -unlock: - BMAP_UNLOCK(bmp); - - /* return the preferred group. - */ - return (bmp->db_agpref); -} - -/* - * NAME: dbAlloc() - * - * FUNCTION: attempt to allocate a specified number of contiguous free - * blocks from the working allocation block map. - * - * the block allocation policy uses hints and a multi-step - * approach. - * - * for allocation requests smaller than the number of blocks - * per dmap, we first try to allocate the new blocks - * immediately following the hint. if these blocks are not - * available, we try to allocate blocks near the hint. if - * no blocks near the hint are available, we next try to - * allocate within the same dmap as contains the hint. - * - * if no blocks are available in the dmap or the allocation - * request is larger than the dmap size, we try to allocate - * within the same allocation group as contains the hint. if - * this does not succeed, we finally try to allocate anywhere - * within the aggregate. - * - * we also try to allocate anywhere within the aggregate for - * for allocation requests larger than the allocation group - * size or requests that specify no hint value. - * - * PARAMETERS: - * ip - pointer to in-core inode; - * hint - allocation hint. - * nblocks - number of contiguous blocks in the range. - * results - on successful return, set to the starting block number - * of the newly allocated contiguous range. 
- * - * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error - */ -int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) -{ - int rc, agno; - struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; - struct bmap *bmp; - struct metapage *mp; - s64 lblkno, blkno; - struct dmap *dp; - int l2nb; - s64 mapSize; - int writers; - - /* assert that nblocks is valid */ - assert(nblocks > 0); - - /* get the log2 number of blocks to be allocated. - * if the number of blocks is not a log2 multiple, - * it will be rounded up to the next log2 multiple. - */ - l2nb = BLKSTOL2(nblocks); - - bmp = JFS_SBI(ip->i_sb)->bmap; - - mapSize = bmp->db_mapsize; - - /* the hint should be within the map */ - if (hint >= mapSize) { - jfs_error(ip->i_sb, "dbAlloc: the hint is outside the map"); - return -EIO; - } - - /* if the number of blocks to be allocated is greater than the - * allocation group size, try to allocate anywhere. - */ - if (l2nb > bmp->db_agl2size) { - IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); - - rc = dbAllocAny(bmp, nblocks, l2nb, results); - - goto write_unlock; - } - - /* - * If no hint, let dbNextAG recommend an allocation group - */ - if (hint == 0) - goto pref_ag; - - /* we would like to allocate close to the hint. adjust the - * hint to the block following the hint since the allocators - * will start looking for free space starting at this point. - */ - blkno = hint + 1; - - if (blkno >= bmp->db_mapsize) - goto pref_ag; - - agno = blkno >> bmp->db_agl2size; - - /* check if blkno crosses over into a new allocation group. - * if so, check if we should allow allocations within this - * allocation group. - */ - if ((blkno & (bmp->db_agsize - 1)) == 0) - /* check if the AG is currently being written to. - * if so, call dbNextAG() to find a non-busy - * AG with sufficient free space. - */ - if (atomic_read(&bmp->db_active[agno])) - goto pref_ag; - - /* check if the allocation request size can be satisfied from a - * single dmap. 
if so, try to allocate from the dmap containing - * the hint using a tiered strategy. - */ - if (nblocks <= BPERDMAP) { - IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); - - /* get the buffer for the dmap containing the hint. - */ - rc = -EIO; - lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); - mp = read_metapage(ipbmap, lblkno, PSIZE, 0); - if (mp == NULL) - goto read_unlock; - - dp = (struct dmap *) mp->data; - - /* first, try to satisfy the allocation request with the - * blocks beginning at the hint. - */ - if ((rc = dbAllocNext(bmp, dp, blkno, (int) nblocks)) - != -ENOSPC) { - if (rc == 0) { - *results = blkno; - mark_metapage_dirty(mp); - } - - release_metapage(mp); - goto read_unlock; - } - - writers = atomic_read(&bmp->db_active[agno]); - if ((writers > 1) || - ((writers == 1) && (JFS_IP(ip)->active_ag != agno))) { - /* - * Someone else is writing in this allocation - * group. To avoid fragmenting, try another ag - */ - release_metapage(mp); - IREAD_UNLOCK(ipbmap); - goto pref_ag; - } - - /* next, try to satisfy the allocation request with blocks - * near the hint. - */ - if ((rc = - dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results)) - != -ENOSPC) { - if (rc == 0) - mark_metapage_dirty(mp); - - release_metapage(mp); - goto read_unlock; - } - - /* try to satisfy the allocation request with blocks within - * the same dmap as the hint. - */ - if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results)) - != -ENOSPC) { - if (rc == 0) - mark_metapage_dirty(mp); - - release_metapage(mp); - goto read_unlock; - } - - release_metapage(mp); - IREAD_UNLOCK(ipbmap); - } - - /* try to satisfy the allocation request with blocks within - * the same allocation group as the hint. 
- */ - IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); - if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC) - goto write_unlock; - - IWRITE_UNLOCK(ipbmap); - - - pref_ag: - /* - * Let dbNextAG recommend a preferred allocation group - */ - agno = dbNextAG(ipbmap); - IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); - - /* Try to allocate within this allocation group. if that fails, try to - * allocate anywhere in the map. - */ - if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == -ENOSPC) - rc = dbAllocAny(bmp, nblocks, l2nb, results); - - write_unlock: - IWRITE_UNLOCK(ipbmap); - - return (rc); - - read_unlock: - IREAD_UNLOCK(ipbmap); - - return (rc); -} - -#ifdef _NOTYET -/* - * NAME: dbAllocExact() - * - * FUNCTION: try to allocate the requested extent; - * - * PARAMETERS: - * ip - pointer to in-core inode; - * blkno - extent address; - * nblocks - extent length; - * - * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error - */ -int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) -{ - int rc; - struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; - struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; - struct dmap *dp; - s64 lblkno; - struct metapage *mp; - - IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); - - /* - * validate extent request: - * - * note: defragfs policy: - * max 64 blocks will be moved. - * allocation request size must be satisfied from a single dmap. 
- */ - if (nblocks <= 0 || nblocks > BPERDMAP || blkno >= bmp->db_mapsize) { - IREAD_UNLOCK(ipbmap); - return -EINVAL; - } - - if (nblocks > ((s64) 1 << bmp->db_maxfreebud)) { - /* the free space is no longer available */ - IREAD_UNLOCK(ipbmap); - return -ENOSPC; - } - - /* read in the dmap covering the extent */ - lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); - mp = read_metapage(ipbmap, lblkno, PSIZE, 0); - if (mp == NULL) { - IREAD_UNLOCK(ipbmap); - return -EIO; - } - dp = (struct dmap *) mp->data; - - /* try to allocate the requested extent */ - rc = dbAllocNext(bmp, dp, blkno, nblocks); - - IREAD_UNLOCK(ipbmap); - - if (rc == 0) - mark_metapage_dirty(mp); - - release_metapage(mp); - - return (rc); -} -#endif /* _NOTYET */ - -/* - * NAME: dbReAlloc() - * - * FUNCTION: attempt to extend a current allocation by a specified - * number of blocks. - * - * this routine attempts to satisfy the allocation request - * by first trying to extend the existing allocation in - * place by allocating the additional blocks as the blocks - * immediately following the current allocation. if these - * blocks are not available, this routine will attempt to - * allocate a new set of contiguous blocks large enough - * to cover the existing allocation plus the additional - * number of blocks required. - * - * PARAMETERS: - * ip - pointer to in-core inode requiring allocation. - * blkno - starting block of the current allocation. - * nblocks - number of contiguous blocks within the current - * allocation. - * addnblocks - number of blocks to add to the allocation. - * results - on successful return, set to the starting block number - * of the existing allocation if the existing allocation - * was extended in place or to a newly allocated contiguous - * range if the existing allocation could not be extended - * in place. 
- * - * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error - */ -int -dbReAlloc(struct inode *ip, - s64 blkno, s64 nblocks, s64 addnblocks, s64 * results) -{ - int rc; - - /* try to extend the allocation in place. - */ - if ((rc = dbExtend(ip, blkno, nblocks, addnblocks)) == 0) { - *results = blkno; - return (0); - } else { - if (rc != -ENOSPC) - return (rc); - } - - /* could not extend the allocation in place, so allocate a - * new set of blocks for the entire request (i.e. try to get - * a range of contiguous blocks large enough to cover the - * existing allocation plus the additional blocks.) - */ - return (dbAlloc - (ip, blkno + nblocks - 1, addnblocks + nblocks, results)); -} - - -/* - * NAME: dbExtend() - * - * FUNCTION: attempt to extend a current allocation by a specified - * number of blocks. - * - * this routine attempts to satisfy the allocation request - * by first trying to extend the existing allocation in - * place by allocating the additional blocks as the blocks - * immediately following the current allocation. - * - * PARAMETERS: - * ip - pointer to in-core inode requiring allocation. - * blkno - starting block of the current allocation. - * nblocks - number of contiguous blocks within the current - * allocation. - * addnblocks - number of blocks to add to the allocation. 
- * - * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error - */ -static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) -{ - struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); - s64 lblkno, lastblkno, extblkno; - uint rel_block; - struct metapage *mp; - struct dmap *dp; - int rc; - struct inode *ipbmap = sbi->ipbmap; - struct bmap *bmp; - - /* - * We don't want a non-aligned extent to cross a page boundary - */ - if (((rel_block = blkno & (sbi->nbperpage - 1))) && - (rel_block + nblocks + addnblocks > sbi->nbperpage)) - return -ENOSPC; - - /* get the last block of the current allocation */ - lastblkno = blkno + nblocks - 1; - - /* determine the block number of the block following - * the existing allocation. - */ - extblkno = lastblkno + 1; - - IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); - - /* better be within the file system */ - bmp = sbi->bmap; - if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) { - IREAD_UNLOCK(ipbmap); - jfs_error(ip->i_sb, - "dbExtend: the block is outside the filesystem"); - return -EIO; - } - - /* we'll attempt to extend the current allocation in place by - * allocating the additional blocks as the blocks immediately - * following the current allocation. we only try to extend the - * current allocation in place if the number of additional blocks - * can fit into a dmap, the last block of the current allocation - * is not the last block of the file system, and the start of the - * inplace extension is not on an allocation group boundary. - */ - if (addnblocks > BPERDMAP || extblkno >= bmp->db_mapsize || - (extblkno & (bmp->db_agsize - 1)) == 0) { - IREAD_UNLOCK(ipbmap); - return -ENOSPC; - } - - /* get the buffer for the dmap containing the first block - * of the extension. 
- */ - lblkno = BLKTODMAP(extblkno, bmp->db_l2nbperpage); - mp = read_metapage(ipbmap, lblkno, PSIZE, 0); - if (mp == NULL) { - IREAD_UNLOCK(ipbmap); - return -EIO; - } - - dp = (struct dmap *) mp->data; - - /* try to allocate the blocks immediately following the - * current allocation. - */ - rc = dbAllocNext(bmp, dp, extblkno, (int) addnblocks); - - IREAD_UNLOCK(ipbmap); - - /* were we successful ? */ - if (rc == 0) - write_metapage(mp); - else - /* we were not successful */ - release_metapage(mp); - - - return (rc); -} - - -/* - * NAME: dbAllocNext() - * - * FUNCTION: attempt to allocate the blocks of the specified block - * range within a dmap. - * - * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap. - * blkno - starting block number of the range. - * nblocks - number of contiguous free blocks of the range. - * - * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error - * - * serialization: IREAD_LOCK(ipbmap) held on entry/exit; - */ -static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, - int nblocks) -{ - int dbitno, word, rembits, nb, nwords, wbitno, nw; - int l2size; - s8 *leaf; - u32 mask; - - if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocNext: Corrupt dmap page"); - return -EIO; - } - - /* pick up a pointer to the leaves of the dmap tree. - */ - leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx); - - /* determine the bit number and word within the dmap of the - * starting block. - */ - dbitno = blkno & (BPERDMAP - 1); - word = dbitno >> L2DBWORD; - - /* check if the specified block range is contained within - * this dmap. - */ - if (dbitno + nblocks > BPERDMAP) - return -ENOSPC; - - /* check if the starting leaf indicates that anything - * is free. - */ - if (leaf[word] == NOFREE) - return -ENOSPC; - - /* check the dmaps words corresponding to block range to see - * if the block range is free. 
not all bits of the first and - * last words may be contained within the block range. if this - * is the case, we'll work against those words (i.e. partial first - * and/or last) on an individual basis (a single pass) and examine - * the actual bits to determine if they are free. a single pass - * will be used for all dmap words fully contained within the - * specified range. within this pass, the leaves of the dmap - * tree will be examined to determine if the blocks are free. a - * single leaf may describe the free space of multiple dmap - * words, so we may visit only a subset of the actual leaves - * corresponding to the dmap words of the block range. - */ - for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { - /* determine the bit number within the word and - * the number of bits within the word. - */ - wbitno = dbitno & (DBWORD - 1); - nb = min(rembits, DBWORD - wbitno); - - /* check if only part of the word is to be examined. - */ - if (nb < DBWORD) { - /* check if the bits are free. - */ - mask = (ONES << (DBWORD - nb) >> wbitno); - if ((mask & ~le32_to_cpu(dp->wmap[word])) != mask) - return -ENOSPC; - - word += 1; - } else { - /* one or more dmap words are fully contained - * within the block range. determine how many - * words and how many bits. - */ - nwords = rembits >> L2DBWORD; - nb = nwords << L2DBWORD; - - /* now examine the appropriate leaves to determine - * if the blocks are free. - */ - while (nwords > 0) { - /* does the leaf describe any free space ? - */ - if (leaf[word] < BUDMIN) - return -ENOSPC; - - /* determine the l2 number of bits provided - * by this leaf. - */ - l2size = - min((int)leaf[word], NLSTOL2BSZ(nwords)); - - /* determine how many words were handled. - */ - nw = BUDSIZE(l2size, BUDMIN); - - nwords -= nw; - word += nw; - } - } - } - - /* allocate the blocks. 
- */ - return (dbAllocDmap(bmp, dp, blkno, nblocks)); -} - - -/* - * NAME: dbAllocNear() - * - * FUNCTION: attempt to allocate a number of contiguous free blocks near - * a specified block (hint) within a dmap. - * - * starting with the dmap leaf that covers the hint, we'll - * check the next four contiguous leaves for sufficient free - * space. if sufficient free space is found, we'll allocate - * the desired free space. - * - * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap. - * blkno - block number to allocate near. - * nblocks - actual number of contiguous free blocks desired. - * l2nb - log2 number of contiguous free blocks desired. - * results - on successful return, set to the starting block number - * of the newly allocated range. - * - * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error - * - * serialization: IREAD_LOCK(ipbmap) held on entry/exit; - */ -static int -dbAllocNear(struct bmap * bmp, - struct dmap * dp, s64 blkno, int nblocks, int l2nb, s64 * results) -{ - int word, lword, rc; - s8 *leaf; - - if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocNear: Corrupt dmap page"); - return -EIO; - } - - leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx); - - /* determine the word within the dmap that holds the hint - * (i.e. blkno). also, determine the last word in the dmap - * that we'll include in our examination. - */ - word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; - lword = min(word + 4, LPERDMAP); - - /* examine the leaves for sufficient free space. - */ - for (; word < lword; word++) { - /* does the leaf describe sufficient free space ? - */ - if (leaf[word] < l2nb) - continue; - - /* determine the block number within the file system - * of the first block described by this dmap word. 
- */ - blkno = le64_to_cpu(dp->start) + (word << L2DBWORD); - - /* if not all bits of the dmap word are free, get the - * starting bit number within the dmap word of the required - * string of free bits and adjust the block number with the - * value. - */ - if (leaf[word] < BUDMIN) - blkno += - dbFindBits(le32_to_cpu(dp->wmap[word]), l2nb); - - /* allocate the blocks. - */ - if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0) - *results = blkno; - - return (rc); - } - - return -ENOSPC; -} - - -/* - * NAME: dbAllocAG() - * - * FUNCTION: attempt to allocate the specified number of contiguous - * free blocks within the specified allocation group. - * - * unless the allocation group size is equal to the number - * of blocks per dmap, the dmap control pages will be used to - * find the required free space, if available. we start the - * search at the highest dmap control page level which - * distinctly describes the allocation group's free space - * (i.e. the highest level at which the allocation group's - * free space is not mixed in with that of any other group). - * in addition, we start the search within this level at a - * height of the dmapctl dmtree at which the nodes distinctly - * describe the allocation group's free space. at this height, - * the allocation group's free space may be represented by 1 - * or two sub-trees, depending on the allocation group size. - * we search the top nodes of these subtrees left to right for - * sufficient free space. if sufficient free space is found, - * the subtree is searched to find the leftmost leaf that - * has free space. once we have made it to the leaf, we - * move the search to the next lower level dmap control page - * corresponding to this leaf. we continue down the dmap control - * pages until we find the dmap that contains or starts the - * sufficient free space and we allocate at this dmap. 
- * - * if the allocation group size is equal to the dmap size, - * we'll start at the dmap corresponding to the allocation - * group and attempt the allocation at this level. - * - * the dmap control page search is also not performed if the - * allocation group is completely free and we go to the first - * dmap of the allocation group to do the allocation. this is - * done because the allocation group may be part (not the first - * part) of a larger binary buddy system, causing the dmap - * control pages to indicate no free space (NOFREE) within - * the allocation group. - * - * PARAMETERS: - * bmp - pointer to bmap descriptor - * agno - allocation group number. - * nblocks - actual number of contiguous free blocks desired. - * l2nb - log2 number of contiguous free blocks desired. - * results - on successful return, set to the starting block number - * of the newly allocated range. - * - * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error - * - * note: IWRITE_LOCK(ipmap) held on entry/exit; - */ -static int -dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) -{ - struct metapage *mp; - struct dmapctl *dcp; - int rc, ti, i, k, m, n, agperlev; - s64 blkno, lblkno; - int budmin; - - /* allocation request should not be for more than the - * allocation group size. - */ - if (l2nb > bmp->db_agl2size) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: allocation request is larger than the " - "allocation group size"); - return -EIO; - } - - /* determine the starting block number of the allocation - * group. - */ - blkno = (s64) agno << bmp->db_agl2size; - - /* check if the allocation group size is the minimum allocation - * group size or if the allocation group is completely free. if - * the allocation group size is the minimum size of BPERDMAP (i.e. 
- * 1 dmap), there is no need to search the dmap control page (below) - * that fully describes the allocation group since the allocation - * group is already fully described by a dmap. in this case, we - * just call dbAllocCtl() to search the dmap tree and allocate the - * required space if available. - * - * if the allocation group is completely free, dbAllocCtl() is - * also called to allocate the required space. this is done for - * two reasons. first, it makes no sense searching the dmap control - * pages for free space when we know that free space exists. second, - * the dmap control pages may indicate that the allocation group - * has no free space if the allocation group is part (not the first - * part) of a larger binary buddy system. - */ - if (bmp->db_agsize == BPERDMAP - || bmp->db_agfree[agno] == bmp->db_agsize) { - rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); - if ((rc == -ENOSPC) && - (bmp->db_agfree[agno] == bmp->db_agsize)) { - printk(KERN_ERR "blkno = %Lx, blocks = %Lx\n", - (unsigned long long) blkno, - (unsigned long long) nblocks); - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: dbAllocCtl failed in free AG"); - } - return (rc); - } - - /* the buffer for the dmap control page that fully describes the - * allocation group. - */ - lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, bmp->db_aglevel); - mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); - if (mp == NULL) - return -EIO; - dcp = (struct dmapctl *) mp->data; - budmin = dcp->budmin; - - if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: Corrupt dmapctl page"); - release_metapage(mp); - return -EIO; - } - - /* search the subtree(s) of the dmap control page that describes - * the allocation group, looking for sufficient free space. to begin, - * determine how many allocation groups are represented in a dmap - * control page at the control page level (i.e. L0, L1, L2) that - * fully describes an allocation group. 
next, determine the starting - * tree index of this allocation group within the control page. - */ - agperlev = - (1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth; - ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1)); - - /* dmap control page trees fan-out by 4 and a single allocation - * group may be described by 1 or 2 subtrees within the ag level - * dmap control page, depending upon the ag size. examine the ag's - * subtrees for sufficient free space, starting with the leftmost - * subtree. - */ - for (i = 0; i < bmp->db_agwidth; i++, ti++) { - /* is there sufficient free space ? - */ - if (l2nb > dcp->stree[ti]) - continue; - - /* sufficient free space found in a subtree. now search down - * the subtree to find the leftmost leaf that describes this - * free space. - */ - for (k = bmp->db_agheight; k > 0; k--) { - for (n = 0, m = (ti << 2) + 1; n < 4; n++) { - if (l2nb <= dcp->stree[m + n]) { - ti = m + n; - break; - } - } - if (n == 4) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: failed descending stree"); - release_metapage(mp); - return -EIO; - } - } - - /* determine the block number within the file system - * that corresponds to this leaf. - */ - if (bmp->db_aglevel == 2) - blkno = 0; - else if (bmp->db_aglevel == 1) - blkno &= ~(MAXL1SIZE - 1); - else /* bmp->db_aglevel == 0 */ - blkno &= ~(MAXL0SIZE - 1); - - blkno += - ((s64) (ti - le32_to_cpu(dcp->leafidx))) << budmin; - - /* release the buffer in preparation for going down - * the next level of dmap control pages. - */ - release_metapage(mp); - - /* check if we need to continue to search down the lower - * level dmap control pages. we need to if the number of - * blocks required is less than maximum number of blocks - * described at the next lower level. - */ - if (l2nb < budmin) { - - /* search the lower level dmap control pages to get - * the starting block number of the dmap that - * contains or starts off the free space. 
- */ - if ((rc = - dbFindCtl(bmp, l2nb, bmp->db_aglevel - 1, - &blkno))) { - if (rc == -ENOSPC) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: control page " - "inconsistent"); - return -EIO; - } - return (rc); - } - } - - /* allocate the blocks. - */ - rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); - if (rc == -ENOSPC) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: unable to allocate blocks"); - rc = -EIO; - } - return (rc); - } - - /* no space in the allocation group. release the buffer and - * return -ENOSPC. - */ - release_metapage(mp); - - return -ENOSPC; -} - - -/* - * NAME: dbAllocAny() - * - * FUNCTION: attempt to allocate the specified number of contiguous - * free blocks anywhere in the file system. - * - * dbAllocAny() attempts to find the sufficient free space by - * searching down the dmap control pages, starting with the - * highest level (i.e. L0, L1, L2) control page. if free space - * large enough to satisfy the desired free space is found, the - * desired free space is allocated. - * - * PARAMETERS: - * bmp - pointer to bmap descriptor - * nblocks - actual number of contiguous free blocks desired. - * l2nb - log2 number of contiguous free blocks desired. - * results - on successful return, set to the starting block number - * of the newly allocated range. - * - * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error - * - * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; - */ -static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) -{ - int rc; - s64 blkno = 0; - - /* starting with the top level dmap control page, search - * down the dmap control levels for sufficient free space. - * if free space is found, dbFindCtl() returns the starting - * block number of the dmap that contains or starts off the - * range of free space. - */ - if ((rc = dbFindCtl(bmp, l2nb, bmp->db_maxlevel, &blkno))) - return (rc); - - /* allocate the blocks. 
- */ - rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); - if (rc == -ENOSPC) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAny: unable to allocate blocks"); - return -EIO; - } - return (rc); -} - - -/* - * NAME: dbFindCtl() - * - * FUNCTION: starting at a specified dmap control page level and block - * number, search down the dmap control levels for a range of - * contiguous free blocks large enough to satisfy an allocation - * request for the specified number of free blocks. - * - * if sufficient contiguous free blocks are found, this routine - * returns the starting block number within a dmap page that - * contains or starts a range of contiqious free blocks that - * is sufficient in size. - * - * PARAMETERS: - * bmp - pointer to bmap descriptor - * level - starting dmap control page level. - * l2nb - log2 number of contiguous free blocks desired. - * *blkno - on entry, starting block number for conducting the search. - * on successful return, the first block within a dmap page - * that contains or starts a range of contiguous free blocks. - * - * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error - * - * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; - */ -static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) -{ - int rc, leafidx, lev; - s64 b, lblkno; - struct dmapctl *dcp; - int budmin; - struct metapage *mp; - - /* starting at the specified dmap control page level and block - * number, search down the dmap control levels for the starting - * block number of a dmap page that contains or starts off - * sufficient free blocks. - */ - for (lev = level, b = *blkno; lev >= 0; lev--) { - /* get the buffer of the dmap control page for the block - * number and level (i.e. L0, L1, L2). 
- */ - lblkno = BLKTOCTL(b, bmp->db_l2nbperpage, lev); - mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); - if (mp == NULL) - return -EIO; - dcp = (struct dmapctl *) mp->data; - budmin = dcp->budmin; - - if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbFindCtl: Corrupt dmapctl page"); - release_metapage(mp); - return -EIO; - } - - /* search the tree within the dmap control page for - * sufficient free space. if sufficient free space is found, - * dbFindLeaf() returns the index of the leaf at which - * free space was found. - */ - rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx); - - /* release the buffer. - */ - release_metapage(mp); - - /* space found ? - */ - if (rc) { - if (lev != level) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbFindCtl: dmap inconsistent"); - return -EIO; - } - return -ENOSPC; - } - - /* adjust the block number to reflect the location within - * the dmap control page (i.e. the leaf) at which free - * space was found. - */ - b += (((s64) leafidx) << budmin); - - /* we stop the search at this dmap control page level if - * the number of blocks required is greater than or equal - * to the maximum number of blocks described at the next - * (lower) level. - */ - if (l2nb >= budmin) - break; - } - - *blkno = b; - return (0); -} - - -/* - * NAME: dbAllocCtl() - * - * FUNCTION: attempt to allocate a specified number of contiguous - * blocks starting within a specific dmap. - * - * this routine is called by higher level routines that search - * the dmap control pages above the actual dmaps for contiguous - * free space. the result of successful searches by these - * routines are the starting block numbers within dmaps, with - * the dmaps themselves containing the desired contiguous free - * space or starting a contiguous free space of desired size - * that is made up of the blocks of one or more dmaps. these - * calls should not fail due to insufficent resources. 
- * - * this routine is called in some cases where it is not known - * whether it will fail due to insufficient resources. more - * specifically, this occurs when allocating from an allocation - * group whose size is equal to the number of blocks per dmap. - * in this case, the dmap control pages are not examined prior - * to calling this routine (to save pathlength) and the call - * might fail. - * - * for a request size that fits within a dmap, this routine relies - * upon the dmap's dmtree to find the requested contiguous free - * space. for request sizes that are larger than a dmap, the - * requested free space will start at the first block of the - * first dmap (i.e. blkno). - * - * PARAMETERS: - * bmp - pointer to bmap descriptor - * nblocks - actual number of contiguous free blocks to allocate. - * l2nb - log2 number of contiguous free blocks to allocate. - * blkno - starting block number of the dmap to start the allocation - * from. - * results - on successful return, set to the starting block number - * of the newly allocated range. - * - * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error - * - * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; - */ -static int -dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) -{ - int rc, nb; - s64 b, lblkno, n; - struct metapage *mp; - struct dmap *dp; - - /* check if the allocation request is confined to a single dmap. - */ - if (l2nb <= L2BPERDMAP) { - /* get the buffer for the dmap. - */ - lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); - mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); - if (mp == NULL) - return -EIO; - dp = (struct dmap *) mp->data; - - /* try to allocate the blocks. - */ - rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results); - if (rc == 0) - mark_metapage_dirty(mp); - - release_metapage(mp); - - return (rc); - } - - /* allocation request involving multiple dmaps. it must start on - * a dmap boundary. 
- */ - assert((blkno & (BPERDMAP - 1)) == 0); - - /* allocate the blocks dmap by dmap. - */ - for (n = nblocks, b = blkno; n > 0; n -= nb, b += nb) { - /* get the buffer for the dmap. - */ - lblkno = BLKTODMAP(b, bmp->db_l2nbperpage); - mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); - if (mp == NULL) { - rc = -EIO; - goto backout; - } - dp = (struct dmap *) mp->data; - - /* the dmap better be all free. - */ - if (dp->tree.stree[ROOT] != L2BPERDMAP) { - release_metapage(mp); - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocCtl: the dmap is not all free"); - rc = -EIO; - goto backout; - } - - /* determine how many blocks to allocate from this dmap. - */ - nb = min(n, (s64)BPERDMAP); - - /* allocate the blocks from the dmap. - */ - if ((rc = dbAllocDmap(bmp, dp, b, nb))) { - release_metapage(mp); - goto backout; - } - - /* write the buffer. - */ - write_metapage(mp); - } - - /* set the results (starting block number) and return. - */ - *results = blkno; - return (0); - - /* something failed in handling an allocation request involving - * multiple dmaps. we'll try to clean up by backing out any - * allocation that has already happened for this request. if - * we fail in backing out the allocation, we'll mark the file - * system to indicate that blocks have been leaked. - */ - backout: - - /* try to backout the allocations dmap by dmap. - */ - for (n = nblocks - n, b = blkno; n > 0; - n -= BPERDMAP, b += BPERDMAP) { - /* get the buffer for this dmap. - */ - lblkno = BLKTODMAP(b, bmp->db_l2nbperpage); - mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); - if (mp == NULL) { - /* could not back out. mark the file system - * to indicate that we have leaked blocks. - */ - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocCtl: I/O Error: Block Leakage."); - continue; - } - dp = (struct dmap *) mp->data; - - /* free the blocks is this dmap. - */ - if (dbFreeDmap(bmp, dp, b, BPERDMAP)) { - /* could not back out. mark the file system - * to indicate that we have leaked blocks. 
- */ - release_metapage(mp); - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocCtl: Block Leakage."); - continue; - } - - /* write the buffer. - */ - write_metapage(mp); - } - - return (rc); -} - - -/* - * NAME: dbAllocDmapLev() - * - * FUNCTION: attempt to allocate a specified number of contiguous blocks - * from a specified dmap. - * - * this routine checks if the contiguous blocks are available. - * if so, nblocks of blocks are allocated; otherwise, ENOSPC is - * returned. - * - * PARAMETERS: - * mp - pointer to bmap descriptor - * dp - pointer to dmap to attempt to allocate blocks from. - * l2nb - log2 number of contiguous block desired. - * nblocks - actual number of contiguous block desired. - * results - on successful return, set to the starting block number - * of the newly allocated range. - * - * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error - * - * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or - * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit; - */ -static int -dbAllocDmapLev(struct bmap * bmp, - struct dmap * dp, int nblocks, int l2nb, s64 * results) -{ - s64 blkno; - int leafidx, rc; - - /* can't be more than a dmaps worth of blocks */ - assert(l2nb <= L2BPERDMAP); - - /* search the tree within the dmap page for sufficient - * free space. if sufficient free space is found, dbFindLeaf() - * returns the index of the leaf at which free space was found. - */ - if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx)) - return -ENOSPC; - - /* determine the block number within the file system corresponding - * to the leaf at which free space was found. - */ - blkno = le64_to_cpu(dp->start) + (leafidx << L2DBWORD); - - /* if not all bits of the dmap word are free, get the starting - * bit number within the dmap word of the required string of free - * bits and adjust the block number with this value. 
- */ - if (dp->tree.stree[leafidx + LEAFIND] < BUDMIN) - blkno += dbFindBits(le32_to_cpu(dp->wmap[leafidx]), l2nb); - - /* allocate the blocks */ - if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0) - *results = blkno; - - return (rc); -} - - -/* - * NAME: dbAllocDmap() - * - * FUNCTION: adjust the disk allocation map to reflect the allocation - * of a specified block range within a dmap. - * - * this routine allocates the specified blocks from the dmap - * through a call to dbAllocBits(). if the allocation of the - * block range causes the maximum string of free blocks within - * the dmap to change (i.e. the value of the root of the dmap's - * dmtree), this routine will cause this change to be reflected - * up through the appropriate levels of the dmap control pages - * by a call to dbAdjCtl() for the L0 dmap control page that - * covers this dmap. - * - * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap to allocate the block range from. - * blkno - starting block number of the block to be allocated. - * nblocks - number of blocks to be allocated. - * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error - * - * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; - */ -static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, - int nblocks) -{ - s8 oldroot; - int rc; - - /* save the current value of the root (i.e. maximum free string) - * of the dmap tree. - */ - oldroot = dp->tree.stree[ROOT]; - - /* allocate the specified (blocks) bits */ - dbAllocBits(bmp, dp, blkno, nblocks); - - /* if the root has not changed, done. */ - if (dp->tree.stree[ROOT] == oldroot) - return (0); - - /* root changed. bubble the change up to the dmap control pages. - * if the adjustment of the upper level control pages fails, - * backout the bit allocation (thus making everything consistent). 
- */ - if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 1, 0))) - dbFreeBits(bmp, dp, blkno, nblocks); - - return (rc); -} - - -/* - * NAME: dbFreeDmap() - * - * FUNCTION: adjust the disk allocation map to reflect the allocation - * of a specified block range within a dmap. - * - * this routine frees the specified blocks from the dmap through - * a call to dbFreeBits(). if the deallocation of the block range - * causes the maximum string of free blocks within the dmap to - * change (i.e. the value of the root of the dmap's dmtree), this - * routine will cause this change to be reflected up through the - * appropriate levels of the dmap control pages by a call to - * dbAdjCtl() for the L0 dmap control page that covers this dmap. - * - * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap to free the block range from. - * blkno - starting block number of the block to be freed. - * nblocks - number of blocks to be freed. - * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error - * - * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; - */ -static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, - int nblocks) -{ - s8 oldroot; - int rc = 0, word; - - /* save the current value of the root (i.e. maximum free string) - * of the dmap tree. - */ - oldroot = dp->tree.stree[ROOT]; - - /* free the specified (blocks) bits */ - rc = dbFreeBits(bmp, dp, blkno, nblocks); - - /* if error or the root has not changed, done. */ - if (rc || (dp->tree.stree[ROOT] == oldroot)) - return (rc); - - /* root changed. bubble the change up to the dmap control pages. - * if the adjustment of the upper level control pages fails, - * backout the deallocation. 
- */ - if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) { - word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; - - /* as part of backing out the deallocation, we will have - * to back split the dmap tree if the deallocation caused - * the freed blocks to become part of a larger binary buddy - * system. - */ - if (dp->tree.stree[word] == NOFREE) - dbBackSplit((dmtree_t *) & dp->tree, word); - - dbAllocBits(bmp, dp, blkno, nblocks); - } - - return (rc); -} - - -/* - * NAME: dbAllocBits() - * - * FUNCTION: allocate a specified block range from a dmap. - * - * this routine updates the dmap to reflect the working - * state allocation of the specified block range. it directly - * updates the bits of the working map and causes the adjustment - * of the binary buddy system described by the dmap's dmtree - * leaves to reflect the bits allocated. it also causes the - * dmap's dmtree, as a whole, to reflect the allocated range. - * - * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap to allocate bits from. - * blkno - starting block number of the bits to be allocated. - * nblocks - number of bits to be allocated. - * - * RETURN VALUES: none - * - * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; - */ -static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, - int nblocks) -{ - int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; - dmtree_t *tp = (dmtree_t *) & dp->tree; - int size; - s8 *leaf; - - /* pick up a pointer to the leaves of the dmap tree */ - leaf = dp->tree.stree + LEAFIND; - - /* determine the bit number and word within the dmap of the - * starting block. - */ - dbitno = blkno & (BPERDMAP - 1); - word = dbitno >> L2DBWORD; - - /* block range better be within the dmap */ - assert(dbitno + nblocks <= BPERDMAP); - - /* allocate the bits of the dmap's words corresponding to the block - * range. not all bits of the first and last words may be contained - * within the block range. 
if this is the case, we'll work against - * those words (i.e. partial first and/or last) on an individual basis - * (a single pass), allocating the bits of interest by hand and - * updating the leaf corresponding to the dmap word. a single pass - * will be used for all dmap words fully contained within the - * specified range. within this pass, the bits of all fully contained - * dmap words will be marked as free in a single shot and the leaves - * will be updated. a single leaf may describe the free space of - * multiple dmap words, so we may update only a subset of the actual - * leaves corresponding to the dmap words of the block range. - */ - for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { - /* determine the bit number within the word and - * the number of bits within the word. - */ - wbitno = dbitno & (DBWORD - 1); - nb = min(rembits, DBWORD - wbitno); - - /* check if only part of a word is to be allocated. - */ - if (nb < DBWORD) { - /* allocate (set to 1) the appropriate bits within - * this dmap word. - */ - dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb) - >> wbitno); - - /* update the leaf for this dmap word. in addition - * to setting the leaf value to the binary buddy max - * of the updated dmap word, dbSplit() will split - * the binary system of the leaves if need be. - */ - dbSplit(tp, word, BUDMIN, - dbMaxBud((u8 *) & dp->wmap[word])); - - word += 1; - } else { - /* one or more dmap words are fully contained - * within the block range. determine how many - * words and allocate (set to 1) the bits of these - * words. - */ - nwords = rembits >> L2DBWORD; - memset(&dp->wmap[word], (int) ONES, nwords * 4); - - /* determine how many bits. - */ - nb = nwords << L2DBWORD; - - /* now update the appropriate leaves to reflect - * the allocated words. 
- */ - for (; nwords > 0; nwords -= nw) { - if (leaf[word] < BUDMIN) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocBits: leaf page " - "corrupt"); - break; - } - - /* determine what the leaf value should be - * updated to as the minimum of the l2 number - * of bits being allocated and the l2 number - * of bits currently described by this leaf. - */ - size = min((int)leaf[word], NLSTOL2BSZ(nwords)); - - /* update the leaf to reflect the allocation. - * in addition to setting the leaf value to - * NOFREE, dbSplit() will split the binary - * system of the leaves to reflect the current - * allocation (size). - */ - dbSplit(tp, word, size, NOFREE); - - /* get the number of dmap words handled */ - nw = BUDSIZE(size, BUDMIN); - word += nw; - } - } - } - - /* update the free count for this dmap */ - le32_add_cpu(&dp->nfree, -nblocks); - - BMAP_LOCK(bmp); - - /* if this allocation group is completely free, - * update the maximum allocation group number if this allocation - * group is the new max. - */ - agno = blkno >> bmp->db_agl2size; - if (agno > bmp->db_maxag) - bmp->db_maxag = agno; - - /* update the free count for the allocation group and map */ - bmp->db_agfree[agno] -= nblocks; - bmp->db_nfree -= nblocks; - - BMAP_UNLOCK(bmp); -} - - -/* - * NAME: dbFreeBits() - * - * FUNCTION: free a specified block range from a dmap. - * - * this routine updates the dmap to reflect the working - * state allocation of the specified block range. it directly - * updates the bits of the working map and causes the adjustment - * of the binary buddy system described by the dmap's dmtree - * leaves to reflect the bits freed. it also causes the dmap's - * dmtree, as a whole, to reflect the deallocated range. - * - * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap to free bits from. - * blkno - starting block number of the bits to be freed. - * nblocks - number of bits to be freed. 
- * - * RETURN VALUES: 0 for success - * - * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; - */ -static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, - int nblocks) -{ - int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; - dmtree_t *tp = (dmtree_t *) & dp->tree; - int rc = 0; - int size; - - /* determine the bit number and word within the dmap of the - * starting block. - */ - dbitno = blkno & (BPERDMAP - 1); - word = dbitno >> L2DBWORD; - - /* block range better be within the dmap. - */ - assert(dbitno + nblocks <= BPERDMAP); - - /* free the bits of the dmaps words corresponding to the block range. - * not all bits of the first and last words may be contained within - * the block range. if this is the case, we'll work against those - * words (i.e. partial first and/or last) on an individual basis - * (a single pass), freeing the bits of interest by hand and updating - * the leaf corresponding to the dmap word. a single pass will be used - * for all dmap words fully contained within the specified range. - * within this pass, the bits of all fully contained dmap words will - * be marked as free in a single shot and the leaves will be updated. a - * single leaf may describe the free space of multiple dmap words, - * so we may update only a subset of the actual leaves corresponding - * to the dmap words of the block range. - * - * dbJoin() is used to update leaf values and will join the binary - * buddy system of the leaves if the new leaf values indicate this - * should be done. - */ - for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { - /* determine the bit number within the word and - * the number of bits within the word. - */ - wbitno = dbitno & (DBWORD - 1); - nb = min(rembits, DBWORD - wbitno); - - /* check if only part of a word is to be freed. - */ - if (nb < DBWORD) { - /* free (zero) the appropriate bits within this - * dmap word. 
- */ - dp->wmap[word] &= - cpu_to_le32(~(ONES << (DBWORD - nb) - >> wbitno)); - - /* update the leaf for this dmap word. - */ - rc = dbJoin(tp, word, - dbMaxBud((u8 *) & dp->wmap[word])); - if (rc) - return rc; - - word += 1; - } else { - /* one or more dmap words are fully contained - * within the block range. determine how many - * words and free (zero) the bits of these words. - */ - nwords = rembits >> L2DBWORD; - memset(&dp->wmap[word], 0, nwords * 4); - - /* determine how many bits. - */ - nb = nwords << L2DBWORD; - - /* now update the appropriate leaves to reflect - * the freed words. - */ - for (; nwords > 0; nwords -= nw) { - /* determine what the leaf value should be - * updated to as the minimum of the l2 number - * of bits being freed and the l2 (max) number - * of bits that can be described by this leaf. - */ - size = - min(LITOL2BSZ - (word, L2LPERDMAP, BUDMIN), - NLSTOL2BSZ(nwords)); - - /* update the leaf. - */ - rc = dbJoin(tp, word, size); - if (rc) - return rc; - - /* get the number of dmap words handled. - */ - nw = BUDSIZE(size, BUDMIN); - word += nw; - } - } - } - - /* update the free count for this dmap. - */ - le32_add_cpu(&dp->nfree, nblocks); - - BMAP_LOCK(bmp); - - /* update the free count for the allocation group and - * map. - */ - agno = blkno >> bmp->db_agl2size; - bmp->db_nfree += nblocks; - bmp->db_agfree[agno] += nblocks; - - /* check if this allocation group is not completely free and - * if it is currently the maximum (rightmost) allocation group. - * if so, establish the new maximum allocation group number by - * searching left for the first allocation group with allocation. 
- */ - if ((bmp->db_agfree[agno] == bmp->db_agsize && agno == bmp->db_maxag) || - (agno == bmp->db_numag - 1 && - bmp->db_agfree[agno] == (bmp-> db_mapsize & (BPERDMAP - 1)))) { - while (bmp->db_maxag > 0) { - bmp->db_maxag -= 1; - if (bmp->db_agfree[bmp->db_maxag] != - bmp->db_agsize) - break; - } - - /* re-establish the allocation group preference if the - * current preference is right of the maximum allocation - * group. - */ - if (bmp->db_agpref > bmp->db_maxag) - bmp->db_agpref = bmp->db_maxag; - } - - BMAP_UNLOCK(bmp); - - return 0; -} - - -/* - * NAME: dbAdjCtl() - * - * FUNCTION: adjust a dmap control page at a specified level to reflect - * the change in a lower level dmap or dmap control page's - * maximum string of free blocks (i.e. a change in the root - * of the lower level object's dmtree) due to the allocation - * or deallocation of a range of blocks with a single dmap. - * - * on entry, this routine is provided with the new value of - * the lower level dmap or dmap control page root and the - * starting block number of the block range whose allocation - * or deallocation resulted in the root change. this range - * is respresented by a single leaf of the current dmapctl - * and the leaf will be updated with this value, possibly - * causing a binary buddy system within the leaves to be - * split or joined. the update may also cause the dmapctl's - * dmtree to be updated. - * - * if the adjustment of the dmap control page, itself, causes its - * root to change, this change will be bubbled up to the next dmap - * control level by a recursive call to this routine, specifying - * the new root value and the next dmap control page level to - * be adjusted. - * PARAMETERS: - * bmp - pointer to bmap descriptor - * blkno - the first block of a block range within a dmap. it is - * the allocation or deallocation of this block range that - * requires the dmap control page to be adjusted. 
- * newval - the new value of the lower level dmap or dmap control - * page root. - * alloc - 'true' if adjustment is due to an allocation. - * level - current level of dmap control page (i.e. L0, L1, L2) to - * be adjusted. - * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error - * - * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; - */ -static int -dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) -{ - struct metapage *mp; - s8 oldroot; - int oldval; - s64 lblkno; - struct dmapctl *dcp; - int rc, leafno, ti; - - /* get the buffer for the dmap control page for the specified - * block number and control page level. - */ - lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, level); - mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); - if (mp == NULL) - return -EIO; - dcp = (struct dmapctl *) mp->data; - - if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAdjCtl: Corrupt dmapctl page"); - release_metapage(mp); - return -EIO; - } - - /* determine the leaf number corresponding to the block and - * the index within the dmap control tree. - */ - leafno = BLKTOCTLLEAF(blkno, dcp->budmin); - ti = leafno + le32_to_cpu(dcp->leafidx); - - /* save the current leaf value and the current root level (i.e. - * maximum l2 free string described by this dmapctl). - */ - oldval = dcp->stree[ti]; - oldroot = dcp->stree[ROOT]; - - /* check if this is a control page update for an allocation. - * if so, update the leaf to reflect the new leaf value using - * dbSplit(); otherwise (deallocation), use dbJoin() to update - * the leaf with the new value. in addition to updating the - * leaf, dbSplit() will also split the binary buddy system of - * the leaves, if required, and bubble new values within the - * dmapctl tree, if required. similarly, dbJoin() will join - * the binary buddy system of leaves and bubble new values up - * the dmapctl tree as required by the new leaf value. 
- */ - if (alloc) { - /* check if we are in the middle of a binary buddy - * system. this happens when we are performing the - * first allocation out of an allocation group that - * is part (not the first part) of a larger binary - * buddy system. if we are in the middle, back split - * the system prior to calling dbSplit() which assumes - * that it is at the front of a binary buddy system. - */ - if (oldval == NOFREE) { - rc = dbBackSplit((dmtree_t *) dcp, leafno); - if (rc) - return rc; - oldval = dcp->stree[ti]; - } - dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval); - } else { - rc = dbJoin((dmtree_t *) dcp, leafno, newval); - if (rc) - return rc; - } - - /* check if the root of the current dmap control page changed due - * to the update and if the current dmap control page is not at - * the current top level (i.e. L0, L1, L2) of the map. if so (i.e. - * root changed and this is not the top level), call this routine - * again (recursion) for the next higher level of the mapping to - * reflect the change in root for the current dmap control page. - */ - if (dcp->stree[ROOT] != oldroot) { - /* are we below the top level of the map. if so, - * bubble the root up to the next higher level. - */ - if (level < bmp->db_maxlevel) { - /* bubble up the new root of this dmap control page to - * the next level. - */ - if ((rc = - dbAdjCtl(bmp, blkno, dcp->stree[ROOT], alloc, - level + 1))) { - /* something went wrong in bubbling up the new - * root value, so backout the changes to the - * current dmap control page. - */ - if (alloc) { - dbJoin((dmtree_t *) dcp, leafno, - oldval); - } else { - /* the dbJoin() above might have - * caused a larger binary buddy system - * to form and we may now be in the - * middle of it. if this is the case, - * back split the buddies. - */ - if (dcp->stree[ti] == NOFREE) - dbBackSplit((dmtree_t *) - dcp, leafno); - dbSplit((dmtree_t *) dcp, leafno, - dcp->budmin, oldval); - } - - /* release the buffer and return the error. 
- */ - release_metapage(mp); - return (rc); - } - } else { - /* we're at the top level of the map. update - * the bmap control page to reflect the size - * of the maximum free buddy system. - */ - assert(level == bmp->db_maxlevel); - if (bmp->db_maxfreebud != oldroot) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAdjCtl: the maximum free buddy is " - "not the old root"); - } - bmp->db_maxfreebud = dcp->stree[ROOT]; - } - } - - /* write the buffer. - */ - write_metapage(mp); - - return (0); -} - - -/* - * NAME: dbSplit() - * - * FUNCTION: update the leaf of a dmtree with a new value, splitting - * the leaf from the binary buddy system of the dmtree's - * leaves, as required. - * - * PARAMETERS: - * tp - pointer to the tree containing the leaf. - * leafno - the number of the leaf to be updated. - * splitsz - the size the binary buddy system starting at the leaf - * must be split to, specified as the log2 number of blocks. - * newval - the new value for the leaf. - * - * RETURN VALUES: none - * - * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; - */ -static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) -{ - int budsz; - int cursz; - s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); - - /* check if the leaf needs to be split. - */ - if (leaf[leafno] > tp->dmt_budmin) { - /* the split occurs by cutting the buddy system in half - * at the specified leaf until we reach the specified - * size. pick up the starting split size (current size - * - 1 in l2) and the corresponding buddy size. - */ - cursz = leaf[leafno] - 1; - budsz = BUDSIZE(cursz, tp->dmt_budmin); - - /* split until we reach the specified size. - */ - while (cursz >= splitsz) { - /* update the buddy's leaf with its new value. - */ - dbAdjTree(tp, leafno ^ budsz, cursz); - - /* on to the next size and buddy. - */ - cursz -= 1; - budsz >>= 1; - } - } - - /* adjust the dmap tree to reflect the specified leaf's new - * value. 
- */ - dbAdjTree(tp, leafno, newval); -} - - -/* - * NAME: dbBackSplit() - * - * FUNCTION: back split the binary buddy system of dmtree leaves - * that hold a specified leaf until the specified leaf - * starts its own binary buddy system. - * - * the allocators typically perform allocations at the start - * of binary buddy systems and dbSplit() is used to accomplish - * any required splits. in some cases, however, allocation - * may occur in the middle of a binary system and requires a - * back split, with the split proceeding out from the middle of - * the system (less efficient) rather than the start of the - * system (more efficient). the cases in which a back split - * is required are rare and are limited to the first allocation - * within an allocation group which is a part (not first part) - * of a larger binary buddy system and a few exception cases - * in which a previous join operation must be backed out. - * - * PARAMETERS: - * tp - pointer to the tree containing the leaf. - * leafno - the number of the leaf to be updated. - * - * RETURN VALUES: none - * - * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; - */ -static int dbBackSplit(dmtree_t * tp, int leafno) -{ - int budsz, bud, w, bsz, size; - int cursz; - s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); - - /* leaf should be part (not first part) of a binary - * buddy system. - */ - assert(leaf[leafno] == NOFREE); - - /* the back split is accomplished by iteratively finding the leaf - * that starts the buddy system that contains the specified leaf and - * splitting that system in two. this iteration continues until - * the specified leaf becomes the start of a buddy system. - * - * determine maximum possible l2 size for the specified leaf. - */ - size = - LITOL2BSZ(leafno, le32_to_cpu(tp->dmt_l2nleafs), - tp->dmt_budmin); - - /* determine the number of leaves covered by this size. 
this - * is the buddy size that we will start with as we search for - * the buddy system that contains the specified leaf. - */ - budsz = BUDSIZE(size, tp->dmt_budmin); - - /* back split. - */ - while (leaf[leafno] == NOFREE) { - /* find the leftmost buddy leaf. - */ - for (w = leafno, bsz = budsz;; bsz <<= 1, - w = (w < bud) ? w : bud) { - if (bsz >= le32_to_cpu(tp->dmt_nleafs)) { - jfs_err("JFS: block map error in dbBackSplit"); - return -EIO; - } - - /* determine the buddy. - */ - bud = w ^ bsz; - - /* check if this buddy is the start of the system. - */ - if (leaf[bud] != NOFREE) { - /* split the leaf at the start of the - * system in two. - */ - cursz = leaf[bud] - 1; - dbSplit(tp, bud, cursz, cursz); - break; - } - } - } - - if (leaf[leafno] != size) { - jfs_err("JFS: wrong leaf value in dbBackSplit"); - return -EIO; - } - return 0; -} - - -/* - * NAME: dbJoin() - * - * FUNCTION: update the leaf of a dmtree with a new value, joining - * the leaf with other leaves of the dmtree into a multi-leaf - * binary buddy system, as required. - * - * PARAMETERS: - * tp - pointer to the tree containing the leaf. - * leafno - the number of the leaf to be updated. - * newval - the new value for the leaf. - * - * RETURN VALUES: none - */ -static int dbJoin(dmtree_t * tp, int leafno, int newval) -{ - int budsz, buddy; - s8 *leaf; - - /* can the new leaf value require a join with other leaves ? - */ - if (newval >= tp->dmt_budmin) { - /* pickup a pointer to the leaves of the tree. - */ - leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); - - /* try to join the specified leaf into a large binary - * buddy system. the join proceeds by attempting to join - * the specified leafno with its buddy (leaf) at new value. - * if the join occurs, we attempt to join the left leaf - * of the joined buddies with its buddy at new value + 1. 
- * we continue to join until we find a buddy that cannot be - * joined (does not have a value equal to the size of the - * last join) or until all leaves have been joined into a - * single system. - * - * get the buddy size (number of words covered) of - * the new value. - */ - budsz = BUDSIZE(newval, tp->dmt_budmin); - - /* try to join. - */ - while (budsz < le32_to_cpu(tp->dmt_nleafs)) { - /* get the buddy leaf. - */ - buddy = leafno ^ budsz; - - /* if the leaf's new value is greater than its - * buddy's value, we join no more. - */ - if (newval > leaf[buddy]) - break; - - /* It shouldn't be less */ - if (newval < leaf[buddy]) - return -EIO; - - /* check which (leafno or buddy) is the left buddy. - * the left buddy gets to claim the blocks resulting - * from the join while the right gets to claim none. - * the left buddy is also eligible to participate in - * a join at the next higher level while the right - * is not. - * - */ - if (leafno < buddy) { - /* leafno is the left buddy. - */ - dbAdjTree(tp, buddy, NOFREE); - } else { - /* buddy is the left buddy and becomes - * leafno. - */ - dbAdjTree(tp, leafno, NOFREE); - leafno = buddy; - } - - /* on to try the next join. - */ - newval += 1; - budsz <<= 1; - } - } - - /* update the leaf value. - */ - dbAdjTree(tp, leafno, newval); - - return 0; -} - - -/* - * NAME: dbAdjTree() - * - * FUNCTION: update a leaf of a dmtree with a new value, adjusting - * the dmtree, as required, to reflect the new leaf value. - * the combination of any buddies must already be done before - * this is called. - * - * PARAMETERS: - * tp - pointer to the tree to be adjusted. - * leafno - the number of the leaf to be updated. - * newval - the new value for the leaf. - * - * RETURN VALUES: none - */ -static void dbAdjTree(dmtree_t * tp, int leafno, int newval) -{ - int lp, pp, k; - int max; - - /* pick up the index of the leaf for this leafno. 
- */ - lp = leafno + le32_to_cpu(tp->dmt_leafidx); - - /* is the current value the same as the old value ? if so, - * there is nothing to do. - */ - if (tp->dmt_stree[lp] == newval) - return; - - /* set the new value. - */ - tp->dmt_stree[lp] = newval; - - /* bubble the new value up the tree as required. - */ - for (k = 0; k < le32_to_cpu(tp->dmt_height); k++) { - /* get the index of the first leaf of the 4 leaf - * group containing the specified leaf (leafno). - */ - lp = ((lp - 1) & ~0x03) + 1; - - /* get the index of the parent of this 4 leaf group. - */ - pp = (lp - 1) >> 2; - - /* determine the maximum of the 4 leaves. - */ - max = TREEMAX(&tp->dmt_stree[lp]); - - /* if the maximum of the 4 is the same as the - * parent's value, we're done. - */ - if (tp->dmt_stree[pp] == max) - break; - - /* parent gets new value. - */ - tp->dmt_stree[pp] = max; - - /* parent becomes leaf for next go-round. - */ - lp = pp; - } -} - - -/* - * NAME: dbFindLeaf() - * - * FUNCTION: search a dmtree_t for sufficient free blocks, returning - * the index of a leaf describing the free blocks if - * sufficient free blocks are found. - * - * the search starts at the top of the dmtree_t tree and - * proceeds down the tree to the leftmost leaf with sufficient - * free space. - * - * PARAMETERS: - * tp - pointer to the tree to be searched. - * l2nb - log2 number of free blocks to search for. - * leafidx - return pointer to be set to the index of the leaf - * describing at least l2nb free blocks if sufficient - * free blocks are found. - * - * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient free blocks. - */ -static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) -{ - int ti, n = 0, k, x = 0; - - /* first check the root of the tree to see if there is - * sufficient free space. - */ - if (l2nb > tp->dmt_stree[ROOT]) - return -ENOSPC; - - /* sufficient free space available. 
now search down the tree - * starting at the next level for the leftmost leaf that - * describes sufficient free space. - */ - for (k = le32_to_cpu(tp->dmt_height), ti = 1; - k > 0; k--, ti = ((ti + n) << 2) + 1) { - /* search the four nodes at this level, starting from - * the left. - */ - for (x = ti, n = 0; n < 4; n++) { - /* sufficient free space found. move to the next - * level (or quit if this is the last level). - */ - if (l2nb <= tp->dmt_stree[x + n]) - break; - } - - /* better have found something since the higher - * levels of the tree said it was here. - */ - assert(n < 4); - } - - /* set the return to the leftmost leaf describing sufficient - * free space. - */ - *leafidx = x + n - le32_to_cpu(tp->dmt_leafidx); - - return (0); -} - - -/* - * NAME: dbFindBits() - * - * FUNCTION: find a specified number of binary buddy free bits within a - * dmap bitmap word value. - * - * this routine searches the bitmap value for (1 << l2nb) free - * bits at (1 << l2nb) alignments within the value. - * - * PARAMETERS: - * word - dmap bitmap word value. - * l2nb - number of free bits specified as a log2 number. - * - * RETURN VALUES: - * starting bit number of free bits. - */ -static int dbFindBits(u32 word, int l2nb) -{ - int bitno, nb; - u32 mask; - - /* get the number of bits. - */ - nb = 1 << l2nb; - assert(nb <= DBWORD); - - /* complement the word so we can use a mask (i.e. 0s represent - * free bits) and compute the mask. - */ - word = ~word; - mask = ONES << (DBWORD - nb); - - /* scan the word for nb free bits at nb alignments. - */ - for (bitno = 0; mask != 0; bitno += nb, mask >>= nb) { - if ((mask & word) == mask) - break; - } - - ASSERT(bitno < 32); - - /* return the bit number. - */ - return (bitno); -} - - -/* - * NAME: dbMaxBud(u8 *cp) - * - * FUNCTION: determine the largest binary buddy string of free - * bits within 32-bits of the map. - * - * PARAMETERS: - * cp - pointer to the 32-bit value. 
- * - * RETURN VALUES: - * largest binary buddy of free bits within a dmap word. - */ -static int dbMaxBud(u8 * cp) -{ - signed char tmp1, tmp2; - - /* check if the wmap word is all free. if so, the - * free buddy size is BUDMIN. - */ - if (*((uint *) cp) == 0) - return (BUDMIN); - - /* check if the wmap word is half free. if so, the - * free buddy size is BUDMIN-1. - */ - if (*((u16 *) cp) == 0 || *((u16 *) cp + 1) == 0) - return (BUDMIN - 1); - - /* not all free or half free. determine the free buddy - * size thru table lookup using quarters of the wmap word. - */ - tmp1 = max(budtab[cp[2]], budtab[cp[3]]); - tmp2 = max(budtab[cp[0]], budtab[cp[1]]); - return (max(tmp1, tmp2)); -} - - -/* - * NAME: cnttz(uint word) - * - * FUNCTION: determine the number of trailing zeros within a 32-bit - * value. - * - * PARAMETERS: - * value - 32-bit value to be examined. - * - * RETURN VALUES: - * count of trailing zeros - */ -static int cnttz(u32 word) -{ - int n; - - for (n = 0; n < 32; n++, word >>= 1) { - if (word & 0x01) - break; - } - - return (n); -} - - -/* - * NAME: cntlz(u32 value) - * - * FUNCTION: determine the number of leading zeros within a 32-bit - * value. - * - * PARAMETERS: - * value - 32-bit value to be examined. - * - * RETURN VALUES: - * count of leading zeros - */ -static int cntlz(u32 value) -{ - int n; - - for (n = 0; n < 32; n++, value <<= 1) { - if (value & HIGHORDER) - break; - } - return (n); -} - - -/* - * NAME: blkstol2(s64 nb) - * - * FUNCTION: convert a block count to its log2 value. if the block - * count is not a l2 multiple, it is rounded up to the next - * larger l2 multiple. - * - * PARAMETERS: - * nb - number of blocks - * - * RETURN VALUES: - * log2 number of blocks - */ -static int blkstol2(s64 nb) -{ - int l2nb; - s64 mask; /* meant to be signed */ - - mask = (s64) 1 << (64 - 1); - - /* count the leading bits. - */ - for (l2nb = 0; l2nb < 64; l2nb++, mask >>= 1) { - /* leading bit found. 
- */ - if (nb & mask) { - /* determine the l2 value. - */ - l2nb = (64 - 1) - l2nb; - - /* check if we need to round up. - */ - if (~mask & nb) - l2nb++; - - return (l2nb); - } - } - assert(0); - return 0; /* fix compiler warning */ -} - - -/* - * NAME: dbAllocBottomUp() - * - * FUNCTION: alloc the specified block range from the working block - * allocation map. - * - * the blocks will be alloc from the working map one dmap - * at a time. - * - * PARAMETERS: - * ip - pointer to in-core inode; - * blkno - starting block number to be freed. - * nblocks - number of blocks to be freed. - * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error - */ -int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks) -{ - struct metapage *mp; - struct dmap *dp; - int nb, rc; - s64 lblkno, rem; - struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; - struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; - - IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); - - /* block to be allocated better be within the mapsize. */ - ASSERT(nblocks <= bmp->db_mapsize - blkno); - - /* - * allocate the blocks a dmap at a time. - */ - mp = NULL; - for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) { - /* release previous dmap if any */ - if (mp) { - write_metapage(mp); - } - - /* get the buffer for the current dmap. */ - lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); - mp = read_metapage(ipbmap, lblkno, PSIZE, 0); - if (mp == NULL) { - IREAD_UNLOCK(ipbmap); - return -EIO; - } - dp = (struct dmap *) mp->data; - - /* determine the number of blocks to be allocated from - * this dmap. - */ - nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1))); - - /* allocate the blocks. */ - if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) { - release_metapage(mp); - IREAD_UNLOCK(ipbmap); - return (rc); - } - } - - /* write the last buffer. 
*/ - write_metapage(mp); - - IREAD_UNLOCK(ipbmap); - - return (0); -} - - -static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, - int nblocks) -{ - int rc; - int dbitno, word, rembits, nb, nwords, wbitno, agno; - s8 oldroot; - struct dmaptree *tp = (struct dmaptree *) & dp->tree; - - /* save the current value of the root (i.e. maximum free string) - * of the dmap tree. - */ - oldroot = tp->stree[ROOT]; - - /* determine the bit number and word within the dmap of the - * starting block. - */ - dbitno = blkno & (BPERDMAP - 1); - word = dbitno >> L2DBWORD; - - /* block range better be within the dmap */ - assert(dbitno + nblocks <= BPERDMAP); - - /* allocate the bits of the dmap's words corresponding to the block - * range. not all bits of the first and last words may be contained - * within the block range. if this is the case, we'll work against - * those words (i.e. partial first and/or last) on an individual basis - * (a single pass), allocating the bits of interest by hand and - * updating the leaf corresponding to the dmap word. a single pass - * will be used for all dmap words fully contained within the - * specified range. within this pass, the bits of all fully contained - * dmap words will be marked as free in a single shot and the leaves - * will be updated. a single leaf may describe the free space of - * multiple dmap words, so we may update only a subset of the actual - * leaves corresponding to the dmap words of the block range. - */ - for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { - /* determine the bit number within the word and - * the number of bits within the word. - */ - wbitno = dbitno & (DBWORD - 1); - nb = min(rembits, DBWORD - wbitno); - - /* check if only part of a word is to be allocated. - */ - if (nb < DBWORD) { - /* allocate (set to 1) the appropriate bits within - * this dmap word. 
- */ - dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb) - >> wbitno); - - word++; - } else { - /* one or more dmap words are fully contained - * within the block range. determine how many - * words and allocate (set to 1) the bits of these - * words. - */ - nwords = rembits >> L2DBWORD; - memset(&dp->wmap[word], (int) ONES, nwords * 4); - - /* determine how many bits */ - nb = nwords << L2DBWORD; - word += nwords; - } - } - - /* update the free count for this dmap */ - le32_add_cpu(&dp->nfree, -nblocks); - - /* reconstruct summary tree */ - dbInitDmapTree(dp); - - BMAP_LOCK(bmp); - - /* if this allocation group is completely free, - * update the highest active allocation group number - * if this allocation group is the new max. - */ - agno = blkno >> bmp->db_agl2size; - if (agno > bmp->db_maxag) - bmp->db_maxag = agno; - - /* update the free count for the allocation group and map */ - bmp->db_agfree[agno] -= nblocks; - bmp->db_nfree -= nblocks; - - BMAP_UNLOCK(bmp); - - /* if the root has not changed, done. */ - if (tp->stree[ROOT] == oldroot) - return (0); - - /* root changed. bubble the change up to the dmap control pages. - * if the adjustment of the upper level control pages fails, - * backout the bit allocation (thus making everything consistent). 
- */ - if ((rc = dbAdjCtl(bmp, blkno, tp->stree[ROOT], 1, 0))) - dbFreeBits(bmp, dp, blkno, nblocks); - - return (rc); -} - - -/* - * NAME: dbExtendFS() - * - * FUNCTION: extend bmap from blkno for nblocks; - * dbExtendFS() updates bmap ready for dbAllocBottomUp(); - * - * L2 - * | - * L1---------------------------------L1 - * | | - * L0---------L0---------L0 L0---------L0---------L0 - * | | | | | | - * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm; - * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm - * - * <---old---><----------------------------extend-----------------------> - */ -int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) -{ - struct jfs_sb_info *sbi = JFS_SBI(ipbmap->i_sb); - int nbperpage = sbi->nbperpage; - int i, i0 = true, j, j0 = true, k, n; - s64 newsize; - s64 p; - struct metapage *mp, *l2mp, *l1mp = NULL, *l0mp = NULL; - struct dmapctl *l2dcp, *l1dcp, *l0dcp; - struct dmap *dp; - s8 *l0leaf, *l1leaf, *l2leaf; - struct bmap *bmp = sbi->bmap; - int agno, l2agsize, oldl2agsize; - s64 ag_rem; - - newsize = blkno + nblocks; - - jfs_info("dbExtendFS: blkno:%Ld nblocks:%Ld newsize:%Ld", - (long long) blkno, (long long) nblocks, (long long) newsize); - - /* - * initialize bmap control page. - * - * all the data in bmap control page should exclude - * the mkfs hidden dmap page. - */ - - /* update mapsize */ - bmp->db_mapsize = newsize; - bmp->db_maxlevel = BMAPSZTOLEV(bmp->db_mapsize); - - /* compute new AG size */ - l2agsize = dbGetL2AGSize(newsize); - oldl2agsize = bmp->db_agl2size; - - bmp->db_agl2size = l2agsize; - bmp->db_agsize = 1 << l2agsize; - - /* compute new number of AG */ - agno = bmp->db_numag; - bmp->db_numag = newsize >> l2agsize; - bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 
1 : 0; - - /* - * reconfigure db_agfree[] - * from old AG configuration to new AG configuration; - * - * coalesce contiguous k (newAGSize/oldAGSize) AGs; - * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; - * note: new AG size = old AG size * (2**x). - */ - if (l2agsize == oldl2agsize) - goto extend; - k = 1 << (l2agsize - oldl2agsize); - ag_rem = bmp->db_agfree[0]; /* save agfree[0] */ - for (i = 0, n = 0; i < agno; n++) { - bmp->db_agfree[n] = 0; /* init collection point */ - - /* coalesce contiguous k AGs; */ - for (j = 0; j < k && i < agno; j++, i++) { - /* merge AGi to AGn */ - bmp->db_agfree[n] += bmp->db_agfree[i]; - } - } - bmp->db_agfree[0] += ag_rem; /* restore agfree[0] */ - - for (; n < MAXAG; n++) - bmp->db_agfree[n] = 0; - - /* - * update highest active ag number - */ - - bmp->db_maxag = bmp->db_maxag / k; - - /* - * extend bmap - * - * update bit maps and corresponding level control pages; - * global control page db_nfree, db_agfree[agno], db_maxfreebud; - */ - extend: - /* get L2 page */ - p = BMAPBLKNO + nbperpage; /* L2 page */ - l2mp = read_metapage(ipbmap, p, PSIZE, 0); - if (!l2mp) { - jfs_error(ipbmap->i_sb, "dbExtendFS: L2 page could not be read"); - return -EIO; - } - l2dcp = (struct dmapctl *) l2mp->data; - - /* compute start L1 */ - k = blkno >> L2MAXL1SIZE; - l2leaf = l2dcp->stree + CTLLEAFIND + k; - p = BLKTOL1(blkno, sbi->l2nbperpage); /* L1 page */ - - /* - * extend each L1 in L2 - */ - for (; k < LPERCTL; k++, p += nbperpage) { - /* get L1 page */ - if (j0) { - /* read in L1 page: (blkno & (MAXL1SIZE - 1)) */ - l1mp = read_metapage(ipbmap, p, PSIZE, 0); - if (l1mp == NULL) - goto errout; - l1dcp = (struct dmapctl *) l1mp->data; - - /* compute start L0 */ - j = (blkno & (MAXL1SIZE - 1)) >> L2MAXL0SIZE; - l1leaf = l1dcp->stree + CTLLEAFIND + j; - p = BLKTOL0(blkno, sbi->l2nbperpage); - j0 = false; - } else { - /* assign/init L1 page */ - l1mp = get_metapage(ipbmap, p, PSIZE, 0); - if (l1mp == NULL) - goto errout; - - 
l1dcp = (struct dmapctl *) l1mp->data; - - /* compute start L0 */ - j = 0; - l1leaf = l1dcp->stree + CTLLEAFIND; - p += nbperpage; /* 1st L0 of L1.k */ - } - - /* - * extend each L0 in L1 - */ - for (; j < LPERCTL; j++) { - /* get L0 page */ - if (i0) { - /* read in L0 page: (blkno & (MAXL0SIZE - 1)) */ - - l0mp = read_metapage(ipbmap, p, PSIZE, 0); - if (l0mp == NULL) - goto errout; - l0dcp = (struct dmapctl *) l0mp->data; - - /* compute start dmap */ - i = (blkno & (MAXL0SIZE - 1)) >> - L2BPERDMAP; - l0leaf = l0dcp->stree + CTLLEAFIND + i; - p = BLKTODMAP(blkno, - sbi->l2nbperpage); - i0 = false; - } else { - /* assign/init L0 page */ - l0mp = get_metapage(ipbmap, p, PSIZE, 0); - if (l0mp == NULL) - goto errout; - - l0dcp = (struct dmapctl *) l0mp->data; - - /* compute start dmap */ - i = 0; - l0leaf = l0dcp->stree + CTLLEAFIND; - p += nbperpage; /* 1st dmap of L0.j */ - } - - /* - * extend each dmap in L0 - */ - for (; i < LPERCTL; i++) { - /* - * reconstruct the dmap page, and - * initialize corresponding parent L0 leaf - */ - if ((n = blkno & (BPERDMAP - 1))) { - /* read in dmap page: */ - mp = read_metapage(ipbmap, p, - PSIZE, 0); - if (mp == NULL) - goto errout; - n = min(nblocks, (s64)BPERDMAP - n); - } else { - /* assign/init dmap page */ - mp = read_metapage(ipbmap, p, - PSIZE, 0); - if (mp == NULL) - goto errout; - - n = min(nblocks, (s64)BPERDMAP); - } - - dp = (struct dmap *) mp->data; - *l0leaf = dbInitDmap(dp, blkno, n); - - bmp->db_nfree += n; - agno = le64_to_cpu(dp->start) >> l2agsize; - bmp->db_agfree[agno] += n; - - write_metapage(mp); - - l0leaf++; - p += nbperpage; - - blkno += n; - nblocks -= n; - if (nblocks == 0) - break; - } /* for each dmap in a L0 */ - - /* - * build current L0 page from its leaves, and - * initialize corresponding parent L1 leaf - */ - *l1leaf = dbInitDmapCtl(l0dcp, 0, ++i); - write_metapage(l0mp); - l0mp = NULL; - - if (nblocks) - l1leaf++; /* continue for next L0 */ - else { - /* more than 1 L0 ? 
*/ - if (j > 0) - break; /* build L1 page */ - else { - /* summarize in global bmap page */ - bmp->db_maxfreebud = *l1leaf; - release_metapage(l1mp); - release_metapage(l2mp); - goto finalize; - } - } - } /* for each L0 in a L1 */ - - /* - * build current L1 page from its leaves, and - * initialize corresponding parent L2 leaf - */ - *l2leaf = dbInitDmapCtl(l1dcp, 1, ++j); - write_metapage(l1mp); - l1mp = NULL; - - if (nblocks) - l2leaf++; /* continue for next L1 */ - else { - /* more than 1 L1 ? */ - if (k > 0) - break; /* build L2 page */ - else { - /* summarize in global bmap page */ - bmp->db_maxfreebud = *l2leaf; - release_metapage(l2mp); - goto finalize; - } - } - } /* for each L1 in a L2 */ - - jfs_error(ipbmap->i_sb, - "dbExtendFS: function has not returned as expected"); -errout: - if (l0mp) - release_metapage(l0mp); - if (l1mp) - release_metapage(l1mp); - release_metapage(l2mp); - return -EIO; - - /* - * finalize bmap control page - */ -finalize: - - return 0; -} - - -/* - * dbFinalizeBmap() - */ -void dbFinalizeBmap(struct inode *ipbmap) -{ - struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; - int actags, inactags, l2nl; - s64 ag_rem, actfree, inactfree, avgfree; - int i, n; - - /* - * finalize bmap control page - */ -//finalize: - /* - * compute db_agpref: preferred ag to allocate from - * (the leftmost ag with average free space in it); - */ -//agpref: - /* get the number of active ags and inacitve ags */ - actags = bmp->db_maxag + 1; - inactags = bmp->db_numag - actags; - ag_rem = bmp->db_mapsize & (bmp->db_agsize - 1); /* ??? */ - - /* determine how many blocks are in the inactive allocation - * groups. in doing this, we must account for the fact that - * the rightmost group might be a partial group (i.e. file - * system size is not a multiple of the group size). - */ - inactfree = (inactags && ag_rem) ? 
- ((inactags - 1) << bmp->db_agl2size) + ag_rem - : inactags << bmp->db_agl2size; - - /* determine how many free blocks are in the active - * allocation groups plus the average number of free blocks - * within the active ags. - */ - actfree = bmp->db_nfree - inactfree; - avgfree = (u32) actfree / (u32) actags; - - /* if the preferred allocation group has not average free space. - * re-establish the preferred group as the leftmost - * group with average free space. - */ - if (bmp->db_agfree[bmp->db_agpref] < avgfree) { - for (bmp->db_agpref = 0; bmp->db_agpref < actags; - bmp->db_agpref++) { - if (bmp->db_agfree[bmp->db_agpref] >= avgfree) - break; - } - if (bmp->db_agpref >= bmp->db_numag) { - jfs_error(ipbmap->i_sb, - "cannot find ag with average freespace"); - } - } - - /* - * compute db_aglevel, db_agheight, db_width, db_agstart: - * an ag is covered in aglevel dmapctl summary tree, - * at agheight level height (from leaf) with agwidth number of nodes - * each, which starts at agstart index node of the smmary tree node - * array; - */ - bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); - l2nl = - bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL); - bmp->db_agheight = l2nl >> 1; - bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheight << 1)); - for (i = 5 - bmp->db_agheight, bmp->db_agstart = 0, n = 1; i > 0; - i--) { - bmp->db_agstart += n; - n <<= 2; - } - -} - - -/* - * NAME: dbInitDmap()/ujfs_idmap_page() - * - * FUNCTION: initialize working/persistent bitmap of the dmap page - * for the specified number of blocks: - * - * at entry, the bitmaps had been initialized as free (ZEROS); - * The number of blocks will only account for the actually - * existing blocks. 
Blocks which don't actually exist in - * the aggregate will be marked as allocated (ONES); - * - * PARAMETERS: - * dp - pointer to page of map - * nblocks - number of blocks this page - * - * RETURNS: NONE - */ -static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks) -{ - int blkno, w, b, r, nw, nb, i; - - /* starting block number within the dmap */ - blkno = Blkno & (BPERDMAP - 1); - - if (blkno == 0) { - dp->nblocks = dp->nfree = cpu_to_le32(nblocks); - dp->start = cpu_to_le64(Blkno); - - if (nblocks == BPERDMAP) { - memset(&dp->wmap[0], 0, LPERDMAP * 4); - memset(&dp->pmap[0], 0, LPERDMAP * 4); - goto initTree; - } - } else { - le32_add_cpu(&dp->nblocks, nblocks); - le32_add_cpu(&dp->nfree, nblocks); - } - - /* word number containing start block number */ - w = blkno >> L2DBWORD; - - /* - * free the bits corresponding to the block range (ZEROS): - * note: not all bits of the first and last words may be contained - * within the block range. - */ - for (r = nblocks; r > 0; r -= nb, blkno += nb) { - /* number of bits preceding range to be freed in the word */ - b = blkno & (DBWORD - 1); - /* number of bits to free in the word */ - nb = min(r, DBWORD - b); - - /* is partial word to be freed ? */ - if (nb < DBWORD) { - /* free (set to 0) from the bitmap word */ - dp->wmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb) - >> b)); - dp->pmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb) - >> b)); - - /* skip the word freed */ - w++; - } else { - /* free (set to 0) contiguous bitmap words */ - nw = r >> L2DBWORD; - memset(&dp->wmap[w], 0, nw * 4); - memset(&dp->pmap[w], 0, nw * 4); - - /* skip the words freed */ - nb = nw << L2DBWORD; - w += nw; - } - } - - /* - * mark bits following the range to be freed (non-existing - * blocks) as allocated (ONES) - */ - - if (blkno == BPERDMAP) - goto initTree; - - /* the first word beyond the end of existing blocks */ - w = blkno >> L2DBWORD; - - /* does nblocks fall on a 32-bit boundary ? 
*/ - b = blkno & (DBWORD - 1); - if (b) { - /* mark a partial word allocated */ - dp->wmap[w] = dp->pmap[w] = cpu_to_le32(ONES >> b); - w++; - } - - /* set the rest of the words in the page to allocated (ONES) */ - for (i = w; i < LPERDMAP; i++) - dp->pmap[i] = dp->wmap[i] = cpu_to_le32(ONES); - - /* - * init tree - */ - initTree: - return (dbInitDmapTree(dp)); -} - - -/* - * NAME: dbInitDmapTree()/ujfs_complete_dmap() - * - * FUNCTION: initialize summary tree of the specified dmap: - * - * at entry, bitmap of the dmap has been initialized; - * - * PARAMETERS: - * dp - dmap to complete - * blkno - starting block number for this dmap - * treemax - will be filled in with max free for this dmap - * - * RETURNS: max free string at the root of the tree - */ -static int dbInitDmapTree(struct dmap * dp) -{ - struct dmaptree *tp; - s8 *cp; - int i; - - /* init fixed info of tree */ - tp = &dp->tree; - tp->nleafs = cpu_to_le32(LPERDMAP); - tp->l2nleafs = cpu_to_le32(L2LPERDMAP); - tp->leafidx = cpu_to_le32(LEAFIND); - tp->height = cpu_to_le32(4); - tp->budmin = BUDMIN; - - /* init each leaf from corresponding wmap word: - * note: leaf is set to NOFREE(-1) if all blocks of corresponding - * bitmap word are allocated. - */ - cp = tp->stree + le32_to_cpu(tp->leafidx); - for (i = 0; i < LPERDMAP; i++) - *cp++ = dbMaxBud((u8 *) & dp->wmap[i]); - - /* build the dmap's binary buddy summary tree */ - return (dbInitTree(tp)); -} - - -/* - * NAME: dbInitTree()/ujfs_adjtree() - * - * FUNCTION: initialize binary buddy summary tree of a dmap or dmapctl. - * - * at entry, the leaves of the tree has been initialized - * from corresponding bitmap word or root of summary tree - * of the child control page; - * configure binary buddy system at the leaf level, then - * bubble up the values of the leaf nodes up the tree. 
- * - * PARAMETERS: - * cp - Pointer to the root of the tree - * l2leaves- Number of leaf nodes as a power of 2 - * l2min - Number of blocks that can be covered by a leaf - * as a power of 2 - * - * RETURNS: max free string at the root of the tree - */ -static int dbInitTree(struct dmaptree * dtp) -{ - int l2max, l2free, bsize, nextb, i; - int child, parent, nparent; - s8 *tp, *cp, *cp1; - - tp = dtp->stree; - - /* Determine the maximum free string possible for the leaves */ - l2max = le32_to_cpu(dtp->l2nleafs) + dtp->budmin; - - /* - * configure the leaf levevl into binary buddy system - * - * Try to combine buddies starting with a buddy size of 1 - * (i.e. two leaves). At a buddy size of 1 two buddy leaves - * can be combined if both buddies have a maximum free of l2min; - * the combination will result in the left-most buddy leaf having - * a maximum free of l2min+1. - * After processing all buddies for a given size, process buddies - * at the next higher buddy size (i.e. current size * 2) and - * the next maximum free (current free + 1). - * This continues until the maximum possible buddy combination - * yields maximum free. - */ - for (l2free = dtp->budmin, bsize = 1; l2free < l2max; - l2free++, bsize = nextb) { - /* get next buddy size == current buddy pair size */ - nextb = bsize << 1; - - /* scan each adjacent buddy pair at current buddy size */ - for (i = 0, cp = tp + le32_to_cpu(dtp->leafidx); - i < le32_to_cpu(dtp->nleafs); - i += nextb, cp += nextb) { - /* coalesce if both adjacent buddies are max free */ - if (*cp == l2free && *(cp + bsize) == l2free) { - *cp = l2free + 1; /* left take right */ - *(cp + bsize) = -1; /* right give left */ - } - } - } - - /* - * bubble summary information of leaves up the tree. - * - * Starting at the leaf node level, the four nodes described by - * the higher level parent node are compared for a maximum free and - * this maximum becomes the value of the parent node. 
- * when all lower level nodes are processed in this fashion then - * move up to the next level (parent becomes a lower level node) and - * continue the process for that level. - */ - for (child = le32_to_cpu(dtp->leafidx), - nparent = le32_to_cpu(dtp->nleafs) >> 2; - nparent > 0; nparent >>= 2, child = parent) { - /* get index of 1st node of parent level */ - parent = (child - 1) >> 2; - - /* set the value of the parent node as the maximum - * of the four nodes of the current level. - */ - for (i = 0, cp = tp + child, cp1 = tp + parent; - i < nparent; i++, cp += 4, cp1++) - *cp1 = TREEMAX(cp); - } - - return (*tp); -} - - -/* - * dbInitDmapCtl() - * - * function: initialize dmapctl page - */ -static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i) -{ /* start leaf index not covered by range */ - s8 *cp; - - dcp->nleafs = cpu_to_le32(LPERCTL); - dcp->l2nleafs = cpu_to_le32(L2LPERCTL); - dcp->leafidx = cpu_to_le32(CTLLEAFIND); - dcp->height = cpu_to_le32(5); - dcp->budmin = L2BPERDMAP + L2LPERCTL * level; - - /* - * initialize the leaves of current level that were not covered - * by the specified input block range (i.e. the leaves have no - * low level dmapctl or dmap). 
- */ - cp = &dcp->stree[CTLLEAFIND + i]; - for (; i < LPERCTL; i++) - *cp++ = NOFREE; - - /* build the dmap's binary buddy summary tree */ - return (dbInitTree((struct dmaptree *) dcp)); -} - - -/* - * NAME: dbGetL2AGSize()/ujfs_getagl2size() - * - * FUNCTION: Determine log2(allocation group size) from aggregate size - * - * PARAMETERS: - * nblocks - Number of blocks in aggregate - * - * RETURNS: log2(allocation group size) in aggregate blocks - */ -static int dbGetL2AGSize(s64 nblocks) -{ - s64 sz; - s64 m; - int l2sz; - - if (nblocks < BPERDMAP * MAXAG) - return (L2BPERDMAP); - - /* round up aggregate size to power of 2 */ - m = ((u64) 1 << (64 - 1)); - for (l2sz = 64; l2sz >= 0; l2sz--, m >>= 1) { - if (m & nblocks) - break; - } - - sz = (s64) 1 << l2sz; - if (sz < nblocks) - l2sz += 1; - - /* agsize = roundupSize/max_number_of_ag */ - return (l2sz - L2MAXAG); -} - - -/* - * NAME: dbMapFileSizeToMapSize() - * - * FUNCTION: compute number of blocks the block allocation map file - * can cover from the map file size; - * - * RETURNS: Number of blocks which can be covered by this block map file; - */ - -/* - * maximum number of map pages at each level including control pages - */ -#define MAXL0PAGES (1 + LPERCTL) -#define MAXL1PAGES (1 + LPERCTL * MAXL0PAGES) -#define MAXL2PAGES (1 + LPERCTL * MAXL1PAGES) - -/* - * convert number of map pages to the zero origin top dmapctl level - */ -#define BMAPPGTOLEV(npages) \ - (((npages) <= 3 + MAXL0PAGES) ? 0 : \ - ((npages) <= 2 + MAXL1PAGES) ? 1 : 2) - -s64 dbMapFileSizeToMapSize(struct inode * ipbmap) -{ - struct super_block *sb = ipbmap->i_sb; - s64 nblocks; - s64 npages, ndmaps; - int level, i; - int complete, factor; - - nblocks = ipbmap->i_size >> JFS_SBI(sb)->l2bsize; - npages = nblocks >> JFS_SBI(sb)->l2nbperpage; - level = BMAPPGTOLEV(npages); - - /* At each level, accumulate the number of dmap pages covered by - * the number of full child levels below it; - * repeat for the last incomplete child level. 
- */ - ndmaps = 0; - npages--; /* skip the first global control page */ - /* skip higher level control pages above top level covered by map */ - npages -= (2 - level); - npages--; /* skip top level's control page */ - for (i = level; i >= 0; i--) { - factor = - (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1); - complete = (u32) npages / factor; - ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL : - ((i == 1) ? LPERCTL : 1)); - - /* pages in last/incomplete child */ - npages = (u32) npages % factor; - /* skip incomplete child's level control page */ - npages--; - } - - /* convert the number of dmaps into the number of blocks - * which can be covered by the dmaps; - */ - nblocks = ndmaps << L2BPERDMAP; - - return (nblocks); -} diff --git a/ANDROID_3.4.5/fs/jfs/jfs_dmap.h b/ANDROID_3.4.5/fs/jfs/jfs_dmap.h deleted file mode 100644 index 6dcb906c..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_dmap.h +++ /dev/null @@ -1,314 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_DMAP -#define _H_JFS_DMAP - -#include "jfs_txnmgr.h" - -#define BMAPVERSION 1 /* version number */ -#define TREESIZE (256+64+16+4+1) /* size of a dmap tree */ -#define LEAFIND (64+16+4+1) /* index of 1st leaf of a dmap tree */ -#define LPERDMAP 256 /* num leaves per dmap tree */ -#define L2LPERDMAP 8 /* l2 number of leaves per dmap tree */ -#define DBWORD 32 /* # of blks covered by a map word */ -#define L2DBWORD 5 /* l2 # of blks covered by a mword */ -#define BUDMIN L2DBWORD /* max free string in a map word */ -#define BPERDMAP (LPERDMAP * DBWORD) /* num of blks per dmap */ -#define L2BPERDMAP 13 /* l2 num of blks per dmap */ -#define CTLTREESIZE (1024+256+64+16+4+1) /* size of a dmapctl tree */ -#define CTLLEAFIND (256+64+16+4+1) /* idx of 1st leaf of a dmapctl tree */ -#define LPERCTL 1024 /* num of leaves per dmapctl tree */ -#define L2LPERCTL 10 /* l2 num of leaves per dmapctl tree */ -#define ROOT 0 /* index of the root of a tree */ -#define NOFREE ((s8) -1) /* no blocks free */ -#define MAXAG 128 /* max number of allocation groups */ -#define L2MAXAG 7 /* l2 max num of AG */ -#define L2MINAGSZ 25 /* l2 of minimum AG size in bytes */ -#define BMAPBLKNO 0 /* lblkno of bmap within the map */ - -/* - * maximum l2 number of disk blocks at the various dmapctl levels. - */ -#define L2MAXL0SIZE (L2BPERDMAP + 1 * L2LPERCTL) -#define L2MAXL1SIZE (L2BPERDMAP + 2 * L2LPERCTL) -#define L2MAXL2SIZE (L2BPERDMAP + 3 * L2LPERCTL) - -/* - * maximum number of disk blocks at the various dmapctl levels. 
- */ -#define MAXL0SIZE ((s64)1 << L2MAXL0SIZE) -#define MAXL1SIZE ((s64)1 << L2MAXL1SIZE) -#define MAXL2SIZE ((s64)1 << L2MAXL2SIZE) - -#define MAXMAPSIZE MAXL2SIZE /* maximum aggregate map size */ - -/* - * determine the maximum free string for four (lower level) nodes - * of the tree. - */ -static inline signed char TREEMAX(signed char *cp) -{ - signed char tmp1, tmp2; - - tmp1 = max(*(cp+2), *(cp+3)); - tmp2 = max(*(cp), *(cp+1)); - - return max(tmp1, tmp2); -} - -/* - * convert disk block number to the logical block number of the dmap - * describing the disk block. s is the log2(number of logical blocks per page) - * - * The calculation figures out how many logical pages are in front of the dmap. - * - the number of dmaps preceding it - * - the number of L0 pages preceding its L0 page - * - the number of L1 pages preceding its L1 page - * - 3 is added to account for the L2, L1, and L0 page for this dmap - * - 1 is added to account for the control page of the map. - */ -#define BLKTODMAP(b,s) \ - ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s)) - -/* - * convert disk block number to the logical block number of the LEVEL 0 - * dmapctl describing the disk block. s is the log2(number of logical blocks - * per page) - * - * The calculation figures out how many logical pages are in front of the L0. - * - the number of dmap pages preceding it - * - the number of L0 pages preceding it - * - the number of L1 pages preceding its L1 page - * - 2 is added to account for the L2, and L1 page for this L0 - * - 1 is added to account for the control page of the map. - */ -#define BLKTOL0(b,s) \ - (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s)) - -/* - * convert disk block number to the logical block number of the LEVEL 1 - * dmapctl describing the disk block. s is the log2(number of logical blocks - * per page) - * - * The calculation figures out how many logical pages are in front of the L1. 
- * - the number of dmap pages preceding it - * - the number of L0 pages preceding it - * - the number of L1 pages preceding it - * - 1 is added to account for the L2 page - * - 1 is added to account for the control page of the map. - */ -#define BLKTOL1(b,s) \ - (((((b) >> 33) << 20) + (((b) >> 33) << 10) + ((b) >> 33) + 1 + 1) << (s)) - -/* - * convert disk block number to the logical block number of the dmapctl - * at the specified level which describes the disk block. - */ -#define BLKTOCTL(b,s,l) \ - (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s))) - -/* - * convert aggregate map size to the zero origin dmapctl level of the - * top dmapctl. - */ -#define BMAPSZTOLEV(size) \ - (((size) <= MAXL0SIZE) ? 0 : ((size) <= MAXL1SIZE) ? 1 : 2) - -/* convert disk block number to allocation group number. - */ -#define BLKTOAG(b,sbi) ((b) >> ((sbi)->bmap->db_agl2size)) - -/* convert allocation group number to starting disk block - * number. - */ -#define AGTOBLK(a,ip) \ - ((s64)(a) << (JFS_SBI((ip)->i_sb)->bmap->db_agl2size)) - -/* - * dmap summary tree - * - * dmaptree must be consistent with dmapctl. 
- */ -struct dmaptree { - __le32 nleafs; /* 4: number of tree leafs */ - __le32 l2nleafs; /* 4: l2 number of tree leafs */ - __le32 leafidx; /* 4: index of first tree leaf */ - __le32 height; /* 4: height of the tree */ - s8 budmin; /* 1: min l2 tree leaf value to combine */ - s8 stree[TREESIZE]; /* TREESIZE: tree */ - u8 pad[2]; /* 2: pad to word boundary */ -}; /* - 360 - */ - -/* - * dmap page per 8K blocks bitmap - */ -struct dmap { - __le32 nblocks; /* 4: num blks covered by this dmap */ - __le32 nfree; /* 4: num of free blks in this dmap */ - __le64 start; /* 8: starting blkno for this dmap */ - struct dmaptree tree; /* 360: dmap tree */ - u8 pad[1672]; /* 1672: pad to 2048 bytes */ - __le32 wmap[LPERDMAP]; /* 1024: bits of the working map */ - __le32 pmap[LPERDMAP]; /* 1024: bits of the persistent map */ -}; /* - 4096 - */ - -/* - * disk map control page per level. - * - * dmapctl must be consistent with dmaptree. - */ -struct dmapctl { - __le32 nleafs; /* 4: number of tree leafs */ - __le32 l2nleafs; /* 4: l2 number of tree leafs */ - __le32 leafidx; /* 4: index of the first tree leaf */ - __le32 height; /* 4: height of tree */ - s8 budmin; /* 1: minimum l2 tree leaf value */ - s8 stree[CTLTREESIZE]; /* CTLTREESIZE: dmapctl tree */ - u8 pad[2714]; /* 2714: pad to 4096 */ -}; /* - 4096 - */ - -/* - * common definition for dmaptree within dmap and dmapctl - */ -typedef union dmtree { - struct dmaptree t1; - struct dmapctl t2; -} dmtree_t; - -/* macros for accessing fields within dmtree */ -#define dmt_nleafs t1.nleafs -#define dmt_l2nleafs t1.l2nleafs -#define dmt_leafidx t1.leafidx -#define dmt_height t1.height -#define dmt_budmin t1.budmin -#define dmt_stree t1.stree - -/* - * on-disk aggregate disk allocation map descriptor. 
- */ -struct dbmap_disk { - __le64 dn_mapsize; /* 8: number of blocks in aggregate */ - __le64 dn_nfree; /* 8: num free blks in aggregate map */ - __le32 dn_l2nbperpage; /* 4: number of blks per page */ - __le32 dn_numag; /* 4: total number of ags */ - __le32 dn_maxlevel; /* 4: number of active ags (NOTE(review): the field name suggests a dmapctl level instead - confirm) */ - __le32 dn_maxag; /* 4: max active alloc group number */ - __le32 dn_agpref; /* 4: preferred alloc group (hint) */ - __le32 dn_aglevel; /* 4: dmapctl level holding the AG */ - __le32 dn_agheight; /* 4: height in dmapctl of the AG */ - __le32 dn_agwidth; /* 4: width in dmapctl of the AG */ - __le32 dn_agstart; /* 4: start tree index at AG height */ - __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */ - __le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count */ - __le64 dn_agsize; /* 8: num of blks per alloc group */ - s8 dn_maxfreebud; /* 1: max free buddy system */ - u8 pad[3007]; /* 3007: pad to 4096 */ -}; /* - 4096 - */ - -struct dbmap { - s64 dn_mapsize; /* number of blocks in aggregate */ - s64 dn_nfree; /* num free blks in aggregate map */ - int dn_l2nbperpage; /* number of blks per page */ - int dn_numag; /* total number of ags */ - int dn_maxlevel; /* number of active ags */ - int dn_maxag; /* max active alloc group number */ - int dn_agpref; /* preferred alloc group (hint) */ - int dn_aglevel; /* dmapctl level holding the AG */ - int dn_agheight; /* height in dmapctl of the AG */ - int dn_agwidth; /* width in dmapctl of the AG */ - int dn_agstart; /* start tree index at AG height */ - int dn_agl2size; /* l2 num of blks per alloc group */ - s64 dn_agfree[MAXAG]; /* per AG free count */ - s64 dn_agsize; /* num of blks per alloc group */ - signed char dn_maxfreebud; /* max free buddy system */ -}; /* - 4096 - NOTE(review): same member names as dbmap_disk but with s64/int types and no trailing pad, so the 4096 size note looks inherited from dbmap_disk - confirm */ -/* - * in-memory aggregate disk allocation map descriptor. 
- */ -struct bmap { - struct dbmap db_bmap; /* on-disk aggregate map descriptor (kept here as a struct dbmap, the s64/int typed form) */ - struct inode *db_ipbmap; /* ptr to aggregate map incore inode */ - struct mutex db_bmaplock; /* aggregate map lock */ - atomic_t db_active[MAXAG]; /* count of active, open files in AG */ - u32 *db_DBmap; -}; - -/* macros for accessing fields within in-memory aggregate map descriptor - * - * each db_xxx name expands to db_bmap.dn_xxx, so it is only meaningful - * when applied to a struct bmap (for example bmp->db_nfree). - */ -#define db_mapsize db_bmap.dn_mapsize -#define db_nfree db_bmap.dn_nfree -#define db_agfree db_bmap.dn_agfree -#define db_agsize db_bmap.dn_agsize -#define db_agl2size db_bmap.dn_agl2size -#define db_agwidth db_bmap.dn_agwidth -#define db_agheight db_bmap.dn_agheight -#define db_agstart db_bmap.dn_agstart -#define db_numag db_bmap.dn_numag -#define db_maxlevel db_bmap.dn_maxlevel -#define db_aglevel db_bmap.dn_aglevel -#define db_agpref db_bmap.dn_agpref -#define db_maxag db_bmap.dn_maxag -#define db_maxfreebud db_bmap.dn_maxfreebud -#define db_l2nbperpage db_bmap.dn_l2nbperpage - -/* - * macros for various conversions needed by the allocators. - * blkstol2(), cntlz(), and cnttz() are operating system dependent functions. - */ -/* convert number of blocks to log2 number of blocks, rounding up to - * the next log2 value if blocks is not a l2 multiple. - * (thin wrapper: the work is done by blkstol2()) - */ -#define BLKSTOL2(d) (blkstol2(d)) - -/* convert number of leafs to log2 leaf value: 31 - cntlz(n) + BUDMIN */ -#define NLSTOL2BSZ(n) (31 - cntlz((n)) + BUDMIN) - -/* convert leaf index to log2 leaf value: m is used when n is 0, otherwise cnttz(n); b is then added */ -#define LITOL2BSZ(n,m,b) ((((n) == 0) ? (m) : cnttz((n))) + (b)) - -/* convert a block number to a dmap control leaf index: b is masked to its low (m + L2LPERCTL) bits and shifted right by m */ -#define BLKTOCTLLEAF(b,m) \ - (((b) & (((s64)1 << ((m) + L2LPERCTL)) - 1)) >> (m)) - -/* convert log2 leaf value to buddy size: 1 << (s - m), computed as int */ -#define BUDSIZE(s,m) (1 << ((s) - (m))) - -/* - * external references. 
- */ -extern int dbMount(struct inode *ipbmap); - -extern int dbUnmount(struct inode *ipbmap, int mounterror); - -extern int dbFree(struct inode *ipbmap, s64 blkno, s64 nblocks); - -extern int dbUpdatePMap(struct inode *ipbmap, - int free, s64 blkno, s64 nblocks, struct tblock * tblk); - -extern int dbNextAG(struct inode *ipbmap); - -extern int dbAlloc(struct inode *ipbmap, s64 hint, s64 nblocks, s64 * results); - -extern int dbReAlloc(struct inode *ipbmap, - s64 blkno, s64 nblocks, s64 addnblocks, s64 * results); - -extern int dbSync(struct inode *ipbmap); -extern int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks); -extern int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks); -extern void dbFinalizeBmap(struct inode *ipbmap); -extern s64 dbMapFileSizeToMapSize(struct inode *ipbmap); -#endif /* _H_JFS_DMAP */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_dtree.c b/ANDROID_3.4.5/fs/jfs/jfs_dtree.c deleted file mode 100644 index 9197a1b0..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_dtree.c +++ /dev/null @@ -1,4567 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* - * jfs_dtree.c: directory B+-tree manager - * - * B+-tree with variable length key directory: - * - * each directory page is structured as an array of 32-byte - * directory entry slots initialized as a freelist - * to avoid search/compaction of free space at insertion. - * when an entry is inserted, a number of slots are allocated - * from the freelist as required to store variable length data - * of the entry; when the entry is deleted, slots of the entry - * are returned to the freelist. - * - * leaf entry stores full name as key and file serial number - * (aka inode number) as data. - * internal/router entry stores suffix compressed name - * as key and simple extent descriptor as data. - * - * each directory page maintains a sorted entry index table - * which stores the start slot index of sorted entries - * to allow binary search on the table. - * - * directory starts as a root/leaf page in on-disk inode - * inline data area. - * when it becomes full, it starts a leaf of an external extent - * of length of 1 block. each time the first leaf becomes full, - * it is extended rather than split (its size is doubled), - * until its length becomes 4 KBytes; from then on the extent is split - * with a new 4 KByte extent when it becomes full - * to reduce external fragmentation of small directories. - * - * blah, blah, blah, for linear scan of directory in pieces by - * readdir(). - * - * - * case-insensitive directory file system - * - * names are stored in case-sensitive way in leaf entry. 
- * but stored, searched and compared in case-insensitive (uppercase) order - * (i.e., both search key and entry key are folded for search/compare): - * (note that case-sensitive order is BROKEN in storage, e.g., - * sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad - * - * entries which fold to the same key make up an equivalence class - * whose members are stored as a contiguous cluster (may cross page boundary) - * but whose order is arbitrary and which act as duplicates, e.g., - * abc, Abc, aBc, abC) - * - * once a match is found at a leaf, a scan forward/backward is required - * either, in case-insensitive search, for a duplicate - * or, in case-sensitive search, for an exact match - * - * router entry must be created/stored in case-insensitive way - * in internal entry: - * (right most key of left page and left most key of right page - * are folded, and its suffix compression is propagated as router - * key in parent) - * (e.g., if split occurs between <abc> and <aBd>, <ABD> rather than <aB> - * should be made the router key for the split) - * - * case-insensitive search: - * - * fold search key; - * - * case-insensitive search of B-tree: - * for internal entry, router key is already folded; - * for leaf entry, fold the entry key before comparison. - * - * if (leaf entry case-insensitive match found) - * if (next entry satisfies case-insensitive match) - * return EDUPLICATE; - * if (prev entry satisfies case-insensitive match) - * return EDUPLICATE; - * return match; - * else - * return no match; - * - * serialization: - * target directory inode lock is being held on entry/exit - * of all main directory service routines. 
- * - * log based recovery: - */ - -#include <linux/fs.h> -#include <linux/quotaops.h> -#include <linux/slab.h> -#include "jfs_incore.h" -#include "jfs_superblock.h" -#include "jfs_filsys.h" -#include "jfs_metapage.h" -#include "jfs_dmap.h" -#include "jfs_unicode.h" -#include "jfs_debug.h" - -/* dtree split parameter - * - * filled in by whoever starts a split: mp is the pinned page to split, - * index the position at which the new entry is to be inserted, nslot - * the number of slots that entry needs, key and data the entry itself, - * and pxdlist the list of extents allocated in advance for the split. - */ -struct dtsplit { - struct metapage *mp; - s16 index; - s16 nslot; - struct component_name *key; - ddata_t *data; - struct pxdlist *pxdlist; -}; - -#define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) - -/* get page buffer for specified block address - * - * wraps BT_GETPAGE and, when that leaves RC at 0, sanity-checks the page - * header: nextindex may not exceed DTROOTMAXSLOT for block 0 or the - * maxslot of the page for any other block, and a non-zero block may not - * claim a maxslot above DTPAGEMAXSLOT. On a violation the page is put - * back, jfs_error() is raised, MP is set to NULL and RC to -EIO. - * MP and RC are assigned, so both must be lvalues. - */ -#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ -{\ - BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\ - if (!(RC))\ - {\ - if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\ - ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\ - {\ - BT_PUTPAGE(MP);\ - jfs_error((IP)->i_sb, "DT_GETPAGE: dtree page corrupt");\ - MP = NULL;\ - RC = -EIO;\ - }\ - }\ -} - -/* for consistency (plain alias of BT_PUTPAGE) */ -#define DT_PUTPAGE(MP) BT_PUTPAGE(MP) - -#define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ - BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot) - -/* - * forward references - */ -static int dtSplitUp(tid_t tid, struct inode *ip, - struct dtsplit * split, struct btstack * btstack); - -static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, - struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp); - -static int dtExtendPage(tid_t tid, struct inode *ip, - struct dtsplit * split, struct btstack * btstack); - -static int dtSplitRoot(tid_t tid, struct inode *ip, - struct dtsplit * split, struct metapage ** rmpp); - -static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, - dtpage_t * fp, struct btstack * btstack); - -static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p); - -static int dtReadFirst(struct inode *ip, struct btstack * btstack); - -static int dtReadNext(struct inode *ip, - loff_t * offset, struct btstack * btstack); - 
-static int dtCompare(struct component_name * key, dtpage_t * p, int si); - -static int ciCompare(struct component_name * key, dtpage_t * p, int si, - int flag); - -static void dtGetKey(dtpage_t * p, int i, struct component_name * key, - int flag); - -static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, - int ri, struct component_name * key, int flag); - -static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, - ddata_t * data, struct dt_lock **); - -static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, - struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, - int do_index); - -static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock); - -static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock); - -static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock); - -#define ciToUpper(c) UniStrupr((c)->name) - -/* - * read_index_page() - * - * Reads a page of a directory's index table. - * Having metadata mapped into the directory inode's address space - * presents a multitude of problems. We avoid this by mapping to - * the absolute address space outside of the *_metapage routines - * - * blkno is resolved with xtLookup(); NULL is returned when the lookup - * fails or yields address 0, otherwise the result of read_metapage() - * for PSIZE bytes at the resolved address. - */ -static struct metapage *read_index_page(struct inode *inode, s64 blkno) -{ - int rc; - s64 xaddr; - int xflag; - s32 xlen; - - rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); - if (rc || (xaddr == 0)) - return NULL; - - return read_metapage(inode, xaddr, PSIZE, 1); -} - -/* - * get_index_page() - * - * Same as read_index_page(), but gets a new page without reading - * (get_metapage() is called in place of read_metapage(); the NULL - * returns on a failed lookup or address 0 are identical) - */ -static struct metapage *get_index_page(struct inode *inode, s64 blkno) -{ - int rc; - s64 xaddr; - int xflag; - s32 xlen; - - rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); - if (rc || (xaddr == 0)) - return NULL; - - return get_metapage(inode, xaddr, PSIZE, 1); -} - -/* - * find_index() - * - * Returns dtree page containing directory table entry for specified - * index and pointer to its entry. 
- * - * mp must be released by caller. - */ -static struct dir_table_slot *find_index(struct inode *ip, u32 index, - struct metapage ** mp, s64 *lblock) -{ - struct jfs_inode_info *jfs_ip = JFS_IP(ip); - s64 blkno; - s64 offset; - int page_offset; - struct dir_table_slot *slot; - static int maxWarnings = 10; - - if (index < 2) { - if (maxWarnings) { - jfs_warn("find_entry called with index = %d", index); - maxWarnings--; - } - return NULL; - } - - if (index >= jfs_ip->next_index) { - jfs_warn("find_entry called with index >= next_index"); - return NULL; - } - - if (jfs_dirtable_inline(ip)) { - /* - * Inline directory table - * - * valid indexes start at 2, so entry (index - 2) of i_dirtable - * is returned and no metapage is involved (*mp is set to NULL). - * The other branch maps the same entry number to a byte offset, - * reuses *mp when *lblock already names the wanted block, and - * otherwise releases it and reads the page for that block. - */ - *mp = NULL; - slot = &jfs_ip->i_dirtable[index - 2]; - } else { - offset = (index - 2) * sizeof(struct dir_table_slot); - page_offset = offset & (PSIZE - 1); - blkno = ((offset + 1) >> L2PSIZE) << - JFS_SBI(ip->i_sb)->l2nbperpage; - - if (*mp && (*lblock != blkno)) { - release_metapage(*mp); - *mp = NULL; - } - if (!(*mp)) { - *lblock = blkno; - *mp = read_index_page(ip, blkno); - } - if (!(*mp)) { - jfs_err("free_index: error reading directory table"); - return NULL; - } - - slot = - (struct dir_table_slot *) ((char *) (*mp)->data + - page_offset); - } - return slot; -} - -static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp, - u32 index) -{ - struct tlock *tlck; - struct linelock *llck; - struct lv *lv; - - tlck = txLock(tid, ip, mp, tlckDATA); - llck = (struct linelock *) tlck->lock; - - if (llck->index >= llck->maxcnt) - llck = txLinelock(llck); - lv = &llck->lv[llck->index]; - - /* - * Linelock slot size is twice the size of directory table - * slot size. 512 entries per page. - * - * hence the offset logged is the position of the entry within - * its page, (index - 2) & 511, halved, and a single linelock - * slot (length 1) covers the entry. - */ - lv->offset = ((index - 2) & 511) >> 1; - lv->length = 1; - llck->index++; -} - -/* - * add_index() - * - * Adds an entry to the directory index table. 
This is used to provide - * each directory entry with a persistent index in which to resume - * directory traversals - */ -static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) -{ - struct super_block *sb = ip->i_sb; - struct jfs_sb_info *sbi = JFS_SBI(sb); - struct jfs_inode_info *jfs_ip = JFS_IP(ip); - u64 blkno; - struct dir_table_slot *dirtab_slot; - u32 index; - struct linelock *llck; - struct lv *lv; - struct metapage *mp; - s64 offset; - uint page_offset; - struct tlock *tlck; - s64 xaddr; - - ASSERT(DO_INDEX(ip)); - - if (jfs_ip->next_index < 2) { - jfs_warn("add_index: next_index = %d. Resetting!", - jfs_ip->next_index); - jfs_ip->next_index = 2; - } - - index = jfs_ip->next_index++; - - if (index <= MAX_INLINE_DIRTABLE_ENTRY) { - /* - * i_size reflects size of index table, or 8 bytes per entry. - * (the first index handed out is 2, hence index - 1 entries) - */ - ip->i_size = (loff_t) (index - 1) << 3; - - /* - * dir table fits inline within inode - * - * the entry is written straight into i_dirtable, the inode is - * flagged COMMIT_Dirtable and the new index is returned. - */ - dirtab_slot = &jfs_ip->i_dirtable[index-2]; - dirtab_slot->flag = DIR_INDEX_VALID; - dirtab_slot->slot = slot; - DTSaddress(dirtab_slot, bn); - - set_cflag(COMMIT_Dirtable, ip); - - return index; - } - if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) { - struct dir_table_slot temp_table[12]; - - /* - * It's time to move the inline table to an external - * page and begin to build the xtree - * - * quota is charged first and given back if the block - * allocation fails. Every failure from here on goes to - * clean_up, which takes back the next_index increment and - * makes the function return 0. - */ - if (dquot_alloc_block(ip, sbi->nbperpage)) - goto clean_up; - if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) { - dquot_free_block(ip, sbi->nbperpage); - goto clean_up; - } - - /* - * Save the table, we're going to overwrite it with the - * xtree root - */ - memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table)); - - /* - * Initialize empty x-tree - */ - xtInitRoot(tid, ip); - - /* - * Add the first block to the xtree - */ - if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) { - /* This really shouldn't fail; if it does, the saved table is copied back and both the blocks and the quota charge are released */ - jfs_warn("add_index: xtInsert failed!"); - memcpy(&jfs_ip->i_dirtable, temp_table, - sizeof (temp_table)); - dbFree(ip, xaddr, sbi->nbperpage); - 
dquot_free_block(ip, sbi->nbperpage); - goto clean_up; - } - ip->i_size = PSIZE; - - mp = get_index_page(ip, 0); - if (!mp) { - jfs_err("add_index: get_metapage failed!"); - xtTruncate(tid, ip, 0, COMMIT_PWMAP); - memcpy(&jfs_ip->i_dirtable, temp_table, - sizeof (temp_table)); - goto clean_up; - } - tlck = txLock(tid, ip, mp, tlckDATA); - llck = (struct linelock *) & tlck->lock; - ASSERT(llck->index == 0); - lv = &llck->lv[0]; - - lv->offset = 0; - lv->length = 6; /* tlckDATA slot size is 16 bytes, so 6 slots span 96 bytes: the 12 saved entries at 8 bytes each */ - llck->index++; - - memcpy(mp->data, temp_table, sizeof(temp_table)); - - mark_metapage_dirty(mp); - release_metapage(mp); - - /* - * Logging is now directed by xtree tlocks - */ - clear_cflag(COMMIT_Dirtable, ip); - } - - offset = (index - 2) * sizeof(struct dir_table_slot); - page_offset = offset & (PSIZE - 1); - blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage; - if (page_offset == 0) { - /* - * This will be the beginning of a new page - * - * a new extent of nbperpage blocks is inserted at blkno, - * i_size grows by PSIZE and the fresh page is zeroed; if the - * page cannot be obtained the xtree is truncated again and - * the NULL mp is caught by the check after this branch. - */ - xaddr = 0; - if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) { - jfs_warn("add_index: xtInsert failed!"); - goto clean_up; - } - ip->i_size += PSIZE; - - if ((mp = get_index_page(ip, blkno))) - memset(mp->data, 0, PSIZE); /* Just looks better */ - else - xtTruncate(tid, ip, offset, COMMIT_PWMAP); - } else - mp = read_index_page(ip, blkno); - - if (!mp) { - jfs_err("add_index: get/read_metapage failed!"); - goto clean_up; - } - - lock_index(tid, ip, mp, index); - - dirtab_slot = - (struct dir_table_slot *) ((char *) mp->data + page_offset); - dirtab_slot->flag = DIR_INDEX_VALID; - dirtab_slot->slot = slot; - DTSaddress(dirtab_slot, bn); - - mark_metapage_dirty(mp); - release_metapage(mp); - - return index; - - clean_up: - - jfs_ip->next_index--; - - return 0; -} - -/* - * free_index() - * - * Marks an entry to the directory index table as free. 
- */ -static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next) -{ - struct dir_table_slot *dirtab_slot; - s64 lblock; - struct metapage *mp = NULL; - - dirtab_slot = find_index(ip, index, &mp, &lblock); - - if (!dirtab_slot) - return; - - dirtab_slot->flag = DIR_INDEX_FREE; - dirtab_slot->slot = dirtab_slot->addr1 = 0; - dirtab_slot->addr2 = cpu_to_le32(next); - - if (mp) { - lock_index(tid, ip, mp, index); - mark_metapage_dirty(mp); - release_metapage(mp); - } else - set_cflag(COMMIT_Dirtable, ip); -} - -/* - * modify_index() - * - * Changes an entry in the directory index table - * - * stores bn and slot in the entry that find_index() locates for index; - * nothing happens when the entry cannot be found. - * when the entry lives in a metapage, that page is line-locked and - * marked dirty but NOT released here: *mp and *lblock are handed back - * through the pointers passed in. For an inline table the inode is - * flagged COMMIT_Dirtable instead. - */ -static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn, - int slot, struct metapage ** mp, s64 *lblock) -{ - struct dir_table_slot *dirtab_slot; - - dirtab_slot = find_index(ip, index, mp, lblock); - - if (!dirtab_slot) - return; - - DTSaddress(dirtab_slot, bn); - dirtab_slot->slot = slot; - - if (*mp) { - lock_index(tid, ip, *mp, index); - mark_metapage_dirty(*mp); - } else - set_cflag(COMMIT_Dirtable, ip); -} - -/* - * read_index() - * - * reads a directory table slot - * - * copies the entry for index into *dirtab_slot and returns 0, or - * returns -EIO when find_index() cannot locate the entry. A metapage - * obtained along the way is released before returning. - */ -static int read_index(struct inode *ip, u32 index, - struct dir_table_slot * dirtab_slot) -{ - s64 lblock; - struct metapage *mp = NULL; - struct dir_table_slot *slot; - - slot = find_index(ip, index, &mp, &lblock); - if (!slot) { - return -EIO; - } - - memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot)); - - if (mp) - release_metapage(mp); - - return 0; -} - -/* - * dtSearch() - * - * function: - * Search for the entry with specified key - * - * parameter: - * ip - directory inode to search - * key - name to look for; a private copy is searched, upper-cased - * first when the JFS_OS2 mount flag is set - * data - inode number, in/out: for JFS_REMOVE and JFS_RENAME the - * value passed in must equal the entry found; on a hit it - * receives the inode number found, on a JFS_CREATE or - * JFS_FINDDIR miss it is set to 0 - * btstack - traversal stack, cleared here; parent frames are pushed - * on the way down and the leaf result is stored in its top - * flag - JFS_LOOKUP, JFS_CREATE, JFS_REMOVE, JFS_RENAME or - * JFS_FINDDIR - * - * return: 0 - search result on stack, leaf page pinned; - * (a JFS_LOOKUP hit also returns 0, but releases the page - * and records nothing on the stack) - * errno - I/O error - * -ENOMEM - no memory for the copy of the key - * -EEXIST - JFS_CREATE and the name is already present - * -ENOENT - JFS_LOOKUP, JFS_REMOVE or JFS_RENAME and no match - * -ESTALE - JFS_REMOVE or JFS_RENAME and *data differs - * -EIO - traversal stack overrun - * failures reported by DT_GETPAGE are passed through - */ -int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, - struct btstack * btstack, int flag) -{ - int rc = 0; - int cmp = 1; /* init for empty page */ - s64 bn; - struct metapage *mp; - dtpage_t *p; - s8 *stbl; - int base, index, lim; - struct btframe *btsp; - pxd_t *pxd; - int psize = 288; /* initial in-line 
directory */ - ino_t inumber; - struct component_name ciKey; - struct super_block *sb = ip->i_sb; - - ciKey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), GFP_NOFS); - if (!ciKey.name) { - rc = -ENOMEM; - goto dtSearch_Exit2; - } - - - /* uppercase search key for c-i directory (the copy in ciKey is what gets folded; key itself is left alone) */ - UniStrcpy(ciKey.name, key->name); - ciKey.namlen = key->namlen; - - /* only uppercase if case-insensitive support is on */ - if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) { - ciToUpper(&ciKey); - } - BT_CLR(btstack); /* reset stack */ - - /* init level count for max pages to split */ - btstack->nsplit = 1; - - /* - * search down tree from root: - * - * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of - * internal page, child page Pi contains entry with k, Ki <= K < Kj. - * - * if entry with search key K is not found - * internal page search find the entry with largest key Ki - * less than K which point to the child page to search; - * leaf page search find the entry with smallest key Kj - * greater than K so that the returned index is the position of - * the entry to be shifted right for insertion of new entry. - * for empty tree, search key is greater than any key of the tree. - * - * by convention, root bn = 0. - */ - for (bn = 0;;) { - /* get/pin the page to search */ - DT_GETPAGE(ip, bn, mp, psize, p, rc); - if (rc) - goto dtSearch_Exit1; - - /* get sorted entry table of the page */ - stbl = DT_GETSTBL(p); - - /* - * binary search with search key K on the current page. 
- */ - for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) { - index = base + (lim >> 1); - - if (p->header.flag & BT_LEAF) { - /* uppercase leaf name to compare */ - cmp = - ciCompare(&ciKey, p, stbl[index], - JFS_SBI(sb)->mntflag); - } else { - /* router key is in uppercase */ - - cmp = dtCompare(&ciKey, p, stbl[index]); - - - } - if (cmp == 0) { - /* - * search hit - */ - /* search hit - leaf page: - * return the entry found - */ - if (p->header.flag & BT_LEAF) { - inumber = le32_to_cpu( - ((struct ldtentry *) & p->slot[stbl[index]])->inumber); - - /* - * search for JFS_LOOKUP - * (report the inode number, then release the page via out) - */ - if (flag == JFS_LOOKUP) { - *data = inumber; - rc = 0; - goto out; - } - - /* - * search for JFS_CREATE - * (the name exists already: -EEXIST, page released) - */ - if (flag == JFS_CREATE) { - *data = inumber; - rc = -EEXIST; - goto out; - } - - /* - * search for JFS_REMOVE or JFS_RENAME - * (the inode number passed in must match the entry) - */ - if ((flag == JFS_REMOVE || - flag == JFS_RENAME) && - *data != inumber) { - rc = -ESTALE; - goto out; - } - - /* - * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME - */ - /* save search result; the page stays pinned for the caller */ - *data = inumber; - btsp = btstack->top; - btsp->bn = bn; - btsp->index = index; - btsp->mp = mp; - - rc = 0; - goto dtSearch_Exit1; - } - - /* search hit - internal page: - * descend/search its child page - */ - goto getChild; - } - - if (cmp > 0) { - base = index + 1; - --lim; - } - } - - /* - * search miss - * - * base is the smallest index with key (Kj) greater than - * search key (K) and may be zero or (maxindex + 1) index. - */ - /* - * search miss - leaf page - * - * return location of entry (base) where new entry with - * search key K is to be inserted. 
- */ - if (p->header.flag & BT_LEAF) { - /* - * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME - */ - if (flag == JFS_LOOKUP || flag == JFS_REMOVE || - flag == JFS_RENAME) { - rc = -ENOENT; - goto out; - } - - /* - * search for JFS_CREATE|JFS_FINDDIR: - * - * save search result - * (index is the insertion position base; the page stays pinned) - */ - *data = 0; - btsp = btstack->top; - btsp->bn = bn; - btsp->index = base; - btsp->mp = mp; - - rc = 0; - goto dtSearch_Exit1; - } - - /* - * search miss - internal page - * - * if base is non-zero, decrement base by one to get the parent - * entry of the child page to search. - */ - index = base ? base - 1 : base; - - /* - * go down to child page - */ - getChild: - /* update max. number of pages to split */ - if (BT_STACK_FULL(btstack)) { - /* Something's corrupted, mark filesystem dirty so - * chkdsk will fix it. - */ - jfs_error(sb, "stack overrun in dtSearch!"); - BT_STACK_DUMP(btstack); - rc = -EIO; - goto out; - } - btstack->nsplit++; - - /* push (bn, index) of the parent page/entry */ - BT_PUSH(btstack, bn, index); - - /* get the child page block number */ - pxd = (pxd_t *) & p->slot[stbl[index]]; - bn = addressPXD(pxd); - psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize; - - /* unpin the parent page */ - DT_PUTPAGE(mp); - } - - out: - DT_PUTPAGE(mp); - - dtSearch_Exit1: - - kfree(ciKey.name); - - dtSearch_Exit2: - - return rc; -} - - -/* - * dtInsert() - * - * function: insert an entry to directory tree - * - * parameter: - * tid - transaction id, used for the page locks and stored in - * the leaf data of indexed directories - * ip - directory inode - * name - name of the new entry - * fsn - points to the inode number stored as the entry data - * btstack - result of the preceding search: its top frame names the - * pinned leaf page and the index at which to insert - * - * return: 0 - success; - * errno - failure; - * -EMLINK - indexed directory whose next_index equals DIREND - * when the leaf lacks room the result of dtSplitUp() is returned. - * the leaf page is unpinned on every path, either here or by - * dtSplitUp(). - */ -int dtInsert(tid_t tid, struct inode *ip, - struct component_name * name, ino_t * fsn, struct btstack * btstack) -{ - int rc = 0; - struct metapage *mp; /* meta-page buffer */ - dtpage_t *p; /* base B+-tree index page */ - s64 bn; - int index; - struct dtsplit split; /* split information */ - ddata_t data; - struct dt_lock *dtlck; - int n; - struct tlock *tlck; - struct lv *lv; - - /* - * retrieve search result - * - * dtSearch() returns (leaf page pinned, index at which to insert). - * n.b. 
dtSearch() may return index of (maxindex + 1) of - * the full page. - */ - DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); - - /* - * insert entry for new key - * - * n becomes the number of slots the entry needs; indexed - * directories size it with NDTLEAF and pass tid and ip along in - * the leaf data, legacy ones use NDTLEAF_LEGACY and a NULL ip. - */ - if (DO_INDEX(ip)) { - if (JFS_IP(ip)->next_index == DIREND) { - DT_PUTPAGE(mp); - return -EMLINK; - } - n = NDTLEAF(name->namlen); - data.leaf.tid = tid; - data.leaf.ip = ip; - } else { - n = NDTLEAF_LEGACY(name->namlen); - data.leaf.ip = NULL; /* signifies legacy directory format */ - } - data.leaf.ino = *fsn; - - /* - * leaf page does not have enough room for new entry: - * - * extend/split the leaf page; - * - * dtSplitUp() will insert the entry and unpin the leaf page. - */ - if (n > p->header.freecnt) { - split.mp = mp; - split.index = index; - split.nslot = n; - split.key = name; - split.data = &data; - rc = dtSplitUp(tid, ip, &split, btstack); - return rc; - } - - /* - * leaf page does have enough room for new entry: - * - * insert the new data entry into the leaf page; - */ - BT_MARK_DIRTY(mp, ip); - /* - * acquire a transaction lock on the leaf page - */ - tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); - dtlck = (struct dt_lock *) & tlck->lock; - ASSERT(dtlck->index == 0); - lv = & dtlck->lv[0]; - - /* linelock header */ - lv->offset = 0; - lv->length = 1; - dtlck->index++; - - dtInsertEntry(p, index, name, &data, &dtlck); - - /* linelock stbl of non-root leaf page (from the stbl slot holding index up to the one holding the last entry) */ - if (!(p->header.flag & BT_ROOT)) { - if (dtlck->index >= dtlck->maxcnt) - dtlck = (struct dt_lock *) txLinelock(dtlck); - lv = & dtlck->lv[dtlck->index]; - n = index >> L2DTSLOTSIZE; - lv->offset = p->header.stblindex + n; - lv->length = - ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; - dtlck->index++; - } - - /* unpin the leaf page */ - DT_PUTPAGE(mp); - - return 0; -} - - -/* - * dtSplitUp() - * - * function: propagate insertion bottom up; - * - * three cases are handled in turn: the page to split is the root - * (dtSplitRoot), it is a first leaf extent still smaller than PSIZE - * (dtExtendPage), or it is a regular page (dtSplitPage), after which - * router entries are inserted into the parents popped off btstack, - * splitting those as well while they are full. - * - * parameter: - * tid - transaction id - * ip - directory inode - * split - mp, index, nslot, key and data as set up by the caller; - * pxdlist is filled in here - * btstack - parent frames recorded by the search; nsplit gives the - * number of extents to allocate for a regular split - * - * return: 0 - success; - * errno - failure; - * -ENOMEM - no memory for the router key buffer - * leaf page unpinned; - */ -static int dtSplitUp(tid_t tid, - struct inode *ip, struct dtsplit * split, 
struct btstack * btstack) -{ - struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); - int rc = 0; - struct metapage *smp; - dtpage_t *sp; /* split page */ - struct metapage *rmp; - dtpage_t *rp; /* new right page split from sp */ - pxd_t rpxd; /* new right page extent descriptor */ - struct metapage *lmp; - dtpage_t *lp; /* left child page */ - int skip; /* index of entry of insertion */ - struct btframe *parent; /* parent page entry on traverse stack */ - s64 xaddr, nxaddr; - int xlen, xsize; - struct pxdlist pxdlist; - pxd_t *pxd; - struct component_name key = { 0, NULL }; - ddata_t *data = split->data; - int n; - struct dt_lock *dtlck; - struct tlock *tlck; - struct lv *lv; - int quota_allocation = 0; - - /* get split page */ - smp = split->mp; - sp = DT_PAGE(ip, smp); - - key.name = kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t), GFP_NOFS); - if (!key.name) { - DT_PUTPAGE(smp); - rc = -ENOMEM; - goto dtSplitUp_Exit; - } - - /* - * split leaf page - * - * The split routines insert the new entry, and - * acquire txLock as appropriate. 
- */ - /* - * split root leaf page: - */ - if (sp->header.flag & BT_ROOT) { - /* - * allocate a single extent child page - * - * one block as a rule; a second block is added when a one-block - * page, less its stbl and the slots currently used in the - * root, would leave no more than split->nslot slots. - * NOTE(review): for non-indexed directories i_size is set - * below even when dtSplitRoot() failed - confirm intended. - */ - xlen = 1; - n = sbi->bsize >> L2DTSLOTSIZE; - n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ - n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */ - if (n <= split->nslot) - xlen++; - if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) { - DT_PUTPAGE(smp); - goto freeKeyName; - } - - pxdlist.maxnpxd = 1; - pxdlist.npxd = 0; - pxd = &pxdlist.pxd[0]; - PXDaddress(pxd, xaddr); - PXDlength(pxd, xlen); - split->pxdlist = &pxdlist; - rc = dtSplitRoot(tid, ip, split, &rmp); - - if (rc) - dbFree(ip, xaddr, xlen); - else - DT_PUTPAGE(rmp); - - DT_PUTPAGE(smp); - - if (!DO_INDEX(ip)) - ip->i_size = xlen << sbi->l2bsize; - - goto freeKeyName; - } - - /* - * extend first leaf page - * - * extend the 1st extent if less than buffer page size - * (dtExtendPage() returns leaf page unpinned) - * - * n ends up as the number of blocks to add: xlen (doubling the - * extent) or, when doubling would still leave no more than - * split->nslot free slots, three times xlen. - */ - pxd = &sp->header.self; - xlen = lengthPXD(pxd); - xsize = xlen << sbi->l2bsize; - if (xsize < PSIZE) { - xaddr = addressPXD(pxd); - n = xsize >> L2DTSLOTSIZE; - n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ - if ((n + sp->header.freecnt) <= split->nslot) - n = xlen + (xlen << 1); - else - n = xlen; - - /* Allocate blocks to quota. 
*/ - rc = dquot_alloc_block(ip, n); - if (rc) - goto extendOut; - quota_allocation += n; - - if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, - (s64) n, &nxaddr))) - goto extendOut; - - pxdlist.maxnpxd = 1; - pxdlist.npxd = 0; - pxd = &pxdlist.pxd[0]; - PXDaddress(pxd, nxaddr) - PXDlength(pxd, xlen + n); - split->pxdlist = &pxdlist; - if ((rc = dtExtendPage(tid, ip, split, btstack))) { - nxaddr = addressPXD(pxd); - if (xaddr != nxaddr) { - /* free relocated extent (the whole new extent, since the address changed) */ - xlen = lengthPXD(pxd); - dbFree(ip, nxaddr, (s64) xlen); - } else { - /* free extended delta (only the n blocks appended behind the old extent) */ - xlen = lengthPXD(pxd) - n; - xaddr = addressPXD(pxd) + xlen; - dbFree(ip, xaddr, (s64) n); - } - } else if (!DO_INDEX(ip)) - ip->i_size = lengthPXD(pxd) << sbi->l2bsize; - - - extendOut: - DT_PUTPAGE(smp); - goto freeKeyName; - } - - /* - * split leaf page <sp> into <sp> and a new right page <rp>. - * - * return <rp> pinned and its extent descriptor <rpxd> - */ - /* - * allocate new directory page extent and - * new index page(s) to cover page split(s) - * - * allocation hint: ? - * - * one extent of nbperpage blocks is allocated per level counted - * in btstack->nsplit; maxnpxd counts the extents obtained, and - * splitOut frees entries npxd up to maxnpxd (npxd is presumably - * advanced by the split routines as extents are consumed). - */ - n = btstack->nsplit; - pxdlist.maxnpxd = pxdlist.npxd = 0; - xlen = sbi->nbperpage; - for (pxd = pxdlist.pxd; n > 0; n--, pxd++) { - if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) { - PXDaddress(pxd, xaddr); - PXDlength(pxd, xlen); - pxdlist.maxnpxd++; - continue; - } - - DT_PUTPAGE(smp); - - /* undo allocation */ - goto splitOut; - } - - split->pxdlist = &pxdlist; - if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) { - DT_PUTPAGE(smp); - - /* undo allocation */ - goto splitOut; - } - - if (!DO_INDEX(ip)) - ip->i_size += PSIZE; - - /* - * propagate up the router entry for the leaf page just split - * - * insert a router entry for the new page into the parent page, - * propagate the insert/split up the tree by walking back the stack - * of (bn of parent page, index of child page entry in parent page) - * that were traversed during the search for the page that split. 
- * - * the propagation of insert/split up the tree stops if the root - * splits or the page inserted into doesn't have to split to hold - * the new entry. - * - * the parent entry for the split page remains the same, and - * a new entry is inserted at its right with the first key and - * block number of the new right page. - * - * There are a maximum of 4 pages pinned at any time: - * two children, left parent and right parent (when the parent splits). - * keep the child pages pinned while working on the parent. - * make sure that all pins are released at exit. - */ - while ((parent = BT_POP(btstack)) != NULL) { - /* parent page specified by stack frame <parent> */ - - /* keep current child pages (<lp>, <rp>) pinned */ - lmp = smp; - lp = sp; - - /* - * insert router entry in parent for new right child page <rp> - */ - /* get the parent page <sp> */ - DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); - if (rc) { - DT_PUTPAGE(lmp); - DT_PUTPAGE(rmp); - goto splitOut; - } - - /* - * The new key entry goes ONE AFTER the index of parent entry, - * because the split was to the right. - */ - skip = parent->index + 1; - - /* - * compute the key for the router entry - * - * key suffix compression: - * for internal pages that have leaf pages as children, - * retain only what's needed to distinguish between - * the new entry and the entry on the page to its left. - * If the keys compare equal, retain the entire key. - * - * note that compression is performed only at computing - * router key at the lowest internal level. - * further compression of the key between pairs of higher - * level internal pages loses too much information and - * the search may fail. - * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,} - * results in two adjacent parent entries (a)(xx). 
- * if split occurs between these two entries, and - * if compression is applied, the router key of parent entry - * of right page (x) will divert search for x into right - * subtree and miss x in the left subtree.) - * - * the entire key must be retained for the next-to-leftmost - * internal key at any level of the tree, or search may fail - * (e.g., ?) - */ - switch (rp->header.flag & BT_TYPE) { - case BT_LEAF: - /* - * compute the length of prefix for suffix compression - * between last entry of left page and first entry - * of right page - * - * NOTE(review): the first operand of the condition below - * is subsumed by its last one, so the test is equivalent - * to (sp->header.prev != 0 || skip > 1). - */ - if ((sp->header.flag & BT_ROOT && skip > 1) || - sp->header.prev != 0 || skip > 1) { - /* compute uppercase router prefix key */ - rc = ciGetLeafPrefixKey(lp, - lp->header.nextindex-1, - rp, 0, &key, - sbi->mntflag); - if (rc) { - DT_PUTPAGE(lmp); - DT_PUTPAGE(rmp); - DT_PUTPAGE(smp); - goto splitOut; - } - } else { - /* next to leftmost entry of - lowest internal level */ - - /* compute uppercase router key */ - dtGetKey(rp, 0, &key, sbi->mntflag); - key.name[key.namlen] = 0; - - if ((sbi->mntflag & JFS_OS2) == JFS_OS2) - ciToUpper(&key); - } - - n = NDTINTERNAL(key.namlen); - break; - - case BT_INTERNAL: - dtGetKey(rp, 0, &key, sbi->mntflag); - n = NDTINTERNAL(key.namlen); - break; - - default: - jfs_err("dtSplitUp(): UFO!"); - break; - } - - /* unpin left child page */ - DT_PUTPAGE(lmp); - - /* - * compute the data for the router entry - */ - data->xd = rpxd; /* child page xd */ - - /* - * parent page is full - split the parent page - */ - if (n > sp->header.freecnt) { - /* init for parent page split */ - split->mp = smp; - split->index = skip; /* index at insert */ - split->nslot = n; - split->key = &key; - /* split->data = data; */ - - /* unpin right child page */ - DT_PUTPAGE(rmp); - - /* The split routines insert the new entry, - * acquire txLock as appropriate. - * return <rp> pinned and its block number <rbn>. - */ - rc = (sp->header.flag & BT_ROOT) ? 
- dtSplitRoot(tid, ip, split, &rmp) : - dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd); - if (rc) { - DT_PUTPAGE(smp); - goto splitOut; - } - - /* smp and rmp are pinned */ - } - /* - * parent page is not full - insert router entry in parent page - */ - else { - BT_MARK_DIRTY(smp, ip); - /* - * acquire a transaction lock on the parent page - */ - tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); - dtlck = (struct dt_lock *) & tlck->lock; - ASSERT(dtlck->index == 0); - lv = & dtlck->lv[0]; - - /* linelock header */ - lv->offset = 0; - lv->length = 1; - dtlck->index++; - - /* linelock stbl of non-root parent page */ - if (!(sp->header.flag & BT_ROOT)) { - lv++; - n = skip >> L2DTSLOTSIZE; - lv->offset = sp->header.stblindex + n; - lv->length = - ((sp->header.nextindex - - 1) >> L2DTSLOTSIZE) - n + 1; - dtlck->index++; - } - - dtInsertEntry(sp, skip, &key, data, &dtlck); - - /* exit propagate up */ - break; - } - } - - /* unpin current split and its right page */ - DT_PUTPAGE(smp); - DT_PUTPAGE(rmp); - - /* - * free remaining extents allocated for split - * (entries npxd up to maxnpxd - 1 of pxdlist) - */ - splitOut: - n = pxdlist.npxd; - pxd = &pxdlist.pxd[n]; - for (; n < pxdlist.maxnpxd; n++, pxd++) - dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd)); - - freeKeyName: - kfree(key.name); - - /* Rollback quota allocation (quota is only charged on the extend-first-leaf path above) */ - if (rc && quota_allocation) - dquot_free_block(ip, quota_allocation); - - dtSplitUp_Exit: - - return rc; -} - - -/* - * dtSplitPage() - * - * function: Split a non-root page of a btree. 
- * - * parameter: - * - * return: 0 - success; - * errno - failure; - * return split and new page pinned; - */ -static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, - struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp) -{ - int rc = 0; - struct metapage *smp; - dtpage_t *sp; - struct metapage *rmp; - dtpage_t *rp; /* new right page allocated */ - s64 rbn; /* new right page block number */ - struct metapage *mp; - dtpage_t *p; - s64 nextbn; - struct pxdlist *pxdlist; - pxd_t *pxd; - int skip, nextindex, half, left, nxt, off, si; - struct ldtentry *ldtentry; - struct idtentry *idtentry; - u8 *stbl; - struct dtslot *f; - int fsi, stblsize; - int n; - struct dt_lock *sdtlck, *rdtlck; - struct tlock *tlck; - struct dt_lock *dtlck; - struct lv *slv, *rlv, *lv; - - /* get split page */ - smp = split->mp; - sp = DT_PAGE(ip, smp); - - /* - * allocate the new right page for the split - */ - pxdlist = split->pxdlist; - pxd = &pxdlist->pxd[pxdlist->npxd]; - pxdlist->npxd++; - rbn = addressPXD(pxd); - rmp = get_metapage(ip, rbn, PSIZE, 1); - if (rmp == NULL) - return -EIO; - - /* Allocate blocks to quota. 
*/ - rc = dquot_alloc_block(ip, lengthPXD(pxd)); - if (rc) { - release_metapage(rmp); - return rc; - } - - jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); - - BT_MARK_DIRTY(rmp, ip); - /* - * acquire a transaction lock on the new right page - */ - tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); - rdtlck = (struct dt_lock *) & tlck->lock; - - rp = (dtpage_t *) rmp->data; - *rpp = rp; - rp->header.self = *pxd; - - BT_MARK_DIRTY(smp, ip); - /* - * acquire a transaction lock on the split page - * - * action: - */ - tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); - sdtlck = (struct dt_lock *) & tlck->lock; - - /* linelock header of split page */ - ASSERT(sdtlck->index == 0); - slv = & sdtlck->lv[0]; - slv->offset = 0; - slv->length = 1; - sdtlck->index++; - - /* - * initialize/update sibling pointers between sp and rp - */ - nextbn = le64_to_cpu(sp->header.next); - rp->header.next = cpu_to_le64(nextbn); - rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); - sp->header.next = cpu_to_le64(rbn); - - /* - * initialize new right page - */ - rp->header.flag = sp->header.flag; - - /* compute sorted entry table at start of extent data area */ - rp->header.nextindex = 0; - rp->header.stblindex = 1; - - n = PSIZE >> L2DTSLOTSIZE; - rp->header.maxslot = n; - stblsize = (n + 31) >> L2DTSLOTSIZE; /* in unit of slot */ - - /* init freelist */ - fsi = rp->header.stblindex + stblsize; - rp->header.freelist = fsi; - rp->header.freecnt = rp->header.maxslot - fsi; - - /* - * sequential append at tail: append without split - * - * If splitting the last page on a level because of appending - * a entry to it (skip is maxentry), it's likely that the access is - * sequential. Adding an empty page on the side of the level is less - * work and can push the fill factor much higher than normal. - * If we're wrong it's no big deal, we'll just do the split the right - * way next time. 
- * (It may look like it's equally easy to do a similar hack for - * reverse sorted data, that is, split the tree left, - * but it's not. Be my guest.) - */ - if (nextbn == 0 && split->index == sp->header.nextindex) { - /* linelock header + stbl (first slot) of new page */ - rlv = & rdtlck->lv[rdtlck->index]; - rlv->offset = 0; - rlv->length = 2; - rdtlck->index++; - - /* - * initialize freelist of new right page - */ - f = &rp->slot[fsi]; - for (fsi++; fsi < rp->header.maxslot; f++, fsi++) - f->next = fsi; - f->next = -1; - - /* insert entry at the first entry of the new right page */ - dtInsertEntry(rp, 0, split->key, split->data, &rdtlck); - - goto out; - } - - /* - * non-sequential insert (at possibly middle page) - */ - - /* - * update prev pointer of previous right sibling page; - */ - if (nextbn != 0) { - DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); - if (rc) { - discard_metapage(rmp); - return rc; - } - - BT_MARK_DIRTY(mp, ip); - /* - * acquire a transaction lock on the next page - */ - tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); - jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p", - tlck, ip, mp); - dtlck = (struct dt_lock *) & tlck->lock; - - /* linelock header of previous right sibling page */ - lv = & dtlck->lv[dtlck->index]; - lv->offset = 0; - lv->length = 1; - dtlck->index++; - - p->header.prev = cpu_to_le64(rbn); - - DT_PUTPAGE(mp); - } - - /* - * split the data between the split and right pages. 
- */ - skip = split->index; - half = (PSIZE >> L2DTSLOTSIZE) >> 1; /* swag */ - left = 0; - - /* - * compute fill factor for split pages - * - * <nxt> traces the next entry to move to rp - * <off> traces the next entry to stay in sp - */ - stbl = (u8 *) & sp->slot[sp->header.stblindex]; - nextindex = sp->header.nextindex; - for (nxt = off = 0; nxt < nextindex; ++off) { - if (off == skip) - /* check for fill factor with new entry size */ - n = split->nslot; - else { - si = stbl[nxt]; - switch (sp->header.flag & BT_TYPE) { - case BT_LEAF: - ldtentry = (struct ldtentry *) & sp->slot[si]; - if (DO_INDEX(ip)) - n = NDTLEAF(ldtentry->namlen); - else - n = NDTLEAF_LEGACY(ldtentry-> - namlen); - break; - - case BT_INTERNAL: - idtentry = (struct idtentry *) & sp->slot[si]; - n = NDTINTERNAL(idtentry->namlen); - break; - - default: - break; - } - - ++nxt; /* advance to next entry to move in sp */ - } - - left += n; - if (left >= half) - break; - } - - /* <nxt> poins to the 1st entry to move */ - - /* - * move entries to right page - * - * dtMoveEntry() initializes rp and reserves entry for insertion - * - * split page moved out entries are linelocked; - * new/right page moved in entries are linelocked; - */ - /* linelock header + stbl of new right page */ - rlv = & rdtlck->lv[rdtlck->index]; - rlv->offset = 0; - rlv->length = 5; - rdtlck->index++; - - dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip)); - - sp->header.nextindex = nxt; - - /* - * finalize freelist of new right page - */ - fsi = rp->header.freelist; - f = &rp->slot[fsi]; - for (fsi++; fsi < rp->header.maxslot; f++, fsi++) - f->next = fsi; - f->next = -1; - - /* - * Update directory index table for entries now in right page - */ - if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { - s64 lblock; - - mp = NULL; - stbl = DT_GETSTBL(rp); - for (n = 0; n < rp->header.nextindex; n++) { - ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; - modify_index(tid, ip, le32_to_cpu(ldtentry->index), - rbn, n, &mp, 
&lblock); - } - if (mp) - release_metapage(mp); - } - - /* - * the skipped index was on the left page, - */ - if (skip <= off) { - /* insert the new entry in the split page */ - dtInsertEntry(sp, skip, split->key, split->data, &sdtlck); - - /* linelock stbl of split page */ - if (sdtlck->index >= sdtlck->maxcnt) - sdtlck = (struct dt_lock *) txLinelock(sdtlck); - slv = & sdtlck->lv[sdtlck->index]; - n = skip >> L2DTSLOTSIZE; - slv->offset = sp->header.stblindex + n; - slv->length = - ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; - sdtlck->index++; - } - /* - * the skipped index was on the right page, - */ - else { - /* adjust the skip index to reflect the new position */ - skip -= nxt; - - /* insert the new entry in the right page */ - dtInsertEntry(rp, skip, split->key, split->data, &rdtlck); - } - - out: - *rmpp = rmp; - *rpxdp = *pxd; - - return rc; -} - - -/* - * dtExtendPage() - * - * function: extend 1st/only directory leaf page - * - * parameter: - * - * return: 0 - success; - * errno - failure; - * return extended page pinned; - */ -static int dtExtendPage(tid_t tid, - struct inode *ip, struct dtsplit * split, struct btstack * btstack) -{ - struct super_block *sb = ip->i_sb; - int rc; - struct metapage *smp, *pmp, *mp; - dtpage_t *sp, *pp; - struct pxdlist *pxdlist; - pxd_t *pxd, *tpxd; - int xlen, xsize; - int newstblindex, newstblsize; - int oldstblindex, oldstblsize; - int fsi, last; - struct dtslot *f; - struct btframe *parent; - int n; - struct dt_lock *dtlck; - s64 xaddr, txaddr; - struct tlock *tlck; - struct pxd_lock *pxdlock; - struct lv *lv; - uint type; - struct ldtentry *ldtentry; - u8 *stbl; - - /* get page to extend */ - smp = split->mp; - sp = DT_PAGE(ip, smp); - - /* get parent/root page */ - parent = BT_POP(btstack); - DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc); - if (rc) - return (rc); - - /* - * extend the extent - */ - pxdlist = split->pxdlist; - pxd = &pxdlist->pxd[pxdlist->npxd]; - pxdlist->npxd++; - - xaddr = 
addressPXD(pxd); - tpxd = &sp->header.self; - txaddr = addressPXD(tpxd); - /* in-place extension */ - if (xaddr == txaddr) { - type = tlckEXTEND; - } - /* relocation */ - else { - type = tlckNEW; - - /* save moved extent descriptor for later free */ - tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE); - pxdlock = (struct pxd_lock *) & tlck->lock; - pxdlock->flag = mlckFREEPXD; - pxdlock->pxd = sp->header.self; - pxdlock->index = 1; - - /* - * Update directory index table to reflect new page address - */ - if (DO_INDEX(ip)) { - s64 lblock; - - mp = NULL; - stbl = DT_GETSTBL(sp); - for (n = 0; n < sp->header.nextindex; n++) { - ldtentry = - (struct ldtentry *) & sp->slot[stbl[n]]; - modify_index(tid, ip, - le32_to_cpu(ldtentry->index), - xaddr, n, &mp, &lblock); - } - if (mp) - release_metapage(mp); - } - } - - /* - * extend the page - */ - sp->header.self = *pxd; - - jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp); - - BT_MARK_DIRTY(smp, ip); - /* - * acquire a transaction lock on the extended/leaf page - */ - tlck = txLock(tid, ip, smp, tlckDTREE | type); - dtlck = (struct dt_lock *) & tlck->lock; - lv = & dtlck->lv[0]; - - /* update buffer extent descriptor of extended page */ - xlen = lengthPXD(pxd); - xsize = xlen << JFS_SBI(sb)->l2bsize; - - /* - * copy old stbl to new stbl at start of extended area - */ - oldstblindex = sp->header.stblindex; - oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE; - newstblindex = sp->header.maxslot; - n = xsize >> L2DTSLOTSIZE; - newstblsize = (n + 31) >> L2DTSLOTSIZE; - memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex], - sp->header.nextindex); - - /* - * in-line extension: linelock old area of extended page - */ - if (type == tlckEXTEND) { - /* linelock header */ - lv->offset = 0; - lv->length = 1; - dtlck->index++; - lv++; - - /* linelock new stbl of extended page */ - lv->offset = newstblindex; - lv->length = newstblsize; - } - /* - * relocation: linelock whole relocated area - */ - else { - 
lv->offset = 0; - lv->length = sp->header.maxslot + newstblsize; - } - - dtlck->index++; - - sp->header.maxslot = n; - sp->header.stblindex = newstblindex; - /* sp->header.nextindex remains the same */ - - /* - * add old stbl region at head of freelist - */ - fsi = oldstblindex; - f = &sp->slot[fsi]; - last = sp->header.freelist; - for (n = 0; n < oldstblsize; n++, fsi++, f++) { - f->next = last; - last = fsi; - } - sp->header.freelist = last; - sp->header.freecnt += oldstblsize; - - /* - * append free region of newly extended area at tail of freelist - */ - /* init free region of newly extended area */ - fsi = n = newstblindex + newstblsize; - f = &sp->slot[fsi]; - for (fsi++; fsi < sp->header.maxslot; f++, fsi++) - f->next = fsi; - f->next = -1; - - /* append new free region at tail of old freelist */ - fsi = sp->header.freelist; - if (fsi == -1) - sp->header.freelist = n; - else { - do { - f = &sp->slot[fsi]; - fsi = f->next; - } while (fsi != -1); - - f->next = n; - } - - sp->header.freecnt += sp->header.maxslot - n; - - /* - * insert the new entry - */ - dtInsertEntry(sp, split->index, split->key, split->data, &dtlck); - - BT_MARK_DIRTY(pmp, ip); - /* - * linelock any freeslots residing in old extent - */ - if (type == tlckEXTEND) { - n = sp->header.maxslot >> 2; - if (sp->header.freelist < n) - dtLinelockFreelist(sp, n, &dtlck); - } - - /* - * update parent entry on the parent/root page - */ - /* - * acquire a transaction lock on the parent/root page - */ - tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY); - dtlck = (struct dt_lock *) & tlck->lock; - lv = & dtlck->lv[dtlck->index]; - - /* linelock parent entry - 1st slot */ - lv->offset = 1; - lv->length = 1; - dtlck->index++; - - /* update the parent pxd for page extension */ - tpxd = (pxd_t *) & pp->slot[1]; - *tpxd = *pxd; - - DT_PUTPAGE(pmp); - return 0; -} - - -/* - * dtSplitRoot() - * - * function: - * split the full root page into - * original/root/split page and new right page - * i.e., root remains 
fixed in tree anchor (inode) and - * the root is copied to a single new right child page - * since root page << non-root page, and - * the split root page contains a single entry for the - * new right child page. - * - * parameter: - * - * return: 0 - success; - * errno - failure; - * return new page pinned; - */ -static int dtSplitRoot(tid_t tid, - struct inode *ip, struct dtsplit * split, struct metapage ** rmpp) -{ - struct super_block *sb = ip->i_sb; - struct metapage *smp; - dtroot_t *sp; - struct metapage *rmp; - dtpage_t *rp; - s64 rbn; - int xlen; - int xsize; - struct dtslot *f; - s8 *stbl; - int fsi, stblsize, n; - struct idtentry *s; - pxd_t *ppxd; - struct pxdlist *pxdlist; - pxd_t *pxd; - struct dt_lock *dtlck; - struct tlock *tlck; - struct lv *lv; - int rc; - - /* get split root page */ - smp = split->mp; - sp = &JFS_IP(ip)->i_dtroot; - - /* - * allocate/initialize a single (right) child page - * - * N.B. at first split, a one (or two) block to fit new entry - * is allocated; at subsequent split, a full page is allocated; - */ - pxdlist = split->pxdlist; - pxd = &pxdlist->pxd[pxdlist->npxd]; - pxdlist->npxd++; - rbn = addressPXD(pxd); - xlen = lengthPXD(pxd); - xsize = xlen << JFS_SBI(sb)->l2bsize; - rmp = get_metapage(ip, rbn, xsize, 1); - if (!rmp) - return -EIO; - - rp = rmp->data; - - /* Allocate blocks to quota. */ - rc = dquot_alloc_block(ip, lengthPXD(pxd)); - if (rc) { - release_metapage(rmp); - return rc; - } - - BT_MARK_DIRTY(rmp, ip); - /* - * acquire a transaction lock on the new right page - */ - tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); - dtlck = (struct dt_lock *) & tlck->lock; - - rp->header.flag = - (sp->header.flag & BT_LEAF) ? 
BT_LEAF : BT_INTERNAL; - rp->header.self = *pxd; - - /* initialize sibling pointers */ - rp->header.next = 0; - rp->header.prev = 0; - - /* - * move in-line root page into new right page extent - */ - /* linelock header + copied entries + new stbl (1st slot) in new page */ - ASSERT(dtlck->index == 0); - lv = & dtlck->lv[0]; - lv->offset = 0; - lv->length = 10; /* 1 + 8 + 1 */ - dtlck->index++; - - n = xsize >> L2DTSLOTSIZE; - rp->header.maxslot = n; - stblsize = (n + 31) >> L2DTSLOTSIZE; - - /* copy old stbl to new stbl at start of extended area */ - rp->header.stblindex = DTROOTMAXSLOT; - stbl = (s8 *) & rp->slot[DTROOTMAXSLOT]; - memcpy(stbl, sp->header.stbl, sp->header.nextindex); - rp->header.nextindex = sp->header.nextindex; - - /* copy old data area to start of new data area */ - memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE); - - /* - * append free region of newly extended area at tail of freelist - */ - /* init free region of newly extended area */ - fsi = n = DTROOTMAXSLOT + stblsize; - f = &rp->slot[fsi]; - for (fsi++; fsi < rp->header.maxslot; f++, fsi++) - f->next = fsi; - f->next = -1; - - /* append new free region at tail of old freelist */ - fsi = sp->header.freelist; - if (fsi == -1) - rp->header.freelist = n; - else { - rp->header.freelist = fsi; - - do { - f = &rp->slot[fsi]; - fsi = f->next; - } while (fsi != -1); - - f->next = n; - } - - rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n; - - /* - * Update directory index table for entries now in right page - */ - if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { - s64 lblock; - struct metapage *mp = NULL; - struct ldtentry *ldtentry; - - stbl = DT_GETSTBL(rp); - for (n = 0; n < rp->header.nextindex; n++) { - ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; - modify_index(tid, ip, le32_to_cpu(ldtentry->index), - rbn, n, &mp, &lblock); - } - if (mp) - release_metapage(mp); - } - /* - * insert the new entry into the new right/child page - * (skip index in the new right page will 
not change) - */ - dtInsertEntry(rp, split->index, split->key, split->data, &dtlck); - - /* - * reset parent/root page - * - * set the 1st entry offset to 0, which force the left-most key - * at any level of the tree to be less than any search key. - * - * The btree comparison code guarantees that the left-most key on any - * level of the tree is never used, so it doesn't need to be filled in. - */ - BT_MARK_DIRTY(smp, ip); - /* - * acquire a transaction lock on the root page (in-memory inode) - */ - tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT); - dtlck = (struct dt_lock *) & tlck->lock; - - /* linelock root */ - ASSERT(dtlck->index == 0); - lv = & dtlck->lv[0]; - lv->offset = 0; - lv->length = DTROOTMAXSLOT; - dtlck->index++; - - /* update page header of root */ - if (sp->header.flag & BT_LEAF) { - sp->header.flag &= ~BT_LEAF; - sp->header.flag |= BT_INTERNAL; - } - - /* init the first entry */ - s = (struct idtentry *) & sp->slot[DTENTRYSTART]; - ppxd = (pxd_t *) s; - *ppxd = *pxd; - s->next = -1; - s->namlen = 0; - - stbl = sp->header.stbl; - stbl[0] = DTENTRYSTART; - sp->header.nextindex = 1; - - /* init freelist */ - fsi = DTENTRYSTART + 1; - f = &sp->slot[fsi]; - - /* init free region of remaining area */ - for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) - f->next = fsi; - f->next = -1; - - sp->header.freelist = DTENTRYSTART + 1; - sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1); - - *rmpp = rmp; - - return 0; -} - - -/* - * dtDelete() - * - * function: delete the entry(s) referenced by a key. 
- * - * parameter: - * - * return: - */ -int dtDelete(tid_t tid, - struct inode *ip, struct component_name * key, ino_t * ino, int flag) -{ - int rc = 0; - s64 bn; - struct metapage *mp, *imp; - dtpage_t *p; - int index; - struct btstack btstack; - struct dt_lock *dtlck; - struct tlock *tlck; - struct lv *lv; - int i; - struct ldtentry *ldtentry; - u8 *stbl; - u32 table_index, next_index; - struct metapage *nmp; - dtpage_t *np; - - /* - * search for the entry to delete: - * - * dtSearch() returns (leaf page pinned, index at which to delete). - */ - if ((rc = dtSearch(ip, key, ino, &btstack, flag))) - return rc; - - /* retrieve search result */ - DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); - - /* - * We need to find put the index of the next entry into the - * directory index table in order to resume a readdir from this - * entry. - */ - if (DO_INDEX(ip)) { - stbl = DT_GETSTBL(p); - ldtentry = (struct ldtentry *) & p->slot[stbl[index]]; - table_index = le32_to_cpu(ldtentry->index); - if (index == (p->header.nextindex - 1)) { - /* - * Last entry in this leaf page - */ - if ((p->header.flag & BT_ROOT) - || (p->header.next == 0)) - next_index = -1; - else { - /* Read next leaf page */ - DT_GETPAGE(ip, le64_to_cpu(p->header.next), - nmp, PSIZE, np, rc); - if (rc) - next_index = -1; - else { - stbl = DT_GETSTBL(np); - ldtentry = - (struct ldtentry *) & np-> - slot[stbl[0]]; - next_index = - le32_to_cpu(ldtentry->index); - DT_PUTPAGE(nmp); - } - } - } else { - ldtentry = - (struct ldtentry *) & p->slot[stbl[index + 1]]; - next_index = le32_to_cpu(ldtentry->index); - } - free_index(tid, ip, table_index, next_index); - } - /* - * the leaf page becomes empty, delete the page - */ - if (p->header.nextindex == 1) { - /* delete empty page */ - rc = dtDeleteUp(tid, ip, mp, p, &btstack); - } - /* - * the leaf page has other entries remaining: - * - * delete the entry from the leaf page. 
- */ - else { - BT_MARK_DIRTY(mp, ip); - /* - * acquire a transaction lock on the leaf page - */ - tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); - dtlck = (struct dt_lock *) & tlck->lock; - - /* - * Do not assume that dtlck->index will be zero. During a - * rename within a directory, this transaction may have - * modified this page already when adding the new entry. - */ - - /* linelock header */ - if (dtlck->index >= dtlck->maxcnt) - dtlck = (struct dt_lock *) txLinelock(dtlck); - lv = & dtlck->lv[dtlck->index]; - lv->offset = 0; - lv->length = 1; - dtlck->index++; - - /* linelock stbl of non-root leaf page */ - if (!(p->header.flag & BT_ROOT)) { - if (dtlck->index >= dtlck->maxcnt) - dtlck = (struct dt_lock *) txLinelock(dtlck); - lv = & dtlck->lv[dtlck->index]; - i = index >> L2DTSLOTSIZE; - lv->offset = p->header.stblindex + i; - lv->length = - ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - - i + 1; - dtlck->index++; - } - - /* free the leaf entry */ - dtDeleteEntry(p, index, &dtlck); - - /* - * Update directory index table for entries moved in stbl - */ - if (DO_INDEX(ip) && index < p->header.nextindex) { - s64 lblock; - - imp = NULL; - stbl = DT_GETSTBL(p); - for (i = index; i < p->header.nextindex; i++) { - ldtentry = - (struct ldtentry *) & p->slot[stbl[i]]; - modify_index(tid, ip, - le32_to_cpu(ldtentry->index), - bn, i, &imp, &lblock); - } - if (imp) - release_metapage(imp); - } - - DT_PUTPAGE(mp); - } - - return rc; -} - - -/* - * dtDeleteUp() - * - * function: - * free empty pages as propagating deletion up the tree - * - * parameter: - * - * return: - */ -static int dtDeleteUp(tid_t tid, struct inode *ip, - struct metapage * fmp, dtpage_t * fp, struct btstack * btstack) -{ - int rc = 0; - struct metapage *mp; - dtpage_t *p; - int index, nextindex; - int xlen; - struct btframe *parent; - struct dt_lock *dtlck; - struct tlock *tlck; - struct lv *lv; - struct pxd_lock *pxdlock; - int i; - - /* - * keep the root leaf page which has become empty - */ - 
if (BT_IS_ROOT(fmp)) { - /* - * reset the root - * - * dtInitRoot() acquires txlock on the root - */ - dtInitRoot(tid, ip, PARENT(ip)); - - DT_PUTPAGE(fmp); - - return 0; - } - - /* - * free the non-root leaf page - */ - /* - * acquire a transaction lock on the page - * - * write FREEXTENT|NOREDOPAGE log record - * N.B. linelock is overlaid as freed extent descriptor, and - * the buffer page is freed; - */ - tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE); - pxdlock = (struct pxd_lock *) & tlck->lock; - pxdlock->flag = mlckFREEPXD; - pxdlock->pxd = fp->header.self; - pxdlock->index = 1; - - /* update sibling pointers */ - if ((rc = dtRelink(tid, ip, fp))) { - BT_PUTPAGE(fmp); - return rc; - } - - xlen = lengthPXD(&fp->header.self); - - /* Free quota allocation. */ - dquot_free_block(ip, xlen); - - /* free/invalidate its buffer page */ - discard_metapage(fmp); - - /* - * propagate page deletion up the directory tree - * - * If the delete from the parent page makes it empty, - * continue all the way up the tree. - * stop if the root page is reached (which is never deleted) or - * if the entry deletion does not empty the page. - */ - while ((parent = BT_POP(btstack)) != NULL) { - /* pin the parent page <sp> */ - DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - /* - * free the extent of the child page deleted - */ - index = parent->index; - - /* - * delete the entry for the child page from parent - */ - nextindex = p->header.nextindex; - - /* - * the parent has the single entry being deleted: - * - * free the parent page which has become empty. 
- */ - if (nextindex == 1) { - /* - * keep the root internal page which has become empty - */ - if (p->header.flag & BT_ROOT) { - /* - * reset the root - * - * dtInitRoot() acquires txlock on the root - */ - dtInitRoot(tid, ip, PARENT(ip)); - - DT_PUTPAGE(mp); - - return 0; - } - /* - * free the parent page - */ - else { - /* - * acquire a transaction lock on the page - * - * write FREEXTENT|NOREDOPAGE log record - */ - tlck = - txMaplock(tid, ip, - tlckDTREE | tlckFREE); - pxdlock = (struct pxd_lock *) & tlck->lock; - pxdlock->flag = mlckFREEPXD; - pxdlock->pxd = p->header.self; - pxdlock->index = 1; - - /* update sibling pointers */ - if ((rc = dtRelink(tid, ip, p))) { - DT_PUTPAGE(mp); - return rc; - } - - xlen = lengthPXD(&p->header.self); - - /* Free quota allocation */ - dquot_free_block(ip, xlen); - - /* free/invalidate its buffer page */ - discard_metapage(mp); - - /* propagate up */ - continue; - } - } - - /* - * the parent has other entries remaining: - * - * delete the router entry from the parent page. 
- */ - BT_MARK_DIRTY(mp, ip); - /* - * acquire a transaction lock on the page - * - * action: router entry deletion - */ - tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); - dtlck = (struct dt_lock *) & tlck->lock; - - /* linelock header */ - if (dtlck->index >= dtlck->maxcnt) - dtlck = (struct dt_lock *) txLinelock(dtlck); - lv = & dtlck->lv[dtlck->index]; - lv->offset = 0; - lv->length = 1; - dtlck->index++; - - /* linelock stbl of non-root leaf page */ - if (!(p->header.flag & BT_ROOT)) { - if (dtlck->index < dtlck->maxcnt) - lv++; - else { - dtlck = (struct dt_lock *) txLinelock(dtlck); - lv = & dtlck->lv[0]; - } - i = index >> L2DTSLOTSIZE; - lv->offset = p->header.stblindex + i; - lv->length = - ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - - i + 1; - dtlck->index++; - } - - /* free the router entry */ - dtDeleteEntry(p, index, &dtlck); - - /* reset key of new leftmost entry of level (for consistency) */ - if (index == 0 && - ((p->header.flag & BT_ROOT) || p->header.prev == 0)) - dtTruncateEntry(p, 0, &dtlck); - - /* unpin the parent page */ - DT_PUTPAGE(mp); - - /* exit propagation up */ - break; - } - - if (!DO_INDEX(ip)) - ip->i_size -= PSIZE; - - return 0; -} - -#ifdef _NOTYET -/* - * NAME: dtRelocate() - * - * FUNCTION: relocate dtpage (internal or leaf) of directory; - * This function is mainly used by defragfs utility. - */ -int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, - s64 nxaddr) -{ - int rc = 0; - struct metapage *mp, *pmp, *lmp, *rmp; - dtpage_t *p, *pp, *rp = 0, *lp= 0; - s64 bn; - int index; - struct btstack btstack; - pxd_t *pxd; - s64 oxaddr, nextbn, prevbn; - int xlen, xsize; - struct tlock *tlck; - struct dt_lock *dtlck; - struct pxd_lock *pxdlock; - s8 *stbl; - struct lv *lv; - - oxaddr = addressPXD(opxd); - xlen = lengthPXD(opxd); - - jfs_info("dtRelocate: lmxaddr:%Ld xaddr:%Ld:%Ld xlen:%d", - (long long)lmxaddr, (long long)oxaddr, (long long)nxaddr, - xlen); - - /* - * 1. 
get the internal parent dtpage covering - * router entry for the tartget page to be relocated; - */ - rc = dtSearchNode(ip, lmxaddr, opxd, &btstack); - if (rc) - return rc; - - /* retrieve search result */ - DT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); - jfs_info("dtRelocate: parent router entry validated."); - - /* - * 2. relocate the target dtpage - */ - /* read in the target page from src extent */ - DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); - if (rc) { - /* release the pinned parent page */ - DT_PUTPAGE(pmp); - return rc; - } - - /* - * read in sibling pages if any to update sibling pointers; - */ - rmp = NULL; - if (p->header.next) { - nextbn = le64_to_cpu(p->header.next); - DT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); - if (rc) { - DT_PUTPAGE(mp); - DT_PUTPAGE(pmp); - return (rc); - } - } - - lmp = NULL; - if (p->header.prev) { - prevbn = le64_to_cpu(p->header.prev); - DT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); - if (rc) { - DT_PUTPAGE(mp); - DT_PUTPAGE(pmp); - if (rmp) - DT_PUTPAGE(rmp); - return (rc); - } - } - - /* at this point, all xtpages to be updated are in memory */ - - /* - * update sibling pointers of sibling dtpages if any; - */ - if (lmp) { - tlck = txLock(tid, ip, lmp, tlckDTREE | tlckRELINK); - dtlck = (struct dt_lock *) & tlck->lock; - /* linelock header */ - ASSERT(dtlck->index == 0); - lv = & dtlck->lv[0]; - lv->offset = 0; - lv->length = 1; - dtlck->index++; - - lp->header.next = cpu_to_le64(nxaddr); - DT_PUTPAGE(lmp); - } - - if (rmp) { - tlck = txLock(tid, ip, rmp, tlckDTREE | tlckRELINK); - dtlck = (struct dt_lock *) & tlck->lock; - /* linelock header */ - ASSERT(dtlck->index == 0); - lv = & dtlck->lv[0]; - lv->offset = 0; - lv->length = 1; - dtlck->index++; - - rp->header.prev = cpu_to_le64(nxaddr); - DT_PUTPAGE(rmp); - } - - /* - * update the target dtpage to be relocated - * - * write LOG_REDOPAGE of LOG_NEW type for dst page - * for the whole target page (logredo() will apply - * after image and update bmap for allocation of 
the - * dst extent), and update bmap for allocation of - * the dst extent; - */ - tlck = txLock(tid, ip, mp, tlckDTREE | tlckNEW); - dtlck = (struct dt_lock *) & tlck->lock; - /* linelock header */ - ASSERT(dtlck->index == 0); - lv = & dtlck->lv[0]; - - /* update the self address in the dtpage header */ - pxd = &p->header.self; - PXDaddress(pxd, nxaddr); - - /* the dst page is the same as the src page, i.e., - * linelock for afterimage of the whole page; - */ - lv->offset = 0; - lv->length = p->header.maxslot; - dtlck->index++; - - /* update the buffer extent descriptor of the dtpage */ - xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; - - /* unpin the relocated page */ - DT_PUTPAGE(mp); - jfs_info("dtRelocate: target dtpage relocated."); - - /* the moved extent is dtpage, then a LOG_NOREDOPAGE log rec - * needs to be written (in logredo(), the LOG_NOREDOPAGE log rec - * will also force a bmap update ). - */ - - /* - * 3. acquire maplock for the source extent to be freed; - */ - /* for dtpage relocation, write a LOG_NOREDOPAGE record - * for the source dtpage (logredo() will init NoRedoPage - * filter and will also update bmap for free of the source - * dtpage), and upadte bmap for free of the source dtpage; - */ - tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE); - pxdlock = (struct pxd_lock *) & tlck->lock; - pxdlock->flag = mlckFREEPXD; - PXDaddress(&pxdlock->pxd, oxaddr); - PXDlength(&pxdlock->pxd, xlen); - pxdlock->index = 1; - - /* - * 4. 
update the parent router entry for relocation; - * - * acquire tlck for the parent entry covering the target dtpage; - * write LOG_REDOPAGE to apply after image only; - */ - jfs_info("dtRelocate: update parent router entry."); - tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY); - dtlck = (struct dt_lock *) & tlck->lock; - lv = & dtlck->lv[dtlck->index]; - - /* update the PXD with the new address */ - stbl = DT_GETSTBL(pp); - pxd = (pxd_t *) & pp->slot[stbl[index]]; - PXDaddress(pxd, nxaddr); - lv->offset = stbl[index]; - lv->length = 1; - dtlck->index++; - - /* unpin the parent dtpage */ - DT_PUTPAGE(pmp); - - return rc; -} - -/* - * NAME: dtSearchNode() - * - * FUNCTION: Search for an dtpage containing a specified address - * This function is mainly used by defragfs utility. - * - * NOTE: Search result on stack, the found page is pinned at exit. - * The result page must be an internal dtpage. - * lmxaddr give the address of the left most page of the - * dtree level, in which the required dtpage resides. - */ -static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd, - struct btstack * btstack) -{ - int rc = 0; - s64 bn; - struct metapage *mp; - dtpage_t *p; - int psize = 288; /* initial in-line directory */ - s8 *stbl; - int i; - pxd_t *pxd; - struct btframe *btsp; - - BT_CLR(btstack); /* reset stack */ - - /* - * descend tree to the level with specified leftmost page - * - * by convention, root bn = 0. - */ - for (bn = 0;;) { - /* get/pin the page to search */ - DT_GETPAGE(ip, bn, mp, psize, p, rc); - if (rc) - return rc; - - /* does the xaddr of leftmost page of the levevl - * matches levevl search key ? 
- */ - if (p->header.flag & BT_ROOT) { - if (lmxaddr == 0) - break; - } else if (addressPXD(&p->header.self) == lmxaddr) - break; - - /* - * descend down to leftmost child page - */ - if (p->header.flag & BT_LEAF) { - DT_PUTPAGE(mp); - return -ESTALE; - } - - /* get the leftmost entry */ - stbl = DT_GETSTBL(p); - pxd = (pxd_t *) & p->slot[stbl[0]]; - - /* get the child page block address */ - bn = addressPXD(pxd); - psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize; - /* unpin the parent page */ - DT_PUTPAGE(mp); - } - - /* - * search each page at the current levevl - */ - loop: - stbl = DT_GETSTBL(p); - for (i = 0; i < p->header.nextindex; i++) { - pxd = (pxd_t *) & p->slot[stbl[i]]; - - /* found the specified router entry */ - if (addressPXD(pxd) == addressPXD(kpxd) && - lengthPXD(pxd) == lengthPXD(kpxd)) { - btsp = btstack->top; - btsp->bn = bn; - btsp->index = i; - btsp->mp = mp; - - return 0; - } - } - - /* get the right sibling page if any */ - if (p->header.next) - bn = le64_to_cpu(p->header.next); - else { - DT_PUTPAGE(mp); - return -ESTALE; - } - - /* unpin current page */ - DT_PUTPAGE(mp); - - /* get the right sibling page */ - DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - goto loop; -} -#endif /* _NOTYET */ - -/* - * dtRelink() - * - * function: - * link around a freed page. 
- * - * parameter: - * fp: page to be freed - * - * return: - */ -static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p) -{ - int rc; - struct metapage *mp; - s64 nextbn, prevbn; - struct tlock *tlck; - struct dt_lock *dtlck; - struct lv *lv; - - nextbn = le64_to_cpu(p->header.next); - prevbn = le64_to_cpu(p->header.prev); - - /* update prev pointer of the next page */ - if (nextbn != 0) { - DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); - if (rc) - return rc; - - BT_MARK_DIRTY(mp, ip); - /* - * acquire a transaction lock on the next page - * - * action: update prev pointer; - */ - tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); - jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p", - tlck, ip, mp); - dtlck = (struct dt_lock *) & tlck->lock; - - /* linelock header */ - if (dtlck->index >= dtlck->maxcnt) - dtlck = (struct dt_lock *) txLinelock(dtlck); - lv = & dtlck->lv[dtlck->index]; - lv->offset = 0; - lv->length = 1; - dtlck->index++; - - p->header.prev = cpu_to_le64(prevbn); - DT_PUTPAGE(mp); - } - - /* update next pointer of the previous page */ - if (prevbn != 0) { - DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); - if (rc) - return rc; - - BT_MARK_DIRTY(mp, ip); - /* - * acquire a transaction lock on the prev page - * - * action: update next pointer; - */ - tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); - jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p", - tlck, ip, mp); - dtlck = (struct dt_lock *) & tlck->lock; - - /* linelock header */ - if (dtlck->index >= dtlck->maxcnt) - dtlck = (struct dt_lock *) txLinelock(dtlck); - lv = & dtlck->lv[dtlck->index]; - lv->offset = 0; - lv->length = 1; - dtlck->index++; - - p->header.next = cpu_to_le64(nextbn); - DT_PUTPAGE(mp); - } - - return 0; -} - - -/* - * dtInitRoot() - * - * initialize directory root (inline in inode) - */ -void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot) -{ - struct jfs_inode_info *jfs_ip = JFS_IP(ip); - dtroot_t *p; - int fsi; - struct dtslot *f; - struct tlock 
*tlck; - struct dt_lock *dtlck; - struct lv *lv; - u16 xflag_save; - - /* - * If this was previously an non-empty directory, we need to remove - * the old directory table. - */ - if (DO_INDEX(ip)) { - if (!jfs_dirtable_inline(ip)) { - struct tblock *tblk = tid_to_tblock(tid); - /* - * We're playing games with the tid's xflag. If - * we're removing a regular file, the file's xtree - * is committed with COMMIT_PMAP, but we always - * commit the directories xtree with COMMIT_PWMAP. - */ - xflag_save = tblk->xflag; - tblk->xflag = 0; - /* - * xtTruncate isn't guaranteed to fully truncate - * the xtree. The caller needs to check i_size - * after committing the transaction to see if - * additional truncation is needed. The - * COMMIT_Stale flag tells caller that we - * initiated the truncation. - */ - xtTruncate(tid, ip, 0, COMMIT_PWMAP); - set_cflag(COMMIT_Stale, ip); - - tblk->xflag = xflag_save; - } else - ip->i_size = 1; - - jfs_ip->next_index = 2; - } else - ip->i_size = IDATASIZE; - - /* - * acquire a transaction lock on the root - * - * action: directory initialization; - */ - tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag, - tlckDTREE | tlckENTRY | tlckBTROOT); - dtlck = (struct dt_lock *) & tlck->lock; - - /* linelock root */ - ASSERT(dtlck->index == 0); - lv = & dtlck->lv[0]; - lv->offset = 0; - lv->length = DTROOTMAXSLOT; - dtlck->index++; - - p = &jfs_ip->i_dtroot; - - p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; - - p->header.nextindex = 0; - - /* init freelist */ - fsi = 1; - f = &p->slot[fsi]; - - /* init data area of root */ - for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) - f->next = fsi; - f->next = -1; - - p->header.freelist = 1; - p->header.freecnt = 8; - - /* init '..' entry */ - p->header.idotdot = cpu_to_le32(idotdot); - - return; -} - -/* - * add_missing_indices() - * - * function: Fix dtree page in which one or more entries has an invalid index. - * fsck.jfs should really fix this, but it currently does not. 
- * Called from jfs_readdir when bad index is detected. - */ -static void add_missing_indices(struct inode *inode, s64 bn) -{ - struct ldtentry *d; - struct dt_lock *dtlck; - int i; - uint index; - struct lv *lv; - struct metapage *mp; - dtpage_t *p; - int rc; - s8 *stbl; - tid_t tid; - struct tlock *tlck; - - tid = txBegin(inode->i_sb, 0); - - DT_GETPAGE(inode, bn, mp, PSIZE, p, rc); - - if (rc) { - printk(KERN_ERR "DT_GETPAGE failed!\n"); - goto end; - } - BT_MARK_DIRTY(mp, inode); - - ASSERT(p->header.flag & BT_LEAF); - - tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY); - if (BT_IS_ROOT(mp)) - tlck->type |= tlckBTROOT; - - dtlck = (struct dt_lock *) &tlck->lock; - - stbl = DT_GETSTBL(p); - for (i = 0; i < p->header.nextindex; i++) { - d = (struct ldtentry *) &p->slot[stbl[i]]; - index = le32_to_cpu(d->index); - if ((index < 2) || (index >= JFS_IP(inode)->next_index)) { - d->index = cpu_to_le32(add_index(tid, inode, bn, i)); - if (dtlck->index >= dtlck->maxcnt) - dtlck = (struct dt_lock *) txLinelock(dtlck); - lv = &dtlck->lv[dtlck->index]; - lv->offset = stbl[i]; - lv->length = 1; - dtlck->index++; - } - } - - DT_PUTPAGE(mp); - (void) txCommit(tid, 1, &inode, 0); -end: - txEnd(tid); -} - -/* - * Buffer to hold directory entry info while traversing a dtree page - * before being fed to the filldir function - */ -struct jfs_dirent { - loff_t position; - int ino; - u16 name_len; - char name[0]; -}; - -/* - * function to determine next variable-sized jfs_dirent in buffer - */ -static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent) -{ - return (struct jfs_dirent *) - ((char *)dirent + - ((sizeof (struct jfs_dirent) + dirent->name_len + 1 + - sizeof (loff_t) - 1) & - ~(sizeof (loff_t) - 1))); -} - -/* - * jfs_readdir() - * - * function: read directory entries sequentially - * from the specified entry offset - * - * parameter: - * - * return: offset = (pn, index) of start entry - * of next jfs_readdir()/dtRead() - */ -int jfs_readdir(struct 
file *filp, void *dirent, filldir_t filldir) -{ - struct inode *ip = filp->f_path.dentry->d_inode; - struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; - int rc = 0; - loff_t dtpos; /* legacy OS/2 style position */ - struct dtoffset { - s16 pn; - s16 index; - s32 unused; - } *dtoffset = (struct dtoffset *) &dtpos; - s64 bn; - struct metapage *mp; - dtpage_t *p; - int index; - s8 *stbl; - struct btstack btstack; - int i, next; - struct ldtentry *d; - struct dtslot *t; - int d_namleft, len, outlen; - unsigned long dirent_buf; - char *name_ptr; - u32 dir_index; - int do_index = 0; - uint loop_count = 0; - struct jfs_dirent *jfs_dirent; - int jfs_dirents; - int overflow, fix_page, page_fixed = 0; - static int unique_pos = 2; /* If we can't fix broken index */ - - if (filp->f_pos == DIREND) - return 0; - - if (DO_INDEX(ip)) { - /* - * persistent index is stored in directory entries. - * Special cases: 0 = . - * 1 = .. - * -1 = End of directory - */ - do_index = 1; - - dir_index = (u32) filp->f_pos; - - if (dir_index > 1) { - struct dir_table_slot dirtab_slot; - - if (dtEmpty(ip) || - (dir_index >= JFS_IP(ip)->next_index)) { - /* Stale position. Directory has shrunk */ - filp->f_pos = DIREND; - return 0; - } - repeat: - rc = read_index(ip, dir_index, &dirtab_slot); - if (rc) { - filp->f_pos = DIREND; - return rc; - } - if (dirtab_slot.flag == DIR_INDEX_FREE) { - if (loop_count++ > JFS_IP(ip)->next_index) { - jfs_err("jfs_readdir detected " - "infinite loop!"); - filp->f_pos = DIREND; - return 0; - } - dir_index = le32_to_cpu(dirtab_slot.addr2); - if (dir_index == -1) { - filp->f_pos = DIREND; - return 0; - } - goto repeat; - } - bn = addressDTS(&dirtab_slot); - index = dirtab_slot.slot; - DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) { - filp->f_pos = DIREND; - return 0; - } - if (p->header.flag & BT_INTERNAL) { - jfs_err("jfs_readdir: bad index table"); - DT_PUTPAGE(mp); - filp->f_pos = -1; - return 0; - } - } else { - if (dir_index == 0) { - /* - * self "." 
- */ - filp->f_pos = 0; - if (filldir(dirent, ".", 1, 0, ip->i_ino, - DT_DIR)) - return 0; - } - /* - * parent ".." - */ - filp->f_pos = 1; - if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR)) - return 0; - - /* - * Find first entry of left-most leaf - */ - if (dtEmpty(ip)) { - filp->f_pos = DIREND; - return 0; - } - - if ((rc = dtReadFirst(ip, &btstack))) - return rc; - - DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); - } - } else { - /* - * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 - * - * pn = index = 0: First entry "." - * pn = 0; index = 1: Second entry ".." - * pn > 0: Real entries, pn=1 -> leftmost page - * pn = index = -1: No more entries - */ - dtpos = filp->f_pos; - if (dtpos == 0) { - /* build "." entry */ - - if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino, - DT_DIR)) - return 0; - dtoffset->index = 1; - filp->f_pos = dtpos; - } - - if (dtoffset->pn == 0) { - if (dtoffset->index == 1) { - /* build ".." entry */ - - if (filldir(dirent, "..", 2, filp->f_pos, - PARENT(ip), DT_DIR)) - return 0; - } else { - jfs_err("jfs_readdir called with " - "invalid offset!"); - } - dtoffset->pn = 1; - dtoffset->index = 0; - filp->f_pos = dtpos; - } - - if (dtEmpty(ip)) { - filp->f_pos = DIREND; - return 0; - } - - if ((rc = dtReadNext(ip, &filp->f_pos, &btstack))) { - jfs_err("jfs_readdir: unexpected rc = %d " - "from dtReadNext", rc); - filp->f_pos = DIREND; - return 0; - } - /* get start leaf page and index */ - DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); - - /* offset beyond directory eof ? 
*/ - if (bn < 0) { - filp->f_pos = DIREND; - return 0; - } - } - - dirent_buf = __get_free_page(GFP_KERNEL); - if (dirent_buf == 0) { - DT_PUTPAGE(mp); - jfs_warn("jfs_readdir: __get_free_page failed!"); - filp->f_pos = DIREND; - return -ENOMEM; - } - - while (1) { - jfs_dirent = (struct jfs_dirent *) dirent_buf; - jfs_dirents = 0; - overflow = fix_page = 0; - - stbl = DT_GETSTBL(p); - - for (i = index; i < p->header.nextindex; i++) { - d = (struct ldtentry *) & p->slot[stbl[i]]; - - if (((long) jfs_dirent + d->namlen + 1) > - (dirent_buf + PAGE_SIZE)) { - /* DBCS codepages could overrun dirent_buf */ - index = i; - overflow = 1; - break; - } - - d_namleft = d->namlen; - name_ptr = jfs_dirent->name; - jfs_dirent->ino = le32_to_cpu(d->inumber); - - if (do_index) { - len = min(d_namleft, DTLHDRDATALEN); - jfs_dirent->position = le32_to_cpu(d->index); - /* - * d->index should always be valid, but it - * isn't. fsck.jfs doesn't create the - * directory index for the lost+found - * directory. Rather than let it go, - * we can try to fix it. 
- */ - if ((jfs_dirent->position < 2) || - (jfs_dirent->position >= - JFS_IP(ip)->next_index)) { - if (!page_fixed && !isReadOnly(ip)) { - fix_page = 1; - /* - * setting overflow and setting - * index to i will cause the - * same page to be processed - * again starting here - */ - overflow = 1; - index = i; - break; - } - jfs_dirent->position = unique_pos++; - } - } else { - jfs_dirent->position = dtpos; - len = min(d_namleft, DTLHDRDATALEN_LEGACY); - } - - /* copy the name of head/only segment */ - outlen = jfs_strfromUCS_le(name_ptr, d->name, len, - codepage); - jfs_dirent->name_len = outlen; - - /* copy name in the additional segment(s) */ - next = d->next; - while (next >= 0) { - t = (struct dtslot *) & p->slot[next]; - name_ptr += outlen; - d_namleft -= len; - /* Sanity Check */ - if (d_namleft == 0) { - jfs_error(ip->i_sb, - "JFS:Dtree error: ino = " - "%ld, bn=%Ld, index = %d", - (long)ip->i_ino, - (long long)bn, - i); - goto skip_one; - } - len = min(d_namleft, DTSLOTDATALEN); - outlen = jfs_strfromUCS_le(name_ptr, t->name, - len, codepage); - jfs_dirent->name_len += outlen; - - next = t->next; - } - - jfs_dirents++; - jfs_dirent = next_jfs_dirent(jfs_dirent); -skip_one: - if (!do_index) - dtoffset->index++; - } - - if (!overflow) { - /* Point to next leaf page */ - if (p->header.flag & BT_ROOT) - bn = 0; - else { - bn = le64_to_cpu(p->header.next); - index = 0; - /* update offset (pn:index) for new page */ - if (!do_index) { - dtoffset->pn++; - dtoffset->index = 0; - } - } - page_fixed = 0; - } - - /* unpin previous leaf page */ - DT_PUTPAGE(mp); - - jfs_dirent = (struct jfs_dirent *) dirent_buf; - while (jfs_dirents--) { - filp->f_pos = jfs_dirent->position; - if (filldir(dirent, jfs_dirent->name, - jfs_dirent->name_len, filp->f_pos, - jfs_dirent->ino, DT_UNKNOWN)) - goto out; - jfs_dirent = next_jfs_dirent(jfs_dirent); - } - - if (fix_page) { - add_missing_indices(ip, bn); - page_fixed = 1; - } - - if (!overflow && (bn == 0)) { - filp->f_pos = DIREND; - 
break; - } - - DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) { - free_page(dirent_buf); - return rc; - } - } - - out: - free_page(dirent_buf); - - return rc; -} - - -/* - * dtReadFirst() - * - * function: get the leftmost page of the directory - */ -static int dtReadFirst(struct inode *ip, struct btstack * btstack) -{ - int rc = 0; - s64 bn; - int psize = 288; /* initial in-line directory */ - struct metapage *mp; - dtpage_t *p; - s8 *stbl; - struct btframe *btsp; - pxd_t *xd; - - BT_CLR(btstack); /* reset stack */ - - /* - * descend leftmost path of the tree - * - * by convention, root bn = 0. - */ - for (bn = 0;;) { - DT_GETPAGE(ip, bn, mp, psize, p, rc); - if (rc) - return rc; - - /* - * leftmost leaf page - */ - if (p->header.flag & BT_LEAF) { - /* return leftmost entry */ - btsp = btstack->top; - btsp->bn = bn; - btsp->index = 0; - btsp->mp = mp; - - return 0; - } - - /* - * descend down to leftmost child page - */ - if (BT_STACK_FULL(btstack)) { - DT_PUTPAGE(mp); - jfs_error(ip->i_sb, "dtReadFirst: btstack overrun"); - BT_STACK_DUMP(btstack); - return -EIO; - } - /* push (bn, index) of the parent page/entry */ - BT_PUSH(btstack, bn, 0); - - /* get the leftmost entry */ - stbl = DT_GETSTBL(p); - xd = (pxd_t *) & p->slot[stbl[0]]; - - /* get the child page block address */ - bn = addressPXD(xd); - psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize; - - /* unpin the parent page */ - DT_PUTPAGE(mp); - } -} - - -/* - * dtReadNext() - * - * function: get the page of the specified offset (pn:index) - * - * return: if (offset > eof), bn = -1; - * - * note: if index > nextindex of the target leaf page, - * start with 1st entry of next leaf page; - */ -static int dtReadNext(struct inode *ip, loff_t * offset, - struct btstack * btstack) -{ - int rc = 0; - struct dtoffset { - s16 pn; - s16 index; - s32 unused; - } *dtoffset = (struct dtoffset *) offset; - s64 bn; - struct metapage *mp; - dtpage_t *p; - int index; - int pn; - s8 *stbl; - struct btframe *btsp, *parent; 
- pxd_t *xd; - - /* - * get leftmost leaf page pinned - */ - if ((rc = dtReadFirst(ip, btstack))) - return rc; - - /* get leaf page */ - DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); - - /* get the start offset (pn:index) */ - pn = dtoffset->pn - 1; /* Now pn = 0 represents leftmost leaf */ - index = dtoffset->index; - - /* start at leftmost page ? */ - if (pn == 0) { - /* offset beyond eof ? */ - if (index < p->header.nextindex) - goto out; - - if (p->header.flag & BT_ROOT) { - bn = -1; - goto out; - } - - /* start with 1st entry of next leaf page */ - dtoffset->pn++; - dtoffset->index = index = 0; - goto a; - } - - /* start at non-leftmost page: scan parent pages for large pn */ - if (p->header.flag & BT_ROOT) { - bn = -1; - goto out; - } - - /* start after next leaf page ? */ - if (pn > 1) - goto b; - - /* get leaf page pn = 1 */ - a: - bn = le64_to_cpu(p->header.next); - - /* unpin leaf page */ - DT_PUTPAGE(mp); - - /* offset beyond eof ? */ - if (bn == 0) { - bn = -1; - goto out; - } - - goto c; - - /* - * scan last internal page level to get target leaf page - */ - b: - /* unpin leftmost leaf page */ - DT_PUTPAGE(mp); - - /* get left most parent page */ - btsp = btstack->top; - parent = btsp - 1; - bn = parent->bn; - DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - /* scan parent pages at last internal page level */ - while (pn >= p->header.nextindex) { - pn -= p->header.nextindex; - - /* get next parent page address */ - bn = le64_to_cpu(p->header.next); - - /* unpin current parent page */ - DT_PUTPAGE(mp); - - /* offset beyond eof ? 
*/ - if (bn == 0) { - bn = -1; - goto out; - } - - /* get next parent page */ - DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - /* update parent page stack frame */ - parent->bn = bn; - } - - /* get leaf page address */ - stbl = DT_GETSTBL(p); - xd = (pxd_t *) & p->slot[stbl[pn]]; - bn = addressPXD(xd); - - /* unpin parent page */ - DT_PUTPAGE(mp); - - /* - * get target leaf page - */ - c: - DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - /* - * leaf page has been completed: - * start with 1st entry of next leaf page - */ - if (index >= p->header.nextindex) { - bn = le64_to_cpu(p->header.next); - - /* unpin leaf page */ - DT_PUTPAGE(mp); - - /* offset beyond eof ? */ - if (bn == 0) { - bn = -1; - goto out; - } - - /* get next leaf page */ - DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - /* start with 1st entry of next leaf page */ - dtoffset->pn++; - dtoffset->index = 0; - } - - out: - /* return target leaf page pinned */ - btsp = btstack->top; - btsp->bn = bn; - btsp->index = dtoffset->index; - btsp->mp = mp; - - return 0; -} - - -/* - * dtCompare() - * - * function: compare search key with an internal entry - * - * return: - * < 0 if k is < record - * = 0 if k is = record - * > 0 if k is > record - */ -static int dtCompare(struct component_name * key, /* search key */ - dtpage_t * p, /* directory page */ - int si) -{ /* entry slot index */ - wchar_t *kname; - __le16 *name; - int klen, namlen, len, rc; - struct idtentry *ih; - struct dtslot *t; - - /* - * force the left-most key on internal pages, at any level of - * the tree, to be less than any search key. - * this obviates having to update the leftmost key on an internal - * page when the user inserts a new key in the tree smaller than - * anything that has been stored. - * - * (? if/when dtSearch() narrows down to 1st entry (index = 0), - * at any internal page at any level of the tree, - * it descends to child of the entry anyway - - * ? 
make the entry as min size dummy entry) - * - * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) - * return (1); - */ - - kname = key->name; - klen = key->namlen; - - ih = (struct idtentry *) & p->slot[si]; - si = ih->next; - name = ih->name; - namlen = ih->namlen; - len = min(namlen, DTIHDRDATALEN); - - /* compare with head/only segment */ - len = min(klen, len); - if ((rc = UniStrncmp_le(kname, name, len))) - return rc; - - klen -= len; - namlen -= len; - - /* compare with additional segment(s) */ - kname += len; - while (klen > 0 && namlen > 0) { - /* compare with next name segment */ - t = (struct dtslot *) & p->slot[si]; - len = min(namlen, DTSLOTDATALEN); - len = min(klen, len); - name = t->name; - if ((rc = UniStrncmp_le(kname, name, len))) - return rc; - - klen -= len; - namlen -= len; - kname += len; - si = t->next; - } - - return (klen - namlen); -} - - - - -/* - * ciCompare() - * - * function: compare search key with an (leaf/internal) entry - * - * return: - * < 0 if k is < record - * = 0 if k is = record - * > 0 if k is > record - */ -static int ciCompare(struct component_name * key, /* search key */ - dtpage_t * p, /* directory page */ - int si, /* entry slot index */ - int flag) -{ - wchar_t *kname, x; - __le16 *name; - int klen, namlen, len, rc; - struct ldtentry *lh; - struct idtentry *ih; - struct dtslot *t; - int i; - - /* - * force the left-most key on internal pages, at any level of - * the tree, to be less than any search key. - * this obviates having to update the leftmost key on an internal - * page when the user inserts a new key in the tree smaller than - * anything that has been stored. - * - * (? if/when dtSearch() narrows down to 1st entry (index = 0), - * at any internal page at any level of the tree, - * it descends to child of the entry anyway - - * ? 
make the entry as min size dummy entry) - * - * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) - * return (1); - */ - - kname = key->name; - klen = key->namlen; - - /* - * leaf page entry - */ - if (p->header.flag & BT_LEAF) { - lh = (struct ldtentry *) & p->slot[si]; - si = lh->next; - name = lh->name; - namlen = lh->namlen; - if (flag & JFS_DIR_INDEX) - len = min(namlen, DTLHDRDATALEN); - else - len = min(namlen, DTLHDRDATALEN_LEGACY); - } - /* - * internal page entry - */ - else { - ih = (struct idtentry *) & p->slot[si]; - si = ih->next; - name = ih->name; - namlen = ih->namlen; - len = min(namlen, DTIHDRDATALEN); - } - - /* compare with head/only segment */ - len = min(klen, len); - for (i = 0; i < len; i++, kname++, name++) { - /* only uppercase if case-insensitive support is on */ - if ((flag & JFS_OS2) == JFS_OS2) - x = UniToupper(le16_to_cpu(*name)); - else - x = le16_to_cpu(*name); - if ((rc = *kname - x)) - return rc; - } - - klen -= len; - namlen -= len; - - /* compare with additional segment(s) */ - while (klen > 0 && namlen > 0) { - /* compare with next name segment */ - t = (struct dtslot *) & p->slot[si]; - len = min(namlen, DTSLOTDATALEN); - len = min(klen, len); - name = t->name; - for (i = 0; i < len; i++, kname++, name++) { - /* only uppercase if case-insensitive support is on */ - if ((flag & JFS_OS2) == JFS_OS2) - x = UniToupper(le16_to_cpu(*name)); - else - x = le16_to_cpu(*name); - - if ((rc = *kname - x)) - return rc; - } - - klen -= len; - namlen -= len; - si = t->next; - } - - return (klen - namlen); -} - - -/* - * ciGetLeafPrefixKey() - * - * function: compute prefix of suffix compression - * from two adjacent leaf entries - * across page boundary - * - * return: non-zero on error - * - */ -static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, - int ri, struct component_name * key, int flag) -{ - int klen, namlen; - wchar_t *pl, *pr, *kname; - struct component_name lkey; - struct component_name rkey; 
- - lkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), - GFP_KERNEL); - if (lkey.name == NULL) - return -ENOMEM; - - rkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), - GFP_KERNEL); - if (rkey.name == NULL) { - kfree(lkey.name); - return -ENOMEM; - } - - /* get left and right key */ - dtGetKey(lp, li, &lkey, flag); - lkey.name[lkey.namlen] = 0; - - if ((flag & JFS_OS2) == JFS_OS2) - ciToUpper(&lkey); - - dtGetKey(rp, ri, &rkey, flag); - rkey.name[rkey.namlen] = 0; - - - if ((flag & JFS_OS2) == JFS_OS2) - ciToUpper(&rkey); - - /* compute prefix */ - klen = 0; - kname = key->name; - namlen = min(lkey.namlen, rkey.namlen); - for (pl = lkey.name, pr = rkey.name; - namlen; pl++, pr++, namlen--, klen++, kname++) { - *kname = *pr; - if (*pl != *pr) { - key->namlen = klen + 1; - goto free_names; - } - } - - /* l->namlen <= r->namlen since l <= r */ - if (lkey.namlen < rkey.namlen) { - *kname = *pr; - key->namlen = klen + 1; - } else /* l->namelen == r->namelen */ - key->namlen = klen; - -free_names: - kfree(lkey.name); - kfree(rkey.name); - return 0; -} - - - -/* - * dtGetKey() - * - * function: get key of the entry - */ -static void dtGetKey(dtpage_t * p, int i, /* entry index */ - struct component_name * key, int flag) -{ - int si; - s8 *stbl; - struct ldtentry *lh; - struct idtentry *ih; - struct dtslot *t; - int namlen, len; - wchar_t *kname; - __le16 *name; - - /* get entry */ - stbl = DT_GETSTBL(p); - si = stbl[i]; - if (p->header.flag & BT_LEAF) { - lh = (struct ldtentry *) & p->slot[si]; - si = lh->next; - namlen = lh->namlen; - name = lh->name; - if (flag & JFS_DIR_INDEX) - len = min(namlen, DTLHDRDATALEN); - else - len = min(namlen, DTLHDRDATALEN_LEGACY); - } else { - ih = (struct idtentry *) & p->slot[si]; - si = ih->next; - namlen = ih->namlen; - name = ih->name; - len = min(namlen, DTIHDRDATALEN); - } - - key->namlen = namlen; - kname = key->name; - - /* - * move head/only segment - */ - UniStrncpy_from_le(kname, name, len); - - /* - * move 
additional segment(s) - */ - while (si >= 0) { - /* get next segment */ - t = &p->slot[si]; - kname += len; - namlen -= len; - len = min(namlen, DTSLOTDATALEN); - UniStrncpy_from_le(kname, t->name, len); - - si = t->next; - } -} - - -/* - * dtInsertEntry() - * - * function: allocate free slot(s) and - * write a leaf/internal entry - * - * return: entry slot index - */ -static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, - ddata_t * data, struct dt_lock ** dtlock) -{ - struct dtslot *h, *t; - struct ldtentry *lh = NULL; - struct idtentry *ih = NULL; - int hsi, fsi, klen, len, nextindex; - wchar_t *kname; - __le16 *name; - s8 *stbl; - pxd_t *xd; - struct dt_lock *dtlck = *dtlock; - struct lv *lv; - int xsi, n; - s64 bn = 0; - struct metapage *mp = NULL; - - klen = key->namlen; - kname = key->name; - - /* allocate a free slot */ - hsi = fsi = p->header.freelist; - h = &p->slot[fsi]; - p->header.freelist = h->next; - --p->header.freecnt; - - /* open new linelock */ - if (dtlck->index >= dtlck->maxcnt) - dtlck = (struct dt_lock *) txLinelock(dtlck); - - lv = & dtlck->lv[dtlck->index]; - lv->offset = hsi; - - /* write head/only segment */ - if (p->header.flag & BT_LEAF) { - lh = (struct ldtentry *) h; - lh->next = h->next; - lh->inumber = cpu_to_le32(data->leaf.ino); - lh->namlen = klen; - name = lh->name; - if (data->leaf.ip) { - len = min(klen, DTLHDRDATALEN); - if (!(p->header.flag & BT_ROOT)) - bn = addressPXD(&p->header.self); - lh->index = cpu_to_le32(add_index(data->leaf.tid, - data->leaf.ip, - bn, index)); - } else - len = min(klen, DTLHDRDATALEN_LEGACY); - } else { - ih = (struct idtentry *) h; - ih->next = h->next; - xd = (pxd_t *) ih; - *xd = data->xd; - ih->namlen = klen; - name = ih->name; - len = min(klen, DTIHDRDATALEN); - } - - UniStrncpy_to_le(name, kname, len); - - n = 1; - xsi = hsi; - - /* write additional segment(s) */ - t = h; - klen -= len; - while (klen) { - /* get free slot */ - fsi = p->header.freelist; - t = 
&p->slot[fsi]; - p->header.freelist = t->next; - --p->header.freecnt; - - /* is next slot contiguous ? */ - if (fsi != xsi + 1) { - /* close current linelock */ - lv->length = n; - dtlck->index++; - - /* open new linelock */ - if (dtlck->index < dtlck->maxcnt) - lv++; - else { - dtlck = (struct dt_lock *) txLinelock(dtlck); - lv = & dtlck->lv[0]; - } - - lv->offset = fsi; - n = 0; - } - - kname += len; - len = min(klen, DTSLOTDATALEN); - UniStrncpy_to_le(t->name, kname, len); - - n++; - xsi = fsi; - klen -= len; - } - - /* close current linelock */ - lv->length = n; - dtlck->index++; - - *dtlock = dtlck; - - /* terminate last/only segment */ - if (h == t) { - /* single segment entry */ - if (p->header.flag & BT_LEAF) - lh->next = -1; - else - ih->next = -1; - } else - /* multi-segment entry */ - t->next = -1; - - /* if insert into middle, shift right succeeding entries in stbl */ - stbl = DT_GETSTBL(p); - nextindex = p->header.nextindex; - if (index < nextindex) { - memmove(stbl + index + 1, stbl + index, nextindex - index); - - if ((p->header.flag & BT_LEAF) && data->leaf.ip) { - s64 lblock; - - /* - * Need to update slot number for entries that moved - * in the stbl - */ - mp = NULL; - for (n = index + 1; n <= nextindex; n++) { - lh = (struct ldtentry *) & (p->slot[stbl[n]]); - modify_index(data->leaf.tid, data->leaf.ip, - le32_to_cpu(lh->index), bn, n, - &mp, &lblock); - } - if (mp) - release_metapage(mp); - } - } - - stbl[index] = hsi; - - /* advance next available entry index of stbl */ - ++p->header.nextindex; -} - - -/* - * dtMoveEntry() - * - * function: move entries from split/left page to new/right page - * - * nextindex of dst page and freelist/freecnt of both pages - * are updated. 
- */ -static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, - struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, - int do_index) -{ - int ssi, next; /* src slot index */ - int di; /* dst entry index */ - int dsi; /* dst slot index */ - s8 *sstbl, *dstbl; /* sorted entry table */ - int snamlen, len; - struct ldtentry *slh, *dlh = NULL; - struct idtentry *sih, *dih = NULL; - struct dtslot *h, *s, *d; - struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock; - struct lv *slv, *dlv; - int xssi, ns, nd; - int sfsi; - - sstbl = (s8 *) & sp->slot[sp->header.stblindex]; - dstbl = (s8 *) & dp->slot[dp->header.stblindex]; - - dsi = dp->header.freelist; /* first (whole page) free slot */ - sfsi = sp->header.freelist; - - /* linelock destination entry slot */ - dlv = & ddtlck->lv[ddtlck->index]; - dlv->offset = dsi; - - /* linelock source entry slot */ - slv = & sdtlck->lv[sdtlck->index]; - slv->offset = sstbl[si]; - xssi = slv->offset - 1; - - /* - * move entries - */ - ns = nd = 0; - for (di = 0; si < sp->header.nextindex; si++, di++) { - ssi = sstbl[si]; - dstbl[di] = dsi; - - /* is next slot contiguous ? 
*/ - if (ssi != xssi + 1) { - /* close current linelock */ - slv->length = ns; - sdtlck->index++; - - /* open new linelock */ - if (sdtlck->index < sdtlck->maxcnt) - slv++; - else { - sdtlck = (struct dt_lock *) txLinelock(sdtlck); - slv = & sdtlck->lv[0]; - } - - slv->offset = ssi; - ns = 0; - } - - /* - * move head/only segment of an entry - */ - /* get dst slot */ - h = d = &dp->slot[dsi]; - - /* get src slot and move */ - s = &sp->slot[ssi]; - if (sp->header.flag & BT_LEAF) { - /* get source entry */ - slh = (struct ldtentry *) s; - dlh = (struct ldtentry *) h; - snamlen = slh->namlen; - - if (do_index) { - len = min(snamlen, DTLHDRDATALEN); - dlh->index = slh->index; /* little-endian */ - } else - len = min(snamlen, DTLHDRDATALEN_LEGACY); - - memcpy(dlh, slh, 6 + len * 2); - - next = slh->next; - - /* update dst head/only segment next field */ - dsi++; - dlh->next = dsi; - } else { - sih = (struct idtentry *) s; - snamlen = sih->namlen; - - len = min(snamlen, DTIHDRDATALEN); - dih = (struct idtentry *) h; - memcpy(dih, sih, 10 + len * 2); - next = sih->next; - - dsi++; - dih->next = dsi; - } - - /* free src head/only segment */ - s->next = sfsi; - s->cnt = 1; - sfsi = ssi; - - ns++; - nd++; - xssi = ssi; - - /* - * move additional segment(s) of the entry - */ - snamlen -= len; - while ((ssi = next) >= 0) { - /* is next slot contiguous ? 
*/ - if (ssi != xssi + 1) { - /* close current linelock */ - slv->length = ns; - sdtlck->index++; - - /* open new linelock */ - if (sdtlck->index < sdtlck->maxcnt) - slv++; - else { - sdtlck = - (struct dt_lock *) - txLinelock(sdtlck); - slv = & sdtlck->lv[0]; - } - - slv->offset = ssi; - ns = 0; - } - - /* get next source segment */ - s = &sp->slot[ssi]; - - /* get next destination free slot */ - d++; - - len = min(snamlen, DTSLOTDATALEN); - UniStrncpy_le(d->name, s->name, len); - - ns++; - nd++; - xssi = ssi; - - dsi++; - d->next = dsi; - - /* free source segment */ - next = s->next; - s->next = sfsi; - s->cnt = 1; - sfsi = ssi; - - snamlen -= len; - } /* end while */ - - /* terminate dst last/only segment */ - if (h == d) { - /* single segment entry */ - if (dp->header.flag & BT_LEAF) - dlh->next = -1; - else - dih->next = -1; - } else - /* multi-segment entry */ - d->next = -1; - } /* end for */ - - /* close current linelock */ - slv->length = ns; - sdtlck->index++; - *sdtlock = sdtlck; - - dlv->length = nd; - ddtlck->index++; - *ddtlock = ddtlck; - - /* update source header */ - sp->header.freelist = sfsi; - sp->header.freecnt += nd; - - /* update destination header */ - dp->header.nextindex = di; - - dp->header.freelist = dsi; - dp->header.freecnt -= nd; -} - - -/* - * dtDeleteEntry() - * - * function: free a (leaf/internal) entry - * - * log freelist header, stbl, and each segment slot of entry - * (even though last/only segment next field is modified, - * physical image logging requires all segment slots of - * the entry logged to avoid applying previous updates - * to the same slots) - */ -static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock) -{ - int fsi; /* free entry slot index */ - s8 *stbl; - struct dtslot *t; - int si, freecnt; - struct dt_lock *dtlck = *dtlock; - struct lv *lv; - int xsi, n; - - /* get free entry slot index */ - stbl = DT_GETSTBL(p); - fsi = stbl[fi]; - - /* open new linelock */ - if (dtlck->index >= 
dtlck->maxcnt) - dtlck = (struct dt_lock *) txLinelock(dtlck); - lv = & dtlck->lv[dtlck->index]; - - lv->offset = fsi; - - /* get the head/only segment */ - t = &p->slot[fsi]; - if (p->header.flag & BT_LEAF) - si = ((struct ldtentry *) t)->next; - else - si = ((struct idtentry *) t)->next; - t->next = si; - t->cnt = 1; - - n = freecnt = 1; - xsi = fsi; - - /* find the last/only segment */ - while (si >= 0) { - /* is next slot contiguous ? */ - if (si != xsi + 1) { - /* close current linelock */ - lv->length = n; - dtlck->index++; - - /* open new linelock */ - if (dtlck->index < dtlck->maxcnt) - lv++; - else { - dtlck = (struct dt_lock *) txLinelock(dtlck); - lv = & dtlck->lv[0]; - } - - lv->offset = si; - n = 0; - } - - n++; - xsi = si; - freecnt++; - - t = &p->slot[si]; - t->cnt = 1; - si = t->next; - } - - /* close current linelock */ - lv->length = n; - dtlck->index++; - - *dtlock = dtlck; - - /* update freelist */ - t->next = p->header.freelist; - p->header.freelist = fsi; - p->header.freecnt += freecnt; - - /* if delete from middle, - * shift left the succedding entries in the stbl - */ - si = p->header.nextindex; - if (fi < si - 1) - memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1); - - p->header.nextindex--; -} - - -/* - * dtTruncateEntry() - * - * function: truncate a (leaf/internal) entry - * - * log freelist header, stbl, and each segment slot of entry - * (even though last/only segment next field is modified, - * physical image logging requires all segment slots of - * the entry logged to avoid applying previous updates - * to the same slots) - */ -static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock) -{ - int tsi; /* truncate entry slot index */ - s8 *stbl; - struct dtslot *t; - int si, freecnt; - struct dt_lock *dtlck = *dtlock; - struct lv *lv; - int fsi, xsi, n; - - /* get free entry slot index */ - stbl = DT_GETSTBL(p); - tsi = stbl[ti]; - - /* open new linelock */ - if (dtlck->index >= dtlck->maxcnt) - dtlck = (struct dt_lock *) 
txLinelock(dtlck); - lv = & dtlck->lv[dtlck->index]; - - lv->offset = tsi; - - /* get the head/only segment */ - t = &p->slot[tsi]; - ASSERT(p->header.flag & BT_INTERNAL); - ((struct idtentry *) t)->namlen = 0; - si = ((struct idtentry *) t)->next; - ((struct idtentry *) t)->next = -1; - - n = 1; - freecnt = 0; - fsi = si; - xsi = tsi; - - /* find the last/only segment */ - while (si >= 0) { - /* is next slot contiguous ? */ - if (si != xsi + 1) { - /* close current linelock */ - lv->length = n; - dtlck->index++; - - /* open new linelock */ - if (dtlck->index < dtlck->maxcnt) - lv++; - else { - dtlck = (struct dt_lock *) txLinelock(dtlck); - lv = & dtlck->lv[0]; - } - - lv->offset = si; - n = 0; - } - - n++; - xsi = si; - freecnt++; - - t = &p->slot[si]; - t->cnt = 1; - si = t->next; - } - - /* close current linelock */ - lv->length = n; - dtlck->index++; - - *dtlock = dtlck; - - /* update freelist */ - if (freecnt == 0) - return; - t->next = p->header.freelist; - p->header.freelist = fsi; - p->header.freecnt += freecnt; -} - - -/* - * dtLinelockFreelist() - */ -static void dtLinelockFreelist(dtpage_t * p, /* directory page */ - int m, /* max slot index */ - struct dt_lock ** dtlock) -{ - int fsi; /* free entry slot index */ - struct dtslot *t; - int si; - struct dt_lock *dtlck = *dtlock; - struct lv *lv; - int xsi, n; - - /* get free entry slot index */ - fsi = p->header.freelist; - - /* open new linelock */ - if (dtlck->index >= dtlck->maxcnt) - dtlck = (struct dt_lock *) txLinelock(dtlck); - lv = & dtlck->lv[dtlck->index]; - - lv->offset = fsi; - - n = 1; - xsi = fsi; - - t = &p->slot[fsi]; - si = t->next; - - /* find the last/only segment */ - while (si < m && si >= 0) { - /* is next slot contiguous ? 
*/ - if (si != xsi + 1) { - /* close current linelock */ - lv->length = n; - dtlck->index++; - - /* open new linelock */ - if (dtlck->index < dtlck->maxcnt) - lv++; - else { - dtlck = (struct dt_lock *) txLinelock(dtlck); - lv = & dtlck->lv[0]; - } - - lv->offset = si; - n = 0; - } - - n++; - xsi = si; - - t = &p->slot[si]; - si = t->next; - } - - /* close current linelock */ - lv->length = n; - dtlck->index++; - - *dtlock = dtlck; -} - - -/* - * NAME: dtModify - * - * FUNCTION: Modify the inode number part of a directory entry - * - * PARAMETERS: - * tid - Transaction id - * ip - Inode of parent directory - * key - Name of entry to be modified - * orig_ino - Original inode number expected in entry - * new_ino - New inode number to put into entry - * flag - JFS_RENAME - * - * RETURNS: - * -ESTALE - If entry found does not match orig_ino passed in - * -ENOENT - If no entry can be found to match key - * 0 - If successfully modified entry - */ -int dtModify(tid_t tid, struct inode *ip, - struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag) -{ - int rc; - s64 bn; - struct metapage *mp; - dtpage_t *p; - int index; - struct btstack btstack; - struct tlock *tlck; - struct dt_lock *dtlck; - struct lv *lv; - s8 *stbl; - int entry_si; /* entry slot index */ - struct ldtentry *entry; - - /* - * search for the entry to modify: - * - * dtSearch() returns (leaf page pinned, index at which to modify). 
- */ - if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag))) - return rc; - - /* retrieve search result */ - DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); - - BT_MARK_DIRTY(mp, ip); - /* - * acquire a transaction lock on the leaf page of named entry - */ - tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); - dtlck = (struct dt_lock *) & tlck->lock; - - /* get slot index of the entry */ - stbl = DT_GETSTBL(p); - entry_si = stbl[index]; - - /* linelock entry */ - ASSERT(dtlck->index == 0); - lv = & dtlck->lv[0]; - lv->offset = entry_si; - lv->length = 1; - dtlck->index++; - - /* get the head/only segment */ - entry = (struct ldtentry *) & p->slot[entry_si]; - - /* substitute the inode number of the entry */ - entry->inumber = cpu_to_le32(new_ino); - - /* unpin the leaf page */ - DT_PUTPAGE(mp); - - return 0; -} diff --git a/ANDROID_3.4.5/fs/jfs/jfs_dtree.h b/ANDROID_3.4.5/fs/jfs/jfs_dtree.h deleted file mode 100644 index 2545bb31..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_dtree.h +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_DTREE -#define _H_JFS_DTREE - -/* - * jfs_dtree.h: directory B+-tree manager - */ - -#include "jfs_btree.h" - -typedef union { - struct { - tid_t tid; - struct inode *ip; - u32 ino; - } leaf; - pxd_t xd; -} ddata_t; - - -/* - * entry segment/slot - * - * an entry consists of type dependent head/only segment/slot and - * additional segments/slots linked vi next field; - * N.B. last/only segment of entry is terminated by next = -1; - */ -/* - * directory page slot - */ -struct dtslot { - s8 next; /* 1: */ - s8 cnt; /* 1: */ - __le16 name[15]; /* 30: */ -}; /* (32) */ - - -#define DATASLOTSIZE 16 -#define L2DATASLOTSIZE 4 -#define DTSLOTSIZE 32 -#define L2DTSLOTSIZE 5 -#define DTSLOTHDRSIZE 2 -#define DTSLOTDATASIZE 30 -#define DTSLOTDATALEN 15 - -/* - * internal node entry head/only segment - */ -struct idtentry { - pxd_t xd; /* 8: child extent descriptor */ - - s8 next; /* 1: */ - u8 namlen; /* 1: */ - __le16 name[11]; /* 22: 2-byte aligned */ -}; /* (32) */ - -#define DTIHDRSIZE 10 -#define DTIHDRDATALEN 11 - -/* compute number of slots for entry */ -#define NDTINTERNAL(klen) (DIV_ROUND_UP((4 + (klen)), 15)) - - -/* - * leaf node entry head/only segment - * - * For legacy filesystems, name contains 13 wchars -- no index field - */ -struct ldtentry { - __le32 inumber; /* 4: 4-byte aligned */ - s8 next; /* 1: */ - u8 namlen; /* 1: */ - __le16 name[11]; /* 22: 2-byte aligned */ - __le32 index; /* 4: index into dir_table */ -}; /* (32) */ - -#define DTLHDRSIZE 6 -#define DTLHDRDATALEN_LEGACY 13 /* Old (OS/2) format */ -#define DTLHDRDATALEN 11 - -/* - * dir_table used for directory traversal during readdir - */ - -/* - * Keep persistent index for directory entries - */ -#define DO_INDEX(INODE) (JFS_SBI((INODE)->i_sb)->mntflag & 
JFS_DIR_INDEX) - -/* - * Maximum entry in inline directory table - */ -#define MAX_INLINE_DIRTABLE_ENTRY 13 - -struct dir_table_slot { - u8 rsrvd; /* 1: */ - u8 flag; /* 1: 0 if free */ - u8 slot; /* 1: slot within leaf page of entry */ - u8 addr1; /* 1: upper 8 bits of leaf page address */ - __le32 addr2; /* 4: lower 32 bits of leaf page address -OR- - index of next entry when this entry was deleted */ -}; /* (8) */ - -/* - * flag values - */ -#define DIR_INDEX_VALID 1 -#define DIR_INDEX_FREE 0 - -#define DTSaddress(dir_table_slot, address64)\ -{\ - (dir_table_slot)->addr1 = ((u64)address64) >> 32;\ - (dir_table_slot)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ -} - -#define addressDTS(dts)\ - ( ((s64)((dts)->addr1)) << 32 | __le32_to_cpu((dts)->addr2) ) - -/* compute number of slots for entry */ -#define NDTLEAF_LEGACY(klen) (DIV_ROUND_UP((2 + (klen)), 15)) -#define NDTLEAF NDTINTERNAL - - -/* - * directory root page (in-line in on-disk inode): - * - * cf. dtpage_t below. - */ -typedef union { - struct { - struct dasd DASD; /* 16: DASD limit/usage info */ - - u8 flag; /* 1: */ - u8 nextindex; /* 1: next free entry in stbl */ - s8 freecnt; /* 1: free count */ - s8 freelist; /* 1: freelist header */ - - __le32 idotdot; /* 4: parent inode number */ - - s8 stbl[8]; /* 8: sorted entry index table */ - } header; /* (32) */ - - struct dtslot slot[9]; -} dtroot_t; - -#define PARENT(IP) \ - (le32_to_cpu(JFS_IP(IP)->i_dtroot.header.idotdot)) - -#define DTROOTMAXSLOT 9 - -#define dtEmpty(IP) (JFS_IP(IP)->i_dtroot.header.nextindex == 0) - - -/* - * directory regular page: - * - * entry slot array of 32 byte slot - * - * sorted entry slot index table (stbl): - * contiguous slots at slot specified by stblindex, - * 1-byte per entry - * 512 byte block: 16 entry tbl (1 slot) - * 1024 byte block: 32 entry tbl (1 slot) - * 2048 byte block: 64 entry tbl (2 slot) - * 4096 byte block: 128 entry tbl (4 slot) - * - * data area: - * 512 byte block: 16 - 2 = 14 slot - * 1024 byte 
block: 32 - 2 = 30 slot - * 2048 byte block: 64 - 3 = 61 slot - * 4096 byte block: 128 - 5 = 123 slot - * - * N.B. index is 0-based; index fields refer to slot index - * except nextindex which refers to entry index in stbl; - * end of entry stot list or freelist is marked with -1. - */ -typedef union { - struct { - __le64 next; /* 8: next sibling */ - __le64 prev; /* 8: previous sibling */ - - u8 flag; /* 1: */ - u8 nextindex; /* 1: next entry index in stbl */ - s8 freecnt; /* 1: */ - s8 freelist; /* 1: slot index of head of freelist */ - - u8 maxslot; /* 1: number of slots in page slot[] */ - u8 stblindex; /* 1: slot index of start of stbl */ - u8 rsrvd[2]; /* 2: */ - - pxd_t self; /* 8: self pxd */ - } header; /* (32) */ - - struct dtslot slot[128]; -} dtpage_t; - -#define DTPAGEMAXSLOT 128 - -#define DT8THPGNODEBYTES 512 -#define DT8THPGNODETSLOTS 1 -#define DT8THPGNODESLOTS 16 - -#define DTQTRPGNODEBYTES 1024 -#define DTQTRPGNODETSLOTS 1 -#define DTQTRPGNODESLOTS 32 - -#define DTHALFPGNODEBYTES 2048 -#define DTHALFPGNODETSLOTS 2 -#define DTHALFPGNODESLOTS 64 - -#define DTFULLPGNODEBYTES 4096 -#define DTFULLPGNODETSLOTS 4 -#define DTFULLPGNODESLOTS 128 - -#define DTENTRYSTART 1 - -/* get sorted entry table of the page */ -#define DT_GETSTBL(p) ( ((p)->header.flag & BT_ROOT) ?\ - ((dtroot_t *)(p))->header.stbl : \ - (s8 *)&(p)->slot[(p)->header.stblindex] ) - -/* - * Flags for dtSearch - */ -#define JFS_CREATE 1 -#define JFS_LOOKUP 2 -#define JFS_REMOVE 3 -#define JFS_RENAME 4 - -/* - * Maximum file offset for directories. 
- */ -#define DIREND INT_MAX - -/* - * external declarations - */ -extern void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot); - -extern int dtSearch(struct inode *ip, struct component_name * key, - ino_t * data, struct btstack * btstack, int flag); - -extern int dtInsert(tid_t tid, struct inode *ip, struct component_name * key, - ino_t * ino, struct btstack * btstack); - -extern int dtDelete(tid_t tid, struct inode *ip, struct component_name * key, - ino_t * data, int flag); - -extern int dtModify(tid_t tid, struct inode *ip, struct component_name * key, - ino_t * orig_ino, ino_t new_ino, int flag); - -extern int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir); -#endif /* !_H_JFS_DTREE */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_extent.c b/ANDROID_3.4.5/fs/jfs/jfs_extent.c deleted file mode 100644 index e5fe8506..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_extent.c +++ /dev/null @@ -1,651 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/fs.h> -#include <linux/quotaops.h> -#include "jfs_incore.h" -#include "jfs_inode.h" -#include "jfs_superblock.h" -#include "jfs_dmap.h" -#include "jfs_extent.h" -#include "jfs_debug.h" - -/* - * forward references - */ -static int extBalloc(struct inode *, s64, s64 *, s64 *); -#ifdef _NOTYET -static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *); -#endif -static s64 extRoundDown(s64 nb); - -#define DPD(a) (printk("(a): %d\n",(a))) -#define DPC(a) (printk("(a): %c\n",(a))) -#define DPL1(a) \ -{ \ - if ((a) >> 32) \ - printk("(a): %x%08x ",(a)); \ - else \ - printk("(a): %x ",(a) << 32); \ -} -#define DPL(a) \ -{ \ - if ((a) >> 32) \ - printk("(a): %x%08x\n",(a)); \ - else \ - printk("(a): %x\n",(a) << 32); \ -} - -#define DPD1(a) (printk("(a): %d ",(a))) -#define DPX(a) (printk("(a): %08x\n",(a))) -#define DPX1(a) (printk("(a): %08x ",(a))) -#define DPS(a) (printk("%s\n",(a))) -#define DPE(a) (printk("\nENTERING: %s\n",(a))) -#define DPE1(a) (printk("\nENTERING: %s",(a))) -#define DPS1(a) (printk(" %s ",(a))) - - -/* - * NAME: extAlloc() - * - * FUNCTION: allocate an extent for a specified page range within a - * file. - * - * PARAMETERS: - * ip - the inode of the file. - * xlen - requested extent length. - * pno - the starting page number with the file. - * xp - pointer to an xad. on entry, xad describes an - * extent that is used as an allocation hint if the - * xaddr of the xad is non-zero. on successful exit, - * the xad describes the newly allocated extent. - * abnr - bool indicating whether the newly allocated extent - * should be marked as allocated but not recorded. - * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. 
- */ -int -extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) -{ - struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); - s64 nxlen, nxaddr, xoff, hint, xaddr = 0; - int rc; - int xflag; - - /* This blocks if we are low on resources */ - txBeginAnon(ip->i_sb); - - /* Avoid race with jfs_commit_inode() */ - mutex_lock(&JFS_IP(ip)->commit_mutex); - - /* validate extent length */ - if (xlen > MAXXLEN) - xlen = MAXXLEN; - - /* get the page's starting extent offset */ - xoff = pno << sbi->l2nbperpage; - - /* check if an allocation hint was provided */ - if ((hint = addressXAD(xp))) { - /* get the size of the extent described by the hint */ - nxlen = lengthXAD(xp); - - /* check if the hint is for the portion of the file - * immediately previous to the current allocation - * request and if hint extent has the same abnr - * value as the current request. if so, we can - * extend the hint extent to include the current - * extent if we can allocate the blocks immediately - * following the hint extent. - */ - if (offsetXAD(xp) + nxlen == xoff && - abnr == ((xp->flag & XAD_NOTRECORDED) ? true : false)) - xaddr = hint + nxlen; - - /* adjust the hint to the last block of the extent */ - hint += (nxlen - 1); - } - - /* allocate the disk blocks for the extent. initially, extBalloc() - * will try to allocate disk blocks for the requested size (xlen). - * if this fails (xlen contiguous free blocks not available), it'll - * try to allocate a smaller number of blocks (producing a smaller - * extent), with this smaller number of blocks consisting of the - * requested number of blocks rounded down to the next smaller - * power of 2 number (i.e. 16 -> 8). it'll continue to round down - * and retry the allocation until the number of blocks to allocate - * is smaller than the number of blocks per page. - */ - nxlen = xlen; - if ((rc = extBalloc(ip, hint ? 
hint : INOHINT(ip), &nxlen, &nxaddr))) { - mutex_unlock(&JFS_IP(ip)->commit_mutex); - return (rc); - } - - /* Allocate blocks to quota. */ - rc = dquot_alloc_block(ip, nxlen); - if (rc) { - dbFree(ip, nxaddr, (s64) nxlen); - mutex_unlock(&JFS_IP(ip)->commit_mutex); - return rc; - } - - /* determine the value of the extent flag */ - xflag = abnr ? XAD_NOTRECORDED : 0; - - /* if we can extend the hint extent to cover the current request, - * extend it. otherwise, insert a new extent to - * cover the current request. - */ - if (xaddr && xaddr == nxaddr) - rc = xtExtend(0, ip, xoff, (int) nxlen, 0); - else - rc = xtInsert(0, ip, xflag, xoff, (int) nxlen, &nxaddr, 0); - - /* if the extend or insert failed, - * free the newly allocated blocks and return the error. - */ - if (rc) { - dbFree(ip, nxaddr, nxlen); - dquot_free_block(ip, nxlen); - mutex_unlock(&JFS_IP(ip)->commit_mutex); - return (rc); - } - - /* set the results of the extent allocation */ - XADaddress(xp, nxaddr); - XADlength(xp, nxlen); - XADoffset(xp, xoff); - xp->flag = xflag; - - mark_inode_dirty(ip); - - mutex_unlock(&JFS_IP(ip)->commit_mutex); - /* - * COMMIT_SyncList flags an anonymous tlock on page that is on - * sync list. - * We need to commit the inode to get the page written disk. - */ - if (test_and_clear_cflag(COMMIT_Synclist,ip)) - jfs_commit_inode(ip, 0); - - return (0); -} - - -#ifdef _NOTYET -/* - * NAME: extRealloc() - * - * FUNCTION: extend the allocation of a file extent containing a - * partial back last page. - * - * PARAMETERS: - * ip - the inode of the file. - * cp - cbuf for the partial backed last page. - * xlen - request size of the resulting extent. - * xp - pointer to an xad. on successful exit, the xad - * describes the newly allocated extent. - * abnr - bool indicating whether the newly allocated extent - * should be marked as allocated but not recorded. - * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. 
- */ -int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr) -{ - struct super_block *sb = ip->i_sb; - s64 xaddr, xlen, nxaddr, delta, xoff; - s64 ntail, nextend, ninsert; - int rc, nbperpage = JFS_SBI(sb)->nbperpage; - int xflag; - - /* This blocks if we are low on resources */ - txBeginAnon(ip->i_sb); - - mutex_lock(&JFS_IP(ip)->commit_mutex); - /* validate extent length */ - if (nxlen > MAXXLEN) - nxlen = MAXXLEN; - - /* get the extend (partial) page's disk block address and - * number of blocks. - */ - xaddr = addressXAD(xp); - xlen = lengthXAD(xp); - xoff = offsetXAD(xp); - - /* if the extend page is abnr and if the request is for - * the extent to be allocated and recorded, - * make the page allocated and recorded. - */ - if ((xp->flag & XAD_NOTRECORDED) && !abnr) { - xp->flag = 0; - if ((rc = xtUpdate(0, ip, xp))) - goto exit; - } - - /* try to allocated the request number of blocks for the - * extent. dbRealloc() first tries to satisfy the request - * by extending the allocation in place. otherwise, it will - * try to allocate a new set of blocks large enough for the - * request. in satisfying a request, dbReAlloc() may allocate - * less than what was request but will always allocate enough - * space as to satisfy the extend page. - */ - if ((rc = extBrealloc(ip, xaddr, xlen, &nxlen, &nxaddr))) - goto exit; - - /* Allocat blocks to quota. */ - rc = dquot_alloc_block(ip, nxlen); - if (rc) { - dbFree(ip, nxaddr, (s64) nxlen); - mutex_unlock(&JFS_IP(ip)->commit_mutex); - return rc; - } - - delta = nxlen - xlen; - - /* check if the extend page is not abnr but the request is abnr - * and the allocated disk space is for more than one page. if this - * is the case, there is a miss match of abnr between the extend page - * and the one or more pages following the extend page. as a result, - * two extents will have to be manipulated. 
the first will be that - * of the extent of the extend page and will be manipulated thru - * an xtExtend() or an xtTailgate(), depending upon whether the - * disk allocation occurred as an inplace extension. the second - * extent will be manipulated (created) through an xtInsert() and - * will be for the pages following the extend page. - */ - if (abnr && (!(xp->flag & XAD_NOTRECORDED)) && (nxlen > nbperpage)) { - ntail = nbperpage; - nextend = ntail - xlen; - ninsert = nxlen - nbperpage; - - xflag = XAD_NOTRECORDED; - } else { - ntail = nxlen; - nextend = delta; - ninsert = 0; - - xflag = xp->flag; - } - - /* if we were able to extend the disk allocation in place, - * extend the extent. otherwise, move the extent to a - * new disk location. - */ - if (xaddr == nxaddr) { - /* extend the extent */ - if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) { - dbFree(ip, xaddr + xlen, delta); - dquot_free_block(ip, nxlen); - goto exit; - } - } else { - /* - * move the extent to a new location: - * - * xtTailgate() accounts for relocated tail extent; - */ - if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) { - dbFree(ip, nxaddr, nxlen); - dquot_free_block(ip, nxlen); - goto exit; - } - } - - - /* check if we need to also insert a new extent */ - if (ninsert) { - /* perform the insert. if it fails, free the blocks - * to be inserted and make it appear that we only did - * the xtExtend() or xtTailgate() above. - */ - xaddr = nxaddr + ntail; - if (xtInsert (0, ip, xflag, xoff + ntail, (int) ninsert, - &xaddr, 0)) { - dbFree(ip, xaddr, (s64) ninsert); - delta = nextend; - nxlen = ntail; - xflag = 0; - } - } - - /* set the return results */ - XADaddress(xp, nxaddr); - XADlength(xp, nxlen); - XADoffset(xp, xoff); - xp->flag = xflag; - - mark_inode_dirty(ip); -exit: - mutex_unlock(&JFS_IP(ip)->commit_mutex); - return (rc); -} -#endif /* _NOTYET */ - - -/* - * NAME: extHint() - * - * FUNCTION: produce an extent allocation hint for a file offset. 
- * - * PARAMETERS: - * ip - the inode of the file. - * offset - file offset for which the hint is needed. - * xp - pointer to the xad that is to be filled in with - * the hint. - * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - */ -int extHint(struct inode *ip, s64 offset, xad_t * xp) -{ - struct super_block *sb = ip->i_sb; - int nbperpage = JFS_SBI(sb)->nbperpage; - s64 prev; - int rc = 0; - s64 xaddr; - int xlen; - int xflag; - - /* init the hint as "no hint provided" */ - XADaddress(xp, 0); - - /* determine the starting extent offset of the page previous - * to the page containing the offset. - */ - prev = ((offset & ~POFFSET) >> JFS_SBI(sb)->l2bsize) - nbperpage; - - /* if the offset is in the first page of the file, no hint provided. - */ - if (prev < 0) - goto out; - - rc = xtLookup(ip, prev, nbperpage, &xflag, &xaddr, &xlen, 0); - - if ((rc == 0) && xlen) { - if (xlen != nbperpage) { - jfs_error(ip->i_sb, "extHint: corrupt xtree"); - rc = -EIO; - } - XADaddress(xp, xaddr); - XADlength(xp, xlen); - XADoffset(xp, prev); - /* - * only preserve the abnr flag within the xad flags - * of the returned hint. - */ - xp->flag = xflag & XAD_NOTRECORDED; - } else - rc = 0; - -out: - return (rc); -} - - -/* - * NAME: extRecord() - * - * FUNCTION: change a page with a file from not recorded to recorded. - * - * PARAMETERS: - * ip - inode of the file. - * cp - cbuf of the file page. - * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. - */ -int extRecord(struct inode *ip, xad_t * xp) -{ - int rc; - - txBeginAnon(ip->i_sb); - - mutex_lock(&JFS_IP(ip)->commit_mutex); - - /* update the extent */ - rc = xtUpdate(0, ip, xp); - - mutex_unlock(&JFS_IP(ip)->commit_mutex); - return rc; -} - - -#ifdef _NOTYET -/* - * NAME: extFill() - * - * FUNCTION: allocate disk space for a file page that represents - * a file hole. - * - * PARAMETERS: - * ip - the inode of the file. - * cp - cbuf of the file page represent the hole. 
- * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. - */ -int extFill(struct inode *ip, xad_t * xp) -{ - int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage; - s64 blkno = offsetXAD(xp) >> ip->i_blkbits; - -// assert(ISSPARSE(ip)); - - /* initialize the extent allocation hint */ - XADaddress(xp, 0); - - /* allocate an extent to fill the hole */ - if ((rc = extAlloc(ip, nbperpage, blkno, xp, false))) - return (rc); - - assert(lengthPXD(xp) == nbperpage); - - return (0); -} -#endif /* _NOTYET */ - - -/* - * NAME: extBalloc() - * - * FUNCTION: allocate disk blocks to form an extent. - * - * initially, we will try to allocate disk blocks for the - * requested size (nblocks). if this fails (nblocks - * contiguous free blocks not available), we'll try to allocate - * a smaller number of blocks (producing a smaller extent), with - * this smaller number of blocks consisting of the requested - * number of blocks rounded down to the next smaller power of 2 - * number (i.e. 16 -> 8). we'll continue to round down and - * retry the allocation until the number of blocks to allocate - * is smaller than the number of blocks per page. - * - * PARAMETERS: - * ip - the inode of the file. - * hint - disk block number to be used as an allocation hint. - * *nblocks - pointer to an s64 value. on entry, this value specifies - * the desired number of block to be allocated. on successful - * exit, this value is set to the number of blocks actually - * allocated. - * blkno - pointer to a block address that is filled in on successful - * return with the starting block number of the newly - * allocated block range. - * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. 
- */ -static int -extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) -{ - struct jfs_inode_info *ji = JFS_IP(ip); - struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); - s64 nb, nblks, daddr, max; - int rc, nbperpage = sbi->nbperpage; - struct bmap *bmp = sbi->bmap; - int ag; - - /* get the number of blocks to initially attempt to allocate. - * we'll first try the number of blocks requested unless this - * number is greater than the maximum number of contiguous free - * blocks in the map. in that case, we'll start off with the - * maximum free. - */ - max = (s64) 1 << bmp->db_maxfreebud; - if (*nblocks >= max && *nblocks > nbperpage) - nb = nblks = (max > nbperpage) ? max : nbperpage; - else - nb = nblks = *nblocks; - - /* try to allocate blocks */ - while ((rc = dbAlloc(ip, hint, nb, &daddr)) != 0) { - /* if something other than an out of space error, - * stop and return this error. - */ - if (rc != -ENOSPC) - return (rc); - - /* decrease the allocation request size */ - nb = min(nblks, extRoundDown(nb)); - - /* give up if we cannot cover a page */ - if (nb < nbperpage) - return (rc); - } - - *nblocks = nb; - *blkno = daddr; - - if (S_ISREG(ip->i_mode) && (ji->fileset == FILESYSTEM_I)) { - ag = BLKTOAG(daddr, sbi); - spin_lock_irq(&ji->ag_lock); - if (ji->active_ag == -1) { - atomic_inc(&bmp->db_active[ag]); - ji->active_ag = ag; - } else if (ji->active_ag != ag) { - atomic_dec(&bmp->db_active[ji->active_ag]); - atomic_inc(&bmp->db_active[ag]); - ji->active_ag = ag; - } - spin_unlock_irq(&ji->ag_lock); - } - - return (0); -} - - -#ifdef _NOTYET -/* - * NAME: extBrealloc() - * - * FUNCTION: attempt to extend an extent's allocation. - * - * Initially, we will try to extend the extent's allocation - * in place. If this fails, we'll try to move the extent - * to a new set of blocks. If moving the extent, we initially - * will try to allocate disk blocks for the requested size - * (newnblks). 
if this fails (new contiguous free blocks not - * available), we'll try to allocate a smaller number of - * blocks (producing a smaller extent), with this smaller - * number of blocks consisting of the requested number of - * blocks rounded down to the next smaller power of 2 - * number (i.e. 16 -> 8). We'll continue to round down and - * retry the allocation until the number of blocks to allocate - * is smaller than the number of blocks per page. - * - * PARAMETERS: - * ip - the inode of the file. - * blkno - starting block number of the extents current allocation. - * nblks - number of blocks within the extents current allocation. - * newnblks - pointer to a s64 value. on entry, this value is the - * the new desired extent size (number of blocks). on - * successful exit, this value is set to the extent's actual - * new size (new number of blocks). - * newblkno - the starting block number of the extents new allocation. - * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. - */ -static int -extBrealloc(struct inode *ip, - s64 blkno, s64 nblks, s64 * newnblks, s64 * newblkno) -{ - int rc; - - /* try to extend in place */ - if ((rc = dbExtend(ip, blkno, nblks, *newnblks - nblks)) == 0) { - *newblkno = blkno; - return (0); - } else { - if (rc != -ENOSPC) - return (rc); - } - - /* in place extension not possible. - * try to move the extent to a new set of blocks. - */ - return (extBalloc(ip, blkno, newnblks, newblkno)); -} -#endif /* _NOTYET */ - - -/* - * NAME: extRoundDown() - * - * FUNCTION: round down a specified number of blocks to the next - * smallest power of 2 number. - * - * PARAMETERS: - * nb - the inode of the file. - * - * RETURN VALUES: - * next smallest power of 2 number. - */ -static s64 extRoundDown(s64 nb) -{ - int i; - u64 m, k; - - for (i = 0, m = (u64) 1 << 63; i < 64; i++, m >>= 1) { - if (m & nb) - break; - } - - i = 63 - i; - k = (u64) 1 << i; - k = ((k - 1) & nb) ? 
k : k >> 1; - - return (k); -} diff --git a/ANDROID_3.4.5/fs/jfs/jfs_extent.h b/ANDROID_3.4.5/fs/jfs/jfs_extent.h deleted file mode 100644 index b567e12c..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_extent.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2001 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_EXTENT -#define _H_JFS_EXTENT - -/* get block allocation allocation hint as location of disk inode */ -#define INOHINT(ip) \ - (addressPXD(&(JFS_IP(ip)->ixpxd)) + lengthPXD(&(JFS_IP(ip)->ixpxd)) - 1) - -extern int extAlloc(struct inode *, s64, s64, xad_t *, bool); -extern int extFill(struct inode *, xad_t *); -extern int extHint(struct inode *, s64, xad_t *); -extern int extRealloc(struct inode *, s64, xad_t *, bool); -extern int extRecord(struct inode *, xad_t *); - -#endif /* _H_JFS_EXTENT */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_filsys.h b/ANDROID_3.4.5/fs/jfs/jfs_filsys.h deleted file mode 100644 index b3f5463f..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_filsys.h +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2003 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free 
Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_FILSYS -#define _H_JFS_FILSYS - -/* - * jfs_filsys.h - * - * file system (implementation-dependent) constants - * - * refer to <limits.h> for system wide implementation-dependent constants - */ - -/* - * file system option (superblock flag) - */ - -/* directory option */ -#define JFS_UNICODE 0x00000001 /* unicode name */ - -/* mount time flags for error handling */ -#define JFS_ERR_REMOUNT_RO 0x00000002 /* remount read-only */ -#define JFS_ERR_CONTINUE 0x00000004 /* continue */ -#define JFS_ERR_PANIC 0x00000008 /* panic */ - -/* Quota support */ -#define JFS_USRQUOTA 0x00000010 -#define JFS_GRPQUOTA 0x00000020 - -/* mount time flag to disable journaling to disk */ -#define JFS_NOINTEGRITY 0x00000040 - -/* commit option */ -#define JFS_COMMIT 0x00000f00 /* commit option mask */ -#define JFS_GROUPCOMMIT 0x00000100 /* group (of 1) commit */ -#define JFS_LAZYCOMMIT 0x00000200 /* lazy commit */ -#define JFS_TMPFS 0x00000400 /* temporary file system - - * do not log/commit: - * Never implemented - */ - -/* log logical volume option */ -#define JFS_INLINELOG 0x00000800 /* inline log within file system */ -#define JFS_INLINEMOVE 0x00001000 /* inline log being moved */ - -/* Secondary aggregate inode table */ -#define JFS_BAD_SAIT 0x00010000 /* current secondary ait is bad */ - -/* sparse regular file support */ -#define JFS_SPARSE 0x00020000 /* sparse regular file */ - -/* DASD Limits F226941 */ 
-#define JFS_DASD_ENABLED 0x00040000 /* DASD limits enabled */ -#define JFS_DASD_PRIME 0x00080000 /* Prime DASD usage on boot */ - -/* big endian flag */ -#define JFS_SWAP_BYTES 0x00100000 /* running on big endian computer */ - -/* Directory index */ -#define JFS_DIR_INDEX 0x00200000 /* Persistent index for */ - -/* platform options */ -#define JFS_LINUX 0x10000000 /* Linux support */ -#define JFS_DFS 0x20000000 /* DCE DFS LFS support */ -/* Never implemented */ - -#define JFS_OS2 0x40000000 /* OS/2 support */ -/* case-insensitive name/directory support */ - -#define JFS_AIX 0x80000000 /* AIX support */ - -/* - * buffer cache configuration - */ -/* page size */ -#ifdef PSIZE -#undef PSIZE -#endif -#define PSIZE 4096 /* page size (in byte) */ -#define L2PSIZE 12 /* log2(PSIZE) */ -#define POFFSET 4095 /* offset within page */ - -/* buffer page size */ -#define BPSIZE PSIZE - -/* - * fs fundamental size - * - * PSIZE >= file system block size >= PBSIZE >= DISIZE - */ -#define PBSIZE 512 /* physical block size (in byte) */ -#define L2PBSIZE 9 /* log2(PBSIZE) */ - -#define DISIZE 512 /* on-disk inode size (in byte) */ -#define L2DISIZE 9 /* log2(DISIZE) */ - -#define IDATASIZE 256 /* inode inline data size */ -#define IXATTRSIZE 128 /* inode inline extended attribute size */ - -#define XTPAGE_SIZE 4096 -#define log2_PAGESIZE 12 - -#define IAG_SIZE 4096 -#define IAG_EXTENT_SIZE 4096 -#define INOSPERIAG 4096 /* number of disk inodes per iag */ -#define L2INOSPERIAG 12 /* l2 number of disk inodes per iag */ -#define INOSPEREXT 32 /* number of disk inode per extent */ -#define L2INOSPEREXT 5 /* l2 number of disk inode per extent */ -#define IXSIZE (DISIZE * INOSPEREXT) /* inode extent size */ -#define INOSPERPAGE 8 /* number of disk inodes per 4K page */ -#define L2INOSPERPAGE 3 /* log2(INOSPERPAGE) */ - -#define IAGFREELIST_LWM 64 - -#define INODE_EXTENT_SIZE IXSIZE /* inode extent size */ -#define NUM_INODE_PER_EXTENT INOSPEREXT -#define NUM_INODE_PER_IAG INOSPERIAG - 
-#define MINBLOCKSIZE 512 -#define MAXBLOCKSIZE 4096 -#define MAXFILESIZE ((s64)1 << 52) - -#define JFS_LINK_MAX 0xffffffff - -/* Minimum number of bytes supported for a JFS partition */ -#define MINJFS (0x1000000) -#define MINJFSTEXT "16" - -/* - * file system block size -> physical block size - */ -#define LBOFFSET(x) ((x) & (PBSIZE - 1)) -#define LBNUMBER(x) ((x) >> L2PBSIZE) -#define LBLK2PBLK(sb,b) ((b) << (sb->s_blocksize_bits - L2PBSIZE)) -#define PBLK2LBLK(sb,b) ((b) >> (sb->s_blocksize_bits - L2PBSIZE)) -/* size in byte -> last page number */ -#define SIZE2PN(size) ( ((s64)((size) - 1)) >> (L2PSIZE) ) -/* size in byte -> last file system block number */ -#define SIZE2BN(size, l2bsize) ( ((s64)((size) - 1)) >> (l2bsize) ) - -/* - * fixed physical block address (physical block size = 512 byte) - * - * NOTE: since we can't guarantee a physical block size of 512 bytes the use of - * these macros should be removed and the byte offset macros used instead. - */ -#define SUPER1_B 64 /* primary superblock */ -#define AIMAP_B (SUPER1_B + 8) /* 1st extent of aggregate inode map */ -#define AITBL_B (AIMAP_B + 16) /* - * 1st extent of aggregate inode table - */ -#define SUPER2_B (AITBL_B + 32) /* 2ndary superblock pbn */ -#define BMAP_B (SUPER2_B + 8) /* block allocation map */ - -/* - * SIZE_OF_SUPER defines the total amount of space reserved on disk for the - * superblock. This is not the same as the superblock structure, since all of - * this space is not currently being used. 
- */ -#define SIZE_OF_SUPER PSIZE - -/* - * SIZE_OF_AG_TABLE defines the amount of space reserved to hold the AG table - */ -#define SIZE_OF_AG_TABLE PSIZE - -/* - * SIZE_OF_MAP_PAGE defines the amount of disk space reserved for each page of - * the inode allocation map (to hold iag) - */ -#define SIZE_OF_MAP_PAGE PSIZE - -/* - * fixed byte offset address - */ -#define SUPER1_OFF 0x8000 /* primary superblock */ -#define AIMAP_OFF (SUPER1_OFF + SIZE_OF_SUPER) - /* - * Control page of aggregate inode map - * followed by 1st extent of map - */ -#define AITBL_OFF (AIMAP_OFF + (SIZE_OF_MAP_PAGE << 1)) - /* - * 1st extent of aggregate inode table - */ -#define SUPER2_OFF (AITBL_OFF + INODE_EXTENT_SIZE) - /* - * secondary superblock - */ -#define BMAP_OFF (SUPER2_OFF + SIZE_OF_SUPER) - /* - * block allocation map - */ - -/* - * The following macro is used to indicate the number of reserved disk blocks at - * the front of an aggregate, in terms of physical blocks. This value is - * currently defined to be 32K. This turns out to be the same as the primary - * superblock's address, since it directly follows the reserved blocks. - */ -#define AGGR_RSVD_BLOCKS SUPER1_B - -/* - * The following macro is used to indicate the number of reserved bytes at the - * front of an aggregate. This value is currently defined to be 32K. This - * turns out to be the same as the primary superblock's byte offset, since it - * directly follows the reserved blocks. - */ -#define AGGR_RSVD_BYTES SUPER1_OFF - -/* - * The following macro defines the byte offset for the first inode extent in - * the aggregate inode table. This allows us to find the self inode to find the - * rest of the table. Currently this value is 44K. 
- */ -#define AGGR_INODE_TABLE_START AITBL_OFF - -/* - * fixed reserved inode number - */ -/* aggregate inode */ -#define AGGR_RESERVED_I 0 /* aggregate inode (reserved) */ -#define AGGREGATE_I 1 /* aggregate inode map inode */ -#define BMAP_I 2 /* aggregate block allocation map inode */ -#define LOG_I 3 /* aggregate inline log inode */ -#define BADBLOCK_I 4 /* aggregate bad block inode */ -#define FILESYSTEM_I 16 /* 1st/only fileset inode in ait: - * fileset inode map inode - */ - -/* per fileset inode */ -#define FILESET_RSVD_I 0 /* fileset inode (reserved) */ -#define FILESET_EXT_I 1 /* fileset inode extension */ -#define ROOT_I 2 /* fileset root inode */ -#define ACL_I 3 /* fileset ACL inode */ - -#define FILESET_OBJECT_I 4 /* the first fileset inode available for a file - * or directory or link... - */ -#define FIRST_FILESET_INO 16 /* the first aggregate inode which describes - * an inode. (To fsck this is also the first - * inode in part 2 of the agg inode table.) - */ - -/* - * directory configuration - */ -#define JFS_NAME_MAX 255 -#define JFS_PATH_MAX BPSIZE - - -/* - * file system state (superblock state) - */ -#define FM_CLEAN 0x00000000 /* file system is unmounted and clean */ -#define FM_MOUNT 0x00000001 /* file system is mounted cleanly */ -#define FM_DIRTY 0x00000002 /* file system was not unmounted and clean - * when mounted or - * commit failure occurred while being mounted: - * fsck() must be run to repair - */ -#define FM_LOGREDO 0x00000004 /* log based recovery (logredo()) failed: - * fsck() must be run to repair - */ -#define FM_EXTENDFS 0x00000008 /* file system extendfs() in progress */ - -#endif /* _H_JFS_FILSYS */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_imap.c b/ANDROID_3.4.5/fs/jfs/jfs_imap.c deleted file mode 100644 index 1b6f15f1..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_imap.c +++ /dev/null @@ -1,3187 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute 
it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* - * jfs_imap.c: inode allocation map manager - * - * Serialization: - * Each AG has a simple lock which is used to control the serialization of - * the AG level lists. This lock should be taken first whenever an AG - * level list will be modified or accessed. - * - * Each IAG is locked by obtaining the buffer for the IAG page. - * - * There is also a inode lock for the inode map inode. A read lock needs to - * be taken whenever an IAG is read from the map or the global level - * information is read. A write lock needs to be taken whenever the global - * level information is modified or an atomic operation needs to be used. - * - * If more than one IAG is read at one time, the read lock may not - * be given up until all of the IAG's are read. Otherwise, a deadlock - * may occur when trying to obtain the read lock while another thread - * holding the read lock is waiting on the IAG already being held. - * - * The control page of the inode map is read into memory by diMount(). - * Thereafter it should only be modified in memory and then it will be - * written out when the filesystem is unmounted by diUnmount(). 
- */ - -#include <linux/fs.h> -#include <linux/buffer_head.h> -#include <linux/pagemap.h> -#include <linux/quotaops.h> -#include <linux/slab.h> - -#include "jfs_incore.h" -#include "jfs_inode.h" -#include "jfs_filsys.h" -#include "jfs_dinode.h" -#include "jfs_dmap.h" -#include "jfs_imap.h" -#include "jfs_metapage.h" -#include "jfs_superblock.h" -#include "jfs_debug.h" - -/* - * imap locks - */ -/* iag free list lock */ -#define IAGFREE_LOCK_INIT(imap) mutex_init(&imap->im_freelock) -#define IAGFREE_LOCK(imap) mutex_lock(&imap->im_freelock) -#define IAGFREE_UNLOCK(imap) mutex_unlock(&imap->im_freelock) - -/* per ag iag list locks */ -#define AG_LOCK_INIT(imap,index) mutex_init(&(imap->im_aglock[index])) -#define AG_LOCK(imap,agno) mutex_lock(&imap->im_aglock[agno]) -#define AG_UNLOCK(imap,agno) mutex_unlock(&imap->im_aglock[agno]) - -/* - * forward references - */ -static int diAllocAG(struct inomap *, int, bool, struct inode *); -static int diAllocAny(struct inomap *, int, bool, struct inode *); -static int diAllocBit(struct inomap *, struct iag *, int); -static int diAllocExt(struct inomap *, int, struct inode *); -static int diAllocIno(struct inomap *, int, struct inode *); -static int diFindFree(u32, int); -static int diNewExt(struct inomap *, struct iag *, int); -static int diNewIAG(struct inomap *, int *, int, struct metapage **); -static void duplicateIXtree(struct super_block *, s64, int, s64 *); - -static int diIAGRead(struct inomap * imap, int, struct metapage **); -static int copy_from_dinode(struct dinode *, struct inode *); -static void copy_to_dinode(struct dinode *, struct inode *); - -/* - * NAME: diMount() - * - * FUNCTION: initialize the incore inode map control structures for - * a fileset or aggregate init time. - * - * the inode map's control structure (dinomap) is - * brought in from disk and placed in virtual memory. - * - * PARAMETERS: - * ipimap - pointer to inode map inode for the aggregate or fileset. 
- * - * RETURN VALUES: - * 0 - success - * -ENOMEM - insufficient free virtual memory. - * -EIO - i/o error. - */ -int diMount(struct inode *ipimap) -{ - struct inomap *imap; - struct metapage *mp; - int index; - struct dinomap_disk *dinom_le; - - /* - * allocate/initialize the in-memory inode map control structure - */ - /* allocate the in-memory inode map control structure. */ - imap = kmalloc(sizeof(struct inomap), GFP_KERNEL); - if (imap == NULL) { - jfs_err("diMount: kmalloc returned NULL!"); - return -ENOMEM; - } - - /* read the on-disk inode map control structure. */ - - mp = read_metapage(ipimap, - IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, - PSIZE, 0); - if (mp == NULL) { - kfree(imap); - return -EIO; - } - - /* copy the on-disk version to the in-memory version. */ - dinom_le = (struct dinomap_disk *) mp->data; - imap->im_freeiag = le32_to_cpu(dinom_le->in_freeiag); - imap->im_nextiag = le32_to_cpu(dinom_le->in_nextiag); - atomic_set(&imap->im_numinos, le32_to_cpu(dinom_le->in_numinos)); - atomic_set(&imap->im_numfree, le32_to_cpu(dinom_le->in_numfree)); - imap->im_nbperiext = le32_to_cpu(dinom_le->in_nbperiext); - imap->im_l2nbperiext = le32_to_cpu(dinom_le->in_l2nbperiext); - for (index = 0; index < MAXAG; index++) { - imap->im_agctl[index].inofree = - le32_to_cpu(dinom_le->in_agctl[index].inofree); - imap->im_agctl[index].extfree = - le32_to_cpu(dinom_le->in_agctl[index].extfree); - imap->im_agctl[index].numinos = - le32_to_cpu(dinom_le->in_agctl[index].numinos); - imap->im_agctl[index].numfree = - le32_to_cpu(dinom_le->in_agctl[index].numfree); - } - - /* release the buffer. */ - release_metapage(mp); - - /* - * allocate/initialize inode allocation map locks - */ - /* allocate and init iag free list lock */ - IAGFREE_LOCK_INIT(imap); - - /* allocate and init ag list locks */ - for (index = 0; index < MAXAG; index++) { - AG_LOCK_INIT(imap, index); - } - - /* bind the inode map inode and inode map control structure - * to each other. 
- */ - imap->im_ipimap = ipimap; - JFS_IP(ipimap)->i_imap = imap; - - return (0); -} - - -/* - * NAME: diUnmount() - * - * FUNCTION: write to disk the incore inode map control structures for - * a fileset or aggregate at unmount time. - * - * PARAMETERS: - * ipimap - pointer to inode map inode for the aggregate or fileset. - * - * RETURN VALUES: - * 0 - success - * -ENOMEM - insufficient free virtual memory. - * -EIO - i/o error. - */ -int diUnmount(struct inode *ipimap, int mounterror) -{ - struct inomap *imap = JFS_IP(ipimap)->i_imap; - - /* - * update the on-disk inode map control structure - */ - - if (!(mounterror || isReadOnly(ipimap))) - diSync(ipimap); - - /* - * Invalidate the page cache buffers - */ - truncate_inode_pages(ipimap->i_mapping, 0); - - /* - * free in-memory control structure - */ - kfree(imap); - - return (0); -} - - -/* - * diSync() - */ -int diSync(struct inode *ipimap) -{ - struct dinomap_disk *dinom_le; - struct inomap *imp = JFS_IP(ipimap)->i_imap; - struct metapage *mp; - int index; - - /* - * write imap global conrol page - */ - /* read the on-disk inode map control structure */ - mp = get_metapage(ipimap, - IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, - PSIZE, 0); - if (mp == NULL) { - jfs_err("diSync: get_metapage failed!"); - return -EIO; - } - - /* copy the in-memory version to the on-disk version */ - dinom_le = (struct dinomap_disk *) mp->data; - dinom_le->in_freeiag = cpu_to_le32(imp->im_freeiag); - dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag); - dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos)); - dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree)); - dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext); - dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext); - for (index = 0; index < MAXAG; index++) { - dinom_le->in_agctl[index].inofree = - cpu_to_le32(imp->im_agctl[index].inofree); - dinom_le->in_agctl[index].extfree = - cpu_to_le32(imp->im_agctl[index].extfree); - 
dinom_le->in_agctl[index].numinos = - cpu_to_le32(imp->im_agctl[index].numinos); - dinom_le->in_agctl[index].numfree = - cpu_to_le32(imp->im_agctl[index].numfree); - } - - /* write out the control structure */ - write_metapage(mp); - - /* - * write out dirty pages of imap - */ - filemap_write_and_wait(ipimap->i_mapping); - - diWriteSpecial(ipimap, 0); - - return (0); -} - - -/* - * NAME: diRead() - * - * FUNCTION: initialize an incore inode from disk. - * - * on entry, the specifed incore inode should itself - * specify the disk inode number corresponding to the - * incore inode (i.e. i_number should be initialized). - * - * this routine handles incore inode initialization for - * both "special" and "regular" inodes. special inodes - * are those required early in the mount process and - * require special handling since much of the file system - * is not yet initialized. these "special" inodes are - * identified by a NULL inode map inode pointer and are - * actually initialized by a call to diReadSpecial(). - * - * for regular inodes, the iag describing the disk inode - * is read from disk to determine the inode extent address - * for the disk inode. with the inode extent address in - * hand, the page of the extent that contains the disk - * inode is read and the disk inode is copied to the - * incore inode. - * - * PARAMETERS: - * ip - pointer to incore inode to be initialized from disk. - * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error. 
- * -ENOMEM - insufficient memory - * - */ -int diRead(struct inode *ip) -{ - struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); - int iagno, ino, extno, rc; - struct inode *ipimap; - struct dinode *dp; - struct iag *iagp; - struct metapage *mp; - s64 blkno, agstart; - struct inomap *imap; - int block_offset; - int inodes_left; - unsigned long pageno; - int rel_inode; - - jfs_info("diRead: ino = %ld", ip->i_ino); - - ipimap = sbi->ipimap; - JFS_IP(ip)->ipimap = ipimap; - - /* determine the iag number for this inode (number) */ - iagno = INOTOIAG(ip->i_ino); - - /* read the iag */ - imap = JFS_IP(ipimap)->i_imap; - IREAD_LOCK(ipimap, RDWRLOCK_IMAP); - rc = diIAGRead(imap, iagno, &mp); - IREAD_UNLOCK(ipimap); - if (rc) { - jfs_err("diRead: diIAGRead returned %d", rc); - return (rc); - } - - iagp = (struct iag *) mp->data; - - /* determine inode extent that holds the disk inode */ - ino = ip->i_ino & (INOSPERIAG - 1); - extno = ino >> L2INOSPEREXT; - - if ((lengthPXD(&iagp->inoext[extno]) != imap->im_nbperiext) || - (addressPXD(&iagp->inoext[extno]) == 0)) { - release_metapage(mp); - return -ESTALE; - } - - /* get disk block number of the page within the inode extent - * that holds the disk inode. 
- */ - blkno = INOPBLK(&iagp->inoext[extno], ino, sbi->l2nbperpage); - - /* get the ag for the iag */ - agstart = le64_to_cpu(iagp->agstart); - - release_metapage(mp); - - rel_inode = (ino & (INOSPERPAGE - 1)); - pageno = blkno >> sbi->l2nbperpage; - - if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) { - /* - * OS/2 didn't always align inode extents on page boundaries - */ - inodes_left = - (sbi->nbperpage - block_offset) << sbi->l2niperblk; - - if (rel_inode < inodes_left) - rel_inode += block_offset << sbi->l2niperblk; - else { - pageno += 1; - rel_inode -= inodes_left; - } - } - - /* read the page of disk inode */ - mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); - if (!mp) { - jfs_err("diRead: read_metapage failed"); - return -EIO; - } - - /* locate the disk inode requested */ - dp = (struct dinode *) mp->data; - dp += rel_inode; - - if (ip->i_ino != le32_to_cpu(dp->di_number)) { - jfs_error(ip->i_sb, "diRead: i_ino != di_number"); - rc = -EIO; - } else if (le32_to_cpu(dp->di_nlink) == 0) - rc = -ESTALE; - else - /* copy the disk inode to the in-memory inode */ - rc = copy_from_dinode(dp, ip); - - release_metapage(mp); - - /* set the ag for the inode */ - JFS_IP(ip)->agstart = agstart; - JFS_IP(ip)->active_ag = -1; - - return (rc); -} - - -/* - * NAME: diReadSpecial() - * - * FUNCTION: initialize a 'special' inode from disk. - * - * this routines handles aggregate level inodes. The - * inode cache cannot differentiate between the - * aggregate inodes and the filesystem inodes, so we - * handle these here. We don't actually use the aggregate - * inode map, since these inodes are at a fixed location - * and in some cases the aggregate inode map isn't initialized - * yet. - * - * PARAMETERS: - * sb - filesystem superblock - * inum - aggregate inode number - * secondary - 1 if secondary aggregate inode table - * - * RETURN VALUES: - * new inode - success - * NULL - i/o error. 
- */ -struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) -{ - struct jfs_sb_info *sbi = JFS_SBI(sb); - uint address; - struct dinode *dp; - struct inode *ip; - struct metapage *mp; - - ip = new_inode(sb); - if (ip == NULL) { - jfs_err("diReadSpecial: new_inode returned NULL!"); - return ip; - } - - if (secondary) { - address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage; - JFS_IP(ip)->ipimap = sbi->ipaimap2; - } else { - address = AITBL_OFF >> L2PSIZE; - JFS_IP(ip)->ipimap = sbi->ipaimap; - } - - ASSERT(inum < INOSPEREXT); - - ip->i_ino = inum; - - address += inum >> 3; /* 8 inodes per 4K page */ - - /* read the page of fixed disk inode (AIT) in raw mode */ - mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); - if (mp == NULL) { - set_nlink(ip, 1); /* Don't want iput() deleting it */ - iput(ip); - return (NULL); - } - - /* get the pointer to the disk inode of interest */ - dp = (struct dinode *) (mp->data); - dp += inum % 8; /* 8 inodes per 4K page */ - - /* copy on-disk inode to in-memory inode */ - if ((copy_from_dinode(dp, ip)) != 0) { - /* handle bad return by returning NULL for ip */ - set_nlink(ip, 1); /* Don't want iput() deleting it */ - iput(ip); - /* release the page */ - release_metapage(mp); - return (NULL); - - } - - ip->i_mapping->a_ops = &jfs_metapage_aops; - mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS); - - /* Allocations to metadata inodes should not affect quotas */ - ip->i_flags |= S_NOQUOTA; - - if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) { - sbi->gengen = le32_to_cpu(dp->di_gengen); - sbi->inostamp = le32_to_cpu(dp->di_inostamp); - } - - /* release the page */ - release_metapage(mp); - - /* - * __mark_inode_dirty expects inodes to be hashed. Since we don't - * want special inodes in the fileset inode space, we make them - * appear hashed, but do not put on any lists. hlist_del() - * will work fine and require no locking. 
- */ - hlist_add_fake(&ip->i_hash); - - return (ip); -} - -/* - * NAME: diWriteSpecial() - * - * FUNCTION: Write the special inode to disk - * - * PARAMETERS: - * ip - special inode - * secondary - 1 if secondary aggregate inode table - * - * RETURN VALUES: none - */ - -void diWriteSpecial(struct inode *ip, int secondary) -{ - struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); - uint address; - struct dinode *dp; - ino_t inum = ip->i_ino; - struct metapage *mp; - - if (secondary) - address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage; - else - address = AITBL_OFF >> L2PSIZE; - - ASSERT(inum < INOSPEREXT); - - address += inum >> 3; /* 8 inodes per 4K page */ - - /* read the page of fixed disk inode (AIT) in raw mode */ - mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); - if (mp == NULL) { - jfs_err("diWriteSpecial: failed to read aggregate inode " - "extent!"); - return; - } - - /* get the pointer to the disk inode of interest */ - dp = (struct dinode *) (mp->data); - dp += inum % 8; /* 8 inodes per 4K page */ - - /* copy on-disk inode to in-memory inode */ - copy_to_dinode(dp, ip); - memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288); - - if (inum == FILESYSTEM_I) - dp->di_gengen = cpu_to_le32(sbi->gengen); - - /* write the page */ - write_metapage(mp); -} - -/* - * NAME: diFreeSpecial() - * - * FUNCTION: Free allocated space for special inode - */ -void diFreeSpecial(struct inode *ip) -{ - if (ip == NULL) { - jfs_err("diFreeSpecial called with NULL ip!"); - return; - } - filemap_write_and_wait(ip->i_mapping); - truncate_inode_pages(ip->i_mapping, 0); - iput(ip); -} - - - -/* - * NAME: diWrite() - * - * FUNCTION: write the on-disk inode portion of the in-memory inode - * to its corresponding on-disk inode. - * - * on entry, the specifed incore inode should itself - * specify the disk inode number corresponding to the - * incore inode (i.e. i_number should be initialized). - * - * the inode contains the inode extent address for the disk - * inode. 
with the inode extent address in hand, the - * page of the extent that contains the disk inode is - * read and the disk inode portion of the incore inode - * is copied to the disk inode. - * - * PARAMETERS: - * tid - transacation id - * ip - pointer to incore inode to be written to the inode extent. - * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - */ -int diWrite(tid_t tid, struct inode *ip) -{ - struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); - struct jfs_inode_info *jfs_ip = JFS_IP(ip); - int rc = 0; - s32 ino; - struct dinode *dp; - s64 blkno; - int block_offset; - int inodes_left; - struct metapage *mp; - unsigned long pageno; - int rel_inode; - int dioffset; - struct inode *ipimap; - uint type; - lid_t lid; - struct tlock *ditlck, *tlck; - struct linelock *dilinelock, *ilinelock; - struct lv *lv; - int n; - - ipimap = jfs_ip->ipimap; - - ino = ip->i_ino & (INOSPERIAG - 1); - - if (!addressPXD(&(jfs_ip->ixpxd)) || - (lengthPXD(&(jfs_ip->ixpxd)) != - JFS_IP(ipimap)->i_imap->im_nbperiext)) { - jfs_error(ip->i_sb, "diWrite: ixpxd invalid"); - return -EIO; - } - - /* - * read the page of disk inode containing the specified inode: - */ - /* compute the block address of the page */ - blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage); - - rel_inode = (ino & (INOSPERPAGE - 1)); - pageno = blkno >> sbi->l2nbperpage; - - if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) { - /* - * OS/2 didn't always align inode extents on page boundaries - */ - inodes_left = - (sbi->nbperpage - block_offset) << sbi->l2niperblk; - - if (rel_inode < inodes_left) - rel_inode += block_offset << sbi->l2niperblk; - else { - pageno += 1; - rel_inode -= inodes_left; - } - } - /* read the page of disk inode */ - retry: - mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); - if (!mp) - return -EIO; - - /* get the pointer to the disk inode */ - dp = (struct dinode *) mp->data; - dp += rel_inode; - - dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE; - - /* - 
* acquire transaction lock on the on-disk inode; - * N.B. tlock is acquired on ipimap not ip; - */ - if ((ditlck = - txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL) - goto retry; - dilinelock = (struct linelock *) & ditlck->lock; - - /* - * copy btree root from in-memory inode to on-disk inode - * - * (tlock is taken from inline B+-tree root in in-memory - * inode when the B+-tree root is updated, which is pointed - * by jfs_ip->blid as well as being on tx tlock list) - * - * further processing of btree root is based on the copy - * in in-memory inode, where txLog() will log from, and, - * for xtree root, txUpdateMap() will update map and reset - * XAD_NEW bit; - */ - - if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) { - /* - * This is the special xtree inside the directory for storing - * the directory table - */ - xtpage_t *p, *xp; - xad_t *xad; - - jfs_ip->xtlid = 0; - tlck = lid_to_tlock(lid); - assert(tlck->type & tlckXTREE); - tlck->type |= tlckBTROOT; - tlck->mp = mp; - ilinelock = (struct linelock *) & tlck->lock; - - /* - * copy xtree root from inode to dinode: - */ - p = &jfs_ip->i_xtroot; - xp = (xtpage_t *) &dp->di_dirtable; - lv = ilinelock->lv; - for (n = 0; n < ilinelock->index; n++, lv++) { - memcpy(&xp->xad[lv->offset], &p->xad[lv->offset], - lv->length << L2XTSLOTSIZE); - } - - /* reset on-disk (metadata page) xtree XAD_NEW bit */ - xad = &xp->xad[XTENTRYSTART]; - for (n = XTENTRYSTART; - n < le16_to_cpu(xp->header.nextindex); n++, xad++) - if (xad->flag & (XAD_NEW | XAD_EXTENDED)) - xad->flag &= ~(XAD_NEW | XAD_EXTENDED); - } - - if ((lid = jfs_ip->blid) == 0) - goto inlineData; - jfs_ip->blid = 0; - - tlck = lid_to_tlock(lid); - type = tlck->type; - tlck->type |= tlckBTROOT; - tlck->mp = mp; - ilinelock = (struct linelock *) & tlck->lock; - - /* - * regular file: 16 byte (XAD slot) granularity - */ - if (type & tlckXTREE) { - xtpage_t *p, *xp; - xad_t *xad; - - /* - * copy xtree root from inode to dinode: - */ - p = &jfs_ip->i_xtroot; 
- xp = &dp->di_xtroot; - lv = ilinelock->lv; - for (n = 0; n < ilinelock->index; n++, lv++) { - memcpy(&xp->xad[lv->offset], &p->xad[lv->offset], - lv->length << L2XTSLOTSIZE); - } - - /* reset on-disk (metadata page) xtree XAD_NEW bit */ - xad = &xp->xad[XTENTRYSTART]; - for (n = XTENTRYSTART; - n < le16_to_cpu(xp->header.nextindex); n++, xad++) - if (xad->flag & (XAD_NEW | XAD_EXTENDED)) - xad->flag &= ~(XAD_NEW | XAD_EXTENDED); - } - /* - * directory: 32 byte (directory entry slot) granularity - */ - else if (type & tlckDTREE) { - dtpage_t *p, *xp; - - /* - * copy dtree root from inode to dinode: - */ - p = (dtpage_t *) &jfs_ip->i_dtroot; - xp = (dtpage_t *) & dp->di_dtroot; - lv = ilinelock->lv; - for (n = 0; n < ilinelock->index; n++, lv++) { - memcpy(&xp->slot[lv->offset], &p->slot[lv->offset], - lv->length << L2DTSLOTSIZE); - } - } else { - jfs_err("diWrite: UFO tlock"); - } - - inlineData: - /* - * copy inline symlink from in-memory inode to on-disk inode - */ - if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) { - lv = & dilinelock->lv[dilinelock->index]; - lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE; - lv->length = 2; - memcpy(&dp->di_fastsymlink, jfs_ip->i_inline, IDATASIZE); - dilinelock->index++; - } - /* - * copy inline data from in-memory inode to on-disk inode: - * 128 byte slot granularity - */ - if (test_cflag(COMMIT_Inlineea, ip)) { - lv = & dilinelock->lv[dilinelock->index]; - lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE; - lv->length = 1; - memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE); - dilinelock->index++; - - clear_cflag(COMMIT_Inlineea, ip); - } - - /* - * lock/copy inode base: 128 byte slot granularity - */ - lv = & dilinelock->lv[dilinelock->index]; - lv->offset = dioffset >> L2INODESLOTSIZE; - copy_to_dinode(dp, ip); - if (test_and_clear_cflag(COMMIT_Dirtable, ip)) { - lv->length = 2; - memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96); - } else - lv->length = 1; - dilinelock->index++; - - /* release the 
buffer holding the updated on-disk inode. - * the buffer will be later written by commit processing. - */ - write_metapage(mp); - - return (rc); -} - - -/* - * NAME: diFree(ip) - * - * FUNCTION: free a specified inode from the inode working map - * for a fileset or aggregate. - * - * if the inode to be freed represents the first (only) - * free inode within the iag, the iag will be placed on - * the ag free inode list. - * - * freeing the inode will cause the inode extent to be - * freed if the inode is the only allocated inode within - * the extent. in this case all the disk resource backing - * up the inode extent will be freed. in addition, the iag - * will be placed on the ag extent free list if the extent - * is the first free extent in the iag. if freeing the - * extent also means that no free inodes will exist for - * the iag, the iag will also be removed from the ag free - * inode list. - * - * the iag describing the inode will be freed if the extent - * is to be freed and it is the only backed extent within - * the iag. in this case, the iag will be removed from the - * ag free extent list and ag free inode list and placed on - * the inode map's free iag list. - * - * a careful update approach is used to provide consistency - * in the face of updates to multiple buffers. under this - * approach, all required buffers are obtained before making - * any updates and are held until all updates are complete. - * - * PARAMETERS: - * ip - inode to be freed. - * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error. 
- */ -int diFree(struct inode *ip) -{ - int rc; - ino_t inum = ip->i_ino; - struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp; - struct metapage *mp, *amp, *bmp, *cmp, *dmp; - int iagno, ino, extno, bitno, sword, agno; - int back, fwd; - u32 bitmap, mask; - struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap; - struct inomap *imap = JFS_IP(ipimap)->i_imap; - pxd_t freepxd; - tid_t tid; - struct inode *iplist[3]; - struct tlock *tlck; - struct pxd_lock *pxdlock; - - /* - * This is just to suppress compiler warnings. The same logic that - * references these variables is used to initialize them. - */ - aiagp = biagp = ciagp = diagp = NULL; - - /* get the iag number containing the inode. - */ - iagno = INOTOIAG(inum); - - /* make sure that the iag is contained within - * the map. - */ - if (iagno >= imap->im_nextiag) { - print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4, - imap, 32, 0); - jfs_error(ip->i_sb, - "diFree: inum = %d, iagno = %d, nextiag = %d", - (uint) inum, iagno, imap->im_nextiag); - return -EIO; - } - - /* get the allocation group for this ino. - */ - agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb)); - - /* Lock the AG specific inode map information - */ - AG_LOCK(imap, agno); - - /* Obtain read lock in imap inode. Don't release it until we have - * read all of the IAG's that we are going to. - */ - IREAD_LOCK(ipimap, RDWRLOCK_IMAP); - - /* read the iag. - */ - if ((rc = diIAGRead(imap, iagno, &mp))) { - IREAD_UNLOCK(ipimap); - AG_UNLOCK(imap, agno); - return (rc); - } - iagp = (struct iag *) mp->data; - - /* get the inode number and extent number of the inode within - * the iag and the inode number within the extent. 
- */ - ino = inum & (INOSPERIAG - 1); - extno = ino >> L2INOSPEREXT; - bitno = ino & (INOSPEREXT - 1); - mask = HIGHORDER >> bitno; - - if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { - jfs_error(ip->i_sb, - "diFree: wmap shows inode already free"); - } - - if (!addressPXD(&iagp->inoext[extno])) { - release_metapage(mp); - IREAD_UNLOCK(ipimap); - AG_UNLOCK(imap, agno); - jfs_error(ip->i_sb, "diFree: invalid inoext"); - return -EIO; - } - - /* compute the bitmap for the extent reflecting the freed inode. - */ - bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask; - - if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) { - release_metapage(mp); - IREAD_UNLOCK(ipimap); - AG_UNLOCK(imap, agno); - jfs_error(ip->i_sb, "diFree: numfree > numinos"); - return -EIO; - } - /* - * inode extent still has some inodes or below low water mark: - * keep the inode extent; - */ - if (bitmap || - imap->im_agctl[agno].numfree < 96 || - (imap->im_agctl[agno].numfree < 288 && - (((imap->im_agctl[agno].numfree * 100) / - imap->im_agctl[agno].numinos) <= 25))) { - /* if the iag currently has no free inodes (i.e., - * the inode being freed is the first free inode of iag), - * insert the iag at head of the inode free list for the ag. - */ - if (iagp->nfreeinos == 0) { - /* check if there are any iags on the ag inode - * free list. if so, read the first one so that - * we can link the current iag onto the list at - * the head. - */ - if ((fwd = imap->im_agctl[agno].inofree) >= 0) { - /* read the iag that currently is the head - * of the list. - */ - if ((rc = diIAGRead(imap, fwd, &amp))) { - IREAD_UNLOCK(ipimap); - AG_UNLOCK(imap, agno); - release_metapage(mp); - return (rc); - } - aiagp = (struct iag *) amp->data; - - /* make current head point back to the iag. - */ - aiagp->inofreeback = cpu_to_le32(iagno); - - write_metapage(amp); - } - - /* iag points forward to current head and iag - * becomes the new head of the list. 
- */ - iagp->inofreefwd = - cpu_to_le32(imap->im_agctl[agno].inofree); - iagp->inofreeback = cpu_to_le32(-1); - imap->im_agctl[agno].inofree = iagno; - } - IREAD_UNLOCK(ipimap); - - /* update the free inode summary map for the extent if - * freeing the inode means the extent will now have free - * inodes (i.e., the inode being freed is the first free - * inode of extent), - */ - if (iagp->wmap[extno] == cpu_to_le32(ONES)) { - sword = extno >> L2EXTSPERSUM; - bitno = extno & (EXTSPERSUM - 1); - iagp->inosmap[sword] &= - cpu_to_le32(~(HIGHORDER >> bitno)); - } - - /* update the bitmap. - */ - iagp->wmap[extno] = cpu_to_le32(bitmap); - - /* update the free inode counts at the iag, ag and - * map level. - */ - le32_add_cpu(&iagp->nfreeinos, 1); - imap->im_agctl[agno].numfree += 1; - atomic_inc(&imap->im_numfree); - - /* release the AG inode map lock - */ - AG_UNLOCK(imap, agno); - - /* write the iag */ - write_metapage(mp); - - return (0); - } - - - /* - * inode extent has become free and above low water mark: - * free the inode extent; - */ - - /* - * prepare to update iag list(s) (careful update step 1) - */ - amp = bmp = cmp = dmp = NULL; - fwd = back = -1; - - /* check if the iag currently has no free extents. if so, - * it will be placed on the head of the ag extent free list. - */ - if (iagp->nfreeexts == 0) { - /* check if the ag extent free list has any iags. - * if so, read the iag at the head of the list now. - * this (head) iag will be updated later to reflect - * the addition of the current iag at the head of - * the list. - */ - if ((fwd = imap->im_agctl[agno].extfree) >= 0) { - if ((rc = diIAGRead(imap, fwd, &amp))) - goto error_out; - aiagp = (struct iag *) amp->data; - } - } else { - /* iag has free extents. check if the addition of a free - * extent will cause all extents to be free within this - * iag. if so, the iag will be removed from the ag extent - * free list and placed on the inode map's free iag list. 
- */ - if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { - /* in preparation for removing the iag from the - * ag extent free list, read the iags preceding - * and following the iag on the ag extent free - * list. - */ - if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { - if ((rc = diIAGRead(imap, fwd, &amp))) - goto error_out; - aiagp = (struct iag *) amp->data; - } - - if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { - if ((rc = diIAGRead(imap, back, &bmp))) - goto error_out; - biagp = (struct iag *) bmp->data; - } - } - } - - /* remove the iag from the ag inode free list if freeing - * this extent cause the iag to have no free inodes. - */ - if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { - int inofreeback = le32_to_cpu(iagp->inofreeback); - int inofreefwd = le32_to_cpu(iagp->inofreefwd); - - /* in preparation for removing the iag from the - * ag inode free list, read the iags preceding - * and following the iag on the ag inode free - * list. before reading these iags, we must make - * sure that we already don't have them in hand - * from up above, since re-reading an iag (buffer) - * we are currently holding would cause a deadlock. 
- */ - if (inofreefwd >= 0) { - - if (inofreefwd == fwd) - ciagp = (struct iag *) amp->data; - else if (inofreefwd == back) - ciagp = (struct iag *) bmp->data; - else { - if ((rc = - diIAGRead(imap, inofreefwd, &cmp))) - goto error_out; - ciagp = (struct iag *) cmp->data; - } - assert(ciagp != NULL); - } - - if (inofreeback >= 0) { - if (inofreeback == fwd) - diagp = (struct iag *) amp->data; - else if (inofreeback == back) - diagp = (struct iag *) bmp->data; - else { - if ((rc = - diIAGRead(imap, inofreeback, &dmp))) - goto error_out; - diagp = (struct iag *) dmp->data; - } - assert(diagp != NULL); - } - } - - IREAD_UNLOCK(ipimap); - - /* - * invalidate any page of the inode extent freed from buffer cache; - */ - freepxd = iagp->inoext[extno]; - invalidate_pxd_metapages(ip, freepxd); - - /* - * update iag list(s) (careful update step 2) - */ - /* add the iag to the ag extent free list if this is the - * first free extent for the iag. - */ - if (iagp->nfreeexts == 0) { - if (fwd >= 0) - aiagp->extfreeback = cpu_to_le32(iagno); - - iagp->extfreefwd = - cpu_to_le32(imap->im_agctl[agno].extfree); - iagp->extfreeback = cpu_to_le32(-1); - imap->im_agctl[agno].extfree = iagno; - } else { - /* remove the iag from the ag extent list if all extents - * are now free and place it on the inode map iag free list. - */ - if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { - if (fwd >= 0) - aiagp->extfreeback = iagp->extfreeback; - - if (back >= 0) - biagp->extfreefwd = iagp->extfreefwd; - else - imap->im_agctl[agno].extfree = - le32_to_cpu(iagp->extfreefwd); - - iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); - - IAGFREE_LOCK(imap); - iagp->iagfree = cpu_to_le32(imap->im_freeiag); - imap->im_freeiag = iagno; - IAGFREE_UNLOCK(imap); - } - } - - /* remove the iag from the ag inode free list if freeing - * this extent causes the iag to have no free inodes. 
- */ - if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { - if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) - ciagp->inofreeback = iagp->inofreeback; - - if ((int) le32_to_cpu(iagp->inofreeback) >= 0) - diagp->inofreefwd = iagp->inofreefwd; - else - imap->im_agctl[agno].inofree = - le32_to_cpu(iagp->inofreefwd); - - iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); - } - - /* update the inode extent address and working map - * to reflect the free extent. - * the permanent map should have been updated already - * for the inode being freed. - */ - if (iagp->pmap[extno] != 0) { - jfs_error(ip->i_sb, "diFree: the pmap does not show inode free"); - } - iagp->wmap[extno] = 0; - PXDlength(&iagp->inoext[extno], 0); - PXDaddress(&iagp->inoext[extno], 0); - - /* update the free extent and free inode summary maps - * to reflect the freed extent. - * the inode summary map is marked to indicate no inodes - * available for the freed extent. - */ - sword = extno >> L2EXTSPERSUM; - bitno = extno & (EXTSPERSUM - 1); - mask = HIGHORDER >> bitno; - iagp->inosmap[sword] |= cpu_to_le32(mask); - iagp->extsmap[sword] &= cpu_to_le32(~mask); - - /* update the number of free inodes and number of free extents - * for the iag. - */ - le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1)); - le32_add_cpu(&iagp->nfreeexts, 1); - - /* update the number of free inodes and backed inodes - * at the ag and inode map level. - */ - imap->im_agctl[agno].numfree -= (INOSPEREXT - 1); - imap->im_agctl[agno].numinos -= INOSPEREXT; - atomic_sub(INOSPEREXT - 1, &imap->im_numfree); - atomic_sub(INOSPEREXT, &imap->im_numinos); - - if (amp) - write_metapage(amp); - if (bmp) - write_metapage(bmp); - if (cmp) - write_metapage(cmp); - if (dmp) - write_metapage(dmp); - - /* - * start transaction to update block allocation map - * for the inode extent freed; - * - * N.B. 
AG_LOCK is released and iag will be released below, and - * other thread may allocate inode from/reusing the ixad freed - * BUT with new/different backing inode extent from the extent - * to be freed by the transaction; - */ - tid = txBegin(ipimap->i_sb, COMMIT_FORCE); - mutex_lock(&JFS_IP(ipimap)->commit_mutex); - - /* acquire tlock of the iag page of the freed ixad - * to force the page NOHOMEOK (even though no data is - * logged from the iag page) until NOREDOPAGE|FREEXTENT log - * for the free of the extent is committed; - * write FREEXTENT|NOREDOPAGE log record - * N.B. linelock is overlaid as freed extent descriptor; - */ - tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE); - pxdlock = (struct pxd_lock *) & tlck->lock; - pxdlock->flag = mlckFREEPXD; - pxdlock->pxd = freepxd; - pxdlock->index = 1; - - write_metapage(mp); - - iplist[0] = ipimap; - - /* - * logredo needs the IAG number and IAG extent index in order - * to ensure that the IMap is consistent. The least disruptive - * way to pass these values through to the transaction manager - * is in the iplist array. - * - * It's not pretty, but it works. - */ - iplist[1] = (struct inode *) (size_t)iagno; - iplist[2] = (struct inode *) (size_t)extno; - - rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); - - txEnd(tid); - mutex_unlock(&JFS_IP(ipimap)->commit_mutex); - - /* unlock the AG inode map information */ - AG_UNLOCK(imap, agno); - - return (0); - - error_out: - IREAD_UNLOCK(ipimap); - - if (amp) - release_metapage(amp); - if (bmp) - release_metapage(bmp); - if (cmp) - release_metapage(cmp); - if (dmp) - release_metapage(dmp); - - AG_UNLOCK(imap, agno); - - release_metapage(mp); - - return (rc); -} - -/* - * There are several places in the diAlloc* routines where we initialize - * the inode. 
- */ -static inline void -diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) -{ - struct jfs_inode_info *jfs_ip = JFS_IP(ip); - - ip->i_ino = (iagno << L2INOSPERIAG) + ino; - jfs_ip->ixpxd = iagp->inoext[extno]; - jfs_ip->agstart = le64_to_cpu(iagp->agstart); - jfs_ip->active_ag = -1; -} - - -/* - * NAME: diAlloc(pip,dir,ip) - * - * FUNCTION: allocate a disk inode from the inode working map - * for a fileset or aggregate. - * - * PARAMETERS: - * pip - pointer to incore inode for the parent inode. - * dir - 'true' if the new disk inode is for a directory. - * ip - pointer to a new inode - * - * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. - */ -int diAlloc(struct inode *pip, bool dir, struct inode *ip) -{ - int rc, ino, iagno, addext, extno, bitno, sword; - int nwords, rem, i, agno; - u32 mask, inosmap, extsmap; - struct inode *ipimap; - struct metapage *mp; - ino_t inum; - struct iag *iagp; - struct inomap *imap; - - /* get the pointers to the inode map inode and the - * corresponding imap control structure. - */ - ipimap = JFS_SBI(pip->i_sb)->ipimap; - imap = JFS_IP(ipimap)->i_imap; - JFS_IP(ip)->ipimap = ipimap; - JFS_IP(ip)->fileset = FILESYSTEM_I; - - /* for a directory, the allocation policy is to start - * at the ag level using the preferred ag. - */ - if (dir) { - agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); - AG_LOCK(imap, agno); - goto tryag; - } - - /* for files, the policy starts off by trying to allocate from - * the same iag containing the parent disk inode: - * try to allocate the new disk inode close to the parent disk - * inode, using parent disk inode number + 1 as the allocation - * hint. (we use a left-to-right policy to attempt to avoid - * moving backward on the disk.) compute the hint within the - * file system and the iag. 
- */ - - /* get the ag number of this iag */ - agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb)); - - if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) { - /* - * There is an open file actively growing. We want to - * allocate new inodes from a different ag to avoid - * fragmentation problems. - */ - agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); - AG_LOCK(imap, agno); - goto tryag; - } - - inum = pip->i_ino + 1; - ino = inum & (INOSPERIAG - 1); - - /* back off the hint if it is outside of the iag */ - if (ino == 0) - inum = pip->i_ino; - - /* lock the AG inode map information */ - AG_LOCK(imap, agno); - - /* Get read lock on imap inode */ - IREAD_LOCK(ipimap, RDWRLOCK_IMAP); - - /* get the iag number and read the iag */ - iagno = INOTOIAG(inum); - if ((rc = diIAGRead(imap, iagno, &mp))) { - IREAD_UNLOCK(ipimap); - AG_UNLOCK(imap, agno); - return (rc); - } - iagp = (struct iag *) mp->data; - - /* determine if new inode extent is allowed to be added to the iag. - * new inode extent can be added to the iag if the ag - * has less than 32 free disk inodes and the iag has free extents. - */ - addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts); - - /* - * try to allocate from the IAG - */ - /* check if the inode may be allocated from the iag - * (i.e. the inode has free inodes or new extent can be added). - */ - if (iagp->nfreeinos || addext) { - /* determine the extent number of the hint. - */ - extno = ino >> L2INOSPEREXT; - - /* check if the extent containing the hint has backed - * inodes. if so, try to allocate within this extent. - */ - if (addressPXD(&iagp->inoext[extno])) { - bitno = ino & (INOSPEREXT - 1); - if ((bitno = - diFindFree(le32_to_cpu(iagp->wmap[extno]), - bitno)) - < INOSPEREXT) { - ino = (extno << L2INOSPEREXT) + bitno; - - /* a free inode (bit) was found within this - * extent, so allocate it. 
- */ - rc = diAllocBit(imap, iagp, ino); - IREAD_UNLOCK(ipimap); - if (rc) { - assert(rc == -EIO); - } else { - /* set the results of the allocation - * and write the iag. - */ - diInitInode(ip, iagno, ino, extno, - iagp); - mark_metapage_dirty(mp); - } - release_metapage(mp); - - /* free the AG lock and return. - */ - AG_UNLOCK(imap, agno); - return (rc); - } - - if (!addext) - extno = - (extno == - EXTSPERIAG - 1) ? 0 : extno + 1; - } - - /* - * no free inodes within the extent containing the hint. - * - * try to allocate from the backed extents following - * hint or, if appropriate (i.e. addext is true), allocate - * an extent of free inodes at or following the extent - * containing the hint. - * - * the free inode and free extent summary maps are used - * here, so determine the starting summary map position - * and the number of words we'll have to examine. again, - * the approach is to allocate following the hint, so we - * might have to initially ignore prior bits of the summary - * map that represent extents prior to the extent containing - * the hint and later revisit these bits. - */ - bitno = extno & (EXTSPERSUM - 1); - nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1; - sword = extno >> L2EXTSPERSUM; - - /* mask any prior bits for the starting words of the - * summary map. - */ - mask = ONES << (EXTSPERSUM - bitno); - inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask; - extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask; - - /* scan the free inode and free extent summary maps for - * free resources. - */ - for (i = 0; i < nwords; i++) { - /* check if this word of the free inode summary - * map describes an extent with free inodes. - */ - if (~inosmap) { - /* an extent with free inodes has been - * found. determine the extent number - * and the inode number within the extent. 
- */ - rem = diFindFree(inosmap, 0); - extno = (sword << L2EXTSPERSUM) + rem; - rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), - 0); - if (rem >= INOSPEREXT) { - IREAD_UNLOCK(ipimap); - release_metapage(mp); - AG_UNLOCK(imap, agno); - jfs_error(ip->i_sb, - "diAlloc: can't find free bit " - "in wmap"); - return -EIO; - } - - /* determine the inode number within the - * iag and allocate the inode from the - * map. - */ - ino = (extno << L2INOSPEREXT) + rem; - rc = diAllocBit(imap, iagp, ino); - IREAD_UNLOCK(ipimap); - if (rc) - assert(rc == -EIO); - else { - /* set the results of the allocation - * and write the iag. - */ - diInitInode(ip, iagno, ino, extno, - iagp); - mark_metapage_dirty(mp); - } - release_metapage(mp); - - /* free the AG lock and return. - */ - AG_UNLOCK(imap, agno); - return (rc); - - } - - /* check if we may allocate an extent of free - * inodes and whether this word of the free - * extents summary map describes a free extent. - */ - if (addext && ~extsmap) { - /* a free extent has been found. determine - * the extent number. - */ - rem = diFindFree(extsmap, 0); - extno = (sword << L2EXTSPERSUM) + rem; - - /* allocate an extent of free inodes. - */ - if ((rc = diNewExt(imap, iagp, extno))) { - /* if there is no disk space for a - * new extent, try to allocate the - * disk inode from somewhere else. - */ - if (rc == -ENOSPC) - break; - - assert(rc == -EIO); - } else { - /* set the results of the allocation - * and write the iag. - */ - diInitInode(ip, iagno, - extno << L2INOSPEREXT, - extno, iagp); - mark_metapage_dirty(mp); - } - release_metapage(mp); - /* free the imap inode & the AG lock & return. - */ - IREAD_UNLOCK(ipimap); - AG_UNLOCK(imap, agno); - return (rc); - } - - /* move on to the next set of summary map words. - */ - sword = (sword == SMAPSZ - 1) ? 
0 : sword + 1; - inosmap = le32_to_cpu(iagp->inosmap[sword]); - extsmap = le32_to_cpu(iagp->extsmap[sword]); - } - } - /* unlock imap inode */ - IREAD_UNLOCK(ipimap); - - /* nothing doing in this iag, so release it. */ - release_metapage(mp); - - tryag: - /* - * try to allocate anywhere within the same AG as the parent inode. - */ - rc = diAllocAG(imap, agno, dir, ip); - - AG_UNLOCK(imap, agno); - - if (rc != -ENOSPC) - return (rc); - - /* - * try to allocate in any AG. - */ - return (diAllocAny(imap, agno, dir, ip)); -} - - -/* - * NAME: diAllocAG(imap,agno,dir,ip) - * - * FUNCTION: allocate a disk inode from the allocation group. - * - * this routine first determines if a new extent of free - * inodes should be added for the allocation group, with - * the current request satisfied from this extent. if this - * is the case, an attempt will be made to do just that. if - * this attempt fails or it has been determined that a new - * extent should not be added, an attempt is made to satisfy - * the request by allocating an existing (backed) free inode - * from the allocation group. - * - * PRE CONDITION: Already have the AG lock for this AG. - * - * PARAMETERS: - * imap - pointer to inode map control structure. - * agno - allocation group to allocate from. - * dir - 'true' if the new disk inode is for a directory. - * ip - pointer to the new inode to be filled in on successful return - * with the disk inode number allocated, its extent address - * and the start of the ag. - * - * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. - */ -static int -diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) -{ - int rc, addext, numfree, numinos; - - /* get the number of free and the number of backed disk - * inodes currently within the ag. 
- */ - numfree = imap->im_agctl[agno].numfree; - numinos = imap->im_agctl[agno].numinos; - - if (numfree > numinos) { - jfs_error(ip->i_sb, "diAllocAG: numfree > numinos"); - return -EIO; - } - - /* determine if we should allocate a new extent of free inodes - * within the ag: for directory inodes, add a new extent - * if there are a small number of free inodes or number of free - * inodes is a small percentage of the number of backed inodes. - */ - if (dir) - addext = (numfree < 64 || - (numfree < 256 - && ((numfree * 100) / numinos) <= 20)); - else - addext = (numfree == 0); - - /* - * try to allocate a new extent of free inodes. - */ - if (addext) { - /* if free space is not available for this new extent, try - * below to allocate a free and existing (already backed) - * inode from the ag. - */ - if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC) - return (rc); - } - - /* - * try to allocate an existing free inode from the ag. - */ - return (diAllocIno(imap, agno, ip)); -} - - -/* - * NAME: diAllocAny(imap,agno,dir,iap) - * - * FUNCTION: allocate a disk inode from any other allocation group. - * - * this routine is called when an allocation attempt within - * the primary allocation group has failed. if attempts to - * allocate an inode from any allocation group other than the - * specified primary group. - * - * PARAMETERS: - * imap - pointer to inode map control structure. - * agno - primary allocation group (to avoid). - * dir - 'true' if the new disk inode is for a directory. - * ip - pointer to a new inode to be filled in on successful return - * with the disk inode number allocated, its extent address - * and the start of the ag. - * - * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. 
- */ -static int -diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) -{ - int ag, rc; - int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag; - - - /* try to allocate from the ags following agno up to - * the maximum ag number. - */ - for (ag = agno + 1; ag <= maxag; ag++) { - AG_LOCK(imap, ag); - - rc = diAllocAG(imap, ag, dir, ip); - - AG_UNLOCK(imap, ag); - - if (rc != -ENOSPC) - return (rc); - } - - /* try to allocate from the ags in front of agno. - */ - for (ag = 0; ag < agno; ag++) { - AG_LOCK(imap, ag); - - rc = diAllocAG(imap, ag, dir, ip); - - AG_UNLOCK(imap, ag); - - if (rc != -ENOSPC) - return (rc); - } - - /* no free disk inodes. - */ - return -ENOSPC; -} - - -/* - * NAME: diAllocIno(imap,agno,ip) - * - * FUNCTION: allocate a disk inode from the allocation group's free - * inode list, returning an error if this free list is - * empty (i.e. no iags on the list). - * - * allocation occurs from the first iag on the list using - * the iag's free inode summary map to find the leftmost - * free inode in the iag. - * - * PRE CONDITION: Already have AG lock for this AG. - * - * PARAMETERS: - * imap - pointer to inode map control structure. - * agno - allocation group. - * ip - pointer to new inode to be filled in on successful return - * with the disk inode number allocated, its extent address - * and the start of the ag. - * - * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. - */ -static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) -{ - int iagno, ino, rc, rem, extno, sword; - struct metapage *mp; - struct iag *iagp; - - /* check if there are iags on the ag's free inode list. - */ - if ((iagno = imap->im_agctl[agno].inofree) < 0) - return -ENOSPC; - - /* obtain read lock on imap inode */ - IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); - - /* read the iag at the head of the list. 
- */ - if ((rc = diIAGRead(imap, iagno, &mp))) { - IREAD_UNLOCK(imap->im_ipimap); - return (rc); - } - iagp = (struct iag *) mp->data; - - /* better be free inodes in this iag if it is on the - * list. - */ - if (!iagp->nfreeinos) { - IREAD_UNLOCK(imap->im_ipimap); - release_metapage(mp); - jfs_error(ip->i_sb, - "diAllocIno: nfreeinos = 0, but iag on freelist"); - return -EIO; - } - - /* scan the free inode summary map to find an extent - * with free inodes. - */ - for (sword = 0;; sword++) { - if (sword >= SMAPSZ) { - IREAD_UNLOCK(imap->im_ipimap); - release_metapage(mp); - jfs_error(ip->i_sb, - "diAllocIno: free inode not found in summary map"); - return -EIO; - } - - if (~iagp->inosmap[sword]) - break; - } - - /* found a extent with free inodes. determine - * the extent number. - */ - rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0); - if (rem >= EXTSPERSUM) { - IREAD_UNLOCK(imap->im_ipimap); - release_metapage(mp); - jfs_error(ip->i_sb, "diAllocIno: no free extent found"); - return -EIO; - } - extno = (sword << L2EXTSPERSUM) + rem; - - /* find the first free inode in the extent. - */ - rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0); - if (rem >= INOSPEREXT) { - IREAD_UNLOCK(imap->im_ipimap); - release_metapage(mp); - jfs_error(ip->i_sb, "diAllocIno: free inode not found"); - return -EIO; - } - - /* compute the inode number within the iag. - */ - ino = (extno << L2INOSPEREXT) + rem; - - /* allocate the inode. - */ - rc = diAllocBit(imap, iagp, ino); - IREAD_UNLOCK(imap->im_ipimap); - if (rc) { - release_metapage(mp); - return (rc); - } - - /* set the results of the allocation and write the iag. - */ - diInitInode(ip, iagno, ino, extno, iagp); - write_metapage(mp); - - return (0); -} - - -/* - * NAME: diAllocExt(imap,agno,ip) - * - * FUNCTION: add a new extent of free inodes to an iag, allocating - * an inode from this extent to satisfy the current allocation - * request. 
- * - * this routine first tries to find an existing iag with free - * extents through the ag free extent list. if list is not - * empty, the head of the list will be selected as the home - * of the new extent of free inodes. otherwise (the list is - * empty), a new iag will be allocated for the ag to contain - * the extent. - * - * once an iag has been selected, the free extent summary map - * is used to locate a free extent within the iag and diNewExt() - * is called to initialize the extent, with initialization - * including the allocation of the first inode of the extent - * for the purpose of satisfying this request. - * - * PARAMETERS: - * imap - pointer to inode map control structure. - * agno - allocation group number. - * ip - pointer to new inode to be filled in on successful return - * with the disk inode number allocated, its extent address - * and the start of the ag. - * - * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. - */ -static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) -{ - int rem, iagno, sword, extno, rc; - struct metapage *mp; - struct iag *iagp; - - /* check if the ag has any iags with free extents. if not, - * allocate a new iag for the ag. - */ - if ((iagno = imap->im_agctl[agno].extfree) < 0) { - /* If successful, diNewIAG will obtain the read lock on the - * imap inode. - */ - if ((rc = diNewIAG(imap, &iagno, agno, &mp))) { - return (rc); - } - iagp = (struct iag *) mp->data; - - /* set the ag number if this a brand new iag - */ - iagp->agstart = - cpu_to_le64(AGTOBLK(agno, imap->im_ipimap)); - } else { - /* read the iag. - */ - IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); - if ((rc = diIAGRead(imap, iagno, &mp))) { - IREAD_UNLOCK(imap->im_ipimap); - jfs_error(ip->i_sb, "diAllocExt: error reading iag"); - return rc; - } - iagp = (struct iag *) mp->data; - } - - /* using the free extent summary map, find a free extent. 
- */ - for (sword = 0;; sword++) { - if (sword >= SMAPSZ) { - release_metapage(mp); - IREAD_UNLOCK(imap->im_ipimap); - jfs_error(ip->i_sb, - "diAllocExt: free ext summary map not found"); - return -EIO; - } - if (~iagp->extsmap[sword]) - break; - } - - /* determine the extent number of the free extent. - */ - rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0); - if (rem >= EXTSPERSUM) { - release_metapage(mp); - IREAD_UNLOCK(imap->im_ipimap); - jfs_error(ip->i_sb, "diAllocExt: free extent not found"); - return -EIO; - } - extno = (sword << L2EXTSPERSUM) + rem; - - /* initialize the new extent. - */ - rc = diNewExt(imap, iagp, extno); - IREAD_UNLOCK(imap->im_ipimap); - if (rc) { - /* something bad happened. if a new iag was allocated, - * place it back on the inode map's iag free list, and - * clear the ag number information. - */ - if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { - IAGFREE_LOCK(imap); - iagp->iagfree = cpu_to_le32(imap->im_freeiag); - imap->im_freeiag = iagno; - IAGFREE_UNLOCK(imap); - } - write_metapage(mp); - return (rc); - } - - /* set the results of the allocation and write the iag. - */ - diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp); - - write_metapage(mp); - - return (0); -} - - -/* - * NAME: diAllocBit(imap,iagp,ino) - * - * FUNCTION: allocate a backed inode from an iag. - * - * this routine performs the mechanics of allocating a - * specified inode from a backed extent. - * - * if the inode to be allocated represents the last free - * inode within the iag, the iag will be removed from the - * ag free inode list. - * - * a careful update approach is used to provide consistency - * in the face of updates to multiple buffers. under this - * approach, all required buffers are obtained before making - * any updates and are held all are updates are complete. - * - * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on - * this AG. Must have read lock on imap inode. 
- * - * PARAMETERS: - * imap - pointer to inode map control structure. - * iagp - pointer to iag. - * ino - inode number to be allocated within the iag. - * - * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. - */ -static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) -{ - int extno, bitno, agno, sword, rc; - struct metapage *amp = NULL, *bmp = NULL; - struct iag *aiagp = NULL, *biagp = NULL; - u32 mask; - - /* check if this is the last free inode within the iag. - * if so, it will have to be removed from the ag free - * inode list, so get the iags preceding and following - * it on the list. - */ - if (iagp->nfreeinos == cpu_to_le32(1)) { - if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) { - if ((rc = - diIAGRead(imap, le32_to_cpu(iagp->inofreefwd), - &amp))) - return (rc); - aiagp = (struct iag *) amp->data; - } - - if ((int) le32_to_cpu(iagp->inofreeback) >= 0) { - if ((rc = - diIAGRead(imap, - le32_to_cpu(iagp->inofreeback), - &bmp))) { - if (amp) - release_metapage(amp); - return (rc); - } - biagp = (struct iag *) bmp->data; - } - } - - /* get the ag number, extent number, inode number within - * the extent. - */ - agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb)); - extno = ino >> L2INOSPEREXT; - bitno = ino & (INOSPEREXT - 1); - - /* compute the mask for setting the map. - */ - mask = HIGHORDER >> bitno; - - /* the inode should be free and backed. - */ - if (((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) || - ((le32_to_cpu(iagp->wmap[extno]) & mask) != 0) || - (addressPXD(&iagp->inoext[extno]) == 0)) { - if (amp) - release_metapage(amp); - if (bmp) - release_metapage(bmp); - - jfs_error(imap->im_ipimap->i_sb, - "diAllocBit: iag inconsistent"); - return -EIO; - } - - /* mark the inode as allocated in the working map. - */ - iagp->wmap[extno] |= cpu_to_le32(mask); - - /* check if all inodes within the extent are now - * allocated. 
if so, update the free inode summary - * map to reflect this. - */ - if (iagp->wmap[extno] == cpu_to_le32(ONES)) { - sword = extno >> L2EXTSPERSUM; - bitno = extno & (EXTSPERSUM - 1); - iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno); - } - - /* if this was the last free inode in the iag, remove the - * iag from the ag free inode list. - */ - if (iagp->nfreeinos == cpu_to_le32(1)) { - if (amp) { - aiagp->inofreeback = iagp->inofreeback; - write_metapage(amp); - } - - if (bmp) { - biagp->inofreefwd = iagp->inofreefwd; - write_metapage(bmp); - } else { - imap->im_agctl[agno].inofree = - le32_to_cpu(iagp->inofreefwd); - } - iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); - } - - /* update the free inode count at the iag, ag, inode - * map levels. - */ - le32_add_cpu(&iagp->nfreeinos, -1); - imap->im_agctl[agno].numfree -= 1; - atomic_dec(&imap->im_numfree); - - return (0); -} - - -/* - * NAME: diNewExt(imap,iagp,extno) - * - * FUNCTION: initialize a new extent of inodes for an iag, allocating - * the first inode of the extent for use for the current - * allocation request. - * - * disk resources are allocated for the new extent of inodes - * and the inodes themselves are initialized to reflect their - * existence within the extent (i.e. their inode numbers and - * inode extent addresses are set) and their initial state - * (mode and link count are set to zero). - * - * if the iag is new, it is not yet on an ag extent free list - * but will now be placed on this list. - * - * if the allocation of the new extent causes the iag to - * have no free extent, the iag will be removed from the - * ag extent free list. - * - * if the iag has no free backed inodes, it will be placed - * on the ag free inode list, since the addition of the new - * extent will now cause it to have free inodes. - * - * a careful update approach is used to provide consistency - * (i.e. list consistency) in the face of updates to multiple - * buffers. 
under this approach, all required buffers are - * obtained before making any updates and are held until all - * updates are complete. - * - * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on - * this AG. Must have read lock on imap inode. - * - * PARAMETERS: - * imap - pointer to inode map control structure. - * iagp - pointer to iag. - * extno - extent number. - * - * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. - */ -static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) -{ - int agno, iagno, fwd, back, freei = 0, sword, rc; - struct iag *aiagp = NULL, *biagp = NULL, *ciagp = NULL; - struct metapage *amp, *bmp, *cmp, *dmp; - struct inode *ipimap; - s64 blkno, hint; - int i, j; - u32 mask; - ino_t ino; - struct dinode *dp; - struct jfs_sb_info *sbi; - - /* better have free extents. - */ - if (!iagp->nfreeexts) { - jfs_error(imap->im_ipimap->i_sb, "diNewExt: no free extents"); - return -EIO; - } - - /* get the inode map inode. - */ - ipimap = imap->im_ipimap; - sbi = JFS_SBI(ipimap->i_sb); - - amp = bmp = cmp = NULL; - - /* get the ag and iag numbers for this iag. - */ - agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); - iagno = le32_to_cpu(iagp->iagnum); - - /* check if this is the last free extent within the - * iag. if so, the iag must be removed from the ag - * free extent list, so get the iags preceding and - * following the iag on this list. - */ - if (iagp->nfreeexts == cpu_to_le32(1)) { - if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { - if ((rc = diIAGRead(imap, fwd, &amp))) - return (rc); - aiagp = (struct iag *) amp->data; - } - - if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { - if ((rc = diIAGRead(imap, back, &bmp))) - goto error_out; - biagp = (struct iag *) bmp->data; - } - } else { - /* the iag has free extents. 
if all extents are free - * (as is the case for a newly allocated iag), the iag - * must be added to the ag free extent list, so get - * the iag at the head of the list in preparation for - * adding this iag to this list. - */ - fwd = back = -1; - if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { - if ((fwd = imap->im_agctl[agno].extfree) >= 0) { - if ((rc = diIAGRead(imap, fwd, &amp))) - goto error_out; - aiagp = (struct iag *) amp->data; - } - } - } - - /* check if the iag has no free inodes. if so, the iag - * will have to be added to the ag free inode list, so get - * the iag at the head of the list in preparation for - * adding this iag to this list. in doing this, we must - * check if we already have the iag at the head of - * the list in hand. - */ - if (iagp->nfreeinos == 0) { - freei = imap->im_agctl[agno].inofree; - - if (freei >= 0) { - if (freei == fwd) { - ciagp = aiagp; - } else if (freei == back) { - ciagp = biagp; - } else { - if ((rc = diIAGRead(imap, freei, &cmp))) - goto error_out; - ciagp = (struct iag *) cmp->data; - } - if (ciagp == NULL) { - jfs_error(imap->im_ipimap->i_sb, - "diNewExt: ciagp == NULL"); - rc = -EIO; - goto error_out; - } - } - } - - /* allocate disk space for the inode extent. - */ - if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0)) - hint = ((s64) agno << sbi->bmap->db_agl2size) - 1; - else - hint = addressPXD(&iagp->inoext[extno - 1]) + - lengthPXD(&iagp->inoext[extno - 1]) - 1; - - if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno))) - goto error_out; - - /* compute the inode number of the first inode within the - * extent. - */ - ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT); - - /* initialize the inodes within the newly allocated extent a - * page at a time. - */ - for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) { - /* get a buffer for this page of disk inodes. 
- */ - dmp = get_metapage(ipimap, blkno + i, PSIZE, 1); - if (dmp == NULL) { - rc = -EIO; - goto error_out; - } - dp = (struct dinode *) dmp->data; - - /* initialize the inode number, mode, link count and - * inode extent address. - */ - for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) { - dp->di_inostamp = cpu_to_le32(sbi->inostamp); - dp->di_number = cpu_to_le32(ino); - dp->di_fileset = cpu_to_le32(FILESYSTEM_I); - dp->di_mode = 0; - dp->di_nlink = 0; - PXDaddress(&(dp->di_ixpxd), blkno); - PXDlength(&(dp->di_ixpxd), imap->im_nbperiext); - } - write_metapage(dmp); - } - - /* if this is the last free extent within the iag, remove the - * iag from the ag free extent list. - */ - if (iagp->nfreeexts == cpu_to_le32(1)) { - if (fwd >= 0) - aiagp->extfreeback = iagp->extfreeback; - - if (back >= 0) - biagp->extfreefwd = iagp->extfreefwd; - else - imap->im_agctl[agno].extfree = - le32_to_cpu(iagp->extfreefwd); - - iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); - } else { - /* if the iag has all free extents (newly allocated iag), - * add the iag to the ag free extent list. - */ - if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { - if (fwd >= 0) - aiagp->extfreeback = cpu_to_le32(iagno); - - iagp->extfreefwd = cpu_to_le32(fwd); - iagp->extfreeback = cpu_to_le32(-1); - imap->im_agctl[agno].extfree = iagno; - } - } - - /* if the iag has no free inodes, add the iag to the - * ag free inode list. - */ - if (iagp->nfreeinos == 0) { - if (freei >= 0) - ciagp->inofreeback = cpu_to_le32(iagno); - - iagp->inofreefwd = - cpu_to_le32(imap->im_agctl[agno].inofree); - iagp->inofreeback = cpu_to_le32(-1); - imap->im_agctl[agno].inofree = iagno; - } - - /* initialize the extent descriptor of the extent. */ - PXDlength(&iagp->inoext[extno], imap->im_nbperiext); - PXDaddress(&iagp->inoext[extno], blkno); - - /* initialize the working and persistent map of the extent. - * the working map will be initialized such that - * it indicates the first inode of the extent is allocated. 
- */ - iagp->wmap[extno] = cpu_to_le32(HIGHORDER); - iagp->pmap[extno] = 0; - - /* update the free inode and free extent summary maps - * for the extent to indicate the extent has free inodes - * and no longer represents a free extent. - */ - sword = extno >> L2EXTSPERSUM; - mask = HIGHORDER >> (extno & (EXTSPERSUM - 1)); - iagp->extsmap[sword] |= cpu_to_le32(mask); - iagp->inosmap[sword] &= cpu_to_le32(~mask); - - /* update the free inode and free extent counts for the - * iag. - */ - le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1)); - le32_add_cpu(&iagp->nfreeexts, -1); - - /* update the free and backed inode counts for the ag. - */ - imap->im_agctl[agno].numfree += (INOSPEREXT - 1); - imap->im_agctl[agno].numinos += INOSPEREXT; - - /* update the free and backed inode counts for the inode map. - */ - atomic_add(INOSPEREXT - 1, &imap->im_numfree); - atomic_add(INOSPEREXT, &imap->im_numinos); - - /* write the iags. - */ - if (amp) - write_metapage(amp); - if (bmp) - write_metapage(bmp); - if (cmp) - write_metapage(cmp); - - return (0); - - error_out: - - /* release the iags. - */ - if (amp) - release_metapage(amp); - if (bmp) - release_metapage(bmp); - if (cmp) - release_metapage(cmp); - - return (rc); -} - - -/* - * NAME: diNewIAG(imap,iagnop,agno) - * - * FUNCTION: allocate a new iag for an allocation group. - * - * first tries to allocate the iag from the inode map - * iagfree list: - * if the list has free iags, the head of the list is removed - * and returned to satisfy the request. - * if the inode map's iag free list is empty, the inode map - * is extended to hold a new iag. this new iag is initialized - * and returned to satisfy the request. - * - * PARAMETERS: - * imap - pointer to inode map control structure. - * iagnop - pointer to an iag number set with the number of the - * newly allocated iag upon successful return. - * agno - allocation group number. 
- * bpp - Buffer pointer to be filled in with new IAG's buffer - * - * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. - * - * serialization: - * AG lock held on entry/exit; - * write lock on the map is held inside; - * read lock on the map is held on successful completion; - * - * note: new iag transaction: - * . synchronously write iag; - * . write log of xtree and inode of imap; - * . commit; - * . synchronous write of xtree (right to left, bottom to top); - * . at start of logredo(): init in-memory imap with one additional iag page; - * . at end of logredo(): re-read imap inode to determine - * new imap size; - */ -static int -diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) -{ - int rc; - int iagno, i, xlen; - struct inode *ipimap; - struct super_block *sb; - struct jfs_sb_info *sbi; - struct metapage *mp; - struct iag *iagp; - s64 xaddr = 0; - s64 blkno; - tid_t tid; - struct inode *iplist[1]; - - /* pick up pointers to the inode map and mount inodes */ - ipimap = imap->im_ipimap; - sb = ipimap->i_sb; - sbi = JFS_SBI(sb); - - /* acquire the free iag lock */ - IAGFREE_LOCK(imap); - - /* if there are any iags on the inode map free iag list, - * allocate the iag from the head of the list. - */ - if (imap->im_freeiag >= 0) { - /* pick up the iag number at the head of the list */ - iagno = imap->im_freeiag; - - /* determine the logical block number of the iag */ - blkno = IAGTOLBLK(iagno, sbi->l2nbperpage); - } else { - /* no free iags. the inode map will have to be extented - * to include a new iag. 
- */ - - /* acquire inode map lock */ - IWRITE_LOCK(ipimap, RDWRLOCK_IMAP); - - if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) { - IWRITE_UNLOCK(ipimap); - IAGFREE_UNLOCK(imap); - jfs_error(imap->im_ipimap->i_sb, - "diNewIAG: ipimap->i_size is wrong"); - return -EIO; - } - - - /* get the next available iag number */ - iagno = imap->im_nextiag; - - /* make sure that we have not exceeded the maximum inode - * number limit. - */ - if (iagno > (MAXIAGS - 1)) { - /* release the inode map lock */ - IWRITE_UNLOCK(ipimap); - - rc = -ENOSPC; - goto out; - } - - /* - * synchronously append new iag page. - */ - /* determine the logical address of iag page to append */ - blkno = IAGTOLBLK(iagno, sbi->l2nbperpage); - - /* Allocate extent for new iag page */ - xlen = sbi->nbperpage; - if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) { - /* release the inode map lock */ - IWRITE_UNLOCK(ipimap); - - goto out; - } - - /* - * start transaction of update of the inode map - * addressing structure pointing to the new iag page; - */ - tid = txBegin(sb, COMMIT_FORCE); - mutex_lock(&JFS_IP(ipimap)->commit_mutex); - - /* update the inode map addressing structure to point to it */ - if ((rc = - xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) { - txEnd(tid); - mutex_unlock(&JFS_IP(ipimap)->commit_mutex); - /* Free the blocks allocated for the iag since it was - * not successfully added to the inode map - */ - dbFree(ipimap, xaddr, (s64) xlen); - - /* release the inode map lock */ - IWRITE_UNLOCK(ipimap); - - goto out; - } - - /* update the inode map's inode to reflect the extension */ - ipimap->i_size += PSIZE; - inode_add_bytes(ipimap, PSIZE); - - /* assign a buffer for the page */ - mp = get_metapage(ipimap, blkno, PSIZE, 0); - if (!mp) { - /* - * This is very unlikely since we just created the - * extent, but let's try to handle it correctly - */ - xtTruncate(tid, ipimap, ipimap->i_size - PSIZE, - COMMIT_PWMAP); - - txAbort(tid, 0); - txEnd(tid); - 
mutex_unlock(&JFS_IP(ipimap)->commit_mutex); - - /* release the inode map lock */ - IWRITE_UNLOCK(ipimap); - - rc = -EIO; - goto out; - } - iagp = (struct iag *) mp->data; - - /* init the iag */ - memset(iagp, 0, sizeof(struct iag)); - iagp->iagnum = cpu_to_le32(iagno); - iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); - iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); - iagp->iagfree = cpu_to_le32(-1); - iagp->nfreeinos = 0; - iagp->nfreeexts = cpu_to_le32(EXTSPERIAG); - - /* initialize the free inode summary map (free extent - * summary map initialization handled by bzero). - */ - for (i = 0; i < SMAPSZ; i++) - iagp->inosmap[i] = cpu_to_le32(ONES); - - /* - * Write and sync the metapage - */ - flush_metapage(mp); - - /* - * txCommit(COMMIT_FORCE) will synchronously write address - * index pages and inode after commit in careful update order - * of address index pages (right to left, bottom up); - */ - iplist[0] = ipimap; - rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); - - txEnd(tid); - mutex_unlock(&JFS_IP(ipimap)->commit_mutex); - - duplicateIXtree(sb, blkno, xlen, &xaddr); - - /* update the next available iag number */ - imap->im_nextiag += 1; - - /* Add the iag to the iag free list so we don't lose the iag - * if a failure happens now. - */ - imap->im_freeiag = iagno; - - /* Until we have logredo working, we want the imap inode & - * control page to be up to date. 
- */ - diSync(ipimap); - - /* release the inode map lock */ - IWRITE_UNLOCK(ipimap); - } - - /* obtain read lock on map */ - IREAD_LOCK(ipimap, RDWRLOCK_IMAP); - - /* read the iag */ - if ((rc = diIAGRead(imap, iagno, &mp))) { - IREAD_UNLOCK(ipimap); - rc = -EIO; - goto out; - } - iagp = (struct iag *) mp->data; - - /* remove the iag from the iag free list */ - imap->im_freeiag = le32_to_cpu(iagp->iagfree); - iagp->iagfree = cpu_to_le32(-1); - - /* set the return iag number and buffer pointer */ - *iagnop = iagno; - *mpp = mp; - - out: - /* release the iag free lock */ - IAGFREE_UNLOCK(imap); - - return (rc); -} - -/* - * NAME: diIAGRead() - * - * FUNCTION: get the buffer for the specified iag within a fileset - * or aggregate inode map. - * - * PARAMETERS: - * imap - pointer to inode map control structure. - * iagno - iag number. - * bpp - point to buffer pointer to be filled in on successful - * exit. - * - * SERIALIZATION: - * must have read lock on imap inode - * (When called by diExtendFS, the filesystem is quiesced, therefore - * the read lock is unnecessary.) - * - * RETURN VALUES: - * 0 - success. - * -EIO - i/o error. - */ -static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) -{ - struct inode *ipimap = imap->im_ipimap; - s64 blkno; - - /* compute the logical block number of the iag. */ - blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage); - - /* read the iag. */ - *mpp = read_metapage(ipimap, blkno, PSIZE, 0); - if (*mpp == NULL) { - return -EIO; - } - - return (0); -} - -/* - * NAME: diFindFree() - * - * FUNCTION: find the first free bit in a word starting at - * the specified bit position. - * - * PARAMETERS: - * word - word to be examined. - * start - starting bit position. - * - * RETURN VALUES: - * bit position of first free bit in the word or 32 if - * no free bits were found. - */ -static int diFindFree(u32 word, int start) -{ - int bitno; - assert(start < 32); - /* scan the word for the first free bit. 
*/ - for (word <<= start, bitno = start; bitno < 32; - bitno++, word <<= 1) { - if ((word & HIGHORDER) == 0) - break; - } - return (bitno); -} - -/* - * NAME: diUpdatePMap() - * - * FUNCTION: Update the persistent map in an IAG for the allocation or - * freeing of the specified inode. - * - * PRE CONDITIONS: Working map has already been updated for allocate. - * - * PARAMETERS: - * ipimap - Incore inode map inode - * inum - Number of inode to mark in permanent map - * is_free - If 'true' indicates inode should be marked freed, otherwise - * indicates inode should be marked allocated. - * - * RETURN VALUES: - * 0 for success - */ -int -diUpdatePMap(struct inode *ipimap, - unsigned long inum, bool is_free, struct tblock * tblk) -{ - int rc; - struct iag *iagp; - struct metapage *mp; - int iagno, ino, extno, bitno; - struct inomap *imap; - u32 mask; - struct jfs_log *log; - int lsn, difft, diffp; - unsigned long flags; - - imap = JFS_IP(ipimap)->i_imap; - /* get the iag number containing the inode */ - iagno = INOTOIAG(inum); - /* make sure that the iag is contained within the map */ - if (iagno >= imap->im_nextiag) { - jfs_error(ipimap->i_sb, - "diUpdatePMap: the iag is outside the map"); - return -EIO; - } - /* read the iag */ - IREAD_LOCK(ipimap, RDWRLOCK_IMAP); - rc = diIAGRead(imap, iagno, &mp); - IREAD_UNLOCK(ipimap); - if (rc) - return (rc); - metapage_wait_for_io(mp); - iagp = (struct iag *) mp->data; - /* get the inode number and extent number of the inode within - * the iag and the inode number within the extent. 
- */ - ino = inum & (INOSPERIAG - 1); - extno = ino >> L2INOSPEREXT; - bitno = ino & (INOSPEREXT - 1); - mask = HIGHORDER >> bitno; - /* - * mark the inode free in persistent map: - */ - if (is_free) { - /* The inode should have been allocated both in working - * map and in persistent map; - * the inode will be freed from working map at the release - * of last reference release; - */ - if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { - jfs_error(ipimap->i_sb, - "diUpdatePMap: inode %ld not marked as " - "allocated in wmap!", inum); - } - if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) { - jfs_error(ipimap->i_sb, - "diUpdatePMap: inode %ld not marked as " - "allocated in pmap!", inum); - } - /* update the bitmap for the extent of the freed inode */ - iagp->pmap[extno] &= cpu_to_le32(~mask); - } - /* - * mark the inode allocated in persistent map: - */ - else { - /* The inode should be already allocated in the working map - * and should be free in persistent map; - */ - if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { - release_metapage(mp); - jfs_error(ipimap->i_sb, - "diUpdatePMap: the inode is not allocated in " - "the working map"); - return -EIO; - } - if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) { - release_metapage(mp); - jfs_error(ipimap->i_sb, - "diUpdatePMap: the inode is not free in the " - "persistent map"); - return -EIO; - } - /* update the bitmap for the extent of the allocated inode */ - iagp->pmap[extno] |= cpu_to_le32(mask); - } - /* - * update iag lsn - */ - lsn = tblk->lsn; - log = JFS_SBI(tblk->sb)->log; - LOGSYNC_LOCK(log, flags); - if (mp->lsn != 0) { - /* inherit older/smaller lsn */ - logdiff(difft, lsn, log); - logdiff(diffp, mp->lsn, log); - if (difft < diffp) { - mp->lsn = lsn; - /* move mp after tblock in logsync list */ - list_move(&mp->synclist, &tblk->synclist); - } - /* inherit younger/larger clsn */ - assert(mp->clsn); - logdiff(difft, tblk->clsn, log); - logdiff(diffp, mp->clsn, log); - if (difft > diffp) - mp->clsn = tblk->clsn; - 
} else { - mp->log = log; - mp->lsn = lsn; - /* insert mp after tblock in logsync list */ - log->count++; - list_add(&mp->synclist, &tblk->synclist); - mp->clsn = tblk->clsn; - } - LOGSYNC_UNLOCK(log, flags); - write_metapage(mp); - return (0); -} - -/* - * diExtendFS() - * - * function: update imap for extendfs(); - * - * note: AG size has been increased s.t. each k old contiguous AGs are - * coalesced into a new AG; - */ -int diExtendFS(struct inode *ipimap, struct inode *ipbmap) -{ - int rc, rcx = 0; - struct inomap *imap = JFS_IP(ipimap)->i_imap; - struct iag *iagp = NULL, *hiagp = NULL; - struct bmap *mp = JFS_SBI(ipbmap->i_sb)->bmap; - struct metapage *bp, *hbp; - int i, n, head; - int numinos, xnuminos = 0, xnumfree = 0; - s64 agstart; - - jfs_info("diExtendFS: nextiag:%d numinos:%d numfree:%d", - imap->im_nextiag, atomic_read(&imap->im_numinos), - atomic_read(&imap->im_numfree)); - - /* - * reconstruct imap - * - * coalesce contiguous k (newAGSize/oldAGSize) AGs; - * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; - * note: new AG size = old AG size * (2**x). - */ - - /* init per AG control information im_agctl[] */ - for (i = 0; i < MAXAG; i++) { - imap->im_agctl[i].inofree = -1; - imap->im_agctl[i].extfree = -1; - imap->im_agctl[i].numinos = 0; /* number of backed inodes */ - imap->im_agctl[i].numfree = 0; /* number of free backed inodes */ - } - - /* - * process each iag page of the map. 
- * - * rebuild AG Free Inode List, AG Free Inode Extent List; - */ - for (i = 0; i < imap->im_nextiag; i++) { - if ((rc = diIAGRead(imap, i, &bp))) { - rcx = rc; - continue; - } - iagp = (struct iag *) bp->data; - if (le32_to_cpu(iagp->iagnum) != i) { - release_metapage(bp); - jfs_error(ipimap->i_sb, - "diExtendFs: unexpected value of iagnum"); - return -EIO; - } - - /* leave free iag in the free iag list */ - if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { - release_metapage(bp); - continue; - } - - agstart = le64_to_cpu(iagp->agstart); - n = agstart >> mp->db_agl2size; - iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size); - - /* compute backed inodes */ - numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts)) - << L2INOSPEREXT; - if (numinos > 0) { - /* merge AG backed inodes */ - imap->im_agctl[n].numinos += numinos; - xnuminos += numinos; - } - - /* if any backed free inodes, insert at AG free inode list */ - if ((int) le32_to_cpu(iagp->nfreeinos) > 0) { - if ((head = imap->im_agctl[n].inofree) == -1) { - iagp->inofreefwd = cpu_to_le32(-1); - iagp->inofreeback = cpu_to_le32(-1); - } else { - if ((rc = diIAGRead(imap, head, &hbp))) { - rcx = rc; - goto nextiag; - } - hiagp = (struct iag *) hbp->data; - hiagp->inofreeback = iagp->iagnum; - iagp->inofreefwd = cpu_to_le32(head); - iagp->inofreeback = cpu_to_le32(-1); - write_metapage(hbp); - } - - imap->im_agctl[n].inofree = - le32_to_cpu(iagp->iagnum); - - /* merge AG backed free inodes */ - imap->im_agctl[n].numfree += - le32_to_cpu(iagp->nfreeinos); - xnumfree += le32_to_cpu(iagp->nfreeinos); - } - - /* if any free extents, insert at AG free extent list */ - if (le32_to_cpu(iagp->nfreeexts) > 0) { - if ((head = imap->im_agctl[n].extfree) == -1) { - iagp->extfreefwd = cpu_to_le32(-1); - iagp->extfreeback = cpu_to_le32(-1); - } else { - if ((rc = diIAGRead(imap, head, &hbp))) { - rcx = rc; - goto nextiag; - } - hiagp = (struct iag *) hbp->data; - hiagp->extfreeback = iagp->iagnum; - iagp->extfreefwd = 
cpu_to_le32(head); - iagp->extfreeback = cpu_to_le32(-1); - write_metapage(hbp); - } - - imap->im_agctl[n].extfree = - le32_to_cpu(iagp->iagnum); - } - - nextiag: - write_metapage(bp); - } - - if (xnuminos != atomic_read(&imap->im_numinos) || - xnumfree != atomic_read(&imap->im_numfree)) { - jfs_error(ipimap->i_sb, - "diExtendFs: numinos or numfree incorrect"); - return -EIO; - } - - return rcx; -} - - -/* - * duplicateIXtree() - * - * serialization: IWRITE_LOCK held on entry/exit - * - * note: shadow page with regular inode (rel.2); - */ -static void duplicateIXtree(struct super_block *sb, s64 blkno, - int xlen, s64 *xaddr) -{ - struct jfs_superblock *j_sb; - struct buffer_head *bh; - struct inode *ip; - tid_t tid; - - /* if AIT2 ipmap2 is bad, do not try to update it */ - if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT) /* s_flag */ - return; - ip = diReadSpecial(sb, FILESYSTEM_I, 1); - if (ip == NULL) { - JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; - if (readSuper(sb, &bh)) - return; - j_sb = (struct jfs_superblock *)bh->b_data; - j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT); - - mark_buffer_dirty(bh); - sync_dirty_buffer(bh); - brelse(bh); - return; - } - - /* start transaction */ - tid = txBegin(sb, COMMIT_FORCE); - /* update the inode map addressing structure to point to it */ - if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) { - JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; - txAbort(tid, 1); - goto cleanup; - - } - /* update the inode map's inode to reflect the extension */ - ip->i_size += PSIZE; - inode_add_bytes(ip, PSIZE); - txCommit(tid, 1, &ip, COMMIT_FORCE); - cleanup: - txEnd(tid); - diFreeSpecial(ip); -} - -/* - * NAME: copy_from_dinode() - * - * FUNCTION: Copies inode info from disk inode to in-memory inode - * - * RETURN VALUES: - * 0 - success - * -ENOMEM - insufficient memory - */ -static int copy_from_dinode(struct dinode * dip, struct inode *ip) -{ - struct jfs_inode_info *jfs_ip = JFS_IP(ip); - struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); - - jfs_ip->fileset = 
le32_to_cpu(dip->di_fileset); - jfs_ip->mode2 = le32_to_cpu(dip->di_mode); - jfs_set_inode_flags(ip); - - ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; - if (sbi->umask != -1) { - ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask); - /* For directories, add x permission if r is allowed by umask */ - if (S_ISDIR(ip->i_mode)) { - if (ip->i_mode & 0400) - ip->i_mode |= 0100; - if (ip->i_mode & 0040) - ip->i_mode |= 0010; - if (ip->i_mode & 0004) - ip->i_mode |= 0001; - } - } - set_nlink(ip, le32_to_cpu(dip->di_nlink)); - - jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); - if (sbi->uid == -1) - ip->i_uid = jfs_ip->saved_uid; - else { - ip->i_uid = sbi->uid; - } - - jfs_ip->saved_gid = le32_to_cpu(dip->di_gid); - if (sbi->gid == -1) - ip->i_gid = jfs_ip->saved_gid; - else { - ip->i_gid = sbi->gid; - } - - ip->i_size = le64_to_cpu(dip->di_size); - ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec); - ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec); - ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec); - ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec); - ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec); - ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec); - ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks)); - ip->i_generation = le32_to_cpu(dip->di_gen); - - jfs_ip->ixpxd = dip->di_ixpxd; /* in-memory pxd's are little-endian */ - jfs_ip->acl = dip->di_acl; /* as are dxd's */ - jfs_ip->ea = dip->di_ea; - jfs_ip->next_index = le32_to_cpu(dip->di_next_index); - jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec); - jfs_ip->acltype = le32_to_cpu(dip->di_acltype); - - if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) { - jfs_ip->dev = le32_to_cpu(dip->di_rdev); - ip->i_rdev = new_decode_dev(jfs_ip->dev); - } - - if (S_ISDIR(ip->i_mode)) { - memcpy(&jfs_ip->i_dirtable, &dip->di_dirtable, 384); - } else if (S_ISREG(ip->i_mode) || S_ISLNK(ip->i_mode)) { - memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288); - } else - 
memcpy(&jfs_ip->i_inline_ea, &dip->di_inlineea, 128); - - /* Zero the in-memory-only stuff */ - jfs_ip->cflag = 0; - jfs_ip->btindex = 0; - jfs_ip->btorder = 0; - jfs_ip->bxflag = 0; - jfs_ip->blid = 0; - jfs_ip->atlhead = 0; - jfs_ip->atltail = 0; - jfs_ip->xtlid = 0; - return (0); -} - -/* - * NAME: copy_to_dinode() - * - * FUNCTION: Copies inode info from in-memory inode to disk inode - */ -static void copy_to_dinode(struct dinode * dip, struct inode *ip) -{ - struct jfs_inode_info *jfs_ip = JFS_IP(ip); - struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); - - dip->di_fileset = cpu_to_le32(jfs_ip->fileset); - dip->di_inostamp = cpu_to_le32(sbi->inostamp); - dip->di_number = cpu_to_le32(ip->i_ino); - dip->di_gen = cpu_to_le32(ip->i_generation); - dip->di_size = cpu_to_le64(ip->i_size); - dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); - dip->di_nlink = cpu_to_le32(ip->i_nlink); - if (sbi->uid == -1) - dip->di_uid = cpu_to_le32(ip->i_uid); - else - dip->di_uid = cpu_to_le32(jfs_ip->saved_uid); - if (sbi->gid == -1) - dip->di_gid = cpu_to_le32(ip->i_gid); - else - dip->di_gid = cpu_to_le32(jfs_ip->saved_gid); - jfs_get_inode_flags(jfs_ip); - /* - * mode2 is only needed for storing the higher order bits. 
- * Trust i_mode for the lower order ones - */ - if (sbi->umask == -1) - dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | - ip->i_mode); - else /* Leave the original permissions alone */ - dip->di_mode = cpu_to_le32(jfs_ip->mode2); - - dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec); - dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec); - dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec); - dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec); - dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec); - dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec); - dip->di_ixpxd = jfs_ip->ixpxd; /* in-memory pxd's are little-endian */ - dip->di_acl = jfs_ip->acl; /* as are dxd's */ - dip->di_ea = jfs_ip->ea; - dip->di_next_index = cpu_to_le32(jfs_ip->next_index); - dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime); - dip->di_otime.tv_nsec = 0; - dip->di_acltype = cpu_to_le32(jfs_ip->acltype); - if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) - dip->di_rdev = cpu_to_le32(jfs_ip->dev); -} diff --git a/ANDROID_3.4.5/fs/jfs/jfs_imap.h b/ANDROID_3.4.5/fs/jfs/jfs_imap.h deleted file mode 100644 index 610a0e9d..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_imap.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_IMAP -#define _H_JFS_IMAP - -#include "jfs_txnmgr.h" - -/* - * jfs_imap.h: disk inode manager - */ - -#define EXTSPERIAG 128 /* number of disk inode extent per iag */ -#define IMAPBLKNO 0 /* lblkno of dinomap within inode map */ -#define SMAPSZ 4 /* number of words per summary map */ -#define EXTSPERSUM 32 /* number of extents per summary map entry */ -#define L2EXTSPERSUM 5 /* l2 number of extents per summary map */ -#define PGSPERIEXT 4 /* number of 4K pages per dinode extent */ -#define MAXIAGS ((1<<20)-1) /* maximum number of iags */ -#define MAXAG 128 /* maximum number of allocation groups */ - -#define AMAPSIZE 512 /* bytes in the IAG allocation maps */ -#define SMAPSIZE 16 /* bytes in the IAG summary maps */ - -/* convert inode number to iag number */ -#define INOTOIAG(ino) ((ino) >> L2INOSPERIAG) - -/* convert iag number to logical block number of the iag page */ -#define IAGTOLBLK(iagno,l2nbperpg) (((iagno) + 1) << (l2nbperpg)) - -/* get the starting block number of the 4K page of an inode extent - * that contains ino. - */ -#define INOPBLK(pxd,ino,l2nbperpg) (addressPXD((pxd)) + \ - ((((ino) & (INOSPEREXT-1)) >> L2INOSPERPAGE) << (l2nbperpg))) - -/* - * inode allocation map: - * - * inode allocation map consists of - * . the inode map control page and - * . inode allocation group pages (per 4096 inodes) - * which are addressed by standard JFS xtree. 
- */ -/* - * inode allocation group page (per 4096 inodes of an AG) - */ -struct iag { - __le64 agstart; /* 8: starting block of ag */ - __le32 iagnum; /* 4: inode allocation group number */ - __le32 inofreefwd; /* 4: ag inode free list forward */ - __le32 inofreeback; /* 4: ag inode free list back */ - __le32 extfreefwd; /* 4: ag inode extent free list forward */ - __le32 extfreeback; /* 4: ag inode extent free list back */ - __le32 iagfree; /* 4: iag free list */ - - /* summary map: 1 bit per inode extent */ - __le32 inosmap[SMAPSZ]; /* 16: sum map of mapwords w/ free inodes; - * note: this indicates free and backed - * inodes, if the extent is not backed the - * value will be 1. if the extent is - * backed but all inodes are being used the - * value will be 1. if the extent is - * backed but at least one of the inodes is - * free the value will be 0. - */ - __le32 extsmap[SMAPSZ]; /* 16: sum map of mapwords w/ free extents */ - __le32 nfreeinos; /* 4: number of free inodes */ - __le32 nfreeexts; /* 4: number of free extents */ - /* (72) */ - u8 pad[1976]; /* 1976: pad to 2048 bytes */ - /* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */ - __le32 wmap[EXTSPERIAG]; /* 512: working allocation map */ - __le32 pmap[EXTSPERIAG]; /* 512: persistent allocation map */ - pxd_t inoext[EXTSPERIAG]; /* 1024: inode extent addresses */ -}; /* (4096) */ - -/* - * per AG control information (in inode map control page) - */ -struct iagctl_disk { - __le32 inofree; /* 4: free inode list anchor */ - __le32 extfree; /* 4: free extent list anchor */ - __le32 numinos; /* 4: number of backed inodes */ - __le32 numfree; /* 4: number of free inodes */ -}; /* (16) */ - -struct iagctl { - int inofree; /* free inode list anchor */ - int extfree; /* free extent list anchor */ - int numinos; /* number of backed inodes */ - int numfree; /* number of free inodes */ -}; - -/* - * per fileset/aggregate inode map control page - */ -struct dinomap_disk { - __le32 in_freeiag; /* 4: 
free iag list anchor */ - __le32 in_nextiag; /* 4: next free iag number */ - __le32 in_numinos; /* 4: num of backed inodes */ - __le32 in_numfree; /* 4: num of free backed inodes */ - __le32 in_nbperiext; /* 4: num of blocks per inode extent */ - __le32 in_l2nbperiext; /* 4: l2 of in_nbperiext */ - __le32 in_diskblock; /* 4: for standalone test driver */ - __le32 in_maxag; /* 4: for standalone test driver */ - u8 pad[2016]; /* 2016: pad to 2048 */ - struct iagctl_disk in_agctl[MAXAG]; /* 2048: AG control information */ -}; /* (4096) */ - -struct dinomap { - int in_freeiag; /* free iag list anchor */ - int in_nextiag; /* next free iag number */ - int in_numinos; /* num of backed inodes */ - int in_numfree; /* num of free backed inodes */ - int in_nbperiext; /* num of blocks per inode extent */ - int in_l2nbperiext; /* l2 of in_nbperiext */ - int in_diskblock; /* for standalone test driver */ - int in_maxag; /* for standalone test driver */ - struct iagctl in_agctl[MAXAG]; /* AG control information */ -}; - -/* - * In-core inode map control page - */ -struct inomap { - struct dinomap im_imap; /* 4096: inode allocation control */ - struct inode *im_ipimap; /* 4: ptr to inode for imap */ - struct mutex im_freelock; /* 4: iag free list lock */ - struct mutex im_aglock[MAXAG]; /* 512: per AG locks */ - u32 *im_DBGdimap; - atomic_t im_numinos; /* num of backed inodes */ - atomic_t im_numfree; /* num of free backed inodes */ -}; - -#define im_freeiag im_imap.in_freeiag -#define im_nextiag im_imap.in_nextiag -#define im_agctl im_imap.in_agctl -#define im_nbperiext im_imap.in_nbperiext -#define im_l2nbperiext im_imap.in_l2nbperiext - -/* for standalone testdriver - */ -#define im_diskblock im_imap.in_diskblock -#define im_maxag im_imap.in_maxag - -extern int diFree(struct inode *); -extern int diAlloc(struct inode *, bool, struct inode *); -extern int diSync(struct inode *); -/* external references */ -extern int diUpdatePMap(struct inode *ipimap, unsigned long inum, - bool 
is_free, struct tblock * tblk); -extern int diExtendFS(struct inode *ipimap, struct inode *ipbmap); -extern int diMount(struct inode *); -extern int diUnmount(struct inode *, int); -extern int diRead(struct inode *); -extern struct inode *diReadSpecial(struct super_block *, ino_t, int); -extern void diWriteSpecial(struct inode *, int); -extern void diFreeSpecial(struct inode *); -extern int diWrite(tid_t tid, struct inode *); -#endif /* _H_JFS_IMAP */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_incore.h b/ANDROID_3.4.5/fs/jfs/jfs_incore.h deleted file mode 100644 index 584a4a1a..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_incore.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_INCORE -#define _H_JFS_INCORE - -#include <linux/mutex.h> -#include <linux/rwsem.h> -#include <linux/slab.h> -#include <linux/bitops.h> -#include "jfs_types.h" -#include "jfs_xtree.h" -#include "jfs_dtree.h" - -/* - * JFS magic number - */ -#define JFS_SUPER_MAGIC 0x3153464a /* "JFS1" */ - -/* - * JFS-private inode information - */ -struct jfs_inode_info { - int fileset; /* fileset number (always 16)*/ - uint mode2; /* jfs-specific mode */ - uint saved_uid; /* saved for uid mount option */ - uint saved_gid; /* saved for gid mount option */ - pxd_t ixpxd; /* inode extent descriptor */ - dxd_t acl; /* dxd describing acl */ - dxd_t ea; /* dxd describing ea */ - time_t otime; /* time created */ - uint next_index; /* next available directory entry index */ - int acltype; /* Type of ACL */ - short btorder; /* access order */ - short btindex; /* btpage entry index*/ - struct inode *ipimap; /* inode map */ - unsigned long cflag; /* commit flags */ - u64 agstart; /* agstart of the containing IAG */ - u16 bxflag; /* xflag of pseudo buffer? */ - unchar pad; - signed char active_ag; /* ag currently allocating from */ - lid_t blid; /* lid of pseudo buffer? */ - lid_t atlhead; /* anonymous tlock list head */ - lid_t atltail; /* anonymous tlock list tail */ - spinlock_t ag_lock; /* protects active_ag */ - struct list_head anon_inode_list; /* inodes having anonymous txns */ - /* - * rdwrlock serializes xtree between reads & writes and synchronizes - * changes to special inodes. It's use would be redundant on - * directories since the i_mutex taken in the VFS is sufficient. - */ - struct rw_semaphore rdwrlock; - /* - * commit_mutex serializes transaction processing on an inode. 
- * It must be taken after beginning a transaction (txBegin), since - * dirty inodes may be committed while a new transaction on the - * inode is blocked in txBegin or TxBeginAnon - */ - struct mutex commit_mutex; - /* xattr_sem allows us to access the xattrs without taking i_mutex */ - struct rw_semaphore xattr_sem; - lid_t xtlid; /* lid of xtree lock on directory */ - union { - struct { - xtpage_t _xtroot; /* 288: xtree root */ - struct inomap *_imap; /* 4: inode map header */ - } file; - struct { - struct dir_table_slot _table[12]; /* 96: dir index */ - dtroot_t _dtroot; /* 288: dtree root */ - } dir; - struct { - unchar _unused[16]; /* 16: */ - dxd_t _dxd; /* 16: */ - unchar _inline[128]; /* 128: inline symlink */ - /* _inline_ea may overlay the last part of - * file._xtroot if maxentry = XTROOTINITSLOT - */ - unchar _inline_ea[128]; /* 128: inline extended attr */ - } link; - } u; - u32 dev; /* will die when we get wide dev_t */ - struct inode vfs_inode; -}; -#define i_xtroot u.file._xtroot -#define i_imap u.file._imap -#define i_dirtable u.dir._table -#define i_dtroot u.dir._dtroot -#define i_inline u.link._inline -#define i_inline_ea u.link._inline_ea - -#define IREAD_LOCK(ip, subclass) \ - down_read_nested(&JFS_IP(ip)->rdwrlock, subclass) -#define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock) -#define IWRITE_LOCK(ip, subclass) \ - down_write_nested(&JFS_IP(ip)->rdwrlock, subclass) -#define IWRITE_UNLOCK(ip) up_write(&JFS_IP(ip)->rdwrlock) - -/* - * cflag - */ -enum cflags { - COMMIT_Nolink, /* inode committed with zero link count */ - COMMIT_Inlineea, /* commit inode inline EA */ - COMMIT_Freewmap, /* free WMAP at iClose() */ - COMMIT_Dirty, /* Inode is really dirty */ - COMMIT_Dirtable, /* commit changes to di_dirtable */ - COMMIT_Stale, /* data extent is no longer valid */ - COMMIT_Synclist, /* metadata pages on group commit synclist */ -}; - -/* - * commit_mutex nesting subclasses: - */ -enum commit_mutex_class -{ - COMMIT_MUTEX_PARENT, - 
COMMIT_MUTEX_CHILD, - COMMIT_MUTEX_SECOND_PARENT, /* Renaming */ - COMMIT_MUTEX_VICTIM /* Inode being unlinked due to rename */ -}; - -/* - * rdwrlock subclasses: - * The dmap inode may be locked while a normal inode or the imap inode are - * locked. - */ -enum rdwrlock_class -{ - RDWRLOCK_NORMAL, - RDWRLOCK_IMAP, - RDWRLOCK_DMAP -}; - -#define set_cflag(flag, ip) set_bit(flag, &(JFS_IP(ip)->cflag)) -#define clear_cflag(flag, ip) clear_bit(flag, &(JFS_IP(ip)->cflag)) -#define test_cflag(flag, ip) test_bit(flag, &(JFS_IP(ip)->cflag)) -#define test_and_clear_cflag(flag, ip) \ - test_and_clear_bit(flag, &(JFS_IP(ip)->cflag)) -/* - * JFS-private superblock information. - */ -struct jfs_sb_info { - struct super_block *sb; /* Point back to vfs super block */ - unsigned long mntflag; /* aggregate attributes */ - struct inode *ipbmap; /* block map inode */ - struct inode *ipaimap; /* aggregate inode map inode */ - struct inode *ipaimap2; /* secondary aimap inode */ - struct inode *ipimap; /* aggregate inode map inode */ - struct jfs_log *log; /* log */ - struct list_head log_list; /* volumes associated with a journal */ - short bsize; /* logical block size */ - short l2bsize; /* log2 logical block size */ - short nbperpage; /* blocks per page */ - short l2nbperpage; /* log2 blocks per page */ - short l2niperblk; /* log2 inodes per page */ - dev_t logdev; /* external log device */ - uint aggregate; /* volume identifier in log record */ - pxd_t logpxd; /* pxd describing log */ - pxd_t fsckpxd; /* pxd describing fsck wkspc */ - pxd_t ait2; /* pxd describing AIT copy */ - char uuid[16]; /* 128-bit uuid for volume */ - char loguuid[16]; /* 128-bit uuid for log */ - /* - * commit_state is used for synchronization of the jfs_commit - * threads. It is protected by LAZY_LOCK(). 
- */ - int commit_state; /* commit state */ - /* Formerly in ipimap */ - uint gengen; /* inode generation generator*/ - uint inostamp; /* shows inode belongs to fileset*/ - - /* Formerly in ipbmap */ - struct bmap *bmap; /* incore bmap descriptor */ - struct nls_table *nls_tab; /* current codepage */ - struct inode *direct_inode; /* metadata inode */ - uint state; /* mount/recovery state */ - unsigned long flag; /* mount time flags */ - uint p_state; /* state prior to going no integrity */ - uint uid; /* uid to override on-disk uid */ - uint gid; /* gid to override on-disk gid */ - uint umask; /* umask to override on-disk umask */ -}; - -/* jfs_sb_info commit_state */ -#define IN_LAZYCOMMIT 1 - -static inline struct jfs_inode_info *JFS_IP(struct inode *inode) -{ - return list_entry(inode, struct jfs_inode_info, vfs_inode); -} - -static inline int jfs_dirtable_inline(struct inode *inode) -{ - return (JFS_IP(inode)->next_index <= (MAX_INLINE_DIRTABLE_ENTRY + 1)); -} - -static inline struct jfs_sb_info *JFS_SBI(struct super_block *sb) -{ - return sb->s_fs_info; -} - -static inline int isReadOnly(struct inode *inode) -{ - if (JFS_SBI(inode->i_sb)->log) - return 0; - return 1; -} -#endif /* _H_JFS_INCORE */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_inode.c b/ANDROID_3.4.5/fs/jfs/jfs_inode.c deleted file mode 100644 index c1a3e603..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_inode.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/fs.h> -#include <linux/quotaops.h> -#include "jfs_incore.h" -#include "jfs_inode.h" -#include "jfs_filsys.h" -#include "jfs_imap.h" -#include "jfs_dinode.h" -#include "jfs_debug.h" - - -void jfs_set_inode_flags(struct inode *inode) -{ - unsigned int flags = JFS_IP(inode)->mode2; - - inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | - S_NOATIME | S_DIRSYNC | S_SYNC); - - if (flags & JFS_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; - if (flags & JFS_APPEND_FL) - inode->i_flags |= S_APPEND; - if (flags & JFS_NOATIME_FL) - inode->i_flags |= S_NOATIME; - if (flags & JFS_DIRSYNC_FL) - inode->i_flags |= S_DIRSYNC; - if (flags & JFS_SYNC_FL) - inode->i_flags |= S_SYNC; -} - -void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip) -{ - unsigned int flags = jfs_ip->vfs_inode.i_flags; - - jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL | - JFS_DIRSYNC_FL | JFS_SYNC_FL); - if (flags & S_IMMUTABLE) - jfs_ip->mode2 |= JFS_IMMUTABLE_FL; - if (flags & S_APPEND) - jfs_ip->mode2 |= JFS_APPEND_FL; - if (flags & S_NOATIME) - jfs_ip->mode2 |= JFS_NOATIME_FL; - if (flags & S_DIRSYNC) - jfs_ip->mode2 |= JFS_DIRSYNC_FL; - if (flags & S_SYNC) - jfs_ip->mode2 |= JFS_SYNC_FL; -} - -/* - * NAME: ialloc() - * - * FUNCTION: Allocate a new inode - * - */ -struct inode *ialloc(struct inode *parent, umode_t mode) -{ - struct super_block *sb = parent->i_sb; - struct inode *inode; - struct jfs_inode_info *jfs_inode; - int rc; - - inode = new_inode(sb); - if (!inode) { - jfs_warn("ialloc: new_inode returned NULL!"); - rc = -ENOMEM; - goto fail; - } - - jfs_inode = JFS_IP(inode); - - rc = diAlloc(parent, S_ISDIR(mode), inode); - if (rc) { - jfs_warn("ialloc: diAlloc returned %d!", rc); - if (rc 
== -EIO) - make_bad_inode(inode); - goto fail_put; - } - - if (insert_inode_locked(inode) < 0) { - rc = -EINVAL; - goto fail_unlock; - } - - inode_init_owner(inode, parent, mode); - /* - * New inodes need to save sane values on disk when - * uid & gid mount options are used - */ - jfs_inode->saved_uid = inode->i_uid; - jfs_inode->saved_gid = inode->i_gid; - - /* - * Allocate inode to quota. - */ - dquot_initialize(inode); - rc = dquot_alloc_inode(inode); - if (rc) - goto fail_drop; - - /* inherit flags from parent */ - jfs_inode->mode2 = JFS_IP(parent)->mode2 & JFS_FL_INHERIT; - - if (S_ISDIR(mode)) { - jfs_inode->mode2 |= IDIRECTORY; - jfs_inode->mode2 &= ~JFS_DIRSYNC_FL; - } - else { - jfs_inode->mode2 |= INLINEEA | ISPARSE; - if (S_ISLNK(mode)) - jfs_inode->mode2 &= ~(JFS_IMMUTABLE_FL|JFS_APPEND_FL); - } - jfs_inode->mode2 |= inode->i_mode; - - inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - jfs_inode->otime = inode->i_ctime.tv_sec; - inode->i_generation = JFS_SBI(sb)->gengen++; - - jfs_inode->cflag = 0; - - /* Zero remaining fields */ - memset(&jfs_inode->acl, 0, sizeof(dxd_t)); - memset(&jfs_inode->ea, 0, sizeof(dxd_t)); - jfs_inode->next_index = 0; - jfs_inode->acltype = 0; - jfs_inode->btorder = 0; - jfs_inode->btindex = 0; - jfs_inode->bxflag = 0; - jfs_inode->blid = 0; - jfs_inode->atlhead = 0; - jfs_inode->atltail = 0; - jfs_inode->xtlid = 0; - jfs_set_inode_flags(inode); - - jfs_info("ialloc returns inode = 0x%p\n", inode); - - return inode; - -fail_drop: - dquot_drop(inode); - inode->i_flags |= S_NOQUOTA; -fail_unlock: - clear_nlink(inode); - unlock_new_inode(inode); -fail_put: - iput(inode); -fail: - return ERR_PTR(rc); -} diff --git a/ANDROID_3.4.5/fs/jfs/jfs_inode.h b/ANDROID_3.4.5/fs/jfs/jfs_inode.h deleted file mode 100644 index 9271cfe4..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_inode.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2001 - * - * This program 
is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_INODE -#define _H_JFS_INODE - -struct fid; - -extern struct inode *ialloc(struct inode *, umode_t); -extern int jfs_fsync(struct file *, loff_t, loff_t, int); -extern long jfs_ioctl(struct file *, unsigned int, unsigned long); -extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long); -extern struct inode *jfs_iget(struct super_block *, unsigned long); -extern int jfs_commit_inode(struct inode *, int); -extern int jfs_write_inode(struct inode *, struct writeback_control *); -extern void jfs_evict_inode(struct inode *); -extern void jfs_dirty_inode(struct inode *, int); -extern void jfs_truncate(struct inode *); -extern void jfs_truncate_nolock(struct inode *, loff_t); -extern void jfs_free_zero_link(struct inode *); -extern struct dentry *jfs_get_parent(struct dentry *dentry); -extern void jfs_get_inode_flags(struct jfs_inode_info *); -extern struct dentry *jfs_fh_to_dentry(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type); -extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type); -extern void jfs_set_inode_flags(struct inode *); -extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); -extern int jfs_setattr(struct dentry *, struct iattr *); - 
-extern const struct address_space_operations jfs_aops; -extern const struct inode_operations jfs_dir_inode_operations; -extern const struct file_operations jfs_dir_operations; -extern const struct inode_operations jfs_file_inode_operations; -extern const struct file_operations jfs_file_operations; -extern const struct inode_operations jfs_symlink_inode_operations; -extern const struct inode_operations jfs_fast_symlink_inode_operations; -extern const struct dentry_operations jfs_ci_dentry_operations; -#endif /* _H_JFS_INODE */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_lock.h b/ANDROID_3.4.5/fs/jfs/jfs_lock.h deleted file mode 100644 index ecf04882..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_lock.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2001 - * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_LOCK -#define _H_JFS_LOCK - -#include <linux/spinlock.h> -#include <linux/mutex.h> -#include <linux/sched.h> - -/* - * jfs_lock.h - */ - -/* - * Conditional sleep where condition is protected by spinlock - * - * lock_cmd and unlock_cmd take and release the spinlock - */ -#define __SLEEP_COND(wq, cond, lock_cmd, unlock_cmd) \ -do { \ - DECLARE_WAITQUEUE(__wait, current); \ - \ - add_wait_queue(&wq, &__wait); \ - for (;;) { \ - set_current_state(TASK_UNINTERRUPTIBLE);\ - if (cond) \ - break; \ - unlock_cmd; \ - io_schedule(); \ - lock_cmd; \ - } \ - __set_current_state(TASK_RUNNING); \ - remove_wait_queue(&wq, &__wait); \ -} while (0) - -#endif /* _H_JFS_LOCK */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_logmgr.c b/ANDROID_3.4.5/fs/jfs/jfs_logmgr.c deleted file mode 100644 index 2eb952c4..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_logmgr.c +++ /dev/null @@ -1,2530 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* - * jfs_logmgr.c: log manager - * - * for related information, see transaction manager (jfs_txnmgr.c), and - * recovery manager (jfs_logredo.c). - * - * note: for detail, RTFS. - * - * log buffer manager: - * special purpose buffer manager supporting log i/o requirements. - * per log serial pageout of logpage - * queuing i/o requests and redrive i/o at iodone - * maintain current logpage buffer - * no caching since append only - * appropriate jfs buffer cache buffers as needed - * - * group commit: - * transactions which wrote COMMIT records in the same in-memory - * log page during the pageout of previous/current log page(s) are - * committed together by the pageout of the page. - * - * TBD lazy commit: - * transactions are committed asynchronously when the log page - * containing it COMMIT is paged out when it becomes full; - * - * serialization: - * . a per log lock serialize log write. - * . a per log lock serialize group commit. - * . a per log lock serialize log open/close; - * - * TBD log integrity: - * careful-write (ping-pong) of last logpage to recover from crash - * in overwrite. - * detection of split (out-of-order) write of physical sectors - * of last logpage via timestamp at end of each sector - * with its mirror data array at trailer). - * - * alternatives: - * lsn - 64-bit monotonically increasing integer vs - * 32-bit lspn and page eor. 
- */ - -#include <linux/fs.h> -#include <linux/blkdev.h> -#include <linux/interrupt.h> -#include <linux/completion.h> -#include <linux/kthread.h> -#include <linux/buffer_head.h> /* for sync_blockdev() */ -#include <linux/bio.h> -#include <linux/freezer.h> -#include <linux/export.h> -#include <linux/delay.h> -#include <linux/mutex.h> -#include <linux/seq_file.h> -#include <linux/slab.h> -#include "jfs_incore.h" -#include "jfs_filsys.h" -#include "jfs_metapage.h" -#include "jfs_superblock.h" -#include "jfs_txnmgr.h" -#include "jfs_debug.h" - - -/* - * lbuf's ready to be redriven. Protected by log_redrive_lock (jfsIO thread) - */ -static struct lbuf *log_redrive_list; -static DEFINE_SPINLOCK(log_redrive_lock); - - -/* - * log read/write serialization (per log) - */ -#define LOG_LOCK_INIT(log) mutex_init(&(log)->loglock) -#define LOG_LOCK(log) mutex_lock(&((log)->loglock)) -#define LOG_UNLOCK(log) mutex_unlock(&((log)->loglock)) - - -/* - * log group commit serialization (per log) - */ - -#define LOGGC_LOCK_INIT(log) spin_lock_init(&(log)->gclock) -#define LOGGC_LOCK(log) spin_lock_irq(&(log)->gclock) -#define LOGGC_UNLOCK(log) spin_unlock_irq(&(log)->gclock) -#define LOGGC_WAKEUP(tblk) wake_up_all(&(tblk)->gcwait) - -/* - * log sync serialization (per log) - */ -#define LOGSYNC_DELTA(logsize) min((logsize)/8, 128*LOGPSIZE) -#define LOGSYNC_BARRIER(logsize) ((logsize)/4) -/* -#define LOGSYNC_DELTA(logsize) min((logsize)/4, 256*LOGPSIZE) -#define LOGSYNC_BARRIER(logsize) ((logsize)/2) -*/ - - -/* - * log buffer cache synchronization - */ -static DEFINE_SPINLOCK(jfsLCacheLock); - -#define LCACHE_LOCK(flags) spin_lock_irqsave(&jfsLCacheLock, flags) -#define LCACHE_UNLOCK(flags) spin_unlock_irqrestore(&jfsLCacheLock, flags) - -/* - * See __SLEEP_COND in jfs_locks.h - */ -#define LCACHE_SLEEP_COND(wq, cond, flags) \ -do { \ - if (cond) \ - break; \ - __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \ -} while (0) - -#define LCACHE_WAKEUP(event) 
wake_up(event) - - -/* - * lbuf buffer cache (lCache) control - */ -/* log buffer manager pageout control (cumulative, inclusive) */ -#define lbmREAD 0x0001 -#define lbmWRITE 0x0002 /* enqueue at tail of write queue; - * init pageout if at head of queue; - */ -#define lbmRELEASE 0x0004 /* remove from write queue - * at completion of pageout; - * do not free/recycle it yet: - * caller will free it; - */ -#define lbmSYNC 0x0008 /* do not return to freelist - * when removed from write queue; - */ -#define lbmFREE 0x0010 /* return to freelist - * at completion of pageout; - * the buffer may be recycled; - */ -#define lbmDONE 0x0020 -#define lbmERROR 0x0040 -#define lbmGC 0x0080 /* lbmIODone to perform post-GC processing - * of log page - */ -#define lbmDIRECT 0x0100 - -/* - * Global list of active external journals - */ -static LIST_HEAD(jfs_external_logs); -static struct jfs_log *dummy_log = NULL; -static DEFINE_MUTEX(jfs_log_mutex); - -/* - * forward references - */ -static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk, - struct lrd * lrd, struct tlock * tlck); - -static int lmNextPage(struct jfs_log * log); -static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi, - int activate); - -static int open_inline_log(struct super_block *sb); -static int open_dummy_log(struct super_block *sb); -static int lbmLogInit(struct jfs_log * log); -static void lbmLogShutdown(struct jfs_log * log); -static struct lbuf *lbmAllocate(struct jfs_log * log, int); -static void lbmFree(struct lbuf * bp); -static void lbmfree(struct lbuf * bp); -static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp); -static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block); -static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag); -static int lbmIOWait(struct lbuf * bp, int flag); -static bio_end_io_t lbmIODone; -static void lbmStartIO(struct lbuf * bp); -static void lmGCwrite(struct jfs_log * log, int 
cant_block); -static int lmLogSync(struct jfs_log * log, int hard_sync); - - - -/* - * statistics - */ -#ifdef CONFIG_JFS_STATISTICS -static struct lmStat { - uint commit; /* # of commit */ - uint pagedone; /* # of page written */ - uint submitted; /* # of pages submitted */ - uint full_page; /* # of full pages submitted */ - uint partial_page; /* # of partial pages submitted */ -} lmStat; -#endif - -static void write_special_inodes(struct jfs_log *log, - int (*writer)(struct address_space *)) -{ - struct jfs_sb_info *sbi; - - list_for_each_entry(sbi, &log->sb_list, log_list) { - writer(sbi->ipbmap->i_mapping); - writer(sbi->ipimap->i_mapping); - writer(sbi->direct_inode->i_mapping); - } -} - -/* - * NAME: lmLog() - * - * FUNCTION: write a log record; - * - * PARAMETER: - * - * RETURN: lsn - offset to the next log record to write (end-of-log); - * -1 - error; - * - * note: todo: log error handler - */ -int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck) -{ - int lsn; - int diffp, difft; - struct metapage *mp = NULL; - unsigned long flags; - - jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p", - log, tblk, lrd, tlck); - - LOG_LOCK(log); - - /* log by (out-of-transaction) JFS ? */ - if (tblk == NULL) - goto writeRecord; - - /* log from page ? 
*/ - if (tlck == NULL || - tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL) - goto writeRecord; - - /* - * initialize/update page/transaction recovery lsn - */ - lsn = log->lsn; - - LOGSYNC_LOCK(log, flags); - - /* - * initialize page lsn if first log write of the page - */ - if (mp->lsn == 0) { - mp->log = log; - mp->lsn = lsn; - log->count++; - - /* insert page at tail of logsynclist */ - list_add_tail(&mp->synclist, &log->synclist); - } - - /* - * initialize/update lsn of tblock of the page - * - * transaction inherits oldest lsn of pages associated - * with allocation/deallocation of resources (their - * log records are used to reconstruct allocation map - * at recovery time: inode for inode allocation map, - * B+-tree index of extent descriptors for block - * allocation map); - * allocation map pages inherit transaction lsn at - * commit time to allow forwarding log syncpt past log - * records associated with allocation/deallocation of - * resources only after persistent map of these map pages - * have been updated and propagated to home. 
- */ - /* - * initialize transaction lsn: - */ - if (tblk->lsn == 0) { - /* inherit lsn of its first page logged */ - tblk->lsn = mp->lsn; - log->count++; - - /* insert tblock after the page on logsynclist */ - list_add(&tblk->synclist, &mp->synclist); - } - /* - * update transaction lsn: - */ - else { - /* inherit oldest/smallest lsn of page */ - logdiff(diffp, mp->lsn, log); - logdiff(difft, tblk->lsn, log); - if (diffp < difft) { - /* update tblock lsn with page lsn */ - tblk->lsn = mp->lsn; - - /* move tblock after page on logsynclist */ - list_move(&tblk->synclist, &mp->synclist); - } - } - - LOGSYNC_UNLOCK(log, flags); - - /* - * write the log record - */ - writeRecord: - lsn = lmWriteRecord(log, tblk, lrd, tlck); - - /* - * forward log syncpt if log reached next syncpt trigger - */ - logdiff(diffp, lsn, log); - if (diffp >= log->nextsync) - lsn = lmLogSync(log, 0); - - /* update end-of-log lsn */ - log->lsn = lsn; - - LOG_UNLOCK(log); - - /* return end-of-log address */ - return lsn; -} - -/* - * NAME: lmWriteRecord() - * - * FUNCTION: move the log record to current log page - * - * PARAMETER: cd - commit descriptor - * - * RETURN: end-of-log address - * - * serialization: LOG_LOCK() held on entry/exit - */ -static int -lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck) -{ - int lsn = 0; /* end-of-log address */ - struct lbuf *bp; /* dst log page buffer */ - struct logpage *lp; /* dst log page */ - caddr_t dst; /* destination address in log page */ - int dstoffset; /* end-of-log offset in log page */ - int freespace; /* free space in log page */ - caddr_t p; /* src meta-data page */ - caddr_t src; - int srclen; - int nbytes; /* number of bytes to move */ - int i; - int len; - struct linelock *linelock; - struct lv *lv; - struct lvd *lvd; - int l2linesize; - - len = 0; - - /* retrieve destination log page to write */ - bp = (struct lbuf *) log->bp; - lp = (struct logpage *) bp->l_ldata; - dstoffset = log->eor; - 
- /* any log data to write ? */ - if (tlck == NULL) - goto moveLrd; - - /* - * move log record data - */ - /* retrieve source meta-data page to log */ - if (tlck->flag & tlckPAGELOCK) { - p = (caddr_t) (tlck->mp->data); - linelock = (struct linelock *) & tlck->lock; - } - /* retrieve source in-memory inode to log */ - else if (tlck->flag & tlckINODELOCK) { - if (tlck->type & tlckDTREE) - p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot; - else - p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot; - linelock = (struct linelock *) & tlck->lock; - } -#ifdef _JFS_WIP - else if (tlck->flag & tlckINLINELOCK) { - - inlinelock = (struct inlinelock *) & tlck; - p = (caddr_t) & inlinelock->pxd; - linelock = (struct linelock *) & tlck; - } -#endif /* _JFS_WIP */ - else { - jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck); - return 0; /* Probably should trap */ - } - l2linesize = linelock->l2linesize; - - moveData: - ASSERT(linelock->index <= linelock->maxcnt); - - lv = linelock->lv; - for (i = 0; i < linelock->index; i++, lv++) { - if (lv->length == 0) - continue; - - /* is page full ? */ - if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) { - /* page become full: move on to next page */ - lmNextPage(log); - - bp = log->bp; - lp = (struct logpage *) bp->l_ldata; - dstoffset = LOGPHDRSIZE; - } - - /* - * move log vector data - */ - src = (u8 *) p + (lv->offset << l2linesize); - srclen = lv->length << l2linesize; - len += srclen; - while (srclen > 0) { - freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; - nbytes = min(freespace, srclen); - dst = (caddr_t) lp + dstoffset; - memcpy(dst, src, nbytes); - dstoffset += nbytes; - - /* is page not full ? 
*/ - if (dstoffset < LOGPSIZE - LOGPTLRSIZE) - break; - - /* page become full: move on to next page */ - lmNextPage(log); - - bp = (struct lbuf *) log->bp; - lp = (struct logpage *) bp->l_ldata; - dstoffset = LOGPHDRSIZE; - - srclen -= nbytes; - src += nbytes; - } - - /* - * move log vector descriptor - */ - len += 4; - lvd = (struct lvd *) ((caddr_t) lp + dstoffset); - lvd->offset = cpu_to_le16(lv->offset); - lvd->length = cpu_to_le16(lv->length); - dstoffset += 4; - jfs_info("lmWriteRecord: lv offset:%d length:%d", - lv->offset, lv->length); - } - - if ((i = linelock->next)) { - linelock = (struct linelock *) lid_to_tlock(i); - goto moveData; - } - - /* - * move log record descriptor - */ - moveLrd: - lrd->length = cpu_to_le16(len); - - src = (caddr_t) lrd; - srclen = LOGRDSIZE; - - while (srclen > 0) { - freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; - nbytes = min(freespace, srclen); - dst = (caddr_t) lp + dstoffset; - memcpy(dst, src, nbytes); - - dstoffset += nbytes; - srclen -= nbytes; - - /* are there more to move than freespace of page ? */ - if (srclen) - goto pageFull; - - /* - * end of log record descriptor - */ - - /* update last log record eor */ - log->eor = dstoffset; - bp->l_eor = dstoffset; - lsn = (log->page << L2LOGPSIZE) + dstoffset; - - if (lrd->type & cpu_to_le16(LOG_COMMIT)) { - tblk->clsn = lsn; - jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn, - bp->l_eor); - - INCREMENT(lmStat.commit); /* # of commit */ - - /* - * enqueue tblock for group commit: - * - * enqueue tblock of non-trivial/synchronous COMMIT - * at tail of group commit queue - * (trivial/asynchronous COMMITs are ignored by - * group commit.) 
- */ - LOGGC_LOCK(log); - - /* init tblock gc state */ - tblk->flag = tblkGC_QUEUE; - tblk->bp = log->bp; - tblk->pn = log->page; - tblk->eor = log->eor; - - /* enqueue transaction to commit queue */ - list_add_tail(&tblk->cqueue, &log->cqueue); - - LOGGC_UNLOCK(log); - } - - jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x", - le16_to_cpu(lrd->type), log->bp, log->page, dstoffset); - - /* page not full ? */ - if (dstoffset < LOGPSIZE - LOGPTLRSIZE) - return lsn; - - pageFull: - /* page become full: move on to next page */ - lmNextPage(log); - - bp = (struct lbuf *) log->bp; - lp = (struct logpage *) bp->l_ldata; - dstoffset = LOGPHDRSIZE; - src += nbytes; - } - - return lsn; -} - - -/* - * NAME: lmNextPage() - * - * FUNCTION: write current page and allocate next page. - * - * PARAMETER: log - * - * RETURN: 0 - * - * serialization: LOG_LOCK() held on entry/exit - */ -static int lmNextPage(struct jfs_log * log) -{ - struct logpage *lp; - int lspn; /* log sequence page number */ - int pn; /* current page number */ - struct lbuf *bp; - struct lbuf *nextbp; - struct tblock *tblk; - - /* get current log page number and log sequence page number */ - pn = log->page; - bp = log->bp; - lp = (struct logpage *) bp->l_ldata; - lspn = le32_to_cpu(lp->h.page); - - LOGGC_LOCK(log); - - /* - * write or queue the full page at the tail of write queue - */ - /* get the tail tblk on commit queue */ - if (list_empty(&log->cqueue)) - tblk = NULL; - else - tblk = list_entry(log->cqueue.prev, struct tblock, cqueue); - - /* every tblk who has COMMIT record on the current page, - * and has not been committed, must be on commit queue - * since tblk is queued at commit queueu at the time - * of writing its COMMIT record on the page before - * page becomes full (even though the tblk thread - * who wrote COMMIT record may have been suspended - * currently); - */ - - /* is page bound with outstanding tail tblk ? 
*/ - if (tblk && tblk->pn == pn) { - /* mark tblk for end-of-page */ - tblk->flag |= tblkGC_EOP; - - if (log->cflag & logGC_PAGEOUT) { - /* if page is not already on write queue, - * just enqueue (no lbmWRITE to prevent redrive) - * buffer to wqueue to ensure correct serial order - * of the pages since log pages will be added - * continuously - */ - if (bp->l_wqnext == NULL) - lbmWrite(log, bp, 0, 0); - } else { - /* - * No current GC leader, initiate group commit - */ - log->cflag |= logGC_PAGEOUT; - lmGCwrite(log, 0); - } - } - /* page is not bound with outstanding tblk: - * init write or mark it to be redriven (lbmWRITE) - */ - else { - /* finalize the page */ - bp->l_ceor = bp->l_eor; - lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); - lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0); - } - LOGGC_UNLOCK(log); - - /* - * allocate/initialize next page - */ - /* if log wraps, the first data page of log is 2 - * (0 never used, 1 is superblock). - */ - log->page = (pn == log->size - 1) ? 2 : pn + 1; - log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */ - - /* allocate/initialize next log page buffer */ - nextbp = lbmAllocate(log, log->page); - nextbp->l_eor = log->eor; - log->bp = nextbp; - - /* initialize next log page */ - lp = (struct logpage *) nextbp->l_ldata; - lp->h.page = lp->t.page = cpu_to_le32(lspn + 1); - lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE); - - return 0; -} - - -/* - * NAME: lmGroupCommit() - * - * FUNCTION: group commit - * initiate pageout of the pages with COMMIT in the order of - * page number - redrive pageout of the page at the head of - * pageout queue until full page has been written. - * - * RETURN: - * - * NOTE: - * LOGGC_LOCK serializes log group commit queue, and - * transaction blocks on the commit queue. - * N.B. LOG_LOCK is NOT held during lmGroupCommit(). - */ -int lmGroupCommit(struct jfs_log * log, struct tblock * tblk) -{ - int rc = 0; - - LOGGC_LOCK(log); - - /* group committed already ? 
*/ - if (tblk->flag & tblkGC_COMMITTED) { - if (tblk->flag & tblkGC_ERROR) - rc = -EIO; - - LOGGC_UNLOCK(log); - return rc; - } - jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc); - - if (tblk->xflag & COMMIT_LAZY) - tblk->flag |= tblkGC_LAZY; - - if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) && - (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag) - || jfs_tlocks_low)) { - /* - * No pageout in progress - * - * start group commit as its group leader. - */ - log->cflag |= logGC_PAGEOUT; - - lmGCwrite(log, 0); - } - - if (tblk->xflag & COMMIT_LAZY) { - /* - * Lazy transactions can leave now - */ - LOGGC_UNLOCK(log); - return 0; - } - - /* lmGCwrite gives up LOGGC_LOCK, check again */ - - if (tblk->flag & tblkGC_COMMITTED) { - if (tblk->flag & tblkGC_ERROR) - rc = -EIO; - - LOGGC_UNLOCK(log); - return rc; - } - - /* upcount transaction waiting for completion - */ - log->gcrtc++; - tblk->flag |= tblkGC_READY; - - __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED), - LOGGC_LOCK(log), LOGGC_UNLOCK(log)); - - /* removed from commit queue */ - if (tblk->flag & tblkGC_ERROR) - rc = -EIO; - - LOGGC_UNLOCK(log); - return rc; -} - -/* - * NAME: lmGCwrite() - * - * FUNCTION: group commit write - * initiate write of log page, building a group of all transactions - * with commit records on that page. - * - * RETURN: None - * - * NOTE: - * LOGGC_LOCK must be held by caller. - * N.B. LOG_LOCK is NOT held during lmGroupCommit(). - */ -static void lmGCwrite(struct jfs_log * log, int cant_write) -{ - struct lbuf *bp; - struct logpage *lp; - int gcpn; /* group commit page number */ - struct tblock *tblk; - struct tblock *xtblk = NULL; - - /* - * build the commit group of a log page - * - * scan commit queue and make a commit group of all - * transactions with COMMIT records on the same log page. 
- */ - /* get the head tblk on the commit queue */ - gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn; - - list_for_each_entry(tblk, &log->cqueue, cqueue) { - if (tblk->pn != gcpn) - break; - - xtblk = tblk; - - /* state transition: (QUEUE, READY) -> COMMIT */ - tblk->flag |= tblkGC_COMMIT; - } - tblk = xtblk; /* last tblk of the page */ - - /* - * pageout to commit transactions on the log page. - */ - bp = (struct lbuf *) tblk->bp; - lp = (struct logpage *) bp->l_ldata; - /* is page already full ? */ - if (tblk->flag & tblkGC_EOP) { - /* mark page to free at end of group commit of the page */ - tblk->flag &= ~tblkGC_EOP; - tblk->flag |= tblkGC_FREE; - bp->l_ceor = bp->l_eor; - lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); - lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC, - cant_write); - INCREMENT(lmStat.full_page); - } - /* page is not yet full */ - else { - bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */ - lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); - lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write); - INCREMENT(lmStat.partial_page); - } -} - -/* - * NAME: lmPostGC() - * - * FUNCTION: group commit post-processing - * Processes transactions after their commit records have been written - * to disk, redriving log I/O if necessary. - * - * RETURN: None - * - * NOTE: - * This routine is called a interrupt time by lbmIODone - */ -static void lmPostGC(struct lbuf * bp) -{ - unsigned long flags; - struct jfs_log *log = bp->l_log; - struct logpage *lp; - struct tblock *tblk, *temp; - - //LOGGC_LOCK(log); - spin_lock_irqsave(&log->gclock, flags); - /* - * current pageout of group commit completed. 
- * - * remove/wakeup transactions from commit queue who were - * group committed with the current log page - */ - list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) { - if (!(tblk->flag & tblkGC_COMMIT)) - break; - /* if transaction was marked GC_COMMIT then - * it has been shipped in the current pageout - * and made it to disk - it is committed. - */ - - if (bp->l_flag & lbmERROR) - tblk->flag |= tblkGC_ERROR; - - /* remove it from the commit queue */ - list_del(&tblk->cqueue); - tblk->flag &= ~tblkGC_QUEUE; - - if (tblk == log->flush_tblk) { - /* we can stop flushing the log now */ - clear_bit(log_FLUSH, &log->flag); - log->flush_tblk = NULL; - } - - jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk, - tblk->flag); - - if (!(tblk->xflag & COMMIT_FORCE)) - /* - * Hand tblk over to lazy commit thread - */ - txLazyUnlock(tblk); - else { - /* state transition: COMMIT -> COMMITTED */ - tblk->flag |= tblkGC_COMMITTED; - - if (tblk->flag & tblkGC_READY) - log->gcrtc--; - - LOGGC_WAKEUP(tblk); - } - - /* was page full before pageout ? - * (and this is the last tblk bound with the page) - */ - if (tblk->flag & tblkGC_FREE) - lbmFree(bp); - /* did page become full after pageout ? - * (and this is the last tblk bound with the page) - */ - else if (tblk->flag & tblkGC_EOP) { - /* finalize the page */ - lp = (struct logpage *) bp->l_ldata; - bp->l_ceor = bp->l_eor; - lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); - jfs_info("lmPostGC: calling lbmWrite"); - lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, - 1); - } - - } - - /* are there any transactions who have entered lnGroupCommit() - * (whose COMMITs are after that of the last log page written. - * They are waiting for new group commit (above at (SLEEP 1)) - * or lazy transactions are on a full (queued) log page, - * select the latest ready transaction as new group leader and - * wake her up to lead her group. 
- */ - if ((!list_empty(&log->cqueue)) && - ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) || - test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low)) - /* - * Call lmGCwrite with new group leader - */ - lmGCwrite(log, 1); - - /* no transaction are ready yet (transactions are only just - * queued (GC_QUEUE) and not entered for group commit yet). - * the first transaction entering group commit - * will elect herself as new group leader. - */ - else - log->cflag &= ~logGC_PAGEOUT; - - //LOGGC_UNLOCK(log); - spin_unlock_irqrestore(&log->gclock, flags); - return; -} - -/* - * NAME: lmLogSync() - * - * FUNCTION: write log SYNCPT record for specified log - * if new sync address is available - * (normally the case if sync() is executed by back-ground - * process). - * calculate new value of i_nextsync which determines when - * this code is called again. - * - * PARAMETERS: log - log structure - * hard_sync - 1 to force all metadata to be written - * - * RETURN: 0 - * - * serialization: LOG_LOCK() held on entry/exit - */ -static int lmLogSync(struct jfs_log * log, int hard_sync) -{ - int logsize; - int written; /* written since last syncpt */ - int free; /* free space left available */ - int delta; /* additional delta to write normally */ - int more; /* additional write granted */ - struct lrd lrd; - int lsn; - struct logsyncblk *lp; - unsigned long flags; - - /* push dirty metapages out to disk */ - if (hard_sync) - write_special_inodes(log, filemap_fdatawrite); - else - write_special_inodes(log, filemap_flush); - - /* - * forward syncpt - */ - /* if last sync is same as last syncpt, - * invoke sync point forward processing to update sync. 
- */ - - if (log->sync == log->syncpt) { - LOGSYNC_LOCK(log, flags); - if (list_empty(&log->synclist)) - log->sync = log->lsn; - else { - lp = list_entry(log->synclist.next, - struct logsyncblk, synclist); - log->sync = lp->lsn; - } - LOGSYNC_UNLOCK(log, flags); - - } - - /* if sync is different from last syncpt, - * write a SYNCPT record with syncpt = sync. - * reset syncpt = sync - */ - if (log->sync != log->syncpt) { - lrd.logtid = 0; - lrd.backchain = 0; - lrd.type = cpu_to_le16(LOG_SYNCPT); - lrd.length = 0; - lrd.log.syncpt.sync = cpu_to_le32(log->sync); - lsn = lmWriteRecord(log, NULL, &lrd, NULL); - - log->syncpt = log->sync; - } else - lsn = log->lsn; - - /* - * setup next syncpt trigger (SWAG) - */ - logsize = log->logsize; - - logdiff(written, lsn, log); - free = logsize - written; - delta = LOGSYNC_DELTA(logsize); - more = min(free / 2, delta); - if (more < 2 * LOGPSIZE) { - jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n"); - /* - * log wrapping - * - * option 1 - panic ? No.! - * option 2 - shutdown file systems - * associated with log ? - * option 3 - extend log ? - * option 4 - second chance - * - * mark log wrapped, and continue. - * when all active transactions are completed, - * mark log valid for recovery. - * if crashed during invalid state, log state - * implies invalid log, forcing fsck(). - */ - /* mark log state log wrap in log superblock */ - /* log->state = LOGWRAP; */ - - /* reset sync point computation */ - log->syncpt = log->sync = lsn; - log->nextsync = delta; - } else - /* next syncpt trigger = written + more */ - log->nextsync = written + more; - - /* if number of bytes written from last sync point is more - * than 1/4 of the log size, stop new transactions from - * starting until all current transactions are completed - * by setting syncbarrier flag. 
- */ - if (!test_bit(log_SYNCBARRIER, &log->flag) && - (written > LOGSYNC_BARRIER(logsize)) && log->active) { - set_bit(log_SYNCBARRIER, &log->flag); - jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn, - log->syncpt); - /* - * We may have to initiate group commit - */ - jfs_flush_journal(log, 0); - } - - return lsn; -} - -/* - * NAME: jfs_syncpt - * - * FUNCTION: write log SYNCPT record for specified log - * - * PARAMETERS: log - log structure - * hard_sync - set to 1 to force metadata to be written - */ -void jfs_syncpt(struct jfs_log *log, int hard_sync) -{ LOG_LOCK(log); - lmLogSync(log, hard_sync); - LOG_UNLOCK(log); -} - -/* - * NAME: lmLogOpen() - * - * FUNCTION: open the log on first open; - * insert filesystem in the active list of the log. - * - * PARAMETER: ipmnt - file system mount inode - * iplog - log inode (out) - * - * RETURN: - * - * serialization: - */ -int lmLogOpen(struct super_block *sb) -{ - int rc; - struct block_device *bdev; - struct jfs_log *log; - struct jfs_sb_info *sbi = JFS_SBI(sb); - - if (sbi->flag & JFS_NOINTEGRITY) - return open_dummy_log(sb); - - if (sbi->mntflag & JFS_INLINELOG) - return open_inline_log(sb); - - mutex_lock(&jfs_log_mutex); - list_for_each_entry(log, &jfs_external_logs, journal_list) { - if (log->bdev->bd_dev == sbi->logdev) { - if (memcmp(log->uuid, sbi->loguuid, - sizeof(log->uuid))) { - jfs_warn("wrong uuid on JFS journal\n"); - mutex_unlock(&jfs_log_mutex); - return -EINVAL; - } - /* - * add file system to log active file system list - */ - if ((rc = lmLogFileSystem(log, sbi, 1))) { - mutex_unlock(&jfs_log_mutex); - return rc; - } - goto journal_found; - } - } - - if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) { - mutex_unlock(&jfs_log_mutex); - return -ENOMEM; - } - INIT_LIST_HEAD(&log->sb_list); - init_waitqueue_head(&log->syncwait); - - /* - * external log as separate logical volume - * - * file systems to log may have n-to-1 relationship; - */ - - bdev = blkdev_get_by_dev(sbi->logdev, 
FMODE_READ|FMODE_WRITE|FMODE_EXCL, - log); - if (IS_ERR(bdev)) { - rc = PTR_ERR(bdev); - goto free; - } - - log->bdev = bdev; - memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid)); - - /* - * initialize log: - */ - if ((rc = lmLogInit(log))) - goto close; - - list_add(&log->journal_list, &jfs_external_logs); - - /* - * add file system to log active file system list - */ - if ((rc = lmLogFileSystem(log, sbi, 1))) - goto shutdown; - -journal_found: - LOG_LOCK(log); - list_add(&sbi->log_list, &log->sb_list); - sbi->log = log; - LOG_UNLOCK(log); - - mutex_unlock(&jfs_log_mutex); - return 0; - - /* - * unwind on error - */ - shutdown: /* unwind lbmLogInit() */ - list_del(&log->journal_list); - lbmLogShutdown(log); - - close: /* close external log device */ - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); - - free: /* free log descriptor */ - mutex_unlock(&jfs_log_mutex); - kfree(log); - - jfs_warn("lmLogOpen: exit(%d)", rc); - return rc; -} - -static int open_inline_log(struct super_block *sb) -{ - struct jfs_log *log; - int rc; - - if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) - return -ENOMEM; - INIT_LIST_HEAD(&log->sb_list); - init_waitqueue_head(&log->syncwait); - - set_bit(log_INLINELOG, &log->flag); - log->bdev = sb->s_bdev; - log->base = addressPXD(&JFS_SBI(sb)->logpxd); - log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >> - (L2LOGPSIZE - sb->s_blocksize_bits); - log->l2bsize = sb->s_blocksize_bits; - ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits); - - /* - * initialize log. 
- */ - if ((rc = lmLogInit(log))) { - kfree(log); - jfs_warn("lmLogOpen: exit(%d)", rc); - return rc; - } - - list_add(&JFS_SBI(sb)->log_list, &log->sb_list); - JFS_SBI(sb)->log = log; - - return rc; -} - -static int open_dummy_log(struct super_block *sb) -{ - int rc; - - mutex_lock(&jfs_log_mutex); - if (!dummy_log) { - dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL); - if (!dummy_log) { - mutex_unlock(&jfs_log_mutex); - return -ENOMEM; - } - INIT_LIST_HEAD(&dummy_log->sb_list); - init_waitqueue_head(&dummy_log->syncwait); - dummy_log->no_integrity = 1; - /* Make up some stuff */ - dummy_log->base = 0; - dummy_log->size = 1024; - rc = lmLogInit(dummy_log); - if (rc) { - kfree(dummy_log); - dummy_log = NULL; - mutex_unlock(&jfs_log_mutex); - return rc; - } - } - - LOG_LOCK(dummy_log); - list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list); - JFS_SBI(sb)->log = dummy_log; - LOG_UNLOCK(dummy_log); - mutex_unlock(&jfs_log_mutex); - - return 0; -} - -/* - * NAME: lmLogInit() - * - * FUNCTION: log initialization at first log open. - * - * logredo() (or logformat()) should have been run previously. - * initialize the log from log superblock. - * set the log state in the superblock to LOGMOUNT and - * write SYNCPT log record. 
- * - * PARAMETER: log - log structure - * - * RETURN: 0 - if ok - * -EINVAL - bad log magic number or superblock dirty - * error returned from logwait() - * - * serialization: single first open thread - */ -int lmLogInit(struct jfs_log * log) -{ - int rc = 0; - struct lrd lrd; - struct logsuper *logsuper; - struct lbuf *bpsuper; - struct lbuf *bp; - struct logpage *lp; - int lsn = 0; - - jfs_info("lmLogInit: log:0x%p", log); - - /* initialize the group commit serialization lock */ - LOGGC_LOCK_INIT(log); - - /* allocate/initialize the log write serialization lock */ - LOG_LOCK_INIT(log); - - LOGSYNC_LOCK_INIT(log); - - INIT_LIST_HEAD(&log->synclist); - - INIT_LIST_HEAD(&log->cqueue); - log->flush_tblk = NULL; - - log->count = 0; - - /* - * initialize log i/o - */ - if ((rc = lbmLogInit(log))) - return rc; - - if (!test_bit(log_INLINELOG, &log->flag)) - log->l2bsize = L2LOGPSIZE; - - /* check for disabled journaling to disk */ - if (log->no_integrity) { - /* - * Journal pages will still be filled. When the time comes - * to actually do the I/O, the write is not done, and the - * endio routine is called directly. - */ - bp = lbmAllocate(log , 0); - log->bp = bp; - bp->l_pn = bp->l_eor = 0; - } else { - /* - * validate log superblock - */ - if ((rc = lbmRead(log, 1, &bpsuper))) - goto errout10; - - logsuper = (struct logsuper *) bpsuper->l_ldata; - - if (logsuper->magic != cpu_to_le32(LOGMAGIC)) { - jfs_warn("*** Log Format Error ! ***"); - rc = -EINVAL; - goto errout20; - } - - /* logredo() should have been run successfully. */ - if (logsuper->state != cpu_to_le32(LOGREDONE)) { - jfs_warn("*** Log Is Dirty ! 
***"); - rc = -EINVAL; - goto errout20; - } - - /* initialize log from log superblock */ - if (test_bit(log_INLINELOG,&log->flag)) { - if (log->size != le32_to_cpu(logsuper->size)) { - rc = -EINVAL; - goto errout20; - } - jfs_info("lmLogInit: inline log:0x%p base:0x%Lx " - "size:0x%x", log, - (unsigned long long) log->base, log->size); - } else { - if (memcmp(logsuper->uuid, log->uuid, 16)) { - jfs_warn("wrong uuid on JFS log device"); - goto errout20; - } - log->size = le32_to_cpu(logsuper->size); - log->l2bsize = le32_to_cpu(logsuper->l2bsize); - jfs_info("lmLogInit: external log:0x%p base:0x%Lx " - "size:0x%x", log, - (unsigned long long) log->base, log->size); - } - - log->page = le32_to_cpu(logsuper->end) / LOGPSIZE; - log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page); - - /* - * initialize for log append write mode - */ - /* establish current/end-of-log page/buffer */ - if ((rc = lbmRead(log, log->page, &bp))) - goto errout20; - - lp = (struct logpage *) bp->l_ldata; - - jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d", - le32_to_cpu(logsuper->end), log->page, log->eor, - le16_to_cpu(lp->h.eor)); - - log->bp = bp; - bp->l_pn = log->page; - bp->l_eor = log->eor; - - /* if current page is full, move on to next page */ - if (log->eor >= LOGPSIZE - LOGPTLRSIZE) - lmNextPage(log); - - /* - * initialize log syncpoint - */ - /* - * write the first SYNCPT record with syncpoint = 0 - * (i.e., log redo up to HERE !); - * remove current page from lbm write queue at end of pageout - * (to write log superblock update), but do not release to - * freelist; - */ - lrd.logtid = 0; - lrd.backchain = 0; - lrd.type = cpu_to_le16(LOG_SYNCPT); - lrd.length = 0; - lrd.log.syncpt.sync = 0; - lsn = lmWriteRecord(log, NULL, &lrd, NULL); - bp = log->bp; - bp->l_ceor = bp->l_eor; - lp = (struct logpage *) bp->l_ldata; - lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); - lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0); - if ((rc = lbmIOWait(bp, 0))) - goto errout30; - - /* - * 
update/write superblock - */ - logsuper->state = cpu_to_le32(LOGMOUNT); - log->serial = le32_to_cpu(logsuper->serial) + 1; - logsuper->serial = cpu_to_le32(log->serial); - lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); - if ((rc = lbmIOWait(bpsuper, lbmFREE))) - goto errout30; - } - - /* initialize logsync parameters */ - log->logsize = (log->size - 2) << L2LOGPSIZE; - log->lsn = lsn; - log->syncpt = lsn; - log->sync = log->syncpt; - log->nextsync = LOGSYNC_DELTA(log->logsize); - - jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x", - log->lsn, log->syncpt, log->sync); - - /* - * initialize for lazy/group commit - */ - log->clsn = lsn; - - return 0; - - /* - * unwind on error - */ - errout30: /* release log page */ - log->wqueue = NULL; - bp->l_wqnext = NULL; - lbmFree(bp); - - errout20: /* release log superblock */ - lbmFree(bpsuper); - - errout10: /* unwind lbmLogInit() */ - lbmLogShutdown(log); - - jfs_warn("lmLogInit: exit(%d)", rc); - return rc; -} - - -/* - * NAME: lmLogClose() - * - * FUNCTION: remove file system <ipmnt> from active list of log <iplog> - * and close it on last close. 
- * - * PARAMETER: sb - superblock - * - * RETURN: errors from subroutines - * - * serialization: - */ -int lmLogClose(struct super_block *sb) -{ - struct jfs_sb_info *sbi = JFS_SBI(sb); - struct jfs_log *log = sbi->log; - struct block_device *bdev; - int rc = 0; - - jfs_info("lmLogClose: log:0x%p", log); - - mutex_lock(&jfs_log_mutex); - LOG_LOCK(log); - list_del(&sbi->log_list); - LOG_UNLOCK(log); - sbi->log = NULL; - - /* - * We need to make sure all of the "written" metapages - * actually make it to disk - */ - sync_blockdev(sb->s_bdev); - - if (test_bit(log_INLINELOG, &log->flag)) { - /* - * in-line log in host file system - */ - rc = lmLogShutdown(log); - kfree(log); - goto out; - } - - if (!log->no_integrity) - lmLogFileSystem(log, sbi, 0); - - if (!list_empty(&log->sb_list)) - goto out; - - /* - * TODO: ensure that the dummy_log is in a state to allow - * lbmLogShutdown to deallocate all the buffers and call - * kfree against dummy_log. For now, leave dummy_log & its - * buffers in memory, and resuse if another no-integrity mount - * is requested. 
- */ - if (log->no_integrity) - goto out; - - /* - * external log as separate logical volume - */ - list_del(&log->journal_list); - bdev = log->bdev; - rc = lmLogShutdown(log); - - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); - - kfree(log); - - out: - mutex_unlock(&jfs_log_mutex); - jfs_info("lmLogClose: exit(%d)", rc); - return rc; -} - - -/* - * NAME: jfs_flush_journal() - * - * FUNCTION: initiate write of any outstanding transactions to the journal - * and optionally wait until they are all written to disk - * - * wait == 0 flush until latest txn is committed, don't wait - * wait == 1 flush until latest txn is committed, wait - * wait > 1 flush until all txn's are complete, wait - */ -void jfs_flush_journal(struct jfs_log *log, int wait) -{ - int i; - struct tblock *target = NULL; - - /* jfs_write_inode may call us during read-only mount */ - if (!log) - return; - - jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait); - - LOGGC_LOCK(log); - - if (!list_empty(&log->cqueue)) { - /* - * This ensures that we will keep writing to the journal as long - * as there are unwritten commit records - */ - target = list_entry(log->cqueue.prev, struct tblock, cqueue); - - if (test_bit(log_FLUSH, &log->flag)) { - /* - * We're already flushing. - * if flush_tblk is NULL, we are flushing everything, - * so leave it that way. 
Otherwise, update it to the - * latest transaction - */ - if (log->flush_tblk) - log->flush_tblk = target; - } else { - /* Only flush until latest transaction is committed */ - log->flush_tblk = target; - set_bit(log_FLUSH, &log->flag); - - /* - * Initiate I/O on outstanding transactions - */ - if (!(log->cflag & logGC_PAGEOUT)) { - log->cflag |= logGC_PAGEOUT; - lmGCwrite(log, 0); - } - } - } - if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) { - /* Flush until all activity complete */ - set_bit(log_FLUSH, &log->flag); - log->flush_tblk = NULL; - } - - if (wait && target && !(target->flag & tblkGC_COMMITTED)) { - DECLARE_WAITQUEUE(__wait, current); - - add_wait_queue(&target->gcwait, &__wait); - set_current_state(TASK_UNINTERRUPTIBLE); - LOGGC_UNLOCK(log); - schedule(); - __set_current_state(TASK_RUNNING); - LOGGC_LOCK(log); - remove_wait_queue(&target->gcwait, &__wait); - } - LOGGC_UNLOCK(log); - - if (wait < 2) - return; - - write_special_inodes(log, filemap_fdatawrite); - - /* - * If there was recent activity, we may need to wait - * for the lazycommit thread to catch up - */ - if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) { - for (i = 0; i < 200; i++) { /* Too much? 
*/ - msleep(250); - write_special_inodes(log, filemap_fdatawrite); - if (list_empty(&log->cqueue) && - list_empty(&log->synclist)) - break; - } - } - assert(list_empty(&log->cqueue)); - -#ifdef CONFIG_JFS_DEBUG - if (!list_empty(&log->synclist)) { - struct logsyncblk *lp; - - printk(KERN_ERR "jfs_flush_journal: synclist not empty\n"); - list_for_each_entry(lp, &log->synclist, synclist) { - if (lp->xflag & COMMIT_PAGE) { - struct metapage *mp = (struct metapage *)lp; - print_hex_dump(KERN_ERR, "metapage: ", - DUMP_PREFIX_ADDRESS, 16, 4, - mp, sizeof(struct metapage), 0); - print_hex_dump(KERN_ERR, "page: ", - DUMP_PREFIX_ADDRESS, 16, - sizeof(long), mp->page, - sizeof(struct page), 0); - } else - print_hex_dump(KERN_ERR, "tblock:", - DUMP_PREFIX_ADDRESS, 16, 4, - lp, sizeof(struct tblock), 0); - } - } -#else - WARN_ON(!list_empty(&log->synclist)); -#endif - clear_bit(log_FLUSH, &log->flag); -} - -/* - * NAME: lmLogShutdown() - * - * FUNCTION: log shutdown at last LogClose(). - * - * write log syncpt record. - * update super block to set redone flag to 0. - * - * PARAMETER: log - log inode - * - * RETURN: 0 - success - * - * serialization: single last close thread - */ -int lmLogShutdown(struct jfs_log * log) -{ - int rc; - struct lrd lrd; - int lsn; - struct logsuper *logsuper; - struct lbuf *bpsuper; - struct lbuf *bp; - struct logpage *lp; - - jfs_info("lmLogShutdown: log:0x%p", log); - - jfs_flush_journal(log, 2); - - /* - * write the last SYNCPT record with syncpoint = 0 - * (i.e., log redo up to HERE !) 
- */ - lrd.logtid = 0; - lrd.backchain = 0; - lrd.type = cpu_to_le16(LOG_SYNCPT); - lrd.length = 0; - lrd.log.syncpt.sync = 0; - - lsn = lmWriteRecord(log, NULL, &lrd, NULL); - bp = log->bp; - lp = (struct logpage *) bp->l_ldata; - lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); - lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0); - lbmIOWait(log->bp, lbmFREE); - log->bp = NULL; - - /* - * synchronous update log superblock - * mark log state as shutdown cleanly - * (i.e., Log does not need to be replayed). - */ - if ((rc = lbmRead(log, 1, &bpsuper))) - goto out; - - logsuper = (struct logsuper *) bpsuper->l_ldata; - logsuper->state = cpu_to_le32(LOGREDONE); - logsuper->end = cpu_to_le32(lsn); - lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); - rc = lbmIOWait(bpsuper, lbmFREE); - - jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d", - lsn, log->page, log->eor); - - out: - /* - * shutdown per log i/o - */ - lbmLogShutdown(log); - - if (rc) { - jfs_warn("lmLogShutdown: exit(%d)", rc); - } - return rc; -} - - -/* - * NAME: lmLogFileSystem() - * - * FUNCTION: insert (<activate> = true)/remove (<activate> = false) - * file system into/from log active file system list. - * - * PARAMETE: log - pointer to logs inode. - * fsdev - kdev_t of filesystem. - * serial - pointer to returned log serial number - * activate - insert/remove device from active list. - * - * RETURN: 0 - success - * errors returned by vms_iowait(). - */ -static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi, - int activate) -{ - int rc = 0; - int i; - struct logsuper *logsuper; - struct lbuf *bpsuper; - char *uuid = sbi->uuid; - - /* - * insert/remove file system device to log active file system list. 
- */ - if ((rc = lbmRead(log, 1, &bpsuper))) - return rc; - - logsuper = (struct logsuper *) bpsuper->l_ldata; - if (activate) { - for (i = 0; i < MAX_ACTIVE; i++) - if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) { - memcpy(logsuper->active[i].uuid, uuid, 16); - sbi->aggregate = i; - break; - } - if (i == MAX_ACTIVE) { - jfs_warn("Too many file systems sharing journal!"); - lbmFree(bpsuper); - return -EMFILE; /* Is there a better rc? */ - } - } else { - for (i = 0; i < MAX_ACTIVE; i++) - if (!memcmp(logsuper->active[i].uuid, uuid, 16)) { - memcpy(logsuper->active[i].uuid, NULL_UUID, 16); - break; - } - if (i == MAX_ACTIVE) { - jfs_warn("Somebody stomped on the journal!"); - lbmFree(bpsuper); - return -EIO; - } - - } - - /* - * synchronous write log superblock: - * - * write sidestream bypassing write queue: - * at file system mount, log super block is updated for - * activation of the file system before any log record - * (MOUNT record) of the file system, and at file system - * unmount, all meta data for the file system has been - * flushed before log super block is updated for deactivation - * of the file system. - */ - lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); - rc = lbmIOWait(bpsuper, lbmFREE); - - return rc; -} - -/* - * log buffer manager (lbm) - * ------------------------ - * - * special purpose buffer manager supporting log i/o requirements. - * - * per log write queue: - * log pageout occurs in serial order by fifo write queue and - * restricting to a single i/o in pregress at any one time. - * a circular singly-linked list - * (log->wrqueue points to the tail, and buffers are linked via - * bp->wrqueue field), and - * maintains log page in pageout ot waiting for pageout in serial pageout. 
- */ - -/* - * lbmLogInit() - * - * initialize per log I/O setup at lmLogInit() - */ -static int lbmLogInit(struct jfs_log * log) -{ /* log inode */ - int i; - struct lbuf *lbuf; - - jfs_info("lbmLogInit: log:0x%p", log); - - /* initialize current buffer cursor */ - log->bp = NULL; - - /* initialize log device write queue */ - log->wqueue = NULL; - - /* - * Each log has its own buffer pages allocated to it. These are - * not managed by the page cache. This ensures that a transaction - * writing to the log does not block trying to allocate a page from - * the page cache (for the log). This would be bad, since page - * allocation waits on the kswapd thread that may be committing inodes - * which would cause log activity. Was that clear? I'm trying to - * avoid deadlock here. - */ - init_waitqueue_head(&log->free_wait); - - log->lbuf_free = NULL; - - for (i = 0; i < LOGPAGES;) { - char *buffer; - uint offset; - struct page *page; - - buffer = (char *) get_zeroed_page(GFP_KERNEL); - if (buffer == NULL) - goto error; - page = virt_to_page(buffer); - for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) { - lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL); - if (lbuf == NULL) { - if (offset == 0) - free_page((unsigned long) buffer); - goto error; - } - if (offset) /* we already have one reference */ - get_page(page); - lbuf->l_offset = offset; - lbuf->l_ldata = buffer + offset; - lbuf->l_page = page; - lbuf->l_log = log; - init_waitqueue_head(&lbuf->l_ioevent); - - lbuf->l_freelist = log->lbuf_free; - log->lbuf_free = lbuf; - i++; - } - } - - return (0); - - error: - lbmLogShutdown(log); - return -ENOMEM; -} - - -/* - * lbmLogShutdown() - * - * finalize per log I/O setup at lmLogShutdown() - */ -static void lbmLogShutdown(struct jfs_log * log) -{ - struct lbuf *lbuf; - - jfs_info("lbmLogShutdown: log:0x%p", log); - - lbuf = log->lbuf_free; - while (lbuf) { - struct lbuf *next = lbuf->l_freelist; - __free_page(lbuf->l_page); - kfree(lbuf); - lbuf = next; - } -} - - -/* 
- * lbmAllocate() - * - * allocate an empty log buffer - */ -static struct lbuf *lbmAllocate(struct jfs_log * log, int pn) -{ - struct lbuf *bp; - unsigned long flags; - - /* - * recycle from log buffer freelist if any - */ - LCACHE_LOCK(flags); - LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags); - log->lbuf_free = bp->l_freelist; - LCACHE_UNLOCK(flags); - - bp->l_flag = 0; - - bp->l_wqnext = NULL; - bp->l_freelist = NULL; - - bp->l_pn = pn; - bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize)); - bp->l_ceor = 0; - - return bp; -} - - -/* - * lbmFree() - * - * release a log buffer to freelist - */ -static void lbmFree(struct lbuf * bp) -{ - unsigned long flags; - - LCACHE_LOCK(flags); - - lbmfree(bp); - - LCACHE_UNLOCK(flags); -} - -static void lbmfree(struct lbuf * bp) -{ - struct jfs_log *log = bp->l_log; - - assert(bp->l_wqnext == NULL); - - /* - * return the buffer to head of freelist - */ - bp->l_freelist = log->lbuf_free; - log->lbuf_free = bp; - - wake_up(&log->free_wait); - return; -} - - -/* - * NAME: lbmRedrive - * - * FUNCTION: add a log buffer to the log redrive list - * - * PARAMETER: - * bp - log buffer - * - * NOTES: - * Takes log_redrive_lock. 
- */ -static inline void lbmRedrive(struct lbuf *bp) -{ - unsigned long flags; - - spin_lock_irqsave(&log_redrive_lock, flags); - bp->l_redrive_next = log_redrive_list; - log_redrive_list = bp; - spin_unlock_irqrestore(&log_redrive_lock, flags); - - wake_up_process(jfsIOthread); -} - - -/* - * lbmRead() - */ -static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) -{ - struct bio *bio; - struct lbuf *bp; - - /* - * allocate a log buffer - */ - *bpp = bp = lbmAllocate(log, pn); - jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn); - - bp->l_flag |= lbmREAD; - - bio = bio_alloc(GFP_NOFS, 1); - - bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); - bio->bi_bdev = log->bdev; - bio->bi_io_vec[0].bv_page = bp->l_page; - bio->bi_io_vec[0].bv_len = LOGPSIZE; - bio->bi_io_vec[0].bv_offset = bp->l_offset; - - bio->bi_vcnt = 1; - bio->bi_idx = 0; - bio->bi_size = LOGPSIZE; - - bio->bi_end_io = lbmIODone; - bio->bi_private = bp; - submit_bio(READ_SYNC, bio); - - wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD)); - - return 0; -} - - -/* - * lbmWrite() - * - * buffer at head of pageout queue stays after completion of - * partial-page pageout and redriven by explicit initiation of - * pageout by caller until full-page pageout is completed and - * released. - * - * device driver i/o done redrives pageout of new buffer at - * head of pageout queue when current buffer at head of pageout - * queue is released at the completion of its full-page pageout. - * - * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit(). 
- * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone() - */ -static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, - int cant_block) -{ - struct lbuf *tail; - unsigned long flags; - - jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn); - - /* map the logical block address to physical block address */ - bp->l_blkno = - log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); - - LCACHE_LOCK(flags); /* disable+lock */ - - /* - * initialize buffer for device driver - */ - bp->l_flag = flag; - - /* - * insert bp at tail of write queue associated with log - * - * (request is either for bp already/currently at head of queue - * or new bp to be inserted at tail) - */ - tail = log->wqueue; - - /* is buffer not already on write queue ? */ - if (bp->l_wqnext == NULL) { - /* insert at tail of wqueue */ - if (tail == NULL) { - log->wqueue = bp; - bp->l_wqnext = bp; - } else { - log->wqueue = bp; - bp->l_wqnext = tail->l_wqnext; - tail->l_wqnext = bp; - } - - tail = bp; - } - - /* is buffer at head of wqueue and for write ? 
*/ - if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) { - LCACHE_UNLOCK(flags); /* unlock+enable */ - return; - } - - LCACHE_UNLOCK(flags); /* unlock+enable */ - - if (cant_block) - lbmRedrive(bp); - else if (flag & lbmSYNC) - lbmStartIO(bp); - else { - LOGGC_UNLOCK(log); - lbmStartIO(bp); - LOGGC_LOCK(log); - } -} - - -/* - * lbmDirectWrite() - * - * initiate pageout bypassing write queue for sidestream - * (e.g., log superblock) write; - */ -static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag) -{ - jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x", - bp, flag, bp->l_pn); - - /* - * initialize buffer for device driver - */ - bp->l_flag = flag | lbmDIRECT; - - /* map the logical block address to physical block address */ - bp->l_blkno = - log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); - - /* - * initiate pageout of the page - */ - lbmStartIO(bp); -} - - -/* - * NAME: lbmStartIO() - * - * FUNCTION: Interface to DD strategy routine - * - * RETURN: none - * - * serialization: LCACHE_LOCK() is NOT held during log i/o; - */ -static void lbmStartIO(struct lbuf * bp) -{ - struct bio *bio; - struct jfs_log *log = bp->l_log; - - jfs_info("lbmStartIO\n"); - - bio = bio_alloc(GFP_NOFS, 1); - bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); - bio->bi_bdev = log->bdev; - bio->bi_io_vec[0].bv_page = bp->l_page; - bio->bi_io_vec[0].bv_len = LOGPSIZE; - bio->bi_io_vec[0].bv_offset = bp->l_offset; - - bio->bi_vcnt = 1; - bio->bi_idx = 0; - bio->bi_size = LOGPSIZE; - - bio->bi_end_io = lbmIODone; - bio->bi_private = bp; - - /* check if journaling to disk has been disabled */ - if (log->no_integrity) { - bio->bi_size = 0; - lbmIODone(bio, 0); - } else { - submit_bio(WRITE_SYNC, bio); - INCREMENT(lmStat.submitted); - } -} - - -/* - * lbmIOWait() - */ -static int lbmIOWait(struct lbuf * bp, int flag) -{ - unsigned long flags; - int rc = 0; - - jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag); - - LCACHE_LOCK(flags); /* 
disable+lock */ - - LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags); - - rc = (bp->l_flag & lbmERROR) ? -EIO : 0; - - if (flag & lbmFREE) - lbmfree(bp); - - LCACHE_UNLOCK(flags); /* unlock+enable */ - - jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag); - return rc; -} - -/* - * lbmIODone() - * - * executed at INTIODONE level - */ -static void lbmIODone(struct bio *bio, int error) -{ - struct lbuf *bp = bio->bi_private; - struct lbuf *nextbp, *tail; - struct jfs_log *log; - unsigned long flags; - - /* - * get back jfs buffer bound to the i/o buffer - */ - jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag); - - LCACHE_LOCK(flags); /* disable+lock */ - - bp->l_flag |= lbmDONE; - - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { - bp->l_flag |= lbmERROR; - - jfs_err("lbmIODone: I/O error in JFS log"); - } - - bio_put(bio); - - /* - * pagein completion - */ - if (bp->l_flag & lbmREAD) { - bp->l_flag &= ~lbmREAD; - - LCACHE_UNLOCK(flags); /* unlock+enable */ - - /* wakeup I/O initiator */ - LCACHE_WAKEUP(&bp->l_ioevent); - - return; - } - - /* - * pageout completion - * - * the bp at the head of write queue has completed pageout. - * - * if single-commit/full-page pageout, remove the current buffer - * from head of pageout queue, and redrive pageout with - * the new buffer at head of pageout queue; - * otherwise, the partial-page pageout buffer stays at - * the head of pageout queue to be redriven for pageout - * by lmGroupCommit() until full-page pageout is completed. 
- */ - bp->l_flag &= ~lbmWRITE; - INCREMENT(lmStat.pagedone); - - /* update committed lsn */ - log = bp->l_log; - log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor; - - if (bp->l_flag & lbmDIRECT) { - LCACHE_WAKEUP(&bp->l_ioevent); - LCACHE_UNLOCK(flags); - return; - } - - tail = log->wqueue; - - /* single element queue */ - if (bp == tail) { - /* remove head buffer of full-page pageout - * from log device write queue - */ - if (bp->l_flag & lbmRELEASE) { - log->wqueue = NULL; - bp->l_wqnext = NULL; - } - } - /* multi element queue */ - else { - /* remove head buffer of full-page pageout - * from log device write queue - */ - if (bp->l_flag & lbmRELEASE) { - nextbp = tail->l_wqnext = bp->l_wqnext; - bp->l_wqnext = NULL; - - /* - * redrive pageout of next page at head of write queue: - * redrive next page without any bound tblk - * (i.e., page w/o any COMMIT records), or - * first page of new group commit which has been - * queued after current page (subsequent pageout - * is performed synchronously, except page without - * any COMMITs) by lmGroupCommit() as indicated - * by lbmWRITE flag; - */ - if (nextbp->l_flag & lbmWRITE) { - /* - * We can't do the I/O at interrupt time. 
- * The jfsIO thread can do it - */ - lbmRedrive(nextbp); - } - } - } - - /* - * synchronous pageout: - * - * buffer has not necessarily been removed from write queue - * (e.g., synchronous write of partial-page with COMMIT): - * leave buffer for i/o initiator to dispose - */ - if (bp->l_flag & lbmSYNC) { - LCACHE_UNLOCK(flags); /* unlock+enable */ - - /* wakeup I/O initiator */ - LCACHE_WAKEUP(&bp->l_ioevent); - } - - /* - * Group Commit pageout: - */ - else if (bp->l_flag & lbmGC) { - LCACHE_UNLOCK(flags); - lmPostGC(bp); - } - - /* - * asynchronous pageout: - * - * buffer must have been removed from write queue: - * insert buffer at head of freelist where it can be recycled - */ - else { - assert(bp->l_flag & lbmRELEASE); - assert(bp->l_flag & lbmFREE); - lbmfree(bp); - - LCACHE_UNLOCK(flags); /* unlock+enable */ - } -} - -int jfsIOWait(void *arg) -{ - struct lbuf *bp; - - do { - spin_lock_irq(&log_redrive_lock); - while ((bp = log_redrive_list)) { - log_redrive_list = bp->l_redrive_next; - bp->l_redrive_next = NULL; - spin_unlock_irq(&log_redrive_lock); - lbmStartIO(bp); - spin_lock_irq(&log_redrive_lock); - } - - if (freezing(current)) { - spin_unlock_irq(&log_redrive_lock); - try_to_freeze(); - } else { - set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irq(&log_redrive_lock); - schedule(); - __set_current_state(TASK_RUNNING); - } - } while (!kthread_should_stop()); - - jfs_info("jfsIOWait being killed!"); - return 0; -} - -/* - * NAME: lmLogFormat()/jfs_logform() - * - * FUNCTION: format file system log - * - * PARAMETERS: - * log - volume log - * logAddress - start address of log space in FS block - * logSize - length of log space in FS block; - * - * RETURN: 0 - success - * -EIO - i/o error - * - * XXX: We're synchronously writing one page at a time. This needs to - * be improved by writing multiple pages at once. 
- */ -int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) -{ - int rc = -EIO; - struct jfs_sb_info *sbi; - struct logsuper *logsuper; - struct logpage *lp; - int lspn; /* log sequence page number */ - struct lrd *lrd_ptr; - int npages = 0; - struct lbuf *bp; - - jfs_info("lmLogFormat: logAddress:%Ld logSize:%d", - (long long)logAddress, logSize); - - sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list); - - /* allocate a log buffer */ - bp = lbmAllocate(log, 1); - - npages = logSize >> sbi->l2nbperpage; - - /* - * log space: - * - * page 0 - reserved; - * page 1 - log superblock; - * page 2 - log data page: A SYNC log record is written - * into this page at logform time; - * pages 3-N - log data page: set to empty log data pages; - */ - /* - * init log superblock: log page 1 - */ - logsuper = (struct logsuper *) bp->l_ldata; - - logsuper->magic = cpu_to_le32(LOGMAGIC); - logsuper->version = cpu_to_le32(LOGVERSION); - logsuper->state = cpu_to_le32(LOGREDONE); - logsuper->flag = cpu_to_le32(sbi->mntflag); /* ? */ - logsuper->size = cpu_to_le32(npages); - logsuper->bsize = cpu_to_le32(sbi->bsize); - logsuper->l2bsize = cpu_to_le32(sbi->l2bsize); - logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE); - - bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; - bp->l_blkno = logAddress + sbi->nbperpage; - lbmStartIO(bp); - if ((rc = lbmIOWait(bp, 0))) - goto exit; - - /* - * init pages 2 to npages-1 as log data pages: - * - * log page sequence number (lpsn) initialization: - * - * pn: 0 1 2 3 n-1 - * +-----+-----+=====+=====+===.....===+=====+ - * lspn: N-1 0 1 N-2 - * <--- N page circular file ----> - * - * the N (= npages-2) data pages of the log is maintained as - * a circular file for the log records; - * lpsn grows by 1 monotonically as each log page is written - * to the circular file of the log; - * and setLogpage() will not reset the page number even if - * the eor is equal to LOGPHDRSIZE. 
In order for binary search - * still work in find log end process, we have to simulate the - * log wrap situation at the log format time. - * The 1st log page written will have the highest lpsn. Then - * the succeeding log pages will have ascending order of - * the lspn starting from 0, ... (N-2) - */ - lp = (struct logpage *) bp->l_ldata; - /* - * initialize 1st log page to be written: lpsn = N - 1, - * write a SYNCPT log record is written to this page - */ - lp->h.page = lp->t.page = cpu_to_le32(npages - 3); - lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE); - - lrd_ptr = (struct lrd *) &lp->data; - lrd_ptr->logtid = 0; - lrd_ptr->backchain = 0; - lrd_ptr->type = cpu_to_le16(LOG_SYNCPT); - lrd_ptr->length = 0; - lrd_ptr->log.syncpt.sync = 0; - - bp->l_blkno += sbi->nbperpage; - bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; - lbmStartIO(bp); - if ((rc = lbmIOWait(bp, 0))) - goto exit; - - /* - * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2) - */ - for (lspn = 0; lspn < npages - 3; lspn++) { - lp->h.page = lp->t.page = cpu_to_le32(lspn); - lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE); - - bp->l_blkno += sbi->nbperpage; - bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; - lbmStartIO(bp); - if ((rc = lbmIOWait(bp, 0))) - goto exit; - } - - rc = 0; -exit: - /* - * finalize log - */ - /* release the buffer */ - lbmFree(bp); - - return rc; -} - -#ifdef CONFIG_JFS_STATISTICS -static int jfs_lmstats_proc_show(struct seq_file *m, void *v) -{ - seq_printf(m, - "JFS Logmgr stats\n" - "================\n" - "commits = %d\n" - "writes submitted = %d\n" - "writes completed = %d\n" - "full pages submitted = %d\n" - "partial pages submitted = %d\n", - lmStat.commit, - lmStat.submitted, - lmStat.pagedone, - lmStat.full_page, - lmStat.partial_page); - return 0; -} - -static int jfs_lmstats_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, jfs_lmstats_proc_show, NULL); -} - -const struct file_operations jfs_lmstats_proc_fops = { - 
.owner = THIS_MODULE, - .open = jfs_lmstats_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif /* CONFIG_JFS_STATISTICS */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_logmgr.h b/ANDROID_3.4.5/fs/jfs/jfs_logmgr.h deleted file mode 100644 index e38c2159..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_logmgr.h +++ /dev/null @@ -1,513 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_LOGMGR -#define _H_JFS_LOGMGR - -#include "jfs_filsys.h" -#include "jfs_lock.h" - -/* - * log manager configuration parameters - */ - -/* log page size */ -#define LOGPSIZE 4096 -#define L2LOGPSIZE 12 - -#define LOGPAGES 16 /* Log pages per mounted file system */ - -/* - * log logical volume - * - * a log is used to make the commit operation on journalled - * files within the same logical volume group atomic. - * a log is implemented with a logical volume. - * there is one log per logical volume group. - * - * block 0 of the log logical volume is not used (ipl etc). 
- * block 1 contains a log "superblock" and is used by logFormat(), - * lmLogInit(), lmLogShutdown(), and logRedo() to record status - * of the log but is not otherwise used during normal processing. - * blocks 2 - (N-1) are used to contain log records. - * - * when a volume group is varied-on-line, logRedo() must have - * been executed before the file systems (logical volumes) in - * the volume group can be mounted. - */ -/* - * log superblock (block 1 of logical volume) - */ -#define LOGSUPER_B 1 -#define LOGSTART_B 2 - -#define LOGMAGIC 0x87654321 -#define LOGVERSION 1 - -#define MAX_ACTIVE 128 /* Max active file systems sharing log */ - -struct logsuper { - __le32 magic; /* 4: log lv identifier */ - __le32 version; /* 4: version number */ - __le32 serial; /* 4: log open/mount counter */ - __le32 size; /* 4: size in number of LOGPSIZE blocks */ - __le32 bsize; /* 4: logical block size in byte */ - __le32 l2bsize; /* 4: log2 of bsize */ - - __le32 flag; /* 4: option */ - __le32 state; /* 4: state - see below */ - - __le32 end; /* 4: addr of last log record set by logredo */ - char uuid[16]; /* 16: 128-bit journal uuid */ - char label[16]; /* 16: journal label */ - struct { - char uuid[16]; - } active[MAX_ACTIVE]; /* 2048: active file systems list */ -}; - -#define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" - -/* log flag: commit option (see jfs_filsys.h) */ - -/* log state */ -#define LOGMOUNT 0 /* log mounted by lmLogInit() */ -#define LOGREDONE 1 /* log shutdown by lmLogShutdown(). - * log redo completed by logredo(). - */ -#define LOGWRAP 2 /* log wrapped */ -#define LOGREADERR 3 /* log read error detected in logredo() */ - - -/* - * log logical page - * - * (this comment should be rewritten !) - * the header and trailer structures (h,t) will normally have - * the same page and eor value. - * An exception to this occurs when a complete page write is not - * accomplished on a power failure. 
Since the hardware may "split write" - * sectors in the page, any out of order sequence may occur during powerfail - * and needs to be recognized during log replay. The xor value is - * an "exclusive or" of all log words in the page up to eor. This - * 32 bit eor is stored with the top 16 bits in the header and the - * bottom 16 bits in the trailer. logredo can easily recognize pages - * that were not completed by reconstructing this eor and checking - * the log page. - * - * Previous versions of the operating system did not allow split - * writes and detected partially written records in logredo by - * ordering the updates to the header, trailer, and the move of data - * into the logdata area. The order: (1) data is moved (2) header - * is updated (3) trailer is updated. In logredo, when the header - * differed from the trailer, the header and trailer were reconciled - * as follows: if h.page != t.page they were set to the smaller of - * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only) - * h.eor != t.eor they were set to the smaller of their two values. - */ -struct logpage { - struct { /* header */ - __le32 page; /* 4: log sequence page number */ - __le16 rsrvd; /* 2: */ - __le16 eor; /* 2: end-of-log offset of lasrt record write */ - } h; - - __le32 data[LOGPSIZE / 4 - 4]; /* log record area */ - - struct { /* trailer */ - __le32 page; /* 4: normally the same as h.page */ - __le16 rsrvd; /* 2: */ - __le16 eor; /* 2: normally the same as h.eor */ - } t; -}; - -#define LOGPHDRSIZE 8 /* log page header size */ -#define LOGPTLRSIZE 8 /* log page trailer size */ - - -/* - * log record - * - * (this comment should be rewritten !) 
- * jfs uses only "after" log records (only a single writer is allowed - * in a page, pages are written to temporary paging space if - * if they must be written to disk before commit, and i/o is - * scheduled for modified pages to their home location after - * the log records containing the after values and the commit - * record is written to the log on disk, undo discards the copy - * in main-memory.) - * - * a log record consists of a data area of variable length followed by - * a descriptor of fixed size LOGRDSIZE bytes. - * the data area is rounded up to an integral number of 4-bytes and - * must be no longer than LOGPSIZE. - * the descriptor is of size of multiple of 4-bytes and aligned on a - * 4-byte boundary. - * records are packed one after the other in the data area of log pages. - * (sometimes a DUMMY record is inserted so that at least one record ends - * on every page or the longest record is placed on at most two pages). - * the field eor in page header/trailer points to the byte following - * the last record on a page. 
- */ - -/* log record types */ -#define LOG_COMMIT 0x8000 -#define LOG_SYNCPT 0x4000 -#define LOG_MOUNT 0x2000 -#define LOG_REDOPAGE 0x0800 -#define LOG_NOREDOPAGE 0x0080 -#define LOG_NOREDOINOEXT 0x0040 -#define LOG_UPDATEMAP 0x0008 -#define LOG_NOREDOFILE 0x0001 - -/* REDOPAGE/NOREDOPAGE log record data type */ -#define LOG_INODE 0x0001 -#define LOG_XTREE 0x0002 -#define LOG_DTREE 0x0004 -#define LOG_BTROOT 0x0010 -#define LOG_EA 0x0020 -#define LOG_ACL 0x0040 -#define LOG_DATA 0x0080 -#define LOG_NEW 0x0100 -#define LOG_EXTEND 0x0200 -#define LOG_RELOCATE 0x0400 -#define LOG_DIR_XTREE 0x0800 /* Xtree is in directory inode */ - -/* UPDATEMAP log record descriptor type */ -#define LOG_ALLOCXADLIST 0x0080 -#define LOG_ALLOCPXDLIST 0x0040 -#define LOG_ALLOCXAD 0x0020 -#define LOG_ALLOCPXD 0x0010 -#define LOG_FREEXADLIST 0x0008 -#define LOG_FREEPXDLIST 0x0004 -#define LOG_FREEXAD 0x0002 -#define LOG_FREEPXD 0x0001 - - -struct lrd { - /* - * type independent area - */ - __le32 logtid; /* 4: log transaction identifier */ - __le32 backchain; /* 4: ptr to prev record of same transaction */ - __le16 type; /* 2: record type */ - __le16 length; /* 2: length of data in record (in byte) */ - __le32 aggregate; /* 4: file system lv/aggregate */ - /* (16) */ - - /* - * type dependent area (20) - */ - union { - - /* - * COMMIT: commit - * - * transaction commit: no type-dependent information; - */ - - /* - * REDOPAGE: after-image - * - * apply after-image; - * - * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; - */ - struct { - __le32 fileset; /* 4: fileset number */ - __le32 inode; /* 4: inode number */ - __le16 type; /* 2: REDOPAGE record type */ - __le16 l2linesize; /* 2: log2 of line size */ - pxd_t pxd; /* 8: on-disk page pxd */ - } redopage; /* (20) */ - - /* - * NOREDOPAGE: the page is freed - * - * do not apply after-image records which precede this record - * in the log with the same page block number to this page. - * - * N.B. 
REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; - */ - struct { - __le32 fileset; /* 4: fileset number */ - __le32 inode; /* 4: inode number */ - __le16 type; /* 2: NOREDOPAGE record type */ - __le16 rsrvd; /* 2: reserved */ - pxd_t pxd; /* 8: on-disk page pxd */ - } noredopage; /* (20) */ - - /* - * UPDATEMAP: update block allocation map - * - * either in-line PXD, - * or out-of-line XADLIST; - * - * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; - */ - struct { - __le32 fileset; /* 4: fileset number */ - __le32 inode; /* 4: inode number */ - __le16 type; /* 2: UPDATEMAP record type */ - __le16 nxd; /* 2: number of extents */ - pxd_t pxd; /* 8: pxd */ - } updatemap; /* (20) */ - - /* - * NOREDOINOEXT: the inode extent is freed - * - * do not apply after-image records which precede this - * record in the log with the any of the 4 page block - * numbers in this inode extent. - * - * NOTE: The fileset and pxd fields MUST remain in - * the same fields in the REDOPAGE record format. - * - */ - struct { - __le32 fileset; /* 4: fileset number */ - __le32 iagnum; /* 4: IAG number */ - __le32 inoext_idx; /* 4: inode extent index */ - pxd_t pxd; /* 8: on-disk page pxd */ - } noredoinoext; /* (20) */ - - /* - * SYNCPT: log sync point - * - * replay log up to syncpt address specified; - */ - struct { - __le32 sync; /* 4: syncpt address (0 = here) */ - } syncpt; - - /* - * MOUNT: file system mount - * - * file system mount: no type-dependent information; - */ - - /* - * ? FREEXTENT: free specified extent(s) - * - * free specified extent(s) from block allocation map - * N.B.: nextents should be length of data/sizeof(xad_t) - */ - struct { - __le32 type; /* 4: FREEXTENT record type */ - __le32 nextent; /* 4: number of extents */ - - /* data: PXD or XAD list */ - } freextent; - - /* - * ? NOREDOFILE: this file is freed - * - * do not apply records which precede this record in the log - * with the same inode number. 
- * - * NOREDOFILE must be the first to be written at commit - * (last to be read in logredo()) - it prevents - * replay of preceding updates of all preceding generations - * of the inumber esp. the on-disk inode itself. - */ - struct { - __le32 fileset; /* 4: fileset number */ - __le32 inode; /* 4: inode number */ - } noredofile; - - /* - * ? NEWPAGE: - * - * metadata type dependent - */ - struct { - __le32 fileset; /* 4: fileset number */ - __le32 inode; /* 4: inode number */ - __le32 type; /* 4: NEWPAGE record type */ - pxd_t pxd; /* 8: on-disk page pxd */ - } newpage; - - /* - * ? DUMMY: filler - * - * no type-dependent information - */ - } log; -}; /* (36) */ - -#define LOGRDSIZE (sizeof(struct lrd)) - -/* - * line vector descriptor - */ -struct lvd { - __le16 offset; - __le16 length; -}; - - -/* - * log logical volume - */ -struct jfs_log { - - struct list_head sb_list;/* This is used to sync metadata - * before writing syncpt. - */ - struct list_head journal_list; /* Global list */ - struct block_device *bdev; /* 4: log lv pointer */ - int serial; /* 4: log mount serial number */ - - s64 base; /* @8: log extent address (inline log ) */ - int size; /* 4: log size in log page (in page) */ - int l2bsize; /* 4: log2 of bsize */ - - unsigned long flag; /* 4: flag */ - - struct lbuf *lbuf_free; /* 4: free lbufs */ - wait_queue_head_t free_wait; /* 4: */ - - /* log write */ - int logtid; /* 4: log tid */ - int page; /* 4: page number of eol page */ - int eor; /* 4: eor of last record in eol page */ - struct lbuf *bp; /* 4: current log page buffer */ - - struct mutex loglock; /* 4: log write serialization lock */ - - /* syncpt */ - int nextsync; /* 4: bytes to write before next syncpt */ - int active; /* 4: */ - wait_queue_head_t syncwait; /* 4: */ - - /* commit */ - uint cflag; /* 4: */ - struct list_head cqueue; /* FIFO commit queue */ - struct tblock *flush_tblk; /* tblk we're waiting on for flush */ - int gcrtc; /* 4: GC_READY transaction count */ - struct 
tblock *gclrt; /* 4: latest GC_READY transaction */ - spinlock_t gclock; /* 4: group commit lock */ - int logsize; /* 4: log data area size in byte */ - int lsn; /* 4: end-of-log */ - int clsn; /* 4: clsn */ - int syncpt; /* 4: addr of last syncpt record */ - int sync; /* 4: addr from last logsync() */ - struct list_head synclist; /* 8: logsynclist anchor */ - spinlock_t synclock; /* 4: synclist lock */ - struct lbuf *wqueue; /* 4: log pageout queue */ - int count; /* 4: count */ - char uuid[16]; /* 16: 128-bit uuid of log device */ - - int no_integrity; /* 3: flag to disable journaling to disk */ -}; - -/* - * Log flag - */ -#define log_INLINELOG 1 -#define log_SYNCBARRIER 2 -#define log_QUIESCE 3 -#define log_FLUSH 4 - -/* - * group commit flag - */ -/* jfs_log */ -#define logGC_PAGEOUT 0x00000001 - -/* tblock/lbuf */ -#define tblkGC_QUEUE 0x0001 -#define tblkGC_READY 0x0002 -#define tblkGC_COMMIT 0x0004 -#define tblkGC_COMMITTED 0x0008 -#define tblkGC_EOP 0x0010 -#define tblkGC_FREE 0x0020 -#define tblkGC_LEADER 0x0040 -#define tblkGC_ERROR 0x0080 -#define tblkGC_LAZY 0x0100 // D230860 -#define tblkGC_UNLOCKED 0x0200 // D230860 - -/* - * log cache buffer header - */ -struct lbuf { - struct jfs_log *l_log; /* 4: log associated with buffer */ - - /* - * data buffer base area - */ - uint l_flag; /* 4: pageout control flags */ - - struct lbuf *l_wqnext; /* 4: write queue link */ - struct lbuf *l_freelist; /* 4: freelistlink */ - - int l_pn; /* 4: log page number */ - int l_eor; /* 4: log record eor */ - int l_ceor; /* 4: committed log record eor */ - - s64 l_blkno; /* 8: log page block number */ - caddr_t l_ldata; /* 4: data page */ - struct page *l_page; /* The page itself */ - uint l_offset; /* Offset of l_ldata within the page */ - - wait_queue_head_t l_ioevent; /* 4: i/o done event */ -}; - -/* Reuse l_freelist for redrive list */ -#define l_redrive_next l_freelist - -/* - * logsynclist block - * - * common logsyncblk prefix for jbuf_t and tblock - */ -struct 
logsyncblk { - u16 xflag; /* flags */ - u16 flag; /* only meaninful in tblock */ - lid_t lid; /* lock id */ - s32 lsn; /* log sequence number */ - struct list_head synclist; /* log sync list link */ -}; - -/* - * logsynclist serialization (per log) - */ - -#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock) -#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags) -#define LOGSYNC_UNLOCK(log, flags) \ - spin_unlock_irqrestore(&(log)->synclock, flags) - -/* compute the difference in bytes of lsn from sync point */ -#define logdiff(diff, lsn, log)\ -{\ - diff = (lsn) - (log)->syncpt;\ - if (diff < 0)\ - diff += (log)->logsize;\ -} - -extern int lmLogOpen(struct super_block *sb); -extern int lmLogClose(struct super_block *sb); -extern int lmLogShutdown(struct jfs_log * log); -extern int lmLogInit(struct jfs_log * log); -extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); -extern int lmGroupCommit(struct jfs_log *, struct tblock *); -extern int jfsIOWait(void *); -extern void jfs_flush_journal(struct jfs_log * log, int wait); -extern void jfs_syncpt(struct jfs_log *log, int hard_sync); - -#endif /* _H_JFS_LOGMGR */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_metapage.c b/ANDROID_3.4.5/fs/jfs/jfs_metapage.c deleted file mode 100644 index 6740d34c..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_metapage.c +++ /dev/null @@ -1,843 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2005 - * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/fs.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/bio.h> -#include <linux/slab.h> -#include <linux/init.h> -#include <linux/buffer_head.h> -#include <linux/mempool.h> -#include <linux/seq_file.h> -#include "jfs_incore.h" -#include "jfs_superblock.h" -#include "jfs_filsys.h" -#include "jfs_metapage.h" -#include "jfs_txnmgr.h" -#include "jfs_debug.h" - -#ifdef CONFIG_JFS_STATISTICS -static struct { - uint pagealloc; /* # of page allocations */ - uint pagefree; /* # of page frees */ - uint lockwait; /* # of sleeping lock_metapage() calls */ -} mpStat; -#endif - -#define metapage_locked(mp) test_bit(META_locked, &(mp)->flag) -#define trylock_metapage(mp) test_and_set_bit_lock(META_locked, &(mp)->flag) - -static inline void unlock_metapage(struct metapage *mp) -{ - clear_bit_unlock(META_locked, &mp->flag); - wake_up(&mp->wait); -} - -static inline void __lock_metapage(struct metapage *mp) -{ - DECLARE_WAITQUEUE(wait, current); - INCREMENT(mpStat.lockwait); - add_wait_queue_exclusive(&mp->wait, &wait); - do { - set_current_state(TASK_UNINTERRUPTIBLE); - if (metapage_locked(mp)) { - unlock_page(mp->page); - io_schedule(); - lock_page(mp->page); - } - } while (trylock_metapage(mp)); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&mp->wait, &wait); -} - -/* - * Must have mp->page locked - */ -static inline void lock_metapage(struct metapage *mp) -{ - if (trylock_metapage(mp)) - __lock_metapage(mp); -} - -#define METAPOOL_MIN_PAGES 32 -static struct kmem_cache *metapage_cache; -static mempool_t *metapage_mempool; - -#define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE) - -#if MPS_PER_PAGE > 1 - -struct meta_anchor { - int mp_count; - atomic_t io_count; - 
struct metapage *mp[MPS_PER_PAGE]; -}; -#define mp_anchor(page) ((struct meta_anchor *)page_private(page)) - -static inline struct metapage *page_to_mp(struct page *page, int offset) -{ - if (!PagePrivate(page)) - return NULL; - return mp_anchor(page)->mp[offset >> L2PSIZE]; -} - -static inline int insert_metapage(struct page *page, struct metapage *mp) -{ - struct meta_anchor *a; - int index; - int l2mp_blocks; /* log2 blocks per metapage */ - - if (PagePrivate(page)) - a = mp_anchor(page); - else { - a = kzalloc(sizeof(struct meta_anchor), GFP_NOFS); - if (!a) - return -ENOMEM; - set_page_private(page, (unsigned long)a); - SetPagePrivate(page); - kmap(page); - } - - if (mp) { - l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits; - index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1); - a->mp_count++; - a->mp[index] = mp; - } - - return 0; -} - -static inline void remove_metapage(struct page *page, struct metapage *mp) -{ - struct meta_anchor *a = mp_anchor(page); - int l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits; - int index; - - index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1); - - BUG_ON(a->mp[index] != mp); - - a->mp[index] = NULL; - if (--a->mp_count == 0) { - kfree(a); - set_page_private(page, 0); - ClearPagePrivate(page); - kunmap(page); - } -} - -static inline void inc_io(struct page *page) -{ - atomic_inc(&mp_anchor(page)->io_count); -} - -static inline void dec_io(struct page *page, void (*handler) (struct page *)) -{ - if (atomic_dec_and_test(&mp_anchor(page)->io_count)) - handler(page); -} - -#else -static inline struct metapage *page_to_mp(struct page *page, int offset) -{ - return PagePrivate(page) ? 
(struct metapage *)page_private(page) : NULL; -} - -static inline int insert_metapage(struct page *page, struct metapage *mp) -{ - if (mp) { - set_page_private(page, (unsigned long)mp); - SetPagePrivate(page); - kmap(page); - } - return 0; -} - -static inline void remove_metapage(struct page *page, struct metapage *mp) -{ - set_page_private(page, 0); - ClearPagePrivate(page); - kunmap(page); -} - -#define inc_io(page) do {} while(0) -#define dec_io(page, handler) handler(page) - -#endif - -static void init_once(void *foo) -{ - struct metapage *mp = (struct metapage *)foo; - - mp->lid = 0; - mp->lsn = 0; - mp->flag = 0; - mp->data = NULL; - mp->clsn = 0; - mp->log = NULL; - set_bit(META_free, &mp->flag); - init_waitqueue_head(&mp->wait); -} - -static inline struct metapage *alloc_metapage(gfp_t gfp_mask) -{ - return mempool_alloc(metapage_mempool, gfp_mask); -} - -static inline void free_metapage(struct metapage *mp) -{ - mp->flag = 0; - set_bit(META_free, &mp->flag); - - mempool_free(mp, metapage_mempool); -} - -int __init metapage_init(void) -{ - /* - * Allocate the metapage structures - */ - metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage), - 0, 0, init_once); - if (metapage_cache == NULL) - return -ENOMEM; - - metapage_mempool = mempool_create_slab_pool(METAPOOL_MIN_PAGES, - metapage_cache); - - if (metapage_mempool == NULL) { - kmem_cache_destroy(metapage_cache); - return -ENOMEM; - } - - return 0; -} - -void metapage_exit(void) -{ - mempool_destroy(metapage_mempool); - kmem_cache_destroy(metapage_cache); -} - -static inline void drop_metapage(struct page *page, struct metapage *mp) -{ - if (mp->count || mp->nohomeok || test_bit(META_dirty, &mp->flag) || - test_bit(META_io, &mp->flag)) - return; - remove_metapage(page, mp); - INCREMENT(mpStat.pagefree); - free_metapage(mp); -} - -/* - * Metapage address space operations - */ - -static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock, - int *len) -{ - int rc = 0; - int xflag; 
- s64 xaddr; - sector_t file_blocks = (inode->i_size + inode->i_sb->s_blocksize - 1) >> - inode->i_blkbits; - - if (lblock >= file_blocks) - return 0; - if (lblock + *len > file_blocks) - *len = file_blocks - lblock; - - if (inode->i_ino) { - rc = xtLookup(inode, (s64)lblock, *len, &xflag, &xaddr, len, 0); - if ((rc == 0) && *len) - lblock = (sector_t)xaddr; - else - lblock = 0; - } /* else no mapping */ - - return lblock; -} - -static void last_read_complete(struct page *page) -{ - if (!PageError(page)) - SetPageUptodate(page); - unlock_page(page); -} - -static void metapage_read_end_io(struct bio *bio, int err) -{ - struct page *page = bio->bi_private; - - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { - printk(KERN_ERR "metapage_read_end_io: I/O error\n"); - SetPageError(page); - } - - dec_io(page, last_read_complete); - bio_put(bio); -} - -static void remove_from_logsync(struct metapage *mp) -{ - struct jfs_log *log = mp->log; - unsigned long flags; -/* - * This can race. Recheck that log hasn't been set to null, and after - * acquiring logsync lock, recheck lsn - */ - if (!log) - return; - - LOGSYNC_LOCK(log, flags); - if (mp->lsn) { - mp->log = NULL; - mp->lsn = 0; - mp->clsn = 0; - log->count--; - list_del(&mp->synclist); - } - LOGSYNC_UNLOCK(log, flags); -} - -static void last_write_complete(struct page *page) -{ - struct metapage *mp; - unsigned int offset; - - for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { - mp = page_to_mp(page, offset); - if (mp && test_bit(META_io, &mp->flag)) { - if (mp->lsn) - remove_from_logsync(mp); - clear_bit(META_io, &mp->flag); - } - /* - * I'd like to call drop_metapage here, but I don't think it's - * safe unless I have the page locked - */ - } - end_page_writeback(page); -} - -static void metapage_write_end_io(struct bio *bio, int err) -{ - struct page *page = bio->bi_private; - - BUG_ON(!PagePrivate(page)); - - if (! 
test_bit(BIO_UPTODATE, &bio->bi_flags)) { - printk(KERN_ERR "metapage_write_end_io: I/O error\n"); - SetPageError(page); - } - dec_io(page, last_write_complete); - bio_put(bio); -} - -static int metapage_writepage(struct page *page, struct writeback_control *wbc) -{ - struct bio *bio = NULL; - int block_offset; /* block offset of mp within page */ - struct inode *inode = page->mapping->host; - int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage; - int len; - int xlen; - struct metapage *mp; - int redirty = 0; - sector_t lblock; - int nr_underway = 0; - sector_t pblock; - sector_t next_block = 0; - sector_t page_start; - unsigned long bio_bytes = 0; - unsigned long bio_offset = 0; - int offset; - int bad_blocks = 0; - - page_start = (sector_t)page->index << - (PAGE_CACHE_SHIFT - inode->i_blkbits); - BUG_ON(!PageLocked(page)); - BUG_ON(PageWriteback(page)); - set_page_writeback(page); - - for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { - mp = page_to_mp(page, offset); - - if (!mp || !test_bit(META_dirty, &mp->flag)) - continue; - - if (mp->nohomeok && !test_bit(META_forcewrite, &mp->flag)) { - redirty = 1; - /* - * Make sure this page isn't blocked indefinitely. 
- * If the journal isn't undergoing I/O, push it - */ - if (mp->log && !(mp->log->cflag & logGC_PAGEOUT)) - jfs_flush_journal(mp->log, 0); - continue; - } - - clear_bit(META_dirty, &mp->flag); - set_bit(META_io, &mp->flag); - block_offset = offset >> inode->i_blkbits; - lblock = page_start + block_offset; - if (bio) { - if (xlen && lblock == next_block) { - /* Contiguous, in memory & on disk */ - len = min(xlen, blocks_per_mp); - xlen -= len; - bio_bytes += len << inode->i_blkbits; - continue; - } - /* Not contiguous */ - if (bio_add_page(bio, page, bio_bytes, bio_offset) < - bio_bytes) - goto add_failed; - /* - * Increment counter before submitting i/o to keep - * count from hitting zero before we're through - */ - inc_io(page); - if (!bio->bi_size) - goto dump_bio; - submit_bio(WRITE, bio); - nr_underway++; - bio = NULL; - } else - inc_io(page); - xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits; - pblock = metapage_get_blocks(inode, lblock, &xlen); - if (!pblock) { - printk(KERN_ERR "JFS: metapage_get_blocks failed\n"); - /* - * We already called inc_io(), but can't cancel it - * with dec_io() until we're done with the page - */ - bad_blocks++; - continue; - } - len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage); - - bio = bio_alloc(GFP_NOFS, 1); - bio->bi_bdev = inode->i_sb->s_bdev; - bio->bi_sector = pblock << (inode->i_blkbits - 9); - bio->bi_end_io = metapage_write_end_io; - bio->bi_private = page; - - /* Don't call bio_add_page yet, we may add to this vec */ - bio_offset = offset; - bio_bytes = len << inode->i_blkbits; - - xlen -= len; - next_block = lblock + len; - } - if (bio) { - if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes) - goto add_failed; - if (!bio->bi_size) - goto dump_bio; - - submit_bio(WRITE, bio); - nr_underway++; - } - if (redirty) - redirty_page_for_writepage(wbc, page); - - unlock_page(page); - - if (bad_blocks) - goto err_out; - - if (nr_underway == 0) - end_page_writeback(page); - - return 0; -add_failed: - /* We 
should never reach here, since we're only adding one vec */ - printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n"); - goto skip; -dump_bio: - print_hex_dump(KERN_ERR, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS, 16, - 4, bio, sizeof(*bio), 0); -skip: - bio_put(bio); - unlock_page(page); - dec_io(page, last_write_complete); -err_out: - while (bad_blocks--) - dec_io(page, last_write_complete); - return -EIO; -} - -static int metapage_readpage(struct file *fp, struct page *page) -{ - struct inode *inode = page->mapping->host; - struct bio *bio = NULL; - int block_offset; - int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits; - sector_t page_start; /* address of page in fs blocks */ - sector_t pblock; - int xlen; - unsigned int len; - int offset; - - BUG_ON(!PageLocked(page)); - page_start = (sector_t)page->index << - (PAGE_CACHE_SHIFT - inode->i_blkbits); - - block_offset = 0; - while (block_offset < blocks_per_page) { - xlen = blocks_per_page - block_offset; - pblock = metapage_get_blocks(inode, page_start + block_offset, - &xlen); - if (pblock) { - if (!PagePrivate(page)) - insert_metapage(page, NULL); - inc_io(page); - if (bio) - submit_bio(READ, bio); - - bio = bio_alloc(GFP_NOFS, 1); - bio->bi_bdev = inode->i_sb->s_bdev; - bio->bi_sector = pblock << (inode->i_blkbits - 9); - bio->bi_end_io = metapage_read_end_io; - bio->bi_private = page; - len = xlen << inode->i_blkbits; - offset = block_offset << inode->i_blkbits; - if (bio_add_page(bio, page, len, offset) < len) - goto add_failed; - block_offset += xlen; - } else - block_offset++; - } - if (bio) - submit_bio(READ, bio); - else - unlock_page(page); - - return 0; - -add_failed: - printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n"); - bio_put(bio); - dec_io(page, last_read_complete); - return -EIO; -} - -static int metapage_releasepage(struct page *page, gfp_t gfp_mask) -{ - struct metapage *mp; - int ret = 1; - int offset; - - for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { - mp = 
page_to_mp(page, offset); - - if (!mp) - continue; - - jfs_info("metapage_releasepage: mp = 0x%p", mp); - if (mp->count || mp->nohomeok || - test_bit(META_dirty, &mp->flag)) { - jfs_info("count = %ld, nohomeok = %d", mp->count, - mp->nohomeok); - ret = 0; - continue; - } - if (mp->lsn) - remove_from_logsync(mp); - remove_metapage(page, mp); - INCREMENT(mpStat.pagefree); - free_metapage(mp); - } - return ret; -} - -static void metapage_invalidatepage(struct page *page, unsigned long offset) -{ - BUG_ON(offset); - - BUG_ON(PageWriteback(page)); - - metapage_releasepage(page, 0); -} - -const struct address_space_operations jfs_metapage_aops = { - .readpage = metapage_readpage, - .writepage = metapage_writepage, - .releasepage = metapage_releasepage, - .invalidatepage = metapage_invalidatepage, - .set_page_dirty = __set_page_dirty_nobuffers, -}; - -struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, - unsigned int size, int absolute, - unsigned long new) -{ - int l2BlocksPerPage; - int l2bsize; - struct address_space *mapping; - struct metapage *mp = NULL; - struct page *page; - unsigned long page_index; - unsigned long page_offset; - - jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d", - inode->i_ino, lblock, absolute); - - l2bsize = inode->i_blkbits; - l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; - page_index = lblock >> l2BlocksPerPage; - page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize; - if ((page_offset + size) > PAGE_CACHE_SIZE) { - jfs_err("MetaData crosses page boundary!!"); - jfs_err("lblock = %lx, size = %d", lblock, size); - dump_stack(); - return NULL; - } - if (absolute) - mapping = JFS_SBI(inode->i_sb)->direct_inode->i_mapping; - else { - /* - * If an nfs client tries to read an inode that is larger - * than any existing inodes, we may try to read past the - * end of the inode map - */ - if ((lblock << inode->i_blkbits) >= inode->i_size) - return NULL; - mapping = inode->i_mapping; - } - - if (new && 
(PSIZE == PAGE_CACHE_SIZE)) { - page = grab_cache_page(mapping, page_index); - if (!page) { - jfs_err("grab_cache_page failed!"); - return NULL; - } - SetPageUptodate(page); - } else { - page = read_mapping_page(mapping, page_index, NULL); - if (IS_ERR(page) || !PageUptodate(page)) { - jfs_err("read_mapping_page failed!"); - return NULL; - } - lock_page(page); - } - - mp = page_to_mp(page, page_offset); - if (mp) { - if (mp->logical_size != size) { - jfs_error(inode->i_sb, - "__get_metapage: mp->logical_size != size"); - jfs_err("logical_size = %d, size = %d", - mp->logical_size, size); - dump_stack(); - goto unlock; - } - mp->count++; - lock_metapage(mp); - if (test_bit(META_discard, &mp->flag)) { - if (!new) { - jfs_error(inode->i_sb, - "__get_metapage: using a " - "discarded metapage"); - discard_metapage(mp); - goto unlock; - } - clear_bit(META_discard, &mp->flag); - } - } else { - INCREMENT(mpStat.pagealloc); - mp = alloc_metapage(GFP_NOFS); - mp->page = page; - mp->flag = 0; - mp->xflag = COMMIT_PAGE; - mp->count = 1; - mp->nohomeok = 0; - mp->logical_size = size; - mp->data = page_address(page) + page_offset; - mp->index = lblock; - if (unlikely(insert_metapage(page, mp))) { - free_metapage(mp); - goto unlock; - } - lock_metapage(mp); - } - - if (new) { - jfs_info("zeroing mp = 0x%p", mp); - memset(mp->data, 0, PSIZE); - } - - unlock_page(page); - jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp, mp->data); - return mp; - -unlock: - unlock_page(page); - return NULL; -} - -void grab_metapage(struct metapage * mp) -{ - jfs_info("grab_metapage: mp = 0x%p", mp); - page_cache_get(mp->page); - lock_page(mp->page); - mp->count++; - lock_metapage(mp); - unlock_page(mp->page); -} - -void force_metapage(struct metapage *mp) -{ - struct page *page = mp->page; - jfs_info("force_metapage: mp = 0x%p", mp); - set_bit(META_forcewrite, &mp->flag); - clear_bit(META_sync, &mp->flag); - page_cache_get(page); - lock_page(page); - set_page_dirty(page); - 
write_one_page(page, 1); - clear_bit(META_forcewrite, &mp->flag); - page_cache_release(page); -} - -void hold_metapage(struct metapage *mp) -{ - lock_page(mp->page); -} - -void put_metapage(struct metapage *mp) -{ - if (mp->count || mp->nohomeok) { - /* Someone else will release this */ - unlock_page(mp->page); - return; - } - page_cache_get(mp->page); - mp->count++; - lock_metapage(mp); - unlock_page(mp->page); - release_metapage(mp); -} - -void release_metapage(struct metapage * mp) -{ - struct page *page = mp->page; - jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag); - - BUG_ON(!page); - - lock_page(page); - unlock_metapage(mp); - - assert(mp->count); - if (--mp->count || mp->nohomeok) { - unlock_page(page); - page_cache_release(page); - return; - } - - if (test_bit(META_dirty, &mp->flag)) { - set_page_dirty(page); - if (test_bit(META_sync, &mp->flag)) { - clear_bit(META_sync, &mp->flag); - write_one_page(page, 1); - lock_page(page); /* write_one_page unlocks the page */ - } - } else if (mp->lsn) /* discard_metapage doesn't remove it */ - remove_from_logsync(mp); - - /* Try to keep metapages from using up too much memory */ - drop_metapage(page, mp); - - unlock_page(page); - page_cache_release(page); -} - -void __invalidate_metapages(struct inode *ip, s64 addr, int len) -{ - sector_t lblock; - int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits; - int BlocksPerPage = 1 << l2BlocksPerPage; - /* All callers are interested in block device's mapping */ - struct address_space *mapping = - JFS_SBI(ip->i_sb)->direct_inode->i_mapping; - struct metapage *mp; - struct page *page; - unsigned int offset; - - /* - * Mark metapages to discard. They will eventually be - * released, but should not be written. 
- */ - for (lblock = addr & ~(BlocksPerPage - 1); lblock < addr + len; - lblock += BlocksPerPage) { - page = find_lock_page(mapping, lblock >> l2BlocksPerPage); - if (!page) - continue; - for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { - mp = page_to_mp(page, offset); - if (!mp) - continue; - if (mp->index < addr) - continue; - if (mp->index >= addr + len) - break; - - clear_bit(META_dirty, &mp->flag); - set_bit(META_discard, &mp->flag); - if (mp->lsn) - remove_from_logsync(mp); - } - unlock_page(page); - page_cache_release(page); - } -} - -#ifdef CONFIG_JFS_STATISTICS -static int jfs_mpstat_proc_show(struct seq_file *m, void *v) -{ - seq_printf(m, - "JFS Metapage statistics\n" - "=======================\n" - "page allocations = %d\n" - "page frees = %d\n" - "lock waits = %d\n", - mpStat.pagealloc, - mpStat.pagefree, - mpStat.lockwait); - return 0; -} - -static int jfs_mpstat_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, jfs_mpstat_proc_show, NULL); -} - -const struct file_operations jfs_mpstat_proc_fops = { - .owner = THIS_MODULE, - .open = jfs_mpstat_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif diff --git a/ANDROID_3.4.5/fs/jfs/jfs_metapage.h b/ANDROID_3.4.5/fs/jfs/jfs_metapage.h deleted file mode 100644 index a78beda8..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_metapage.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2002 - * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_METAPAGE -#define _H_JFS_METAPAGE - -#include <linux/pagemap.h> - -struct metapage { - /* Common logsyncblk prefix (see jfs_logmgr.h) */ - u16 xflag; - u16 unused; - lid_t lid; - int lsn; - struct list_head synclist; - /* End of logsyncblk prefix */ - - unsigned long flag; /* See Below */ - unsigned long count; /* Reference count */ - void *data; /* Data pointer */ - sector_t index; /* block address of page */ - wait_queue_head_t wait; - - /* implementation */ - struct page *page; - unsigned int logical_size; - - /* Journal management */ - int clsn; - int nohomeok; - struct jfs_log *log; -}; - -/* metapage flag */ -#define META_locked 0 -#define META_free 1 -#define META_dirty 2 -#define META_sync 3 -#define META_discard 4 -#define META_forcewrite 5 -#define META_io 6 - -#define mark_metapage_dirty(mp) set_bit(META_dirty, &(mp)->flag) - -/* function prototypes */ -extern int metapage_init(void); -extern void metapage_exit(void); -extern struct metapage *__get_metapage(struct inode *inode, - unsigned long lblock, unsigned int size, - int absolute, unsigned long new); - -#define read_metapage(inode, lblock, size, absolute)\ - __get_metapage(inode, lblock, size, absolute, false) - -#define get_metapage(inode, lblock, size, absolute)\ - __get_metapage(inode, lblock, size, absolute, true) - -extern void release_metapage(struct metapage *); -extern void grab_metapage(struct metapage *); -extern void force_metapage(struct metapage *); - -/* - * hold_metapage and put_metapage are used in 
conjunction. The page lock - * is not dropped between the two, so no other threads can get or release - * the metapage - */ -extern void hold_metapage(struct metapage *); -extern void put_metapage(struct metapage *); - -static inline void write_metapage(struct metapage *mp) -{ - set_bit(META_dirty, &mp->flag); - release_metapage(mp); -} - -static inline void flush_metapage(struct metapage *mp) -{ - set_bit(META_sync, &mp->flag); - write_metapage(mp); -} - -static inline void discard_metapage(struct metapage *mp) -{ - clear_bit(META_dirty, &mp->flag); - set_bit(META_discard, &mp->flag); - release_metapage(mp); -} - -static inline void metapage_nohomeok(struct metapage *mp) -{ - struct page *page = mp->page; - lock_page(page); - if (!mp->nohomeok++) { - mark_metapage_dirty(mp); - page_cache_get(page); - wait_on_page_writeback(page); - } - unlock_page(page); -} - -/* - * This serializes access to mp->lsn when metapages are added to logsynclist - * without setting nohomeok. i.e. updating imap & dmap - */ -static inline void metapage_wait_for_io(struct metapage *mp) -{ - if (test_bit(META_io, &mp->flag)) - wait_on_page_writeback(mp->page); -} - -/* - * This is called when already holding the metapage - */ -static inline void _metapage_homeok(struct metapage *mp) -{ - if (!--mp->nohomeok) - page_cache_release(mp->page); -} - -static inline void metapage_homeok(struct metapage *mp) -{ - hold_metapage(mp); - _metapage_homeok(mp); - put_metapage(mp); -} - -extern const struct address_space_operations jfs_metapage_aops; - -/* - * This routines invalidate all pages for an extent. 
- */ -extern void __invalidate_metapages(struct inode *, s64, int); -#define invalidate_pxd_metapages(ip, pxd) \ - __invalidate_metapages((ip), addressPXD(&(pxd)), lengthPXD(&(pxd))) -#define invalidate_dxd_metapages(ip, dxd) \ - __invalidate_metapages((ip), addressDXD(&(dxd)), lengthDXD(&(dxd))) -#define invalidate_xad_metapages(ip, xad) \ - __invalidate_metapages((ip), addressXAD(&(xad)), lengthXAD(&(xad))) - -#endif /* _H_JFS_METAPAGE */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_mount.c b/ANDROID_3.4.5/fs/jfs/jfs_mount.c deleted file mode 100644 index 9895595f..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_mount.c +++ /dev/null @@ -1,507 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* - * Module: jfs_mount.c - * - * note: file system in transition to aggregate/fileset: - * - * file system mount is interpreted as the mount of aggregate, - * if not already mounted, and mount of the single/only fileset in - * the aggregate; - * - * a file system/aggregate is represented by an internal inode - * (aka mount inode) initialized with aggregate superblock; - * each vfs represents a fileset, and points to its "fileset inode - * allocation map inode" (aka fileset inode): - * (an aggregate itself is structured recursively as a filset: - * an internal vfs is constructed and points to its "fileset inode - * allocation map inode" (aka aggregate inode) where each inode - * represents a fileset inode) so that inode number is mapped to - * on-disk inode in uniform way at both aggregate and fileset level; - * - * each vnode/inode of a fileset is linked to its vfs (to facilitate - * per fileset inode operations, e.g., unmount of a fileset, etc.); - * each inode points to the mount inode (to facilitate access to - * per aggregate information, e.g., block size, etc.) as well as - * its file set inode. - * - * aggregate - * ipmnt - * mntvfs -> fileset ipimap+ -> aggregate ipbmap -> aggregate ipaimap; - * fileset vfs -> vp(1) <-> ... 
<-> vp(n) <->vproot; - */ - -#include <linux/fs.h> -#include <linux/buffer_head.h> - -#include "jfs_incore.h" -#include "jfs_filsys.h" -#include "jfs_superblock.h" -#include "jfs_dmap.h" -#include "jfs_imap.h" -#include "jfs_metapage.h" -#include "jfs_debug.h" - - -/* - * forward references - */ -static int chkSuper(struct super_block *); -static int logMOUNT(struct super_block *sb); - -/* - * NAME: jfs_mount(sb) - * - * FUNCTION: vfs_mount() - * - * PARAMETER: sb - super block - * - * RETURN: -EBUSY - device already mounted or open for write - * -EBUSY - cvrdvp already mounted; - * -EBUSY - mount table full - * -ENOTDIR- cvrdvp not directory on a device mount - * -ENXIO - device open failure - */ -int jfs_mount(struct super_block *sb) -{ - int rc = 0; /* Return code */ - struct jfs_sb_info *sbi = JFS_SBI(sb); - struct inode *ipaimap = NULL; - struct inode *ipaimap2 = NULL; - struct inode *ipimap = NULL; - struct inode *ipbmap = NULL; - - /* - * read/validate superblock - * (initialize mount inode from the superblock) - */ - if ((rc = chkSuper(sb))) { - goto errout20; - } - - ipaimap = diReadSpecial(sb, AGGREGATE_I, 0); - if (ipaimap == NULL) { - jfs_err("jfs_mount: Failed to read AGGREGATE_I"); - rc = -EIO; - goto errout20; - } - sbi->ipaimap = ipaimap; - - jfs_info("jfs_mount: ipaimap:0x%p", ipaimap); - - /* - * initialize aggregate inode allocation map - */ - if ((rc = diMount(ipaimap))) { - jfs_err("jfs_mount: diMount(ipaimap) failed w/rc = %d", rc); - goto errout21; - } - - /* - * open aggregate block allocation map - */ - ipbmap = diReadSpecial(sb, BMAP_I, 0); - if (ipbmap == NULL) { - rc = -EIO; - goto errout22; - } - - jfs_info("jfs_mount: ipbmap:0x%p", ipbmap); - - sbi->ipbmap = ipbmap; - - /* - * initialize aggregate block allocation map - */ - if ((rc = dbMount(ipbmap))) { - jfs_err("jfs_mount: dbMount failed w/rc = %d", rc); - goto errout22; - } - - /* - * open the secondary aggregate inode allocation map - * - * This is a duplicate of the aggregate 
inode allocation map. - * - * hand craft a vfs in the same fashion as we did to read ipaimap. - * By adding INOSPEREXT (32) to the inode number, we are telling - * diReadSpecial that we are reading from the secondary aggregate - * inode table. This also creates a unique entry in the inode hash - * table. - */ - if ((sbi->mntflag & JFS_BAD_SAIT) == 0) { - ipaimap2 = diReadSpecial(sb, AGGREGATE_I, 1); - if (!ipaimap2) { - jfs_err("jfs_mount: Failed to read AGGREGATE_I"); - rc = -EIO; - goto errout35; - } - sbi->ipaimap2 = ipaimap2; - - jfs_info("jfs_mount: ipaimap2:0x%p", ipaimap2); - - /* - * initialize secondary aggregate inode allocation map - */ - if ((rc = diMount(ipaimap2))) { - jfs_err("jfs_mount: diMount(ipaimap2) failed, rc = %d", - rc); - goto errout35; - } - } else - /* Secondary aggregate inode table is not valid */ - sbi->ipaimap2 = NULL; - - /* - * mount (the only/single) fileset - */ - /* - * open fileset inode allocation map (aka fileset inode) - */ - ipimap = diReadSpecial(sb, FILESYSTEM_I, 0); - if (ipimap == NULL) { - jfs_err("jfs_mount: Failed to read FILESYSTEM_I"); - /* open fileset secondary inode allocation map */ - rc = -EIO; - goto errout40; - } - jfs_info("jfs_mount: ipimap:0x%p", ipimap); - - /* map further access of per fileset inodes by the fileset inode */ - sbi->ipimap = ipimap; - - /* initialize fileset inode allocation map */ - if ((rc = diMount(ipimap))) { - jfs_err("jfs_mount: diMount failed w/rc = %d", rc); - goto errout41; - } - - goto out; - - /* - * unwind on error - */ - errout41: /* close fileset inode allocation map inode */ - diFreeSpecial(ipimap); - - errout40: /* fileset closed */ - - /* close secondary aggregate inode allocation map */ - if (ipaimap2) { - diUnmount(ipaimap2, 1); - diFreeSpecial(ipaimap2); - } - - errout35: - - /* close aggregate block allocation map */ - dbUnmount(ipbmap, 1); - diFreeSpecial(ipbmap); - - errout22: /* close aggregate inode allocation map */ - - diUnmount(ipaimap, 1); - - errout21: /* 
close aggregate inodes */ - diFreeSpecial(ipaimap); - errout20: /* aggregate closed */ - - out: - - if (rc) - jfs_err("Mount JFS Failure: %d", rc); - - return rc; -} - -/* - * NAME: jfs_mount_rw(sb, remount) - * - * FUNCTION: Completes read-write mount, or remounts read-only volume - * as read-write - */ -int jfs_mount_rw(struct super_block *sb, int remount) -{ - struct jfs_sb_info *sbi = JFS_SBI(sb); - int rc; - - /* - * If we are re-mounting a previously read-only volume, we want to - * re-read the inode and block maps, since fsck.jfs may have updated - * them. - */ - if (remount) { - if (chkSuper(sb) || (sbi->state != FM_CLEAN)) - return -EINVAL; - - truncate_inode_pages(sbi->ipimap->i_mapping, 0); - truncate_inode_pages(sbi->ipbmap->i_mapping, 0); - diUnmount(sbi->ipimap, 1); - if ((rc = diMount(sbi->ipimap))) { - jfs_err("jfs_mount_rw: diMount failed!"); - return rc; - } - - dbUnmount(sbi->ipbmap, 1); - if ((rc = dbMount(sbi->ipbmap))) { - jfs_err("jfs_mount_rw: dbMount failed!"); - return rc; - } - } - - /* - * open/initialize log - */ - if ((rc = lmLogOpen(sb))) - return rc; - - /* - * update file system superblock; - */ - if ((rc = updateSuper(sb, FM_MOUNT))) { - jfs_err("jfs_mount: updateSuper failed w/rc = %d", rc); - lmLogClose(sb); - return rc; - } - - /* - * write MOUNT log record of the file system - */ - logMOUNT(sb); - - return rc; -} - -/* - * chkSuper() - * - * validate the superblock of the file system to be mounted and - * get the file system parameters. 
- * - * returns - * 0 with fragsize set if check successful - * error code if not successful - */ -static int chkSuper(struct super_block *sb) -{ - int rc = 0; - struct jfs_sb_info *sbi = JFS_SBI(sb); - struct jfs_superblock *j_sb; - struct buffer_head *bh; - int AIM_bytesize, AIT_bytesize; - int expected_AIM_bytesize, expected_AIT_bytesize; - s64 AIM_byte_addr, AIT_byte_addr, fsckwsp_addr; - s64 byte_addr_diff0, byte_addr_diff1; - s32 bsize; - - if ((rc = readSuper(sb, &bh))) - return rc; - j_sb = (struct jfs_superblock *)bh->b_data; - - /* - * validate superblock - */ - /* validate fs signature */ - if (strncmp(j_sb->s_magic, JFS_MAGIC, 4) || - le32_to_cpu(j_sb->s_version) > JFS_VERSION) { - rc = -EINVAL; - goto out; - } - - bsize = le32_to_cpu(j_sb->s_bsize); -#ifdef _JFS_4K - if (bsize != PSIZE) { - jfs_err("Currently only 4K block size supported!"); - rc = -EINVAL; - goto out; - } -#endif /* _JFS_4K */ - - jfs_info("superblock: flag:0x%08x state:0x%08x size:0x%Lx", - le32_to_cpu(j_sb->s_flag), le32_to_cpu(j_sb->s_state), - (unsigned long long) le64_to_cpu(j_sb->s_size)); - - /* validate the descriptors for Secondary AIM and AIT */ - if ((j_sb->s_flag & cpu_to_le32(JFS_BAD_SAIT)) != - cpu_to_le32(JFS_BAD_SAIT)) { - expected_AIM_bytesize = 2 * PSIZE; - AIM_bytesize = lengthPXD(&(j_sb->s_aim2)) * bsize; - expected_AIT_bytesize = 4 * PSIZE; - AIT_bytesize = lengthPXD(&(j_sb->s_ait2)) * bsize; - AIM_byte_addr = addressPXD(&(j_sb->s_aim2)) * bsize; - AIT_byte_addr = addressPXD(&(j_sb->s_ait2)) * bsize; - byte_addr_diff0 = AIT_byte_addr - AIM_byte_addr; - fsckwsp_addr = addressPXD(&(j_sb->s_fsckpxd)) * bsize; - byte_addr_diff1 = fsckwsp_addr - AIT_byte_addr; - if ((AIM_bytesize != expected_AIM_bytesize) || - (AIT_bytesize != expected_AIT_bytesize) || - (byte_addr_diff0 != AIM_bytesize) || - (byte_addr_diff1 <= AIT_bytesize)) - j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT); - } - - if ((j_sb->s_flag & cpu_to_le32(JFS_GROUPCOMMIT)) != - cpu_to_le32(JFS_GROUPCOMMIT)) - 
j_sb->s_flag |= cpu_to_le32(JFS_GROUPCOMMIT); - - /* validate fs state */ - if (j_sb->s_state != cpu_to_le32(FM_CLEAN) && - !(sb->s_flags & MS_RDONLY)) { - jfs_err("jfs_mount: Mount Failure: File System Dirty."); - rc = -EINVAL; - goto out; - } - - sbi->state = le32_to_cpu(j_sb->s_state); - sbi->mntflag = le32_to_cpu(j_sb->s_flag); - - /* - * JFS always does I/O by 4K pages. Don't tell the buffer cache - * that we use anything else (leave s_blocksize alone). - */ - sbi->bsize = bsize; - sbi->l2bsize = le16_to_cpu(j_sb->s_l2bsize); - - /* - * For now, ignore s_pbsize, l2bfactor. All I/O going through buffer - * cache. - */ - sbi->nbperpage = PSIZE >> sbi->l2bsize; - sbi->l2nbperpage = L2PSIZE - sbi->l2bsize; - sbi->l2niperblk = sbi->l2bsize - L2DISIZE; - if (sbi->mntflag & JFS_INLINELOG) - sbi->logpxd = j_sb->s_logpxd; - else { - sbi->logdev = new_decode_dev(le32_to_cpu(j_sb->s_logdev)); - memcpy(sbi->uuid, j_sb->s_uuid, sizeof(sbi->uuid)); - memcpy(sbi->loguuid, j_sb->s_loguuid, sizeof(sbi->uuid)); - } - sbi->fsckpxd = j_sb->s_fsckpxd; - sbi->ait2 = j_sb->s_ait2; - - out: - brelse(bh); - return rc; -} - - -/* - * updateSuper() - * - * update synchronously superblock if it is mounted read-write. 
- */ -int updateSuper(struct super_block *sb, uint state) -{ - struct jfs_superblock *j_sb; - struct jfs_sb_info *sbi = JFS_SBI(sb); - struct buffer_head *bh; - int rc; - - if (sbi->flag & JFS_NOINTEGRITY) { - if (state == FM_DIRTY) { - sbi->p_state = state; - return 0; - } else if (state == FM_MOUNT) { - sbi->p_state = sbi->state; - state = FM_DIRTY; - } else if (state == FM_CLEAN) { - state = sbi->p_state; - } else - jfs_err("updateSuper: bad state"); - } else if (sbi->state == FM_DIRTY) - return 0; - - if ((rc = readSuper(sb, &bh))) - return rc; - - j_sb = (struct jfs_superblock *)bh->b_data; - - j_sb->s_state = cpu_to_le32(state); - sbi->state = state; - - if (state == FM_MOUNT) { - /* record log's dev_t and mount serial number */ - j_sb->s_logdev = cpu_to_le32(new_encode_dev(sbi->log->bdev->bd_dev)); - j_sb->s_logserial = cpu_to_le32(sbi->log->serial); - } else if (state == FM_CLEAN) { - /* - * If this volume is shared with OS/2, OS/2 will need to - * recalculate DASD usage, since we don't deal with it. - */ - if (j_sb->s_flag & cpu_to_le32(JFS_DASD_ENABLED)) - j_sb->s_flag |= cpu_to_le32(JFS_DASD_PRIME); - } - - mark_buffer_dirty(bh); - sync_dirty_buffer(bh); - brelse(bh); - - return 0; -} - - -/* - * readSuper() - * - * read superblock by raw sector address - */ -int readSuper(struct super_block *sb, struct buffer_head **bpp) -{ - /* read in primary superblock */ - *bpp = sb_bread(sb, SUPER1_OFF >> sb->s_blocksize_bits); - if (*bpp) - return 0; - - /* read in secondary/replicated superblock */ - *bpp = sb_bread(sb, SUPER2_OFF >> sb->s_blocksize_bits); - if (*bpp) - return 0; - - return -EIO; -} - - -/* - * logMOUNT() - * - * function: write a MOUNT log record for file system. - * - * MOUNT record keeps logredo() from processing log records - * for this file system past this point in log. - * it is harmless if mount fails. 
- * - * note: MOUNT record is at aggregate level, not at fileset level, - * since log records of previous mounts of a fileset - * (e.g., AFTER record of extent allocation) have to be processed - * to update block allocation map at aggregate level. - */ -static int logMOUNT(struct super_block *sb) -{ - struct jfs_log *log = JFS_SBI(sb)->log; - struct lrd lrd; - - lrd.logtid = 0; - lrd.backchain = 0; - lrd.type = cpu_to_le16(LOG_MOUNT); - lrd.length = 0; - lrd.aggregate = cpu_to_le32(new_encode_dev(sb->s_bdev->bd_dev)); - lmLog(log, NULL, &lrd, NULL); - - return 0; -} diff --git a/ANDROID_3.4.5/fs/jfs/jfs_superblock.h b/ANDROID_3.4.5/fs/jfs/jfs_superblock.h deleted file mode 100644 index 884fc21a..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_superblock.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2003 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_SUPERBLOCK -#define _H_JFS_SUPERBLOCK - -/* - * make the magic number something a human could read - */ -#define JFS_MAGIC "JFS1" /* Magic word */ - -#define JFS_VERSION 2 /* Version number: Version 2 */ - -#define LV_NAME_SIZE 11 /* MUST BE 11 for OS/2 boot sector */ - -/* - * aggregate superblock - * - * The name superblock is too close to super_block, so the name has been - * changed to jfs_superblock. The utilities are still using the old name. - */ -struct jfs_superblock { - char s_magic[4]; /* 4: magic number */ - __le32 s_version; /* 4: version number */ - - __le64 s_size; /* 8: aggregate size in hardware/LVM blocks; - * VFS: number of blocks - */ - __le32 s_bsize; /* 4: aggregate block size in bytes; - * VFS: fragment size - */ - __le16 s_l2bsize; /* 2: log2 of s_bsize */ - __le16 s_l2bfactor; /* 2: log2(s_bsize/hardware block size) */ - __le32 s_pbsize; /* 4: hardware/LVM block size in bytes */ - __le16 s_l2pbsize; /* 2: log2 of s_pbsize */ - __le16 pad; /* 2: padding necessary for alignment */ - - __le32 s_agsize; /* 4: allocation group size in aggr. 
blocks */ - - __le32 s_flag; /* 4: aggregate attributes: - * see jfs_filsys.h - */ - __le32 s_state; /* 4: mount/unmount/recovery state: - * see jfs_filsys.h - */ - __le32 s_compress; /* 4: > 0 if data compression */ - - pxd_t s_ait2; /* 8: first extent of secondary - * aggregate inode table - */ - - pxd_t s_aim2; /* 8: first extent of secondary - * aggregate inode map - */ - __le32 s_logdev; /* 4: device address of log */ - __le32 s_logserial; /* 4: log serial number at aggregate mount */ - pxd_t s_logpxd; /* 8: inline log extent */ - - pxd_t s_fsckpxd; /* 8: inline fsck work space extent */ - - struct timestruc_t s_time; /* 8: time last updated */ - - __le32 s_fsckloglen; /* 4: Number of filesystem blocks reserved for - * the fsck service log. - * N.B. These blocks are divided among the - * versions kept. This is not a per - * version size. - * N.B. These blocks are included in the - * length field of s_fsckpxd. - */ - s8 s_fscklog; /* 1: which fsck service log is most recent - * 0 => no service log data yet - * 1 => the first one - * 2 => the 2nd one - */ - char s_fpack[11]; /* 11: file system volume name - * N.B. 
This must be 11 bytes to - * conform with the OS/2 BootSector - * requirements - * Only used when s_version is 1 - */ - - /* extendfs() parameter under s_state & FM_EXTENDFS */ - __le64 s_xsize; /* 8: extendfs s_size */ - pxd_t s_xfsckpxd; /* 8: extendfs fsckpxd */ - pxd_t s_xlogpxd; /* 8: extendfs logpxd */ - /* - 128 byte boundary - */ - - char s_uuid[16]; /* 16: 128-bit uuid for volume */ - char s_label[16]; /* 16: volume label */ - char s_loguuid[16]; /* 16: 128-bit uuid for log device */ - -}; - -extern int readSuper(struct super_block *, struct buffer_head **); -extern int updateSuper(struct super_block *, uint); -extern void jfs_error(struct super_block *, const char *, ...); -extern int jfs_mount(struct super_block *); -extern int jfs_mount_rw(struct super_block *, int); -extern int jfs_umount(struct super_block *); -extern int jfs_umount_rw(struct super_block *); -extern int jfs_extendfs(struct super_block *, s64, int); - -extern struct task_struct *jfsIOthread; -extern struct task_struct *jfsSyncThread; - -#endif /*_H_JFS_SUPERBLOCK */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_txnmgr.c b/ANDROID_3.4.5/fs/jfs/jfs_txnmgr.c deleted file mode 100644 index bb8b661b..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_txnmgr.c +++ /dev/null @@ -1,3099 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2005 - * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* - * jfs_txnmgr.c: transaction manager - * - * notes: - * transaction starts with txBegin() and ends with txCommit() - * or txAbort(). - * - * tlock is acquired at the time of update; - * (obviate scan at commit time for xtree and dtree) - * tlock and mp points to each other; - * (no hashlist for mp -> tlock). - * - * special cases: - * tlock on in-memory inode: - * in-place tlock in the in-memory inode itself; - * converted to page lock by iWrite() at commit time. - * - * tlock during write()/mmap() under anonymous transaction (tid = 0): - * transferred (?) to transaction at commit time. - * - * use the page itself to update allocation maps - * (obviate intermediate replication of allocation/deallocation data) - * hold on to mp+lock thru update of maps - */ - -#include <linux/fs.h> -#include <linux/vmalloc.h> -#include <linux/completion.h> -#include <linux/freezer.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/kthread.h> -#include <linux/seq_file.h> -#include "jfs_incore.h" -#include "jfs_inode.h" -#include "jfs_filsys.h" -#include "jfs_metapage.h" -#include "jfs_dinode.h" -#include "jfs_imap.h" -#include "jfs_dmap.h" -#include "jfs_superblock.h" -#include "jfs_debug.h" - -/* - * transaction management structures - */ -static struct { - int freetid; /* index of a free tid structure */ - int freelock; /* index first free lock word */ - wait_queue_head_t freewait; /* eventlist of free tblock */ - wait_queue_head_t freelockwait; /* eventlist of free tlock */ - wait_queue_head_t lowlockwait; /* eventlist of ample tlocks */ - int tlocksInUse; /* Number of tlocks in use */ - spinlock_t LazyLock; /* synchronize sync_queue & unlock_queue */ -/* struct tblock *sync_queue; * Transactions waiting for data sync */ - struct 
list_head unlock_queue; /* Txns waiting to be released */ - struct list_head anon_list; /* inodes having anonymous txns */ - struct list_head anon_list2; /* inodes having anonymous txns - that couldn't be sync'ed */ -} TxAnchor; - -int jfs_tlocks_low; /* Indicates low number of available tlocks */ - -#ifdef CONFIG_JFS_STATISTICS -static struct { - uint txBegin; - uint txBegin_barrier; - uint txBegin_lockslow; - uint txBegin_freetid; - uint txBeginAnon; - uint txBeginAnon_barrier; - uint txBeginAnon_lockslow; - uint txLockAlloc; - uint txLockAlloc_freelock; -} TxStat; -#endif - -static int nTxBlock = -1; /* number of transaction blocks */ -module_param(nTxBlock, int, 0); -MODULE_PARM_DESC(nTxBlock, - "Number of transaction blocks (max:65536)"); - -static int nTxLock = -1; /* number of transaction locks */ -module_param(nTxLock, int, 0); -MODULE_PARM_DESC(nTxLock, - "Number of transaction locks (max:65536)"); - -struct tblock *TxBlock; /* transaction block table */ -static int TxLockLWM; /* Low water mark for number of txLocks used */ -static int TxLockHWM; /* High water mark for number of txLocks used */ -static int TxLockVHWM; /* Very High water mark */ -struct tlock *TxLock; /* transaction lock table */ - -/* - * transaction management lock - */ -static DEFINE_SPINLOCK(jfsTxnLock); - -#define TXN_LOCK() spin_lock(&jfsTxnLock) -#define TXN_UNLOCK() spin_unlock(&jfsTxnLock) - -#define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock); -#define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags) -#define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags) - -static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait); -static int jfs_commit_thread_waking; - -/* - * Retry logic exist outside these macros to protect from spurrious wakeups. 
- */ -static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) -{ - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(event, &wait); - set_current_state(TASK_UNINTERRUPTIBLE); - TXN_UNLOCK(); - io_schedule(); - __set_current_state(TASK_RUNNING); - remove_wait_queue(event, &wait); -} - -#define TXN_SLEEP(event)\ -{\ - TXN_SLEEP_DROP_LOCK(event);\ - TXN_LOCK();\ -} - -#define TXN_WAKEUP(event) wake_up_all(event) - -/* - * statistics - */ -static struct { - tid_t maxtid; /* 4: biggest tid ever used */ - lid_t maxlid; /* 4: biggest lid ever used */ - int ntid; /* 4: # of transactions performed */ - int nlid; /* 4: # of tlocks acquired */ - int waitlock; /* 4: # of tlock wait */ -} stattx; - -/* - * forward references - */ -static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck, struct commit * cd); -static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck); -static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck); -static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck); -static void txAllocPMap(struct inode *ip, struct maplock * maplock, - struct tblock * tblk); -static void txForce(struct tblock * tblk); -static int txLog(struct jfs_log * log, struct tblock * tblk, - struct commit * cd); -static void txUpdateMap(struct tblock * tblk); -static void txRelease(struct tblock * tblk); -static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck); -static void LogSyncRelease(struct metapage * mp); - -/* - * transaction block/lock management - * --------------------------------- - */ - -/* - * Get a transaction lock from the free list. If the number in use is - * greater than the high water mark, wake up the sync daemon. This should - * free some anonymous transaction locks. (TXN_LOCK must be held.) 
- */ -static lid_t txLockAlloc(void) -{ - lid_t lid; - - INCREMENT(TxStat.txLockAlloc); - if (!TxAnchor.freelock) { - INCREMENT(TxStat.txLockAlloc_freelock); - } - - while (!(lid = TxAnchor.freelock)) - TXN_SLEEP(&TxAnchor.freelockwait); - TxAnchor.freelock = TxLock[lid].next; - HIGHWATERMARK(stattx.maxlid, lid); - if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) { - jfs_info("txLockAlloc tlocks low"); - jfs_tlocks_low = 1; - wake_up_process(jfsSyncThread); - } - - return lid; -} - -static void txLockFree(lid_t lid) -{ - TxLock[lid].tid = 0; - TxLock[lid].next = TxAnchor.freelock; - TxAnchor.freelock = lid; - TxAnchor.tlocksInUse--; - if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) { - jfs_info("txLockFree jfs_tlocks_low no more"); - jfs_tlocks_low = 0; - TXN_WAKEUP(&TxAnchor.lowlockwait); - } - TXN_WAKEUP(&TxAnchor.freelockwait); -} - -/* - * NAME: txInit() - * - * FUNCTION: initialize transaction management structures - * - * RETURN: - * - * serialization: single thread at jfs_init() - */ -int txInit(void) -{ - int k, size; - struct sysinfo si; - - /* Set defaults for nTxLock and nTxBlock if unset */ - - if (nTxLock == -1) { - if (nTxBlock == -1) { - /* Base default on memory size */ - si_meminfo(&si); - if (si.totalram > (256 * 1024)) /* 1 GB */ - nTxLock = 64 * 1024; - else - nTxLock = si.totalram >> 2; - } else if (nTxBlock > (8 * 1024)) - nTxLock = 64 * 1024; - else - nTxLock = nTxBlock << 3; - } - if (nTxBlock == -1) - nTxBlock = nTxLock >> 3; - - /* Verify tunable parameters */ - if (nTxBlock < 16) - nTxBlock = 16; /* No one should set it this low */ - if (nTxBlock > 65536) - nTxBlock = 65536; - if (nTxLock < 256) - nTxLock = 256; /* No one should set it this low */ - if (nTxLock > 65536) - nTxLock = 65536; - - printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n", - nTxBlock, nTxLock); - /* - * initialize transaction block (tblock) table - * - * transaction id (tid) = tblock index - * tid = 0 is reserved. 
- */ - TxLockLWM = (nTxLock * 4) / 10; - TxLockHWM = (nTxLock * 7) / 10; - TxLockVHWM = (nTxLock * 8) / 10; - - size = sizeof(struct tblock) * nTxBlock; - TxBlock = vmalloc(size); - if (TxBlock == NULL) - return -ENOMEM; - - for (k = 1; k < nTxBlock - 1; k++) { - TxBlock[k].next = k + 1; - init_waitqueue_head(&TxBlock[k].gcwait); - init_waitqueue_head(&TxBlock[k].waitor); - } - TxBlock[k].next = 0; - init_waitqueue_head(&TxBlock[k].gcwait); - init_waitqueue_head(&TxBlock[k].waitor); - - TxAnchor.freetid = 1; - init_waitqueue_head(&TxAnchor.freewait); - - stattx.maxtid = 1; /* statistics */ - - /* - * initialize transaction lock (tlock) table - * - * transaction lock id = tlock index - * tlock id = 0 is reserved. - */ - size = sizeof(struct tlock) * nTxLock; - TxLock = vmalloc(size); - if (TxLock == NULL) { - vfree(TxBlock); - return -ENOMEM; - } - - /* initialize tlock table */ - for (k = 1; k < nTxLock - 1; k++) - TxLock[k].next = k + 1; - TxLock[k].next = 0; - init_waitqueue_head(&TxAnchor.freelockwait); - init_waitqueue_head(&TxAnchor.lowlockwait); - - TxAnchor.freelock = 1; - TxAnchor.tlocksInUse = 0; - INIT_LIST_HEAD(&TxAnchor.anon_list); - INIT_LIST_HEAD(&TxAnchor.anon_list2); - - LAZY_LOCK_INIT(); - INIT_LIST_HEAD(&TxAnchor.unlock_queue); - - stattx.maxlid = 1; /* statistics */ - - return 0; -} - -/* - * NAME: txExit() - * - * FUNCTION: clean up when module is unloaded - */ -void txExit(void) -{ - vfree(TxLock); - TxLock = NULL; - vfree(TxBlock); - TxBlock = NULL; -} - -/* - * NAME: txBegin() - * - * FUNCTION: start a transaction. 
- * - * PARAMETER: sb - superblock - * flag - force for nested tx; - * - * RETURN: tid - transaction id - * - * note: flag force allows to start tx for nested tx - * to prevent deadlock on logsync barrier; - */ -tid_t txBegin(struct super_block *sb, int flag) -{ - tid_t t; - struct tblock *tblk; - struct jfs_log *log; - - jfs_info("txBegin: flag = 0x%x", flag); - log = JFS_SBI(sb)->log; - - TXN_LOCK(); - - INCREMENT(TxStat.txBegin); - - retry: - if (!(flag & COMMIT_FORCE)) { - /* - * synchronize with logsync barrier - */ - if (test_bit(log_SYNCBARRIER, &log->flag) || - test_bit(log_QUIESCE, &log->flag)) { - INCREMENT(TxStat.txBegin_barrier); - TXN_SLEEP(&log->syncwait); - goto retry; - } - } - if (flag == 0) { - /* - * Don't begin transaction if we're getting starved for tlocks - * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately - * free tlocks) - */ - if (TxAnchor.tlocksInUse > TxLockVHWM) { - INCREMENT(TxStat.txBegin_lockslow); - TXN_SLEEP(&TxAnchor.lowlockwait); - goto retry; - } - } - - /* - * allocate transaction id/block - */ - if ((t = TxAnchor.freetid) == 0) { - jfs_info("txBegin: waiting for free tid"); - INCREMENT(TxStat.txBegin_freetid); - TXN_SLEEP(&TxAnchor.freewait); - goto retry; - } - - tblk = tid_to_tblock(t); - - if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) { - /* Don't let a non-forced transaction take the last tblk */ - jfs_info("txBegin: waiting for free tid"); - INCREMENT(TxStat.txBegin_freetid); - TXN_SLEEP(&TxAnchor.freewait); - goto retry; - } - - TxAnchor.freetid = tblk->next; - - /* - * initialize transaction - */ - - /* - * We can't zero the whole thing or we screw up another thread being - * awakened after sleeping on tblk->waitor - * - * memset(tblk, 0, sizeof(struct tblock)); - */ - tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0; - - tblk->sb = sb; - ++log->logtid; - tblk->logtid = log->logtid; - - ++log->active; - - HIGHWATERMARK(stattx.maxtid, t); /* statistics */ - INCREMENT(stattx.ntid); /* 
statistics */ - - TXN_UNLOCK(); - - jfs_info("txBegin: returning tid = %d", t); - - return t; -} - -/* - * NAME: txBeginAnon() - * - * FUNCTION: start an anonymous transaction. - * Blocks if logsync or available tlocks are low to prevent - * anonymous tlocks from depleting supply. - * - * PARAMETER: sb - superblock - * - * RETURN: none - */ -void txBeginAnon(struct super_block *sb) -{ - struct jfs_log *log; - - log = JFS_SBI(sb)->log; - - TXN_LOCK(); - INCREMENT(TxStat.txBeginAnon); - - retry: - /* - * synchronize with logsync barrier - */ - if (test_bit(log_SYNCBARRIER, &log->flag) || - test_bit(log_QUIESCE, &log->flag)) { - INCREMENT(TxStat.txBeginAnon_barrier); - TXN_SLEEP(&log->syncwait); - goto retry; - } - - /* - * Don't begin transaction if we're getting starved for tlocks - */ - if (TxAnchor.tlocksInUse > TxLockVHWM) { - INCREMENT(TxStat.txBeginAnon_lockslow); - TXN_SLEEP(&TxAnchor.lowlockwait); - goto retry; - } - TXN_UNLOCK(); -} - -/* - * txEnd() - * - * function: free specified transaction block. - * - * logsync barrier processing: - * - * serialization: - */ -void txEnd(tid_t tid) -{ - struct tblock *tblk = tid_to_tblock(tid); - struct jfs_log *log; - - jfs_info("txEnd: tid = %d", tid); - TXN_LOCK(); - - /* - * wakeup transactions waiting on the page locked - * by the current transaction - */ - TXN_WAKEUP(&tblk->waitor); - - log = JFS_SBI(tblk->sb)->log; - - /* - * Lazy commit thread can't free this guy until we mark it UNLOCKED, - * otherwise, we would be left with a transaction that may have been - * reused. - * - * Lazy commit thread will turn off tblkGC_LAZY before calling this - * routine. 
- */ - if (tblk->flag & tblkGC_LAZY) { - jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk); - TXN_UNLOCK(); - - spin_lock_irq(&log->gclock); // LOGGC_LOCK - tblk->flag |= tblkGC_UNLOCKED; - spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK - return; - } - - jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk); - - assert(tblk->next == 0); - - /* - * insert tblock back on freelist - */ - tblk->next = TxAnchor.freetid; - TxAnchor.freetid = tid; - - /* - * mark the tblock not active - */ - if (--log->active == 0) { - clear_bit(log_FLUSH, &log->flag); - - /* - * synchronize with logsync barrier - */ - if (test_bit(log_SYNCBARRIER, &log->flag)) { - TXN_UNLOCK(); - - /* write dirty metadata & forward log syncpt */ - jfs_syncpt(log, 1); - - jfs_info("log barrier off: 0x%x", log->lsn); - - /* enable new transactions start */ - clear_bit(log_SYNCBARRIER, &log->flag); - - /* wakeup all waitors for logsync barrier */ - TXN_WAKEUP(&log->syncwait); - - goto wakeup; - } - } - - TXN_UNLOCK(); -wakeup: - /* - * wakeup all waitors for a free tblock - */ - TXN_WAKEUP(&TxAnchor.freewait); -} - -/* - * txLock() - * - * function: acquire a transaction lock on the specified <mp> - * - * parameter: - * - * return: transaction lock id - * - * serialization: - */ -struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, - int type) -{ - struct jfs_inode_info *jfs_ip = JFS_IP(ip); - int dir_xtree = 0; - lid_t lid; - tid_t xtid; - struct tlock *tlck; - struct xtlock *xtlck; - struct linelock *linelock; - xtpage_t *p; - struct tblock *tblk; - - TXN_LOCK(); - - if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) && - !(mp->xflag & COMMIT_PAGE)) { - /* - * Directory inode is special. It can have both an xtree tlock - * and a dtree tlock associated with it. - */ - dir_xtree = 1; - lid = jfs_ip->xtlid; - } else - lid = mp->lid; - - /* is page not locked by a transaction ? 
*/ - if (lid == 0) - goto allocateLock; - - jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid); - - /* is page locked by the requester transaction ? */ - tlck = lid_to_tlock(lid); - if ((xtid = tlck->tid) == tid) { - TXN_UNLOCK(); - goto grantLock; - } - - /* - * is page locked by anonymous transaction/lock ? - * - * (page update without transaction (i.e., file write) is - * locked under anonymous transaction tid = 0: - * anonymous tlocks maintained on anonymous tlock list of - * the inode of the page and available to all anonymous - * transactions until txCommit() time at which point - * they are transferred to the transaction tlock list of - * the committing transaction of the inode) - */ - if (xtid == 0) { - tlck->tid = tid; - TXN_UNLOCK(); - tblk = tid_to_tblock(tid); - /* - * The order of the tlocks in the transaction is important - * (during truncate, child xtree pages must be freed before - * parent's tlocks change the working map). - * Take tlock off anonymous list and add to tail of - * transaction list - * - * Note: We really need to get rid of the tid & lid and - * use list_head's. This code is getting UGLY! - */ - if (jfs_ip->atlhead == lid) { - if (jfs_ip->atltail == lid) { - /* only anonymous txn. 
- * Remove from anon_list - */ - TXN_LOCK(); - list_del_init(&jfs_ip->anon_inode_list); - TXN_UNLOCK(); - } - jfs_ip->atlhead = tlck->next; - } else { - lid_t last; - for (last = jfs_ip->atlhead; - lid_to_tlock(last)->next != lid; - last = lid_to_tlock(last)->next) { - assert(last); - } - lid_to_tlock(last)->next = tlck->next; - if (jfs_ip->atltail == lid) - jfs_ip->atltail = last; - } - - /* insert the tlock at tail of transaction tlock list */ - - if (tblk->next) - lid_to_tlock(tblk->last)->next = lid; - else - tblk->next = lid; - tlck->next = 0; - tblk->last = lid; - - goto grantLock; - } - - goto waitLock; - - /* - * allocate a tlock - */ - allocateLock: - lid = txLockAlloc(); - tlck = lid_to_tlock(lid); - - /* - * initialize tlock - */ - tlck->tid = tid; - - TXN_UNLOCK(); - - /* mark tlock for meta-data page */ - if (mp->xflag & COMMIT_PAGE) { - - tlck->flag = tlckPAGELOCK; - - /* mark the page dirty and nohomeok */ - metapage_nohomeok(mp); - - jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p", - mp, mp->nohomeok, tid, tlck); - - /* if anonymous transaction, and buffer is on the group - * commit synclist, mark inode to show this. This will - * prevent the buffer from being marked nohomeok for too - * long a time. 
- */ - if ((tid == 0) && mp->lsn) - set_cflag(COMMIT_Synclist, ip); - } - /* mark tlock for in-memory inode */ - else - tlck->flag = tlckINODELOCK; - - if (S_ISDIR(ip->i_mode)) - tlck->flag |= tlckDIRECTORY; - - tlck->type = 0; - - /* bind the tlock and the page */ - tlck->ip = ip; - tlck->mp = mp; - if (dir_xtree) - jfs_ip->xtlid = lid; - else - mp->lid = lid; - - /* - * enqueue transaction lock to transaction/inode - */ - /* insert the tlock at tail of transaction tlock list */ - if (tid) { - tblk = tid_to_tblock(tid); - if (tblk->next) - lid_to_tlock(tblk->last)->next = lid; - else - tblk->next = lid; - tlck->next = 0; - tblk->last = lid; - } - /* anonymous transaction: - * insert the tlock at head of inode anonymous tlock list - */ - else { - tlck->next = jfs_ip->atlhead; - jfs_ip->atlhead = lid; - if (tlck->next == 0) { - /* This inode's first anonymous transaction */ - jfs_ip->atltail = lid; - TXN_LOCK(); - list_add_tail(&jfs_ip->anon_inode_list, - &TxAnchor.anon_list); - TXN_UNLOCK(); - } - } - - /* initialize type dependent area for linelock */ - linelock = (struct linelock *) & tlck->lock; - linelock->next = 0; - linelock->flag = tlckLINELOCK; - linelock->maxcnt = TLOCKSHORT; - linelock->index = 0; - - switch (type & tlckTYPE) { - case tlckDTREE: - linelock->l2linesize = L2DTSLOTSIZE; - break; - - case tlckXTREE: - linelock->l2linesize = L2XTSLOTSIZE; - - xtlck = (struct xtlock *) linelock; - xtlck->header.offset = 0; - xtlck->header.length = 2; - - if (type & tlckNEW) { - xtlck->lwm.offset = XTENTRYSTART; - } else { - if (mp->xflag & COMMIT_PAGE) - p = (xtpage_t *) mp->data; - else - p = &jfs_ip->i_xtroot; - xtlck->lwm.offset = - le16_to_cpu(p->header.nextindex); - } - xtlck->lwm.length = 0; /* ! 
*/ - xtlck->twm.offset = 0; - xtlck->hwm.offset = 0; - - xtlck->index = 2; - break; - - case tlckINODE: - linelock->l2linesize = L2INODESLOTSIZE; - break; - - case tlckDATA: - linelock->l2linesize = L2DATASLOTSIZE; - break; - - default: - jfs_err("UFO tlock:0x%p", tlck); - } - - /* - * update tlock vector - */ - grantLock: - tlck->type |= type; - - return tlck; - - /* - * page is being locked by another transaction: - */ - waitLock: - /* Only locks on ipimap or ipaimap should reach here */ - /* assert(jfs_ip->fileset == AGGREGATE_I); */ - if (jfs_ip->fileset != AGGREGATE_I) { - printk(KERN_ERR "txLock: trying to lock locked page!"); - print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4, - ip, sizeof(*ip), 0); - print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4, - mp, sizeof(*mp), 0); - print_hex_dump(KERN_ERR, "Locker's tblock: ", - DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid), - sizeof(struct tblock), 0); - print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4, - tlck, sizeof(*tlck), 0); - BUG(); - } - INCREMENT(stattx.waitlock); /* statistics */ - TXN_UNLOCK(); - release_metapage(mp); - TXN_LOCK(); - xtid = tlck->tid; /* reacquire after dropping TXN_LOCK */ - - jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d", - tid, xtid, lid); - - /* Recheck everything since dropping TXN_LOCK */ - if (xtid && (tlck->mp == mp) && (mp->lid == lid)) - TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); - else - TXN_UNLOCK(); - jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid); - - return NULL; -} - -/* - * NAME: txRelease() - * - * FUNCTION: Release buffers associated with transaction locks, but don't - * mark homeok yet. The allows other transactions to modify - * buffers, but won't let them go to disk until commit record - * actually gets written. - * - * PARAMETER: - * tblk - - * - * RETURN: Errors from subroutines. 
- */ -static void txRelease(struct tblock * tblk) -{ - struct metapage *mp; - lid_t lid; - struct tlock *tlck; - - TXN_LOCK(); - - for (lid = tblk->next; lid; lid = tlck->next) { - tlck = lid_to_tlock(lid); - if ((mp = tlck->mp) != NULL && - (tlck->type & tlckBTROOT) == 0) { - assert(mp->xflag & COMMIT_PAGE); - mp->lid = 0; - } - } - - /* - * wakeup transactions waiting on a page locked - * by the current transaction - */ - TXN_WAKEUP(&tblk->waitor); - - TXN_UNLOCK(); -} - -/* - * NAME: txUnlock() - * - * FUNCTION: Initiates pageout of pages modified by tid in journalled - * objects and frees their lockwords. - */ -static void txUnlock(struct tblock * tblk) -{ - struct tlock *tlck; - struct linelock *linelock; - lid_t lid, next, llid, k; - struct metapage *mp; - struct jfs_log *log; - int difft, diffp; - unsigned long flags; - - jfs_info("txUnlock: tblk = 0x%p", tblk); - log = JFS_SBI(tblk->sb)->log; - - /* - * mark page under tlock homeok (its log has been written): - */ - for (lid = tblk->next; lid; lid = next) { - tlck = lid_to_tlock(lid); - next = tlck->next; - - jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck); - - /* unbind page from tlock */ - if ((mp = tlck->mp) != NULL && - (tlck->type & tlckBTROOT) == 0) { - assert(mp->xflag & COMMIT_PAGE); - - /* hold buffer - */ - hold_metapage(mp); - - assert(mp->nohomeok > 0); - _metapage_homeok(mp); - - /* inherit younger/larger clsn */ - LOGSYNC_LOCK(log, flags); - if (mp->clsn) { - logdiff(difft, tblk->clsn, log); - logdiff(diffp, mp->clsn, log); - if (difft > diffp) - mp->clsn = tblk->clsn; - } else - mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log, flags); - - assert(!(tlck->flag & tlckFREEPAGE)); - - put_metapage(mp); - } - - /* insert tlock, and linelock(s) of the tlock if any, - * at head of freelist - */ - TXN_LOCK(); - - llid = ((struct linelock *) & tlck->lock)->next; - while (llid) { - linelock = (struct linelock *) lid_to_tlock(llid); - k = linelock->next; - txLockFree(llid); - llid = k; - } - 
txLockFree(lid); - - TXN_UNLOCK(); - } - tblk->next = tblk->last = 0; - - /* - * remove tblock from logsynclist - * (allocation map pages inherited lsn of tblk and - * has been inserted in logsync list at txUpdateMap()) - */ - if (tblk->lsn) { - LOGSYNC_LOCK(log, flags); - log->count--; - list_del(&tblk->synclist); - LOGSYNC_UNLOCK(log, flags); - } -} - -/* - * txMaplock() - * - * function: allocate a transaction lock for freed page/entry; - * for freed page, maplock is used as xtlock/dtlock type; - */ -struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) -{ - struct jfs_inode_info *jfs_ip = JFS_IP(ip); - lid_t lid; - struct tblock *tblk; - struct tlock *tlck; - struct maplock *maplock; - - TXN_LOCK(); - - /* - * allocate a tlock - */ - lid = txLockAlloc(); - tlck = lid_to_tlock(lid); - - /* - * initialize tlock - */ - tlck->tid = tid; - - /* bind the tlock and the object */ - tlck->flag = tlckINODELOCK; - if (S_ISDIR(ip->i_mode)) - tlck->flag |= tlckDIRECTORY; - tlck->ip = ip; - tlck->mp = NULL; - - tlck->type = type; - - /* - * enqueue transaction lock to transaction/inode - */ - /* insert the tlock at tail of transaction tlock list */ - if (tid) { - tblk = tid_to_tblock(tid); - if (tblk->next) - lid_to_tlock(tblk->last)->next = lid; - else - tblk->next = lid; - tlck->next = 0; - tblk->last = lid; - } - /* anonymous transaction: - * insert the tlock at head of inode anonymous tlock list - */ - else { - tlck->next = jfs_ip->atlhead; - jfs_ip->atlhead = lid; - if (tlck->next == 0) { - /* This inode's first anonymous transaction */ - jfs_ip->atltail = lid; - list_add_tail(&jfs_ip->anon_inode_list, - &TxAnchor.anon_list); - } - } - - TXN_UNLOCK(); - - /* initialize type dependent area for maplock */ - maplock = (struct maplock *) & tlck->lock; - maplock->next = 0; - maplock->maxcnt = 0; - maplock->index = 0; - - return tlck; -} - -/* - * txLinelock() - * - * function: allocate a transaction lock for log vector list - */ -struct linelock *txLinelock(struct 
linelock * tlock) -{ - lid_t lid; - struct tlock *tlck; - struct linelock *linelock; - - TXN_LOCK(); - - /* allocate a TxLock structure */ - lid = txLockAlloc(); - tlck = lid_to_tlock(lid); - - TXN_UNLOCK(); - - /* initialize linelock */ - linelock = (struct linelock *) tlck; - linelock->next = 0; - linelock->flag = tlckLINELOCK; - linelock->maxcnt = TLOCKLONG; - linelock->index = 0; - if (tlck->flag & tlckDIRECTORY) - linelock->flag |= tlckDIRECTORY; - - /* append linelock after tlock */ - linelock->next = tlock->next; - tlock->next = lid; - - return linelock; -} - -/* - * transaction commit management - * ----------------------------- - */ - -/* - * NAME: txCommit() - * - * FUNCTION: commit the changes to the objects specified in - * clist. For journalled segments only the - * changes of the caller are committed, ie by tid. - * for non-journalled segments the data are flushed to - * disk and then the change to the disk inode and indirect - * blocks committed (so blocks newly allocated to the - * segment will be made a part of the segment atomically). - * - * all of the segments specified in clist must be in - * one file system. no more than 6 segments are needed - * to handle all unix svcs. - * - * if the i_nlink field (i.e. disk inode link count) - * is zero, and the type of inode is a regular file or - * directory, or symbolic link , the inode is truncated - * to zero length. the truncation is committed but the - * VM resources are unaffected until it is closed (see - * iput and iclose). - * - * PARAMETER: - * - * RETURN: - * - * serialization: - * on entry the inode lock on each segment is assumed - * to be held. 
- * - * i/o error: - */ -int txCommit(tid_t tid, /* transaction identifier */ - int nip, /* number of inodes to commit */ - struct inode **iplist, /* list of inode to commit */ - int flag) -{ - int rc = 0; - struct commit cd; - struct jfs_log *log; - struct tblock *tblk; - struct lrd *lrd; - struct inode *ip; - struct jfs_inode_info *jfs_ip; - int k, n; - ino_t top; - struct super_block *sb; - - jfs_info("txCommit, tid = %d, flag = %d", tid, flag); - /* is read-only file system ? */ - if (isReadOnly(iplist[0])) { - rc = -EROFS; - goto TheEnd; - } - - sb = cd.sb = iplist[0]->i_sb; - cd.tid = tid; - - if (tid == 0) - tid = txBegin(sb, 0); - tblk = tid_to_tblock(tid); - - /* - * initialize commit structure - */ - log = JFS_SBI(sb)->log; - cd.log = log; - - /* initialize log record descriptor in commit */ - lrd = &cd.lrd; - lrd->logtid = cpu_to_le32(tblk->logtid); - lrd->backchain = 0; - - tblk->xflag |= flag; - - if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) - tblk->xflag |= COMMIT_LAZY; - /* - * prepare non-journaled objects for commit - * - * flush data pages of non-journaled file - * to prevent the file getting non-initialized disk blocks - * in case of crash. - * (new blocks - ) - */ - cd.iplist = iplist; - cd.nip = nip; - - /* - * acquire transaction lock on (on-disk) inodes - * - * update on-disk inode from in-memory inode - * acquiring transaction locks for AFTER records - * on the on-disk inode of file object - * - * sort the inodes array by inode number in descending order - * to prevent deadlock when acquiring transaction lock - * of on-disk inodes on multiple on-disk inode pages by - * multiple concurrent transactions - */ - for (k = 0; k < cd.nip; k++) { - top = (cd.iplist[k])->i_ino; - for (n = k + 1; n < cd.nip; n++) { - ip = cd.iplist[n]; - if (ip->i_ino > top) { - top = ip->i_ino; - cd.iplist[n] = cd.iplist[k]; - cd.iplist[k] = ip; - } - } - - ip = cd.iplist[k]; - jfs_ip = JFS_IP(ip); - - /* - * BUGBUG - This code has temporarily been removed. 
The - * intent is to ensure that any file data is written before - * the metadata is committed to the journal. This prevents - * uninitialized data from appearing in a file after the - * journal has been replayed. (The uninitialized data - * could be sensitive data removed by another user.) - * - * The problem now is that we are holding the IWRITELOCK - * on the inode, and calling filemap_fdatawrite on an - * unmapped page will cause a deadlock in jfs_get_block. - * - * The long term solution is to pare down the use of - * IWRITELOCK. We are currently holding it too long. - * We could also be smarter about which data pages need - * to be written before the transaction is committed and - * when we don't need to worry about it at all. - * - * if ((!S_ISDIR(ip->i_mode)) - * && (tblk->flag & COMMIT_DELETE) == 0) - * filemap_write_and_wait(ip->i_mapping); - */ - - /* - * Mark inode as not dirty. It will still be on the dirty - * inode list, but we'll know not to commit it again unless - * it gets marked dirty again - */ - clear_cflag(COMMIT_Dirty, ip); - - /* inherit anonymous tlock(s) of inode */ - if (jfs_ip->atlhead) { - lid_to_tlock(jfs_ip->atltail)->next = tblk->next; - tblk->next = jfs_ip->atlhead; - if (!tblk->last) - tblk->last = jfs_ip->atltail; - jfs_ip->atlhead = jfs_ip->atltail = 0; - TXN_LOCK(); - list_del_init(&jfs_ip->anon_inode_list); - TXN_UNLOCK(); - } - - /* - * acquire transaction lock on on-disk inode page - * (become first tlock of the tblk's tlock list) - */ - if (((rc = diWrite(tid, ip)))) - goto out; - } - - /* - * write log records from transaction locks - * - * txUpdateMap() resets XAD_NEW in XAD. - */ - if ((rc = txLog(log, tblk, &cd))) - goto TheEnd; - - /* - * Ensure that inode isn't reused before - * lazy commit thread finishes processing - */ - if (tblk->xflag & COMMIT_DELETE) { - ihold(tblk->u.ip); - /* - * Avoid a rare deadlock - * - * If the inode is locked, we may be blocked in - * jfs_commit_inode. 
If so, we don't want the - * lazy_commit thread doing the last iput() on the inode - * since that may block on the locked inode. Instead, - * commit the transaction synchronously, so the last iput - * will be done by the calling thread (or later) - */ - /* - * I believe this code is no longer needed. Splitting I_LOCK - * into two bits, I_NEW and I_SYNC should prevent this - * deadlock as well. But since I don't have a JFS testload - * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done. - * Joern - */ - if (tblk->u.ip->i_state & I_SYNC) - tblk->xflag &= ~COMMIT_LAZY; - } - - ASSERT((!(tblk->xflag & COMMIT_DELETE)) || - ((tblk->u.ip->i_nlink == 0) && - !test_cflag(COMMIT_Nolink, tblk->u.ip))); - - /* - * write COMMIT log record - */ - lrd->type = cpu_to_le16(LOG_COMMIT); - lrd->length = 0; - lmLog(log, tblk, lrd, NULL); - - lmGroupCommit(log, tblk); - - /* - * - transaction is now committed - - */ - - /* - * force pages in careful update - * (imap addressing structure update) - */ - if (flag & COMMIT_FORCE) - txForce(tblk); - - /* - * update allocation map. - * - * update inode allocation map and inode: - * free pager lock on memory object of inode if any. - * update block allocation map. - * - * txUpdateMap() resets XAD_NEW in XAD. - */ - if (tblk->xflag & COMMIT_FORCE) - txUpdateMap(tblk); - - /* - * free transaction locks and pageout/free pages - */ - txRelease(tblk); - - if ((tblk->flag & tblkGC_LAZY) == 0) - txUnlock(tblk); - - - /* - * reset in-memory object state - */ - for (k = 0; k < cd.nip; k++) { - ip = cd.iplist[k]; - jfs_ip = JFS_IP(ip); - - /* - * reset in-memory inode state - */ - jfs_ip->bxflag = 0; - jfs_ip->blid = 0; - } - - out: - if (rc != 0) - txAbort(tid, 1); - - TheEnd: - jfs_info("txCommit: tid = %d, returning %d", tid, rc); - return rc; -} - -/* - * NAME: txLog() - * - * FUNCTION: Writes AFTER log records for all lines modified - * by tid for segments specified by inodes in comdata. 
- * Code assumes only WRITELOCKS are recorded in lockwords. - * - * PARAMETERS: - * - * RETURN : - */ -static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd) -{ - int rc = 0; - struct inode *ip; - lid_t lid; - struct tlock *tlck; - struct lrd *lrd = &cd->lrd; - - /* - * write log record(s) for each tlock of transaction, - */ - for (lid = tblk->next; lid; lid = tlck->next) { - tlck = lid_to_tlock(lid); - - tlck->flag |= tlckLOG; - - /* initialize lrd common */ - ip = tlck->ip; - lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate); - lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset); - lrd->log.redopage.inode = cpu_to_le32(ip->i_ino); - - /* write log record of page from the tlock */ - switch (tlck->type & tlckTYPE) { - case tlckXTREE: - xtLog(log, tblk, lrd, tlck); - break; - - case tlckDTREE: - dtLog(log, tblk, lrd, tlck); - break; - - case tlckINODE: - diLog(log, tblk, lrd, tlck, cd); - break; - - case tlckMAP: - mapLog(log, tblk, lrd, tlck); - break; - - case tlckDATA: - dataLog(log, tblk, lrd, tlck); - break; - - default: - jfs_err("UFO tlock:0x%p", tlck); - } - } - - return rc; -} - -/* - * diLog() - * - * function: log inode tlock and format maplock to update bmap; - */ -static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck, struct commit * cd) -{ - int rc = 0; - struct metapage *mp; - pxd_t *pxd; - struct pxd_lock *pxdlock; - - mp = tlck->mp; - - /* initialize as REDOPAGE record format */ - lrd->log.redopage.type = cpu_to_le16(LOG_INODE); - lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE); - - pxd = &lrd->log.redopage.pxd; - - /* - * inode after image - */ - if (tlck->type & tlckENTRY) { - /* log after-image for logredo(): */ - lrd->type = cpu_to_le16(LOG_REDOPAGE); - PXDaddress(pxd, mp->index); - PXDlength(pxd, - mp->logical_size >> tblk->sb->s_blocksize_bits); - lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); - - /* mark page as homeward bound */ 
- tlck->flag |= tlckWRITEPAGE; - } else if (tlck->type & tlckFREE) { - /* - * free inode extent - * - * (pages of the freed inode extent have been invalidated and - * a maplock for free of the extent has been formatted at - * txLock() time); - * - * the tlock had been acquired on the inode allocation map page - * (iag) that specifies the freed extent, even though the map - * page is not itself logged, to prevent pageout of the map - * page before the log; - */ - - /* log LOG_NOREDOINOEXT of the freed inode extent for - * logredo() to start NoRedoPage filters, and to update - * imap and bmap for free of the extent; - */ - lrd->type = cpu_to_le16(LOG_NOREDOINOEXT); - /* - * For the LOG_NOREDOINOEXT record, we need - * to pass the IAG number and inode extent - * index (within that IAG) from which the - * the extent being released. These have been - * passed to us in the iplist[1] and iplist[2]. - */ - lrd->log.noredoinoext.iagnum = - cpu_to_le32((u32) (size_t) cd->iplist[1]); - lrd->log.noredoinoext.inoext_idx = - cpu_to_le32((u32) (size_t) cd->iplist[2]); - - pxdlock = (struct pxd_lock *) & tlck->lock; - *pxd = pxdlock->pxd; - lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); - - /* update bmap */ - tlck->flag |= tlckUPDATEMAP; - - /* mark page as homeward bound */ - tlck->flag |= tlckWRITEPAGE; - } else - jfs_err("diLog: UFO type tlck:0x%p", tlck); -#ifdef _JFS_WIP - /* - * alloc/free external EA extent - * - * a maplock for txUpdateMap() to update bPWMAP for alloc/free - * of the extent has been formatted at txLock() time; - */ - else { - assert(tlck->type & tlckEA); - - /* log LOG_UPDATEMAP for logredo() to update bmap for - * alloc of new (and free of old) external EA extent; - */ - lrd->type = cpu_to_le16(LOG_UPDATEMAP); - pxdlock = (struct pxd_lock *) & tlck->lock; - nlock = pxdlock->index; - for (i = 0; i < nlock; i++, pxdlock++) { - if (pxdlock->flag & mlckALLOCPXD) - lrd->log.updatemap.type = - cpu_to_le16(LOG_ALLOCPXD); - else - 
lrd->log.updatemap.type = - cpu_to_le16(LOG_FREEPXD); - lrd->log.updatemap.nxd = cpu_to_le16(1); - lrd->log.updatemap.pxd = pxdlock->pxd; - lrd->backchain = - cpu_to_le32(lmLog(log, tblk, lrd, NULL)); - } - - /* update bmap */ - tlck->flag |= tlckUPDATEMAP; - } -#endif /* _JFS_WIP */ - - return rc; -} - -/* - * dataLog() - * - * function: log data tlock - */ -static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck) -{ - struct metapage *mp; - pxd_t *pxd; - - mp = tlck->mp; - - /* initialize as REDOPAGE record format */ - lrd->log.redopage.type = cpu_to_le16(LOG_DATA); - lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE); - - pxd = &lrd->log.redopage.pxd; - - /* log after-image for logredo(): */ - lrd->type = cpu_to_le16(LOG_REDOPAGE); - - if (jfs_dirtable_inline(tlck->ip)) { - /* - * The table has been truncated, we've must have deleted - * the last entry, so don't bother logging this - */ - mp->lid = 0; - grab_metapage(mp); - metapage_homeok(mp); - discard_metapage(mp); - tlck->mp = NULL; - return 0; - } - - PXDaddress(pxd, mp->index); - PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); - - lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); - - /* mark page as homeward bound */ - tlck->flag |= tlckWRITEPAGE; - - return 0; -} - -/* - * dtLog() - * - * function: log dtree tlock and format maplock to update bmap; - */ -static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck) -{ - struct metapage *mp; - struct pxd_lock *pxdlock; - pxd_t *pxd; - - mp = tlck->mp; - - /* initialize as REDOPAGE/NOREDOPAGE record format */ - lrd->log.redopage.type = cpu_to_le16(LOG_DTREE); - lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE); - - pxd = &lrd->log.redopage.pxd; - - if (tlck->type & tlckBTROOT) - lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); - - /* - * page extension via relocation: entry insertion; - * page extension in-place: entry insertion; 
- * new right page from page split, reinitialized in-line - * root from root page split: entry insertion; - */ - if (tlck->type & (tlckNEW | tlckEXTEND)) { - /* log after-image of the new page for logredo(): - * mark log (LOG_NEW) for logredo() to initialize - * freelist and update bmap for alloc of the new page; - */ - lrd->type = cpu_to_le16(LOG_REDOPAGE); - if (tlck->type & tlckEXTEND) - lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND); - else - lrd->log.redopage.type |= cpu_to_le16(LOG_NEW); - PXDaddress(pxd, mp->index); - PXDlength(pxd, - mp->logical_size >> tblk->sb->s_blocksize_bits); - lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); - - /* format a maplock for txUpdateMap() to update bPMAP for - * alloc of the new page; - */ - if (tlck->type & tlckBTROOT) - return; - tlck->flag |= tlckUPDATEMAP; - pxdlock = (struct pxd_lock *) & tlck->lock; - pxdlock->flag = mlckALLOCPXD; - pxdlock->pxd = *pxd; - - pxdlock->index = 1; - - /* mark page as homeward bound */ - tlck->flag |= tlckWRITEPAGE; - return; - } - - /* - * entry insertion/deletion, - * sibling page link update (old right page before split); - */ - if (tlck->type & (tlckENTRY | tlckRELINK)) { - /* log after-image for logredo(): */ - lrd->type = cpu_to_le16(LOG_REDOPAGE); - PXDaddress(pxd, mp->index); - PXDlength(pxd, - mp->logical_size >> tblk->sb->s_blocksize_bits); - lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); - - /* mark page as homeward bound */ - tlck->flag |= tlckWRITEPAGE; - return; - } - - /* - * page deletion: page has been invalidated - * page relocation: source extent - * - * a maplock for free of the page has been formatted - * at txLock() time); - */ - if (tlck->type & (tlckFREE | tlckRELOCATE)) { - /* log LOG_NOREDOPAGE of the deleted page for logredo() - * to start NoRedoPage filter and to update bmap for free - * of the deletd page - */ - lrd->type = cpu_to_le16(LOG_NOREDOPAGE); - pxdlock = (struct pxd_lock *) & tlck->lock; - *pxd = pxdlock->pxd; - 
lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); - - /* a maplock for txUpdateMap() for free of the page - * has been formatted at txLock() time; - */ - tlck->flag |= tlckUPDATEMAP; - } - return; -} - -/* - * xtLog() - * - * function: log xtree tlock and format maplock to update bmap; - */ -static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck) -{ - struct inode *ip; - struct metapage *mp; - xtpage_t *p; - struct xtlock *xtlck; - struct maplock *maplock; - struct xdlistlock *xadlock; - struct pxd_lock *pxdlock; - pxd_t *page_pxd; - int next, lwm, hwm; - - ip = tlck->ip; - mp = tlck->mp; - - /* initialize as REDOPAGE/NOREDOPAGE record format */ - lrd->log.redopage.type = cpu_to_le16(LOG_XTREE); - lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE); - - page_pxd = &lrd->log.redopage.pxd; - - if (tlck->type & tlckBTROOT) { - lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); - p = &JFS_IP(ip)->i_xtroot; - if (S_ISDIR(ip->i_mode)) - lrd->log.redopage.type |= - cpu_to_le16(LOG_DIR_XTREE); - } else - p = (xtpage_t *) mp->data; - next = le16_to_cpu(p->header.nextindex); - - xtlck = (struct xtlock *) & tlck->lock; - - maplock = (struct maplock *) & tlck->lock; - xadlock = (struct xdlistlock *) maplock; - - /* - * entry insertion/extension; - * sibling page link update (old right page before split); - */ - if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) { - /* log after-image for logredo(): - * logredo() will update bmap for alloc of new/extended - * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from - * after-image of XADlist; - * logredo() resets (XAD_NEW|XAD_EXTEND) flag when - * applying the after-image to the meta-data page. 
- */ - lrd->type = cpu_to_le16(LOG_REDOPAGE); - PXDaddress(page_pxd, mp->index); - PXDlength(page_pxd, - mp->logical_size >> tblk->sb->s_blocksize_bits); - lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); - - /* format a maplock for txUpdateMap() to update bPMAP - * for alloc of new/extended extents of XAD[lwm:next) - * from the page itself; - * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. - */ - lwm = xtlck->lwm.offset; - if (lwm == 0) - lwm = XTPAGEMAXSLOT; - - if (lwm == next) - goto out; - if (lwm > next) { - jfs_err("xtLog: lwm > next\n"); - goto out; - } - tlck->flag |= tlckUPDATEMAP; - xadlock->flag = mlckALLOCXADLIST; - xadlock->count = next - lwm; - if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { - int i; - pxd_t *pxd; - /* - * Lazy commit may allow xtree to be modified before - * txUpdateMap runs. Copy xad into linelock to - * preserve correct data. - * - * We can fit twice as may pxd's as xads in the lock - */ - xadlock->flag = mlckALLOCPXDLIST; - pxd = xadlock->xdlist = &xtlck->pxdlock; - for (i = 0; i < xadlock->count; i++) { - PXDaddress(pxd, addressXAD(&p->xad[lwm + i])); - PXDlength(pxd, lengthXAD(&p->xad[lwm + i])); - p->xad[lwm + i].flag &= - ~(XAD_NEW | XAD_EXTENDED); - pxd++; - } - } else { - /* - * xdlist will point to into inode's xtree, ensure - * that transaction is not committed lazily. - */ - xadlock->flag = mlckALLOCXADLIST; - xadlock->xdlist = &p->xad[lwm]; - tblk->xflag &= ~COMMIT_LAZY; - } - jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d " - "count:%d", tlck->ip, mp, tlck, lwm, xadlock->count); - - maplock->index = 1; - - out: - /* mark page as homeward bound */ - tlck->flag |= tlckWRITEPAGE; - - return; - } - - /* - * page deletion: file deletion/truncation (ref. 
xtTruncate()) - * - * (page will be invalidated after log is written and bmap - * is updated from the page); - */ - if (tlck->type & tlckFREE) { - /* LOG_NOREDOPAGE log for NoRedoPage filter: - * if page free from file delete, NoRedoFile filter from - * inode image of zero link count will subsume NoRedoPage - * filters for each page; - * if page free from file truncattion, write NoRedoPage - * filter; - * - * upadte of block allocation map for the page itself: - * if page free from deletion and truncation, LOG_UPDATEMAP - * log for the page itself is generated from processing - * its parent page xad entries; - */ - /* if page free from file truncation, log LOG_NOREDOPAGE - * of the deleted page for logredo() to start NoRedoPage - * filter for the page; - */ - if (tblk->xflag & COMMIT_TRUNCATE) { - /* write NOREDOPAGE for the page */ - lrd->type = cpu_to_le16(LOG_NOREDOPAGE); - PXDaddress(page_pxd, mp->index); - PXDlength(page_pxd, - mp->logical_size >> tblk->sb-> - s_blocksize_bits); - lrd->backchain = - cpu_to_le32(lmLog(log, tblk, lrd, NULL)); - - if (tlck->type & tlckBTROOT) { - /* Empty xtree must be logged */ - lrd->type = cpu_to_le16(LOG_REDOPAGE); - lrd->backchain = - cpu_to_le32(lmLog(log, tblk, lrd, tlck)); - } - } - - /* init LOG_UPDATEMAP of the freed extents - * XAD[XTENTRYSTART:hwm) from the deleted page itself - * for logredo() to update bmap; - */ - lrd->type = cpu_to_le16(LOG_UPDATEMAP); - lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST); - xtlck = (struct xtlock *) & tlck->lock; - hwm = xtlck->hwm.offset; - lrd->log.updatemap.nxd = - cpu_to_le16(hwm - XTENTRYSTART + 1); - /* reformat linelock for lmLog() */ - xtlck->header.offset = XTENTRYSTART; - xtlck->header.length = hwm - XTENTRYSTART + 1; - xtlck->index = 1; - lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); - - /* format a maplock for txUpdateMap() to update bmap - * to free extents of XAD[XTENTRYSTART:hwm) from the - * deleted page itself; - */ - tlck->flag |= tlckUPDATEMAP; 
- xadlock->count = hwm - XTENTRYSTART + 1; - if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { - int i; - pxd_t *pxd; - /* - * Lazy commit may allow xtree to be modified before - * txUpdateMap runs. Copy xad into linelock to - * preserve correct data. - * - * We can fit twice as may pxd's as xads in the lock - */ - xadlock->flag = mlckFREEPXDLIST; - pxd = xadlock->xdlist = &xtlck->pxdlock; - for (i = 0; i < xadlock->count; i++) { - PXDaddress(pxd, - addressXAD(&p->xad[XTENTRYSTART + i])); - PXDlength(pxd, - lengthXAD(&p->xad[XTENTRYSTART + i])); - pxd++; - } - } else { - /* - * xdlist will point to into inode's xtree, ensure - * that transaction is not committed lazily. - */ - xadlock->flag = mlckFREEXADLIST; - xadlock->xdlist = &p->xad[XTENTRYSTART]; - tblk->xflag &= ~COMMIT_LAZY; - } - jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2", - tlck->ip, mp, xadlock->count); - - maplock->index = 1; - - /* mark page as invalid */ - if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode)) - && !(tlck->type & tlckBTROOT)) - tlck->flag |= tlckFREEPAGE; - /* - else (tblk->xflag & COMMIT_PMAP) - ? release the page; - */ - return; - } - - /* - * page/entry truncation: file truncation (ref. xtTruncate()) - * - * |----------+------+------+---------------| - * | | | - * | | hwm - hwm before truncation - * | next - truncation point - * lwm - lwm before truncation - * header ? - */ - if (tlck->type & tlckTRUNCATE) { - /* This odd declaration suppresses a bogus gcc warning */ - pxd_t pxd = pxd; /* truncated extent of xad */ - int twm; - - /* - * For truncation the entire linelock may be used, so it would - * be difficult to store xad list in linelock itself. - * Therefore, we'll just force transaction to be committed - * synchronously, so that xtree pages won't be changed before - * txUpdateMap runs. 
- */ - tblk->xflag &= ~COMMIT_LAZY; - lwm = xtlck->lwm.offset; - if (lwm == 0) - lwm = XTPAGEMAXSLOT; - hwm = xtlck->hwm.offset; - twm = xtlck->twm.offset; - - /* - * write log records - */ - /* log after-image for logredo(): - * - * logredo() will update bmap for alloc of new/extended - * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from - * after-image of XADlist; - * logredo() resets (XAD_NEW|XAD_EXTEND) flag when - * applying the after-image to the meta-data page. - */ - lrd->type = cpu_to_le16(LOG_REDOPAGE); - PXDaddress(page_pxd, mp->index); - PXDlength(page_pxd, - mp->logical_size >> tblk->sb->s_blocksize_bits); - lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); - - /* - * truncate entry XAD[twm == next - 1]: - */ - if (twm == next - 1) { - /* init LOG_UPDATEMAP for logredo() to update bmap for - * free of truncated delta extent of the truncated - * entry XAD[next - 1]: - * (xtlck->pxdlock = truncated delta extent); - */ - pxdlock = (struct pxd_lock *) & xtlck->pxdlock; - /* assert(pxdlock->type & tlckTRUNCATE); */ - lrd->type = cpu_to_le16(LOG_UPDATEMAP); - lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); - lrd->log.updatemap.nxd = cpu_to_le16(1); - lrd->log.updatemap.pxd = pxdlock->pxd; - pxd = pxdlock->pxd; /* save to format maplock */ - lrd->backchain = - cpu_to_le32(lmLog(log, tblk, lrd, NULL)); - } - - /* - * free entries XAD[next:hwm]: - */ - if (hwm >= next) { - /* init LOG_UPDATEMAP of the freed extents - * XAD[next:hwm] from the deleted page itself - * for logredo() to update bmap; - */ - lrd->type = cpu_to_le16(LOG_UPDATEMAP); - lrd->log.updatemap.type = - cpu_to_le16(LOG_FREEXADLIST); - xtlck = (struct xtlock *) & tlck->lock; - hwm = xtlck->hwm.offset; - lrd->log.updatemap.nxd = - cpu_to_le16(hwm - next + 1); - /* reformat linelock for lmLog() */ - xtlck->header.offset = next; - xtlck->header.length = hwm - next + 1; - xtlck->index = 1; - lrd->backchain = - cpu_to_le32(lmLog(log, tblk, lrd, tlck)); - } - - /* - * format maplock(s) 
for txUpdateMap() to update bmap - */ - maplock->index = 0; - - /* - * allocate entries XAD[lwm:next): - */ - if (lwm < next) { - /* format a maplock for txUpdateMap() to update bPMAP - * for alloc of new/extended extents of XAD[lwm:next) - * from the page itself; - * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. - */ - tlck->flag |= tlckUPDATEMAP; - xadlock->flag = mlckALLOCXADLIST; - xadlock->count = next - lwm; - xadlock->xdlist = &p->xad[lwm]; - - jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d " - "lwm:%d next:%d", - tlck->ip, mp, xadlock->count, lwm, next); - maplock->index++; - xadlock++; - } - - /* - * truncate entry XAD[twm == next - 1]: - */ - if (twm == next - 1) { - /* format a maplock for txUpdateMap() to update bmap - * to free truncated delta extent of the truncated - * entry XAD[next - 1]; - * (xtlck->pxdlock = truncated delta extent); - */ - tlck->flag |= tlckUPDATEMAP; - pxdlock = (struct pxd_lock *) xadlock; - pxdlock->flag = mlckFREEPXD; - pxdlock->count = 1; - pxdlock->pxd = pxd; - - jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d " - "hwm:%d", ip, mp, pxdlock->count, hwm); - maplock->index++; - xadlock++; - } - - /* - * free entries XAD[next:hwm]: - */ - if (hwm >= next) { - /* format a maplock for txUpdateMap() to update bmap - * to free extents of XAD[next:hwm] from thedeleted - * page itself; - */ - tlck->flag |= tlckUPDATEMAP; - xadlock->flag = mlckFREEXADLIST; - xadlock->count = hwm - next + 1; - xadlock->xdlist = &p->xad[next]; - - jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d " - "next:%d hwm:%d", - tlck->ip, mp, xadlock->count, next, hwm); - maplock->index++; - } - - /* mark page as homeward bound */ - tlck->flag |= tlckWRITEPAGE; - } - return; -} - -/* - * mapLog() - * - * function: log from maplock of freed data extents; - */ -static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck) -{ - struct pxd_lock *pxdlock; - int i, nlock; - pxd_t *pxd; - - /* - * page relocation: free the 
source page extent - * - * a maplock for txUpdateMap() for free of the page - * has been formatted at txLock() time saving the src - * relocated page address; - */ - if (tlck->type & tlckRELOCATE) { - /* log LOG_NOREDOPAGE of the old relocated page - * for logredo() to start NoRedoPage filter; - */ - lrd->type = cpu_to_le16(LOG_NOREDOPAGE); - pxdlock = (struct pxd_lock *) & tlck->lock; - pxd = &lrd->log.redopage.pxd; - *pxd = pxdlock->pxd; - lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); - - /* (N.B. currently, logredo() does NOT update bmap - * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE); - * if page free from relocation, LOG_UPDATEMAP log is - * specifically generated now for logredo() - * to update bmap for free of src relocated page; - * (new flag LOG_RELOCATE may be introduced which will - * inform logredo() to start NORedoPage filter and also - * update block allocation map at the same time, thus - * avoiding an extra log write); - */ - lrd->type = cpu_to_le16(LOG_UPDATEMAP); - lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); - lrd->log.updatemap.nxd = cpu_to_le16(1); - lrd->log.updatemap.pxd = pxdlock->pxd; - lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); - - /* a maplock for txUpdateMap() for free of the page - * has been formatted at txLock() time; - */ - tlck->flag |= tlckUPDATEMAP; - return; - } - /* - - * Otherwise it's not a relocate request - * - */ - else { - /* log LOG_UPDATEMAP for logredo() to update bmap for - * free of truncated/relocated delta extent of the data; - * e.g.: external EA extent, relocated/truncated extent - * from xtTailgate(); - */ - lrd->type = cpu_to_le16(LOG_UPDATEMAP); - pxdlock = (struct pxd_lock *) & tlck->lock; - nlock = pxdlock->index; - for (i = 0; i < nlock; i++, pxdlock++) { - if (pxdlock->flag & mlckALLOCPXD) - lrd->log.updatemap.type = - cpu_to_le16(LOG_ALLOCPXD); - else - lrd->log.updatemap.type = - cpu_to_le16(LOG_FREEPXD); - lrd->log.updatemap.nxd = cpu_to_le16(1); - 
lrd->log.updatemap.pxd = pxdlock->pxd; - lrd->backchain = - cpu_to_le32(lmLog(log, tblk, lrd, NULL)); - jfs_info("mapLog: xaddr:0x%lx xlen:0x%x", - (ulong) addressPXD(&pxdlock->pxd), - lengthPXD(&pxdlock->pxd)); - } - - /* update bmap */ - tlck->flag |= tlckUPDATEMAP; - } -} - -/* - * txEA() - * - * function: acquire maplock for EA/ACL extents or - * set COMMIT_INLINE flag; - */ -void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) -{ - struct tlock *tlck = NULL; - struct pxd_lock *maplock = NULL, *pxdlock = NULL; - - /* - * format maplock for alloc of new EA extent - */ - if (newea) { - /* Since the newea could be a completely zeroed entry we need to - * check for the two flags which indicate we should actually - * commit new EA data - */ - if (newea->flag & DXD_EXTENT) { - tlck = txMaplock(tid, ip, tlckMAP); - maplock = (struct pxd_lock *) & tlck->lock; - pxdlock = (struct pxd_lock *) maplock; - pxdlock->flag = mlckALLOCPXD; - PXDaddress(&pxdlock->pxd, addressDXD(newea)); - PXDlength(&pxdlock->pxd, lengthDXD(newea)); - pxdlock++; - maplock->index = 1; - } else if (newea->flag & DXD_INLINE) { - tlck = NULL; - - set_cflag(COMMIT_Inlineea, ip); - } - } - - /* - * format maplock for free of old EA extent - */ - if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) { - if (tlck == NULL) { - tlck = txMaplock(tid, ip, tlckMAP); - maplock = (struct pxd_lock *) & tlck->lock; - pxdlock = (struct pxd_lock *) maplock; - maplock->index = 0; - } - pxdlock->flag = mlckFREEPXD; - PXDaddress(&pxdlock->pxd, addressDXD(oldea)); - PXDlength(&pxdlock->pxd, lengthDXD(oldea)); - maplock->index++; - } -} - -/* - * txForce() - * - * function: synchronously write pages locked by transaction - * after txLog() but before txUpdateMap(); - */ -static void txForce(struct tblock * tblk) -{ - struct tlock *tlck; - lid_t lid, next; - struct metapage *mp; - - /* - * reverse the order of transaction tlocks in - * careful update order of address index pages - * (right to 
left, bottom up) - */ - tlck = lid_to_tlock(tblk->next); - lid = tlck->next; - tlck->next = 0; - while (lid) { - tlck = lid_to_tlock(lid); - next = tlck->next; - tlck->next = tblk->next; - tblk->next = lid; - lid = next; - } - - /* - * synchronously write the page, and - * hold the page for txUpdateMap(); - */ - for (lid = tblk->next; lid; lid = next) { - tlck = lid_to_tlock(lid); - next = tlck->next; - - if ((mp = tlck->mp) != NULL && - (tlck->type & tlckBTROOT) == 0) { - assert(mp->xflag & COMMIT_PAGE); - - if (tlck->flag & tlckWRITEPAGE) { - tlck->flag &= ~tlckWRITEPAGE; - - /* do not release page to freelist */ - force_metapage(mp); -#if 0 - /* - * The "right" thing to do here is to - * synchronously write the metadata. - * With the current implementation this - * is hard since write_metapage requires - * us to kunmap & remap the page. If we - * have tlocks pointing into the metadata - * pages, we don't want to do this. I think - * we can get by with synchronously writing - * the pages when they are released. - */ - assert(mp->nohomeok); - set_bit(META_dirty, &mp->flag); - set_bit(META_sync, &mp->flag); -#endif - } - } - } -} - -/* - * txUpdateMap() - * - * function: update persistent allocation map (and working map - * if appropriate); - * - * parameter: - */ -static void txUpdateMap(struct tblock * tblk) -{ - struct inode *ip; - struct inode *ipimap; - lid_t lid; - struct tlock *tlck; - struct maplock *maplock; - struct pxd_lock pxdlock; - int maptype; - int k, nlock; - struct metapage *mp = NULL; - - ipimap = JFS_SBI(tblk->sb)->ipimap; - - maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP; - - - /* - * update block allocation map - * - * update allocation state in pmap (and wmap) and - * update lsn of the pmap page; - */ - /* - * scan each tlock/page of transaction for block allocation/free: - * - * for each tlock/page of transaction, update map. - * ? are there tlock for pmap and pwmap at the same time ? 
- */ - for (lid = tblk->next; lid; lid = tlck->next) { - tlck = lid_to_tlock(lid); - - if ((tlck->flag & tlckUPDATEMAP) == 0) - continue; - - if (tlck->flag & tlckFREEPAGE) { - /* - * Another thread may attempt to reuse freed space - * immediately, so we want to get rid of the metapage - * before anyone else has a chance to get it. - * Lock metapage, update maps, then invalidate - * the metapage. - */ - mp = tlck->mp; - ASSERT(mp->xflag & COMMIT_PAGE); - grab_metapage(mp); - } - - /* - * extent list: - * . in-line PXD list: - * . out-of-line XAD list: - */ - maplock = (struct maplock *) & tlck->lock; - nlock = maplock->index; - - for (k = 0; k < nlock; k++, maplock++) { - /* - * allocate blocks in persistent map: - * - * blocks have been allocated from wmap at alloc time; - */ - if (maplock->flag & mlckALLOC) { - txAllocPMap(ipimap, maplock, tblk); - } - /* - * free blocks in persistent and working map: - * blocks will be freed in pmap and then in wmap; - * - * ? tblock specifies the PMAP/PWMAP based upon - * transaction - * - * free blocks in persistent map: - * blocks will be freed from wmap at last reference - * release of the object for regular files; - * - * Alway free blocks from both persistent & working - * maps for directories - */ - else { /* (maplock->flag & mlckFREE) */ - - if (tlck->flag & tlckDIRECTORY) - txFreeMap(ipimap, maplock, - tblk, COMMIT_PWMAP); - else - txFreeMap(ipimap, maplock, - tblk, maptype); - } - } - if (tlck->flag & tlckFREEPAGE) { - if (!(tblk->flag & tblkGC_LAZY)) { - /* This is equivalent to txRelease */ - ASSERT(mp->lid == lid); - tlck->mp->lid = 0; - } - assert(mp->nohomeok == 1); - metapage_homeok(mp); - discard_metapage(mp); - tlck->mp = NULL; - } - } - /* - * update inode allocation map - * - * update allocation state in pmap and - * update lsn of the pmap page; - * update in-memory inode flag/state - * - * unlock mapper/write lock - */ - if (tblk->xflag & COMMIT_CREATE) { - diUpdatePMap(ipimap, tblk->ino, false, tblk); - /* 
update persistent block allocation map - * for the allocation of inode extent; - */ - pxdlock.flag = mlckALLOCPXD; - pxdlock.pxd = tblk->u.ixpxd; - pxdlock.index = 1; - txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk); - } else if (tblk->xflag & COMMIT_DELETE) { - ip = tblk->u.ip; - diUpdatePMap(ipimap, ip->i_ino, true, tblk); - iput(ip); - } -} - -/* - * txAllocPMap() - * - * function: allocate from persistent map; - * - * parameter: - * ipbmap - - * malock - - * xad list: - * pxd: - * - * maptype - - * allocate from persistent map; - * free from persistent map; - * (e.g., tmp file - free from working map at releae - * of last reference); - * free from persistent and working map; - * - * lsn - log sequence number; - */ -static void txAllocPMap(struct inode *ip, struct maplock * maplock, - struct tblock * tblk) -{ - struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; - struct xdlistlock *xadlistlock; - xad_t *xad; - s64 xaddr; - int xlen; - struct pxd_lock *pxdlock; - struct xdlistlock *pxdlistlock; - pxd_t *pxd; - int n; - - /* - * allocate from persistent map; - */ - if (maplock->flag & mlckALLOCXADLIST) { - xadlistlock = (struct xdlistlock *) maplock; - xad = xadlistlock->xdlist; - for (n = 0; n < xadlistlock->count; n++, xad++) { - if (xad->flag & (XAD_NEW | XAD_EXTENDED)) { - xaddr = addressXAD(xad); - xlen = lengthXAD(xad); - dbUpdatePMap(ipbmap, false, xaddr, - (s64) xlen, tblk); - xad->flag &= ~(XAD_NEW | XAD_EXTENDED); - jfs_info("allocPMap: xaddr:0x%lx xlen:%d", - (ulong) xaddr, xlen); - } - } - } else if (maplock->flag & mlckALLOCPXD) { - pxdlock = (struct pxd_lock *) maplock; - xaddr = addressPXD(&pxdlock->pxd); - xlen = lengthPXD(&pxdlock->pxd); - dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk); - jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); - } else { /* (maplock->flag & mlckALLOCPXDLIST) */ - - pxdlistlock = (struct xdlistlock *) maplock; - pxd = pxdlistlock->xdlist; - for (n = 0; n < pxdlistlock->count; n++, pxd++) { - 
xaddr = addressPXD(pxd); - xlen = lengthPXD(pxd); - dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, - tblk); - jfs_info("allocPMap: xaddr:0x%lx xlen:%d", - (ulong) xaddr, xlen); - } - } -} - -/* - * txFreeMap() - * - * function: free from persistent and/or working map; - * - * todo: optimization - */ -void txFreeMap(struct inode *ip, - struct maplock * maplock, struct tblock * tblk, int maptype) -{ - struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; - struct xdlistlock *xadlistlock; - xad_t *xad; - s64 xaddr; - int xlen; - struct pxd_lock *pxdlock; - struct xdlistlock *pxdlistlock; - pxd_t *pxd; - int n; - - jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x", - tblk, maplock, maptype); - - /* - * free from persistent map; - */ - if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) { - if (maplock->flag & mlckFREEXADLIST) { - xadlistlock = (struct xdlistlock *) maplock; - xad = xadlistlock->xdlist; - for (n = 0; n < xadlistlock->count; n++, xad++) { - if (!(xad->flag & XAD_NEW)) { - xaddr = addressXAD(xad); - xlen = lengthXAD(xad); - dbUpdatePMap(ipbmap, true, xaddr, - (s64) xlen, tblk); - jfs_info("freePMap: xaddr:0x%lx " - "xlen:%d", - (ulong) xaddr, xlen); - } - } - } else if (maplock->flag & mlckFREEPXD) { - pxdlock = (struct pxd_lock *) maplock; - xaddr = addressPXD(&pxdlock->pxd); - xlen = lengthPXD(&pxdlock->pxd); - dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen, - tblk); - jfs_info("freePMap: xaddr:0x%lx xlen:%d", - (ulong) xaddr, xlen); - } else { /* (maplock->flag & mlckALLOCPXDLIST) */ - - pxdlistlock = (struct xdlistlock *) maplock; - pxd = pxdlistlock->xdlist; - for (n = 0; n < pxdlistlock->count; n++, pxd++) { - xaddr = addressPXD(pxd); - xlen = lengthPXD(pxd); - dbUpdatePMap(ipbmap, true, xaddr, - (s64) xlen, tblk); - jfs_info("freePMap: xaddr:0x%lx xlen:%d", - (ulong) xaddr, xlen); - } - } - } - - /* - * free from working map; - */ - if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) { - if (maplock->flag & mlckFREEXADLIST) { - xadlistlock 
= (struct xdlistlock *) maplock; - xad = xadlistlock->xdlist; - for (n = 0; n < xadlistlock->count; n++, xad++) { - xaddr = addressXAD(xad); - xlen = lengthXAD(xad); - dbFree(ip, xaddr, (s64) xlen); - xad->flag = 0; - jfs_info("freeWMap: xaddr:0x%lx xlen:%d", - (ulong) xaddr, xlen); - } - } else if (maplock->flag & mlckFREEPXD) { - pxdlock = (struct pxd_lock *) maplock; - xaddr = addressPXD(&pxdlock->pxd); - xlen = lengthPXD(&pxdlock->pxd); - dbFree(ip, xaddr, (s64) xlen); - jfs_info("freeWMap: xaddr:0x%lx xlen:%d", - (ulong) xaddr, xlen); - } else { /* (maplock->flag & mlckFREEPXDLIST) */ - - pxdlistlock = (struct xdlistlock *) maplock; - pxd = pxdlistlock->xdlist; - for (n = 0; n < pxdlistlock->count; n++, pxd++) { - xaddr = addressPXD(pxd); - xlen = lengthPXD(pxd); - dbFree(ip, xaddr, (s64) xlen); - jfs_info("freeWMap: xaddr:0x%lx xlen:%d", - (ulong) xaddr, xlen); - } - } - } -} - -/* - * txFreelock() - * - * function: remove tlock from inode anonymous locklist - */ -void txFreelock(struct inode *ip) -{ - struct jfs_inode_info *jfs_ip = JFS_IP(ip); - struct tlock *xtlck, *tlck; - lid_t xlid = 0, lid; - - if (!jfs_ip->atlhead) - return; - - TXN_LOCK(); - xtlck = (struct tlock *) &jfs_ip->atlhead; - - while ((lid = xtlck->next) != 0) { - tlck = lid_to_tlock(lid); - if (tlck->flag & tlckFREELOCK) { - xtlck->next = tlck->next; - txLockFree(lid); - } else { - xtlck = tlck; - xlid = lid; - } - } - - if (jfs_ip->atlhead) - jfs_ip->atltail = xlid; - else { - jfs_ip->atltail = 0; - /* - * If inode was on anon_list, remove it - */ - list_del_init(&jfs_ip->anon_inode_list); - } - TXN_UNLOCK(); -} - -/* - * txAbort() - * - * function: abort tx before commit; - * - * frees line-locks and segment locks for all - * segments in comdata structure. - * Optionally sets state of file-system to FM_DIRTY in super-block. - * log age of page-frames in memory for which caller has - * are reset to 0 (to avoid logwarap). 
- */ -void txAbort(tid_t tid, int dirty) -{ - lid_t lid, next; - struct metapage *mp; - struct tblock *tblk = tid_to_tblock(tid); - struct tlock *tlck; - - /* - * free tlocks of the transaction - */ - for (lid = tblk->next; lid; lid = next) { - tlck = lid_to_tlock(lid); - next = tlck->next; - mp = tlck->mp; - JFS_IP(tlck->ip)->xtlid = 0; - - if (mp) { - mp->lid = 0; - - /* - * reset lsn of page to avoid logwarap: - * - * (page may have been previously committed by another - * transaction(s) but has not been paged, i.e., - * it may be on logsync list even though it has not - * been logged for the current tx.) - */ - if (mp->xflag & COMMIT_PAGE && mp->lsn) - LogSyncRelease(mp); - } - /* insert tlock at head of freelist */ - TXN_LOCK(); - txLockFree(lid); - TXN_UNLOCK(); - } - - /* caller will free the transaction block */ - - tblk->next = tblk->last = 0; - - /* - * mark filesystem dirty - */ - if (dirty) - jfs_error(tblk->sb, "txAbort"); - - return; -} - -/* - * txLazyCommit(void) - * - * All transactions except those changing ipimap (COMMIT_FORCE) are - * processed by this routine. This insures that the inode and block - * allocation maps are updated in order. For synchronous transactions, - * let the user thread finish processing after txUpdateMap() is called. 
- */ -static void txLazyCommit(struct tblock * tblk) -{ - struct jfs_log *log; - - while (((tblk->flag & tblkGC_READY) == 0) && - ((tblk->flag & tblkGC_UNLOCKED) == 0)) { - /* We must have gotten ahead of the user thread - */ - jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk); - yield(); - } - - jfs_info("txLazyCommit: processing tblk 0x%p", tblk); - - txUpdateMap(tblk); - - log = (struct jfs_log *) JFS_SBI(tblk->sb)->log; - - spin_lock_irq(&log->gclock); // LOGGC_LOCK - - tblk->flag |= tblkGC_COMMITTED; - - if (tblk->flag & tblkGC_READY) - log->gcrtc--; - - wake_up_all(&tblk->gcwait); // LOGGC_WAKEUP - - /* - * Can't release log->gclock until we've tested tblk->flag - */ - if (tblk->flag & tblkGC_LAZY) { - spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK - txUnlock(tblk); - tblk->flag &= ~tblkGC_LAZY; - txEnd(tblk - TxBlock); /* Convert back to tid */ - } else - spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK - - jfs_info("txLazyCommit: done: tblk = 0x%p", tblk); -} - -/* - * jfs_lazycommit(void) - * - * To be run as a kernel daemon. If lbmIODone is called in an interrupt - * context, or where blocking is not wanted, this routine will process - * committed transactions from the unlock queue. - */ -int jfs_lazycommit(void *arg) -{ - int WorkDone; - struct tblock *tblk; - unsigned long flags; - struct jfs_sb_info *sbi; - - do { - LAZY_LOCK(flags); - jfs_commit_thread_waking = 0; /* OK to wake another thread */ - while (!list_empty(&TxAnchor.unlock_queue)) { - WorkDone = 0; - list_for_each_entry(tblk, &TxAnchor.unlock_queue, - cqueue) { - - sbi = JFS_SBI(tblk->sb); - /* - * For each volume, the transactions must be - * handled in order. 
If another commit thread - * is handling a tblk for this superblock, - * skip it - */ - if (sbi->commit_state & IN_LAZYCOMMIT) - continue; - - sbi->commit_state |= IN_LAZYCOMMIT; - WorkDone = 1; - - /* - * Remove transaction from queue - */ - list_del(&tblk->cqueue); - - LAZY_UNLOCK(flags); - txLazyCommit(tblk); - LAZY_LOCK(flags); - - sbi->commit_state &= ~IN_LAZYCOMMIT; - /* - * Don't continue in the for loop. (We can't - * anyway, it's unsafe!) We want to go back to - * the beginning of the list. - */ - break; - } - - /* If there was nothing to do, don't continue */ - if (!WorkDone) - break; - } - /* In case a wakeup came while all threads were active */ - jfs_commit_thread_waking = 0; - - if (freezing(current)) { - LAZY_UNLOCK(flags); - try_to_freeze(); - } else { - DECLARE_WAITQUEUE(wq, current); - - add_wait_queue(&jfs_commit_thread_wait, &wq); - set_current_state(TASK_INTERRUPTIBLE); - LAZY_UNLOCK(flags); - schedule(); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&jfs_commit_thread_wait, &wq); - } - } while (!kthread_should_stop()); - - if (!list_empty(&TxAnchor.unlock_queue)) - jfs_err("jfs_lazycommit being killed w/pending transactions!"); - else - jfs_info("jfs_lazycommit being killed\n"); - return 0; -} - -void txLazyUnlock(struct tblock * tblk) -{ - unsigned long flags; - - LAZY_LOCK(flags); - - list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue); - /* - * Don't wake up a commit thread if there is already one servicing - * this superblock, or if the last one we woke up hasn't started yet. 
- */ - if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) && - !jfs_commit_thread_waking) { - jfs_commit_thread_waking = 1; - wake_up(&jfs_commit_thread_wait); - } - LAZY_UNLOCK(flags); -} - -static void LogSyncRelease(struct metapage * mp) -{ - struct jfs_log *log = mp->log; - - assert(mp->nohomeok); - assert(log); - metapage_homeok(mp); -} - -/* - * txQuiesce - * - * Block all new transactions and push anonymous transactions to - * completion - * - * This does almost the same thing as jfs_sync below. We don't - * worry about deadlocking when jfs_tlocks_low is set, since we would - * expect jfs_sync to get us out of that jam. - */ -void txQuiesce(struct super_block *sb) -{ - struct inode *ip; - struct jfs_inode_info *jfs_ip; - struct jfs_log *log = JFS_SBI(sb)->log; - tid_t tid; - - set_bit(log_QUIESCE, &log->flag); - - TXN_LOCK(); -restart: - while (!list_empty(&TxAnchor.anon_list)) { - jfs_ip = list_entry(TxAnchor.anon_list.next, - struct jfs_inode_info, - anon_inode_list); - ip = &jfs_ip->vfs_inode; - - /* - * inode will be removed from anonymous list - * when it is committed - */ - TXN_UNLOCK(); - tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE); - mutex_lock(&jfs_ip->commit_mutex); - txCommit(tid, 1, &ip, 0); - txEnd(tid); - mutex_unlock(&jfs_ip->commit_mutex); - /* - * Just to be safe. I don't know how - * long we can run without blocking - */ - cond_resched(); - TXN_LOCK(); - } - - /* - * If jfs_sync is running in parallel, there could be some inodes - * on anon_list2. Let's check. 
- */ - if (!list_empty(&TxAnchor.anon_list2)) { - list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list); - INIT_LIST_HEAD(&TxAnchor.anon_list2); - goto restart; - } - TXN_UNLOCK(); - - /* - * We may need to kick off the group commit - */ - jfs_flush_journal(log, 0); -} - -/* - * txResume() - * - * Allows transactions to start again following txQuiesce - */ -void txResume(struct super_block *sb) -{ - struct jfs_log *log = JFS_SBI(sb)->log; - - clear_bit(log_QUIESCE, &log->flag); - TXN_WAKEUP(&log->syncwait); -} - -/* - * jfs_sync(void) - * - * To be run as a kernel daemon. This is awakened when tlocks run low. - * We write any inodes that have anonymous tlocks so they will become - * available. - */ -int jfs_sync(void *arg) -{ - struct inode *ip; - struct jfs_inode_info *jfs_ip; - tid_t tid; - - do { - /* - * write each inode on the anonymous inode list - */ - TXN_LOCK(); - while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) { - jfs_ip = list_entry(TxAnchor.anon_list.next, - struct jfs_inode_info, - anon_inode_list); - ip = &jfs_ip->vfs_inode; - - if (! igrab(ip)) { - /* - * Inode is being freed - */ - list_del_init(&jfs_ip->anon_inode_list); - } else if (mutex_trylock(&jfs_ip->commit_mutex)) { - /* - * inode will be removed from anonymous list - * when it is committed - */ - TXN_UNLOCK(); - tid = txBegin(ip->i_sb, COMMIT_INODE); - txCommit(tid, 1, &ip, 0); - txEnd(tid); - mutex_unlock(&jfs_ip->commit_mutex); - - iput(ip); - /* - * Just to be safe. I don't know how - * long we can run without blocking - */ - cond_resched(); - TXN_LOCK(); - } else { - /* We can't get the commit mutex. It may - * be held by a thread waiting for tlock's - * so let's not block here. Save it to - * put back on the anon_list. 
- */ - - /* Take off anon_list */ - list_del(&jfs_ip->anon_inode_list); - - /* Put on anon_list2 */ - list_add(&jfs_ip->anon_inode_list, - &TxAnchor.anon_list2); - - TXN_UNLOCK(); - iput(ip); - TXN_LOCK(); - } - } - /* Add anon_list2 back to anon_list */ - list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); - - if (freezing(current)) { - TXN_UNLOCK(); - try_to_freeze(); - } else { - set_current_state(TASK_INTERRUPTIBLE); - TXN_UNLOCK(); - schedule(); - __set_current_state(TASK_RUNNING); - } - } while (!kthread_should_stop()); - - jfs_info("jfs_sync being killed"); - return 0; -} - -#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) -static int jfs_txanchor_proc_show(struct seq_file *m, void *v) -{ - char *freewait; - char *freelockwait; - char *lowlockwait; - - freewait = - waitqueue_active(&TxAnchor.freewait) ? "active" : "empty"; - freelockwait = - waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty"; - lowlockwait = - waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; - - seq_printf(m, - "JFS TxAnchor\n" - "============\n" - "freetid = %d\n" - "freewait = %s\n" - "freelock = %d\n" - "freelockwait = %s\n" - "lowlockwait = %s\n" - "tlocksInUse = %d\n" - "jfs_tlocks_low = %d\n" - "unlock_queue is %sempty\n", - TxAnchor.freetid, - freewait, - TxAnchor.freelock, - freelockwait, - lowlockwait, - TxAnchor.tlocksInUse, - jfs_tlocks_low, - list_empty(&TxAnchor.unlock_queue) ? 
"" : "not "); - return 0; -} - -static int jfs_txanchor_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, jfs_txanchor_proc_show, NULL); -} - -const struct file_operations jfs_txanchor_proc_fops = { - .owner = THIS_MODULE, - .open = jfs_txanchor_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif - -#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) -static int jfs_txstats_proc_show(struct seq_file *m, void *v) -{ - seq_printf(m, - "JFS TxStats\n" - "===========\n" - "calls to txBegin = %d\n" - "txBegin blocked by sync barrier = %d\n" - "txBegin blocked by tlocks low = %d\n" - "txBegin blocked by no free tid = %d\n" - "calls to txBeginAnon = %d\n" - "txBeginAnon blocked by sync barrier = %d\n" - "txBeginAnon blocked by tlocks low = %d\n" - "calls to txLockAlloc = %d\n" - "tLockAlloc blocked by no free lock = %d\n", - TxStat.txBegin, - TxStat.txBegin_barrier, - TxStat.txBegin_lockslow, - TxStat.txBegin_freetid, - TxStat.txBeginAnon, - TxStat.txBeginAnon_barrier, - TxStat.txBeginAnon_lockslow, - TxStat.txLockAlloc, - TxStat.txLockAlloc_freelock); - return 0; -} - -static int jfs_txstats_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, jfs_txstats_proc_show, NULL); -} - -const struct file_operations jfs_txstats_proc_fops = { - .owner = THIS_MODULE, - .open = jfs_txstats_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif diff --git a/ANDROID_3.4.5/fs/jfs/jfs_txnmgr.h b/ANDROID_3.4.5/fs/jfs/jfs_txnmgr.h deleted file mode 100644 index ab728893..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_txnmgr.h +++ /dev/null @@ -1,311 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * 
(at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_TXNMGR -#define _H_JFS_TXNMGR - -#include "jfs_logmgr.h" - -/* - * Hide implementation of TxBlock and TxLock - */ -#define tid_to_tblock(tid) (&TxBlock[tid]) - -#define lid_to_tlock(lid) (&TxLock[lid]) - -/* - * transaction block - */ -struct tblock { - /* - * tblock and jbuf_t common area: struct logsyncblk - * - * the following 5 fields are the same as struct logsyncblk - * which is common to tblock and jbuf to form logsynclist - */ - u16 xflag; /* tx commit type */ - u16 flag; /* tx commit state */ - lid_t dummy; /* Must keep structures common */ - s32 lsn; /* recovery lsn */ - struct list_head synclist; /* logsynclist link */ - - /* lock management */ - struct super_block *sb; /* super block */ - lid_t next; /* index of first tlock of tid */ - lid_t last; /* index of last tlock of tid */ - wait_queue_head_t waitor; /* tids waiting on this tid */ - - /* log management */ - u32 logtid; /* log transaction id */ - - /* commit management */ - struct list_head cqueue; /* commit queue list */ - s32 clsn; /* commit lsn */ - struct lbuf *bp; - s32 pn; /* commit record log page number */ - s32 eor; /* commit record eor */ - wait_queue_head_t gcwait; /* group commit event list: - * ready transactions wait on this - * event for group commit completion. 
- */ - union { - struct inode *ip; /* inode being deleted */ - pxd_t ixpxd; /* pxd of inode extent for created inode */ - } u; - u32 ino; /* inode number being created */ -}; - -extern struct tblock *TxBlock; /* transaction block table */ - -/* commit flags: tblk->xflag */ -#define COMMIT_SYNC 0x0001 /* synchronous commit */ -#define COMMIT_FORCE 0x0002 /* force pageout at end of commit */ -#define COMMIT_FLUSH 0x0004 /* init flush at end of commit */ -#define COMMIT_MAP 0x00f0 -#define COMMIT_PMAP 0x0010 /* update pmap */ -#define COMMIT_WMAP 0x0020 /* update wmap */ -#define COMMIT_PWMAP 0x0040 /* update pwmap */ -#define COMMIT_FREE 0x0f00 -#define COMMIT_DELETE 0x0100 /* inode delete */ -#define COMMIT_TRUNCATE 0x0200 /* file truncation */ -#define COMMIT_CREATE 0x0400 /* inode create */ -#define COMMIT_LAZY 0x0800 /* lazy commit */ -#define COMMIT_PAGE 0x1000 /* Identifies element as metapage */ -#define COMMIT_INODE 0x2000 /* Identifies element as inode */ - -/* group commit flags tblk->flag: see jfs_logmgr.h */ - -/* - * transaction lock - */ -struct tlock { - lid_t next; /* 2: index next lockword on tid locklist - * next lockword on freelist - */ - tid_t tid; /* 2: transaction id holding lock */ - - u16 flag; /* 2: lock control */ - u16 type; /* 2: log type */ - - struct metapage *mp; /* 4/8: object page buffer locked */ - struct inode *ip; /* 4/8: object */ - /* (16) */ - - s16 lock[24]; /* 48: overlay area */ -}; /* (64) */ - -extern struct tlock *TxLock; /* transaction lock table */ - -/* - * tlock flag - */ -/* txLock state */ -#define tlckPAGELOCK 0x8000 -#define tlckINODELOCK 0x4000 -#define tlckLINELOCK 0x2000 -#define tlckINLINELOCK 0x1000 -/* lmLog state */ -#define tlckLOG 0x0800 -/* updateMap state */ -#define tlckUPDATEMAP 0x0080 -#define tlckDIRECTORY 0x0040 -/* freeLock state */ -#define tlckFREELOCK 0x0008 -#define tlckWRITEPAGE 0x0004 -#define tlckFREEPAGE 0x0002 - -/* - * tlock type - */ -#define tlckTYPE 0xfe00 -#define tlckINODE 0x8000 
-#define tlckXTREE 0x4000 -#define tlckDTREE 0x2000 -#define tlckMAP 0x1000 -#define tlckEA 0x0800 -#define tlckACL 0x0400 -#define tlckDATA 0x0200 -#define tlckBTROOT 0x0100 - -#define tlckOPERATION 0x00ff -#define tlckGROW 0x0001 /* file grow */ -#define tlckREMOVE 0x0002 /* file delete */ -#define tlckTRUNCATE 0x0004 /* file truncate */ -#define tlckRELOCATE 0x0008 /* file/directory relocate */ -#define tlckENTRY 0x0001 /* directory insert/delete */ -#define tlckEXTEND 0x0002 /* directory extend in-line */ -#define tlckSPLIT 0x0010 /* splited page */ -#define tlckNEW 0x0020 /* new page from split */ -#define tlckFREE 0x0040 /* free page */ -#define tlckRELINK 0x0080 /* update sibling pointer */ - -/* - * linelock for lmLog() - * - * note: linelock and its variations are overlaid - * at tlock.lock: watch for alignment; - */ -struct lv { - u8 offset; /* 1: */ - u8 length; /* 1: */ -}; /* (2) */ - -#define TLOCKSHORT 20 -#define TLOCKLONG 28 - -struct linelock { - lid_t next; /* 2: next linelock */ - - s8 maxcnt; /* 1: */ - s8 index; /* 1: */ - - u16 flag; /* 2: */ - u8 type; /* 1: */ - u8 l2linesize; /* 1: log2 of linesize */ - /* (8) */ - - struct lv lv[20]; /* 40: */ -}; /* (48) */ - -#define dt_lock linelock - -struct xtlock { - lid_t next; /* 2: */ - - s8 maxcnt; /* 1: */ - s8 index; /* 1: */ - - u16 flag; /* 2: */ - u8 type; /* 1: */ - u8 l2linesize; /* 1: log2 of linesize */ - /* (8) */ - - struct lv header; /* 2: */ - struct lv lwm; /* 2: low water mark */ - struct lv hwm; /* 2: high water mark */ - struct lv twm; /* 2: */ - /* (16) */ - - s32 pxdlock[8]; /* 32: */ -}; /* (48) */ - - -/* - * maplock for txUpdateMap() - * - * note: maplock and its variations are overlaid - * at tlock.lock/linelock: watch for alignment; - * N.B. next field may be set by linelock, and should not - * be modified by maplock; - * N.B. 
index of the first pxdlock specifies index of next - * free maplock (i.e., number of maplock) in the tlock; - */ -struct maplock { - lid_t next; /* 2: */ - - u8 maxcnt; /* 2: */ - u8 index; /* 2: next free maplock index */ - - u16 flag; /* 2: */ - u8 type; /* 1: */ - u8 count; /* 1: number of pxd/xad */ - /* (8) */ - - pxd_t pxd; /* 8: */ -}; /* (16): */ - -/* maplock flag */ -#define mlckALLOC 0x00f0 -#define mlckALLOCXADLIST 0x0080 -#define mlckALLOCPXDLIST 0x0040 -#define mlckALLOCXAD 0x0020 -#define mlckALLOCPXD 0x0010 -#define mlckFREE 0x000f -#define mlckFREEXADLIST 0x0008 -#define mlckFREEPXDLIST 0x0004 -#define mlckFREEXAD 0x0002 -#define mlckFREEPXD 0x0001 - -#define pxd_lock maplock - -struct xdlistlock { - lid_t next; /* 2: */ - - u8 maxcnt; /* 2: */ - u8 index; /* 2: */ - - u16 flag; /* 2: */ - u8 type; /* 1: */ - u8 count; /* 1: number of pxd/xad */ - /* (8) */ - - /* - * We need xdlist to be 64 bits (8 bytes), regardless of - * whether void * is 32 or 64 bits - */ - union { - void *_xdlist; /* pxd/xad list */ - s64 pad; /* 8: Force 64-bit xdlist size */ - } union64; -}; /* (16): */ - -#define xdlist union64._xdlist - -/* - * commit - * - * parameter to the commit manager routines - */ -struct commit { - tid_t tid; /* tid = index of tblock */ - int flag; /* flags */ - struct jfs_log *log; /* log */ - struct super_block *sb; /* superblock */ - - int nip; /* number of entries in iplist */ - struct inode **iplist; /* list of pointers to inodes */ - - /* log record descriptor on 64-bit boundary */ - struct lrd lrd; /* : log record descriptor */ -}; - -/* - * external declarations - */ -extern int jfs_tlocks_low; - -extern int txInit(void); -extern void txExit(void); -extern struct tlock *txLock(tid_t, struct inode *, struct metapage *, int); -extern struct tlock *txMaplock(tid_t, struct inode *, int); -extern int txCommit(tid_t, int, struct inode **, int); -extern tid_t txBegin(struct super_block *, int); -extern void txBeginAnon(struct super_block *); 
-extern void txEnd(tid_t); -extern void txAbort(tid_t, int); -extern struct linelock *txLinelock(struct linelock *); -extern void txFreeMap(struct inode *, struct maplock *, struct tblock *, int); -extern void txEA(tid_t, struct inode *, dxd_t *, dxd_t *); -extern void txFreelock(struct inode *); -extern int lmLog(struct jfs_log *, struct tblock *, struct lrd *, - struct tlock *); -extern void txQuiesce(struct super_block *); -extern void txResume(struct super_block *); -extern void txLazyUnlock(struct tblock *); -extern int jfs_lazycommit(void *); -extern int jfs_sync(void *); -#endif /* _H_JFS_TXNMGR */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_types.h b/ANDROID_3.4.5/fs/jfs/jfs_types.h deleted file mode 100644 index 43ea3713..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_types.h +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_TYPES -#define _H_JFS_TYPES - -/* - * jfs_types.h: - * - * basic type/utility definitions - * - * note: this header file must be the 1st include file - * of JFS include list in all JFS .c file. 
- */ - -#include <linux/types.h> -#include <linux/nls.h> - -#include "endian24.h" - -/* - * transaction and lock id's - * - * Don't change these without carefully considering the impact on the - * size and alignment of all of the linelock variants - */ -typedef u16 tid_t; -typedef u16 lid_t; - -/* - * Almost identical to Linux's timespec, but not quite - */ -struct timestruc_t { - __le32 tv_sec; - __le32 tv_nsec; -}; - -/* - * handy - */ - -#define LEFTMOSTONE 0x80000000 -#define HIGHORDER 0x80000000u /* high order bit on */ -#define ONES 0xffffffffu /* all bit on */ - -/* - * physical xd (pxd) - */ -typedef struct { - unsigned len:24; - unsigned addr1:8; - __le32 addr2; -} pxd_t; - -/* xd_t field construction */ - -#define PXDlength(pxd, length32) ((pxd)->len = __cpu_to_le24(length32)) -#define PXDaddress(pxd, address64)\ -{\ - (pxd)->addr1 = ((s64)address64) >> 32;\ - (pxd)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ -} - -/* xd_t field extraction */ -#define lengthPXD(pxd) __le24_to_cpu((pxd)->len) -#define addressPXD(pxd)\ - ( ((s64)((pxd)->addr1)) << 32 | __le32_to_cpu((pxd)->addr2)) - -#define MAXTREEHEIGHT 8 -/* pxd list */ -struct pxdlist { - s16 maxnpxd; - s16 npxd; - pxd_t pxd[MAXTREEHEIGHT]; -}; - - -/* - * data extent descriptor (dxd) - */ -typedef struct { - unsigned flag:8; /* 1: flags */ - unsigned rsrvd:24; - __le32 size; /* 4: size in byte */ - unsigned len:24; /* 3: length in unit of fsblksize */ - unsigned addr1:8; /* 1: address in unit of fsblksize */ - __le32 addr2; /* 4: address in unit of fsblksize */ -} dxd_t; /* - 16 - */ - -/* dxd_t flags */ -#define DXD_INDEX 0x80 /* B+-tree index */ -#define DXD_INLINE 0x40 /* in-line data extent */ -#define DXD_EXTENT 0x20 /* out-of-line single extent */ -#define DXD_FILE 0x10 /* out-of-line file (inode) */ -#define DXD_CORRUPT 0x08 /* Inconsistency detected */ - -/* dxd_t field construction - * Conveniently, the PXD macros work for DXD - */ -#define DXDlength PXDlength -#define DXDaddress 
PXDaddress -#define lengthDXD lengthPXD -#define addressDXD addressPXD -#define DXDsize(dxd, size32) ((dxd)->size = cpu_to_le32(size32)) -#define sizeDXD(dxd) le32_to_cpu((dxd)->size) - -/* - * directory entry argument - */ -struct component_name { - int namlen; - wchar_t *name; -}; - - -/* - * DASD limit information - stored in directory inode - */ -struct dasd { - u8 thresh; /* Alert Threshold (in percent) */ - u8 delta; /* Alert Threshold delta (in percent) */ - u8 rsrvd1; - u8 limit_hi; /* DASD limit (in logical blocks) */ - __le32 limit_lo; /* DASD limit (in logical blocks) */ - u8 rsrvd2[3]; - u8 used_hi; /* DASD usage (in logical blocks) */ - __le32 used_lo; /* DASD usage (in logical blocks) */ -}; - -#define DASDLIMIT(dasdp) \ - (((u64)((dasdp)->limit_hi) << 32) + __le32_to_cpu((dasdp)->limit_lo)) -#define setDASDLIMIT(dasdp, limit)\ -{\ - (dasdp)->limit_hi = ((u64)limit) >> 32;\ - (dasdp)->limit_lo = __cpu_to_le32(limit);\ -} -#define DASDUSED(dasdp) \ - (((u64)((dasdp)->used_hi) << 32) + __le32_to_cpu((dasdp)->used_lo)) -#define setDASDUSED(dasdp, used)\ -{\ - (dasdp)->used_hi = ((u64)used) >> 32;\ - (dasdp)->used_lo = __cpu_to_le32(used);\ -} - -#endif /* !_H_JFS_TYPES */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_umount.c b/ANDROID_3.4.5/fs/jfs/jfs_umount.c deleted file mode 100644 index 7971f375..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_umount.c +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* - * jfs_umount.c - * - * note: file system in transition to aggregate/fileset: - * (ref. jfs_mount.c) - * - * file system unmount is interpreted as mount of the single/only - * fileset in the aggregate and, if unmount of the last fileset, - * as unmount of the aggerate; - */ - -#include <linux/fs.h> -#include "jfs_incore.h" -#include "jfs_filsys.h" -#include "jfs_superblock.h" -#include "jfs_dmap.h" -#include "jfs_imap.h" -#include "jfs_metapage.h" -#include "jfs_debug.h" - -/* - * NAME: jfs_umount(vfsp, flags, crp) - * - * FUNCTION: vfs_umount() - * - * PARAMETERS: vfsp - virtual file system pointer - * flags - unmount for shutdown - * crp - credential - * - * RETURN : EBUSY - device has open files - */ -int jfs_umount(struct super_block *sb) -{ - struct jfs_sb_info *sbi = JFS_SBI(sb); - struct inode *ipbmap = sbi->ipbmap; - struct inode *ipimap = sbi->ipimap; - struct inode *ipaimap = sbi->ipaimap; - struct inode *ipaimap2 = sbi->ipaimap2; - struct jfs_log *log; - int rc = 0; - - jfs_info("UnMount JFS: sb:0x%p", sb); - - /* - * update superblock and close log - * - * if mounted read-write and log based recovery was enabled - */ - if ((log = sbi->log)) - /* - * Wait for outstanding transactions to be written to log: - */ - jfs_flush_journal(log, 2); - - /* - * close fileset inode allocation map (aka fileset inode) - */ - diUnmount(ipimap, 0); - - diFreeSpecial(ipimap); - sbi->ipimap = NULL; - - /* - * close secondary aggregate inode allocation map - */ - ipaimap2 = sbi->ipaimap2; - if (ipaimap2) { - diUnmount(ipaimap2, 0); - diFreeSpecial(ipaimap2); - sbi->ipaimap2 = NULL; - } - - /* - * close aggregate inode allocation map - */ - ipaimap = sbi->ipaimap; - diUnmount(ipaimap, 0); - diFreeSpecial(ipaimap); - sbi->ipaimap = NULL; - - /* - * close 
aggregate block allocation map - */ - dbUnmount(ipbmap, 0); - - diFreeSpecial(ipbmap); - sbi->ipimap = NULL; - - /* - * Make sure all metadata makes it to disk before we mark - * the superblock as clean - */ - filemap_write_and_wait(sbi->direct_inode->i_mapping); - - /* - * ensure all file system file pages are propagated to their - * home blocks on disk (and their in-memory buffer pages are - * invalidated) BEFORE updating file system superblock state - * (to signify file system is unmounted cleanly, and thus in - * consistent state) and log superblock active file system - * list (to signify skip logredo()). - */ - if (log) { /* log = NULL if read-only mount */ - updateSuper(sb, FM_CLEAN); - - /* - * close log: - * - * remove file system from log active file system list. - */ - rc = lmLogClose(sb); - } - jfs_info("UnMount JFS Complete: rc = %d", rc); - return rc; -} - - -int jfs_umount_rw(struct super_block *sb) -{ - struct jfs_sb_info *sbi = JFS_SBI(sb); - struct jfs_log *log = sbi->log; - - if (!log) - return 0; - - /* - * close log: - * - * remove file system from log active file system list. - */ - jfs_flush_journal(log, 2); - - /* - * Make sure all metadata makes it to disk - */ - dbSync(sbi->ipbmap); - diSync(sbi->ipimap); - - /* - * Note that we have to do this even if sync_blockdev() will - * do exactly the same a few instructions later: We can't - * mark the superblock clean before everything is flushed to - * disk. 
- */ - filemap_write_and_wait(sbi->direct_inode->i_mapping); - - updateSuper(sb, FM_CLEAN); - - return lmLogClose(sb); -} diff --git a/ANDROID_3.4.5/fs/jfs/jfs_unicode.c b/ANDROID_3.4.5/fs/jfs/jfs_unicode.c deleted file mode 100644 index c7de6f5b..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_unicode.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/fs.h> -#include <linux/slab.h> -#include "jfs_incore.h" -#include "jfs_filsys.h" -#include "jfs_unicode.h" -#include "jfs_debug.h" - -/* - * NAME: jfs_strfromUCS() - * - * FUNCTION: Convert little-endian unicode string to character string - * - */ -int jfs_strfromUCS_le(char *to, const __le16 * from, - int len, struct nls_table *codepage) -{ - int i; - int outlen = 0; - static int warn_again = 5; /* Only warn up to 5 times total */ - int warn = !!warn_again; /* once per string */ - - if (codepage) { - for (i = 0; (i < len) && from[i]; i++) { - int charlen; - charlen = - codepage->uni2char(le16_to_cpu(from[i]), - &to[outlen], - NLS_MAX_CHARSET_SIZE); - if (charlen > 0) - outlen += charlen; - else - to[outlen++] = '?'; - } - } else { - for (i = 0; (i < len) && from[i]; i++) { - if (unlikely(le16_to_cpu(from[i]) & 0xff00)) { - to[i] = 
'?'; - if (unlikely(warn)) { - warn--; - warn_again--; - printk(KERN_ERR - "non-latin1 character 0x%x found in JFS file name\n", - le16_to_cpu(from[i])); - printk(KERN_ERR - "mount with iocharset=utf8 to access\n"); - } - - } - else - to[i] = (char) (le16_to_cpu(from[i])); - } - outlen = i; - } - to[outlen] = 0; - return outlen; -} - -/* - * NAME: jfs_strtoUCS() - * - * FUNCTION: Convert character string to unicode string - * - */ -static int jfs_strtoUCS(wchar_t * to, const unsigned char *from, int len, - struct nls_table *codepage) -{ - int charlen; - int i; - - if (codepage) { - for (i = 0; len && *from; i++, from += charlen, len -= charlen) - { - charlen = codepage->char2uni(from, len, &to[i]); - if (charlen < 1) { - jfs_err("jfs_strtoUCS: char2uni returned %d.", - charlen); - jfs_err("charset = %s, char = 0x%x", - codepage->charset, *from); - return charlen; - } - } - } else { - for (i = 0; (i < len) && from[i]; i++) - to[i] = (wchar_t) from[i]; - } - - to[i] = 0; - return i; -} - -/* - * NAME: get_UCSname() - * - * FUNCTION: Allocate and translate to unicode string - * - */ -int get_UCSname(struct component_name * uniName, struct dentry *dentry) -{ - struct nls_table *nls_tab = JFS_SBI(dentry->d_sb)->nls_tab; - int length = dentry->d_name.len; - - if (length > JFS_NAME_MAX) - return -ENAMETOOLONG; - - uniName->name = - kmalloc((length + 1) * sizeof(wchar_t), GFP_NOFS); - - if (uniName->name == NULL) - return -ENOMEM; - - uniName->namlen = jfs_strtoUCS(uniName->name, dentry->d_name.name, - length, nls_tab); - - if (uniName->namlen < 0) { - kfree(uniName->name); - return uniName->namlen; - } - - return 0; -} diff --git a/ANDROID_3.4.5/fs/jfs/jfs_unicode.h b/ANDROID_3.4.5/fs/jfs/jfs_unicode.h deleted file mode 100644 index 8f0f02cb..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_unicode.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2002 - * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is 
free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_UNICODE -#define _H_JFS_UNICODE - -#include <linux/slab.h> -#include <asm/byteorder.h> -#include "jfs_types.h" - -typedef struct { - wchar_t start; - wchar_t end; - signed char *table; -} UNICASERANGE; - -extern signed char UniUpperTable[512]; -extern UNICASERANGE UniUpperRange[]; -extern int get_UCSname(struct component_name *, struct dentry *); -extern int jfs_strfromUCS_le(char *, const __le16 *, int, struct nls_table *); - -#define free_UCSname(COMP) kfree((COMP)->name) - -/* - * UniStrcpy: Copy a string - */ -static inline wchar_t *UniStrcpy(wchar_t * ucs1, const wchar_t * ucs2) -{ - wchar_t *anchor = ucs1; /* save the start of result string */ - - while ((*ucs1++ = *ucs2++)); - return anchor; -} - - - -/* - * UniStrncpy: Copy length limited string with pad - */ -static inline __le16 *UniStrncpy_le(__le16 * ucs1, const __le16 * ucs2, - size_t n) -{ - __le16 *anchor = ucs1; - - while (n-- && *ucs2) /* Copy the strings */ - *ucs1++ = *ucs2++; - - n++; - while (n--) /* Pad with nulls */ - *ucs1++ = 0; - return anchor; -} - -/* - * UniStrncmp_le: Compare length limited string - native to little-endian - */ -static inline int UniStrncmp_le(const wchar_t * ucs1, const __le16 * ucs2, - size_t n) -{ - if (!n) - return 0; /* Null strings are equal */ - while 
((*ucs1 == __le16_to_cpu(*ucs2)) && *ucs1 && --n) { - ucs1++; - ucs2++; - } - return (int) *ucs1 - (int) __le16_to_cpu(*ucs2); -} - -/* - * UniStrncpy_to_le: Copy length limited string with pad to little-endian - */ -static inline __le16 *UniStrncpy_to_le(__le16 * ucs1, const wchar_t * ucs2, - size_t n) -{ - __le16 *anchor = ucs1; - - while (n-- && *ucs2) /* Copy the strings */ - *ucs1++ = cpu_to_le16(*ucs2++); - - n++; - while (n--) /* Pad with nulls */ - *ucs1++ = 0; - return anchor; -} - -/* - * UniStrncpy_from_le: Copy length limited string with pad from little-endian - */ -static inline wchar_t *UniStrncpy_from_le(wchar_t * ucs1, const __le16 * ucs2, - size_t n) -{ - wchar_t *anchor = ucs1; - - while (n-- && *ucs2) /* Copy the strings */ - *ucs1++ = __le16_to_cpu(*ucs2++); - - n++; - while (n--) /* Pad with nulls */ - *ucs1++ = 0; - return anchor; -} - -/* - * UniToupper: Convert a unicode character to upper case - */ -static inline wchar_t UniToupper(wchar_t uc) -{ - UNICASERANGE *rp; - - if (uc < sizeof(UniUpperTable)) { /* Latin characters */ - return uc + UniUpperTable[uc]; /* Use base tables */ - } else { - rp = UniUpperRange; /* Use range tables */ - while (rp->start) { - if (uc < rp->start) /* Before start of range */ - return uc; /* Uppercase = input */ - if (uc <= rp->end) /* In range */ - return uc + rp->table[uc - rp->start]; - rp++; /* Try next range */ - } - } - return uc; /* Past last range */ -} - - -/* - * UniStrupr: Upper case a unicode string - */ -static inline wchar_t *UniStrupr(wchar_t * upin) -{ - wchar_t *up; - - up = upin; - while (*up) { /* For all characters */ - *up = UniToupper(*up); - up++; - } - return upin; /* Return input pointer */ -} - -#endif /* !_H_JFS_UNICODE */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_uniupr.c b/ANDROID_3.4.5/fs/jfs/jfs_uniupr.c deleted file mode 100644 index cfe50666..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_uniupr.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 
2000-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/fs.h> -#include "jfs_unicode.h" - -/* - * Latin upper case - */ -signed char UniUpperTable[512] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 000-00f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 010-01f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 020-02f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 030-03f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 040-04f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 050-05f */ - 0,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 060-06f */ - -32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, 0, 0, 0, 0, 0, /* 070-07f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 080-08f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 090-09f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0a0-0af */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0b0-0bf */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0c0-0cf */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0d0-0df */ - -32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 0e0-0ef */ - -32,-32,-32,-32,-32,-32,-32, 0,-32,-32,-32,-32,-32,-32,-32,121, /* 0f0-0ff */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 
-1, /* 100-10f */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 110-11f */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 120-12f */ - 0, 0, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, /* 130-13f */ - -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, /* 140-14f */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 150-15f */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 160-16f */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, /* 170-17f */ - 0, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, /* 180-18f */ - 0, 0, -1, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, /* 190-19f */ - 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, /* 1a0-1af */ - -1, 0, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, /* 1b0-1bf */ - 0, 0, 0, 0, 0, -1, -2, 0, -1, -2, 0, -1, -2, 0, -1, 0, /* 1c0-1cf */ - -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,-79, 0, -1, /* 1d0-1df */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e0-1ef */ - 0, 0, -1, -2, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, /* 1f0-1ff */ -}; - -/* Upper case range - Greek */ -static signed char UniCaseRangeU03a0[47] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-38,-37,-37,-37, /* 3a0-3af */ - 0,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 3b0-3bf */ - -32,-32,-31,-32,-32,-32,-32,-32,-32,-32,-32,-32,-64,-63,-63, -}; - -/* Upper case range - Cyrillic */ -static signed char UniCaseRangeU0430[48] = { - -32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 430-43f */ - -32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 440-44f */ - 0,-80,-80,-80,-80,-80,-80,-80,-80,-80,-80,-80,-80, 0,-80,-80, /* 450-45f */ -}; - -/* Upper case range - Extended cyrillic */ -static signed char UniCaseRangeU0490[61] = { - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 490-49f */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 4a0-4af */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 
4b0-4bf */ - 0, 0, -1, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, -}; - -/* Upper case range - Extended latin and greek */ -static signed char UniCaseRangeU1e00[509] = { - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e00-1e0f */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e10-1e1f */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e20-1e2f */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e30-1e3f */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e40-1e4f */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e50-1e5f */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e60-1e6f */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e70-1e7f */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e80-1e8f */ - 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0,-59, 0, -1, 0, -1, /* 1e90-1e9f */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ea0-1eaf */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1eb0-1ebf */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ec0-1ecf */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ed0-1edf */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ee0-1eef */ - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, /* 1ef0-1eff */ - 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f00-1f0f */ - 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f10-1f1f */ - 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f20-1f2f */ - 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f30-1f3f */ - 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f40-1f4f */ - 0, 8, 0, 8, 0, 8, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f50-1f5f */ - 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f60-1f6f */ - 74, 74, 86, 86, 86, 86,100,100, 0, 0,112,112,126,126, 0, 0, /* 1f70-1f7f */ - 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f80-1f8f */ - 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 
1f90-1f9f */ - 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fa0-1faf */ - 8, 8, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fb0-1fbf */ - 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fc0-1fcf */ - 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fd0-1fdf */ - 8, 8, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fe0-1fef */ - 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -/* Upper case range - Wide latin */ -static signed char UniCaseRangeUff40[27] = { - 0,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* ff40-ff4f */ - -32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, -}; - -/* - * Upper Case Range - */ -UNICASERANGE UniUpperRange[] = { - { 0x03a0, 0x03ce, UniCaseRangeU03a0 }, - { 0x0430, 0x045f, UniCaseRangeU0430 }, - { 0x0490, 0x04cc, UniCaseRangeU0490 }, - { 0x1e00, 0x1ffc, UniCaseRangeU1e00 }, - { 0xff40, 0xff5a, UniCaseRangeUff40 }, - { 0 } -}; diff --git a/ANDROID_3.4.5/fs/jfs/jfs_xattr.h b/ANDROID_3.4.5/fs/jfs/jfs_xattr.h deleted file mode 100644 index e9e100fd..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_xattr.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef H_JFS_XATTR -#define H_JFS_XATTR - -/* - * jfs_ea_list describe the on-disk format of the extended attributes. - * I know the null-terminator is redundant since namelen is stored, but - * I am maintaining compatibility with OS/2 where possible. - */ -struct jfs_ea { - u8 flag; /* Unused? */ - u8 namelen; /* Length of name */ - __le16 valuelen; /* Length of value */ - char name[0]; /* Attribute name (includes null-terminator) */ -}; /* Value immediately follows name */ - -struct jfs_ea_list { - __le32 size; /* overall size */ - struct jfs_ea ea[0]; /* Variable length list */ -}; - -/* Macros for defining maxiumum number of bytes supported for EAs */ -#define MAXEASIZE 65535 -#define MAXEALISTSIZE MAXEASIZE - -/* - * some macros for dealing with variable length EA lists. 
- */ -#define EA_SIZE(ea) \ - (sizeof (struct jfs_ea) + (ea)->namelen + 1 + \ - le16_to_cpu((ea)->valuelen)) -#define NEXT_EA(ea) ((struct jfs_ea *) (((char *) (ea)) + (EA_SIZE (ea)))) -#define FIRST_EA(ealist) ((ealist)->ea) -#define EALIST_SIZE(ealist) le32_to_cpu((ealist)->size) -#define END_EALIST(ealist) \ - ((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist))) - -extern int __jfs_setxattr(tid_t, struct inode *, const char *, const void *, - size_t, int); -extern int jfs_setxattr(struct dentry *, const char *, const void *, size_t, - int); -extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t); -extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t); -extern ssize_t jfs_listxattr(struct dentry *, char *, size_t); -extern int jfs_removexattr(struct dentry *, const char *); - -#ifdef CONFIG_JFS_SECURITY -extern int jfs_init_security(tid_t, struct inode *, struct inode *, - const struct qstr *); -#else -static inline int jfs_init_security(tid_t tid, struct inode *inode, - struct inode *dir, const struct qstr *qstr) -{ - return 0; -} -#endif - -#endif /* H_JFS_XATTR */ diff --git a/ANDROID_3.4.5/fs/jfs/jfs_xtree.c b/ANDROID_3.4.5/fs/jfs/jfs_xtree.c deleted file mode 100644 index 6c50871e..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_xtree.c +++ /dev/null @@ -1,3905 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2005 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -/* - * jfs_xtree.c: extent allocation descriptor B+-tree manager - */ - -#include <linux/fs.h> -#include <linux/module.h> -#include <linux/quotaops.h> -#include <linux/seq_file.h> -#include "jfs_incore.h" -#include "jfs_filsys.h" -#include "jfs_metapage.h" -#include "jfs_dmap.h" -#include "jfs_dinode.h" -#include "jfs_superblock.h" -#include "jfs_debug.h" - -/* - * xtree local flag - */ -#define XT_INSERT 0x00000001 - -/* - * xtree key/entry comparison: extent offset - * - * return: - * -1: k < start of extent - * 0: start_of_extent <= k <= end_of_extent - * 1: k > end_of_extent - */ -#define XT_CMP(CMP, K, X, OFFSET64)\ -{\ - OFFSET64 = offsetXAD(X);\ - (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ - ((K) < OFFSET64) ? -1 : 0;\ -} - -/* write a xad entry */ -#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ -{\ - (XAD)->flag = (FLAG);\ - XADoffset((XAD), (OFF));\ - XADlength((XAD), (LEN));\ - XADaddress((XAD), (ADDR));\ -} - -#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) - -/* get page buffer for specified block address */ -/* ToDo: Replace this ugly macro with a function */ -#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ -{\ - BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\ - if (!(RC))\ - {\ - if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\ - (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\ - (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\ - {\ - jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\ - BT_PUTPAGE(MP);\ - MP = NULL;\ - RC = -EIO;\ - }\ - }\ -} - -/* for consistency */ -#define XT_PUTPAGE(MP) BT_PUTPAGE(MP) - -#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ - BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) -/* xtree 
entry parameter descriptor */ -struct xtsplit { - struct metapage *mp; - s16 index; - u8 flag; - s64 off; - s64 addr; - int len; - struct pxdlist *pxdlist; -}; - - -/* - * statistics - */ -#ifdef CONFIG_JFS_STATISTICS -static struct { - uint search; - uint fastSearch; - uint split; -} xtStat; -#endif - - -/* - * forward references - */ -static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp, - struct btstack * btstack, int flag); - -static int xtSplitUp(tid_t tid, - struct inode *ip, - struct xtsplit * split, struct btstack * btstack); - -static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split, - struct metapage ** rmpp, s64 * rbnp); - -static int xtSplitRoot(tid_t tid, struct inode *ip, - struct xtsplit * split, struct metapage ** rmpp); - -#ifdef _STILL_TO_PORT -static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, - xtpage_t * fp, struct btstack * btstack); - -static int xtSearchNode(struct inode *ip, - xad_t * xad, - int *cmpp, struct btstack * btstack, int flag); - -static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp); -#endif /* _STILL_TO_PORT */ - -/* - * xtLookup() - * - * function: map a single page into a physical extent; - */ -int xtLookup(struct inode *ip, s64 lstart, - s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check) -{ - int rc = 0; - struct btstack btstack; - int cmp; - s64 bn; - struct metapage *mp; - xtpage_t *p; - int index; - xad_t *xad; - s64 next, size, xoff, xend; - int xlen; - s64 xaddr; - - *paddr = 0; - *plen = llen; - - if (!no_check) { - /* is lookup offset beyond eof ? 
*/ - size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> - JFS_SBI(ip->i_sb)->l2bsize; - if (lstart >= size) - return 0; - } - - /* - * search for the xad entry covering the logical extent - */ -//search: - if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) { - jfs_err("xtLookup: xtSearch returned %d", rc); - return rc; - } - - /* - * compute the physical extent covering logical extent - * - * N.B. search may have failed (e.g., hole in sparse file), - * and returned the index of the next entry. - */ - /* retrieve search result */ - XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); - - /* is xad found covering start of logical extent ? - * lstart is a page start address, - * i.e., lstart cannot start in a hole; - */ - if (cmp) { - if (next) - *plen = min(next - lstart, llen); - goto out; - } - - /* - * lxd covered by xad - */ - xad = &p->xad[index]; - xoff = offsetXAD(xad); - xlen = lengthXAD(xad); - xend = xoff + xlen; - xaddr = addressXAD(xad); - - /* initialize new pxd */ - *pflag = xad->flag; - *paddr = xaddr + (lstart - xoff); - /* a page must be fully covered by an xad */ - *plen = min(xend - lstart, llen); - - out: - XT_PUTPAGE(mp); - - return rc; -} - -/* - * xtSearch() - * - * function: search for the xad entry covering specified offset. - * - * parameters: - * ip - file object; - * xoff - extent offset; - * nextp - address of next extent (if any) for search miss - * cmpp - comparison result: - * btstack - traverse stack; - * flag - search process flag (XT_INSERT); - * - * returns: - * btstack contains (bn, index) of search path traversed to the entry. - * *cmpp is set to result of comparison with the entry returned. - * the page containing the entry is pinned at exit. 
- */ -static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, - int *cmpp, struct btstack * btstack, int flag) -{ - struct jfs_inode_info *jfs_ip = JFS_IP(ip); - int rc = 0; - int cmp = 1; /* init for empty page */ - s64 bn; /* block number */ - struct metapage *mp; /* page buffer */ - xtpage_t *p; /* page */ - xad_t *xad; - int base, index, lim, btindex; - struct btframe *btsp; - int nsplit = 0; /* number of pages to split */ - s64 t64; - s64 next = 0; - - INCREMENT(xtStat.search); - - BT_CLR(btstack); - - btstack->nsplit = 0; - - /* - * search down tree from root: - * - * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of - * internal page, child page Pi contains entry with k, Ki <= K < Kj. - * - * if entry with search key K is not found - * internal page search find the entry with largest key Ki - * less than K which point to the child page to search; - * leaf page search find the entry with smallest key Kj - * greater than K so that the returned index is the position of - * the entry to be shifted right for insertion of new entry. - * for empty tree, search key is greater than any key of the tree. - * - * by convention, root bn = 0. 
- */ - for (bn = 0;;) { - /* get/pin the page to search */ - XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - /* try sequential access heuristics with the previous - * access entry in target leaf page: - * once search narrowed down into the target leaf, - * key must either match an entry in the leaf or - * key entry does not exist in the tree; - */ -//fastSearch: - if ((jfs_ip->btorder & BT_SEQUENTIAL) && - (p->header.flag & BT_LEAF) && - (index = jfs_ip->btindex) < - le16_to_cpu(p->header.nextindex)) { - xad = &p->xad[index]; - t64 = offsetXAD(xad); - if (xoff < t64 + lengthXAD(xad)) { - if (xoff >= t64) { - *cmpp = 0; - goto out; - } - - /* stop sequential access heuristics */ - goto binarySearch; - } else { /* (t64 + lengthXAD(xad)) <= xoff */ - - /* try next sequential entry */ - index++; - if (index < - le16_to_cpu(p->header.nextindex)) { - xad++; - t64 = offsetXAD(xad); - if (xoff < t64 + lengthXAD(xad)) { - if (xoff >= t64) { - *cmpp = 0; - goto out; - } - - /* miss: key falls between - * previous and this entry - */ - *cmpp = 1; - next = t64; - goto out; - } - - /* (xoff >= t64 + lengthXAD(xad)); - * matching entry may be further out: - * stop heuristic search - */ - /* stop sequential access heuristics */ - goto binarySearch; - } - - /* (index == p->header.nextindex); - * miss: key entry does not exist in - * the target leaf/tree - */ - *cmpp = 1; - goto out; - } - - /* - * if hit, return index of the entry found, and - * if miss, where new entry with search key is - * to be inserted; - */ - out: - /* compute number of pages to split */ - if (flag & XT_INSERT) { - if (p->header.nextindex == /* little-endian */ - p->header.maxentry) - nsplit++; - else - nsplit = 0; - btstack->nsplit = nsplit; - } - - /* save search result */ - btsp = btstack->top; - btsp->bn = bn; - btsp->index = index; - btsp->mp = mp; - - /* update sequential access heuristics */ - jfs_ip->btindex = index; - - if (nextp) - *nextp = next; - - INCREMENT(xtStat.fastSearch); - 
return 0; - } - - /* well, ... full search now */ - binarySearch: - lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; - - /* - * binary search with search key K on the current page - */ - for (base = XTENTRYSTART; lim; lim >>= 1) { - index = base + (lim >> 1); - - XT_CMP(cmp, xoff, &p->xad[index], t64); - if (cmp == 0) { - /* - * search hit - */ - /* search hit - leaf page: - * return the entry found - */ - if (p->header.flag & BT_LEAF) { - *cmpp = cmp; - - /* compute number of pages to split */ - if (flag & XT_INSERT) { - if (p->header.nextindex == - p->header.maxentry) - nsplit++; - else - nsplit = 0; - btstack->nsplit = nsplit; - } - - /* save search result */ - btsp = btstack->top; - btsp->bn = bn; - btsp->index = index; - btsp->mp = mp; - - /* init sequential access heuristics */ - btindex = jfs_ip->btindex; - if (index == btindex || - index == btindex + 1) - jfs_ip->btorder = BT_SEQUENTIAL; - else - jfs_ip->btorder = BT_RANDOM; - jfs_ip->btindex = index; - - return 0; - } - /* search hit - internal page: - * descend/search its child page - */ - if (index < le16_to_cpu(p->header.nextindex)-1) - next = offsetXAD(&p->xad[index + 1]); - goto next; - } - - if (cmp > 0) { - base = index + 1; - --lim; - } - } - - /* - * search miss - * - * base is the smallest index with key (Kj) greater than - * search key (K) and may be zero or maxentry index. - */ - if (base < le16_to_cpu(p->header.nextindex)) - next = offsetXAD(&p->xad[base]); - /* - * search miss - leaf page: - * - * return location of entry (base) where new entry with - * search key K is to be inserted. 
- */ - if (p->header.flag & BT_LEAF) { - *cmpp = cmp; - - /* compute number of pages to split */ - if (flag & XT_INSERT) { - if (p->header.nextindex == - p->header.maxentry) - nsplit++; - else - nsplit = 0; - btstack->nsplit = nsplit; - } - - /* save search result */ - btsp = btstack->top; - btsp->bn = bn; - btsp->index = base; - btsp->mp = mp; - - /* init sequential access heuristics */ - btindex = jfs_ip->btindex; - if (base == btindex || base == btindex + 1) - jfs_ip->btorder = BT_SEQUENTIAL; - else - jfs_ip->btorder = BT_RANDOM; - jfs_ip->btindex = base; - - if (nextp) - *nextp = next; - - return 0; - } - - /* - * search miss - non-leaf page: - * - * if base is non-zero, decrement base by one to get the parent - * entry of the child page to search. - */ - index = base ? base - 1 : base; - - /* - * go down to child page - */ - next: - /* update number of pages to split */ - if (p->header.nextindex == p->header.maxentry) - nsplit++; - else - nsplit = 0; - - /* push (bn, index) of the parent page/entry */ - if (BT_STACK_FULL(btstack)) { - jfs_error(ip->i_sb, "stack overrun in xtSearch!"); - XT_PUTPAGE(mp); - return -EIO; - } - BT_PUSH(btstack, bn, index); - - /* get the child page block number */ - bn = addressXAD(&p->xad[index]); - - /* unpin the parent page */ - XT_PUTPAGE(mp); - } -} - -/* - * xtInsert() - * - * function: - * - * parameter: - * tid - transaction id; - * ip - file object; - * xflag - extent flag (XAD_NOTRECORDED): - * xoff - extent offset; - * xlen - extent length; - * xaddrp - extent address pointer (in/out): - * if (*xaddrp) - * caller allocated data extent at *xaddrp; - * else - * allocate data extent and return its xaddr; - * flag - - * - * return: - */ -int xtInsert(tid_t tid, /* transaction id */ - struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp, - int flag) -{ - int rc = 0; - s64 xaddr, hint; - struct metapage *mp; /* meta-page buffer */ - xtpage_t *p; /* base B+-tree index page */ - s64 bn; - int index, nextindex; - struct 
btstack btstack; /* traverse stack */ - struct xtsplit split; /* split information */ - xad_t *xad; - int cmp; - s64 next; - struct tlock *tlck; - struct xtlock *xtlck; - - jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); - - /* - * search for the entry location at which to insert: - * - * xtFastSearch() and xtSearch() both returns (leaf page - * pinned, index at which to insert). - * n.b. xtSearch() may return index of maxentry of - * the full page. - */ - if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) - return rc; - - /* retrieve search result */ - XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); - - /* This test must follow XT_GETSEARCH since mp must be valid if - * we branch to out: */ - if ((cmp == 0) || (next && (xlen > next - xoff))) { - rc = -EEXIST; - goto out; - } - - /* - * allocate data extent requested - * - * allocation hint: last xad - */ - if ((xaddr = *xaddrp) == 0) { - if (index > XTENTRYSTART) { - xad = &p->xad[index - 1]; - hint = addressXAD(xad) + lengthXAD(xad) - 1; - } else - hint = 0; - if ((rc = dquot_alloc_block(ip, xlen))) - goto out; - if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) { - dquot_free_block(ip, xlen); - goto out; - } - } - - /* - * insert entry for new extent - */ - xflag |= XAD_NEW; - - /* - * if the leaf page is full, split the page and - * propagate up the router entry for the new page from split - * - * The xtSplitUp() will insert the entry and unpin the leaf page. 
- */ - nextindex = le16_to_cpu(p->header.nextindex); - if (nextindex == le16_to_cpu(p->header.maxentry)) { - split.mp = mp; - split.index = index; - split.flag = xflag; - split.off = xoff; - split.len = xlen; - split.addr = xaddr; - split.pxdlist = NULL; - if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { - /* undo data extent allocation */ - if (*xaddrp == 0) { - dbFree(ip, xaddr, (s64) xlen); - dquot_free_block(ip, xlen); - } - return rc; - } - - *xaddrp = xaddr; - return 0; - } - - /* - * insert the new entry into the leaf page - */ - /* - * acquire a transaction lock on the leaf page; - * - * action: xad insertion/extension; - */ - BT_MARK_DIRTY(mp, ip); - - /* if insert into middle, shift right remaining entries. */ - if (index < nextindex) - memmove(&p->xad[index + 1], &p->xad[index], - (nextindex - index) * sizeof(xad_t)); - - /* insert the new entry: mark the entry NEW */ - xad = &p->xad[index]; - XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); - - /* advance next available entry index */ - le16_add_cpu(&p->header.nextindex, 1); - - /* Don't log it if there are no links to the file */ - if (!test_cflag(COMMIT_Nolink, ip)) { - tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); - xtlck = (struct xtlock *) & tlck->lock; - xtlck->lwm.offset = - (xtlck->lwm.offset) ? 
min(index, - (int)xtlck->lwm.offset) : index; - xtlck->lwm.length = - le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; - } - - *xaddrp = xaddr; - - out: - /* unpin the leaf page */ - XT_PUTPAGE(mp); - - return rc; -} - - -/* - * xtSplitUp() - * - * function: - * split full pages as propagating insertion up the tree - * - * parameter: - * tid - transaction id; - * ip - file object; - * split - entry parameter descriptor; - * btstack - traverse stack from xtSearch() - * - * return: - */ -static int -xtSplitUp(tid_t tid, - struct inode *ip, struct xtsplit * split, struct btstack * btstack) -{ - int rc = 0; - struct metapage *smp; - xtpage_t *sp; /* split page */ - struct metapage *rmp; - s64 rbn; /* new right page block number */ - struct metapage *rcmp; - xtpage_t *rcp; /* right child page */ - s64 rcbn; /* right child page block number */ - int skip; /* index of entry of insertion */ - int nextindex; /* next available entry index of p */ - struct btframe *parent; /* parent page entry on traverse stack */ - xad_t *xad; - s64 xaddr; - int xlen; - int nsplit; /* number of pages split */ - struct pxdlist pxdlist; - pxd_t *pxd; - struct tlock *tlck; - struct xtlock *xtlck; - - smp = split->mp; - sp = XT_PAGE(ip, smp); - - /* is inode xtree root extension/inline EA area free ? */ - if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) && - (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) && - (JFS_IP(ip)->mode2 & INLINEEA)) { - sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT); - JFS_IP(ip)->mode2 &= ~INLINEEA; - - BT_MARK_DIRTY(smp, ip); - /* - * acquire a transaction lock on the leaf page; - * - * action: xad insertion/extension; - */ - - /* if insert into middle, shift right remaining entries. 
*/ - skip = split->index; - nextindex = le16_to_cpu(sp->header.nextindex); - if (skip < nextindex) - memmove(&sp->xad[skip + 1], &sp->xad[skip], - (nextindex - skip) * sizeof(xad_t)); - - /* insert the new entry: mark the entry NEW */ - xad = &sp->xad[skip]; - XT_PUTENTRY(xad, split->flag, split->off, split->len, - split->addr); - - /* advance next available entry index */ - le16_add_cpu(&sp->header.nextindex, 1); - - /* Don't log it if there are no links to the file */ - if (!test_cflag(COMMIT_Nolink, ip)) { - tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); - xtlck = (struct xtlock *) & tlck->lock; - xtlck->lwm.offset = (xtlck->lwm.offset) ? - min(skip, (int)xtlck->lwm.offset) : skip; - xtlck->lwm.length = - le16_to_cpu(sp->header.nextindex) - - xtlck->lwm.offset; - } - - return 0; - } - - /* - * allocate new index blocks to cover index page split(s) - * - * allocation hint: ? - */ - if (split->pxdlist == NULL) { - nsplit = btstack->nsplit; - split->pxdlist = &pxdlist; - pxdlist.maxnpxd = pxdlist.npxd = 0; - pxd = &pxdlist.pxd[0]; - xlen = JFS_SBI(ip->i_sb)->nbperpage; - for (; nsplit > 0; nsplit--, pxd++) { - if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr)) - == 0) { - PXDaddress(pxd, xaddr); - PXDlength(pxd, xlen); - - pxdlist.maxnpxd++; - - continue; - } - - /* undo allocation */ - - XT_PUTPAGE(smp); - return rc; - } - } - - /* - * Split leaf page <sp> into <sp> and a new right page <rp>. - * - * The split routines insert the new entry into the leaf page, - * and acquire txLock as appropriate. - * return <rp> pinned and its block number <rpbn>. - */ - rc = (sp->header.flag & BT_ROOT) ? 
- xtSplitRoot(tid, ip, split, &rmp) : - xtSplitPage(tid, ip, split, &rmp, &rbn); - - XT_PUTPAGE(smp); - - if (rc) - return -EIO; - /* - * propagate up the router entry for the leaf page just split - * - * insert a router entry for the new page into the parent page, - * propagate the insert/split up the tree by walking back the stack - * of (bn of parent page, index of child page entry in parent page) - * that were traversed during the search for the page that split. - * - * the propagation of insert/split up the tree stops if the root - * splits or the page inserted into doesn't have to split to hold - * the new entry. - * - * the parent entry for the split page remains the same, and - * a new entry is inserted at its right with the first key and - * block number of the new right page. - * - * There are a maximum of 3 pages pinned at any time: - * right child, left parent and right parent (when the parent splits) - * to keep the child page pinned while working on the parent. - * make sure that all pins are released at exit. - */ - while ((parent = BT_POP(btstack)) != NULL) { - /* parent page specified by stack frame <parent> */ - - /* keep current child pages <rcp> pinned */ - rcmp = rmp; - rcbn = rbn; - rcp = XT_PAGE(ip, rcmp); - - /* - * insert router entry in parent for new right child page <rp> - */ - /* get/pin the parent page <sp> */ - XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); - if (rc) { - XT_PUTPAGE(rcmp); - return rc; - } - - /* - * The new key entry goes ONE AFTER the index of parent entry, - * because the split was to the right. 
- */ - skip = parent->index + 1; - - /* - * split or shift right remaining entries of the parent page - */ - nextindex = le16_to_cpu(sp->header.nextindex); - /* - * parent page is full - split the parent page - */ - if (nextindex == le16_to_cpu(sp->header.maxentry)) { - /* init for parent page split */ - split->mp = smp; - split->index = skip; /* index at insert */ - split->flag = XAD_NEW; - split->off = offsetXAD(&rcp->xad[XTENTRYSTART]); - split->len = JFS_SBI(ip->i_sb)->nbperpage; - split->addr = rcbn; - - /* unpin previous right child page */ - XT_PUTPAGE(rcmp); - - /* The split routines insert the new entry, - * and acquire txLock as appropriate. - * return <rp> pinned and its block number <rpbn>. - */ - rc = (sp->header.flag & BT_ROOT) ? - xtSplitRoot(tid, ip, split, &rmp) : - xtSplitPage(tid, ip, split, &rmp, &rbn); - if (rc) { - XT_PUTPAGE(smp); - return rc; - } - - XT_PUTPAGE(smp); - /* keep new child page <rp> pinned */ - } - /* - * parent page is not full - insert in parent page - */ - else { - /* - * insert router entry in parent for the right child - * page from the first entry of the right child page: - */ - /* - * acquire a transaction lock on the parent page; - * - * action: router xad insertion; - */ - BT_MARK_DIRTY(smp, ip); - - /* - * if insert into middle, shift right remaining entries - */ - if (skip < nextindex) - memmove(&sp->xad[skip + 1], &sp->xad[skip], - (nextindex - - skip) << L2XTSLOTSIZE); - - /* insert the router entry */ - xad = &sp->xad[skip]; - XT_PUTENTRY(xad, XAD_NEW, - offsetXAD(&rcp->xad[XTENTRYSTART]), - JFS_SBI(ip->i_sb)->nbperpage, rcbn); - - /* advance next available entry index. */ - le16_add_cpu(&sp->header.nextindex, 1); - - /* Don't log it if there are no links to the file */ - if (!test_cflag(COMMIT_Nolink, ip)) { - tlck = txLock(tid, ip, smp, - tlckXTREE | tlckGROW); - xtlck = (struct xtlock *) & tlck->lock; - xtlck->lwm.offset = (xtlck->lwm.offset) ? 
- min(skip, (int)xtlck->lwm.offset) : skip; - xtlck->lwm.length = - le16_to_cpu(sp->header.nextindex) - - xtlck->lwm.offset; - } - - /* unpin parent page */ - XT_PUTPAGE(smp); - - /* exit propagate up */ - break; - } - } - - /* unpin current right page */ - XT_PUTPAGE(rmp); - - return 0; -} - - -/* - * xtSplitPage() - * - * function: - * split a full non-root page into - * original/split/left page and new right page - * i.e., the original/split page remains as left page. - * - * parameter: - * int tid, - * struct inode *ip, - * struct xtsplit *split, - * struct metapage **rmpp, - * u64 *rbnp, - * - * return: - * Pointer to page in which to insert or NULL on error. - */ -static int -xtSplitPage(tid_t tid, struct inode *ip, - struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp) -{ - int rc = 0; - struct metapage *smp; - xtpage_t *sp; - struct metapage *rmp; - xtpage_t *rp; /* new right page allocated */ - s64 rbn; /* new right page block number */ - struct metapage *mp; - xtpage_t *p; - s64 nextbn; - int skip, maxentry, middle, righthalf, n; - xad_t *xad; - struct pxdlist *pxdlist; - pxd_t *pxd; - struct tlock *tlck; - struct xtlock *sxtlck = NULL, *rxtlck = NULL; - int quota_allocation = 0; - - smp = split->mp; - sp = XT_PAGE(ip, smp); - - INCREMENT(xtStat.split); - - pxdlist = split->pxdlist; - pxd = &pxdlist->pxd[pxdlist->npxd]; - pxdlist->npxd++; - rbn = addressPXD(pxd); - - /* Allocate blocks to quota. 
*/ - rc = dquot_alloc_block(ip, lengthPXD(pxd)); - if (rc) - goto clean_up; - - quota_allocation += lengthPXD(pxd); - - /* - * allocate the new right page for the split - */ - rmp = get_metapage(ip, rbn, PSIZE, 1); - if (rmp == NULL) { - rc = -EIO; - goto clean_up; - } - - jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); - - BT_MARK_DIRTY(rmp, ip); - /* - * action: new page; - */ - - rp = (xtpage_t *) rmp->data; - rp->header.self = *pxd; - rp->header.flag = sp->header.flag & BT_TYPE; - rp->header.maxentry = sp->header.maxentry; /* little-endian */ - rp->header.nextindex = cpu_to_le16(XTENTRYSTART); - - BT_MARK_DIRTY(smp, ip); - /* Don't log it if there are no links to the file */ - if (!test_cflag(COMMIT_Nolink, ip)) { - /* - * acquire a transaction lock on the new right page; - */ - tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); - rxtlck = (struct xtlock *) & tlck->lock; - rxtlck->lwm.offset = XTENTRYSTART; - /* - * acquire a transaction lock on the split page - */ - tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); - sxtlck = (struct xtlock *) & tlck->lock; - } - - /* - * initialize/update sibling pointers of <sp> and <rp> - */ - nextbn = le64_to_cpu(sp->header.next); - rp->header.next = cpu_to_le64(nextbn); - rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); - sp->header.next = cpu_to_le64(rbn); - - skip = split->index; - - /* - * sequential append at tail (after last entry of last page) - * - * if splitting the last page on a level because of appending - * a entry to it (skip is maxentry), it's likely that the access is - * sequential. adding an empty page on the side of the level is less - * work and can push the fill factor much higher than normal. - * if we're wrong it's no big deal - we will do the split the right - * way next time. - * (it may look like it's equally easy to do a similar hack for - * reverse sorted data, that is, split the tree left, but it's not. - * Be my guest.) 
- */ - if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) { - /* - * acquire a transaction lock on the new/right page; - * - * action: xad insertion; - */ - /* insert entry at the first entry of the new right page */ - xad = &rp->xad[XTENTRYSTART]; - XT_PUTENTRY(xad, split->flag, split->off, split->len, - split->addr); - - rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); - - if (!test_cflag(COMMIT_Nolink, ip)) { - /* rxtlck->lwm.offset = XTENTRYSTART; */ - rxtlck->lwm.length = 1; - } - - *rmpp = rmp; - *rbnp = rbn; - - jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); - return 0; - } - - /* - * non-sequential insert (at possibly middle page) - */ - - /* - * update previous pointer of old next/right page of <sp> - */ - if (nextbn != 0) { - XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); - if (rc) { - XT_PUTPAGE(rmp); - goto clean_up; - } - - BT_MARK_DIRTY(mp, ip); - /* - * acquire a transaction lock on the next page; - * - * action:sibling pointer update; - */ - if (!test_cflag(COMMIT_Nolink, ip)) - tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); - - p->header.prev = cpu_to_le64(rbn); - - /* sibling page may have been updated previously, or - * it may be updated later; - */ - - XT_PUTPAGE(mp); - } - - /* - * split the data between the split and new/right pages - */ - maxentry = le16_to_cpu(sp->header.maxentry); - middle = maxentry >> 1; - righthalf = maxentry - middle; - - /* - * skip index in old split/left page - insert into left page: - */ - if (skip <= middle) { - /* move right half of split page to the new right page */ - memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], - righthalf << L2XTSLOTSIZE); - - /* shift right tail of left half to make room for new entry */ - if (skip < middle) - memmove(&sp->xad[skip + 1], &sp->xad[skip], - (middle - skip) << L2XTSLOTSIZE); - - /* insert new entry */ - xad = &sp->xad[skip]; - XT_PUTENTRY(xad, split->flag, split->off, split->len, - split->addr); - - /* update page header */ - sp->header.nextindex = 
cpu_to_le16(middle + 1); - if (!test_cflag(COMMIT_Nolink, ip)) { - sxtlck->lwm.offset = (sxtlck->lwm.offset) ? - min(skip, (int)sxtlck->lwm.offset) : skip; - } - - rp->header.nextindex = - cpu_to_le16(XTENTRYSTART + righthalf); - } - /* - * skip index in new right page - insert into right page: - */ - else { - /* move left head of right half to right page */ - n = skip - middle; - memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], - n << L2XTSLOTSIZE); - - /* insert new entry */ - n += XTENTRYSTART; - xad = &rp->xad[n]; - XT_PUTENTRY(xad, split->flag, split->off, split->len, - split->addr); - - /* move right tail of right half to right page */ - if (skip < maxentry) - memmove(&rp->xad[n + 1], &sp->xad[skip], - (maxentry - skip) << L2XTSLOTSIZE); - - /* update page header */ - sp->header.nextindex = cpu_to_le16(middle); - if (!test_cflag(COMMIT_Nolink, ip)) { - sxtlck->lwm.offset = (sxtlck->lwm.offset) ? - min(middle, (int)sxtlck->lwm.offset) : middle; - } - - rp->header.nextindex = cpu_to_le16(XTENTRYSTART + - righthalf + 1); - } - - if (!test_cflag(COMMIT_Nolink, ip)) { - sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) - - sxtlck->lwm.offset; - - /* rxtlck->lwm.offset = XTENTRYSTART; */ - rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - - XTENTRYSTART; - } - - *rmpp = rmp; - *rbnp = rbn; - - jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); - return rc; - - clean_up: - - /* Rollback quota allocation. */ - if (quota_allocation) - dquot_free_block(ip, quota_allocation); - - return (rc); -} - - -/* - * xtSplitRoot() - * - * function: - * split the full root page into original/root/split page and new - * right page - * i.e., root remains fixed in tree anchor (inode) and the root is - * copied to a single new right child page since root page << - * non-root page, and the split root page contains a single entry - * for the new right child page. 
- * - * parameter: - * int tid, - * struct inode *ip, - * struct xtsplit *split, - * struct metapage **rmpp) - * - * return: - * Pointer to page in which to insert or NULL on error. - */ -static int -xtSplitRoot(tid_t tid, - struct inode *ip, struct xtsplit * split, struct metapage ** rmpp) -{ - xtpage_t *sp; - struct metapage *rmp; - xtpage_t *rp; - s64 rbn; - int skip, nextindex; - xad_t *xad; - pxd_t *pxd; - struct pxdlist *pxdlist; - struct tlock *tlck; - struct xtlock *xtlck; - int rc; - - sp = &JFS_IP(ip)->i_xtroot; - - INCREMENT(xtStat.split); - - /* - * allocate a single (right) child page - */ - pxdlist = split->pxdlist; - pxd = &pxdlist->pxd[pxdlist->npxd]; - pxdlist->npxd++; - rbn = addressPXD(pxd); - rmp = get_metapage(ip, rbn, PSIZE, 1); - if (rmp == NULL) - return -EIO; - - /* Allocate blocks to quota. */ - rc = dquot_alloc_block(ip, lengthPXD(pxd)); - if (rc) { - release_metapage(rmp); - return rc; - } - - jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); - - /* - * acquire a transaction lock on the new right page; - * - * action: new page; - */ - BT_MARK_DIRTY(rmp, ip); - - rp = (xtpage_t *) rmp->data; - rp->header.flag = - (sp->header.flag & BT_LEAF) ? 
BT_LEAF : BT_INTERNAL; - rp->header.self = *pxd; - rp->header.nextindex = cpu_to_le16(XTENTRYSTART); - rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE); - - /* initialize sibling pointers */ - rp->header.next = 0; - rp->header.prev = 0; - - /* - * copy the in-line root page into new right page extent - */ - nextindex = le16_to_cpu(sp->header.maxentry); - memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART], - (nextindex - XTENTRYSTART) << L2XTSLOTSIZE); - - /* - * insert the new entry into the new right/child page - * (skip index in the new right page will not change) - */ - skip = split->index; - /* if insert into middle, shift right remaining entries */ - if (skip != nextindex) - memmove(&rp->xad[skip + 1], &rp->xad[skip], - (nextindex - skip) * sizeof(xad_t)); - - xad = &rp->xad[skip]; - XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr); - - /* update page header */ - rp->header.nextindex = cpu_to_le16(nextindex + 1); - - if (!test_cflag(COMMIT_Nolink, ip)) { - tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); - xtlck = (struct xtlock *) & tlck->lock; - xtlck->lwm.offset = XTENTRYSTART; - xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - - XTENTRYSTART; - } - - /* - * reset the root - * - * init root with the single entry for the new right page - * set the 1st entry offset to 0, which force the left-most key - * at any level of the tree to be less than any search key. 
- */ - /* - * acquire a transaction lock on the root page (in-memory inode); - * - * action: root split; - */ - BT_MARK_DIRTY(split->mp, ip); - - xad = &sp->xad[XTENTRYSTART]; - XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn); - - /* update page header of root */ - sp->header.flag &= ~BT_LEAF; - sp->header.flag |= BT_INTERNAL; - - sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); - - if (!test_cflag(COMMIT_Nolink, ip)) { - tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW); - xtlck = (struct xtlock *) & tlck->lock; - xtlck->lwm.offset = XTENTRYSTART; - xtlck->lwm.length = 1; - } - - *rmpp = rmp; - - jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp); - return 0; -} - - -/* - * xtExtend() - * - * function: extend in-place; - * - * note: existing extent may or may not have been committed. - * caller is responsible for pager buffer cache update, and - * working block allocation map update; - * update pmap: alloc whole extended extent; - */ -int xtExtend(tid_t tid, /* transaction id */ - struct inode *ip, s64 xoff, /* delta extent offset */ - s32 xlen, /* delta extent length */ - int flag) -{ - int rc = 0; - int cmp; - struct metapage *mp; /* meta-page buffer */ - xtpage_t *p; /* base B+-tree index page */ - s64 bn; - int index, nextindex, len; - struct btstack btstack; /* traverse stack */ - struct xtsplit split; /* split information */ - xad_t *xad; - s64 xaddr; - struct tlock *tlck; - struct xtlock *xtlck = NULL; - - jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); - - /* there must exist extent to be extended */ - if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT))) - return rc; - - /* retrieve search result */ - XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); - - if (cmp != 0) { - XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent"); - return -EIO; - } - - /* extension must be contiguous */ - xad = &p->xad[index]; - if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { - XT_PUTPAGE(mp); 
- jfs_error(ip->i_sb, "xtExtend: extension is not contiguous"); - return -EIO; - } - - /* - * acquire a transaction lock on the leaf page; - * - * action: xad insertion/extension; - */ - BT_MARK_DIRTY(mp, ip); - if (!test_cflag(COMMIT_Nolink, ip)) { - tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); - xtlck = (struct xtlock *) & tlck->lock; - } - - /* extend will overflow extent ? */ - xlen = lengthXAD(xad) + xlen; - if ((len = xlen - MAXXLEN) <= 0) - goto extendOld; - - /* - * extent overflow: insert entry for new extent - */ -//insertNew: - xoff = offsetXAD(xad) + MAXXLEN; - xaddr = addressXAD(xad) + MAXXLEN; - nextindex = le16_to_cpu(p->header.nextindex); - - /* - * if the leaf page is full, insert the new entry and - * propagate up the router entry for the new page from split - * - * The xtSplitUp() will insert the entry and unpin the leaf page. - */ - if (nextindex == le16_to_cpu(p->header.maxentry)) { - /* xtSpliUp() unpins leaf pages */ - split.mp = mp; - split.index = index + 1; - split.flag = XAD_NEW; - split.off = xoff; /* split offset */ - split.len = len; - split.addr = xaddr; - split.pxdlist = NULL; - if ((rc = xtSplitUp(tid, ip, &split, &btstack))) - return rc; - - /* get back old page */ - XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - /* - * if leaf root has been split, original root has been - * copied to new child page, i.e., original entry now - * resides on the new child page; - */ - if (p->header.flag & BT_INTERNAL) { - ASSERT(p->header.nextindex == - cpu_to_le16(XTENTRYSTART + 1)); - xad = &p->xad[XTENTRYSTART]; - bn = addressXAD(xad); - XT_PUTPAGE(mp); - - /* get new child page */ - XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - BT_MARK_DIRTY(mp, ip); - if (!test_cflag(COMMIT_Nolink, ip)) { - tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); - xtlck = (struct xtlock *) & tlck->lock; - } - } - } - /* - * insert the new entry into the leaf page - */ - else { - /* insert the new entry: mark the entry NEW */ - xad 
= &p->xad[index + 1]; - XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); - - /* advance next available entry index */ - le16_add_cpu(&p->header.nextindex, 1); - } - - /* get back old entry */ - xad = &p->xad[index]; - xlen = MAXXLEN; - - /* - * extend old extent - */ - extendOld: - XADlength(xad, xlen); - if (!(xad->flag & XAD_NEW)) - xad->flag |= XAD_EXTENDED; - - if (!test_cflag(COMMIT_Nolink, ip)) { - xtlck->lwm.offset = - (xtlck->lwm.offset) ? min(index, - (int)xtlck->lwm.offset) : index; - xtlck->lwm.length = - le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; - } - - /* unpin the leaf page */ - XT_PUTPAGE(mp); - - return rc; -} - -#ifdef _NOTYET -/* - * xtTailgate() - * - * function: split existing 'tail' extent - * (split offset >= start offset of tail extent), and - * relocate and extend the split tail half; - * - * note: existing extent may or may not have been committed. - * caller is responsible for pager buffer cache update, and - * working block allocation map update; - * update pmap: free old split tail extent, alloc new extent; - */ -int xtTailgate(tid_t tid, /* transaction id */ - struct inode *ip, s64 xoff, /* split/new extent offset */ - s32 xlen, /* new extent length */ - s64 xaddr, /* new extent address */ - int flag) -{ - int rc = 0; - int cmp; - struct metapage *mp; /* meta-page buffer */ - xtpage_t *p; /* base B+-tree index page */ - s64 bn; - int index, nextindex, llen, rlen; - struct btstack btstack; /* traverse stack */ - struct xtsplit split; /* split information */ - xad_t *xad; - struct tlock *tlck; - struct xtlock *xtlck = 0; - struct tlock *mtlck; - struct maplock *pxdlock; - -/* -printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", - (ulong)xoff, xlen, (ulong)xaddr); -*/ - - /* there must exist extent to be tailgated */ - if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT))) - return rc; - - /* retrieve search result */ - XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); - - if (cmp != 0) { - XT_PUTPAGE(mp); - 
jfs_error(ip->i_sb, "xtTailgate: couldn't find extent"); - return -EIO; - } - - /* entry found must be last entry */ - nextindex = le16_to_cpu(p->header.nextindex); - if (index != nextindex - 1) { - XT_PUTPAGE(mp); - jfs_error(ip->i_sb, - "xtTailgate: the entry found is not the last entry"); - return -EIO; - } - - BT_MARK_DIRTY(mp, ip); - /* - * acquire tlock of the leaf page containing original entry - */ - if (!test_cflag(COMMIT_Nolink, ip)) { - tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); - xtlck = (struct xtlock *) & tlck->lock; - } - - /* completely replace extent ? */ - xad = &p->xad[index]; -/* -printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", - (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); -*/ - if ((llen = xoff - offsetXAD(xad)) == 0) - goto updateOld; - - /* - * partially replace extent: insert entry for new extent - */ -//insertNew: - /* - * if the leaf page is full, insert the new entry and - * propagate up the router entry for the new page from split - * - * The xtSplitUp() will insert the entry and unpin the leaf page. 
- */ - if (nextindex == le16_to_cpu(p->header.maxentry)) { - /* xtSpliUp() unpins leaf pages */ - split.mp = mp; - split.index = index + 1; - split.flag = XAD_NEW; - split.off = xoff; /* split offset */ - split.len = xlen; - split.addr = xaddr; - split.pxdlist = NULL; - if ((rc = xtSplitUp(tid, ip, &split, &btstack))) - return rc; - - /* get back old page */ - XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - /* - * if leaf root has been split, original root has been - * copied to new child page, i.e., original entry now - * resides on the new child page; - */ - if (p->header.flag & BT_INTERNAL) { - ASSERT(p->header.nextindex == - cpu_to_le16(XTENTRYSTART + 1)); - xad = &p->xad[XTENTRYSTART]; - bn = addressXAD(xad); - XT_PUTPAGE(mp); - - /* get new child page */ - XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - BT_MARK_DIRTY(mp, ip); - if (!test_cflag(COMMIT_Nolink, ip)) { - tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); - xtlck = (struct xtlock *) & tlck->lock; - } - } - } - /* - * insert the new entry into the leaf page - */ - else { - /* insert the new entry: mark the entry NEW */ - xad = &p->xad[index + 1]; - XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); - - /* advance next available entry index */ - le16_add_cpu(&p->header.nextindex, 1); - } - - /* get back old XAD */ - xad = &p->xad[index]; - - /* - * truncate/relocate old extent at split offset - */ - updateOld: - /* update dmap for old/committed/truncated extent */ - rlen = lengthXAD(xad) - llen; - if (!(xad->flag & XAD_NEW)) { - /* free from PWMAP at commit */ - if (!test_cflag(COMMIT_Nolink, ip)) { - mtlck = txMaplock(tid, ip, tlckMAP); - pxdlock = (struct maplock *) & mtlck->lock; - pxdlock->flag = mlckFREEPXD; - PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen); - PXDlength(&pxdlock->pxd, rlen); - pxdlock->index = 1; - } - } else - /* free from WMAP */ - dbFree(ip, addressXAD(xad) + llen, (s64) rlen); - - if (llen) - /* truncate */ - XADlength(xad, llen); - else - /* 
replace */ - XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); - - if (!test_cflag(COMMIT_Nolink, ip)) { - xtlck->lwm.offset = (xtlck->lwm.offset) ? - min(index, (int)xtlck->lwm.offset) : index; - xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - - xtlck->lwm.offset; - } - - /* unpin the leaf page */ - XT_PUTPAGE(mp); - - return rc; -} -#endif /* _NOTYET */ - -/* - * xtUpdate() - * - * function: update XAD; - * - * update extent for allocated_but_not_recorded or - * compressed extent; - * - * parameter: - * nxad - new XAD; - * logical extent of the specified XAD must be completely - * contained by an existing XAD; - */ -int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) -{ /* new XAD */ - int rc = 0; - int cmp; - struct metapage *mp; /* meta-page buffer */ - xtpage_t *p; /* base B+-tree index page */ - s64 bn; - int index0, index, newindex, nextindex; - struct btstack btstack; /* traverse stack */ - struct xtsplit split; /* split information */ - xad_t *xad, *lxad, *rxad; - int xflag; - s64 nxoff, xoff; - int nxlen, xlen, lxlen, rxlen; - s64 nxaddr, xaddr; - struct tlock *tlck; - struct xtlock *xtlck = NULL; - int newpage = 0; - - /* there must exist extent to be tailgated */ - nxoff = offsetXAD(nxad); - nxlen = lengthXAD(nxad); - nxaddr = addressXAD(nxad); - - if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) - return rc; - - /* retrieve search result */ - XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); - - if (cmp != 0) { - XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtUpdate: Could not find extent"); - return -EIO; - } - - BT_MARK_DIRTY(mp, ip); - /* - * acquire tlock of the leaf page containing original entry - */ - if (!test_cflag(COMMIT_Nolink, ip)) { - tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); - xtlck = (struct xtlock *) & tlck->lock; - } - - xad = &p->xad[index0]; - xflag = xad->flag; - xoff = offsetXAD(xad); - xlen = lengthXAD(xad); - xaddr = addressXAD(xad); - - /* nXAD must be completely contained within XAD */ - if ((xoff > 
nxoff) || - (nxoff + nxlen > xoff + xlen)) { - XT_PUTPAGE(mp); - jfs_error(ip->i_sb, - "xtUpdate: nXAD in not completely contained within XAD"); - return -EIO; - } - - index = index0; - newindex = index + 1; - nextindex = le16_to_cpu(p->header.nextindex); - -#ifdef _JFS_WIP_NOCOALESCE - if (xoff < nxoff) - goto updateRight; - - /* - * replace XAD with nXAD - */ - replace: /* (nxoff == xoff) */ - if (nxlen == xlen) { - /* replace XAD with nXAD:recorded */ - *xad = *nxad; - xad->flag = xflag & ~XAD_NOTRECORDED; - - goto out; - } else /* (nxlen < xlen) */ - goto updateLeft; -#endif /* _JFS_WIP_NOCOALESCE */ - -/* #ifdef _JFS_WIP_COALESCE */ - if (xoff < nxoff) - goto coalesceRight; - - /* - * coalesce with left XAD - */ -//coalesceLeft: /* (xoff == nxoff) */ - /* is XAD first entry of page ? */ - if (index == XTENTRYSTART) - goto replace; - - /* is nXAD logically and physically contiguous with lXAD ? */ - lxad = &p->xad[index - 1]; - lxlen = lengthXAD(lxad); - if (!(lxad->flag & XAD_NOTRECORDED) && - (nxoff == offsetXAD(lxad) + lxlen) && - (nxaddr == addressXAD(lxad) + lxlen) && - (lxlen + nxlen < MAXXLEN)) { - /* extend right lXAD */ - index0 = index - 1; - XADlength(lxad, lxlen + nxlen); - - /* If we just merged two extents together, need to make sure the - * right extent gets logged. If the left one is marked XAD_NEW, - * then we know it will be logged. 
Otherwise, mark as - * XAD_EXTENDED - */ - if (!(lxad->flag & XAD_NEW)) - lxad->flag |= XAD_EXTENDED; - - if (xlen > nxlen) { - /* truncate XAD */ - XADoffset(xad, xoff + nxlen); - XADlength(xad, xlen - nxlen); - XADaddress(xad, xaddr + nxlen); - goto out; - } else { /* (xlen == nxlen) */ - - /* remove XAD */ - if (index < nextindex - 1) - memmove(&p->xad[index], &p->xad[index + 1], - (nextindex - index - - 1) << L2XTSLOTSIZE); - - p->header.nextindex = - cpu_to_le16(le16_to_cpu(p->header.nextindex) - - 1); - - index = index0; - newindex = index + 1; - nextindex = le16_to_cpu(p->header.nextindex); - xoff = nxoff = offsetXAD(lxad); - xlen = nxlen = lxlen + nxlen; - xaddr = nxaddr = addressXAD(lxad); - goto coalesceRight; - } - } - - /* - * replace XAD with nXAD - */ - replace: /* (nxoff == xoff) */ - if (nxlen == xlen) { - /* replace XAD with nXAD:recorded */ - *xad = *nxad; - xad->flag = xflag & ~XAD_NOTRECORDED; - - goto coalesceRight; - } else /* (nxlen < xlen) */ - goto updateLeft; - - /* - * coalesce with right XAD - */ - coalesceRight: /* (xoff <= nxoff) */ - /* is XAD last entry of page ? */ - if (newindex == nextindex) { - if (xoff == nxoff) - goto out; - goto updateRight; - } - - /* is nXAD logically and physically contiguous with rXAD ? */ - rxad = &p->xad[index + 1]; - rxlen = lengthXAD(rxad); - if (!(rxad->flag & XAD_NOTRECORDED) && - (nxoff + nxlen == offsetXAD(rxad)) && - (nxaddr + nxlen == addressXAD(rxad)) && - (rxlen + nxlen < MAXXLEN)) { - /* extend left rXAD */ - XADoffset(rxad, nxoff); - XADlength(rxad, rxlen + nxlen); - XADaddress(rxad, nxaddr); - - /* If we just merged two extents together, need to make sure - * the left extent gets logged. If the right one is marked - * XAD_NEW, then we know it will be logged. 
Otherwise, mark as - * XAD_EXTENDED - */ - if (!(rxad->flag & XAD_NEW)) - rxad->flag |= XAD_EXTENDED; - - if (xlen > nxlen) - /* truncate XAD */ - XADlength(xad, xlen - nxlen); - else { /* (xlen == nxlen) */ - - /* remove XAD */ - memmove(&p->xad[index], &p->xad[index + 1], - (nextindex - index - 1) << L2XTSLOTSIZE); - - p->header.nextindex = - cpu_to_le16(le16_to_cpu(p->header.nextindex) - - 1); - } - - goto out; - } else if (xoff == nxoff) - goto out; - - if (xoff >= nxoff) { - XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff"); - return -EIO; - } -/* #endif _JFS_WIP_COALESCE */ - - /* - * split XAD into (lXAD, nXAD): - * - * |---nXAD---> - * --|----------XAD----------|-- - * |-lXAD-| - */ - updateRight: /* (xoff < nxoff) */ - /* truncate old XAD as lXAD:not_recorded */ - xad = &p->xad[index]; - XADlength(xad, nxoff - xoff); - - /* insert nXAD:recorded */ - if (nextindex == le16_to_cpu(p->header.maxentry)) { - - /* xtSpliUp() unpins leaf pages */ - split.mp = mp; - split.index = newindex; - split.flag = xflag & ~XAD_NOTRECORDED; - split.off = nxoff; - split.len = nxlen; - split.addr = nxaddr; - split.pxdlist = NULL; - if ((rc = xtSplitUp(tid, ip, &split, &btstack))) - return rc; - - /* get back old page */ - XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - /* - * if leaf root has been split, original root has been - * copied to new child page, i.e., original entry now - * resides on the new child page; - */ - if (p->header.flag & BT_INTERNAL) { - ASSERT(p->header.nextindex == - cpu_to_le16(XTENTRYSTART + 1)); - xad = &p->xad[XTENTRYSTART]; - bn = addressXAD(xad); - XT_PUTPAGE(mp); - - /* get new child page */ - XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - BT_MARK_DIRTY(mp, ip); - if (!test_cflag(COMMIT_Nolink, ip)) { - tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); - xtlck = (struct xtlock *) & tlck->lock; - } - } else { - /* is nXAD on new page ? 
*/ - if (newindex > - (le16_to_cpu(p->header.maxentry) >> 1)) { - newindex = - newindex - - le16_to_cpu(p->header.nextindex) + - XTENTRYSTART; - newpage = 1; - } - } - } else { - /* if insert into middle, shift right remaining entries */ - if (newindex < nextindex) - memmove(&p->xad[newindex + 1], &p->xad[newindex], - (nextindex - newindex) << L2XTSLOTSIZE); - - /* insert the entry */ - xad = &p->xad[newindex]; - *xad = *nxad; - xad->flag = xflag & ~XAD_NOTRECORDED; - - /* advance next available entry index. */ - p->header.nextindex = - cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); - } - - /* - * does nXAD force 3-way split ? - * - * |---nXAD--->| - * --|----------XAD-------------|-- - * |-lXAD-| |-rXAD -| - */ - if (nxoff + nxlen == xoff + xlen) - goto out; - - /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */ - if (newpage) { - /* close out old page */ - if (!test_cflag(COMMIT_Nolink, ip)) { - xtlck->lwm.offset = (xtlck->lwm.offset) ? - min(index0, (int)xtlck->lwm.offset) : index0; - xtlck->lwm.length = - le16_to_cpu(p->header.nextindex) - - xtlck->lwm.offset; - } - - bn = le64_to_cpu(p->header.next); - XT_PUTPAGE(mp); - - /* get new right page */ - XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - BT_MARK_DIRTY(mp, ip); - if (!test_cflag(COMMIT_Nolink, ip)) { - tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); - xtlck = (struct xtlock *) & tlck->lock; - } - - index0 = index = newindex; - } else - index++; - - newindex = index + 1; - nextindex = le16_to_cpu(p->header.nextindex); - xlen = xlen - (nxoff - xoff); - xoff = nxoff; - xaddr = nxaddr; - - /* recompute split pages */ - if (nextindex == le16_to_cpu(p->header.maxentry)) { - XT_PUTPAGE(mp); - - if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) - return rc; - - /* retrieve search result */ - XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); - - if (cmp != 0) { - XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtUpdate: xtSearch failed"); - return -EIO; - } - - if 
(index0 != index) { - XT_PUTPAGE(mp); - jfs_error(ip->i_sb, - "xtUpdate: unexpected value of index"); - return -EIO; - } - } - - /* - * split XAD into (nXAD, rXAD) - * - * ---nXAD---| - * --|----------XAD----------|-- - * |-rXAD-| - */ - updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */ - /* update old XAD with nXAD:recorded */ - xad = &p->xad[index]; - *xad = *nxad; - xad->flag = xflag & ~XAD_NOTRECORDED; - - /* insert rXAD:not_recorded */ - xoff = xoff + nxlen; - xlen = xlen - nxlen; - xaddr = xaddr + nxlen; - if (nextindex == le16_to_cpu(p->header.maxentry)) { -/* -printf("xtUpdate.updateLeft.split p:0x%p\n", p); -*/ - /* xtSpliUp() unpins leaf pages */ - split.mp = mp; - split.index = newindex; - split.flag = xflag; - split.off = xoff; - split.len = xlen; - split.addr = xaddr; - split.pxdlist = NULL; - if ((rc = xtSplitUp(tid, ip, &split, &btstack))) - return rc; - - /* get back old page */ - XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - /* - * if leaf root has been split, original root has been - * copied to new child page, i.e., original entry now - * resides on the new child page; - */ - if (p->header.flag & BT_INTERNAL) { - ASSERT(p->header.nextindex == - cpu_to_le16(XTENTRYSTART + 1)); - xad = &p->xad[XTENTRYSTART]; - bn = addressXAD(xad); - XT_PUTPAGE(mp); - - /* get new child page */ - XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - BT_MARK_DIRTY(mp, ip); - if (!test_cflag(COMMIT_Nolink, ip)) { - tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); - xtlck = (struct xtlock *) & tlck->lock; - } - } - } else { - /* if insert into middle, shift right remaining entries */ - if (newindex < nextindex) - memmove(&p->xad[newindex + 1], &p->xad[newindex], - (nextindex - newindex) << L2XTSLOTSIZE); - - /* insert the entry */ - xad = &p->xad[newindex]; - XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); - - /* advance next available entry index. 
*/ - p->header.nextindex = - cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); - } - - out: - if (!test_cflag(COMMIT_Nolink, ip)) { - xtlck->lwm.offset = (xtlck->lwm.offset) ? - min(index0, (int)xtlck->lwm.offset) : index0; - xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - - xtlck->lwm.offset; - } - - /* unpin the leaf page */ - XT_PUTPAGE(mp); - - return rc; -} - - -/* - * xtAppend() - * - * function: grow in append mode from contiguous region specified ; - * - * parameter: - * tid - transaction id; - * ip - file object; - * xflag - extent flag: - * xoff - extent offset; - * maxblocks - max extent length; - * xlen - extent length (in/out); - * xaddrp - extent address pointer (in/out): - * flag - - * - * return: - */ -int xtAppend(tid_t tid, /* transaction id */ - struct inode *ip, int xflag, s64 xoff, s32 maxblocks, - s32 * xlenp, /* (in/out) */ - s64 * xaddrp, /* (in/out) */ - int flag) -{ - int rc = 0; - struct metapage *mp; /* meta-page buffer */ - xtpage_t *p; /* base B+-tree index page */ - s64 bn, xaddr; - int index, nextindex; - struct btstack btstack; /* traverse stack */ - struct xtsplit split; /* split information */ - xad_t *xad; - int cmp; - struct tlock *tlck; - struct xtlock *xtlck; - int nsplit, nblocks, xlen; - struct pxdlist pxdlist; - pxd_t *pxd; - s64 next; - - xaddr = *xaddrp; - xlen = *xlenp; - jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx", - (ulong) xoff, maxblocks, xlen, (ulong) xaddr); - - /* - * search for the entry location at which to insert: - * - * xtFastSearch() and xtSearch() both returns (leaf page - * pinned, index at which to insert). - * n.b. xtSearch() may return index of maxentry of - * the full page. 
- */ - if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) - return rc; - - /* retrieve search result */ - XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); - - if (cmp == 0) { - rc = -EEXIST; - goto out; - } - - if (next) - xlen = min(xlen, (int)(next - xoff)); -//insert: - /* - * insert entry for new extent - */ - xflag |= XAD_NEW; - - /* - * if the leaf page is full, split the page and - * propagate up the router entry for the new page from split - * - * The xtSplitUp() will insert the entry and unpin the leaf page. - */ - nextindex = le16_to_cpu(p->header.nextindex); - if (nextindex < le16_to_cpu(p->header.maxentry)) - goto insertLeaf; - - /* - * allocate new index blocks to cover index page split(s) - */ - nsplit = btstack.nsplit; - split.pxdlist = &pxdlist; - pxdlist.maxnpxd = pxdlist.npxd = 0; - pxd = &pxdlist.pxd[0]; - nblocks = JFS_SBI(ip->i_sb)->nbperpage; - for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { - if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { - PXDaddress(pxd, xaddr); - PXDlength(pxd, nblocks); - - pxdlist.maxnpxd++; - - continue; - } - - /* undo allocation */ - - goto out; - } - - xlen = min(xlen, maxblocks); - - /* - * allocate data extent requested - */ - if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) - goto out; - - split.mp = mp; - split.index = index; - split.flag = xflag; - split.off = xoff; - split.len = xlen; - split.addr = xaddr; - if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { - /* undo data extent allocation */ - dbFree(ip, *xaddrp, (s64) * xlenp); - - return rc; - } - - *xaddrp = xaddr; - *xlenp = xlen; - return 0; - - /* - * insert the new entry into the leaf page - */ - insertLeaf: - /* - * allocate data extent requested - */ - if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) - goto out; - - BT_MARK_DIRTY(mp, ip); - /* - * acquire a transaction lock on the leaf page; - * - * action: xad insertion/extension; - */ - tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); - 
xtlck = (struct xtlock *) & tlck->lock; - - /* insert the new entry: mark the entry NEW */ - xad = &p->xad[index]; - XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); - - /* advance next available entry index */ - le16_add_cpu(&p->header.nextindex, 1); - - xtlck->lwm.offset = - (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; - xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - - xtlck->lwm.offset; - - *xaddrp = xaddr; - *xlenp = xlen; - - out: - /* unpin the leaf page */ - XT_PUTPAGE(mp); - - return rc; -} -#ifdef _STILL_TO_PORT - -/* - TBD for defragmentaion/reorganization - - * - * xtDelete() - * - * function: - * delete the entry with the specified key. - * - * N.B.: whole extent of the entry is assumed to be deleted. - * - * parameter: - * - * return: - * ENOENT: if the entry is not found. - * - * exception: - */ -int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) -{ - int rc = 0; - struct btstack btstack; - int cmp; - s64 bn; - struct metapage *mp; - xtpage_t *p; - int index, nextindex; - struct tlock *tlck; - struct xtlock *xtlck; - - /* - * find the matching entry; xtSearch() pins the page - */ - if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) - return rc; - - XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); - if (cmp) { - /* unpin the leaf page */ - XT_PUTPAGE(mp); - return -ENOENT; - } - - /* - * delete the entry from the leaf page - */ - nextindex = le16_to_cpu(p->header.nextindex); - le16_add_cpu(&p->header.nextindex, -1); - - /* - * if the leaf page bocome empty, free the page - */ - if (p->header.nextindex == cpu_to_le16(XTENTRYSTART)) - return (xtDeleteUp(tid, ip, mp, p, &btstack)); - - BT_MARK_DIRTY(mp, ip); - /* - * acquire a transaction lock on the leaf page; - * - * action:xad deletion; - */ - tlck = txLock(tid, ip, mp, tlckXTREE); - xtlck = (struct xtlock *) & tlck->lock; - xtlck->lwm.offset = - (xtlck->lwm.offset) ? 
min(index, xtlck->lwm.offset) : index; - - /* if delete from middle, shift left/compact the remaining entries */ - if (index < nextindex - 1) - memmove(&p->xad[index], &p->xad[index + 1], - (nextindex - index - 1) * sizeof(xad_t)); - - XT_PUTPAGE(mp); - - return 0; -} - - -/* - TBD for defragmentaion/reorganization - - * - * xtDeleteUp() - * - * function: - * free empty pages as propagating deletion up the tree - * - * parameter: - * - * return: - */ -static int -xtDeleteUp(tid_t tid, struct inode *ip, - struct metapage * fmp, xtpage_t * fp, struct btstack * btstack) -{ - int rc = 0; - struct metapage *mp; - xtpage_t *p; - int index, nextindex; - s64 xaddr; - int xlen; - struct btframe *parent; - struct tlock *tlck; - struct xtlock *xtlck; - - /* - * keep root leaf page which has become empty - */ - if (fp->header.flag & BT_ROOT) { - /* keep the root page */ - fp->header.flag &= ~BT_INTERNAL; - fp->header.flag |= BT_LEAF; - fp->header.nextindex = cpu_to_le16(XTENTRYSTART); - - /* XT_PUTPAGE(fmp); */ - - return 0; - } - - /* - * free non-root leaf page - */ - if ((rc = xtRelink(tid, ip, fp))) { - XT_PUTPAGE(fmp); - return rc; - } - - xaddr = addressPXD(&fp->header.self); - xlen = lengthPXD(&fp->header.self); - /* free the page extent */ - dbFree(ip, xaddr, (s64) xlen); - - /* free the buffer page */ - discard_metapage(fmp); - - /* - * propagate page deletion up the index tree - * - * If the delete from the parent page makes it empty, - * continue all the way up the tree. - * stop if the root page is reached (which is never deleted) or - * if the entry deletion does not empty the page. - */ - while ((parent = BT_POP(btstack)) != NULL) { - /* get/pin the parent page <sp> */ - XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - index = parent->index; - - /* delete the entry for the freed child page from parent. 
- */ - nextindex = le16_to_cpu(p->header.nextindex); - - /* - * the parent has the single entry being deleted: - * free the parent page which has become empty. - */ - if (nextindex == 1) { - if (p->header.flag & BT_ROOT) { - /* keep the root page */ - p->header.flag &= ~BT_INTERNAL; - p->header.flag |= BT_LEAF; - p->header.nextindex = - cpu_to_le16(XTENTRYSTART); - - /* XT_PUTPAGE(mp); */ - - break; - } else { - /* free the parent page */ - if ((rc = xtRelink(tid, ip, p))) - return rc; - - xaddr = addressPXD(&p->header.self); - /* free the page extent */ - dbFree(ip, xaddr, - (s64) JFS_SBI(ip->i_sb)->nbperpage); - - /* unpin/free the buffer page */ - discard_metapage(mp); - - /* propagate up */ - continue; - } - } - /* - * the parent has other entries remaining: - * delete the router entry from the parent page. - */ - else { - BT_MARK_DIRTY(mp, ip); - /* - * acquire a transaction lock on the leaf page; - * - * action:xad deletion; - */ - tlck = txLock(tid, ip, mp, tlckXTREE); - xtlck = (struct xtlock *) & tlck->lock; - xtlck->lwm.offset = - (xtlck->lwm.offset) ? min(index, - xtlck->lwm. - offset) : index; - - /* if delete from middle, - * shift left/compact the remaining entries in the page - */ - if (index < nextindex - 1) - memmove(&p->xad[index], &p->xad[index + 1], - (nextindex - index - - 1) << L2XTSLOTSIZE); - - le16_add_cpu(&p->header.nextindex, -1); - jfs_info("xtDeleteUp(entry): 0x%lx[%d]", - (ulong) parent->bn, index); - } - - /* unpin the parent page */ - XT_PUTPAGE(mp); - - /* exit propagation up */ - break; - } - - return 0; -} - - -/* - * NAME: xtRelocate() - * - * FUNCTION: relocate xtpage or data extent of regular file; - * This function is mainly used by defragfs utility. - * - * NOTE: This routine does not have the logic to handle - * uncommitted allocated extent. The caller should call - * txCommit() to commit all the allocation before call - * this routine. 
- */ -int -xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ - s64 nxaddr, /* new xaddr */ - int xtype) -{ /* extent type: XTPAGE or DATAEXT */ - int rc = 0; - struct tblock *tblk; - struct tlock *tlck; - struct xtlock *xtlck; - struct metapage *mp, *pmp, *lmp, *rmp; /* meta-page buffer */ - xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */ - xad_t *xad; - pxd_t *pxd; - s64 xoff, xsize; - int xlen; - s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn; - cbuf_t *cp; - s64 offset, nbytes, nbrd, pno; - int nb, npages, nblks; - s64 bn; - int cmp; - int index; - struct pxd_lock *pxdlock; - struct btstack btstack; /* traverse stack */ - - xtype = xtype & EXTENT_TYPE; - - xoff = offsetXAD(oxad); - oxaddr = addressXAD(oxad); - xlen = lengthXAD(oxad); - - /* validate extent offset */ - offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; - if (offset >= ip->i_size) - return -ESTALE; /* stale extent */ - - jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx", - xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); - - /* - * 1. 
get and validate the parent xtpage/xad entry - * covering the source extent to be relocated; - */ - if (xtype == DATAEXT) { - /* search in leaf entry */ - rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); - if (rc) - return rc; - - /* retrieve search result */ - XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); - - if (cmp) { - XT_PUTPAGE(pmp); - return -ESTALE; - } - - /* validate for exact match with a single entry */ - xad = &pp->xad[index]; - if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) { - XT_PUTPAGE(pmp); - return -ESTALE; - } - } else { /* (xtype == XTPAGE) */ - - /* search in internal entry */ - rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0); - if (rc) - return rc; - - /* retrieve search result */ - XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); - - if (cmp) { - XT_PUTPAGE(pmp); - return -ESTALE; - } - - /* xtSearchNode() validated for exact match with a single entry - */ - xad = &pp->xad[index]; - } - jfs_info("xtRelocate: parent xad entry validated."); - - /* - * 2. relocate the extent - */ - if (xtype == DATAEXT) { - /* if the extent is allocated-but-not-recorded - * there is no real data to be moved in this extent, - */ - if (xad->flag & XAD_NOTRECORDED) - goto out; - else - /* release xtpage for cmRead()/xtLookup() */ - XT_PUTPAGE(pmp); - - /* - * cmRelocate() - * - * copy target data pages to be relocated; - * - * data extent must start at page boundary and - * multiple of page size (except the last data extent); - * read in each page of the source data extent into cbuf, - * update the cbuf extent descriptor of the page to be - * homeward bound to new dst data extent - * copy the data from the old extent to new extent. - * copy is essential for compressed files to avoid problems - * that can arise if there was a change in compression - * algorithms. - * it is a good strategy because it may disrupt cache - * policy to keep the pages in memory afterwards. 
- */ - offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; - assert((offset & CM_OFFSET) == 0); - nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize; - pno = offset >> CM_L2BSIZE; - npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; -/* - npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - - (offset >> CM_L2BSIZE) + 1; -*/ - sxaddr = oxaddr; - dxaddr = nxaddr; - - /* process the request one cache buffer at a time */ - for (nbrd = 0; nbrd < nbytes; nbrd += nb, - offset += nb, pno++, npages--) { - /* compute page size */ - nb = min(nbytes - nbrd, CM_BSIZE); - - /* get the cache buffer of the page */ - if (rc = cmRead(ip, offset, npages, &cp)) - break; - - assert(addressPXD(&cp->cm_pxd) == sxaddr); - assert(!cp->cm_modified); - - /* bind buffer with the new extent address */ - nblks = nb >> JFS_IP(ip->i_sb)->l2bsize; - cmSetXD(ip, cp, pno, dxaddr, nblks); - - /* release the cbuf, mark it as modified */ - cmPut(cp, true); - - dxaddr += nblks; - sxaddr += nblks; - } - - /* get back parent page */ - if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) - return rc; - - XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); - jfs_info("xtRelocate: target data extent relocated."); - } else { /* (xtype == XTPAGE) */ - - /* - * read in the target xtpage from the source extent; - */ - XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); - if (rc) { - XT_PUTPAGE(pmp); - return rc; - } - - /* - * read in sibling pages if any to update sibling pointers; - */ - rmp = NULL; - if (p->header.next) { - nextbn = le64_to_cpu(p->header.next); - XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); - if (rc) { - XT_PUTPAGE(pmp); - XT_PUTPAGE(mp); - return (rc); - } - } - - lmp = NULL; - if (p->header.prev) { - prevbn = le64_to_cpu(p->header.prev); - XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); - if (rc) { - XT_PUTPAGE(pmp); - XT_PUTPAGE(mp); - if (rmp) - XT_PUTPAGE(rmp); - return (rc); - } - } - - /* at this point, all xtpages to be updated are in memory */ - - /* - * update sibling pointers of sibling xtpages if any; - */ - if 
(lmp) { - BT_MARK_DIRTY(lmp, ip); - tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); - lp->header.next = cpu_to_le64(nxaddr); - XT_PUTPAGE(lmp); - } - - if (rmp) { - BT_MARK_DIRTY(rmp, ip); - tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); - rp->header.prev = cpu_to_le64(nxaddr); - XT_PUTPAGE(rmp); - } - - /* - * update the target xtpage to be relocated - * - * update the self address of the target page - * and write to destination extent; - * redo image covers the whole xtpage since it is new page - * to the destination extent; - * update of bmap for the free of source extent - * of the target xtpage itself: - * update of bmap for the allocation of destination extent - * of the target xtpage itself: - * update of bmap for the extents covered by xad entries in - * the target xtpage is not necessary since they are not - * updated; - * if not committed before this relocation, - * target page may contain XAD_NEW entries which must - * be scanned for bmap update (logredo() always - * scan xtpage REDOPAGE image for bmap update); - * if committed before this relocation (tlckRELOCATE), - * scan may be skipped by commit() and logredo(); - */ - BT_MARK_DIRTY(mp, ip); - /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ - tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); - xtlck = (struct xtlock *) & tlck->lock; - - /* update the self address in the xtpage header */ - pxd = &p->header.self; - PXDaddress(pxd, nxaddr); - - /* linelock for the after image of the whole page */ - xtlck->lwm.length = - le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; - - /* update the buffer extent descriptor of target xtpage */ - xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; - bmSetXD(mp, nxaddr, xsize); - - /* unpin the target page to new homeward bound */ - XT_PUTPAGE(mp); - jfs_info("xtRelocate: target xtpage relocated."); - } - - /* - * 3. 
acquire maplock for the source extent to be freed; - * - * acquire a maplock saving the src relocated extent address; - * to free of the extent at commit time; - */ - out: - /* if DATAEXT relocation, write a LOG_UPDATEMAP record for - * free PXD of the source data extent (logredo() will update - * bmap for free of source data extent), and update bmap for - * free of the source data extent; - */ - if (xtype == DATAEXT) - tlck = txMaplock(tid, ip, tlckMAP); - /* if XTPAGE relocation, write a LOG_NOREDOPAGE record - * for the source xtpage (logredo() will init NoRedoPage - * filter and will also update bmap for free of the source - * xtpage), and update bmap for free of the source xtpage; - * N.B. We use tlckMAP instead of tlkcXTREE because there - * is no buffer associated with this lock since the buffer - * has been redirected to the target location. - */ - else /* (xtype == XTPAGE) */ - tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); - - pxdlock = (struct pxd_lock *) & tlck->lock; - pxdlock->flag = mlckFREEPXD; - PXDaddress(&pxdlock->pxd, oxaddr); - PXDlength(&pxdlock->pxd, xlen); - pxdlock->index = 1; - - /* - * 4. update the parent xad entry for relocation; - * - * acquire tlck for the parent entry with XAD_NEW as entry - * update which will write LOG_REDOPAGE and update bmap for - * allocation of XAD_NEW destination extent; - */ - jfs_info("xtRelocate: update parent xad entry."); - BT_MARK_DIRTY(pmp, ip); - tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW); - xtlck = (struct xtlock *) & tlck->lock; - - /* update the XAD with the new destination extent; */ - xad = &pp->xad[index]; - xad->flag |= XAD_NEW; - XADaddress(xad, nxaddr); - - xtlck->lwm.offset = min(index, xtlck->lwm.offset); - xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) - - xtlck->lwm.offset; - - /* unpin the parent xtpage */ - XT_PUTPAGE(pmp); - - return rc; -} - - -/* - * xtSearchNode() - * - * function: search for the internal xad entry covering specified extent. 
- * This function is mainly used by defragfs utility. - * - * parameters: - * ip - file object; - * xad - extent to find; - * cmpp - comparison result: - * btstack - traverse stack; - * flag - search process flag; - * - * returns: - * btstack contains (bn, index) of search path traversed to the entry. - * *cmpp is set to result of comparison with the entry returned. - * the page containing the entry is pinned at exit. - */ -static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ - int *cmpp, struct btstack * btstack, int flag) -{ - int rc = 0; - s64 xoff, xaddr; - int xlen; - int cmp = 1; /* init for empty page */ - s64 bn; /* block number */ - struct metapage *mp; /* meta-page buffer */ - xtpage_t *p; /* page */ - int base, index, lim; - struct btframe *btsp; - s64 t64; - - BT_CLR(btstack); - - xoff = offsetXAD(xad); - xlen = lengthXAD(xad); - xaddr = addressXAD(xad); - - /* - * search down tree from root: - * - * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of - * internal page, child page Pi contains entry with k, Ki <= K < Kj. - * - * if entry with search key K is not found - * internal page search find the entry with largest key Ki - * less than K which point to the child page to search; - * leaf page search find the entry with smallest key Kj - * greater than K so that the returned index is the position of - * the entry to be shifted right for insertion of new entry. - * for empty tree, search key is greater than any key of the tree. - * - * by convention, root bn = 0. 
- */ - for (bn = 0;;) { - /* get/pin the page to search */ - XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - if (p->header.flag & BT_LEAF) { - XT_PUTPAGE(mp); - return -ESTALE; - } - - lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; - - /* - * binary search with search key K on the current page - */ - for (base = XTENTRYSTART; lim; lim >>= 1) { - index = base + (lim >> 1); - - XT_CMP(cmp, xoff, &p->xad[index], t64); - if (cmp == 0) { - /* - * search hit - * - * verify for exact match; - */ - if (xaddr == addressXAD(&p->xad[index]) && - xoff == offsetXAD(&p->xad[index])) { - *cmpp = cmp; - - /* save search result */ - btsp = btstack->top; - btsp->bn = bn; - btsp->index = index; - btsp->mp = mp; - - return 0; - } - - /* descend/search its child page */ - goto next; - } - - if (cmp > 0) { - base = index + 1; - --lim; - } - } - - /* - * search miss - non-leaf page: - * - * base is the smallest index with key (Kj) greater than - * search key (K) and may be zero or maxentry index. - * if base is non-zero, decrement base by one to get the parent - * entry of the child page to search. - */ - index = base ? base - 1 : base; - - /* - * go down to child page - */ - next: - /* get the child page block number */ - bn = addressXAD(&p->xad[index]); - - /* unpin the parent page */ - XT_PUTPAGE(mp); - } -} - - -/* - * xtRelink() - * - * function: - * link around a freed page. 
- * - * Parameter: - * int tid, - * struct inode *ip, - * xtpage_t *p) - * - * returns: - */ -static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) -{ - int rc = 0; - struct metapage *mp; - s64 nextbn, prevbn; - struct tlock *tlck; - - nextbn = le64_to_cpu(p->header.next); - prevbn = le64_to_cpu(p->header.prev); - - /* update prev pointer of the next page */ - if (nextbn != 0) { - XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); - if (rc) - return rc; - - /* - * acquire a transaction lock on the page; - * - * action: update prev pointer; - */ - BT_MARK_DIRTY(mp, ip); - tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); - - /* the page may already have been tlock'd */ - - p->header.prev = cpu_to_le64(prevbn); - - XT_PUTPAGE(mp); - } - - /* update next pointer of the previous page */ - if (prevbn != 0) { - XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); - if (rc) - return rc; - - /* - * acquire a transaction lock on the page; - * - * action: update next pointer; - */ - BT_MARK_DIRTY(mp, ip); - tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); - - /* the page may already have been tlock'd */ - - p->header.next = le64_to_cpu(nextbn); - - XT_PUTPAGE(mp); - } - - return 0; -} -#endif /* _STILL_TO_PORT */ - - -/* - * xtInitRoot() - * - * initialize file root (inline in inode) - */ -void xtInitRoot(tid_t tid, struct inode *ip) -{ - xtpage_t *p; - - /* - * acquire a transaction lock on the root - * - * action: - */ - txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag, - tlckXTREE | tlckNEW); - p = &JFS_IP(ip)->i_xtroot; - - p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; - p->header.nextindex = cpu_to_le16(XTENTRYSTART); - - if (S_ISDIR(ip->i_mode)) - p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR); - else { - p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); - ip->i_size = 0; - } - - - return; -} - - -/* - * We can run into a deadlock truncating a file with a large number of - * xtree pages (large fragmented file). 
A robust fix would entail a - * reservation system where we would reserve a number of metadata pages - * and tlocks which we would be guaranteed without a deadlock. Without - * this, a partial fix is to limit number of metadata pages we will lock - * in a single transaction. Currently we will truncate the file so that - * no more than 50 leaf pages will be locked. The caller of xtTruncate - * will be responsible for ensuring that the current transaction gets - * committed, and that subsequent transactions are created to truncate - * the file further if needed. - */ -#define MAX_TRUNCATE_LEAVES 50 - -/* - * xtTruncate() - * - * function: - * traverse for truncation logging backward bottom up; - * terminate at the last extent entry at the current subtree - * root page covering new down size. - * truncation may occur within the last extent entry. - * - * parameter: - * int tid, - * struct inode *ip, - * s64 newsize, - * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} - * - * return: - * - * note: - * PWMAP: - * 1. truncate (non-COMMIT_NOLINK file) - * by jfs_truncate() or jfs_open(O_TRUNC): - * xtree is updated; - * 2. truncate index table of directory when last entry removed - * map update via tlock at commit time; - * PMAP: - * Call xtTruncate_pmap instead - * WMAP: - * 1. remove (free zero link count) on last reference release - * (pmap has been freed at commit zero link count); - * 2. 
truncate (COMMIT_NOLINK file, i.e., tmp file): - * xtree is updated; - * map update directly at truncation time; - * - * if (DELETE) - * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); - * else if (TRUNCATE) - * must write LOG_NOREDOPAGE for deleted index page; - * - * pages may already have been tlocked by anonymous transactions - * during file growth (i.e., write) before truncation; - * - * except last truncated entry, deleted entries remains as is - * in the page (nextindex is updated) for other use - * (e.g., log/update allocation map): this avoid copying the page - * info but delay free of pages; - * - */ -s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) -{ - int rc = 0; - s64 teof; - struct metapage *mp; - xtpage_t *p; - s64 bn; - int index, nextindex; - xad_t *xad; - s64 xoff, xaddr; - int xlen, len, freexlen; - struct btstack btstack; - struct btframe *parent; - struct tblock *tblk = NULL; - struct tlock *tlck = NULL; - struct xtlock *xtlck = NULL; - struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */ - struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */ - s64 nfreed; - int freed, log; - int locked_leaves = 0; - - /* save object truncation type */ - if (tid) { - tblk = tid_to_tblock(tid); - tblk->xflag |= flag; - } - - nfreed = 0; - - flag &= COMMIT_MAP; - assert(flag != COMMIT_PMAP); - - if (flag == COMMIT_PWMAP) - log = 1; - else { - log = 0; - xadlock.flag = mlckFREEXADLIST; - xadlock.index = 1; - } - - /* - * if the newsize is not an integral number of pages, - * the file between newsize and next page boundary will - * be cleared. - * if truncating into a file hole, it will cause - * a full block to be allocated for the logical block. - */ - - /* - * release page blocks of truncated region <teof, eof> - * - * free the data blocks from the leaf index blocks. - * delete the parent index entries corresponding to - * the freed child data/index blocks. 
- * free the index blocks themselves which aren't needed - * in new sized file. - * - * index blocks are updated only if the blocks are to be - * retained in the new sized file. - * if type is PMAP, the data and index pages are NOT - * freed, and the data and index blocks are NOT freed - * from working map. - * (this will allow continued access of data/index of - * temporary file (zerolink count file truncated to zero-length)). - */ - teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >> - JFS_SBI(ip->i_sb)->l2bsize; - - /* clear stack */ - BT_CLR(&btstack); - - /* - * start with root - * - * root resides in the inode - */ - bn = 0; - - /* - * first access of each page: - */ - getPage: - XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - /* process entries backward from last index */ - index = le16_to_cpu(p->header.nextindex) - 1; - - - /* Since this is the rightmost page at this level, and we may have - * already freed a page that was formerly to the right, let's make - * sure that the next pointer is zero. - */ - if (p->header.next) { - if (log) - /* - * Make sure this change to the header is logged. - * If we really truncate this leaf, the flag - * will be changed to tlckTRUNCATE - */ - tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); - BT_MARK_DIRTY(mp, ip); - p->header.next = 0; - } - - if (p->header.flag & BT_INTERNAL) - goto getChild; - - /* - * leaf page - */ - freed = 0; - - /* does region covered by leaf page precede Teof ? 
*/ - xad = &p->xad[index]; - xoff = offsetXAD(xad); - xlen = lengthXAD(xad); - if (teof >= xoff + xlen) { - XT_PUTPAGE(mp); - goto getParent; - } - - /* (re)acquire tlock of the leaf page */ - if (log) { - if (++locked_leaves > MAX_TRUNCATE_LEAVES) { - /* - * We need to limit the size of the transaction - * to avoid exhausting pagecache & tlocks - */ - XT_PUTPAGE(mp); - newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; - goto getParent; - } - tlck = txLock(tid, ip, mp, tlckXTREE); - tlck->type = tlckXTREE | tlckTRUNCATE; - xtlck = (struct xtlock *) & tlck->lock; - xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; - } - BT_MARK_DIRTY(mp, ip); - - /* - * scan backward leaf page entries - */ - for (; index >= XTENTRYSTART; index--) { - xad = &p->xad[index]; - xoff = offsetXAD(xad); - xlen = lengthXAD(xad); - xaddr = addressXAD(xad); - - /* - * The "data" for a directory is indexed by the block - * device's address space. This metadata must be invalidated - * here - */ - if (S_ISDIR(ip->i_mode) && (teof == 0)) - invalidate_xad_metapages(ip, *xad); - /* - * entry beyond eof: continue scan of current page - * xad - * ---|---=======-------> - * eof - */ - if (teof < xoff) { - nfreed += xlen; - continue; - } - - /* - * (xoff <= teof): last entry to be deleted from page; - * If other entries remain in page: keep and update the page. - */ - - /* - * eof == entry_start: delete the entry - * xad - * -------|=======-------> - * eof - * - */ - if (teof == xoff) { - nfreed += xlen; - - if (index == XTENTRYSTART) - break; - - nextindex = index; - } - /* - * eof within the entry: truncate the entry. - * xad - * -------===|===-------> - * eof - */ - else if (teof < xoff + xlen) { - /* update truncated entry */ - len = teof - xoff; - freexlen = xlen - len; - XADlength(xad, len); - - /* save pxd of truncated extent in tlck */ - xaddr += len; - if (log) { /* COMMIT_PWMAP */ - xtlck->lwm.offset = (xtlck->lwm.offset) ? 
- min(index, (int)xtlck->lwm.offset) : index; - xtlck->lwm.length = index + 1 - - xtlck->lwm.offset; - xtlck->twm.offset = index; - pxdlock = (struct pxd_lock *) & xtlck->pxdlock; - pxdlock->flag = mlckFREEPXD; - PXDaddress(&pxdlock->pxd, xaddr); - PXDlength(&pxdlock->pxd, freexlen); - } - /* free truncated extent */ - else { /* COMMIT_WMAP */ - - pxdlock = (struct pxd_lock *) & xadlock; - pxdlock->flag = mlckFREEPXD; - PXDaddress(&pxdlock->pxd, xaddr); - PXDlength(&pxdlock->pxd, freexlen); - txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP); - - /* reset map lock */ - xadlock.flag = mlckFREEXADLIST; - } - - /* current entry is new last entry; */ - nextindex = index + 1; - - nfreed += freexlen; - } - /* - * eof beyond the entry: - * xad - * -------=======---|---> - * eof - */ - else { /* (xoff + xlen < teof) */ - - nextindex = index + 1; - } - - if (nextindex < le16_to_cpu(p->header.nextindex)) { - if (!log) { /* COMMIT_WAMP */ - xadlock.xdlist = &p->xad[nextindex]; - xadlock.count = - le16_to_cpu(p->header.nextindex) - - nextindex; - txFreeMap(ip, (struct maplock *) & xadlock, - NULL, COMMIT_WMAP); - } - p->header.nextindex = cpu_to_le16(nextindex); - } - - XT_PUTPAGE(mp); - - /* assert(freed == 0); */ - goto getParent; - } /* end scan of leaf page entries */ - - freed = 1; - - /* - * leaf page become empty: free the page if type != PMAP - */ - if (log) { /* COMMIT_PWMAP */ - /* txCommit() with tlckFREE: - * free data extents covered by leaf [XTENTRYSTART:hwm); - * invalidate leaf if COMMIT_PWMAP; - * if (TRUNCATE), will write LOG_NOREDOPAGE; - */ - tlck->type = tlckXTREE | tlckFREE; - } else { /* COMMIT_WAMP */ - - /* free data extents covered by leaf */ - xadlock.xdlist = &p->xad[XTENTRYSTART]; - xadlock.count = - le16_to_cpu(p->header.nextindex) - XTENTRYSTART; - txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP); - } - - if (p->header.flag & BT_ROOT) { - p->header.flag &= ~BT_INTERNAL; - p->header.flag |= BT_LEAF; - p->header.nextindex = 
cpu_to_le16(XTENTRYSTART); - - XT_PUTPAGE(mp); /* debug */ - goto out; - } else { - if (log) { /* COMMIT_PWMAP */ - /* page will be invalidated at tx completion - */ - XT_PUTPAGE(mp); - } else { /* COMMIT_WMAP */ - - if (mp->lid) - lid_to_tlock(mp->lid)->flag |= tlckFREELOCK; - - /* invalidate empty leaf page */ - discard_metapage(mp); - } - } - - /* - * the leaf page become empty: delete the parent entry - * for the leaf page if the parent page is to be kept - * in the new sized file. - */ - - /* - * go back up to the parent page - */ - getParent: - /* pop/restore parent entry for the current child page */ - if ((parent = BT_POP(&btstack)) == NULL) - /* current page must have been root */ - goto out; - - /* get back the parent page */ - bn = parent->bn; - XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - index = parent->index; - - /* - * child page was not empty: - */ - if (freed == 0) { - /* has any entry deleted from parent ? */ - if (index < le16_to_cpu(p->header.nextindex) - 1) { - /* (re)acquire tlock on the parent page */ - if (log) { /* COMMIT_PWMAP */ - /* txCommit() with tlckTRUNCATE: - * free child extents covered by parent [); - */ - tlck = txLock(tid, ip, mp, tlckXTREE); - xtlck = (struct xtlock *) & tlck->lock; - if (!(tlck->type & tlckTRUNCATE)) { - xtlck->hwm.offset = - le16_to_cpu(p->header. - nextindex) - 1; - tlck->type = - tlckXTREE | tlckTRUNCATE; - } - } else { /* COMMIT_WMAP */ - - /* free child extents covered by parent */ - xadlock.xdlist = &p->xad[index + 1]; - xadlock.count = - le16_to_cpu(p->header.nextindex) - - index - 1; - txFreeMap(ip, (struct maplock *) & xadlock, - NULL, COMMIT_WMAP); - } - BT_MARK_DIRTY(mp, ip); - - p->header.nextindex = cpu_to_le16(index + 1); - } - XT_PUTPAGE(mp); - goto getParent; - } - - /* - * child page was empty: - */ - nfreed += lengthXAD(&p->xad[index]); - - /* - * During working map update, child page's tlock must be handled - * before parent's. 
This is because the parent's tlock will cause - * the child's disk space to be marked available in the wmap, so - * it's important that the child page be released by that time. - * - * ToDo: tlocks should be on doubly-linked list, so we can - * quickly remove it and add it to the end. - */ - - /* - * Move parent page's tlock to the end of the tid's tlock list - */ - if (log && mp->lid && (tblk->last != mp->lid) && - lid_to_tlock(mp->lid)->tid) { - lid_t lid = mp->lid; - struct tlock *prev; - - tlck = lid_to_tlock(lid); - - if (tblk->next == lid) - tblk->next = tlck->next; - else { - for (prev = lid_to_tlock(tblk->next); - prev->next != lid; - prev = lid_to_tlock(prev->next)) { - assert(prev->next); - } - prev->next = tlck->next; - } - lid_to_tlock(tblk->last)->next = lid; - tlck->next = 0; - tblk->last = lid; - } - - /* - * parent page become empty: free the page - */ - if (index == XTENTRYSTART) { - if (log) { /* COMMIT_PWMAP */ - /* txCommit() with tlckFREE: - * free child extents covered by parent; - * invalidate parent if COMMIT_PWMAP; - */ - tlck = txLock(tid, ip, mp, tlckXTREE); - xtlck = (struct xtlock *) & tlck->lock; - xtlck->hwm.offset = - le16_to_cpu(p->header.nextindex) - 1; - tlck->type = tlckXTREE | tlckFREE; - } else { /* COMMIT_WMAP */ - - /* free child extents covered by parent */ - xadlock.xdlist = &p->xad[XTENTRYSTART]; - xadlock.count = - le16_to_cpu(p->header.nextindex) - - XTENTRYSTART; - txFreeMap(ip, (struct maplock *) & xadlock, NULL, - COMMIT_WMAP); - } - BT_MARK_DIRTY(mp, ip); - - if (p->header.flag & BT_ROOT) { - p->header.flag &= ~BT_INTERNAL; - p->header.flag |= BT_LEAF; - p->header.nextindex = cpu_to_le16(XTENTRYSTART); - if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) { - /* - * Shrink root down to allow inline - * EA (otherwise fsck complains) - */ - p->header.maxentry = - cpu_to_le16(XTROOTINITSLOT); - JFS_IP(ip)->mode2 |= INLINEEA; - } - - XT_PUTPAGE(mp); /* debug */ - goto out; - } else { - if (log) { /* COMMIT_PWMAP */ - 
/* page will be invalidated at tx completion - */ - XT_PUTPAGE(mp); - } else { /* COMMIT_WMAP */ - - if (mp->lid) - lid_to_tlock(mp->lid)->flag |= - tlckFREELOCK; - - /* invalidate parent page */ - discard_metapage(mp); - } - - /* parent has become empty and freed: - * go back up to its parent page - */ - /* freed = 1; */ - goto getParent; - } - } - /* - * parent page still has entries for front region; - */ - else { - /* try truncate region covered by preceding entry - * (process backward) - */ - index--; - - /* go back down to the child page corresponding - * to the entry - */ - goto getChild; - } - - /* - * internal page: go down to child page of current entry - */ - getChild: - /* save current parent entry for the child page */ - if (BT_STACK_FULL(&btstack)) { - jfs_error(ip->i_sb, "stack overrun in xtTruncate!"); - XT_PUTPAGE(mp); - return -EIO; - } - BT_PUSH(&btstack, bn, index); - - /* get child page */ - xad = &p->xad[index]; - bn = addressXAD(xad); - - /* - * first access of each internal entry: - */ - /* release parent page */ - XT_PUTPAGE(mp); - - /* process the child page */ - goto getPage; - - out: - /* - * update file resource stat - */ - /* set size - */ - if (S_ISDIR(ip->i_mode) && !newsize) - ip->i_size = 1; /* fsck hates zero-length directories */ - else - ip->i_size = newsize; - - /* update quota allocation to reflect freed blocks */ - dquot_free_block(ip, nfreed); - - /* - * free tlock of invalidated pages - */ - if (flag == COMMIT_WMAP) - txFreelock(ip); - - return newsize; -} - - -/* - * xtTruncate_pmap() - * - * function: - * Perform truncate to zero length for deleted file, leaving the - * the xtree and working map untouched. This allows the file to - * be accessed via open file handles, while the delete of the file - * is committed to disk. 
- * - * parameter: - * tid_t tid, - * struct inode *ip, - * s64 committed_size) - * - * return: new committed size - * - * note: - * - * To avoid deadlock by holding too many transaction locks, the - * truncation may be broken up into multiple transactions. - * The committed_size keeps track of part of the file has been - * freed from the pmaps. - */ -s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) -{ - s64 bn; - struct btstack btstack; - int cmp; - int index; - int locked_leaves = 0; - struct metapage *mp; - xtpage_t *p; - struct btframe *parent; - int rc; - struct tblock *tblk; - struct tlock *tlck = NULL; - xad_t *xad; - int xlen; - s64 xoff; - struct xtlock *xtlck = NULL; - - /* save object truncation type */ - tblk = tid_to_tblock(tid); - tblk->xflag |= COMMIT_PMAP; - - /* clear stack */ - BT_CLR(&btstack); - - if (committed_size) { - xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; - rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); - if (rc) - return rc; - - XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); - - if (cmp != 0) { - XT_PUTPAGE(mp); - jfs_error(ip->i_sb, - "xtTruncate_pmap: did not find extent"); - return -EIO; - } - } else { - /* - * start with root - * - * root resides in the inode - */ - bn = 0; - - /* - * first access of each page: - */ - getPage: - XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - /* process entries backward from last index */ - index = le16_to_cpu(p->header.nextindex) - 1; - - if (p->header.flag & BT_INTERNAL) - goto getChild; - } - - /* - * leaf page - */ - - if (++locked_leaves > MAX_TRUNCATE_LEAVES) { - /* - * We need to limit the size of the transaction - * to avoid exhausting pagecache & tlocks - */ - xad = &p->xad[index]; - xoff = offsetXAD(xad); - xlen = lengthXAD(xad); - XT_PUTPAGE(mp); - return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; - } - tlck = txLock(tid, ip, mp, tlckXTREE); - tlck->type = tlckXTREE | tlckFREE; - xtlck = (struct xtlock *) & tlck->lock; - 
xtlck->hwm.offset = index; - - - XT_PUTPAGE(mp); - - /* - * go back up to the parent page - */ - getParent: - /* pop/restore parent entry for the current child page */ - if ((parent = BT_POP(&btstack)) == NULL) - /* current page must have been root */ - goto out; - - /* get back the parent page */ - bn = parent->bn; - XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - index = parent->index; - - /* - * parent page become empty: free the page - */ - if (index == XTENTRYSTART) { - /* txCommit() with tlckFREE: - * free child extents covered by parent; - * invalidate parent if COMMIT_PWMAP; - */ - tlck = txLock(tid, ip, mp, tlckXTREE); - xtlck = (struct xtlock *) & tlck->lock; - xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; - tlck->type = tlckXTREE | tlckFREE; - - XT_PUTPAGE(mp); - - if (p->header.flag & BT_ROOT) { - - goto out; - } else { - goto getParent; - } - } - /* - * parent page still has entries for front region; - */ - else - index--; - /* - * internal page: go down to child page of current entry - */ - getChild: - /* save current parent entry for the child page */ - if (BT_STACK_FULL(&btstack)) { - jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!"); - XT_PUTPAGE(mp); - return -EIO; - } - BT_PUSH(&btstack, bn, index); - - /* get child page */ - xad = &p->xad[index]; - bn = addressXAD(xad); - - /* - * first access of each internal entry: - */ - /* release parent page */ - XT_PUTPAGE(mp); - - /* process the child page */ - goto getPage; - - out: - - return 0; -} - -#ifdef CONFIG_JFS_STATISTICS -static int jfs_xtstat_proc_show(struct seq_file *m, void *v) -{ - seq_printf(m, - "JFS Xtree statistics\n" - "====================\n" - "searches = %d\n" - "fast searches = %d\n" - "splits = %d\n", - xtStat.search, - xtStat.fastSearch, - xtStat.split); - return 0; -} - -static int jfs_xtstat_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, jfs_xtstat_proc_show, NULL); -} - -const struct file_operations 
jfs_xtstat_proc_fops = { - .owner = THIS_MODULE, - .open = jfs_xtstat_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif diff --git a/ANDROID_3.4.5/fs/jfs/jfs_xtree.h b/ANDROID_3.4.5/fs/jfs/jfs_xtree.h deleted file mode 100644 index 08c0c749..00000000 --- a/ANDROID_3.4.5/fs/jfs/jfs_xtree.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _H_JFS_XTREE -#define _H_JFS_XTREE - -/* - * jfs_xtree.h: extent allocation descriptor B+-tree manager - */ - -#include "jfs_btree.h" - - -/* - * extent allocation descriptor (xad) - */ -typedef struct xad { - unsigned flag:8; /* 1: flag */ - unsigned rsvrd:16; /* 2: reserved */ - unsigned off1:8; /* 1: offset in unit of fsblksize */ - __le32 off2; /* 4: offset in unit of fsblksize */ - unsigned len:24; /* 3: length in unit of fsblksize */ - unsigned addr1:8; /* 1: address in unit of fsblksize */ - __le32 addr2; /* 4: address in unit of fsblksize */ -} xad_t; /* (16) */ - -#define MAXXLEN ((1 << 24) - 1) - -#define XTSLOTSIZE 16 -#define L2XTSLOTSIZE 4 - -/* xad_t field construction */ -#define XADoffset(xad, offset64)\ -{\ - (xad)->off1 = ((u64)offset64) >> 32;\ - (xad)->off2 = __cpu_to_le32((offset64) & 
0xffffffff);\ -} -#define XADaddress(xad, address64)\ -{\ - (xad)->addr1 = ((u64)address64) >> 32;\ - (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ -} -#define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32) - -/* xad_t field extraction */ -#define offsetXAD(xad)\ - ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2)) -#define addressXAD(xad)\ - ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2)) -#define lengthXAD(xad) __le24_to_cpu((xad)->len) - -/* xad list */ -struct xadlist { - s16 maxnxad; - s16 nxad; - xad_t *xad; -}; - -/* xad_t flags */ -#define XAD_NEW 0x01 /* new */ -#define XAD_EXTENDED 0x02 /* extended */ -#define XAD_COMPRESSED 0x04 /* compressed with recorded length */ -#define XAD_NOTRECORDED 0x08 /* allocated but not recorded */ -#define XAD_COW 0x10 /* copy-on-write */ - - -/* possible values for maxentry */ -#define XTROOTINITSLOT_DIR 6 -#define XTROOTINITSLOT 10 -#define XTROOTMAXSLOT 18 -#define XTPAGEMAXSLOT 256 -#define XTENTRYSTART 2 - -/* - * xtree page: - */ -typedef union { - struct xtheader { - __le64 next; /* 8: */ - __le64 prev; /* 8: */ - - u8 flag; /* 1: */ - u8 rsrvd1; /* 1: */ - __le16 nextindex; /* 2: next index = number of entries */ - __le16 maxentry; /* 2: max number of entries */ - __le16 rsrvd2; /* 2: */ - - pxd_t self; /* 8: self */ - } header; /* (32) */ - - xad_t xad[XTROOTMAXSLOT]; /* 16 * maxentry: xad array */ -} xtpage_t; - -/* - * external declaration - */ -extern int xtLookup(struct inode *ip, s64 lstart, s64 llen, - int *pflag, s64 * paddr, int *plen, int flag); -extern void xtInitRoot(tid_t tid, struct inode *ip); -extern int xtInsert(tid_t tid, struct inode *ip, - int xflag, s64 xoff, int xlen, s64 * xaddrp, int flag); -extern int xtExtend(tid_t tid, struct inode *ip, s64 xoff, int xlen, - int flag); -#ifdef _NOTYET -extern int xtTailgate(tid_t tid, struct inode *ip, - s64 xoff, int xlen, s64 xaddr, int flag); -#endif -extern int xtUpdate(tid_t tid, struct inode *ip, struct 
xad *nxad); -extern int xtDelete(tid_t tid, struct inode *ip, s64 xoff, int xlen, - int flag); -extern s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int type); -extern s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size); -extern int xtRelocate(tid_t tid, struct inode *ip, - xad_t * oxad, s64 nxaddr, int xtype); -extern int xtAppend(tid_t tid, - struct inode *ip, int xflag, s64 xoff, int maxblocks, - int *xlenp, s64 * xaddrp, int flag); -#endif /* !_H_JFS_XTREE */ diff --git a/ANDROID_3.4.5/fs/jfs/namei.c b/ANDROID_3.4.5/fs/jfs/namei.c deleted file mode 100644 index 07c91ca6..00000000 --- a/ANDROID_3.4.5/fs/jfs/namei.c +++ /dev/null @@ -1,1610 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/fs.h> -#include <linux/namei.h> -#include <linux/ctype.h> -#include <linux/quotaops.h> -#include <linux/exportfs.h> -#include "jfs_incore.h" -#include "jfs_superblock.h" -#include "jfs_inode.h" -#include "jfs_dinode.h" -#include "jfs_dmap.h" -#include "jfs_unicode.h" -#include "jfs_metapage.h" -#include "jfs_xattr.h" -#include "jfs_acl.h" -#include "jfs_debug.h" - -/* - * forward references - */ -const struct dentry_operations jfs_ci_dentry_operations; - -static s64 commitZeroLink(tid_t, struct inode *); - -/* - * NAME: free_ea_wmap(inode) - * - * FUNCTION: free uncommitted extended attributes from working map - * - */ -static inline void free_ea_wmap(struct inode *inode) -{ - dxd_t *ea = &JFS_IP(inode)->ea; - - if (ea->flag & DXD_EXTENT) { - /* free EA pages from cache */ - invalidate_dxd_metapages(inode, *ea); - dbFree(inode, addressDXD(ea), lengthDXD(ea)); - } - ea->flag = 0; -} - -/* - * NAME: jfs_create(dip, dentry, mode) - * - * FUNCTION: create a regular file in the parent directory <dip> - * with name = <from dentry> and mode = <mode> - * - * PARAMETER: dip - parent directory vnode - * dentry - dentry of new file - * mode - create mode (rwxrwxrwx). 
- * nd- nd struct - * - * RETURN: Errors from subroutines - * - */ -static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode, - struct nameidata *nd) -{ - int rc = 0; - tid_t tid; /* transaction id */ - struct inode *ip = NULL; /* child directory inode */ - ino_t ino; - struct component_name dname; /* child directory name */ - struct btstack btstack; - struct inode *iplist[2]; - struct tblock *tblk; - - jfs_info("jfs_create: dip:0x%p name:%s", dip, dentry->d_name.name); - - dquot_initialize(dip); - - /* - * search parent directory for entry/freespace - * (dtSearch() returns parent directory page pinned) - */ - if ((rc = get_UCSname(&dname, dentry))) - goto out1; - - /* - * Either iAlloc() or txBegin() may block. Deadlock can occur if we - * block there while holding dtree page, so we allocate the inode & - * begin the transaction before we search the directory. - */ - ip = ialloc(dip, mode); - if (IS_ERR(ip)) { - rc = PTR_ERR(ip); - goto out2; - } - - tid = txBegin(dip->i_sb, 0); - - mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); - mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); - - rc = jfs_init_acl(tid, ip, dip); - if (rc) - goto out3; - - rc = jfs_init_security(tid, ip, dip, &dentry->d_name); - if (rc) { - txAbort(tid, 0); - goto out3; - } - - if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) { - jfs_err("jfs_create: dtSearch returned %d", rc); - txAbort(tid, 0); - goto out3; - } - - tblk = tid_to_tblock(tid); - tblk->xflag |= COMMIT_CREATE; - tblk->ino = ip->i_ino; - tblk->u.ixpxd = JFS_IP(ip)->ixpxd; - - iplist[0] = dip; - iplist[1] = ip; - - /* - * initialize the child XAD tree root in-line in inode - */ - xtInitRoot(tid, ip); - - /* - * create entry in parent directory for child directory - * (dtInsert() releases parent directory page) - */ - ino = ip->i_ino; - if ((rc = dtInsert(tid, dip, &dname, &ino, &btstack))) { - if (rc == -EIO) { - jfs_err("jfs_create: dtInsert returned -EIO"); - 
txAbort(tid, 1); /* Marks Filesystem dirty */ - } else - txAbort(tid, 0); /* Filesystem full */ - goto out3; - } - - ip->i_op = &jfs_file_inode_operations; - ip->i_fop = &jfs_file_operations; - ip->i_mapping->a_ops = &jfs_aops; - - mark_inode_dirty(ip); - - dip->i_ctime = dip->i_mtime = CURRENT_TIME; - - mark_inode_dirty(dip); - - rc = txCommit(tid, 2, &iplist[0], 0); - - out3: - txEnd(tid); - mutex_unlock(&JFS_IP(ip)->commit_mutex); - mutex_unlock(&JFS_IP(dip)->commit_mutex); - if (rc) { - free_ea_wmap(ip); - clear_nlink(ip); - unlock_new_inode(ip); - iput(ip); - } else { - d_instantiate(dentry, ip); - unlock_new_inode(ip); - } - - out2: - free_UCSname(&dname); - - out1: - - jfs_info("jfs_create: rc:%d", rc); - return rc; -} - - -/* - * NAME: jfs_mkdir(dip, dentry, mode) - * - * FUNCTION: create a child directory in the parent directory <dip> - * with name = <from dentry> and mode = <mode> - * - * PARAMETER: dip - parent directory vnode - * dentry - dentry of child directory - * mode - create mode (rwxrwxrwx). - * - * RETURN: Errors from subroutines - * - * note: - * EACCESS: user needs search+write permission on the parent directory - */ -static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode) -{ - int rc = 0; - tid_t tid; /* transaction id */ - struct inode *ip = NULL; /* child directory inode */ - ino_t ino; - struct component_name dname; /* child directory name */ - struct btstack btstack; - struct inode *iplist[2]; - struct tblock *tblk; - - jfs_info("jfs_mkdir: dip:0x%p name:%s", dip, dentry->d_name.name); - - dquot_initialize(dip); - - /* - * search parent directory for entry/freespace - * (dtSearch() returns parent directory page pinned) - */ - if ((rc = get_UCSname(&dname, dentry))) - goto out1; - - /* - * Either iAlloc() or txBegin() may block. Deadlock can occur if we - * block there while holding dtree page, so we allocate the inode & - * begin the transaction before we search the directory. 
- */ - ip = ialloc(dip, S_IFDIR | mode); - if (IS_ERR(ip)) { - rc = PTR_ERR(ip); - goto out2; - } - - tid = txBegin(dip->i_sb, 0); - - mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); - mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); - - rc = jfs_init_acl(tid, ip, dip); - if (rc) - goto out3; - - rc = jfs_init_security(tid, ip, dip, &dentry->d_name); - if (rc) { - txAbort(tid, 0); - goto out3; - } - - if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) { - jfs_err("jfs_mkdir: dtSearch returned %d", rc); - txAbort(tid, 0); - goto out3; - } - - tblk = tid_to_tblock(tid); - tblk->xflag |= COMMIT_CREATE; - tblk->ino = ip->i_ino; - tblk->u.ixpxd = JFS_IP(ip)->ixpxd; - - iplist[0] = dip; - iplist[1] = ip; - - /* - * initialize the child directory in-line in inode - */ - dtInitRoot(tid, ip, dip->i_ino); - - /* - * create entry in parent directory for child directory - * (dtInsert() releases parent directory page) - */ - ino = ip->i_ino; - if ((rc = dtInsert(tid, dip, &dname, &ino, &btstack))) { - if (rc == -EIO) { - jfs_err("jfs_mkdir: dtInsert returned -EIO"); - txAbort(tid, 1); /* Marks Filesystem dirty */ - } else - txAbort(tid, 0); /* Filesystem full */ - goto out3; - } - - set_nlink(ip, 2); /* for '.' */ - ip->i_op = &jfs_dir_inode_operations; - ip->i_fop = &jfs_dir_operations; - - mark_inode_dirty(ip); - - /* update parent directory inode */ - inc_nlink(dip); /* for '..' 
from child directory */ - dip->i_ctime = dip->i_mtime = CURRENT_TIME; - mark_inode_dirty(dip); - - rc = txCommit(tid, 2, &iplist[0], 0); - - out3: - txEnd(tid); - mutex_unlock(&JFS_IP(ip)->commit_mutex); - mutex_unlock(&JFS_IP(dip)->commit_mutex); - if (rc) { - free_ea_wmap(ip); - clear_nlink(ip); - unlock_new_inode(ip); - iput(ip); - } else { - d_instantiate(dentry, ip); - unlock_new_inode(ip); - } - - out2: - free_UCSname(&dname); - - - out1: - - jfs_info("jfs_mkdir: rc:%d", rc); - return rc; -} - -/* - * NAME: jfs_rmdir(dip, dentry) - * - * FUNCTION: remove a link to child directory - * - * PARAMETER: dip - parent inode - * dentry - child directory dentry - * - * RETURN: -EINVAL - if name is . or .. - * -EINVAL - if . or .. exist but are invalid. - * errors from subroutines - * - * note: - * if other threads have the directory open when the last link - * is removed, the "." and ".." entries, if present, are removed before - * rmdir() returns and no new entries may be created in the directory, - * but the directory is not removed until the last reference to - * the directory is released (cf.unlink() of regular file). - */ -static int jfs_rmdir(struct inode *dip, struct dentry *dentry) -{ - int rc; - tid_t tid; /* transaction id */ - struct inode *ip = dentry->d_inode; - ino_t ino; - struct component_name dname; - struct inode *iplist[2]; - struct tblock *tblk; - - jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name); - - /* Init inode for quota operations. 
*/ - dquot_initialize(dip); - dquot_initialize(ip); - - /* directory must be empty to be removed */ - if (!dtEmpty(ip)) { - rc = -ENOTEMPTY; - goto out; - } - - if ((rc = get_UCSname(&dname, dentry))) { - goto out; - } - - tid = txBegin(dip->i_sb, 0); - - mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); - mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); - - iplist[0] = dip; - iplist[1] = ip; - - tblk = tid_to_tblock(tid); - tblk->xflag |= COMMIT_DELETE; - tblk->u.ip = ip; - - /* - * delete the entry of target directory from parent directory - */ - ino = ip->i_ino; - if ((rc = dtDelete(tid, dip, &dname, &ino, JFS_REMOVE))) { - jfs_err("jfs_rmdir: dtDelete returned %d", rc); - if (rc == -EIO) - txAbort(tid, 1); - txEnd(tid); - mutex_unlock(&JFS_IP(ip)->commit_mutex); - mutex_unlock(&JFS_IP(dip)->commit_mutex); - - goto out2; - } - - /* update parent directory's link count corresponding - * to ".." entry of the target directory deleted - */ - dip->i_ctime = dip->i_mtime = CURRENT_TIME; - inode_dec_link_count(dip); - - /* - * OS/2 could have created EA and/or ACL - */ - /* free EA from both persistent and working map */ - if (JFS_IP(ip)->ea.flag & DXD_EXTENT) { - /* free EA pages */ - txEA(tid, ip, &JFS_IP(ip)->ea, NULL); - } - JFS_IP(ip)->ea.flag = 0; - - /* free ACL from both persistent and working map */ - if (JFS_IP(ip)->acl.flag & DXD_EXTENT) { - /* free ACL pages */ - txEA(tid, ip, &JFS_IP(ip)->acl, NULL); - } - JFS_IP(ip)->acl.flag = 0; - - /* mark the target directory as deleted */ - clear_nlink(ip); - mark_inode_dirty(ip); - - rc = txCommit(tid, 2, &iplist[0], 0); - - txEnd(tid); - - mutex_unlock(&JFS_IP(ip)->commit_mutex); - mutex_unlock(&JFS_IP(dip)->commit_mutex); - - /* - * Truncating the directory index table is not guaranteed. 
It - * may need to be done iteratively - */ - if (test_cflag(COMMIT_Stale, dip)) { - if (dip->i_size > 1) - jfs_truncate_nolock(dip, 0); - - clear_cflag(COMMIT_Stale, dip); - } - - out2: - free_UCSname(&dname); - - out: - jfs_info("jfs_rmdir: rc:%d", rc); - return rc; -} - -/* - * NAME: jfs_unlink(dip, dentry) - * - * FUNCTION: remove a link to object <vp> named by <name> - * from parent directory <dvp> - * - * PARAMETER: dip - inode of parent directory - * dentry - dentry of object to be removed - * - * RETURN: errors from subroutines - * - * note: - * temporary file: if one or more processes have the file open - * when the last link is removed, the link will be removed before - * unlink() returns, but the removal of the file contents will be - * postponed until all references to the files are closed. - * - * JFS does NOT support unlink() on directories. - * - */ -static int jfs_unlink(struct inode *dip, struct dentry *dentry) -{ - int rc; - tid_t tid; /* transaction id */ - struct inode *ip = dentry->d_inode; - ino_t ino; - struct component_name dname; /* object name */ - struct inode *iplist[2]; - struct tblock *tblk; - s64 new_size = 0; - int commit_flag; - - jfs_info("jfs_unlink: dip:0x%p name:%s", dip, dentry->d_name.name); - - /* Init inode for quota operations. 
*/ - dquot_initialize(dip); - dquot_initialize(ip); - - if ((rc = get_UCSname(&dname, dentry))) - goto out; - - IWRITE_LOCK(ip, RDWRLOCK_NORMAL); - - tid = txBegin(dip->i_sb, 0); - - mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); - mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); - - iplist[0] = dip; - iplist[1] = ip; - - /* - * delete the entry of target file from parent directory - */ - ino = ip->i_ino; - if ((rc = dtDelete(tid, dip, &dname, &ino, JFS_REMOVE))) { - jfs_err("jfs_unlink: dtDelete returned %d", rc); - if (rc == -EIO) - txAbort(tid, 1); /* Marks FS Dirty */ - txEnd(tid); - mutex_unlock(&JFS_IP(ip)->commit_mutex); - mutex_unlock(&JFS_IP(dip)->commit_mutex); - IWRITE_UNLOCK(ip); - goto out1; - } - - ASSERT(ip->i_nlink); - - ip->i_ctime = dip->i_ctime = dip->i_mtime = CURRENT_TIME; - mark_inode_dirty(dip); - - /* update target's inode */ - inode_dec_link_count(ip); - - /* - * commit zero link count object - */ - if (ip->i_nlink == 0) { - assert(!test_cflag(COMMIT_Nolink, ip)); - /* free block resources */ - if ((new_size = commitZeroLink(tid, ip)) < 0) { - txAbort(tid, 1); /* Marks FS Dirty */ - txEnd(tid); - mutex_unlock(&JFS_IP(ip)->commit_mutex); - mutex_unlock(&JFS_IP(dip)->commit_mutex); - IWRITE_UNLOCK(ip); - rc = new_size; - goto out1; - } - tblk = tid_to_tblock(tid); - tblk->xflag |= COMMIT_DELETE; - tblk->u.ip = ip; - } - - /* - * Incomplete truncate of file data can - * result in timing problems unless we synchronously commit the - * transaction. 
- */ - if (new_size) - commit_flag = COMMIT_SYNC; - else - commit_flag = 0; - - /* - * If xtTruncate was incomplete, commit synchronously to avoid - * timing complications - */ - rc = txCommit(tid, 2, &iplist[0], commit_flag); - - txEnd(tid); - - mutex_unlock(&JFS_IP(ip)->commit_mutex); - mutex_unlock(&JFS_IP(dip)->commit_mutex); - - while (new_size && (rc == 0)) { - tid = txBegin(dip->i_sb, 0); - mutex_lock(&JFS_IP(ip)->commit_mutex); - new_size = xtTruncate_pmap(tid, ip, new_size); - if (new_size < 0) { - txAbort(tid, 1); /* Marks FS Dirty */ - rc = new_size; - } else - rc = txCommit(tid, 2, &iplist[0], COMMIT_SYNC); - txEnd(tid); - mutex_unlock(&JFS_IP(ip)->commit_mutex); - } - - if (ip->i_nlink == 0) - set_cflag(COMMIT_Nolink, ip); - - IWRITE_UNLOCK(ip); - - /* - * Truncating the directory index table is not guaranteed. It - * may need to be done iteratively - */ - if (test_cflag(COMMIT_Stale, dip)) { - if (dip->i_size > 1) - jfs_truncate_nolock(dip, 0); - - clear_cflag(COMMIT_Stale, dip); - } - - out1: - free_UCSname(&dname); - out: - jfs_info("jfs_unlink: rc:%d", rc); - return rc; -} - -/* - * NAME: commitZeroLink() - * - * FUNCTION: for non-directory, called by jfs_remove(), - * truncate a regular file, directory or symbolic - * link to zero length. return 0 if type is not - * one of these. - * - * if the file is currently associated with a VM segment - * only permanent disk and inode map resources are freed, - * and neither the inode nor indirect blocks are modified - * so that the resources can be later freed in the work - * map by ctrunc1. - * if there is no VM segment on entry, the resources are - * freed in both work and permanent map. - * (? for temporary file - memory object is cached even - * after no reference: - * reference count > 0 - ) - * - * PARAMETERS: cd - pointer to commit data structure. - * current inode is the one to truncate. 
- * - * RETURN: Errors from subroutines - */ -static s64 commitZeroLink(tid_t tid, struct inode *ip) -{ - int filetype; - struct tblock *tblk; - - jfs_info("commitZeroLink: tid = %d, ip = 0x%p", tid, ip); - - filetype = ip->i_mode & S_IFMT; - switch (filetype) { - case S_IFREG: - break; - case S_IFLNK: - /* fast symbolic link */ - if (ip->i_size < IDATASIZE) { - ip->i_size = 0; - return 0; - } - break; - default: - assert(filetype != S_IFDIR); - return 0; - } - - set_cflag(COMMIT_Freewmap, ip); - - /* mark transaction of block map update type */ - tblk = tid_to_tblock(tid); - tblk->xflag |= COMMIT_PMAP; - - /* - * free EA - */ - if (JFS_IP(ip)->ea.flag & DXD_EXTENT) - /* acquire maplock on EA to be freed from block map */ - txEA(tid, ip, &JFS_IP(ip)->ea, NULL); - - /* - * free ACL - */ - if (JFS_IP(ip)->acl.flag & DXD_EXTENT) - /* acquire maplock on EA to be freed from block map */ - txEA(tid, ip, &JFS_IP(ip)->acl, NULL); - - /* - * free xtree/data (truncate to zero length): - * free xtree/data pages from cache if COMMIT_PWMAP, - * free xtree/data blocks from persistent block map, and - * free xtree/data blocks from working block map if COMMIT_PWMAP; - */ - if (ip->i_size) - return xtTruncate_pmap(tid, ip, 0); - - return 0; -} - - -/* - * NAME: jfs_free_zero_link() - * - * FUNCTION: for non-directory, called by iClose(), - * free resources of a file from cache and WORKING map - * for a file previously committed with zero link count - * while associated with a pager object, - * - * PARAMETER: ip - pointer to inode of file. - */ -void jfs_free_zero_link(struct inode *ip) -{ - int type; - - jfs_info("jfs_free_zero_link: ip = 0x%p", ip); - - /* return if not reg or symbolic link or if size is - * already ok. 
- */ - type = ip->i_mode & S_IFMT; - - switch (type) { - case S_IFREG: - break; - case S_IFLNK: - /* if its contained in inode nothing to do */ - if (ip->i_size < IDATASIZE) - return; - break; - default: - return; - } - - /* - * free EA - */ - if (JFS_IP(ip)->ea.flag & DXD_EXTENT) { - s64 xaddr = addressDXD(&JFS_IP(ip)->ea); - int xlen = lengthDXD(&JFS_IP(ip)->ea); - struct maplock maplock; /* maplock for COMMIT_WMAP */ - struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */ - - /* free EA pages from cache */ - invalidate_dxd_metapages(ip, JFS_IP(ip)->ea); - - /* free EA extent from working block map */ - maplock.index = 1; - pxdlock = (struct pxd_lock *) & maplock; - pxdlock->flag = mlckFREEPXD; - PXDaddress(&pxdlock->pxd, xaddr); - PXDlength(&pxdlock->pxd, xlen); - txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP); - } - - /* - * free ACL - */ - if (JFS_IP(ip)->acl.flag & DXD_EXTENT) { - s64 xaddr = addressDXD(&JFS_IP(ip)->acl); - int xlen = lengthDXD(&JFS_IP(ip)->acl); - struct maplock maplock; /* maplock for COMMIT_WMAP */ - struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */ - - invalidate_dxd_metapages(ip, JFS_IP(ip)->acl); - - /* free ACL extent from working block map */ - maplock.index = 1; - pxdlock = (struct pxd_lock *) & maplock; - pxdlock->flag = mlckFREEPXD; - PXDaddress(&pxdlock->pxd, xaddr); - PXDlength(&pxdlock->pxd, xlen); - txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP); - } - - /* - * free xtree/data (truncate to zero length): - * free xtree/data pages from cache, and - * free xtree/data blocks from working block map; - */ - if (ip->i_size) - xtTruncate(0, ip, 0, COMMIT_WMAP); -} - -/* - * NAME: jfs_link(vp, dvp, name, crp) - * - * FUNCTION: create a link to <vp> by the name = <name> - * in the parent directory <dvp> - * - * PARAMETER: vp - target object - * dvp - parent directory of new link - * name - name of new link to target object - * crp - credential - * - * RETURN: Errors from subroutines - * - * note: - * JFS does NOT support link() on 
directories (to prevent circular - * path in the directory hierarchy); - * EPERM: the target object is a directory, and either the caller - * does not have appropriate privileges or the implementation prohibits - * using link() on directories [XPG4.2]. - * - * JFS does NOT support links between file systems: - * EXDEV: target object and new link are on different file systems and - * implementation does not support links between file systems [XPG4.2]. - */ -static int jfs_link(struct dentry *old_dentry, - struct inode *dir, struct dentry *dentry) -{ - int rc; - tid_t tid; - struct inode *ip = old_dentry->d_inode; - ino_t ino; - struct component_name dname; - struct btstack btstack; - struct inode *iplist[2]; - - jfs_info("jfs_link: %s %s", old_dentry->d_name.name, - dentry->d_name.name); - - dquot_initialize(dir); - - tid = txBegin(ip->i_sb, 0); - - mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT); - mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); - - /* - * scan parent directory for entry/freespace - */ - if ((rc = get_UCSname(&dname, dentry))) - goto out; - - if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE))) - goto free_dname; - - /* - * create entry for new link in parent directory - */ - ino = ip->i_ino; - if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack))) - goto free_dname; - - /* update object inode */ - inc_nlink(ip); /* for new link */ - ip->i_ctime = CURRENT_TIME; - dir->i_ctime = dir->i_mtime = CURRENT_TIME; - mark_inode_dirty(dir); - ihold(ip); - - iplist[0] = ip; - iplist[1] = dir; - rc = txCommit(tid, 2, &iplist[0], 0); - - if (rc) { - drop_nlink(ip); /* never instantiated */ - iput(ip); - } else - d_instantiate(dentry, ip); - - free_dname: - free_UCSname(&dname); - - out: - txEnd(tid); - - mutex_unlock(&JFS_IP(ip)->commit_mutex); - mutex_unlock(&JFS_IP(dir)->commit_mutex); - - jfs_info("jfs_link: rc:%d", rc); - return rc; -} - -/* - * NAME: jfs_symlink(dip, dentry, name) - * - * FUNCTION: creates a 
symbolic link to <symlink> by name <name> - * in directory <dip> - * - * PARAMETER: dip - parent directory vnode - * dentry - dentry of symbolic link - * name - the path name of the existing object - * that will be the source of the link - * - * RETURN: errors from subroutines - * - * note: - * ENAMETOOLONG: pathname resolution of a symbolic link produced - * an intermediate result whose length exceeds PATH_MAX [XPG4.2] -*/ - -static int jfs_symlink(struct inode *dip, struct dentry *dentry, - const char *name) -{ - int rc; - tid_t tid; - ino_t ino = 0; - struct component_name dname; - int ssize; /* source pathname size */ - struct btstack btstack; - struct inode *ip = dentry->d_inode; - unchar *i_fastsymlink; - s64 xlen = 0; - int bmask = 0, xsize; - s64 xaddr; - struct metapage *mp; - struct super_block *sb; - struct tblock *tblk; - - struct inode *iplist[2]; - - jfs_info("jfs_symlink: dip:0x%p name:%s", dip, name); - - dquot_initialize(dip); - - ssize = strlen(name) + 1; - - /* - * search parent directory for entry/freespace - * (dtSearch() returns parent directory page pinned) - */ - - if ((rc = get_UCSname(&dname, dentry))) - goto out1; - - /* - * allocate on-disk/in-memory inode for symbolic link: - * (iAlloc() returns new, locked inode) - */ - ip = ialloc(dip, S_IFLNK | 0777); - if (IS_ERR(ip)) { - rc = PTR_ERR(ip); - goto out2; - } - - tid = txBegin(dip->i_sb, 0); - - mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); - mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); - - rc = jfs_init_security(tid, ip, dip, &dentry->d_name); - if (rc) - goto out3; - - tblk = tid_to_tblock(tid); - tblk->xflag |= COMMIT_CREATE; - tblk->ino = ip->i_ino; - tblk->u.ixpxd = JFS_IP(ip)->ixpxd; - - /* fix symlink access permission - * (dir_create() ANDs in the u.u_cmask, - * but symlinks really need to be 777 access) - */ - ip->i_mode |= 0777; - - /* - * write symbolic link target path name - */ - xtInitRoot(tid, ip); - - /* - * write source path 
name inline in on-disk inode (fast symbolic link) - */ - - if (ssize <= IDATASIZE) { - ip->i_op = &jfs_fast_symlink_inode_operations; - - i_fastsymlink = JFS_IP(ip)->i_inline; - memcpy(i_fastsymlink, name, ssize); - ip->i_size = ssize - 1; - - /* - * if symlink is > 128 bytes, we don't have the space to - * store inline extended attributes - */ - if (ssize > sizeof (JFS_IP(ip)->i_inline)) - JFS_IP(ip)->mode2 &= ~INLINEEA; - - jfs_info("jfs_symlink: fast symlink added ssize:%d name:%s ", - ssize, name); - } - /* - * write source path name in a single extent - */ - else { - jfs_info("jfs_symlink: allocate extent ip:0x%p", ip); - - ip->i_op = &jfs_symlink_inode_operations; - ip->i_mapping->a_ops = &jfs_aops; - - /* - * even though the data of symlink object (source - * path name) is treated as non-journaled user data, - * it is read/written thru buffer cache for performance. - */ - sb = ip->i_sb; - bmask = JFS_SBI(sb)->bsize - 1; - xsize = (ssize + bmask) & ~bmask; - xaddr = 0; - xlen = xsize >> JFS_SBI(sb)->l2bsize; - if ((rc = xtInsert(tid, ip, 0, 0, xlen, &xaddr, 0))) { - txAbort(tid, 0); - goto out3; - } - ip->i_size = ssize - 1; - while (ssize) { - /* This is kind of silly since PATH_MAX == 4K */ - int copy_size = min(ssize, PSIZE); - - mp = get_metapage(ip, xaddr, PSIZE, 1); - - if (mp == NULL) { - xtTruncate(tid, ip, 0, COMMIT_PWMAP); - rc = -EIO; - txAbort(tid, 0); - goto out3; - } - memcpy(mp->data, name, copy_size); - flush_metapage(mp); - ssize -= copy_size; - name += copy_size; - xaddr += JFS_SBI(sb)->nbperpage; - } - } - - /* - * create entry for symbolic link in parent directory - */ - rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE); - if (rc == 0) { - ino = ip->i_ino; - rc = dtInsert(tid, dip, &dname, &ino, &btstack); - } - if (rc) { - if (xlen) - xtTruncate(tid, ip, 0, COMMIT_PWMAP); - txAbort(tid, 0); - /* discard new inode */ - goto out3; - } - - mark_inode_dirty(ip); - - dip->i_ctime = dip->i_mtime = CURRENT_TIME; - mark_inode_dirty(dip); - 
/* - * commit update of parent directory and link object - */ - - iplist[0] = dip; - iplist[1] = ip; - rc = txCommit(tid, 2, &iplist[0], 0); - - out3: - txEnd(tid); - mutex_unlock(&JFS_IP(ip)->commit_mutex); - mutex_unlock(&JFS_IP(dip)->commit_mutex); - if (rc) { - free_ea_wmap(ip); - clear_nlink(ip); - unlock_new_inode(ip); - iput(ip); - } else { - d_instantiate(dentry, ip); - unlock_new_inode(ip); - } - - out2: - free_UCSname(&dname); - - out1: - jfs_info("jfs_symlink: rc:%d", rc); - return rc; -} - - -/* - * NAME: jfs_rename - * - * FUNCTION: rename a file or directory - */ -static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) -{ - struct btstack btstack; - ino_t ino; - struct component_name new_dname; - struct inode *new_ip; - struct component_name old_dname; - struct inode *old_ip; - int rc; - tid_t tid; - struct tlock *tlck; - struct dt_lock *dtlck; - struct lv *lv; - int ipcount; - struct inode *iplist[4]; - struct tblock *tblk; - s64 new_size = 0; - int commit_flag; - - - jfs_info("jfs_rename: %s %s", old_dentry->d_name.name, - new_dentry->d_name.name); - - dquot_initialize(old_dir); - dquot_initialize(new_dir); - - old_ip = old_dentry->d_inode; - new_ip = new_dentry->d_inode; - - if ((rc = get_UCSname(&old_dname, old_dentry))) - goto out1; - - if ((rc = get_UCSname(&new_dname, new_dentry))) - goto out2; - - /* - * Make sure source inode number is what we think it is - */ - rc = dtSearch(old_dir, &old_dname, &ino, &btstack, JFS_LOOKUP); - if (rc || (ino != old_ip->i_ino)) { - rc = -ENOENT; - goto out3; - } - - /* - * Make sure dest inode number (if any) is what we think it is - */ - rc = dtSearch(new_dir, &new_dname, &ino, &btstack, JFS_LOOKUP); - if (!rc) { - if ((!new_ip) || (ino != new_ip->i_ino)) { - rc = -ESTALE; - goto out3; - } - } else if (rc != -ENOENT) - goto out3; - else if (new_ip) { - /* no entry exists, but one was expected */ - rc = -ESTALE; - goto out3; - } - - if 
(S_ISDIR(old_ip->i_mode)) { - if (new_ip) { - if (!dtEmpty(new_ip)) { - rc = -ENOTEMPTY; - goto out3; - } - } - } else if (new_ip) { - IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL); - /* Init inode for quota operations. */ - dquot_initialize(new_ip); - } - - /* - * The real work starts here - */ - tid = txBegin(new_dir->i_sb, 0); - - /* - * How do we know the locking is safe from deadlocks? - * The vfs does the hard part for us. Any time we are taking nested - * commit_mutexes, the vfs already has i_mutex held on the parent. - * Here, the vfs has already taken i_mutex on both old_dir and new_dir. - */ - mutex_lock_nested(&JFS_IP(new_dir)->commit_mutex, COMMIT_MUTEX_PARENT); - mutex_lock_nested(&JFS_IP(old_ip)->commit_mutex, COMMIT_MUTEX_CHILD); - if (old_dir != new_dir) - mutex_lock_nested(&JFS_IP(old_dir)->commit_mutex, - COMMIT_MUTEX_SECOND_PARENT); - - if (new_ip) { - mutex_lock_nested(&JFS_IP(new_ip)->commit_mutex, - COMMIT_MUTEX_VICTIM); - /* - * Change existing directory entry to new inode number - */ - ino = new_ip->i_ino; - rc = dtModify(tid, new_dir, &new_dname, &ino, - old_ip->i_ino, JFS_RENAME); - if (rc) - goto out4; - drop_nlink(new_ip); - if (S_ISDIR(new_ip->i_mode)) { - drop_nlink(new_ip); - if (new_ip->i_nlink) { - mutex_unlock(&JFS_IP(new_ip)->commit_mutex); - if (old_dir != new_dir) - mutex_unlock(&JFS_IP(old_dir)->commit_mutex); - mutex_unlock(&JFS_IP(old_ip)->commit_mutex); - mutex_unlock(&JFS_IP(new_dir)->commit_mutex); - if (!S_ISDIR(old_ip->i_mode) && new_ip) - IWRITE_UNLOCK(new_ip); - jfs_error(new_ip->i_sb, - "jfs_rename: new_ip->i_nlink != 0"); - return -EIO; - } - tblk = tid_to_tblock(tid); - tblk->xflag |= COMMIT_DELETE; - tblk->u.ip = new_ip; - } else if (new_ip->i_nlink == 0) { - assert(!test_cflag(COMMIT_Nolink, new_ip)); - /* free block resources */ - if ((new_size = commitZeroLink(tid, new_ip)) < 0) { - txAbort(tid, 1); /* Marks FS Dirty */ - rc = new_size; - goto out4; - } - tblk = tid_to_tblock(tid); - tblk->xflag |= COMMIT_DELETE; - 
tblk->u.ip = new_ip; - } else { - new_ip->i_ctime = CURRENT_TIME; - mark_inode_dirty(new_ip); - } - } else { - /* - * Add new directory entry - */ - rc = dtSearch(new_dir, &new_dname, &ino, &btstack, - JFS_CREATE); - if (rc) { - jfs_err("jfs_rename didn't expect dtSearch to fail " - "w/rc = %d", rc); - goto out4; - } - - ino = old_ip->i_ino; - rc = dtInsert(tid, new_dir, &new_dname, &ino, &btstack); - if (rc) { - if (rc == -EIO) - jfs_err("jfs_rename: dtInsert returned -EIO"); - goto out4; - } - if (S_ISDIR(old_ip->i_mode)) - inc_nlink(new_dir); - } - /* - * Remove old directory entry - */ - - ino = old_ip->i_ino; - rc = dtDelete(tid, old_dir, &old_dname, &ino, JFS_REMOVE); - if (rc) { - jfs_err("jfs_rename did not expect dtDelete to return rc = %d", - rc); - txAbort(tid, 1); /* Marks Filesystem dirty */ - goto out4; - } - if (S_ISDIR(old_ip->i_mode)) { - drop_nlink(old_dir); - if (old_dir != new_dir) { - /* - * Change inode number of parent for moved directory - */ - - JFS_IP(old_ip)->i_dtroot.header.idotdot = - cpu_to_le32(new_dir->i_ino); - - /* Linelock header of dtree */ - tlck = txLock(tid, old_ip, - (struct metapage *) &JFS_IP(old_ip)->bxflag, - tlckDTREE | tlckBTROOT | tlckRELINK); - dtlck = (struct dt_lock *) & tlck->lock; - ASSERT(dtlck->index == 0); - lv = & dtlck->lv[0]; - lv->offset = 0; - lv->length = 1; - dtlck->index++; - } - } - - /* - * Update ctime on changed/moved inodes & mark dirty - */ - old_ip->i_ctime = CURRENT_TIME; - mark_inode_dirty(old_ip); - - new_dir->i_ctime = new_dir->i_mtime = current_fs_time(new_dir->i_sb); - mark_inode_dirty(new_dir); - - /* Build list of inodes modified by this transaction */ - ipcount = 0; - iplist[ipcount++] = old_ip; - if (new_ip) - iplist[ipcount++] = new_ip; - iplist[ipcount++] = old_dir; - - if (old_dir != new_dir) { - iplist[ipcount++] = new_dir; - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; - mark_inode_dirty(old_dir); - } - - /* - * Incomplete truncate of file data can - * result in timing 
problems unless we synchronously commit the - * transaction. - */ - if (new_size) - commit_flag = COMMIT_SYNC; - else - commit_flag = 0; - - rc = txCommit(tid, ipcount, iplist, commit_flag); - - out4: - txEnd(tid); - if (new_ip) - mutex_unlock(&JFS_IP(new_ip)->commit_mutex); - if (old_dir != new_dir) - mutex_unlock(&JFS_IP(old_dir)->commit_mutex); - mutex_unlock(&JFS_IP(old_ip)->commit_mutex); - mutex_unlock(&JFS_IP(new_dir)->commit_mutex); - - while (new_size && (rc == 0)) { - tid = txBegin(new_ip->i_sb, 0); - mutex_lock(&JFS_IP(new_ip)->commit_mutex); - new_size = xtTruncate_pmap(tid, new_ip, new_size); - if (new_size < 0) { - txAbort(tid, 1); - rc = new_size; - } else - rc = txCommit(tid, 1, &new_ip, COMMIT_SYNC); - txEnd(tid); - mutex_unlock(&JFS_IP(new_ip)->commit_mutex); - } - if (new_ip && (new_ip->i_nlink == 0)) - set_cflag(COMMIT_Nolink, new_ip); - out3: - free_UCSname(&new_dname); - out2: - free_UCSname(&old_dname); - out1: - if (new_ip && !S_ISDIR(new_ip->i_mode)) - IWRITE_UNLOCK(new_ip); - /* - * Truncating the directory index table is not guaranteed. 
It - * may need to be done iteratively - */ - if (test_cflag(COMMIT_Stale, old_dir)) { - if (old_dir->i_size > 1) - jfs_truncate_nolock(old_dir, 0); - - clear_cflag(COMMIT_Stale, old_dir); - } - - jfs_info("jfs_rename: returning %d", rc); - return rc; -} - - -/* - * NAME: jfs_mknod - * - * FUNCTION: Create a special file (device) - */ -static int jfs_mknod(struct inode *dir, struct dentry *dentry, - umode_t mode, dev_t rdev) -{ - struct jfs_inode_info *jfs_ip; - struct btstack btstack; - struct component_name dname; - ino_t ino; - struct inode *ip; - struct inode *iplist[2]; - int rc; - tid_t tid; - struct tblock *tblk; - - if (!new_valid_dev(rdev)) - return -EINVAL; - - jfs_info("jfs_mknod: %s", dentry->d_name.name); - - dquot_initialize(dir); - - if ((rc = get_UCSname(&dname, dentry))) - goto out; - - ip = ialloc(dir, mode); - if (IS_ERR(ip)) { - rc = PTR_ERR(ip); - goto out1; - } - jfs_ip = JFS_IP(ip); - - tid = txBegin(dir->i_sb, 0); - - mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT); - mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); - - rc = jfs_init_acl(tid, ip, dir); - if (rc) - goto out3; - - rc = jfs_init_security(tid, ip, dir, &dentry->d_name); - if (rc) { - txAbort(tid, 0); - goto out3; - } - - if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE))) { - txAbort(tid, 0); - goto out3; - } - - tblk = tid_to_tblock(tid); - tblk->xflag |= COMMIT_CREATE; - tblk->ino = ip->i_ino; - tblk->u.ixpxd = JFS_IP(ip)->ixpxd; - - ino = ip->i_ino; - if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack))) { - txAbort(tid, 0); - goto out3; - } - - ip->i_op = &jfs_file_inode_operations; - jfs_ip->dev = new_encode_dev(rdev); - init_special_inode(ip, ip->i_mode, rdev); - - mark_inode_dirty(ip); - - dir->i_ctime = dir->i_mtime = CURRENT_TIME; - - mark_inode_dirty(dir); - - iplist[0] = dir; - iplist[1] = ip; - rc = txCommit(tid, 2, iplist, 0); - - out3: - txEnd(tid); - mutex_unlock(&JFS_IP(ip)->commit_mutex); - 
mutex_unlock(&JFS_IP(dir)->commit_mutex); - if (rc) { - free_ea_wmap(ip); - clear_nlink(ip); - unlock_new_inode(ip); - iput(ip); - } else { - d_instantiate(dentry, ip); - unlock_new_inode(ip); - } - - out1: - free_UCSname(&dname); - - out: - jfs_info("jfs_mknod: returning %d", rc); - return rc; -} - -static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd) -{ - struct btstack btstack; - ino_t inum; - struct inode *ip; - struct component_name key; - int rc; - - jfs_info("jfs_lookup: name = %s", dentry->d_name.name); - - if ((rc = get_UCSname(&key, dentry))) - return ERR_PTR(rc); - rc = dtSearch(dip, &key, &inum, &btstack, JFS_LOOKUP); - free_UCSname(&key); - if (rc == -ENOENT) { - ip = NULL; - } else if (rc) { - jfs_err("jfs_lookup: dtSearch returned %d", rc); - ip = ERR_PTR(rc); - } else { - ip = jfs_iget(dip->i_sb, inum); - if (IS_ERR(ip)) - jfs_err("jfs_lookup: iget failed on inum %d", (uint)inum); - } - - return d_splice_alias(ip, dentry); -} - -static struct inode *jfs_nfs_get_inode(struct super_block *sb, - u64 ino, u32 generation) -{ - struct inode *inode; - - if (ino == 0) - return ERR_PTR(-ESTALE); - inode = jfs_iget(sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - - if (generation && inode->i_generation != generation) { - iput(inode); - return ERR_PTR(-ESTALE); - } - - return inode; -} - -struct dentry *jfs_fh_to_dentry(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) -{ - return generic_fh_to_dentry(sb, fid, fh_len, fh_type, - jfs_nfs_get_inode); -} - -struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) -{ - return generic_fh_to_parent(sb, fid, fh_len, fh_type, - jfs_nfs_get_inode); -} - -struct dentry *jfs_get_parent(struct dentry *dentry) -{ - unsigned long parent_ino; - - parent_ino = - le32_to_cpu(JFS_IP(dentry->d_inode)->i_dtroot.header.idotdot); - - return d_obtain_alias(jfs_iget(dentry->d_inode->i_sb, parent_ino)); -} - -const struct 
inode_operations jfs_dir_inode_operations = { - .create = jfs_create, - .lookup = jfs_lookup, - .link = jfs_link, - .unlink = jfs_unlink, - .symlink = jfs_symlink, - .mkdir = jfs_mkdir, - .rmdir = jfs_rmdir, - .mknod = jfs_mknod, - .rename = jfs_rename, - .setxattr = jfs_setxattr, - .getxattr = jfs_getxattr, - .listxattr = jfs_listxattr, - .removexattr = jfs_removexattr, - .setattr = jfs_setattr, -#ifdef CONFIG_JFS_POSIX_ACL - .get_acl = jfs_get_acl, -#endif -}; - -const struct file_operations jfs_dir_operations = { - .read = generic_read_dir, - .readdir = jfs_readdir, - .fsync = jfs_fsync, - .unlocked_ioctl = jfs_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = jfs_compat_ioctl, -#endif - .llseek = generic_file_llseek, -}; - -static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode, - struct qstr *this) -{ - unsigned long hash; - int i; - - hash = init_name_hash(); - for (i=0; i < this->len; i++) - hash = partial_name_hash(tolower(this->name[i]), hash); - this->hash = end_name_hash(hash); - - return 0; -} - -static int jfs_ci_compare(const struct dentry *parent, - const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, - unsigned int len, const char *str, const struct qstr *name) -{ - int i, result = 1; - - if (len != name->len) - goto out; - for (i=0; i < len; i++) { - if (tolower(str[i]) != tolower(name->name[i])) - goto out; - } - result = 0; -out: - return result; -} - -static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) -{ - /* - * This is not negative dentry. Always valid. - * - * Note, rename() to existing directory entry will have ->d_inode, - * and will use existing name which isn't specified name by user. - * - * We may be able to drop this positive dentry here. But dropping - * positive dentry isn't good idea. So it's unsupported like - * rename("filename", "FILENAME") for now. 
- */ - if (dentry->d_inode) - return 1; - - /* - * This may be nfsd (or something), anyway, we can't see the - * intent of this. So, since this can be for creation, drop it. - */ - if (!nd) - return 0; - - /* - * Drop the negative dentry, in order to make sure to use the - * case sensitive name which is specified by user if this is - * for creation. - */ - if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) - return 0; - return 1; -} - -const struct dentry_operations jfs_ci_dentry_operations = -{ - .d_hash = jfs_ci_hash, - .d_compare = jfs_ci_compare, - .d_revalidate = jfs_ci_revalidate, -}; diff --git a/ANDROID_3.4.5/fs/jfs/resize.c b/ANDROID_3.4.5/fs/jfs/resize.c deleted file mode 100644 index 8d0c1c7c..00000000 --- a/ANDROID_3.4.5/fs/jfs/resize.c +++ /dev/null @@ -1,543 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -#include <linux/fs.h> -#include <linux/buffer_head.h> -#include <linux/quotaops.h> -#include "jfs_incore.h" -#include "jfs_filsys.h" -#include "jfs_metapage.h" -#include "jfs_dinode.h" -#include "jfs_imap.h" -#include "jfs_dmap.h" -#include "jfs_superblock.h" -#include "jfs_txnmgr.h" -#include "jfs_debug.h" - -#define BITSPERPAGE (PSIZE << 3) -#define L2MEGABYTE 20 -#define MEGABYTE (1 << L2MEGABYTE) -#define MEGABYTE32 (MEGABYTE << 5) - -/* convert block number to bmap file page number */ -#define BLKTODMAPN(b)\ - (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) - -/* - * jfs_extendfs() - * - * function: extend file system; - * - * |-------------------------------|----------|----------| - * file system space fsck inline log - * workspace space - * - * input: - * new LVSize: in LV blocks (required) - * new LogSize: in LV blocks (optional) - * new FSSize: in LV blocks (optional) - * - * new configuration: - * 1. set new LogSize as specified or default from new LVSize; - * 2. compute new FSCKSize from new LVSize; - * 3. 
set new FSSize as MIN(FSSize, LVSize-(LogSize+FSCKSize)) where - * assert(new FSSize >= old FSSize), - * i.e., file system must not be shrunk; - */ -int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) -{ - int rc = 0; - struct jfs_sb_info *sbi = JFS_SBI(sb); - struct inode *ipbmap = sbi->ipbmap; - struct inode *ipbmap2; - struct inode *ipimap = sbi->ipimap; - struct jfs_log *log = sbi->log; - struct bmap *bmp = sbi->bmap; - s64 newLogAddress, newFSCKAddress; - int newFSCKSize; - s64 newMapSize = 0, mapSize; - s64 XAddress, XSize, nblocks, xoff, xaddr, t64; - s64 oldLVSize; - s64 newFSSize; - s64 VolumeSize; - int newNpages = 0, nPages, newPage, xlen, t32; - int tid; - int log_formatted = 0; - struct inode *iplist[1]; - struct jfs_superblock *j_sb, *j_sb2; - s64 old_agsize; - int agsizechanged = 0; - struct buffer_head *bh, *bh2; - - /* If the volume hasn't grown, get out now */ - - if (sbi->mntflag & JFS_INLINELOG) - oldLVSize = addressPXD(&sbi->logpxd) + lengthPXD(&sbi->logpxd); - else - oldLVSize = addressPXD(&sbi->fsckpxd) + - lengthPXD(&sbi->fsckpxd); - - if (oldLVSize >= newLVSize) { - printk(KERN_WARNING - "jfs_extendfs: volume hasn't grown, returning\n"); - goto out; - } - - VolumeSize = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; - - if (VolumeSize) { - if (newLVSize > VolumeSize) { - printk(KERN_WARNING "jfs_extendfs: invalid size\n"); - rc = -EINVAL; - goto out; - } - } else { - /* check the device */ - bh = sb_bread(sb, newLVSize - 1); - if (!bh) { - printk(KERN_WARNING "jfs_extendfs: invalid size\n"); - rc = -EINVAL; - goto out; - } - bforget(bh); - } - - /* Can't extend write-protected drive */ - - if (isReadOnly(ipbmap)) { - printk(KERN_WARNING "jfs_extendfs: read-only file system\n"); - rc = -EROFS; - goto out; - } - - /* - * reconfigure LV spaces - * --------------------- - * - * validate new size, or, if not specified, determine new size - */ - - /* - * reconfigure inline log space: - */ - if ((sbi->mntflag & 
JFS_INLINELOG)) { - if (newLogSize == 0) { - /* - * no size specified: default to 1/256 of aggregate - * size; rounded up to a megabyte boundary; - */ - newLogSize = newLVSize >> 8; - t32 = (1 << (20 - sbi->l2bsize)) - 1; - newLogSize = (newLogSize + t32) & ~t32; - newLogSize = - min(newLogSize, MEGABYTE32 >> sbi->l2bsize); - } else { - /* - * convert the newLogSize to fs blocks. - * - * Since this is given in megabytes, it will always be - * an even number of pages. - */ - newLogSize = (newLogSize * MEGABYTE) >> sbi->l2bsize; - } - - } else - newLogSize = 0; - - newLogAddress = newLVSize - newLogSize; - - /* - * reconfigure fsck work space: - * - * configure it to the end of the logical volume regardless of - * whether file system extends to the end of the aggregate; - * Need enough 4k pages to cover: - * - 1 bit per block in aggregate rounded up to BPERDMAP boundary - * - 1 extra page to handle control page and intermediate level pages - * - 50 extra pages for the chkdsk service log - */ - t64 = ((newLVSize - newLogSize + BPERDMAP - 1) >> L2BPERDMAP) - << L2BPERDMAP; - t32 = DIV_ROUND_UP(t64, BITSPERPAGE) + 1 + 50; - newFSCKSize = t32 << sbi->l2nbperpage; - newFSCKAddress = newLogAddress - newFSCKSize; - - /* - * compute new file system space; - */ - newFSSize = newLVSize - newLogSize - newFSCKSize; - - /* file system cannot be shrunk */ - if (newFSSize < bmp->db_mapsize) { - rc = -EINVAL; - goto out; - } - - /* - * If we're expanding enough that the inline log does not overlap - * the old one, we can format the new log before we quiesce the - * filesystem. - */ - if ((sbi->mntflag & JFS_INLINELOG) && (newLogAddress > oldLVSize)) { - if ((rc = lmLogFormat(log, newLogAddress, newLogSize))) - goto out; - log_formatted = 1; - } - /* - * quiesce file system - * - * (prepare to move the inline log and to prevent map update) - * - * block any new transactions and wait for completion of - * all wip transactions and flush modified pages s.t. 
- * on-disk file system is in consistent state and - * log is not required for recovery. - */ - txQuiesce(sb); - - /* Reset size of direct inode */ - sbi->direct_inode->i_size = sb->s_bdev->bd_inode->i_size; - - if (sbi->mntflag & JFS_INLINELOG) { - /* - * deactivate old inline log - */ - lmLogShutdown(log); - - /* - * mark on-disk super block for fs in transition; - * - * update on-disk superblock for the new space configuration - * of inline log space and fsck work space descriptors: - * N.B. FS descriptor is NOT updated; - * - * crash recovery: - * logredo(): if FM_EXTENDFS, return to fsck() for cleanup; - * fsck(): if FM_EXTENDFS, reformat inline log and fsck - * workspace from superblock inline log descriptor and fsck - * workspace descriptor; - */ - - /* read in superblock */ - if ((rc = readSuper(sb, &bh))) - goto error_out; - j_sb = (struct jfs_superblock *)bh->b_data; - - /* mark extendfs() in progress */ - j_sb->s_state |= cpu_to_le32(FM_EXTENDFS); - j_sb->s_xsize = cpu_to_le64(newFSSize); - PXDaddress(&j_sb->s_xfsckpxd, newFSCKAddress); - PXDlength(&j_sb->s_xfsckpxd, newFSCKSize); - PXDaddress(&j_sb->s_xlogpxd, newLogAddress); - PXDlength(&j_sb->s_xlogpxd, newLogSize); - - /* synchronously update superblock */ - mark_buffer_dirty(bh); - sync_dirty_buffer(bh); - brelse(bh); - - /* - * format new inline log synchronously; - * - * crash recovery: if log move in progress, - * reformat log and exit success; - */ - if (!log_formatted) - if ((rc = lmLogFormat(log, newLogAddress, newLogSize))) - goto error_out; - - /* - * activate new log - */ - log->base = newLogAddress; - log->size = newLogSize >> (L2LOGPSIZE - sb->s_blocksize_bits); - if ((rc = lmLogInit(log))) - goto error_out; - } - - /* - * extend block allocation map - * --------------------------- - * - * extendfs() for new extension, retry after crash recovery; - * - * note: both logredo() and fsck() rebuild map from - * the bitmap and configuration parameter from superblock - * (disregarding all other 
control information in the map); - * - * superblock: - * s_size: aggregate size in physical blocks; - */ - /* - * compute the new block allocation map configuration - * - * map dinode: - * di_size: map file size in byte; - * di_nblocks: number of blocks allocated for map file; - * di_mapsize: number of blocks in aggregate (covered by map); - * map control page: - * db_mapsize: number of blocks in aggregate (covered by map); - */ - newMapSize = newFSSize; - /* number of data pages of new bmap file: - * roundup new size to full dmap page boundary and - * add 1 extra dmap page for next extendfs() - */ - t64 = (newMapSize - 1) + BPERDMAP; - newNpages = BLKTODMAPN(t64) + 1; - - /* - * extend map from current map (WITHOUT growing mapfile) - * - * map new extension with unmapped part of the last partial - * dmap page, if applicable, and extra page(s) allocated - * at end of bmap by mkfs() or previous extendfs(); - */ - extendBmap: - /* compute number of blocks requested to extend */ - mapSize = bmp->db_mapsize; - XAddress = mapSize; /* eXtension Address */ - XSize = newMapSize - mapSize; /* eXtension Size */ - old_agsize = bmp->db_agsize; /* We need to know if this changes */ - - /* compute number of blocks that can be extended by current mapfile */ - t64 = dbMapFileSizeToMapSize(ipbmap); - if (mapSize > t64) { - printk(KERN_ERR "jfs_extendfs: mapSize (0x%Lx) > t64 (0x%Lx)\n", - (long long) mapSize, (long long) t64); - rc = -EIO; - goto error_out; - } - nblocks = min(t64 - mapSize, XSize); - - /* - * update map pages for new extension: - * - * update/init dmap and bubble up the control hierarchy - * incrementally fold up dmaps into upper levels; - * update bmap control page; - */ - if ((rc = dbExtendFS(ipbmap, XAddress, nblocks))) - goto error_out; - - agsizechanged |= (bmp->db_agsize != old_agsize); - - /* - * the map now has extended to cover additional nblocks: - * dn_mapsize = oldMapsize + nblocks; - */ - /* ipbmap->i_mapsize += nblocks; */ - XSize -= nblocks; - - /* 
- * grow map file to cover remaining extension - * and/or one extra dmap page for next extendfs(); - * - * allocate new map pages and its backing blocks, and - * update map file xtree - */ - /* compute number of data pages of current bmap file */ - nPages = ipbmap->i_size >> L2PSIZE; - - /* need to grow map file ? */ - if (nPages == newNpages) - goto finalizeBmap; - - /* - * grow bmap file for the new map pages required: - * - * allocate growth at the start of newly extended region; - * bmap file only grows sequentially, i.e., both data pages - * and possibly xtree index pages may grow in append mode, - * s.t. logredo() can reconstruct pre-extension state - * by washing away bmap file of pages outside s_size boundary; - */ - /* - * journal map file growth as if a regular file growth: - * (note: bmap is created with di_mode = IFJOURNAL|IFREG); - * - * journaling of bmap file growth is not required since - * logredo() do/can not use log records of bmap file growth - * but it provides careful write semantics, pmap update, etc.; - */ - /* synchronous write of data pages: bmap data pages are - * cached in meta-data cache, and not written out - * by txCommit(); - */ - filemap_fdatawait(ipbmap->i_mapping); - filemap_write_and_wait(ipbmap->i_mapping); - diWriteSpecial(ipbmap, 0); - - newPage = nPages; /* first new page number */ - xoff = newPage << sbi->l2nbperpage; - xlen = (newNpages - nPages) << sbi->l2nbperpage; - xlen = min(xlen, (int) nblocks) & ~(sbi->nbperpage - 1); - xaddr = XAddress; - - tid = txBegin(sb, COMMIT_FORCE); - - if ((rc = xtAppend(tid, ipbmap, 0, xoff, nblocks, &xlen, &xaddr, 0))) { - txEnd(tid); - goto error_out; - } - /* update bmap file size */ - ipbmap->i_size += xlen << sbi->l2bsize; - inode_add_bytes(ipbmap, xlen << sbi->l2bsize); - - iplist[0] = ipbmap; - rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); - - txEnd(tid); - - if (rc) - goto error_out; - - /* - * map file has been grown now to cover extension to further out; - * di_size = new map 
file size; - * - * if huge extension, the previous extension based on previous - * map file size may not have been sufficient to cover whole extension - * (it could have been used up for new map pages), - * but the newly grown map file now covers lot bigger new free space - * available for further extension of map; - */ - /* any more blocks to extend ? */ - if (XSize) - goto extendBmap; - - finalizeBmap: - /* finalize bmap */ - dbFinalizeBmap(ipbmap); - - /* - * update inode allocation map - * --------------------------- - * - * move iag lists from old to new iag; - * agstart field is not updated for logredo() to reconstruct - * iag lists if system crash occurs. - * (computation of ag number from agstart based on agsize - * will correctly identify the new ag); - */ - /* if new AG size the same as old AG size, done! */ - if (agsizechanged) { - if ((rc = diExtendFS(ipimap, ipbmap))) - goto error_out; - - /* finalize imap */ - if ((rc = diSync(ipimap))) - goto error_out; - } - - /* - * finalize - * -------- - * - * extension is committed when on-disk super block is - * updated with new descriptors: logredo will recover - * crash before it to pre-extension state; - */ - - /* sync log to skip log replay of bmap file growth transaction; */ - /* lmLogSync(log, 1); */ - - /* - * synchronous write bmap global control page; - * for crash before completion of write - * logredo() will recover to pre-extendfs state; - * for crash after completion of write, - * logredo() will recover post-extendfs state; - */ - if ((rc = dbSync(ipbmap))) - goto error_out; - - /* - * copy primary bmap inode to secondary bmap inode - */ - - ipbmap2 = diReadSpecial(sb, BMAP_I, 1); - if (ipbmap2 == NULL) { - printk(KERN_ERR "jfs_extendfs: diReadSpecial(bmap) failed\n"); - goto error_out; - } - memcpy(&JFS_IP(ipbmap2)->i_xtroot, &JFS_IP(ipbmap)->i_xtroot, 288); - ipbmap2->i_size = ipbmap->i_size; - ipbmap2->i_blocks = ipbmap->i_blocks; - - diWriteSpecial(ipbmap2, 1); - diFreeSpecial(ipbmap2); - - /* 
- * update superblock - */ - if ((rc = readSuper(sb, &bh))) - goto error_out; - j_sb = (struct jfs_superblock *)bh->b_data; - - /* mark extendfs() completion */ - j_sb->s_state &= cpu_to_le32(~FM_EXTENDFS); - j_sb->s_size = cpu_to_le64(bmp->db_mapsize << - le16_to_cpu(j_sb->s_l2bfactor)); - j_sb->s_agsize = cpu_to_le32(bmp->db_agsize); - - /* update inline log space descriptor */ - if (sbi->mntflag & JFS_INLINELOG) { - PXDaddress(&(j_sb->s_logpxd), newLogAddress); - PXDlength(&(j_sb->s_logpxd), newLogSize); - } - - /* record log's mount serial number */ - j_sb->s_logserial = cpu_to_le32(log->serial); - - /* update fsck work space descriptor */ - PXDaddress(&(j_sb->s_fsckpxd), newFSCKAddress); - PXDlength(&(j_sb->s_fsckpxd), newFSCKSize); - j_sb->s_fscklog = 1; - /* sb->s_fsckloglen remains the same */ - - /* Update secondary superblock */ - bh2 = sb_bread(sb, SUPER2_OFF >> sb->s_blocksize_bits); - if (bh2) { - j_sb2 = (struct jfs_superblock *)bh2->b_data; - memcpy(j_sb2, j_sb, sizeof (struct jfs_superblock)); - - mark_buffer_dirty(bh); - sync_dirty_buffer(bh2); - brelse(bh2); - } - - /* write primary superblock */ - mark_buffer_dirty(bh); - sync_dirty_buffer(bh); - brelse(bh); - - goto resume; - - error_out: - jfs_error(sb, "jfs_extendfs"); - - resume: - /* - * resume file system transactions - */ - txResume(sb); - - out: - return rc; -} diff --git a/ANDROID_3.4.5/fs/jfs/super.c b/ANDROID_3.4.5/fs/jfs/super.c deleted file mode 100644 index 4a82950f..00000000 --- a/ANDROID_3.4.5/fs/jfs/super.c +++ /dev/null @@ -1,905 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/fs.h> -#include <linux/module.h> -#include <linux/parser.h> -#include <linux/completion.h> -#include <linux/vfs.h> -#include <linux/quotaops.h> -#include <linux/mount.h> -#include <linux/moduleparam.h> -#include <linux/kthread.h> -#include <linux/posix_acl.h> -#include <linux/buffer_head.h> -#include <linux/exportfs.h> -#include <linux/crc32.h> -#include <linux/slab.h> -#include <asm/uaccess.h> -#include <linux/seq_file.h> - -#include "jfs_incore.h" -#include "jfs_filsys.h" -#include "jfs_inode.h" -#include "jfs_metapage.h" -#include "jfs_superblock.h" -#include "jfs_dmap.h" -#include "jfs_imap.h" -#include "jfs_acl.h" -#include "jfs_debug.h" - -MODULE_DESCRIPTION("The Journaled Filesystem (JFS)"); -MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM"); -MODULE_LICENSE("GPL"); - -static struct kmem_cache * jfs_inode_cachep; - -static const struct super_operations jfs_super_operations; -static const struct export_operations jfs_export_operations; -static struct file_system_type jfs_fs_type; - -#define MAX_COMMIT_THREADS 64 -static int commit_threads = 0; -module_param(commit_threads, int, 0); -MODULE_PARM_DESC(commit_threads, "Number of commit threads"); - -static struct task_struct *jfsCommitThread[MAX_COMMIT_THREADS]; -struct task_struct *jfsIOthread; -struct task_struct *jfsSyncThread; - -#ifdef CONFIG_JFS_DEBUG -int jfsloglevel = JFS_LOGLEVEL_WARN; -module_param(jfsloglevel, int, 0644); -MODULE_PARM_DESC(jfsloglevel, "Specify JFS loglevel (0, 1 or 2)"); -#endif - -static void 
jfs_handle_error(struct super_block *sb) -{ - struct jfs_sb_info *sbi = JFS_SBI(sb); - - if (sb->s_flags & MS_RDONLY) - return; - - updateSuper(sb, FM_DIRTY); - - if (sbi->flag & JFS_ERR_PANIC) - panic("JFS (device %s): panic forced after error\n", - sb->s_id); - else if (sbi->flag & JFS_ERR_REMOUNT_RO) { - jfs_err("ERROR: (device %s): remounting filesystem " - "as read-only\n", - sb->s_id); - sb->s_flags |= MS_RDONLY; - } - - /* nothing is done for continue beyond marking the superblock dirty */ -} - -void jfs_error(struct super_block *sb, const char * function, ...) -{ - static char error_buf[256]; - va_list args; - - va_start(args, function); - vsnprintf(error_buf, sizeof(error_buf), function, args); - va_end(args); - - printk(KERN_ERR "ERROR: (device %s): %s\n", sb->s_id, error_buf); - - jfs_handle_error(sb); -} - -static struct inode *jfs_alloc_inode(struct super_block *sb) -{ - struct jfs_inode_info *jfs_inode; - - jfs_inode = kmem_cache_alloc(jfs_inode_cachep, GFP_NOFS); - if (!jfs_inode) - return NULL; - return &jfs_inode->vfs_inode; -} - -static void jfs_i_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - struct jfs_inode_info *ji = JFS_IP(inode); - kmem_cache_free(jfs_inode_cachep, ji); -} - -static void jfs_destroy_inode(struct inode *inode) -{ - struct jfs_inode_info *ji = JFS_IP(inode); - - BUG_ON(!list_empty(&ji->anon_inode_list)); - - spin_lock_irq(&ji->ag_lock); - if (ji->active_ag != -1) { - struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap; - atomic_dec(&bmap->db_active[ji->active_ag]); - ji->active_ag = -1; - } - spin_unlock_irq(&ji->ag_lock); - call_rcu(&inode->i_rcu, jfs_i_callback); -} - -static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) -{ - struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb); - s64 maxinodes; - struct inomap *imap = JFS_IP(sbi->ipimap)->i_imap; - - jfs_info("In jfs_statfs"); - buf->f_type = JFS_SUPER_MAGIC; - buf->f_bsize = sbi->bsize; - buf->f_blocks = 
sbi->bmap->db_mapsize; - buf->f_bfree = sbi->bmap->db_nfree; - buf->f_bavail = sbi->bmap->db_nfree; - /* - * If we really return the number of allocated & free inodes, some - * applications will fail because they won't see enough free inodes. - * We'll try to calculate some guess as to how may inodes we can - * really allocate - * - * buf->f_files = atomic_read(&imap->im_numinos); - * buf->f_ffree = atomic_read(&imap->im_numfree); - */ - maxinodes = min((s64) atomic_read(&imap->im_numinos) + - ((sbi->bmap->db_nfree >> imap->im_l2nbperiext) - << L2INOSPEREXT), (s64) 0xffffffffLL); - buf->f_files = maxinodes; - buf->f_ffree = maxinodes - (atomic_read(&imap->im_numinos) - - atomic_read(&imap->im_numfree)); - buf->f_fsid.val[0] = (u32)crc32_le(0, sbi->uuid, sizeof(sbi->uuid)/2); - buf->f_fsid.val[1] = (u32)crc32_le(0, sbi->uuid + sizeof(sbi->uuid)/2, - sizeof(sbi->uuid)/2); - - buf->f_namelen = JFS_NAME_MAX; - return 0; -} - -static void jfs_put_super(struct super_block *sb) -{ - struct jfs_sb_info *sbi = JFS_SBI(sb); - int rc; - - jfs_info("In jfs_put_super"); - - dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); - - rc = jfs_umount(sb); - if (rc) - jfs_err("jfs_umount failed with return code %d", rc); - - unload_nls(sbi->nls_tab); - - truncate_inode_pages(sbi->direct_inode->i_mapping, 0); - iput(sbi->direct_inode); - - kfree(sbi); -} - -enum { - Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, - Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, - Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask -}; - -static const match_table_t tokens = { - {Opt_integrity, "integrity"}, - {Opt_nointegrity, "nointegrity"}, - {Opt_iocharset, "iocharset=%s"}, - {Opt_resize, "resize=%u"}, - {Opt_resize_nosize, "resize"}, - {Opt_errors, "errors=%s"}, - {Opt_ignore, "noquota"}, - {Opt_ignore, "quota"}, - {Opt_usrquota, "usrquota"}, - {Opt_grpquota, "grpquota"}, - {Opt_uid, "uid=%u"}, - {Opt_gid, "gid=%u"}, - {Opt_umask, "umask=%u"}, - 
{Opt_err, NULL} -}; - -static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, - int *flag) -{ - void *nls_map = (void *)-1; /* -1: no change; NULL: none */ - char *p; - struct jfs_sb_info *sbi = JFS_SBI(sb); - - *newLVSize = 0; - - if (!options) - return 1; - - while ((p = strsep(&options, ",")) != NULL) { - substring_t args[MAX_OPT_ARGS]; - int token; - if (!*p) - continue; - - token = match_token(p, tokens, args); - switch (token) { - case Opt_integrity: - *flag &= ~JFS_NOINTEGRITY; - break; - case Opt_nointegrity: - *flag |= JFS_NOINTEGRITY; - break; - case Opt_ignore: - /* Silently ignore the quota options */ - /* Don't do anything ;-) */ - break; - case Opt_iocharset: - if (nls_map && nls_map != (void *) -1) - unload_nls(nls_map); - if (!strcmp(args[0].from, "none")) - nls_map = NULL; - else { - nls_map = load_nls(args[0].from); - if (!nls_map) { - printk(KERN_ERR - "JFS: charset not found\n"); - goto cleanup; - } - } - break; - case Opt_resize: - { - char *resize = args[0].from; - *newLVSize = simple_strtoull(resize, &resize, 0); - break; - } - case Opt_resize_nosize: - { - *newLVSize = sb->s_bdev->bd_inode->i_size >> - sb->s_blocksize_bits; - if (*newLVSize == 0) - printk(KERN_ERR - "JFS: Cannot determine volume size\n"); - break; - } - case Opt_errors: - { - char *errors = args[0].from; - if (!errors || !*errors) - goto cleanup; - if (!strcmp(errors, "continue")) { - *flag &= ~JFS_ERR_REMOUNT_RO; - *flag &= ~JFS_ERR_PANIC; - *flag |= JFS_ERR_CONTINUE; - } else if (!strcmp(errors, "remount-ro")) { - *flag &= ~JFS_ERR_CONTINUE; - *flag &= ~JFS_ERR_PANIC; - *flag |= JFS_ERR_REMOUNT_RO; - } else if (!strcmp(errors, "panic")) { - *flag &= ~JFS_ERR_CONTINUE; - *flag &= ~JFS_ERR_REMOUNT_RO; - *flag |= JFS_ERR_PANIC; - } else { - printk(KERN_ERR - "JFS: %s is an invalid error handler\n", - errors); - goto cleanup; - } - break; - } - -#ifdef CONFIG_QUOTA - case Opt_quota: - case Opt_usrquota: - *flag |= JFS_USRQUOTA; - break; - case 
Opt_grpquota: - *flag |= JFS_GRPQUOTA; - break; -#else - case Opt_usrquota: - case Opt_grpquota: - case Opt_quota: - printk(KERN_ERR - "JFS: quota operations not supported\n"); - break; -#endif - case Opt_uid: - { - char *uid = args[0].from; - sbi->uid = simple_strtoul(uid, &uid, 0); - break; - } - case Opt_gid: - { - char *gid = args[0].from; - sbi->gid = simple_strtoul(gid, &gid, 0); - break; - } - case Opt_umask: - { - char *umask = args[0].from; - sbi->umask = simple_strtoul(umask, &umask, 8); - if (sbi->umask & ~0777) { - printk(KERN_ERR - "JFS: Invalid value of umask\n"); - goto cleanup; - } - break; - } - default: - printk("jfs: Unrecognized mount option \"%s\" " - " or missing value\n", p); - goto cleanup; - } - } - - if (nls_map != (void *) -1) { - /* Discard old (if remount) */ - unload_nls(sbi->nls_tab); - sbi->nls_tab = nls_map; - } - return 1; - -cleanup: - if (nls_map && nls_map != (void *) -1) - unload_nls(nls_map); - return 0; -} - -static int jfs_remount(struct super_block *sb, int *flags, char *data) -{ - s64 newLVSize = 0; - int rc = 0; - int flag = JFS_SBI(sb)->flag; - int ret; - - if (!parse_options(data, sb, &newLVSize, &flag)) { - return -EINVAL; - } - - if (newLVSize) { - if (sb->s_flags & MS_RDONLY) { - printk(KERN_ERR - "JFS: resize requires volume to be mounted read-write\n"); - return -EROFS; - } - rc = jfs_extendfs(sb, newLVSize, 0); - if (rc) - return rc; - } - - if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { - /* - * Invalidate any previously read metadata. 
fsck may have - * changed the on-disk data since we mounted r/o - */ - truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0); - - JFS_SBI(sb)->flag = flag; - ret = jfs_mount_rw(sb, 1); - - /* mark the fs r/w for quota activity */ - sb->s_flags &= ~MS_RDONLY; - - dquot_resume(sb, -1); - return ret; - } - if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) { - rc = dquot_suspend(sb, -1); - if (rc < 0) { - return rc; - } - rc = jfs_umount_rw(sb); - JFS_SBI(sb)->flag = flag; - return rc; - } - if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY)) - if (!(sb->s_flags & MS_RDONLY)) { - rc = jfs_umount_rw(sb); - if (rc) - return rc; - - JFS_SBI(sb)->flag = flag; - ret = jfs_mount_rw(sb, 1); - return ret; - } - JFS_SBI(sb)->flag = flag; - - return 0; -} - -static int jfs_fill_super(struct super_block *sb, void *data, int silent) -{ - struct jfs_sb_info *sbi; - struct inode *inode; - int rc; - s64 newLVSize = 0; - int flag, ret = -EINVAL; - - jfs_info("In jfs_read_super: s_flags=0x%lx", sb->s_flags); - - if (!new_valid_dev(sb->s_bdev->bd_dev)) - return -EOVERFLOW; - - sbi = kzalloc(sizeof (struct jfs_sb_info), GFP_KERNEL); - if (!sbi) - return -ENOMEM; - - sb->s_fs_info = sbi; - sb->s_max_links = JFS_LINK_MAX; - sbi->sb = sb; - sbi->uid = sbi->gid = sbi->umask = -1; - - /* initialize the mount flag and determine the default error handler */ - flag = JFS_ERR_REMOUNT_RO; - - if (!parse_options((char *) data, sb, &newLVSize, &flag)) - goto out_kfree; - sbi->flag = flag; - -#ifdef CONFIG_JFS_POSIX_ACL - sb->s_flags |= MS_POSIXACL; -#endif - - if (newLVSize) { - printk(KERN_ERR "resize option for remount only\n"); - goto out_kfree; - } - - /* - * Initialize blocksize to 4K. - */ - sb_set_blocksize(sb, PSIZE); - - /* - * Set method vectors. 
- */ - sb->s_op = &jfs_super_operations; - sb->s_export_op = &jfs_export_operations; -#ifdef CONFIG_QUOTA - sb->dq_op = &dquot_operations; - sb->s_qcop = &dquot_quotactl_ops; -#endif - - /* - * Initialize direct-mapping inode/address-space - */ - inode = new_inode(sb); - if (inode == NULL) { - ret = -ENOMEM; - goto out_unload; - } - inode->i_ino = 0; - inode->i_size = sb->s_bdev->bd_inode->i_size; - inode->i_mapping->a_ops = &jfs_metapage_aops; - insert_inode_hash(inode); - mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); - - sbi->direct_inode = inode; - - rc = jfs_mount(sb); - if (rc) { - if (!silent) { - jfs_err("jfs_mount failed w/return code = %d", rc); - } - goto out_mount_failed; - } - if (sb->s_flags & MS_RDONLY) - sbi->log = NULL; - else { - rc = jfs_mount_rw(sb, 0); - if (rc) { - if (!silent) { - jfs_err("jfs_mount_rw failed, return code = %d", - rc); - } - goto out_no_rw; - } - } - - sb->s_magic = JFS_SUPER_MAGIC; - - if (sbi->mntflag & JFS_OS2) - sb->s_d_op = &jfs_ci_dentry_operations; - - inode = jfs_iget(sb, ROOT_I); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); - goto out_no_rw; - } - sb->s_root = d_make_root(inode); - if (!sb->s_root) - goto out_no_root; - - /* logical blocks are represented by 40 bits in pxd_t, etc. */ - sb->s_maxbytes = ((u64) sb->s_blocksize) << 40; -#if BITS_PER_LONG == 32 - /* - * Page cache is indexed by long. 
- * I would use MAX_LFS_FILESIZE, but it's only half as big - */ - sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, (u64)sb->s_maxbytes); -#endif - sb->s_time_gran = 1; - return 0; - -out_no_root: - jfs_err("jfs_read_super: get root dentry failed"); - -out_no_rw: - rc = jfs_umount(sb); - if (rc) { - jfs_err("jfs_umount failed with return code %d", rc); - } -out_mount_failed: - filemap_write_and_wait(sbi->direct_inode->i_mapping); - truncate_inode_pages(sbi->direct_inode->i_mapping, 0); - make_bad_inode(sbi->direct_inode); - iput(sbi->direct_inode); - sbi->direct_inode = NULL; -out_unload: - if (sbi->nls_tab) - unload_nls(sbi->nls_tab); -out_kfree: - kfree(sbi); - return ret; -} - -static int jfs_freeze(struct super_block *sb) -{ - struct jfs_sb_info *sbi = JFS_SBI(sb); - struct jfs_log *log = sbi->log; - - if (!(sb->s_flags & MS_RDONLY)) { - txQuiesce(sb); - lmLogShutdown(log); - updateSuper(sb, FM_CLEAN); - } - return 0; -} - -static int jfs_unfreeze(struct super_block *sb) -{ - struct jfs_sb_info *sbi = JFS_SBI(sb); - struct jfs_log *log = sbi->log; - int rc = 0; - - if (!(sb->s_flags & MS_RDONLY)) { - updateSuper(sb, FM_MOUNT); - if ((rc = lmLogInit(log))) - jfs_err("jfs_unlock failed with return code %d", rc); - else - txResume(sb); - } - return 0; -} - -static struct dentry *jfs_do_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) -{ - return mount_bdev(fs_type, flags, dev_name, data, jfs_fill_super); -} - -static int jfs_sync_fs(struct super_block *sb, int wait) -{ - struct jfs_log *log = JFS_SBI(sb)->log; - - /* log == NULL indicates read-only mount */ - if (log) { - jfs_flush_journal(log, wait); - jfs_syncpt(log, 0); - } - - return 0; -} - -static int jfs_show_options(struct seq_file *seq, struct dentry *root) -{ - struct jfs_sb_info *sbi = JFS_SBI(root->d_sb); - - if (sbi->uid != -1) - seq_printf(seq, ",uid=%d", sbi->uid); - if (sbi->gid != -1) - seq_printf(seq, ",gid=%d", sbi->gid); - if (sbi->umask != -1) - 
seq_printf(seq, ",umask=%03o", sbi->umask); - if (sbi->flag & JFS_NOINTEGRITY) - seq_puts(seq, ",nointegrity"); - if (sbi->nls_tab) - seq_printf(seq, ",iocharset=%s", sbi->nls_tab->charset); - if (sbi->flag & JFS_ERR_CONTINUE) - seq_printf(seq, ",errors=continue"); - if (sbi->flag & JFS_ERR_PANIC) - seq_printf(seq, ",errors=panic"); - -#ifdef CONFIG_QUOTA - if (sbi->flag & JFS_USRQUOTA) - seq_puts(seq, ",usrquota"); - - if (sbi->flag & JFS_GRPQUOTA) - seq_puts(seq, ",grpquota"); -#endif - - return 0; -} - -#ifdef CONFIG_QUOTA - -/* Read data from quotafile - avoid pagecache and such because we cannot afford - * acquiring the locks... As quota files are never truncated and quota code - * itself serializes the operations (and no one else should touch the files) - * we don't have to be afraid of races */ -static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data, - size_t len, loff_t off) -{ - struct inode *inode = sb_dqopt(sb)->files[type]; - sector_t blk = off >> sb->s_blocksize_bits; - int err = 0; - int offset = off & (sb->s_blocksize - 1); - int tocopy; - size_t toread; - struct buffer_head tmp_bh; - struct buffer_head *bh; - loff_t i_size = i_size_read(inode); - - if (off > i_size) - return 0; - if (off+len > i_size) - len = i_size-off; - toread = len; - while (toread > 0) { - tocopy = sb->s_blocksize - offset < toread ? - sb->s_blocksize - offset : toread; - - tmp_bh.b_state = 0; - tmp_bh.b_size = 1 << inode->i_blkbits; - err = jfs_get_block(inode, blk, &tmp_bh, 0); - if (err) - return err; - if (!buffer_mapped(&tmp_bh)) /* A hole? 
*/ - memset(data, 0, tocopy); - else { - bh = sb_bread(sb, tmp_bh.b_blocknr); - if (!bh) - return -EIO; - memcpy(data, bh->b_data+offset, tocopy); - brelse(bh); - } - offset = 0; - toread -= tocopy; - data += tocopy; - blk++; - } - return len; -} - -/* Write to quotafile */ -static ssize_t jfs_quota_write(struct super_block *sb, int type, - const char *data, size_t len, loff_t off) -{ - struct inode *inode = sb_dqopt(sb)->files[type]; - sector_t blk = off >> sb->s_blocksize_bits; - int err = 0; - int offset = off & (sb->s_blocksize - 1); - int tocopy; - size_t towrite = len; - struct buffer_head tmp_bh; - struct buffer_head *bh; - - mutex_lock(&inode->i_mutex); - while (towrite > 0) { - tocopy = sb->s_blocksize - offset < towrite ? - sb->s_blocksize - offset : towrite; - - tmp_bh.b_state = 0; - tmp_bh.b_size = 1 << inode->i_blkbits; - err = jfs_get_block(inode, blk, &tmp_bh, 1); - if (err) - goto out; - if (offset || tocopy != sb->s_blocksize) - bh = sb_bread(sb, tmp_bh.b_blocknr); - else - bh = sb_getblk(sb, tmp_bh.b_blocknr); - if (!bh) { - err = -EIO; - goto out; - } - lock_buffer(bh); - memcpy(bh->b_data+offset, data, tocopy); - flush_dcache_page(bh->b_page); - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - unlock_buffer(bh); - brelse(bh); - offset = 0; - towrite -= tocopy; - data += tocopy; - blk++; - } -out: - if (len == towrite) { - mutex_unlock(&inode->i_mutex); - return err; - } - if (inode->i_size < off+len-towrite) - i_size_write(inode, off+len-towrite); - inode->i_version++; - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); - mutex_unlock(&inode->i_mutex); - return len - towrite; -} - -#endif - -static const struct super_operations jfs_super_operations = { - .alloc_inode = jfs_alloc_inode, - .destroy_inode = jfs_destroy_inode, - .dirty_inode = jfs_dirty_inode, - .write_inode = jfs_write_inode, - .evict_inode = jfs_evict_inode, - .put_super = jfs_put_super, - .sync_fs = jfs_sync_fs, - .freeze_fs = jfs_freeze, - 
.unfreeze_fs = jfs_unfreeze, - .statfs = jfs_statfs, - .remount_fs = jfs_remount, - .show_options = jfs_show_options, -#ifdef CONFIG_QUOTA - .quota_read = jfs_quota_read, - .quota_write = jfs_quota_write, -#endif -}; - -static const struct export_operations jfs_export_operations = { - .fh_to_dentry = jfs_fh_to_dentry, - .fh_to_parent = jfs_fh_to_parent, - .get_parent = jfs_get_parent, -}; - -static struct file_system_type jfs_fs_type = { - .owner = THIS_MODULE, - .name = "jfs", - .mount = jfs_do_mount, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, -}; - -static void init_once(void *foo) -{ - struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; - - memset(jfs_ip, 0, sizeof(struct jfs_inode_info)); - INIT_LIST_HEAD(&jfs_ip->anon_inode_list); - init_rwsem(&jfs_ip->rdwrlock); - mutex_init(&jfs_ip->commit_mutex); - init_rwsem(&jfs_ip->xattr_sem); - spin_lock_init(&jfs_ip->ag_lock); - jfs_ip->active_ag = -1; - inode_init_once(&jfs_ip->vfs_inode); -} - -static int __init init_jfs_fs(void) -{ - int i; - int rc; - - jfs_inode_cachep = - kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, - init_once); - if (jfs_inode_cachep == NULL) - return -ENOMEM; - - /* - * Metapage initialization - */ - rc = metapage_init(); - if (rc) { - jfs_err("metapage_init failed w/rc = %d", rc); - goto free_slab; - } - - /* - * Transaction Manager initialization - */ - rc = txInit(); - if (rc) { - jfs_err("txInit failed w/rc = %d", rc); - goto free_metapage; - } - - /* - * I/O completion thread (endio) - */ - jfsIOthread = kthread_run(jfsIOWait, NULL, "jfsIO"); - if (IS_ERR(jfsIOthread)) { - rc = PTR_ERR(jfsIOthread); - jfs_err("init_jfs_fs: fork failed w/rc = %d", rc); - goto end_txmngr; - } - - if (commit_threads < 1) - commit_threads = num_online_cpus(); - if (commit_threads > MAX_COMMIT_THREADS) - commit_threads = MAX_COMMIT_THREADS; - - for (i = 0; i < commit_threads; i++) { - jfsCommitThread[i] = 
kthread_run(jfs_lazycommit, NULL, "jfsCommit"); - if (IS_ERR(jfsCommitThread[i])) { - rc = PTR_ERR(jfsCommitThread[i]); - jfs_err("init_jfs_fs: fork failed w/rc = %d", rc); - commit_threads = i; - goto kill_committask; - } - } - - jfsSyncThread = kthread_run(jfs_sync, NULL, "jfsSync"); - if (IS_ERR(jfsSyncThread)) { - rc = PTR_ERR(jfsSyncThread); - jfs_err("init_jfs_fs: fork failed w/rc = %d", rc); - goto kill_committask; - } - -#ifdef PROC_FS_JFS - jfs_proc_init(); -#endif - - rc = register_filesystem(&jfs_fs_type); - if (!rc) - return 0; - -#ifdef PROC_FS_JFS - jfs_proc_clean(); -#endif - kthread_stop(jfsSyncThread); -kill_committask: - for (i = 0; i < commit_threads; i++) - kthread_stop(jfsCommitThread[i]); - kthread_stop(jfsIOthread); -end_txmngr: - txExit(); -free_metapage: - metapage_exit(); -free_slab: - kmem_cache_destroy(jfs_inode_cachep); - return rc; -} - -static void __exit exit_jfs_fs(void) -{ - int i; - - jfs_info("exit_jfs_fs called"); - - txExit(); - metapage_exit(); - - kthread_stop(jfsIOthread); - for (i = 0; i < commit_threads; i++) - kthread_stop(jfsCommitThread[i]); - kthread_stop(jfsSyncThread); -#ifdef PROC_FS_JFS - jfs_proc_clean(); -#endif - unregister_filesystem(&jfs_fs_type); - kmem_cache_destroy(jfs_inode_cachep); -} - -module_init(init_jfs_fs) -module_exit(exit_jfs_fs) diff --git a/ANDROID_3.4.5/fs/jfs/symlink.c b/ANDROID_3.4.5/fs/jfs/symlink.c deleted file mode 100644 index 205b946d..00000000 --- a/ANDROID_3.4.5/fs/jfs/symlink.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/fs.h> -#include <linux/namei.h> -#include "jfs_incore.h" -#include "jfs_inode.h" -#include "jfs_xattr.h" - -static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - char *s = JFS_IP(dentry->d_inode)->i_inline; - nd_set_link(nd, s); - return NULL; -} - -const struct inode_operations jfs_fast_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = jfs_follow_link, - .setattr = jfs_setattr, - .setxattr = jfs_setxattr, - .getxattr = jfs_getxattr, - .listxattr = jfs_listxattr, - .removexattr = jfs_removexattr, -}; - -const struct inode_operations jfs_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = page_follow_link_light, - .put_link = page_put_link, - .setattr = jfs_setattr, - .setxattr = jfs_setxattr, - .getxattr = jfs_getxattr, - .listxattr = jfs_listxattr, - .removexattr = jfs_removexattr, -}; - diff --git a/ANDROID_3.4.5/fs/jfs/xattr.c b/ANDROID_3.4.5/fs/jfs/xattr.c deleted file mode 100644 index 26683e15..00000000 --- a/ANDROID_3.4.5/fs/jfs/xattr.c +++ /dev/null @@ -1,1125 +0,0 @@ -/* - * Copyright (C) International Business Machines Corp., 2000-2004 - * Copyright (C) Christoph Hellwig, 2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/capability.h> -#include <linux/fs.h> -#include <linux/xattr.h> -#include <linux/posix_acl_xattr.h> -#include <linux/slab.h> -#include <linux/quotaops.h> -#include <linux/security.h> -#include "jfs_incore.h" -#include "jfs_superblock.h" -#include "jfs_dmap.h" -#include "jfs_debug.h" -#include "jfs_dinode.h" -#include "jfs_extent.h" -#include "jfs_metapage.h" -#include "jfs_xattr.h" -#include "jfs_acl.h" - -/* - * jfs_xattr.c: extended attribute service - * - * Overall design -- - * - * Format: - * - * Extended attribute lists (jfs_ea_list) consist of an overall size (32 bit - * value) and a variable (0 or more) number of extended attribute - * entries. Each extended attribute entry (jfs_ea) is a <name,value> double - * where <name> is constructed from a null-terminated ascii string - * (1 ... 255 bytes in the name) and <value> is arbitrary 8 bit data - * (1 ... 65535 bytes). The in-memory format is - * - * 0 1 2 4 4 + namelen + 1 - * +-------+--------+--------+----------------+-------------------+ - * | Flags | Name | Value | Name String \0 | Data . . . . | - * | | Length | Length | | | - * +-------+--------+--------+----------------+-------------------+ - * - * A jfs_ea_list then is structured as - * - * 0 4 4 + EA_SIZE(ea1) - * +------------+-------------------+--------------------+----- - * | Overall EA | First FEA Element | Second FEA Element | ..... 
- * | List Size | | | - * +------------+-------------------+--------------------+----- - * - * On-disk: - * - * FEALISTs are stored on disk using blocks allocated by dbAlloc() and - * written directly. An EA list may be in-lined in the inode if there is - * sufficient room available. - */ - -struct ea_buffer { - int flag; /* Indicates what storage xattr points to */ - int max_size; /* largest xattr that fits in current buffer */ - dxd_t new_ea; /* dxd to replace ea when modifying xattr */ - struct metapage *mp; /* metapage containing ea list */ - struct jfs_ea_list *xattr; /* buffer containing ea list */ -}; - -/* - * ea_buffer.flag values - */ -#define EA_INLINE 0x0001 -#define EA_EXTENT 0x0002 -#define EA_NEW 0x0004 -#define EA_MALLOC 0x0008 - - -static int is_known_namespace(const char *name) -{ - if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) && - strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && - strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) && - strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) - return false; - - return true; -} - -/* - * These three routines are used to recognize on-disk extended attributes - * that are in a recognized namespace. If the attribute is not recognized, - * "os2." 
is prepended to the name - */ -static int is_os2_xattr(struct jfs_ea *ea) -{ - return !is_known_namespace(ea->name); -} - -static inline int name_size(struct jfs_ea *ea) -{ - if (is_os2_xattr(ea)) - return ea->namelen + XATTR_OS2_PREFIX_LEN; - else - return ea->namelen; -} - -static inline int copy_name(char *buffer, struct jfs_ea *ea) -{ - int len = ea->namelen; - - if (is_os2_xattr(ea)) { - memcpy(buffer, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN); - buffer += XATTR_OS2_PREFIX_LEN; - len += XATTR_OS2_PREFIX_LEN; - } - memcpy(buffer, ea->name, ea->namelen); - buffer[ea->namelen] = 0; - - return len; -} - -/* Forward references */ -static void ea_release(struct inode *inode, struct ea_buffer *ea_buf); - -/* - * NAME: ea_write_inline - * - * FUNCTION: Attempt to write an EA inline if area is available - * - * PRE CONDITIONS: - * Already verified that the specified EA is small enough to fit inline - * - * PARAMETERS: - * ip - Inode pointer - * ealist - EA list pointer - * size - size of ealist in bytes - * ea - dxd_t structure to be filled in with necessary EA information - * if we successfully copy the EA inline - * - * NOTES: - * Checks if the inode's inline area is available. If so, copies EA inline - * and sets <ea> fields appropriately. Otherwise, returns failure, EA will - * have to be put into an extent. - * - * RETURNS: 0 for successful copy to inline area; -1 if area not available - */ -static int ea_write_inline(struct inode *ip, struct jfs_ea_list *ealist, - int size, dxd_t * ea) -{ - struct jfs_inode_info *ji = JFS_IP(ip); - - /* - * Make sure we have an EA -- the NULL EA list is valid, but you - * can't copy it! - */ - if (ealist && size > sizeof (struct jfs_ea_list)) { - assert(size <= sizeof (ji->i_inline_ea)); - - /* - * See if the space is available or if it is already being - * used for an inline EA. 
- */ - if (!(ji->mode2 & INLINEEA) && !(ji->ea.flag & DXD_INLINE)) - return -EPERM; - - DXDsize(ea, size); - DXDlength(ea, 0); - DXDaddress(ea, 0); - memcpy(ji->i_inline_ea, ealist, size); - ea->flag = DXD_INLINE; - ji->mode2 &= ~INLINEEA; - } else { - ea->flag = 0; - DXDsize(ea, 0); - DXDlength(ea, 0); - DXDaddress(ea, 0); - - /* Free up INLINE area */ - if (ji->ea.flag & DXD_INLINE) - ji->mode2 |= INLINEEA; - } - - return 0; -} - -/* - * NAME: ea_write - * - * FUNCTION: Write an EA for an inode - * - * PRE CONDITIONS: EA has been verified - * - * PARAMETERS: - * ip - Inode pointer - * ealist - EA list pointer - * size - size of ealist in bytes - * ea - dxd_t structure to be filled in appropriately with where the - * EA was copied - * - * NOTES: Will write EA inline if able to, otherwise allocates blocks for an - * extent and synchronously writes it to those blocks. - * - * RETURNS: 0 for success; Anything else indicates failure - */ -static int ea_write(struct inode *ip, struct jfs_ea_list *ealist, int size, - dxd_t * ea) -{ - struct super_block *sb = ip->i_sb; - struct jfs_inode_info *ji = JFS_IP(ip); - struct jfs_sb_info *sbi = JFS_SBI(sb); - int nblocks; - s64 blkno; - int rc = 0, i; - char *cp; - s32 nbytes, nb; - s32 bytes_to_write; - struct metapage *mp; - - /* - * Quick check to see if this is an in-linable EA. Short EAs - * and empty EAs are all in-linable, provided the space exists. - */ - if (!ealist || size <= sizeof (ji->i_inline_ea)) { - if (!ea_write_inline(ip, ealist, size, ea)) - return 0; - } - - /* figure out how many blocks we need */ - nblocks = (size + (sb->s_blocksize - 1)) >> sb->s_blocksize_bits; - - /* Allocate new blocks to quota. */ - rc = dquot_alloc_block(ip, nblocks); - if (rc) - return rc; - - rc = dbAlloc(ip, INOHINT(ip), nblocks, &blkno); - if (rc) { - /*Rollback quota allocation. */ - dquot_free_block(ip, nblocks); - return rc; - } - - /* - * Now have nblocks worth of storage to stuff into the FEALIST. 
- * loop over the FEALIST copying data into the buffer one page at - * a time. - */ - cp = (char *) ealist; - nbytes = size; - for (i = 0; i < nblocks; i += sbi->nbperpage) { - /* - * Determine how many bytes for this request, and round up to - * the nearest aggregate block size - */ - nb = min(PSIZE, nbytes); - bytes_to_write = - ((((nb + sb->s_blocksize - 1)) >> sb->s_blocksize_bits)) - << sb->s_blocksize_bits; - - if (!(mp = get_metapage(ip, blkno + i, bytes_to_write, 1))) { - rc = -EIO; - goto failed; - } - - memcpy(mp->data, cp, nb); - - /* - * We really need a way to propagate errors for - * forced writes like this one. --hch - * - * (__write_metapage => release_metapage => flush_metapage) - */ -#ifdef _JFS_FIXME - if ((rc = flush_metapage(mp))) { - /* - * the write failed -- this means that the buffer - * is still assigned and the blocks are not being - * used. this seems like the best error recovery - * we can get ... - */ - goto failed; - } -#else - flush_metapage(mp); -#endif - - cp += PSIZE; - nbytes -= nb; - } - - ea->flag = DXD_EXTENT; - DXDsize(ea, le32_to_cpu(ealist->size)); - DXDlength(ea, nblocks); - DXDaddress(ea, blkno); - - /* Free up INLINE area */ - if (ji->ea.flag & DXD_INLINE) - ji->mode2 |= INLINEEA; - - return 0; - - failed: - /* Rollback quota allocation. 
*/ - dquot_free_block(ip, nblocks); - - dbFree(ip, blkno, nblocks); - return rc; -} - -/* - * NAME: ea_read_inline - * - * FUNCTION: Read an inlined EA into user's buffer - * - * PARAMETERS: - * ip - Inode pointer - * ealist - Pointer to buffer to fill in with EA - * - * RETURNS: 0 - */ -static int ea_read_inline(struct inode *ip, struct jfs_ea_list *ealist) -{ - struct jfs_inode_info *ji = JFS_IP(ip); - int ea_size = sizeDXD(&ji->ea); - - if (ea_size == 0) { - ealist->size = 0; - return 0; - } - - /* Sanity Check */ - if ((sizeDXD(&ji->ea) > sizeof (ji->i_inline_ea))) - return -EIO; - if (le32_to_cpu(((struct jfs_ea_list *) &ji->i_inline_ea)->size) - != ea_size) - return -EIO; - - memcpy(ealist, ji->i_inline_ea, ea_size); - return 0; -} - -/* - * NAME: ea_read - * - * FUNCTION: copy EA data into user's buffer - * - * PARAMETERS: - * ip - Inode pointer - * ealist - Pointer to buffer to fill in with EA - * - * NOTES: If EA is inline calls ea_read_inline() to copy EA. - * - * RETURNS: 0 for success; other indicates failure - */ -static int ea_read(struct inode *ip, struct jfs_ea_list *ealist) -{ - struct super_block *sb = ip->i_sb; - struct jfs_inode_info *ji = JFS_IP(ip); - struct jfs_sb_info *sbi = JFS_SBI(sb); - int nblocks; - s64 blkno; - char *cp = (char *) ealist; - int i; - int nbytes, nb; - s32 bytes_to_read; - struct metapage *mp; - - /* quick check for in-line EA */ - if (ji->ea.flag & DXD_INLINE) - return ea_read_inline(ip, ealist); - - nbytes = sizeDXD(&ji->ea); - if (!nbytes) { - jfs_error(sb, "ea_read: nbytes is 0"); - return -EIO; - } - - /* - * Figure out how many blocks were allocated when this EA list was - * originally written to disk. - */ - nblocks = lengthDXD(&ji->ea) << sbi->l2nbperpage; - blkno = addressDXD(&ji->ea) << sbi->l2nbperpage; - - /* - * I have found the disk blocks which were originally used to store - * the FEALIST. now i loop over each contiguous block copying the - * data into the buffer. 
- */ - for (i = 0; i < nblocks; i += sbi->nbperpage) { - /* - * Determine how many bytes for this request, and round up to - * the nearest aggregate block size - */ - nb = min(PSIZE, nbytes); - bytes_to_read = - ((((nb + sb->s_blocksize - 1)) >> sb->s_blocksize_bits)) - << sb->s_blocksize_bits; - - if (!(mp = read_metapage(ip, blkno + i, bytes_to_read, 1))) - return -EIO; - - memcpy(cp, mp->data, nb); - release_metapage(mp); - - cp += PSIZE; - nbytes -= nb; - } - - return 0; -} - -/* - * NAME: ea_get - * - * FUNCTION: Returns buffer containing existing extended attributes. - * The size of the buffer will be the larger of the existing - * attributes size, or min_size. - * - * The buffer, which may be inlined in the inode or in the - * page cache must be release by calling ea_release or ea_put - * - * PARAMETERS: - * inode - Inode pointer - * ea_buf - Structure to be populated with ealist and its metadata - * min_size- minimum size of buffer to be returned - * - * RETURNS: 0 for success; Other indicates failure - */ -static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) -{ - struct jfs_inode_info *ji = JFS_IP(inode); - struct super_block *sb = inode->i_sb; - int size; - int ea_size = sizeDXD(&ji->ea); - int blocks_needed, current_blocks; - s64 blkno; - int rc; - int quota_allocation = 0; - - /* When fsck.jfs clears a bad ea, it doesn't clear the size */ - if (ji->ea.flag == 0) - ea_size = 0; - - if (ea_size == 0) { - if (min_size == 0) { - ea_buf->flag = 0; - ea_buf->max_size = 0; - ea_buf->xattr = NULL; - return 0; - } - if ((min_size <= sizeof (ji->i_inline_ea)) && - (ji->mode2 & INLINEEA)) { - ea_buf->flag = EA_INLINE | EA_NEW; - ea_buf->max_size = sizeof (ji->i_inline_ea); - ea_buf->xattr = (struct jfs_ea_list *) ji->i_inline_ea; - DXDlength(&ea_buf->new_ea, 0); - DXDaddress(&ea_buf->new_ea, 0); - ea_buf->new_ea.flag = DXD_INLINE; - DXDsize(&ea_buf->new_ea, min_size); - return 0; - } - current_blocks = 0; - } else if (ji->ea.flag & 
DXD_INLINE) { - if (min_size <= sizeof (ji->i_inline_ea)) { - ea_buf->flag = EA_INLINE; - ea_buf->max_size = sizeof (ji->i_inline_ea); - ea_buf->xattr = (struct jfs_ea_list *) ji->i_inline_ea; - goto size_check; - } - current_blocks = 0; - } else { - if (!(ji->ea.flag & DXD_EXTENT)) { - jfs_error(sb, "ea_get: invalid ea.flag)"); - return -EIO; - } - current_blocks = (ea_size + sb->s_blocksize - 1) >> - sb->s_blocksize_bits; - } - size = max(min_size, ea_size); - - if (size > PSIZE) { - /* - * To keep the rest of the code simple. Allocate a - * contiguous buffer to work with - */ - ea_buf->xattr = kmalloc(size, GFP_KERNEL); - if (ea_buf->xattr == NULL) - return -ENOMEM; - - ea_buf->flag = EA_MALLOC; - ea_buf->max_size = (size + sb->s_blocksize - 1) & - ~(sb->s_blocksize - 1); - - if (ea_size == 0) - return 0; - - if ((rc = ea_read(inode, ea_buf->xattr))) { - kfree(ea_buf->xattr); - ea_buf->xattr = NULL; - return rc; - } - goto size_check; - } - blocks_needed = (min_size + sb->s_blocksize - 1) >> - sb->s_blocksize_bits; - - if (blocks_needed > current_blocks) { - /* Allocate new blocks to quota. 
*/ - rc = dquot_alloc_block(inode, blocks_needed); - if (rc) - return -EDQUOT; - - quota_allocation = blocks_needed; - - rc = dbAlloc(inode, INOHINT(inode), (s64) blocks_needed, - &blkno); - if (rc) - goto clean_up; - - DXDlength(&ea_buf->new_ea, blocks_needed); - DXDaddress(&ea_buf->new_ea, blkno); - ea_buf->new_ea.flag = DXD_EXTENT; - DXDsize(&ea_buf->new_ea, min_size); - - ea_buf->flag = EA_EXTENT | EA_NEW; - - ea_buf->mp = get_metapage(inode, blkno, - blocks_needed << sb->s_blocksize_bits, - 1); - if (ea_buf->mp == NULL) { - dbFree(inode, blkno, (s64) blocks_needed); - rc = -EIO; - goto clean_up; - } - ea_buf->xattr = ea_buf->mp->data; - ea_buf->max_size = (min_size + sb->s_blocksize - 1) & - ~(sb->s_blocksize - 1); - if (ea_size == 0) - return 0; - if ((rc = ea_read(inode, ea_buf->xattr))) { - discard_metapage(ea_buf->mp); - dbFree(inode, blkno, (s64) blocks_needed); - goto clean_up; - } - goto size_check; - } - ea_buf->flag = EA_EXTENT; - ea_buf->mp = read_metapage(inode, addressDXD(&ji->ea), - lengthDXD(&ji->ea) << sb->s_blocksize_bits, - 1); - if (ea_buf->mp == NULL) { - rc = -EIO; - goto clean_up; - } - ea_buf->xattr = ea_buf->mp->data; - ea_buf->max_size = (ea_size + sb->s_blocksize - 1) & - ~(sb->s_blocksize - 1); - - size_check: - if (EALIST_SIZE(ea_buf->xattr) != ea_size) { - printk(KERN_ERR "ea_get: invalid extended attribute\n"); - print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, - ea_buf->xattr, ea_size, 1); - ea_release(inode, ea_buf); - rc = -EIO; - goto clean_up; - } - - return ea_size; - - clean_up: - /* Rollback quota allocation */ - if (quota_allocation) - dquot_free_block(inode, quota_allocation); - - return (rc); -} - -static void ea_release(struct inode *inode, struct ea_buffer *ea_buf) -{ - if (ea_buf->flag & EA_MALLOC) - kfree(ea_buf->xattr); - else if (ea_buf->flag & EA_EXTENT) { - assert(ea_buf->mp); - release_metapage(ea_buf->mp); - - if (ea_buf->flag & EA_NEW) - dbFree(inode, addressDXD(&ea_buf->new_ea), - 
lengthDXD(&ea_buf->new_ea)); - } -} - -static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf, - int new_size) -{ - struct jfs_inode_info *ji = JFS_IP(inode); - unsigned long old_blocks, new_blocks; - int rc = 0; - - if (new_size == 0) { - ea_release(inode, ea_buf); - ea_buf = NULL; - } else if (ea_buf->flag & EA_INLINE) { - assert(new_size <= sizeof (ji->i_inline_ea)); - ji->mode2 &= ~INLINEEA; - ea_buf->new_ea.flag = DXD_INLINE; - DXDsize(&ea_buf->new_ea, new_size); - DXDaddress(&ea_buf->new_ea, 0); - DXDlength(&ea_buf->new_ea, 0); - } else if (ea_buf->flag & EA_MALLOC) { - rc = ea_write(inode, ea_buf->xattr, new_size, &ea_buf->new_ea); - kfree(ea_buf->xattr); - } else if (ea_buf->flag & EA_NEW) { - /* We have already allocated a new dxd */ - flush_metapage(ea_buf->mp); - } else { - /* ->xattr must point to original ea's metapage */ - rc = ea_write(inode, ea_buf->xattr, new_size, &ea_buf->new_ea); - discard_metapage(ea_buf->mp); - } - if (rc) - return rc; - - old_blocks = new_blocks = 0; - - if (ji->ea.flag & DXD_EXTENT) { - invalidate_dxd_metapages(inode, ji->ea); - old_blocks = lengthDXD(&ji->ea); - } - - if (ea_buf) { - txEA(tid, inode, &ji->ea, &ea_buf->new_ea); - if (ea_buf->new_ea.flag & DXD_EXTENT) { - new_blocks = lengthDXD(&ea_buf->new_ea); - if (ji->ea.flag & DXD_INLINE) - ji->mode2 |= INLINEEA; - } - ji->ea = ea_buf->new_ea; - } else { - txEA(tid, inode, &ji->ea, NULL); - if (ji->ea.flag & DXD_INLINE) - ji->mode2 |= INLINEEA; - ji->ea.flag = 0; - ji->ea.size = 0; - } - - /* If old blocks exist, they must be removed from quota allocation. */ - if (old_blocks) - dquot_free_block(inode, old_blocks); - - inode->i_ctime = CURRENT_TIME; - - return 0; -} - -/* - * can_set_system_xattr - * - * This code is specific to the system.* namespace. It contains policy - * which doesn't belong in the main xattr codepath. 
- */ -static int can_set_system_xattr(struct inode *inode, const char *name, - const void *value, size_t value_len) -{ -#ifdef CONFIG_JFS_POSIX_ACL - struct posix_acl *acl; - int rc; - - if (!inode_owner_or_capable(inode)) - return -EPERM; - - /* - * POSIX_ACL_XATTR_ACCESS is tied to i_mode - */ - if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) { - acl = posix_acl_from_xattr(value, value_len); - if (IS_ERR(acl)) { - rc = PTR_ERR(acl); - printk(KERN_ERR "posix_acl_from_xattr returned %d\n", - rc); - return rc; - } - if (acl) { - rc = posix_acl_equiv_mode(acl, &inode->i_mode); - posix_acl_release(acl); - if (rc < 0) { - printk(KERN_ERR - "posix_acl_equiv_mode returned %d\n", - rc); - return rc; - } - mark_inode_dirty(inode); - } - /* - * We're changing the ACL. Get rid of the cached one - */ - forget_cached_acl(inode, ACL_TYPE_ACCESS); - - return 0; - } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) { - acl = posix_acl_from_xattr(value, value_len); - if (IS_ERR(acl)) { - rc = PTR_ERR(acl); - printk(KERN_ERR "posix_acl_from_xattr returned %d\n", - rc); - return rc; - } - posix_acl_release(acl); - - /* - * We're changing the default ACL. Get rid of the cached one - */ - forget_cached_acl(inode, ACL_TYPE_DEFAULT); - - return 0; - } -#endif /* CONFIG_JFS_POSIX_ACL */ - return -EOPNOTSUPP; -} - -/* - * Most of the permission checking is done by xattr_permission in the vfs. - * The local file system is responsible for handling the system.* namespace. - * We also need to verify that this is a namespace that we recognize. - */ -static int can_set_xattr(struct inode *inode, const char *name, - const void *value, size_t value_len) -{ - if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) - return can_set_system_xattr(inode, name, value, value_len); - - if (!strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN)) { - /* - * This makes sure that we aren't trying to set an - * attribute in a different namespace by prefixing it - * with "os2." 
- */ - if (is_known_namespace(name + XATTR_OS2_PREFIX_LEN)) - return -EOPNOTSUPP; - return 0; - } - - /* - * Don't allow setting an attribute in an unknown namespace. - */ - if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) && - strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) && - strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) - return -EOPNOTSUPP; - - return 0; -} - -int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name, - const void *value, size_t value_len, int flags) -{ - struct jfs_ea_list *ealist; - struct jfs_ea *ea, *old_ea = NULL, *next_ea = NULL; - struct ea_buffer ea_buf; - int old_ea_size = 0; - int xattr_size; - int new_size; - int namelen = strlen(name); - char *os2name = NULL; - int found = 0; - int rc; - int length; - - if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { - os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1, - GFP_KERNEL); - if (!os2name) - return -ENOMEM; - strcpy(os2name, name + XATTR_OS2_PREFIX_LEN); - name = os2name; - namelen -= XATTR_OS2_PREFIX_LEN; - } - - down_write(&JFS_IP(inode)->xattr_sem); - - xattr_size = ea_get(inode, &ea_buf, 0); - if (xattr_size < 0) { - rc = xattr_size; - goto out; - } - - again: - ealist = (struct jfs_ea_list *) ea_buf.xattr; - new_size = sizeof (struct jfs_ea_list); - - if (xattr_size) { - for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); - ea = NEXT_EA(ea)) { - if ((namelen == ea->namelen) && - (memcmp(name, ea->name, namelen) == 0)) { - found = 1; - if (flags & XATTR_CREATE) { - rc = -EEXIST; - goto release; - } - old_ea = ea; - old_ea_size = EA_SIZE(ea); - next_ea = NEXT_EA(ea); - } else - new_size += EA_SIZE(ea); - } - } - - if (!found) { - if (flags & XATTR_REPLACE) { - rc = -ENODATA; - goto release; - } - if (value == NULL) { - rc = 0; - goto release; - } - } - if (value) - new_size += sizeof (struct jfs_ea) + namelen + 1 + value_len; - - if (new_size > ea_buf.max_size) { - /* - * We need to allocate more space for 
merged ea list. - * We should only have loop to again: once. - */ - ea_release(inode, &ea_buf); - xattr_size = ea_get(inode, &ea_buf, new_size); - if (xattr_size < 0) { - rc = xattr_size; - goto out; - } - goto again; - } - - /* Remove old ea of the same name */ - if (found) { - /* number of bytes following target EA */ - length = (char *) END_EALIST(ealist) - (char *) next_ea; - if (length > 0) - memmove(old_ea, next_ea, length); - xattr_size -= old_ea_size; - } - - /* Add new entry to the end */ - if (value) { - if (xattr_size == 0) - /* Completely new ea list */ - xattr_size = sizeof (struct jfs_ea_list); - - ea = (struct jfs_ea *) ((char *) ealist + xattr_size); - ea->flag = 0; - ea->namelen = namelen; - ea->valuelen = (cpu_to_le16(value_len)); - memcpy(ea->name, name, namelen); - ea->name[namelen] = 0; - if (value_len) - memcpy(&ea->name[namelen + 1], value, value_len); - xattr_size += EA_SIZE(ea); - } - - /* DEBUG - If we did this right, these number match */ - if (xattr_size != new_size) { - printk(KERN_ERR - "jfs_xsetattr: xattr_size = %d, new_size = %d\n", - xattr_size, new_size); - - rc = -EINVAL; - goto release; - } - - /* - * If we're left with an empty list, there's no ea - */ - if (new_size == sizeof (struct jfs_ea_list)) - new_size = 0; - - ealist->size = cpu_to_le32(new_size); - - rc = ea_put(tid, inode, &ea_buf, new_size); - - goto out; - release: - ea_release(inode, &ea_buf); - out: - up_write(&JFS_IP(inode)->xattr_sem); - - kfree(os2name); - - return rc; -} - -int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, - size_t value_len, int flags) -{ - struct inode *inode = dentry->d_inode; - struct jfs_inode_info *ji = JFS_IP(inode); - int rc; - tid_t tid; - - if ((rc = can_set_xattr(inode, name, value, value_len))) - return rc; - - if (value == NULL) { /* empty EA, do not remove */ - value = ""; - value_len = 0; - } - - tid = txBegin(inode->i_sb, 0); - mutex_lock(&ji->commit_mutex); - rc = __jfs_setxattr(tid, dentry->d_inode, 
name, value, value_len, - flags); - if (!rc) - rc = txCommit(tid, 1, &inode, 0); - txEnd(tid); - mutex_unlock(&ji->commit_mutex); - - return rc; -} - -ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, - size_t buf_size) -{ - struct jfs_ea_list *ealist; - struct jfs_ea *ea; - struct ea_buffer ea_buf; - int xattr_size; - ssize_t size; - int namelen = strlen(name); - char *value; - - down_read(&JFS_IP(inode)->xattr_sem); - - xattr_size = ea_get(inode, &ea_buf, 0); - - if (xattr_size < 0) { - size = xattr_size; - goto out; - } - - if (xattr_size == 0) - goto not_found; - - ealist = (struct jfs_ea_list *) ea_buf.xattr; - - /* Find the named attribute */ - for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) - if ((namelen == ea->namelen) && - memcmp(name, ea->name, namelen) == 0) { - /* Found it */ - size = le16_to_cpu(ea->valuelen); - if (!data) - goto release; - else if (size > buf_size) { - size = -ERANGE; - goto release; - } - value = ((char *) &ea->name) + ea->namelen + 1; - memcpy(data, value, size); - goto release; - } - not_found: - size = -ENODATA; - release: - ea_release(inode, &ea_buf); - out: - up_read(&JFS_IP(inode)->xattr_sem); - - return size; -} - -ssize_t jfs_getxattr(struct dentry *dentry, const char *name, void *data, - size_t buf_size) -{ - int err; - - if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { - /* - * skip past "os2." prefix - */ - name += XATTR_OS2_PREFIX_LEN; - /* - * Don't allow retrieving properly prefixed attributes - * by prepending them with "os2." 
- */ - if (is_known_namespace(name)) - return -EOPNOTSUPP; - } - - err = __jfs_getxattr(dentry->d_inode, name, data, buf_size); - - return err; -} - -/* - * No special permissions are needed to list attributes except for trusted.* - */ -static inline int can_list(struct jfs_ea *ea) -{ - return (strncmp(ea->name, XATTR_TRUSTED_PREFIX, - XATTR_TRUSTED_PREFIX_LEN) || - capable(CAP_SYS_ADMIN)); -} - -ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size) -{ - struct inode *inode = dentry->d_inode; - char *buffer; - ssize_t size = 0; - int xattr_size; - struct jfs_ea_list *ealist; - struct jfs_ea *ea; - struct ea_buffer ea_buf; - - down_read(&JFS_IP(inode)->xattr_sem); - - xattr_size = ea_get(inode, &ea_buf, 0); - if (xattr_size < 0) { - size = xattr_size; - goto out; - } - - if (xattr_size == 0) - goto release; - - ealist = (struct jfs_ea_list *) ea_buf.xattr; - - /* compute required size of list */ - for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) { - if (can_list(ea)) - size += name_size(ea) + 1; - } - - if (!data) - goto release; - - if (size > buf_size) { - size = -ERANGE; - goto release; - } - - /* Copy attribute names to buffer */ - buffer = data; - for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) { - if (can_list(ea)) { - int namelen = copy_name(buffer, ea); - buffer += namelen + 1; - } - } - - release: - ea_release(inode, &ea_buf); - out: - up_read(&JFS_IP(inode)->xattr_sem); - return size; -} - -int jfs_removexattr(struct dentry *dentry, const char *name) -{ - struct inode *inode = dentry->d_inode; - struct jfs_inode_info *ji = JFS_IP(inode); - int rc; - tid_t tid; - - if ((rc = can_set_xattr(inode, name, NULL, 0))) - return rc; - - tid = txBegin(inode->i_sb, 0); - mutex_lock(&ji->commit_mutex); - rc = __jfs_setxattr(tid, dentry->d_inode, name, NULL, 0, XATTR_REPLACE); - if (!rc) - rc = txCommit(tid, 1, &inode, 0); - txEnd(tid); - mutex_unlock(&ji->commit_mutex); - - return rc; -} - -#ifdef 
CONFIG_JFS_SECURITY -int jfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, - void *fs_info) -{ - const struct xattr *xattr; - tid_t *tid = fs_info; - char *name; - int err = 0; - - for (xattr = xattr_array; xattr->name != NULL; xattr++) { - name = kmalloc(XATTR_SECURITY_PREFIX_LEN + - strlen(xattr->name) + 1, GFP_NOFS); - if (!name) { - err = -ENOMEM; - break; - } - strcpy(name, XATTR_SECURITY_PREFIX); - strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name); - - err = __jfs_setxattr(*tid, inode, name, - xattr->value, xattr->value_len, 0); - kfree(name); - if (err < 0) - break; - } - return err; -} - -int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir, - const struct qstr *qstr) -{ - return security_inode_init_security(inode, dir, qstr, - &jfs_initxattrs, &tid); -} -#endif |