diff options
Diffstat (limited to 'ANDROID_3.4.5/fs/reiserfs')
32 files changed, 0 insertions, 31118 deletions
diff --git a/ANDROID_3.4.5/fs/reiserfs/Kconfig b/ANDROID_3.4.5/fs/reiserfs/Kconfig deleted file mode 100644 index 7cd46666..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/Kconfig +++ /dev/null @@ -1,88 +0,0 @@ -config REISERFS_FS - tristate "Reiserfs support" - select CRC32 - help - Stores not just filenames but the files themselves in a balanced - tree. Uses journalling. - - Balanced trees are more efficient than traditional file system - architectural foundations. - - In general, ReiserFS is as fast as ext2, but is very efficient with - large directories and small files. Additional patches are needed - for NFS and quotas, please see - <https://reiser4.wiki.kernel.org/index.php/Main_Page> for links. - - It is more easily extended to have features currently found in - database and keyword search systems than block allocation based file - systems are. The next version will be so extended, and will support - plugins consistent with our motto ``It takes more than a license to - make source code open.'' - - Read <https://reiser4.wiki.kernel.org/index.php/Main_Page> - to learn more about reiserfs. - - Sponsored by Threshold Networks, Emusic.com, and Bigstorage.com. - - If you like it, you can pay us to add new features to it that you - need, buy a support contract, or pay us to port it to another OS. - -config REISERFS_CHECK - bool "Enable reiserfs debug mode" - depends on REISERFS_FS - help - If you set this to Y, then ReiserFS will perform every check it can - possibly imagine of its internal consistency throughout its - operation. It will also go substantially slower. More than once we - have forgotten that this was on, and then gone despondent over the - latest benchmarks.:-) Use of this option allows our team to go all - out in checking for consistency when debugging without fear of its - effect on end users. If you are on the verge of sending in a bug - report, say Y and you might get a useful error message. Almost - everyone should say N. 
- -config REISERFS_PROC_INFO - bool "Stats in /proc/fs/reiserfs" - depends on REISERFS_FS && PROC_FS - help - Create under /proc/fs/reiserfs a hierarchy of files, displaying - various ReiserFS statistics and internal data at the expense of - making your kernel or module slightly larger (+8 KB). This also - increases the amount of kernel memory required for each mount. - Almost everyone but ReiserFS developers and people fine-tuning - reiserfs or tracing problems should say N. - -config REISERFS_FS_XATTR - bool "ReiserFS extended attributes" - depends on REISERFS_FS - help - Extended attributes are name:value pairs associated with inodes by - the kernel or by users (see the attr(5) manual page, or visit - <http://acl.bestbits.at/> for details). - - If unsure, say N. - -config REISERFS_FS_POSIX_ACL - bool "ReiserFS POSIX Access Control Lists" - depends on REISERFS_FS_XATTR - select FS_POSIX_ACL - help - Posix Access Control Lists (ACLs) support permissions for users and - groups beyond the owner/group/world scheme. - - To learn more about Access Control Lists, visit the Posix ACLs for - Linux website <http://acl.bestbits.at/>. - - If you don't know what Access Control Lists are, say N - -config REISERFS_FS_SECURITY - bool "ReiserFS Security Labels" - depends on REISERFS_FS_XATTR - help - Security labels support alternative access control models - implemented by security modules like SELinux. This option - enables an extended attribute handler for file security - labels in the ReiserFS filesystem. - - If you are not using a security module that requires using - extended attributes for file security labels, say N. diff --git a/ANDROID_3.4.5/fs/reiserfs/Makefile b/ANDROID_3.4.5/fs/reiserfs/Makefile deleted file mode 100644 index 3c3b0016..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/Makefile +++ /dev/null @@ -1,38 +0,0 @@ -# -# Makefile for the linux reiser-filesystem routines. 
-# - -obj-$(CONFIG_REISERFS_FS) += reiserfs.o - -reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \ - super.o prints.o objectid.o lbalance.o ibalance.o stree.o \ - hashes.o tail_conversion.o journal.o resize.o \ - item_ops.o ioctl.o xattr.o lock.o - -ifeq ($(CONFIG_REISERFS_PROC_INFO),y) -reiserfs-objs += procfs.o -endif - -ifeq ($(CONFIG_REISERFS_FS_XATTR),y) -reiserfs-objs += xattr_user.o xattr_trusted.o -endif - -ifeq ($(CONFIG_REISERFS_FS_SECURITY),y) -reiserfs-objs += xattr_security.o -endif - -ifeq ($(CONFIG_REISERFS_FS_POSIX_ACL),y) -reiserfs-objs += xattr_acl.o -endif - -# gcc -O2 (the kernel default) is overaggressive on ppc32 when many inline -# functions are used. This causes the compiler to advance the stack -# pointer out of the available stack space, corrupting kernel space, -# and causing a panic. Since this behavior only affects ppc32, this ifeq -# will work around it. If any other architecture displays this behavior, -# add it here. -ccflags-$(CONFIG_PPC32) := $(call cc-ifversion, -lt, 0400, -O1) - -TAGS: - etags *.c - diff --git a/ANDROID_3.4.5/fs/reiserfs/README b/ANDROID_3.4.5/fs/reiserfs/README deleted file mode 100644 index e2f7a264..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/README +++ /dev/null @@ -1,161 +0,0 @@ -[LICENSING] - -ReiserFS is hereby licensed under the GNU General -Public License version 2. - -Source code files that contain the phrase "licensing governed by -reiserfs/README" are "governed files" throughout this file. Governed -files are licensed under the GPL. The portions of them owned by Hans -Reiser, or authorized to be licensed by him, have been in the past, -and likely will be in the future, licensed to other parties under -other licenses. If you add your code to governed files, and don't -want it to be owned by Hans Reiser, put your copyright label on that -code so the poor blight and his customers can keep things straight. 
-All portions of governed files not labeled otherwise are owned by Hans -Reiser, and by adding your code to it, widely distributing it to -others or sending us a patch, and leaving the sentence in stating that -licensing is governed by the statement in this file, you accept this. -It will be a kindness if you identify whether Hans Reiser is allowed -to license code labeled as owned by you on your behalf other than -under the GPL, because he wants to know if it is okay to do so and put -a check in the mail to you (for non-trivial improvements) when he -makes his next sale. He makes no guarantees as to the amount if any, -though he feels motivated to motivate contributors, and you can surely -discuss this with him before or after contributing. You have the -right to decline to allow him to license your code contribution other -than under the GPL. - -Further licensing options are available for commercial and/or other -interests directly from Hans Reiser: hans@reiser.to. If you interpret -the GPL as not allowing those additional licensing options, you read -it wrongly, and Richard Stallman agrees with me, when carefully read -you can see that those restrictions on additional terms do not apply -to the owner of the copyright, and my interpretation of this shall -govern for this license. - -Finally, nothing in this license shall be interpreted to allow you to -fail to fairly credit me, or to remove my credits, without my -permission, unless you are an end user not redistributing to others. -If you have doubts about how to properly do that, or about what is -fair, ask. (Last I spoke with him Richard was contemplating how best -to address the fair crediting issue in the next GPL version.) - -[END LICENSING] - -Reiserfs is a file system based on balanced tree algorithms, which is -described at https://reiser4.wiki.kernel.org/index.php/Main_Page - -Stop reading here. Go there, then return. - -Send bug reports to yura@namesys.botik.ru. 
- -mkreiserfs and other utilities are in reiserfs/utils, or wherever your -Linux provider put them. There is some disagreement about how useful -it is for users to get their fsck and mkreiserfs out of sync with the -version of reiserfs that is in their kernel, with many important -distributors wanting them out of sync.:-) Please try to remember to -recompile and reinstall fsck and mkreiserfs with every update of -reiserfs, this is a common source of confusion. Note that some of the -utilities cannot be compiled without accessing the balancing code -which is in the kernel code, and relocating the utilities may require -you to specify where that code can be found. - -Yes, if you update your reiserfs kernel module you do have to -recompile your kernel, most of the time. The errors you get will be -quite cryptic if your forget to do so. - -Real users, as opposed to folks who want to hack and then understand -what went wrong, will want REISERFS_CHECK off. - -Hideous Commercial Pitch: Spread your development costs across other OS -vendors. Select from the best in the world, not the best in your -building, by buying from third party OS component suppliers. Leverage -the software component development power of the internet. Be the most -aggressive in taking advantage of the commercial possibilities of -decentralized internet development, and add value through your branded -integration that you sell as an operating system. Let your competitors -be the ones to compete against the entire internet by themselves. Be -hip, get with the new economic trend, before your competitors do. Send -email to hans@reiser.to. - -To understand the code, after reading the website, start reading the -code by reading reiserfs_fs.h first. - -Hans Reiser was the project initiator, primary architect, source of all -funding for the first 5.5 years, and one of the programmers. He owns -the copyright. - -Vladimir Saveljev was one of the programmers, and he worked long hours -writing the cleanest code. 
He always made the effort to be the best he -could be, and to make his code the best that it could be. What resulted -was quite remarkable. I don't think that money can ever motivate someone -to work the way he did, he is one of the most selfless men I know. - -Yura helps with benchmarking, coding hashes, and block pre-allocation -code. - -Anatoly Pinchuk is a former member of our team who worked closely with -Vladimir throughout the project's development. He wrote a quite -substantial portion of the total code. He realized that there was a -space problem with packing tails of files for files larger than a node -that start on a node aligned boundary (there are reasons to want to node -align files), and he invented and implemented indirect items and -unformatted nodes as the solution. - -Konstantin Shvachko, with the help of the Russian version of a VC, -tried to put me in a position where I was forced into giving control -of the project to him. (Fortunately, as the person paying the money -for all salaries from my dayjob I owned all copyrights, and you can't -really force takeovers of sole proprietorships.) This was something -curious, because he never really understood the value of our project, -why we should do what we do, or why innovation was possible in -general, but he was sure that he ought to be controlling it. Every -innovation had to be forced past him while he was with us. He added -two years to the time required to complete reiserfs, and was a net -loss for me. Mikhail Gilula was a brilliant innovator who also left -in a destructive way that erased the value of his contributions, and -that he was shown much generosity just makes it more painful. - -Grigory Zaigralin was an extremely effective system administrator for -our group. - -Igor Krasheninnikov was wonderful at hardware procurement, repair, and -network installation. - -Jeremy Fitzhardinge wrote the teahash.c code, and he gives credit to a -textbook he got the algorithm from in the code. 
Note that his analysis -of how we could use the hashing code in making 32 bit NFS cookies work -was probably more important than the actual algorithm. Colin Plumb also -contributed to it. - -Chris Mason dived right into our code, and in just a few months produced -the journaling code that dramatically increased the value of ReiserFS. -He is just an amazing programmer. - -Igor Zagorovsky is writing much of the new item handler and extent code -for our next major release. - -Alexander Zarochentcev (sometimes known as zam, or sasha), wrote the -resizer, and is hard at work on implementing allocate on flush. SGI -implemented allocate on flush before us for XFS, and generously took -the time to convince me we should do it also. They are great people, -and a great company. - -Yuri Shevchuk and Nikita Danilov are doing squid cache optimization. - -Vitaly Fertman is doing fsck. - -Jeff Mahoney, of SuSE, contributed a few cleanup fixes, most notably -the endian safe patches which allow ReiserFS to run on any platform -supported by the Linux kernel. - -SuSE, IntegratedLinux.com, Ecila, MP3.com, bigstorage.com, and the -Alpha PC Company made it possible for me to not have a day job -anymore, and to dramatically increase our staffing. Ecila funded -hypertext feature development, MP3.com funded journaling, SuSE funded -core development, IntegratedLinux.com funded squid web cache -appliances, bigstorage.com funded HSM, and the alpha PC company funded -the alpha port. Many of these tasks were helped by sponsors other -than the ones just named. SuSE has helped in much more than just -funding.... 
- diff --git a/ANDROID_3.4.5/fs/reiserfs/acl.h b/ANDROID_3.4.5/fs/reiserfs/acl.h deleted file mode 100644 index f096b80e..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/acl.h +++ /dev/null @@ -1,76 +0,0 @@ -#include <linux/init.h> -#include <linux/posix_acl.h> - -#define REISERFS_ACL_VERSION 0x0001 - -typedef struct { - __le16 e_tag; - __le16 e_perm; - __le32 e_id; -} reiserfs_acl_entry; - -typedef struct { - __le16 e_tag; - __le16 e_perm; -} reiserfs_acl_entry_short; - -typedef struct { - __le32 a_version; -} reiserfs_acl_header; - -static inline size_t reiserfs_acl_size(int count) -{ - if (count <= 4) { - return sizeof(reiserfs_acl_header) + - count * sizeof(reiserfs_acl_entry_short); - } else { - return sizeof(reiserfs_acl_header) + - 4 * sizeof(reiserfs_acl_entry_short) + - (count - 4) * sizeof(reiserfs_acl_entry); - } -} - -static inline int reiserfs_acl_count(size_t size) -{ - ssize_t s; - size -= sizeof(reiserfs_acl_header); - s = size - 4 * sizeof(reiserfs_acl_entry_short); - if (s < 0) { - if (size % sizeof(reiserfs_acl_entry_short)) - return -1; - return size / sizeof(reiserfs_acl_entry_short); - } else { - if (s % sizeof(reiserfs_acl_entry)) - return -1; - return s / sizeof(reiserfs_acl_entry) + 4; - } -} - -#ifdef CONFIG_REISERFS_FS_POSIX_ACL -struct posix_acl *reiserfs_get_acl(struct inode *inode, int type); -int reiserfs_acl_chmod(struct inode *inode); -int reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, - struct inode *dir, struct dentry *dentry, - struct inode *inode); -int reiserfs_cache_default_acl(struct inode *dir); -extern const struct xattr_handler reiserfs_posix_acl_default_handler; -extern const struct xattr_handler reiserfs_posix_acl_access_handler; - -#else - -#define reiserfs_cache_default_acl(inode) 0 -#define reiserfs_get_acl NULL - -static inline int reiserfs_acl_chmod(struct inode *inode) -{ - return 0; -} - -static inline int -reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, - const struct inode 
*dir, struct dentry *dentry, - struct inode *inode) -{ - return 0; -} -#endif diff --git a/ANDROID_3.4.5/fs/reiserfs/bitmap.c b/ANDROID_3.4.5/fs/reiserfs/bitmap.c deleted file mode 100644 index 4c0c7d16..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/bitmap.c +++ /dev/null @@ -1,1382 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ -/* Reiserfs block (de)allocator, bitmap-based. */ - -#include <linux/time.h> -#include "reiserfs.h" -#include <linux/errno.h> -#include <linux/buffer_head.h> -#include <linux/kernel.h> -#include <linux/pagemap.h> -#include <linux/vmalloc.h> -#include <linux/quotaops.h> -#include <linux/seq_file.h> - -#define PREALLOCATION_SIZE 9 - -/* different reiserfs block allocator options */ - -#define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits) - -#define _ALLOC_concentrating_formatted_nodes 0 -#define _ALLOC_displacing_large_files 1 -#define _ALLOC_displacing_new_packing_localities 2 -#define _ALLOC_old_hashed_relocation 3 -#define _ALLOC_new_hashed_relocation 4 -#define _ALLOC_skip_busy 5 -#define _ALLOC_displace_based_on_dirid 6 -#define _ALLOC_hashed_formatted_nodes 7 -#define _ALLOC_old_way 8 -#define _ALLOC_hundredth_slices 9 -#define _ALLOC_dirid_groups 10 -#define _ALLOC_oid_groups 11 -#define _ALLOC_packing_groups 12 - -#define concentrating_formatted_nodes(s) test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s)) -#define displacing_large_files(s) test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s)) -#define displacing_new_packing_localities(s) test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s)) - -#define SET_OPTION(optname) \ - do { \ - reiserfs_info(s, "block allocator option \"%s\" is set", #optname); \ - set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \ - } while(0) -#define TEST_OPTION(optname, s) \ - test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)) - -static inline void get_bit_address(struct super_block *s, - b_blocknr_t block, - unsigned int 
*bmap_nr, - unsigned int *offset) -{ - /* It is in the bitmap block number equal to the block - * number divided by the number of bits in a block. */ - *bmap_nr = block >> (s->s_blocksize_bits + 3); - /* Within that bitmap block it is located at bit offset *offset. */ - *offset = block & ((s->s_blocksize << 3) - 1); -} - -int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value) -{ - unsigned int bmap, offset; - unsigned int bmap_count = reiserfs_bmap_count(s); - - if (block == 0 || block >= SB_BLOCK_COUNT(s)) { - reiserfs_error(s, "vs-4010", - "block number is out of range %lu (%u)", - block, SB_BLOCK_COUNT(s)); - return 0; - } - - get_bit_address(s, block, &bmap, &offset); - - /* Old format filesystem? Unlikely, but the bitmaps are all up front so - * we need to account for it. */ - if (unlikely(test_bit(REISERFS_OLD_FORMAT, - &(REISERFS_SB(s)->s_properties)))) { - b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1; - if (block >= bmap1 && - block <= bmap1 + bmap_count) { - reiserfs_error(s, "vs-4019", "bitmap block %lu(%u) " - "can't be freed or reused", - block, bmap_count); - return 0; - } - } else { - if (offset == 0) { - reiserfs_error(s, "vs-4020", "bitmap block %lu(%u) " - "can't be freed or reused", - block, bmap_count); - return 0; - } - } - - if (bmap >= bmap_count) { - reiserfs_error(s, "vs-4030", "bitmap for requested block " - "is out of range: block=%lu, bitmap_nr=%u", - block, bmap); - return 0; - } - - if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) { - reiserfs_error(s, "vs-4050", "this is root block (%u), " - "it must be busy", SB_ROOT_BLOCK(s)); - return 0; - } - - return 1; -} - -/* searches in journal structures for a given block number (bmap, off). If block - is found in reiserfs journal it suggests next free block candidate to test. 
*/ -static inline int is_block_in_journal(struct super_block *s, unsigned int bmap, - int off, int *next) -{ - b_blocknr_t tmp; - - if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) { - if (tmp) { /* hint supplied */ - *next = tmp; - PROC_INFO_INC(s, scan_bitmap.in_journal_hint); - } else { - (*next) = off + 1; /* inc offset to avoid looping. */ - PROC_INFO_INC(s, scan_bitmap.in_journal_nohint); - } - PROC_INFO_INC(s, scan_bitmap.retry); - return 1; - } - return 0; -} - -/* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap - * block; */ -static int scan_bitmap_block(struct reiserfs_transaction_handle *th, - unsigned int bmap_n, int *beg, int boundary, - int min, int max, int unfm) -{ - struct super_block *s = th->t_super; - struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n]; - struct buffer_head *bh; - int end, next; - int org = *beg; - - BUG_ON(!th->t_trans_id); - - RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of " - "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1); - PROC_INFO_INC(s, scan_bitmap.bmap); -/* this is unclear and lacks comments, explain how journal bitmaps - work here for the reader. Convey a sense of the design here. What - is a window? */ -/* - I mean `a window of zero bits' as in description of this function - Zam. 
*/ - - if (!bi) { - reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer " - "for bitmap %d", bmap_n); - return 0; - } - - bh = reiserfs_read_bitmap_block(s, bmap_n); - if (bh == NULL) - return 0; - - while (1) { - cont: - if (bi->free_count < min) { - brelse(bh); - return 0; // No free blocks in this bitmap - } - - /* search for a first zero bit -- beginning of a window */ - *beg = reiserfs_find_next_zero_le_bit - ((unsigned long *)(bh->b_data), boundary, *beg); - - if (*beg + min > boundary) { /* search for a zero bit fails or the rest of bitmap block - * cannot contain a zero window of minimum size */ - brelse(bh); - return 0; - } - - if (unfm && is_block_in_journal(s, bmap_n, *beg, beg)) - continue; - /* first zero bit found; we check next bits */ - for (end = *beg + 1;; end++) { - if (end >= *beg + max || end >= boundary - || reiserfs_test_le_bit(end, bh->b_data)) { - next = end; - break; - } - /* finding the other end of zero bit window requires looking into journal structures (in - * case of searching for free blocks for unformatted nodes) */ - if (unfm && is_block_in_journal(s, bmap_n, end, &next)) - break; - } - - /* now (*beg) points to beginning of zero bits window, - * (end) points to one bit after the window end */ - if (end - *beg >= min) { /* it seems we have found window of proper size */ - int i; - reiserfs_prepare_for_journal(s, bh, 1); - /* try to set all blocks used checking are they still free */ - for (i = *beg; i < end; i++) { - /* It seems that we should not check in journal again. */ - if (reiserfs_test_and_set_le_bit - (i, bh->b_data)) { - /* bit was set by another process - * while we slept in prepare_for_journal() */ - PROC_INFO_INC(s, scan_bitmap.stolen); - if (i >= *beg + min) { /* we can continue with smaller set of allocated blocks, - * if length of this set is more or equal to `min' */ - end = i; - break; - } - /* otherwise we clear all bit were set ... 
*/ - while (--i >= *beg) - reiserfs_clear_le_bit - (i, bh->b_data); - reiserfs_restore_prepared_buffer(s, bh); - *beg = org; - /* ... and search again in current block from beginning */ - goto cont; - } - } - bi->free_count -= (end - *beg); - journal_mark_dirty(th, s, bh); - brelse(bh); - - /* free block count calculation */ - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), - 1); - PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg)); - journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s)); - - return end - (*beg); - } else { - *beg = next; - } - } -} - -static int bmap_hash_id(struct super_block *s, u32 id) -{ - char *hash_in = NULL; - unsigned long hash; - unsigned bm; - - if (id <= 2) { - bm = 1; - } else { - hash_in = (char *)(&id); - hash = keyed_hash(hash_in, 4); - bm = hash % reiserfs_bmap_count(s); - if (!bm) - bm = 1; - } - /* this can only be true when SB_BMAP_NR = 1 */ - if (bm >= reiserfs_bmap_count(s)) - bm = 0; - return bm; -} - -/* - * hashes the id and then returns > 0 if the block group for the - * corresponding hash is full - */ -static inline int block_group_used(struct super_block *s, u32 id) -{ - int bm = bmap_hash_id(s, id); - struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm]; - - /* If we don't have cached information on this bitmap block, we're - * going to have to load it later anyway. Loading it here allows us - * to make a better decision. This favors long-term performance gain - * with a better on-disk layout vs. a short term gain of skipping the - * read and potentially having a bad placement. 
*/ - if (info->free_count == UINT_MAX) { - struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm); - brelse(bh); - } - - if (info->free_count > ((s->s_blocksize << 3) * 60 / 100)) { - return 0; - } - return 1; -} - -/* - * the packing is returned in disk byte order - */ -__le32 reiserfs_choose_packing(struct inode * dir) -{ - __le32 packing; - if (TEST_OPTION(packing_groups, dir->i_sb)) { - u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id); - /* - * some versions of reiserfsck expect packing locality 1 to be - * special - */ - if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir)) - packing = INODE_PKEY(dir)->k_objectid; - else - packing = INODE_PKEY(dir)->k_dir_id; - } else - packing = INODE_PKEY(dir)->k_objectid; - return packing; -} - -/* Tries to find contiguous zero bit window (given size) in given region of - * bitmap and place new blocks there. Returns number of allocated blocks. */ -static int scan_bitmap(struct reiserfs_transaction_handle *th, - b_blocknr_t * start, b_blocknr_t finish, - int min, int max, int unfm, sector_t file_block) -{ - int nr_allocated = 0; - struct super_block *s = th->t_super; - /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr - * - Hans, it is not a block number - Zam. */ - - unsigned int bm, off; - unsigned int end_bm, end_off; - unsigned int off_max = s->s_blocksize << 3; - - BUG_ON(!th->t_trans_id); - - PROC_INFO_INC(s, scan_bitmap.call); - if (SB_FREE_BLOCKS(s) <= 0) - return 0; // No point in looking for more free blocks - - get_bit_address(s, *start, &bm, &off); - get_bit_address(s, finish, &end_bm, &end_off); - if (bm > reiserfs_bmap_count(s)) - return 0; - if (end_bm > reiserfs_bmap_count(s)) - end_bm = reiserfs_bmap_count(s); - - /* When the bitmap is more than 10% free, anyone can allocate. - * When it's less than 10% free, only files that already use the - * bitmap are allowed. Once we pass 80% full, this restriction - * is lifted. 
- * - * We do this so that files that grow later still have space close to - * their original allocation. This improves locality, and presumably - * performance as a result. - * - * This is only an allocation policy and does not make up for getting a - * bad hint. Decent hinting must be implemented for this to work well. - */ - if (TEST_OPTION(skip_busy, s) - && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) { - for (; bm < end_bm; bm++, off = 0) { - if ((off && (!unfm || (file_block != 0))) - || SB_AP_BITMAP(s)[bm].free_count > - (s->s_blocksize << 3) / 10) - nr_allocated = - scan_bitmap_block(th, bm, &off, off_max, - min, max, unfm); - if (nr_allocated) - goto ret; - } - /* we know from above that start is a reasonable number */ - get_bit_address(s, *start, &bm, &off); - } - - for (; bm < end_bm; bm++, off = 0) { - nr_allocated = - scan_bitmap_block(th, bm, &off, off_max, min, max, unfm); - if (nr_allocated) - goto ret; - } - - nr_allocated = - scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm); - - ret: - *start = bm * off_max + off; - return nr_allocated; - -} - -static void _reiserfs_free_block(struct reiserfs_transaction_handle *th, - struct inode *inode, b_blocknr_t block, - int for_unformatted) -{ - struct super_block *s = th->t_super; - struct reiserfs_super_block *rs; - struct buffer_head *sbh, *bmbh; - struct reiserfs_bitmap_info *apbi; - unsigned int nr, offset; - - BUG_ON(!th->t_trans_id); - - PROC_INFO_INC(s, free_block); - - rs = SB_DISK_SUPER_BLOCK(s); - sbh = SB_BUFFER_WITH_SB(s); - apbi = SB_AP_BITMAP(s); - - get_bit_address(s, block, &nr, &offset); - - if (nr >= reiserfs_bmap_count(s)) { - reiserfs_error(s, "vs-4075", "block %lu is out of range", - block); - return; - } - - bmbh = reiserfs_read_bitmap_block(s, nr); - if (!bmbh) - return; - - reiserfs_prepare_for_journal(s, bmbh, 1); - - /* clear bit for the given block in bit map */ - if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) { - reiserfs_error(s, "vs-4080", - "block %lu: 
bit already cleared", block); - } - apbi[nr].free_count++; - journal_mark_dirty(th, s, bmbh); - brelse(bmbh); - - reiserfs_prepare_for_journal(s, sbh, 1); - /* update super block */ - set_sb_free_blocks(rs, sb_free_blocks(rs) + 1); - - journal_mark_dirty(th, s, sbh); - if (for_unformatted) - dquot_free_block_nodirty(inode, 1); -} - -void reiserfs_free_block(struct reiserfs_transaction_handle *th, - struct inode *inode, b_blocknr_t block, - int for_unformatted) -{ - struct super_block *s = th->t_super; - BUG_ON(!th->t_trans_id); - - RFALSE(!s, "vs-4061: trying to free block on nonexistent device"); - if (!is_reusable(s, block, 1)) - return; - - if (block > sb_block_count(REISERFS_SB(s)->s_rs)) { - reiserfs_error(th->t_super, "bitmap-4072", - "Trying to free block outside file system " - "boundaries (%lu > %lu)", - block, sb_block_count(REISERFS_SB(s)->s_rs)); - return; - } - /* mark it before we clear it, just in case */ - journal_mark_freed(th, s, block); - _reiserfs_free_block(th, inode, block, for_unformatted); -} - -/* preallocated blocks don't need to be run through journal_mark_freed */ -static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th, - struct inode *inode, b_blocknr_t block) -{ - BUG_ON(!th->t_trans_id); - RFALSE(!th->t_super, - "vs-4060: trying to free block on nonexistent device"); - if (!is_reusable(th->t_super, block, 1)) - return; - _reiserfs_free_block(th, inode, block, 1); -} - -static void __discard_prealloc(struct reiserfs_transaction_handle *th, - struct reiserfs_inode_info *ei) -{ - unsigned long save = ei->i_prealloc_block; - int dirty = 0; - struct inode *inode = &ei->vfs_inode; - BUG_ON(!th->t_trans_id); -#ifdef CONFIG_REISERFS_CHECK - if (ei->i_prealloc_count < 0) - reiserfs_error(th->t_super, "zam-4001", - "inode has negative prealloc blocks count."); -#endif - while (ei->i_prealloc_count > 0) { - reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block); - ei->i_prealloc_block++; - ei->i_prealloc_count--; 
- dirty = 1; - } - if (dirty) - reiserfs_update_sd(th, inode); - ei->i_prealloc_block = save; - list_del_init(&(ei->i_prealloc_list)); -} - -/* FIXME: It should be inline function */ -void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th, - struct inode *inode) -{ - struct reiserfs_inode_info *ei = REISERFS_I(inode); - BUG_ON(!th->t_trans_id); - if (ei->i_prealloc_count) - __discard_prealloc(th, ei); -} - -void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th) -{ - struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list; - - BUG_ON(!th->t_trans_id); - - while (!list_empty(plist)) { - struct reiserfs_inode_info *ei; - ei = list_entry(plist->next, struct reiserfs_inode_info, - i_prealloc_list); -#ifdef CONFIG_REISERFS_CHECK - if (!ei->i_prealloc_count) { - reiserfs_error(th->t_super, "zam-4001", - "inode is in prealloc list but has " - "no preallocated blocks."); - } -#endif - __discard_prealloc(th, ei); - } -} - -void reiserfs_init_alloc_options(struct super_block *s) -{ - set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s)); - set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s)); - set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s)); -} - -/* block allocator related options are parsed here */ -int reiserfs_parse_alloc_options(struct super_block *s, char *options) -{ - char *this_char, *value; - - REISERFS_SB(s)->s_alloc_options.bits = 0; /* clear default settings */ - - while ((this_char = strsep(&options, ":")) != NULL) { - if ((value = strchr(this_char, '=')) != NULL) - *value++ = 0; - - if (!strcmp(this_char, "concentrating_formatted_nodes")) { - int temp; - SET_OPTION(concentrating_formatted_nodes); - temp = (value - && *value) ? 
simple_strtoul(value, &value, - 0) : 10; - if (temp <= 0 || temp > 100) { - REISERFS_SB(s)->s_alloc_options.border = 10; - } else { - REISERFS_SB(s)->s_alloc_options.border = - 100 / temp; - } - continue; - } - if (!strcmp(this_char, "displacing_large_files")) { - SET_OPTION(displacing_large_files); - REISERFS_SB(s)->s_alloc_options.large_file_size = - (value - && *value) ? simple_strtoul(value, &value, 0) : 16; - continue; - } - if (!strcmp(this_char, "displacing_new_packing_localities")) { - SET_OPTION(displacing_new_packing_localities); - continue; - }; - - if (!strcmp(this_char, "old_hashed_relocation")) { - SET_OPTION(old_hashed_relocation); - continue; - } - - if (!strcmp(this_char, "new_hashed_relocation")) { - SET_OPTION(new_hashed_relocation); - continue; - } - - if (!strcmp(this_char, "dirid_groups")) { - SET_OPTION(dirid_groups); - continue; - } - if (!strcmp(this_char, "oid_groups")) { - SET_OPTION(oid_groups); - continue; - } - if (!strcmp(this_char, "packing_groups")) { - SET_OPTION(packing_groups); - continue; - } - if (!strcmp(this_char, "hashed_formatted_nodes")) { - SET_OPTION(hashed_formatted_nodes); - continue; - } - - if (!strcmp(this_char, "skip_busy")) { - SET_OPTION(skip_busy); - continue; - } - - if (!strcmp(this_char, "hundredth_slices")) { - SET_OPTION(hundredth_slices); - continue; - } - - if (!strcmp(this_char, "old_way")) { - SET_OPTION(old_way); - continue; - } - - if (!strcmp(this_char, "displace_based_on_dirid")) { - SET_OPTION(displace_based_on_dirid); - continue; - } - - if (!strcmp(this_char, "preallocmin")) { - REISERFS_SB(s)->s_alloc_options.preallocmin = - (value - && *value) ? simple_strtoul(value, &value, 0) : 4; - continue; - } - - if (!strcmp(this_char, "preallocsize")) { - REISERFS_SB(s)->s_alloc_options.preallocsize = - (value - && *value) ? 
simple_strtoul(value, &value, - 0) : - PREALLOCATION_SIZE; - continue; - } - - reiserfs_warning(s, "zam-4001", "unknown option - %s", - this_char); - return 1; - } - - reiserfs_info(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s)); - return 0; -} - -static void print_sep(struct seq_file *seq, int *first) -{ - if (!*first) - seq_puts(seq, ":"); - else - *first = 0; -} - -void show_alloc_options(struct seq_file *seq, struct super_block *s) -{ - int first = 1; - - if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) | - (1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups))) - return; - - seq_puts(seq, ",alloc="); - - if (TEST_OPTION(concentrating_formatted_nodes, s)) { - print_sep(seq, &first); - if (REISERFS_SB(s)->s_alloc_options.border != 10) { - seq_printf(seq, "concentrating_formatted_nodes=%d", - 100 / REISERFS_SB(s)->s_alloc_options.border); - } else - seq_puts(seq, "concentrating_formatted_nodes"); - } - if (TEST_OPTION(displacing_large_files, s)) { - print_sep(seq, &first); - if (REISERFS_SB(s)->s_alloc_options.large_file_size != 16) { - seq_printf(seq, "displacing_large_files=%lu", - REISERFS_SB(s)->s_alloc_options.large_file_size); - } else - seq_puts(seq, "displacing_large_files"); - } - if (TEST_OPTION(displacing_new_packing_localities, s)) { - print_sep(seq, &first); - seq_puts(seq, "displacing_new_packing_localities"); - } - if (TEST_OPTION(old_hashed_relocation, s)) { - print_sep(seq, &first); - seq_puts(seq, "old_hashed_relocation"); - } - if (TEST_OPTION(new_hashed_relocation, s)) { - print_sep(seq, &first); - seq_puts(seq, "new_hashed_relocation"); - } - if (TEST_OPTION(dirid_groups, s)) { - print_sep(seq, &first); - seq_puts(seq, "dirid_groups"); - } - if (TEST_OPTION(oid_groups, s)) { - print_sep(seq, &first); - seq_puts(seq, "oid_groups"); - } - if (TEST_OPTION(packing_groups, s)) { - print_sep(seq, &first); - seq_puts(seq, "packing_groups"); - } - if (TEST_OPTION(hashed_formatted_nodes, s)) { - print_sep(seq, &first); - seq_puts(seq, 
"hashed_formatted_nodes"); - } - if (TEST_OPTION(skip_busy, s)) { - print_sep(seq, &first); - seq_puts(seq, "skip_busy"); - } - if (TEST_OPTION(hundredth_slices, s)) { - print_sep(seq, &first); - seq_puts(seq, "hundredth_slices"); - } - if (TEST_OPTION(old_way, s)) { - print_sep(seq, &first); - seq_puts(seq, "old_way"); - } - if (TEST_OPTION(displace_based_on_dirid, s)) { - print_sep(seq, &first); - seq_puts(seq, "displace_based_on_dirid"); - } - if (REISERFS_SB(s)->s_alloc_options.preallocmin != 0) { - print_sep(seq, &first); - seq_printf(seq, "preallocmin=%d", - REISERFS_SB(s)->s_alloc_options.preallocmin); - } - if (REISERFS_SB(s)->s_alloc_options.preallocsize != 17) { - print_sep(seq, &first); - seq_printf(seq, "preallocsize=%d", - REISERFS_SB(s)->s_alloc_options.preallocsize); - } -} - -static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint) -{ - char *hash_in; - if (hint->formatted_node) { - hash_in = (char *)&hint->key.k_dir_id; - } else { - if (!hint->inode) { - //hint->search_start = hint->beg; - hash_in = (char *)&hint->key.k_dir_id; - } else - if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) - hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id); - else - hash_in = - (char *)(&INODE_PKEY(hint->inode)->k_objectid); - } - - hint->search_start = - hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg); -} - -/* - * Relocation based on dirid, hashing them into a given bitmap block - * files. 
Formatted nodes are unaffected, a separate policy covers them - */ -static void dirid_groups(reiserfs_blocknr_hint_t * hint) -{ - unsigned long hash; - __u32 dirid = 0; - int bm = 0; - struct super_block *sb = hint->th->t_super; - if (hint->inode) - dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id); - else if (hint->formatted_node) - dirid = hint->key.k_dir_id; - - if (dirid) { - bm = bmap_hash_id(sb, dirid); - hash = bm * (sb->s_blocksize << 3); - /* give a portion of the block group to metadata */ - if (hint->inode) - hash += sb->s_blocksize / 2; - hint->search_start = hash; - } -} - -/* - * Relocation based on oid, hashing them into a given bitmap block - * files. Formatted nodes are unaffected, a separate policy covers them - */ -static void oid_groups(reiserfs_blocknr_hint_t * hint) -{ - if (hint->inode) { - unsigned long hash; - __u32 oid; - __u32 dirid; - int bm; - - dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id); - - /* keep the root dir and it's first set of subdirs close to - * the start of the disk - */ - if (dirid <= 2) - hash = (hint->inode->i_sb->s_blocksize << 3); - else { - oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid); - bm = bmap_hash_id(hint->inode->i_sb, oid); - hash = bm * (hint->inode->i_sb->s_blocksize << 3); - } - hint->search_start = hash; - } -} - -/* returns 1 if it finds an indirect item and gets valid hint info - * from it, otherwise 0 - */ -static int get_left_neighbor(reiserfs_blocknr_hint_t * hint) -{ - struct treepath *path; - struct buffer_head *bh; - struct item_head *ih; - int pos_in_item; - __le32 *item; - int ret = 0; - - if (!hint->path) /* reiserfs code can call this function w/o pointer to path - * structure supplied; then we rely on supplied search_start */ - return 0; - - path = hint->path; - bh = get_last_bh(path); - RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor"); - ih = get_ih(path); - pos_in_item = path->pos_in_item; - item = get_item(path); - - hint->search_start = 
bh->b_blocknr; - - if (!hint->formatted_node && is_indirect_le_ih(ih)) { - /* for indirect item: go to left and look for the first non-hole entry - in the indirect item */ - if (pos_in_item == I_UNFM_NUM(ih)) - pos_in_item--; -// pos_in_item = I_UNFM_NUM (ih) - 1; - while (pos_in_item >= 0) { - int t = get_block_num(item, pos_in_item); - if (t) { - hint->search_start = t; - ret = 1; - break; - } - pos_in_item--; - } - } - - /* does result value fit into specified region? */ - return ret; -} - -/* should be, if formatted node, then try to put on first part of the device - specified as number of percent with mount option device, else try to put - on last of device. This is not to say it is good code to do so, - but the effect should be measured. */ -static inline void set_border_in_hint(struct super_block *s, - reiserfs_blocknr_hint_t * hint) -{ - b_blocknr_t border = - SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border; - - if (hint->formatted_node) - hint->end = border - 1; - else - hint->beg = border; -} - -static inline void displace_large_file(reiserfs_blocknr_hint_t * hint) -{ - if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) - hint->search_start = - hint->beg + - keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id), - 4) % (hint->end - hint->beg); - else - hint->search_start = - hint->beg + - keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid), - 4) % (hint->end - hint->beg); -} - -static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint) -{ - char *hash_in; - - if (!hint->inode) - hash_in = (char *)&hint->key.k_dir_id; - else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) - hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id); - else - hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid); - - hint->search_start = - hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg); -} - -static inline int -this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t * - hint) -{ - return 
hint->block == - REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size; -} - -#ifdef DISPLACE_NEW_PACKING_LOCALITIES -static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint) -{ - struct in_core_key *key = &hint->key; - - hint->th->displace_new_blocks = 0; - hint->search_start = - hint->beg + keyed_hash((char *)(&key->k_objectid), - 4) % (hint->end - hint->beg); -} -#endif - -static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint) -{ - b_blocknr_t border; - u32 hash_in; - - if (hint->formatted_node || hint->inode == NULL) { - return 0; - } - - hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id); - border = - hint->beg + (u32) keyed_hash(((char *)(&hash_in)), - 4) % (hint->end - hint->beg - 1); - if (border > hint->search_start) - hint->search_start = border; - - return 1; -} - -static inline int old_way(reiserfs_blocknr_hint_t * hint) -{ - b_blocknr_t border; - - if (hint->formatted_node || hint->inode == NULL) { - return 0; - } - - border = - hint->beg + - le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end - - hint->beg); - if (border > hint->search_start) - hint->search_start = border; - - return 1; -} - -static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint) -{ - struct in_core_key *key = &hint->key; - b_blocknr_t slice_start; - - slice_start = - (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100); - if (slice_start > hint->search_start - || slice_start + (hint->end / 100) <= hint->search_start) { - hint->search_start = slice_start; - } -} - -static void determine_search_start(reiserfs_blocknr_hint_t * hint, - int amount_needed) -{ - struct super_block *s = hint->th->t_super; - int unfm_hint; - - hint->beg = 0; - hint->end = SB_BLOCK_COUNT(s) - 1; - - /* This is former border algorithm. 
Now with tunable border offset */ - if (concentrating_formatted_nodes(s)) - set_border_in_hint(s, hint); - -#ifdef DISPLACE_NEW_PACKING_LOCALITIES - /* whenever we create a new directory, we displace it. At first we will - hash for location, later we might look for a moderately empty place for - it */ - if (displacing_new_packing_localities(s) - && hint->th->displace_new_blocks) { - displace_new_packing_locality(hint); - - /* we do not continue determine_search_start, - * if new packing locality is being displaced */ - return; - } -#endif - - /* all persons should feel encouraged to add more special cases here and - * test them */ - - if (displacing_large_files(s) && !hint->formatted_node - && this_blocknr_allocation_would_make_it_a_large_file(hint)) { - displace_large_file(hint); - return; - } - - /* if none of our special cases is relevant, use the left neighbor in the - tree order of the new node we are allocating for */ - if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) { - hash_formatted_node(hint); - return; - } - - unfm_hint = get_left_neighbor(hint); - - /* Mimic old block allocator behaviour, that is if VFS allowed for preallocation, - new blocks are displaced based on directory ID. 
Also, if suggested search_start - is less than last preallocated block, we start searching from it, assuming that - HDD dataflow is faster in forward direction */ - if (TEST_OPTION(old_way, s)) { - if (!hint->formatted_node) { - if (!reiserfs_hashed_relocation(s)) - old_way(hint); - else if (!reiserfs_no_unhashed_relocation(s)) - old_hashed_relocation(hint); - - if (hint->inode - && hint->search_start < - REISERFS_I(hint->inode)->i_prealloc_block) - hint->search_start = - REISERFS_I(hint->inode)->i_prealloc_block; - } - return; - } - - /* This is an approach proposed by Hans */ - if (TEST_OPTION(hundredth_slices, s) - && !(displacing_large_files(s) && !hint->formatted_node)) { - hundredth_slices(hint); - return; - } - - /* old_hashed_relocation only works on unformatted */ - if (!unfm_hint && !hint->formatted_node && - TEST_OPTION(old_hashed_relocation, s)) { - old_hashed_relocation(hint); - } - /* new_hashed_relocation works with both formatted/unformatted nodes */ - if ((!unfm_hint || hint->formatted_node) && - TEST_OPTION(new_hashed_relocation, s)) { - new_hashed_relocation(hint); - } - /* dirid grouping works only on unformatted nodes */ - if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) { - dirid_groups(hint); - } -#ifdef DISPLACE_NEW_PACKING_LOCALITIES - if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) { - dirid_groups(hint); - } -#endif - - /* oid grouping works only on unformatted nodes */ - if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) { - oid_groups(hint); - } - return; -} - -static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint) -{ - /* make minimum size a mount option and benchmark both ways */ - /* we preallocate blocks only for regular files, specific size */ - /* benchmark preallocating always and see what happens */ - - hint->prealloc_size = 0; - - if (!hint->formatted_node && hint->preallocate) { - if (S_ISREG(hint->inode->i_mode) - && hint->inode->i_size >= - 
REISERFS_SB(hint->th->t_super)->s_alloc_options. - preallocmin * hint->inode->i_sb->s_blocksize) - hint->prealloc_size = - REISERFS_SB(hint->th->t_super)->s_alloc_options. - preallocsize - 1; - } - return CARRY_ON; -} - -/* XXX I know it could be merged with upper-level function; - but may be result function would be too complex. */ -static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint, - b_blocknr_t * new_blocknrs, - b_blocknr_t start, - b_blocknr_t finish, int min, - int amount_needed, - int prealloc_size) -{ - int rest = amount_needed; - int nr_allocated; - - while (rest > 0 && start <= finish) { - nr_allocated = scan_bitmap(hint->th, &start, finish, min, - rest + prealloc_size, - !hint->formatted_node, hint->block); - - if (nr_allocated == 0) /* no new blocks allocated, return */ - break; - - /* fill free_blocknrs array first */ - while (rest > 0 && nr_allocated > 0) { - *new_blocknrs++ = start++; - rest--; - nr_allocated--; - } - - /* do we have something to fill prealloc. array also ? 
*/ - if (nr_allocated > 0) { - /* it means prealloc_size was greater that 0 and we do preallocation */ - list_add(&REISERFS_I(hint->inode)->i_prealloc_list, - &SB_JOURNAL(hint->th->t_super)-> - j_prealloc_list); - REISERFS_I(hint->inode)->i_prealloc_block = start; - REISERFS_I(hint->inode)->i_prealloc_count = - nr_allocated; - break; - } - } - - return (amount_needed - rest); -} - -static inline int blocknrs_and_prealloc_arrays_from_search_start - (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, - int amount_needed) { - struct super_block *s = hint->th->t_super; - b_blocknr_t start = hint->search_start; - b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1; - int passno = 0; - int nr_allocated = 0; - - determine_prealloc_size(hint); - if (!hint->formatted_node) { - int quota_ret; -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(s, REISERFS_DEBUG_CODE, - "reiserquota: allocating %d blocks id=%u", - amount_needed, hint->inode->i_uid); -#endif - quota_ret = - dquot_alloc_block_nodirty(hint->inode, amount_needed); - if (quota_ret) /* Quota exceeded? 
*/ - return QUOTA_EXCEEDED; - if (hint->preallocate && hint->prealloc_size) { -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(s, REISERFS_DEBUG_CODE, - "reiserquota: allocating (prealloc) %d blocks id=%u", - hint->prealloc_size, hint->inode->i_uid); -#endif - quota_ret = dquot_prealloc_block_nodirty(hint->inode, - hint->prealloc_size); - if (quota_ret) - hint->preallocate = hint->prealloc_size = 0; - } - /* for unformatted nodes, force large allocations */ - } - - do { - switch (passno++) { - case 0: /* Search from hint->search_start to end of disk */ - start = hint->search_start; - finish = SB_BLOCK_COUNT(s) - 1; - break; - case 1: /* Search from hint->beg to hint->search_start */ - start = hint->beg; - finish = hint->search_start; - break; - case 2: /* Last chance: Search from 0 to hint->beg */ - start = 0; - finish = hint->beg; - break; - default: /* We've tried searching everywhere, not enough space */ - /* Free the blocks */ - if (!hint->formatted_node) { -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(s, REISERFS_DEBUG_CODE, - "reiserquota: freeing (nospace) %d blocks id=%u", - amount_needed + - hint->prealloc_size - - nr_allocated, - hint->inode->i_uid); -#endif - /* Free not allocated blocks */ - dquot_free_block_nodirty(hint->inode, - amount_needed + hint->prealloc_size - - nr_allocated); - } - while (nr_allocated--) - reiserfs_free_block(hint->th, hint->inode, - new_blocknrs[nr_allocated], - !hint->formatted_node); - - return NO_DISK_SPACE; - } - } while ((nr_allocated += allocate_without_wrapping_disk(hint, - new_blocknrs + - nr_allocated, - start, finish, - 1, - amount_needed - - nr_allocated, - hint-> - prealloc_size)) - < amount_needed); - if (!hint->formatted_node && - amount_needed + hint->prealloc_size > - nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) { - /* Some of preallocation blocks were not allocated */ -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(s, REISERFS_DEBUG_CODE, - "reiserquota: freeing (failed prealloc) %d blocks id=%u", - 
amount_needed + hint->prealloc_size - - nr_allocated - - REISERFS_I(hint->inode)->i_prealloc_count, - hint->inode->i_uid); -#endif - dquot_free_block_nodirty(hint->inode, amount_needed + - hint->prealloc_size - nr_allocated - - REISERFS_I(hint->inode)-> - i_prealloc_count); - } - - return CARRY_ON; -} - -/* grab new blocknrs from preallocated list */ -/* return amount still needed after using them */ -static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint, - b_blocknr_t * new_blocknrs, - int amount_needed) -{ - struct inode *inode = hint->inode; - - if (REISERFS_I(inode)->i_prealloc_count > 0) { - while (amount_needed) { - - *new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++; - REISERFS_I(inode)->i_prealloc_count--; - - amount_needed--; - - if (REISERFS_I(inode)->i_prealloc_count <= 0) { - list_del(&REISERFS_I(inode)->i_prealloc_list); - break; - } - } - } - /* return amount still needed after using preallocated blocks */ - return amount_needed; -} - -int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, int amount_needed, int reserved_by_us /* Amount of blocks we have - already reserved */ ) -{ - int initial_amount_needed = amount_needed; - int ret; - struct super_block *s = hint->th->t_super; - - /* Check if there is enough space, taking into account reserved space */ - if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks < - amount_needed - reserved_by_us) - return NO_DISK_SPACE; - /* should this be if !hint->inode && hint->preallocate? */ - /* do you mean hint->formatted_node can be removed ? - Zam */ - /* hint->formatted_node cannot be removed because we try to access - inode information here, and there is often no inode assotiated with - metadata allocations - green */ - - if (!hint->formatted_node && hint->preallocate) { - amount_needed = use_preallocated_list_if_available - (hint, new_blocknrs, amount_needed); - if (amount_needed == 0) /* all blocknrs we need we got from - prealloc. 
list */ - return CARRY_ON; - new_blocknrs += (initial_amount_needed - amount_needed); - } - - /* find search start and save it in hint structure */ - determine_search_start(hint, amount_needed); - if (hint->search_start >= SB_BLOCK_COUNT(s)) - hint->search_start = SB_BLOCK_COUNT(s) - 1; - - /* allocation itself; fill new_blocknrs and preallocation arrays */ - ret = blocknrs_and_prealloc_arrays_from_search_start - (hint, new_blocknrs, amount_needed); - - /* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we - * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second - * variant) */ - - if (ret != CARRY_ON) { - while (amount_needed++ < initial_amount_needed) { - reiserfs_free_block(hint->th, hint->inode, - *(--new_blocknrs), 1); - } - } - return ret; -} - -void reiserfs_cache_bitmap_metadata(struct super_block *sb, - struct buffer_head *bh, - struct reiserfs_bitmap_info *info) -{ - unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size); - - /* The first bit must ALWAYS be 1 */ - if (!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data)) - reiserfs_error(sb, "reiserfs-2025", "bitmap block %lu is " - "corrupted: first bit must be 1", bh->b_blocknr); - - info->free_count = 0; - - while (--cur >= (unsigned long *)bh->b_data) { - /* 0 and ~0 are special, we can optimize for them */ - if (*cur == 0) - info->free_count += BITS_PER_LONG; - else if (*cur != ~0L) /* A mix, investigate */ - info->free_count += BITS_PER_LONG - hweight_long(*cur); - } -} - -struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, - unsigned int bitmap) -{ - b_blocknr_t block = (sb->s_blocksize << 3) * bitmap; - struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap; - struct buffer_head *bh; - - /* Way old format filesystems had the bitmaps packed up front. - * I doubt there are any of these left, but just in case... 
*/ - if (unlikely(test_bit(REISERFS_OLD_FORMAT, - &(REISERFS_SB(sb)->s_properties)))) - block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap; - else if (bitmap == 0) - block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; - - reiserfs_write_unlock(sb); - bh = sb_bread(sb, block); - reiserfs_write_lock(sb); - if (bh == NULL) - reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " - "reading failed", __func__, block); - else { - if (buffer_locked(bh)) { - PROC_INFO_INC(sb, scan_bitmap.wait); - reiserfs_write_unlock(sb); - __wait_on_buffer(bh); - reiserfs_write_lock(sb); - } - BUG_ON(!buffer_uptodate(bh)); - BUG_ON(atomic_read(&bh->b_count) == 0); - - if (info->free_count == UINT_MAX) - reiserfs_cache_bitmap_metadata(sb, bh, info); - } - - return bh; -} - -int reiserfs_init_bitmap_cache(struct super_block *sb) -{ - struct reiserfs_bitmap_info *bitmap; - unsigned int bmap_nr = reiserfs_bmap_count(sb); - - bitmap = vmalloc(sizeof(*bitmap) * bmap_nr); - if (bitmap == NULL) - return -ENOMEM; - - memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr); - - SB_AP_BITMAP(sb) = bitmap; - - return 0; -} - -void reiserfs_free_bitmap_cache(struct super_block *sb) -{ - if (SB_AP_BITMAP(sb)) { - vfree(SB_AP_BITMAP(sb)); - SB_AP_BITMAP(sb) = NULL; - } -} diff --git a/ANDROID_3.4.5/fs/reiserfs/dir.c b/ANDROID_3.4.5/fs/reiserfs/dir.c deleted file mode 100644 index 66c53b64..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/dir.c +++ /dev/null @@ -1,319 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include "reiserfs.h" -#include <linux/stat.h> -#include <linux/buffer_head.h> -#include <linux/slab.h> -#include <asm/uaccess.h> - -extern const struct reiserfs_key MIN_KEY; - -static int reiserfs_readdir(struct file *, void *, filldir_t); -static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end, - int datasync); - -const struct file_operations 
reiserfs_dir_operations = { - .llseek = generic_file_llseek, - .read = generic_read_dir, - .readdir = reiserfs_readdir, - .fsync = reiserfs_dir_fsync, - .unlocked_ioctl = reiserfs_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = reiserfs_compat_ioctl, -#endif -}; - -static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end, - int datasync) -{ - struct inode *inode = filp->f_mapping->host; - int err; - - err = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (err) - return err; - - mutex_lock(&inode->i_mutex); - reiserfs_write_lock(inode->i_sb); - err = reiserfs_commit_for_inode(inode); - reiserfs_write_unlock(inode->i_sb); - mutex_unlock(&inode->i_mutex); - if (err < 0) - return err; - return 0; -} - -#define store_ih(where,what) copy_item_head (where, what) - -static inline bool is_privroot_deh(struct dentry *dir, - struct reiserfs_de_head *deh) -{ - struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; - return (dir == dir->d_parent && privroot->d_inode && - deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); -} - -int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, - filldir_t filldir, loff_t *pos) -{ - struct inode *inode = dentry->d_inode; - struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ - INITIALIZE_PATH(path_to_entry); - struct buffer_head *bh; - int item_num, entry_num; - const struct reiserfs_key *rkey; - struct item_head *ih, tmp_ih; - int search_res; - char *local_buf; - loff_t next_pos; - char small_buf[32]; /* avoid kmalloc if we can */ - struct reiserfs_dir_entry de; - int ret = 0; - - reiserfs_write_lock(inode->i_sb); - - reiserfs_check_lock_depth(inode->i_sb, "readdir"); - - /* form key for search the next directory entry using f_pos field of - file structure */ - make_cpu_key(&pos_key, inode, *pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3); - next_pos = cpu_key_k_offset(&pos_key); - - path_to_entry.reada = PATH_READA; - while (1) { - research: - /* 
search the directory item, containing entry with specified key */ - search_res = - search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry, - &de); - if (search_res == IO_ERROR) { - // FIXME: we could just skip part of directory which could - // not be read - ret = -EIO; - goto out; - } - entry_num = de.de_entry_num; - bh = de.de_bh; - item_num = de.de_item_num; - ih = de.de_ih; - store_ih(&tmp_ih, ih); - - /* we must have found item, that is item of this directory, */ - RFALSE(COMP_SHORT_KEYS(&(ih->ih_key), &pos_key), - "vs-9000: found item %h does not match to dir we readdir %K", - ih, &pos_key); - RFALSE(item_num > B_NR_ITEMS(bh) - 1, - "vs-9005 item_num == %d, item amount == %d", - item_num, B_NR_ITEMS(bh)); - - /* and entry must be not more than number of entries in the item */ - RFALSE(I_ENTRY_COUNT(ih) < entry_num, - "vs-9010: entry number is too big %d (%d)", - entry_num, I_ENTRY_COUNT(ih)); - - if (search_res == POSITION_FOUND - || entry_num < I_ENTRY_COUNT(ih)) { - /* go through all entries in the directory item beginning from the entry, that has been found */ - struct reiserfs_de_head *deh = - B_I_DEH(bh, ih) + entry_num; - - for (; entry_num < I_ENTRY_COUNT(ih); - entry_num++, deh++) { - int d_reclen; - char *d_name; - off_t d_off; - ino_t d_ino; - - if (!de_visible(deh)) - /* it is hidden entry */ - continue; - d_reclen = entry_length(bh, ih, entry_num); - d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh); - - if (d_reclen <= 0 || - d_name + d_reclen > bh->b_data + bh->b_size) { - /* There is corrupted data in entry, - * We'd better stop here */ - pathrelse(&path_to_entry); - ret = -EIO; - goto out; - } - - if (!d_name[d_reclen - 1]) - d_reclen = strlen(d_name); - - if (d_reclen > - REISERFS_MAX_NAME(inode->i_sb-> - s_blocksize)) { - /* too big to send back to VFS */ - continue; - } - - /* Ignore the .reiserfs_priv entry */ - if (is_privroot_deh(dentry, deh)) - continue; - - d_off = deh_offset(deh); - *pos = d_off; - d_ino = deh_objectid(deh); - if 
(d_reclen <= 32) { - local_buf = small_buf; - } else { - local_buf = kmalloc(d_reclen, - GFP_NOFS); - if (!local_buf) { - pathrelse(&path_to_entry); - ret = -ENOMEM; - goto out; - } - if (item_moved(&tmp_ih, &path_to_entry)) { - kfree(local_buf); - goto research; - } - } - // Note, that we copy name to user space via temporary - // buffer (local_buf) because filldir will block if - // user space buffer is swapped out. At that time - // entry can move to somewhere else - memcpy(local_buf, d_name, d_reclen); - - /* - * Since filldir might sleep, we can release - * the write lock here for other waiters - */ - reiserfs_write_unlock(inode->i_sb); - if (filldir - (dirent, local_buf, d_reclen, d_off, d_ino, - DT_UNKNOWN) < 0) { - reiserfs_write_lock(inode->i_sb); - if (local_buf != small_buf) { - kfree(local_buf); - } - goto end; - } - reiserfs_write_lock(inode->i_sb); - if (local_buf != small_buf) { - kfree(local_buf); - } - // next entry should be looked for with such offset - next_pos = deh_offset(deh) + 1; - - if (item_moved(&tmp_ih, &path_to_entry)) { - goto research; - } - } /* for */ - } - - if (item_num != B_NR_ITEMS(bh) - 1) - // end of directory has been reached - goto end; - - /* item we went through is last item of node. 
Using right - delimiting key check is it directory end */ - rkey = get_rkey(&path_to_entry, inode->i_sb); - if (!comp_le_keys(rkey, &MIN_KEY)) { - /* set pos_key to key, that is the smallest and greater - that key of the last entry in the item */ - set_cpu_key_k_offset(&pos_key, next_pos); - continue; - } - - if (COMP_SHORT_KEYS(rkey, &pos_key)) { - // end of directory has been reached - goto end; - } - - /* directory continues in the right neighboring block */ - set_cpu_key_k_offset(&pos_key, - le_key_k_offset(KEY_FORMAT_3_5, rkey)); - - } /* while */ - -end: - *pos = next_pos; - pathrelse(&path_to_entry); - reiserfs_check_path(&path_to_entry); -out: - reiserfs_write_unlock(inode->i_sb); - return ret; -} - -static int reiserfs_readdir(struct file *file, void *dirent, filldir_t filldir) -{ - struct dentry *dentry = file->f_path.dentry; - return reiserfs_readdir_dentry(dentry, dirent, filldir, &file->f_pos); -} - -/* compose directory item containing "." and ".." entries (entries are - not aligned to 4 byte boundary) */ -/* the last four params are LE */ -void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid, - __le32 par_dirid, __le32 par_objid) -{ - struct reiserfs_de_head *deh; - - memset(body, 0, EMPTY_DIR_SIZE_V1); - deh = (struct reiserfs_de_head *)body; - - /* direntry header of "." */ - put_deh_offset(&(deh[0]), DOT_OFFSET); - /* these two are from make_le_item_head, and are are LE */ - deh[0].deh_dir_id = dirid; - deh[0].deh_objectid = objid; - deh[0].deh_state = 0; /* Endian safe if 0 */ - put_deh_location(&(deh[0]), EMPTY_DIR_SIZE_V1 - strlen(".")); - mark_de_visible(&(deh[0])); - - /* direntry header of ".." */ - put_deh_offset(&(deh[1]), DOT_DOT_OFFSET); - /* key of ".." 
for the root directory */ - /* these two are from the inode, and are are LE */ - deh[1].deh_dir_id = par_dirid; - deh[1].deh_objectid = par_objid; - deh[1].deh_state = 0; /* Endian safe if 0 */ - put_deh_location(&(deh[1]), deh_location(&(deh[0])) - strlen("..")); - mark_de_visible(&(deh[1])); - - /* copy ".." and "." */ - memcpy(body + deh_location(&(deh[0])), ".", 1); - memcpy(body + deh_location(&(deh[1])), "..", 2); -} - -/* compose directory item containing "." and ".." entries */ -void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, - __le32 par_dirid, __le32 par_objid) -{ - struct reiserfs_de_head *deh; - - memset(body, 0, EMPTY_DIR_SIZE); - deh = (struct reiserfs_de_head *)body; - - /* direntry header of "." */ - put_deh_offset(&(deh[0]), DOT_OFFSET); - /* these two are from make_le_item_head, and are are LE */ - deh[0].deh_dir_id = dirid; - deh[0].deh_objectid = objid; - deh[0].deh_state = 0; /* Endian safe if 0 */ - put_deh_location(&(deh[0]), EMPTY_DIR_SIZE - ROUND_UP(strlen("."))); - mark_de_visible(&(deh[0])); - - /* direntry header of ".." */ - put_deh_offset(&(deh[1]), DOT_DOT_OFFSET); - /* key of ".." for the root directory */ - /* these two are from the inode, and are are LE */ - deh[1].deh_dir_id = par_dirid; - deh[1].deh_objectid = par_objid; - deh[1].deh_state = 0; /* Endian safe if 0 */ - put_deh_location(&(deh[1]), - deh_location(&(deh[0])) - ROUND_UP(strlen(".."))); - mark_de_visible(&(deh[1])); - - /* copy ".." and "." 
*/ - memcpy(body + deh_location(&(deh[0])), ".", 1); - memcpy(body + deh_location(&(deh[1])), "..", 2); -} diff --git a/ANDROID_3.4.5/fs/reiserfs/do_balan.c b/ANDROID_3.4.5/fs/reiserfs/do_balan.c deleted file mode 100644 index 2b7882b5..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/do_balan.c +++ /dev/null @@ -1,2074 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -/* Now we have all buffers that must be used in balancing of the tree */ -/* Further calculations can not cause schedule(), and thus the buffer */ -/* tree will be stable until the balancing will be finished */ -/* balance the tree according to the analysis made before, */ -/* and using buffers obtained after all above. */ - -/** - ** balance_leaf_when_delete - ** balance_leaf - ** do_balance - ** - **/ - -#include <asm/uaccess.h> -#include <linux/time.h> -#include "reiserfs.h" -#include <linux/buffer_head.h> -#include <linux/kernel.h> - -static inline void buffer_info_init_left(struct tree_balance *tb, - struct buffer_info *bi) -{ - bi->tb = tb; - bi->bi_bh = tb->L[0]; - bi->bi_parent = tb->FL[0]; - bi->bi_position = get_left_neighbor_position(tb, 0); -} - -static inline void buffer_info_init_right(struct tree_balance *tb, - struct buffer_info *bi) -{ - bi->tb = tb; - bi->bi_bh = tb->R[0]; - bi->bi_parent = tb->FR[0]; - bi->bi_position = get_right_neighbor_position(tb, 0); -} - -static inline void buffer_info_init_tbS0(struct tree_balance *tb, - struct buffer_info *bi) -{ - bi->tb = tb; - bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); - bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); - bi->bi_position = PATH_H_POSITION(tb->tb_path, 1); -} - -static inline void buffer_info_init_bh(struct tree_balance *tb, - struct buffer_info *bi, - struct buffer_head *bh) -{ - bi->tb = tb; - bi->bi_bh = bh; - bi->bi_parent = NULL; - bi->bi_position = 0; -} - -inline void do_balance_mark_leaf_dirty(struct tree_balance *tb, - struct buffer_head *bh, int flag) -{ - 
journal_mark_dirty(tb->transaction_handle, - tb->transaction_handle->t_super, bh); -} - -#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty -#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty - -/* summary: - if deleting something ( tb->insert_size[0] < 0 ) - return(balance_leaf_when_delete()); (flag d handled here) - else - if lnum is larger than 0 we put items into the left node - if rnum is larger than 0 we put items into the right node - if snum1 is larger than 0 we put items into the new node s1 - if snum2 is larger than 0 we put items into the new node s2 -Note that all *num* count new items being created. - -It would be easier to read balance_leaf() if each of these summary -lines was a separate procedure rather than being inlined. I think -that there are many passages here and in balance_leaf_when_delete() in -which two calls to one procedure can replace two passages, and it -might save cache space and improve software maintenance costs to do so. - -Vladimir made the perceptive comment that we should offload most of -the decision making in this function into fix_nodes/check_balance, and -then create some sort of structure in tb that says what actions should -be performed by do_balance. 
- --Hans */ - -/* Balance leaf node in case of delete or cut: insert_size[0] < 0 - * - * lnum, rnum can have values >= -1 - * -1 means that the neighbor must be joined with S - * 0 means that nothing should be done with the neighbor - * >0 means to shift entirely or partly the specified number of items to the neighbor - */ -static int balance_leaf_when_delete(struct tree_balance *tb, int flag) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - int item_pos = PATH_LAST_POSITION(tb->tb_path); - int pos_in_item = tb->tb_path->pos_in_item; - struct buffer_info bi; - int n; - struct item_head *ih; - - RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1, - "vs- 12000: level: wrong FR %z", tb->FR[0]); - RFALSE(tb->blknum[0] > 1, - "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]); - RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0), - "PAP-12010: tree can not be empty"); - - ih = B_N_PITEM_HEAD(tbS0, item_pos); - buffer_info_init_tbS0(tb, &bi); - - /* Delete or truncate the item */ - - switch (flag) { - case M_DELETE: /* delete item in S[0] */ - - RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0], - "vs-12013: mode Delete, insert size %d, ih to be deleted %h", - -tb->insert_size[0], ih); - - leaf_delete_items(&bi, 0, item_pos, 1, -1); - - if (!item_pos && tb->CFL[0]) { - if (B_NR_ITEMS(tbS0)) { - replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, - 0); - } else { - if (!PATH_H_POSITION(tb->tb_path, 1)) - replace_key(tb, tb->CFL[0], tb->lkey[0], - PATH_H_PPARENT(tb->tb_path, - 0), 0); - } - } - - RFALSE(!item_pos && !tb->CFL[0], - "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], - tb->L[0]); - - break; - - case M_CUT:{ /* cut item in S[0] */ - if (is_direntry_le_ih(ih)) { - - /* UFS unlink semantics are such that you can only delete one directory entry at a time. 
*/ - /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */ - tb->insert_size[0] = -1; - leaf_cut_from_buffer(&bi, item_pos, pos_in_item, - -tb->insert_size[0]); - - RFALSE(!item_pos && !pos_in_item && !tb->CFL[0], - "PAP-12030: can not change delimiting key. CFL[0]=%p", - tb->CFL[0]); - - if (!item_pos && !pos_in_item && tb->CFL[0]) { - replace_key(tb, tb->CFL[0], tb->lkey[0], - tbS0, 0); - } - } else { - leaf_cut_from_buffer(&bi, item_pos, pos_in_item, - -tb->insert_size[0]); - - RFALSE(!ih_item_len(ih), - "PAP-12035: cut must leave non-zero dynamic length of item"); - } - break; - } - - default: - print_cur_tb("12040"); - reiserfs_panic(tb->tb_sb, "PAP-12040", - "unexpected mode: %s(%d)", - (flag == - M_PASTE) ? "PASTE" : ((flag == - M_INSERT) ? "INSERT" : - "UNKNOWN"), flag); - } - - /* the rule is that no shifting occurs unless by shifting a node can be freed */ - n = B_NR_ITEMS(tbS0); - if (tb->lnum[0]) { /* L[0] takes part in balancing */ - if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */ - if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */ - if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) { - /* all contents of all the 3 buffers will be in L[0] */ - if (PATH_H_POSITION(tb->tb_path, 1) == 0 - && 1 < B_NR_ITEMS(tb->FR[0])) - replace_key(tb, tb->CFL[0], - tb->lkey[0], - tb->FR[0], 1); - - leaf_move_items(LEAF_FROM_S_TO_L, tb, n, - -1, NULL); - leaf_move_items(LEAF_FROM_R_TO_L, tb, - B_NR_ITEMS(tb->R[0]), - -1, NULL); - - reiserfs_invalidate_buffer(tb, tbS0); - reiserfs_invalidate_buffer(tb, - tb->R[0]); - - return 0; - } - /* all contents of all the 3 buffers will be in R[0] */ - leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1, - NULL); - leaf_move_items(LEAF_FROM_L_TO_R, tb, - B_NR_ITEMS(tb->L[0]), -1, NULL); - - /* right_delimiting_key is correct in R[0] */ - replace_key(tb, tb->CFR[0], tb->rkey[0], - tb->R[0], 0); - - reiserfs_invalidate_buffer(tb, tbS0); - reiserfs_invalidate_buffer(tb, 
tb->L[0]); - - return -1; - } - - RFALSE(tb->rnum[0] != 0, - "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]); - /* all contents of L[0] and S[0] will be in L[0] */ - leaf_shift_left(tb, n, -1); - - reiserfs_invalidate_buffer(tb, tbS0); - - return 0; - } - /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */ - - RFALSE((tb->lnum[0] + tb->rnum[0] < n) || - (tb->lnum[0] + tb->rnum[0] > n + 1), - "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent", - tb->rnum[0], tb->lnum[0], n); - RFALSE((tb->lnum[0] + tb->rnum[0] == n) && - (tb->lbytes != -1 || tb->rbytes != -1), - "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split", - tb->rbytes, tb->lbytes); - RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) && - (tb->lbytes < 1 || tb->rbytes != -1), - "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split", - tb->rbytes, tb->lbytes); - - leaf_shift_left(tb, tb->lnum[0], tb->lbytes); - leaf_shift_right(tb, tb->rnum[0], tb->rbytes); - - reiserfs_invalidate_buffer(tb, tbS0); - - return 0; - } - - if (tb->rnum[0] == -1) { - /* all contents of R[0] and S[0] will be in R[0] */ - leaf_shift_right(tb, n, -1); - reiserfs_invalidate_buffer(tb, tbS0); - return 0; - } - - RFALSE(tb->rnum[0], - "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]); - return 0; -} - -static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */ - const char *body, /* body of inserted item or bytes to paste */ - int flag, /* i - insert, d - delete, c - cut, p - paste - (see comment to do_balance) */ - struct item_head *insert_key, /* in our processing of one level we sometimes determine what - must be inserted into the next higher level. 
This insertion - consists of a key or two keys and their corresponding - pointers */ - struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */ - ) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0] - of the affected item */ - struct buffer_info bi; - struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */ - int snum[2]; /* number of items that will be placed - into S_new (includes partially shifted - items) */ - int sbytes[2]; /* if an item is partially shifted into S_new then - if it is a directory item - it is the number of entries from the item that are shifted into S_new - else - it is the number of bytes from the item that are shifted into S_new - */ - int n, i; - int ret_val; - int pos_in_item; - int zeros_num; - - PROC_INFO_INC(tb->tb_sb, balance_at[0]); - - /* Make balance in case insert_size[0] < 0 */ - if (tb->insert_size[0] < 0) - return balance_leaf_when_delete(tb, flag); - - zeros_num = 0; - if (flag == M_INSERT && !body) - zeros_num = ih_item_len(ih); - - pos_in_item = tb->tb_path->pos_in_item; - /* for indirect item pos_in_item is measured in unformatted node - pointers. 
Recalculate to bytes */ - if (flag != M_INSERT - && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) - pos_in_item *= UNFM_P_SIZE; - - if (tb->lnum[0] > 0) { - /* Shift lnum[0] items from S[0] to the left neighbor L[0] */ - if (item_pos < tb->lnum[0]) { - /* new item or it part falls to L[0], shift it too */ - n = B_NR_ITEMS(tb->L[0]); - - switch (flag) { - case M_INSERT: /* insert item into L[0] */ - - if (item_pos == tb->lnum[0] - 1 - && tb->lbytes != -1) { - /* part of new item falls into L[0] */ - int new_item_len; - int version; - - ret_val = - leaf_shift_left(tb, tb->lnum[0] - 1, - -1); - - /* Calculate item length to insert to S[0] */ - new_item_len = - ih_item_len(ih) - tb->lbytes; - /* Calculate and check item length to insert to L[0] */ - put_ih_item_len(ih, - ih_item_len(ih) - - new_item_len); - - RFALSE(ih_item_len(ih) <= 0, - "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d", - ih_item_len(ih)); - - /* Insert new item into L[0] */ - buffer_info_init_left(tb, &bi); - leaf_insert_into_buf(&bi, - n + item_pos - - ret_val, ih, body, - zeros_num > - ih_item_len(ih) ? - ih_item_len(ih) : - zeros_num); - - version = ih_version(ih); - - /* Calculate key component, item length and body to insert into S[0] */ - set_le_ih_k_offset(ih, - le_ih_k_offset(ih) + - (tb-> - lbytes << - (is_indirect_le_ih - (ih) ? 
tb->tb_sb-> - s_blocksize_bits - - UNFM_P_SHIFT : - 0))); - - put_ih_item_len(ih, new_item_len); - if (tb->lbytes > zeros_num) { - body += - (tb->lbytes - zeros_num); - zeros_num = 0; - } else - zeros_num -= tb->lbytes; - - RFALSE(ih_item_len(ih) <= 0, - "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d", - ih_item_len(ih)); - } else { - /* new item in whole falls into L[0] */ - /* Shift lnum[0]-1 items to L[0] */ - ret_val = - leaf_shift_left(tb, tb->lnum[0] - 1, - tb->lbytes); - /* Insert new item into L[0] */ - buffer_info_init_left(tb, &bi); - leaf_insert_into_buf(&bi, - n + item_pos - - ret_val, ih, body, - zeros_num); - tb->insert_size[0] = 0; - zeros_num = 0; - } - break; - - case M_PASTE: /* append item in L[0] */ - - if (item_pos == tb->lnum[0] - 1 - && tb->lbytes != -1) { - /* we must shift the part of the appended item */ - if (is_direntry_le_ih - (B_N_PITEM_HEAD(tbS0, item_pos))) { - - RFALSE(zeros_num, - "PAP-12090: invalid parameter in case of a directory"); - /* directory item */ - if (tb->lbytes > pos_in_item) { - /* new directory entry falls into L[0] */ - struct item_head - *pasted; - int l_pos_in_item = - pos_in_item; - - /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */ - ret_val = - leaf_shift_left(tb, - tb-> - lnum - [0], - tb-> - lbytes - - - 1); - if (ret_val - && !item_pos) { - pasted = - B_N_PITEM_HEAD - (tb->L[0], - B_NR_ITEMS - (tb-> - L[0]) - - 1); - l_pos_in_item += - I_ENTRY_COUNT - (pasted) - - (tb-> - lbytes - - 1); - } - - /* Append given directory entry to directory item */ - buffer_info_init_left(tb, &bi); - leaf_paste_in_buffer - (&bi, - n + item_pos - - ret_val, - l_pos_in_item, - tb->insert_size[0], - body, zeros_num); - - /* previous string prepared space for pasting new entry, following string pastes this entry */ - - /* when we have merge directory item, pos_in_item has been changed too */ - - /* paste new directory entry. 
1 is entry number */ - leaf_paste_entries(&bi, - n + - item_pos - - - ret_val, - l_pos_in_item, - 1, - (struct - reiserfs_de_head - *) - body, - body - + - DEH_SIZE, - tb-> - insert_size - [0] - ); - tb->insert_size[0] = 0; - } else { - /* new directory item doesn't fall into L[0] */ - /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */ - leaf_shift_left(tb, - tb-> - lnum[0], - tb-> - lbytes); - } - /* Calculate new position to append in item body */ - pos_in_item -= tb->lbytes; - } else { - /* regular object */ - RFALSE(tb->lbytes <= 0, - "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", - tb->lbytes); - RFALSE(pos_in_item != - ih_item_len - (B_N_PITEM_HEAD - (tbS0, item_pos)), - "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d", - ih_item_len - (B_N_PITEM_HEAD - (tbS0, item_pos)), - pos_in_item); - - if (tb->lbytes >= pos_in_item) { - /* appended item will be in L[0] in whole */ - int l_n; - - /* this bytes number must be appended to the last item of L[h] */ - l_n = - tb->lbytes - - pos_in_item; - - /* Calculate new insert_size[0] */ - tb->insert_size[0] -= - l_n; - - RFALSE(tb-> - insert_size[0] <= - 0, - "PAP-12105: there is nothing to paste into L[0]. insert_size=%d", - tb-> - insert_size[0]); - ret_val = - leaf_shift_left(tb, - tb-> - lnum - [0], - ih_item_len - (B_N_PITEM_HEAD - (tbS0, - item_pos))); - /* Append to body of item in L[0] */ - buffer_info_init_left(tb, &bi); - leaf_paste_in_buffer - (&bi, - n + item_pos - - ret_val, - ih_item_len - (B_N_PITEM_HEAD - (tb->L[0], - n + item_pos - - ret_val)), l_n, - body, - zeros_num > - l_n ? 
l_n : - zeros_num); - /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */ - { - int version; - int temp_l = - l_n; - - RFALSE - (ih_item_len - (B_N_PITEM_HEAD - (tbS0, - 0)), - "PAP-12106: item length must be 0"); - RFALSE - (comp_short_le_keys - (B_N_PKEY - (tbS0, 0), - B_N_PKEY - (tb->L[0], - n + - item_pos - - - ret_val)), - "PAP-12107: items must be of the same file"); - if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) { - temp_l = - l_n - << - (tb-> - tb_sb-> - s_blocksize_bits - - - UNFM_P_SHIFT); - } - /* update key of first item in S0 */ - version = - ih_version - (B_N_PITEM_HEAD - (tbS0, 0)); - set_le_key_k_offset - (version, - B_N_PKEY - (tbS0, 0), - le_key_k_offset - (version, - B_N_PKEY - (tbS0, - 0)) + - temp_l); - /* update left delimiting key */ - set_le_key_k_offset - (version, - B_N_PDELIM_KEY - (tb-> - CFL[0], - tb-> - lkey[0]), - le_key_k_offset - (version, - B_N_PDELIM_KEY - (tb-> - CFL[0], - tb-> - lkey[0])) - + temp_l); - } - - /* Calculate new body, position in item and insert_size[0] */ - if (l_n > zeros_num) { - body += - (l_n - - zeros_num); - zeros_num = 0; - } else - zeros_num -= - l_n; - pos_in_item = 0; - - RFALSE - (comp_short_le_keys - (B_N_PKEY(tbS0, 0), - B_N_PKEY(tb->L[0], - B_NR_ITEMS - (tb-> - L[0]) - - 1)) - || - !op_is_left_mergeable - (B_N_PKEY(tbS0, 0), - tbS0->b_size) - || - !op_is_left_mergeable - (B_N_PDELIM_KEY - (tb->CFL[0], - tb->lkey[0]), - tbS0->b_size), - "PAP-12120: item must be merge-able with left neighboring item"); - } else { /* only part of the appended item will be in L[0] */ - - /* Calculate position in item for append in S[0] */ - pos_in_item -= - tb->lbytes; - - RFALSE(pos_in_item <= 0, - "PAP-12125: no place for paste. pos_in_item=%d", - pos_in_item); - - /* Shift lnum[0] - 1 items in whole. 
Shift lbytes - 1 byte from item number lnum[0] */ - leaf_shift_left(tb, - tb-> - lnum[0], - tb-> - lbytes); - } - } - } else { /* appended item will be in L[0] in whole */ - - struct item_head *pasted; - - if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */ - /* then increment pos_in_item by the size of the last item in L[0] */ - pasted = - B_N_PITEM_HEAD(tb->L[0], - n - 1); - if (is_direntry_le_ih(pasted)) - pos_in_item += - ih_entry_count - (pasted); - else - pos_in_item += - ih_item_len(pasted); - } - - /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ - ret_val = - leaf_shift_left(tb, tb->lnum[0], - tb->lbytes); - /* Append to body of item in L[0] */ - buffer_info_init_left(tb, &bi); - leaf_paste_in_buffer(&bi, - n + item_pos - - ret_val, - pos_in_item, - tb->insert_size[0], - body, zeros_num); - - /* if appended item is directory, paste entry */ - pasted = - B_N_PITEM_HEAD(tb->L[0], - n + item_pos - - ret_val); - if (is_direntry_le_ih(pasted)) - leaf_paste_entries(&bi, - n + - item_pos - - ret_val, - pos_in_item, - 1, - (struct - reiserfs_de_head - *)body, - body + - DEH_SIZE, - tb-> - insert_size - [0] - ); - /* if appended item is indirect item, put unformatted node into un list */ - if (is_indirect_le_ih(pasted)) - set_ih_free_space(pasted, 0); - tb->insert_size[0] = 0; - zeros_num = 0; - } - break; - default: /* cases d and t */ - reiserfs_panic(tb->tb_sb, "PAP-12130", - "lnum > 0: unexpected mode: " - " %s(%d)", - (flag == - M_DELETE) ? "DELETE" : ((flag == - M_CUT) - ? "CUT" - : - "UNKNOWN"), - flag); - } - } else { - /* new item doesn't fall into L[0] */ - leaf_shift_left(tb, tb->lnum[0], tb->lbytes); - } - } - - /* tb->lnum[0] > 0 */ - /* Calculate new item position */ - item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 
1 : 0)); - - if (tb->rnum[0] > 0) { - /* shift rnum[0] items from S[0] to the right neighbor R[0] */ - n = B_NR_ITEMS(tbS0); - switch (flag) { - - case M_INSERT: /* insert item */ - if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */ - if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */ - loff_t old_key_comp, old_len, - r_zeros_number; - const char *r_body; - int version; - loff_t offset; - - leaf_shift_right(tb, tb->rnum[0] - 1, - -1); - - version = ih_version(ih); - /* Remember key component and item length */ - old_key_comp = le_ih_k_offset(ih); - old_len = ih_item_len(ih); - - /* Calculate key component and item length to insert into R[0] */ - offset = - le_ih_k_offset(ih) + - ((old_len - - tb-> - rbytes) << (is_indirect_le_ih(ih) - ? tb->tb_sb-> - s_blocksize_bits - - UNFM_P_SHIFT : 0)); - set_le_ih_k_offset(ih, offset); - put_ih_item_len(ih, tb->rbytes); - /* Insert part of the item into R[0] */ - buffer_info_init_right(tb, &bi); - if ((old_len - tb->rbytes) > zeros_num) { - r_zeros_number = 0; - r_body = - body + (old_len - - tb->rbytes) - - zeros_num; - } else { - r_body = body; - r_zeros_number = - zeros_num - (old_len - - tb->rbytes); - zeros_num -= r_zeros_number; - } - - leaf_insert_into_buf(&bi, 0, ih, r_body, - r_zeros_number); - - /* Replace right delimiting key by first key in R[0] */ - replace_key(tb, tb->CFR[0], tb->rkey[0], - tb->R[0], 0); - - /* Calculate key component and item length to insert into S[0] */ - set_le_ih_k_offset(ih, old_key_comp); - put_ih_item_len(ih, - old_len - tb->rbytes); - - tb->insert_size[0] -= tb->rbytes; - - } else { /* whole new item falls into R[0] */ - - /* Shift rnum[0]-1 items to R[0] */ - ret_val = - leaf_shift_right(tb, - tb->rnum[0] - 1, - tb->rbytes); - /* Insert new item into R[0] */ - buffer_info_init_right(tb, &bi); - leaf_insert_into_buf(&bi, - item_pos - n + - tb->rnum[0] - 1, - ih, body, - zeros_num); - - if (item_pos - n + tb->rnum[0] - 
1 == 0) { - replace_key(tb, tb->CFR[0], - tb->rkey[0], - tb->R[0], 0); - - } - zeros_num = tb->insert_size[0] = 0; - } - } else { /* new item or part of it doesn't fall into R[0] */ - - leaf_shift_right(tb, tb->rnum[0], tb->rbytes); - } - break; - - case M_PASTE: /* append item */ - - if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */ - if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */ - if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */ - int entry_count; - - RFALSE(zeros_num, - "PAP-12145: invalid parameter in case of a directory"); - entry_count = - I_ENTRY_COUNT(B_N_PITEM_HEAD - (tbS0, - item_pos)); - if (entry_count - tb->rbytes < - pos_in_item) - /* new directory entry falls into R[0] */ - { - int paste_entry_position; - - RFALSE(tb->rbytes - 1 >= - entry_count - || !tb-> - insert_size[0], - "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d", - tb->rbytes, - entry_count); - /* Shift rnum[0]-1 items in whole. 
Shift rbytes-1 directory entries from directory item number rnum[0] */ - leaf_shift_right(tb, - tb-> - rnum - [0], - tb-> - rbytes - - 1); - /* Paste given directory entry to directory item */ - paste_entry_position = - pos_in_item - - entry_count + - tb->rbytes - 1; - buffer_info_init_right(tb, &bi); - leaf_paste_in_buffer - (&bi, 0, - paste_entry_position, - tb->insert_size[0], - body, zeros_num); - /* paste entry */ - leaf_paste_entries(&bi, - 0, - paste_entry_position, - 1, - (struct - reiserfs_de_head - *) - body, - body - + - DEH_SIZE, - tb-> - insert_size - [0] - ); - - if (paste_entry_position - == 0) { - /* change delimiting keys */ - replace_key(tb, - tb-> - CFR - [0], - tb-> - rkey - [0], - tb-> - R - [0], - 0); - } - - tb->insert_size[0] = 0; - pos_in_item++; - } else { /* new directory entry doesn't fall into R[0] */ - - leaf_shift_right(tb, - tb-> - rnum - [0], - tb-> - rbytes); - } - } else { /* regular object */ - - int n_shift, n_rem, - r_zeros_number; - const char *r_body; - - /* Calculate number of bytes which must be shifted from appended item */ - if ((n_shift = - tb->rbytes - - tb->insert_size[0]) < 0) - n_shift = 0; - - RFALSE(pos_in_item != - ih_item_len - (B_N_PITEM_HEAD - (tbS0, item_pos)), - "PAP-12155: invalid position to paste. 
ih_item_len=%d, pos_in_item=%d", - pos_in_item, - ih_item_len - (B_N_PITEM_HEAD - (tbS0, item_pos))); - - leaf_shift_right(tb, - tb->rnum[0], - n_shift); - /* Calculate number of bytes which must remain in body after appending to R[0] */ - if ((n_rem = - tb->insert_size[0] - - tb->rbytes) < 0) - n_rem = 0; - - { - int version; - unsigned long temp_rem = - n_rem; - - version = - ih_version - (B_N_PITEM_HEAD - (tb->R[0], 0)); - if (is_indirect_le_key - (version, - B_N_PKEY(tb->R[0], - 0))) { - temp_rem = - n_rem << - (tb->tb_sb-> - s_blocksize_bits - - - UNFM_P_SHIFT); - } - set_le_key_k_offset - (version, - B_N_PKEY(tb->R[0], - 0), - le_key_k_offset - (version, - B_N_PKEY(tb->R[0], - 0)) + - temp_rem); - set_le_key_k_offset - (version, - B_N_PDELIM_KEY(tb-> - CFR - [0], - tb-> - rkey - [0]), - le_key_k_offset - (version, - B_N_PDELIM_KEY - (tb->CFR[0], - tb->rkey[0])) + - temp_rem); - } -/* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem; - k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/ - do_balance_mark_internal_dirty - (tb, tb->CFR[0], 0); - - /* Append part of body into R[0] */ - buffer_info_init_right(tb, &bi); - if (n_rem > zeros_num) { - r_zeros_number = 0; - r_body = - body + n_rem - - zeros_num; - } else { - r_body = body; - r_zeros_number = - zeros_num - n_rem; - zeros_num -= - r_zeros_number; - } - - leaf_paste_in_buffer(&bi, 0, - n_shift, - tb-> - insert_size - [0] - - n_rem, - r_body, - r_zeros_number); - - if (is_indirect_le_ih - (B_N_PITEM_HEAD - (tb->R[0], 0))) { -#if 0 - RFALSE(n_rem, - "PAP-12160: paste more than one unformatted node pointer"); -#endif - set_ih_free_space - (B_N_PITEM_HEAD - (tb->R[0], 0), 0); - } - tb->insert_size[0] = n_rem; - if (!n_rem) - pos_in_item++; - } - } else { /* pasted item in whole falls into R[0] */ - - struct item_head *pasted; - - ret_val = - leaf_shift_right(tb, tb->rnum[0], - tb->rbytes); - /* append item in R[0] */ - if (pos_in_item >= 0) { - buffer_info_init_right(tb, &bi); - leaf_paste_in_buffer(&bi, - 
item_pos - - n + - tb-> - rnum[0], - pos_in_item, - tb-> - insert_size - [0], body, - zeros_num); - } - - /* paste new entry, if item is directory item */ - pasted = - B_N_PITEM_HEAD(tb->R[0], - item_pos - n + - tb->rnum[0]); - if (is_direntry_le_ih(pasted) - && pos_in_item >= 0) { - leaf_paste_entries(&bi, - item_pos - - n + - tb->rnum[0], - pos_in_item, - 1, - (struct - reiserfs_de_head - *)body, - body + - DEH_SIZE, - tb-> - insert_size - [0] - ); - if (!pos_in_item) { - - RFALSE(item_pos - n + - tb->rnum[0], - "PAP-12165: directory item must be first item of node when pasting is in 0th position"); - - /* update delimiting keys */ - replace_key(tb, - tb->CFR[0], - tb->rkey[0], - tb->R[0], - 0); - } - } - - if (is_indirect_le_ih(pasted)) - set_ih_free_space(pasted, 0); - zeros_num = tb->insert_size[0] = 0; - } - } else { /* new item doesn't fall into R[0] */ - - leaf_shift_right(tb, tb->rnum[0], tb->rbytes); - } - break; - default: /* cases d and t */ - reiserfs_panic(tb->tb_sb, "PAP-12175", - "rnum > 0: unexpected mode: %s(%d)", - (flag == - M_DELETE) ? "DELETE" : ((flag == - M_CUT) ? "CUT" - : "UNKNOWN"), - flag); - } - - } - - /* tb->rnum[0] > 0 */ - RFALSE(tb->blknum[0] > 3, - "PAP-12180: blknum can not be %d. It must be <= 3", - tb->blknum[0]); - RFALSE(tb->blknum[0] < 0, - "PAP-12185: blknum can not be %d. 
It must be >= 0", - tb->blknum[0]); - - /* if while adding to a node we discover that it is possible to split - it in two, and merge the left part into the left neighbor and the - right part into the right neighbor, eliminating the node */ - if (tb->blknum[0] == 0) { /* node S[0] is empty now */ - - RFALSE(!tb->lnum[0] || !tb->rnum[0], - "PAP-12190: lnum and rnum must not be zero"); - /* if insertion was done before 0-th position in R[0], right - delimiting key of the tb->L[0]'s and left delimiting key are - not set correctly */ - if (tb->CFL[0]) { - if (!tb->CFR[0]) - reiserfs_panic(tb->tb_sb, "vs-12195", - "CFR not initialized"); - copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), - B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])); - do_balance_mark_internal_dirty(tb, tb->CFL[0], 0); - } - - reiserfs_invalidate_buffer(tb, tbS0); - return 0; - } - - /* Fill new nodes that appear in place of S[0] */ - - /* I am told that this copying is because we need an array to enable - the looping code. -Hans */ - snum[0] = tb->s1num, snum[1] = tb->s2num; - sbytes[0] = tb->s1bytes; - sbytes[1] = tb->s2bytes; - for (i = tb->blknum[0] - 2; i >= 0; i--) { - - RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. 
Must be > 0", i, - snum[i]); - - /* here we shift from S to S_new nodes */ - - S_new[i] = get_FEB(tb); - - /* initialized block type and tree level */ - set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL); - - n = B_NR_ITEMS(tbS0); - - switch (flag) { - case M_INSERT: /* insert item */ - - if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */ - if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */ - int old_key_comp, old_len, - r_zeros_number; - const char *r_body; - int version; - - /* Move snum[i]-1 items from S[0] to S_new[i] */ - leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, - snum[i] - 1, -1, - S_new[i]); - /* Remember key component and item length */ - version = ih_version(ih); - old_key_comp = le_ih_k_offset(ih); - old_len = ih_item_len(ih); - - /* Calculate key component and item length to insert into S_new[i] */ - set_le_ih_k_offset(ih, - le_ih_k_offset(ih) + - ((old_len - - sbytes[i]) << - (is_indirect_le_ih - (ih) ? 
tb->tb_sb-> - s_blocksize_bits - - UNFM_P_SHIFT : - 0))); - - put_ih_item_len(ih, sbytes[i]); - - /* Insert part of the item into S_new[i] before 0-th item */ - buffer_info_init_bh(tb, &bi, S_new[i]); - - if ((old_len - sbytes[i]) > zeros_num) { - r_zeros_number = 0; - r_body = - body + (old_len - - sbytes[i]) - - zeros_num; - } else { - r_body = body; - r_zeros_number = - zeros_num - (old_len - - sbytes[i]); - zeros_num -= r_zeros_number; - } - - leaf_insert_into_buf(&bi, 0, ih, r_body, - r_zeros_number); - - /* Calculate key component and item length to insert into S[i] */ - set_le_ih_k_offset(ih, old_key_comp); - put_ih_item_len(ih, - old_len - sbytes[i]); - tb->insert_size[0] -= sbytes[i]; - } else { /* whole new item falls into S_new[i] */ - - /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */ - leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, - snum[i] - 1, sbytes[i], - S_new[i]); - - /* Insert new item into S_new[i] */ - buffer_info_init_bh(tb, &bi, S_new[i]); - leaf_insert_into_buf(&bi, - item_pos - n + - snum[i] - 1, ih, - body, zeros_num); - - zeros_num = tb->insert_size[0] = 0; - } - } - - else { /* new item or it part don't falls into S_new[i] */ - - leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, - snum[i], sbytes[i], S_new[i]); - } - break; - - case M_PASTE: /* append item */ - - if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */ - if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */ - struct item_head *aux_ih; - - RFALSE(ih, "PAP-12210: ih must be 0"); - - aux_ih = B_N_PITEM_HEAD(tbS0, item_pos); - if (is_direntry_le_ih(aux_ih)) { - /* we append to directory item */ - - int entry_count; - - entry_count = - ih_entry_count(aux_ih); - - if (entry_count - sbytes[i] < - pos_in_item - && pos_in_item <= - entry_count) { - /* new directory entry falls into S_new[i] */ - - RFALSE(!tb-> - insert_size[0], - "PAP-12215: insert_size is already 0"); - RFALSE(sbytes[i] - 1 >= - entry_count, 
- "PAP-12220: there are no so much entries (%d), only %d", - sbytes[i] - 1, - entry_count); - - /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */ - leaf_move_items - (LEAF_FROM_S_TO_SNEW, - tb, snum[i], - sbytes[i] - 1, - S_new[i]); - /* Paste given directory entry to directory item */ - buffer_info_init_bh(tb, &bi, S_new[i]); - leaf_paste_in_buffer - (&bi, 0, - pos_in_item - - entry_count + - sbytes[i] - 1, - tb->insert_size[0], - body, zeros_num); - /* paste new directory entry */ - leaf_paste_entries(&bi, - 0, - pos_in_item - - - entry_count - + - sbytes - [i] - - 1, 1, - (struct - reiserfs_de_head - *) - body, - body - + - DEH_SIZE, - tb-> - insert_size - [0] - ); - tb->insert_size[0] = 0; - pos_in_item++; - } else { /* new directory entry doesn't fall into S_new[i] */ - leaf_move_items - (LEAF_FROM_S_TO_SNEW, - tb, snum[i], - sbytes[i], - S_new[i]); - } - } else { /* regular object */ - - int n_shift, n_rem, - r_zeros_number; - const char *r_body; - - RFALSE(pos_in_item != - ih_item_len - (B_N_PITEM_HEAD - (tbS0, item_pos)) - || tb->insert_size[0] <= - 0, - "PAP-12225: item too short or insert_size <= 0"); - - /* Calculate number of bytes which must be shifted from appended item */ - n_shift = - sbytes[i] - - tb->insert_size[0]; - if (n_shift < 0) - n_shift = 0; - leaf_move_items - (LEAF_FROM_S_TO_SNEW, tb, - snum[i], n_shift, - S_new[i]); - - /* Calculate number of bytes which must remain in body after append to S_new[i] */ - n_rem = - tb->insert_size[0] - - sbytes[i]; - if (n_rem < 0) - n_rem = 0; - /* Append part of body into S_new[0] */ - buffer_info_init_bh(tb, &bi, S_new[i]); - if (n_rem > zeros_num) { - r_zeros_number = 0; - r_body = - body + n_rem - - zeros_num; - } else { - r_body = body; - r_zeros_number = - zeros_num - n_rem; - zeros_num -= - r_zeros_number; - } - - leaf_paste_in_buffer(&bi, 0, - n_shift, - tb-> - insert_size - [0] - - n_rem, - r_body, - r_zeros_number); - { - struct item_head 
*tmp; - - tmp = - B_N_PITEM_HEAD(S_new - [i], - 0); - if (is_indirect_le_ih - (tmp)) { - set_ih_free_space - (tmp, 0); - set_le_ih_k_offset - (tmp, - le_ih_k_offset - (tmp) + - (n_rem << - (tb-> - tb_sb-> - s_blocksize_bits - - - UNFM_P_SHIFT))); - } else { - set_le_ih_k_offset - (tmp, - le_ih_k_offset - (tmp) + - n_rem); - } - } - - tb->insert_size[0] = n_rem; - if (!n_rem) - pos_in_item++; - } - } else - /* item falls wholly into S_new[i] */ - { - int leaf_mi; - struct item_head *pasted; - -#ifdef CONFIG_REISERFS_CHECK - struct item_head *ih_check = - B_N_PITEM_HEAD(tbS0, item_pos); - - if (!is_direntry_le_ih(ih_check) - && (pos_in_item != ih_item_len(ih_check) - || tb->insert_size[0] <= 0)) - reiserfs_panic(tb->tb_sb, - "PAP-12235", - "pos_in_item " - "must be equal " - "to ih_item_len"); -#endif /* CONFIG_REISERFS_CHECK */ - - leaf_mi = - leaf_move_items(LEAF_FROM_S_TO_SNEW, - tb, snum[i], - sbytes[i], - S_new[i]); - - RFALSE(leaf_mi, - "PAP-12240: unexpected value returned by leaf_move_items (%d)", - leaf_mi); - - /* paste into item */ - buffer_info_init_bh(tb, &bi, S_new[i]); - leaf_paste_in_buffer(&bi, - item_pos - n + - snum[i], - pos_in_item, - tb->insert_size[0], - body, zeros_num); - - pasted = - B_N_PITEM_HEAD(S_new[i], - item_pos - n + - snum[i]); - if (is_direntry_le_ih(pasted)) { - leaf_paste_entries(&bi, - item_pos - - n + snum[i], - pos_in_item, - 1, - (struct - reiserfs_de_head - *)body, - body + - DEH_SIZE, - tb-> - insert_size - [0] - ); - } - - /* if we paste to indirect item update ih_free_space */ - if (is_indirect_le_ih(pasted)) - set_ih_free_space(pasted, 0); - zeros_num = tb->insert_size[0] = 0; - } - } - - else { /* pasted item doesn't fall into S_new[i] */ - - leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, - snum[i], sbytes[i], S_new[i]); - } - break; - default: /* cases d and t */ - reiserfs_panic(tb->tb_sb, "PAP-12245", - "blknum > 2: unexpected mode: %s(%d)", - (flag == - M_DELETE) ? "DELETE" : ((flag == - M_CUT) ? 
"CUT" - : "UNKNOWN"), - flag); - } - - memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE); - insert_ptr[i] = S_new[i]; - - RFALSE(!buffer_journaled(S_new[i]) - || buffer_journal_dirty(S_new[i]) - || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)", - i, S_new[i]); - } - - /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the - affected item which remains in S */ - if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */ - - switch (flag) { - case M_INSERT: /* insert item into S[0] */ - buffer_info_init_tbS0(tb, &bi); - leaf_insert_into_buf(&bi, item_pos, ih, body, - zeros_num); - - /* If we insert the first key change the delimiting key */ - if (item_pos == 0) { - if (tb->CFL[0]) /* can be 0 in reiserfsck */ - replace_key(tb, tb->CFL[0], tb->lkey[0], - tbS0, 0); - - } - break; - - case M_PASTE:{ /* append item in S[0] */ - struct item_head *pasted; - - pasted = B_N_PITEM_HEAD(tbS0, item_pos); - /* when directory, may be new entry already pasted */ - if (is_direntry_le_ih(pasted)) { - if (pos_in_item >= 0 && - pos_in_item <= - ih_entry_count(pasted)) { - - RFALSE(!tb->insert_size[0], - "PAP-12260: insert_size is 0 already"); - - /* prepare space */ - buffer_info_init_tbS0(tb, &bi); - leaf_paste_in_buffer(&bi, - item_pos, - pos_in_item, - tb-> - insert_size - [0], body, - zeros_num); - - /* paste entry */ - leaf_paste_entries(&bi, - item_pos, - pos_in_item, - 1, - (struct - reiserfs_de_head - *)body, - body + - DEH_SIZE, - tb-> - insert_size - [0] - ); - if (!item_pos && !pos_in_item) { - RFALSE(!tb->CFL[0] - || !tb->L[0], - "PAP-12270: CFL[0]/L[0] must be specified"); - if (tb->CFL[0]) { - replace_key(tb, - tb-> - CFL - [0], - tb-> - lkey - [0], - tbS0, - 0); - - } - } - tb->insert_size[0] = 0; - } - } else { /* regular object */ - if (pos_in_item == ih_item_len(pasted)) { - - RFALSE(tb->insert_size[0] <= 0, - "PAP-12275: insert size must not be %d", - 
tb->insert_size[0]); - buffer_info_init_tbS0(tb, &bi); - leaf_paste_in_buffer(&bi, - item_pos, - pos_in_item, - tb-> - insert_size - [0], body, - zeros_num); - - if (is_indirect_le_ih(pasted)) { -#if 0 - RFALSE(tb-> - insert_size[0] != - UNFM_P_SIZE, - "PAP-12280: insert_size for indirect item must be %d, not %d", - UNFM_P_SIZE, - tb-> - insert_size[0]); -#endif - set_ih_free_space - (pasted, 0); - } - tb->insert_size[0] = 0; - } -#ifdef CONFIG_REISERFS_CHECK - else { - if (tb->insert_size[0]) { - print_cur_tb("12285"); - reiserfs_panic(tb-> - tb_sb, - "PAP-12285", - "insert_size " - "must be 0 " - "(%d)", - tb->insert_size[0]); - } - } -#endif /* CONFIG_REISERFS_CHECK */ - - } - } /* case M_PASTE: */ - } - } -#ifdef CONFIG_REISERFS_CHECK - if (flag == M_PASTE && tb->insert_size[0]) { - print_cur_tb("12290"); - reiserfs_panic(tb->tb_sb, - "PAP-12290", "insert_size is still not 0 (%d)", - tb->insert_size[0]); - } -#endif /* CONFIG_REISERFS_CHECK */ - return 0; -} /* Leaf level of the tree is balanced (end of balance_leaf) */ - -/* Make empty node */ -void make_empty_node(struct buffer_info *bi) -{ - struct block_head *blkh; - - RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL"); - - blkh = B_BLK_HEAD(bi->bi_bh); - set_blkh_nr_item(blkh, 0); - set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh)); - - if (bi->bi_parent) - B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */ -} - -/* Get first empty buffer */ -struct buffer_head *get_FEB(struct tree_balance *tb) -{ - int i; - struct buffer_info bi; - - for (i = 0; i < MAX_FEB_SIZE; i++) - if (tb->FEB[i] != NULL) - break; - - if (i == MAX_FEB_SIZE) - reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty"); - - buffer_info_init_bh(tb, &bi, tb->FEB[i]); - make_empty_node(&bi); - set_buffer_uptodate(tb->FEB[i]); - tb->used[i] = tb->FEB[i]; - tb->FEB[i] = NULL; - - return tb->used[i]; -} - -/* This is now used because reiserfs_free_block has to be able to -** schedule. 
-*/ -static void store_thrown(struct tree_balance *tb, struct buffer_head *bh) -{ - int i; - - if (buffer_dirty(bh)) - reiserfs_warning(tb->tb_sb, "reiserfs-12320", - "called with dirty buffer"); - for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) - if (!tb->thrown[i]) { - tb->thrown[i] = bh; - get_bh(bh); /* free_thrown puts this */ - return; - } - reiserfs_warning(tb->tb_sb, "reiserfs-12321", - "too many thrown buffers"); -} - -static void free_thrown(struct tree_balance *tb) -{ - int i; - b_blocknr_t blocknr; - for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) { - if (tb->thrown[i]) { - blocknr = tb->thrown[i]->b_blocknr; - if (buffer_dirty(tb->thrown[i])) - reiserfs_warning(tb->tb_sb, "reiserfs-12322", - "called with dirty buffer %d", - blocknr); - brelse(tb->thrown[i]); /* incremented in store_thrown */ - reiserfs_free_block(tb->transaction_handle, NULL, - blocknr, 0); - } - } -} - -void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh) -{ - struct block_head *blkh; - blkh = B_BLK_HEAD(bh); - set_blkh_level(blkh, FREE_LEVEL); - set_blkh_nr_item(blkh, 0); - - clear_buffer_dirty(bh); - store_thrown(tb, bh); -} - -/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/ -void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest, - struct buffer_head *src, int n_src) -{ - - RFALSE(dest == NULL || src == NULL, - "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)", - src, dest); - RFALSE(!B_IS_KEYS_LEVEL(dest), - "vs-12310: invalid level (%z) for destination buffer. 
dest must be leaf", - dest); - RFALSE(n_dest < 0 || n_src < 0, - "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest); - RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src), - "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big", - n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest)); - - if (B_IS_ITEMS_LEVEL(src)) - /* source buffer contains leaf node */ - memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src), - KEY_SIZE); - else - memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src), - KEY_SIZE); - - do_balance_mark_internal_dirty(tb, dest, 0); -} - -int get_left_neighbor_position(struct tree_balance *tb, int h) -{ - int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1); - - RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL, - "vs-12325: FL[%d](%p) or F[%d](%p) does not exist", - h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h)); - - if (Sh_position == 0) - return B_NR_ITEMS(tb->FL[h]); - else - return Sh_position - 1; -} - -int get_right_neighbor_position(struct tree_balance *tb, int h) -{ - int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1); - - RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL, - "vs-12330: F[%d](%p) or FR[%d](%p) does not exist", - h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]); - - if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h))) - return 0; - else - return Sh_position + 1; -} - -#ifdef CONFIG_REISERFS_CHECK - -int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value); -static void check_internal_node(struct super_block *s, struct buffer_head *bh, - char *mes) -{ - struct disk_child *dc; - int i; - - RFALSE(!bh, "PAP-12336: bh == 0"); - - if (!bh || !B_IS_IN_TREE(bh)) - return; - - RFALSE(!buffer_dirty(bh) && - !(buffer_journaled(bh) || buffer_journal_dirty(bh)), - "PAP-12337: buffer (%b) must be dirty", bh); - dc = B_N_CHILD(bh, 0); - - for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) { - if (!is_reusable(s, dc_block_number(dc), 1)) 
{ - print_cur_tb(mes); - reiserfs_panic(s, "PAP-12338", - "invalid child pointer %y in %b", - dc, bh); - } - } -} - -static int locked_or_not_in_tree(struct tree_balance *tb, - struct buffer_head *bh, char *which) -{ - if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) || - !B_IS_IN_TREE(bh)) { - reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh); - return 1; - } - return 0; -} - -static int check_before_balancing(struct tree_balance *tb) -{ - int retval = 0; - - if (REISERFS_SB(tb->tb_sb)->cur_tb) { - reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule " - "occurred based on cur_tb not being null at " - "this point in code. do_balance cannot properly " - "handle concurrent tree accesses on a same " - "mount point."); - } - - /* double check that buffers that we will modify are unlocked. (fix_nodes should already have - prepped all of these for us). */ - if (tb->lnum[0]) { - retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]"); - retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]"); - retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]"); - check_leaf(tb->L[0]); - } - if (tb->rnum[0]) { - retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]"); - retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]"); - retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]"); - check_leaf(tb->R[0]); - } - retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path), - "S[0]"); - check_leaf(PATH_PLAST_BUFFER(tb->tb_path)); - - return retval; -} - -static void check_after_balance_leaf(struct tree_balance *tb) -{ - if (tb->lnum[0]) { - if (B_FREE_SPACE(tb->L[0]) != - MAX_CHILD_SIZE(tb->L[0]) - - dc_size(B_N_CHILD - (tb->FL[0], get_left_neighbor_position(tb, 0)))) { - print_cur_tb("12221"); - reiserfs_panic(tb->tb_sb, "PAP-12355", - "shift to left was incorrect"); - } - } - if (tb->rnum[0]) { - if (B_FREE_SPACE(tb->R[0]) != - MAX_CHILD_SIZE(tb->R[0]) - - dc_size(B_N_CHILD - (tb->FR[0], get_right_neighbor_position(tb, 0)))) { - 
print_cur_tb("12222"); - reiserfs_panic(tb->tb_sb, "PAP-12360", - "shift to right was incorrect"); - } - } - if (PATH_H_PBUFFER(tb->tb_path, 1) && - (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) != - (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) - - dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1), - PATH_H_POSITION(tb->tb_path, 1)))))) { - int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)); - int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) - - dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1), - PATH_H_POSITION(tb->tb_path, - 1)))); - print_cur_tb("12223"); - reiserfs_warning(tb->tb_sb, "reiserfs-12363", - "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; " - "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d", - left, - MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)), - PATH_H_PBUFFER(tb->tb_path, 1), - PATH_H_POSITION(tb->tb_path, 1), - dc_size(B_N_CHILD - (PATH_H_PBUFFER(tb->tb_path, 1), - PATH_H_POSITION(tb->tb_path, 1))), - right); - reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect"); - } -} - -static void check_leaf_level(struct tree_balance *tb) -{ - check_leaf(tb->L[0]); - check_leaf(tb->R[0]); - check_leaf(PATH_PLAST_BUFFER(tb->tb_path)); -} - -static void check_internal_levels(struct tree_balance *tb) -{ - int h; - - /* check all internal nodes */ - for (h = 1; tb->insert_size[h]; h++) { - check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h), - "BAD BUFFER ON PATH"); - if (tb->lnum[h]) - check_internal_node(tb->tb_sb, tb->L[h], "BAD L"); - if (tb->rnum[h]) - check_internal_node(tb->tb_sb, tb->R[h], "BAD R"); - } - -} - -#endif - -/* Now we have all of the buffers that must be used in balancing of - the tree. We rely on the assumption that schedule() will not occur - while do_balance works. ( Only interrupt handlers are acceptable.) - We balance the tree according to the analysis made before this, - using buffers already obtained. For SMP support it will someday be - necessary to add ordered locking of tb. 
*/ - -/* Some interesting rules of balancing: - - we delete a maximum of two nodes per level per balancing: we never - delete R, when we delete two of three nodes L, S, R then we move - them into R. - - we only delete L if we are deleting two nodes, if we delete only - one node we delete S - - if we shift leaves then we shift as much as we can: this is a - deliberate policy of extremism in node packing which results in - higher average utilization after repeated random balance operations - at the cost of more memory copies and more balancing as a result of - small insertions to full nodes. - - if we shift internal nodes we try to evenly balance the node - utilization, with consequent less balancing at the cost of lower - utilization. - - one could argue that the policy for directories in leaves should be - that of internal nodes, but we will wait until another day to - evaluate this.... It would be nice to someday measure and prove - these assumptions as to what is optimal.... - -*/ - -static inline void do_balance_starts(struct tree_balance *tb) -{ - /* use print_cur_tb() to see initial state of struct - tree_balance */ - - /* store_print_tb (tb); */ - - /* do not delete, just comment it out */ -/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb, - "check");*/ - RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB"); -#ifdef CONFIG_REISERFS_CHECK - REISERFS_SB(tb->tb_sb)->cur_tb = tb; -#endif -} - -static inline void do_balance_completed(struct tree_balance *tb) -{ - -#ifdef CONFIG_REISERFS_CHECK - check_leaf_level(tb); - check_internal_levels(tb); - REISERFS_SB(tb->tb_sb)->cur_tb = NULL; -#endif - - /* reiserfs_free_block is no longer schedule safe. 
So, we need to - ** put the buffers we want freed on the thrown list during do_balance, - ** and then free them now - */ - - REISERFS_SB(tb->tb_sb)->s_do_balance++; - - /* release all nodes hold to perform the balancing */ - unfix_nodes(tb); - - free_thrown(tb); -} - -void do_balance(struct tree_balance *tb, /* tree_balance structure */ - struct item_head *ih, /* item header of inserted item */ - const char *body, /* body of inserted item or bytes to paste */ - int flag) -{ /* i - insert, d - delete - c - cut, p - paste - - Cut means delete part of an item - (includes removing an entry from a - directory). - - Delete means delete whole item. - - Insert means add a new item into the - tree. - - Paste means to append to the end of an - existing file or to insert a directory - entry. */ - int child_pos, /* position of a child node in its parent */ - h; /* level of the tree being processed */ - struct item_head insert_key[2]; /* in our processing of one level - we sometimes determine what - must be inserted into the next - higher level. This insertion - consists of a key or two keys - and their corresponding - pointers */ - struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next - level */ - - tb->tb_mode = flag; - tb->need_balance_dirty = 0; - - if (FILESYSTEM_CHANGED_TB(tb)) { - reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has " - "changed"); - } - /* if we have no real work to do */ - if (!tb->insert_size[0]) { - reiserfs_warning(tb->tb_sb, "PAP-12350", - "insert_size == 0, mode == %c", flag); - unfix_nodes(tb); - return; - } - - atomic_inc(&(fs_generation(tb->tb_sb))); - do_balance_starts(tb); - - /* balance leaf returns 0 except if combining L R and S into - one node. see balance_internal() for explanation of this - line of code. 
*/ - child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) + - balance_leaf(tb, ih, body, flag, insert_key, insert_ptr); - -#ifdef CONFIG_REISERFS_CHECK - check_after_balance_leaf(tb); -#endif - - /* Balance internal level of the tree. */ - for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++) - child_pos = - balance_internal(tb, h, child_pos, insert_key, insert_ptr); - - do_balance_completed(tb); - -} diff --git a/ANDROID_3.4.5/fs/reiserfs/file.c b/ANDROID_3.4.5/fs/reiserfs/file.c deleted file mode 100644 index 8375c922..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/file.c +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include <linux/time.h> -#include "reiserfs.h" -#include "acl.h" -#include "xattr.h" -#include <asm/uaccess.h> -#include <linux/pagemap.h> -#include <linux/swap.h> -#include <linux/writeback.h> -#include <linux/blkdev.h> -#include <linux/buffer_head.h> -#include <linux/quotaops.h> - -/* -** We pack the tails of files on file close, not at the time they are written. -** This implies an unnecessary copy of the tail and an unnecessary indirect item -** insertion/balancing, for files that are written in one write. -** It avoids unnecessary tail packings (balances) for files that are written in -** multiple writes and are small enough to have tails. -** -** file_release is called by the VFS layer when the file is closed. If -** this is the last open file descriptor, and the file -** small enough to have a tail, and the tail is currently in an -** unformatted node, the tail is converted back into a direct item. -** -** We use reiserfs_truncate_file to pack the tail, since it already has -** all the conditions coded. 
-*/ -static int reiserfs_file_release(struct inode *inode, struct file *filp) -{ - - struct reiserfs_transaction_handle th; - int err; - int jbegin_failure = 0; - - BUG_ON(!S_ISREG(inode->i_mode)); - - if (atomic_add_unless(&REISERFS_I(inode)->openers, -1, 1)) - return 0; - - mutex_lock(&(REISERFS_I(inode)->tailpack)); - - if (!atomic_dec_and_test(&REISERFS_I(inode)->openers)) { - mutex_unlock(&(REISERFS_I(inode)->tailpack)); - return 0; - } - - /* fast out for when nothing needs to be done */ - if ((!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || - !tail_has_to_be_packed(inode)) && - REISERFS_I(inode)->i_prealloc_count <= 0) { - mutex_unlock(&(REISERFS_I(inode)->tailpack)); - return 0; - } - - reiserfs_write_lock(inode->i_sb); - /* freeing preallocation only involves relogging blocks that - * are already in the current transaction. preallocation gets - * freed at the end of each transaction, so it is impossible for - * us to log any additional blocks (including quota blocks) - */ - err = journal_begin(&th, inode->i_sb, 1); - if (err) { - /* uh oh, we can't allow the inode to go away while there - * is still preallocation blocks pending. Try to join the - * aborted transaction - */ - jbegin_failure = err; - err = journal_join_abort(&th, inode->i_sb, 1); - - if (err) { - /* hmpf, our choices here aren't good. We can pin the inode - * which will disallow unmount from every happening, we can - * do nothing, which will corrupt random memory on unmount, - * or we can forcibly remove the file from the preallocation - * list, which will leak blocks on disk. Lets pin the inode - * and let the admin know what is going on. 
- */ - igrab(inode); - reiserfs_warning(inode->i_sb, "clm-9001", - "pinning inode %lu because the " - "preallocation can't be freed", - inode->i_ino); - goto out; - } - } - reiserfs_update_inode_transaction(inode); - -#ifdef REISERFS_PREALLOCATE - reiserfs_discard_prealloc(&th, inode); -#endif - err = journal_end(&th, inode->i_sb, 1); - - /* copy back the error code from journal_begin */ - if (!err) - err = jbegin_failure; - - if (!err && - (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && - tail_has_to_be_packed(inode)) { - - /* if regular file is released by last holder and it has been - appended (we append by unformatted node only) or its direct - item(s) had to be converted, then it may have to be - indirect2direct converted */ - err = reiserfs_truncate_file(inode, 0); - } - out: - reiserfs_write_unlock(inode->i_sb); - mutex_unlock(&(REISERFS_I(inode)->tailpack)); - return err; -} - -static int reiserfs_file_open(struct inode *inode, struct file *file) -{ - int err = dquot_file_open(inode, file); - if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) { - /* somebody might be tailpacking on final close; wait for it */ - mutex_lock(&(REISERFS_I(inode)->tailpack)); - atomic_inc(&REISERFS_I(inode)->openers); - mutex_unlock(&(REISERFS_I(inode)->tailpack)); - } - return err; -} - -static void reiserfs_vfs_truncate_file(struct inode *inode) -{ - mutex_lock(&(REISERFS_I(inode)->tailpack)); - reiserfs_truncate_file(inode, 1); - mutex_unlock(&(REISERFS_I(inode)->tailpack)); -} - -/* Sync a reiserfs file. */ - -/* - * FIXME: sync_mapping_buffers() never has anything to sync. Can - * be removed... 
- */ - -static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end, - int datasync) -{ - struct inode *inode = filp->f_mapping->host; - int err; - int barrier_done; - - err = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (err) - return err; - - mutex_lock(&inode->i_mutex); - BUG_ON(!S_ISREG(inode->i_mode)); - err = sync_mapping_buffers(inode->i_mapping); - reiserfs_write_lock(inode->i_sb); - barrier_done = reiserfs_commit_for_inode(inode); - reiserfs_write_unlock(inode->i_sb); - if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) - blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); - mutex_unlock(&inode->i_mutex); - if (barrier_done < 0) - return barrier_done; - return (err < 0) ? -EIO : 0; -} - -/* taken fs/buffer.c:__block_commit_write */ -int reiserfs_commit_page(struct inode *inode, struct page *page, - unsigned from, unsigned to) -{ - unsigned block_start, block_end; - int partial = 0; - unsigned blocksize; - struct buffer_head *bh, *head; - unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT; - int new; - int logit = reiserfs_file_data_log(inode); - struct super_block *s = inode->i_sb; - int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; - struct reiserfs_transaction_handle th; - int ret = 0; - - th.t_trans_id = 0; - blocksize = 1 << inode->i_blkbits; - - if (logit) { - reiserfs_write_lock(s); - ret = journal_begin(&th, s, bh_per_page + 1); - if (ret) - goto drop_write_lock; - reiserfs_update_inode_transaction(inode); - } - for (bh = head = page_buffers(page), block_start = 0; - bh != head || !block_start; - block_start = block_end, bh = bh->b_this_page) { - - new = buffer_new(bh); - clear_buffer_new(bh); - block_end = block_start + blocksize; - if (block_end <= from || block_start >= to) { - if (!buffer_uptodate(bh)) - partial = 1; - } else { - set_buffer_uptodate(bh); - if (logit) { - reiserfs_prepare_for_journal(s, bh, 1); - journal_mark_dirty(&th, s, bh); - } else if (!buffer_dirty(bh)) { - 
mark_buffer_dirty(bh); - /* do data=ordered on any page past the end - * of file and any buffer marked BH_New. - */ - if (reiserfs_data_ordered(inode->i_sb) && - (new || page->index >= i_size_index)) { - reiserfs_add_ordered_list(inode, bh); - } - } - } - } - if (logit) { - ret = journal_end(&th, s, bh_per_page + 1); - drop_write_lock: - reiserfs_write_unlock(s); - } - /* - * If this is a partial write which happened to make all buffers - * uptodate then we can optimize away a bogus readpage() for - * the next read(). Here we 'discover' whether the page went - * uptodate as a result of this (potentially partial) write. - */ - if (!partial) - SetPageUptodate(page); - return ret; -} - -/* Write @count bytes at position @ppos in a file indicated by @file - from the buffer @buf. - - generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want - something simple that works. It is not for serious use by general purpose filesystems, excepting the one that it was - written for (ext2/3). This is for several reasons: - - * It has no understanding of any filesystem specific optimizations. - - * It enters the filesystem repeatedly for each page that is written. - - * It depends on reiserfs_get_block() function which if implemented by reiserfs performs costly search_by_key - * operation for each page it is supplied with. By contrast reiserfs_file_write() feeds as much as possible at a time - * to reiserfs which allows for fewer tree traversals. - - * Each indirect pointer insertion takes a lot of cpu, because it involves memory moves inside of blocks. - - * Asking the block allocation code for blocks one at a time is slightly less efficient. - - All of these reasons for not using only generic file write were understood back when reiserfs was first miscoded to - use it, but we were in a hurry to make code freeze, and so it couldn't be revised then. This new code should make - things right finally. 
- - Future Features: providing search_by_key with hints. - -*/ -static ssize_t reiserfs_file_write(struct file *file, /* the file we are going to write into */ - const char __user * buf, /* pointer to user supplied data - (in userspace) */ - size_t count, /* amount of bytes to write */ - loff_t * ppos /* pointer to position in file that we start writing at. Should be updated to - * new current position before returning. */ - ) -{ - struct inode *inode = file->f_path.dentry->d_inode; // Inode of the file that we are writing to. - /* To simplify coding at this time, we store - locked pages in array for now */ - struct reiserfs_transaction_handle th; - th.t_trans_id = 0; - - /* If a filesystem is converted from 3.5 to 3.6, we'll have v3.5 items - * lying around (most of the disk, in fact). Despite the filesystem - * now being a v3.6 format, the old items still can't support large - * file sizes. Catch this case here, as the rest of the VFS layer is - * oblivious to the different limitations between old and new items. - * reiserfs_setattr catches this for truncates. This chunk is lifted - * from generic_write_checks. 
*/ - if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 && - *ppos + count > MAX_NON_LFS) { - if (*ppos >= MAX_NON_LFS) { - return -EFBIG; - } - if (count > MAX_NON_LFS - (unsigned long)*ppos) - count = MAX_NON_LFS - (unsigned long)*ppos; - } - - return do_sync_write(file, buf, count, ppos); -} - -const struct file_operations reiserfs_file_operations = { - .read = do_sync_read, - .write = reiserfs_file_write, - .unlocked_ioctl = reiserfs_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = reiserfs_compat_ioctl, -#endif - .mmap = generic_file_mmap, - .open = reiserfs_file_open, - .release = reiserfs_file_release, - .fsync = reiserfs_sync_file, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, - .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, - .llseek = generic_file_llseek, -}; - -const struct inode_operations reiserfs_file_inode_operations = { - .truncate = reiserfs_vfs_truncate_file, - .setattr = reiserfs_setattr, - .setxattr = reiserfs_setxattr, - .getxattr = reiserfs_getxattr, - .listxattr = reiserfs_listxattr, - .removexattr = reiserfs_removexattr, - .permission = reiserfs_permission, - .get_acl = reiserfs_get_acl, -}; diff --git a/ANDROID_3.4.5/fs/reiserfs/fix_node.c b/ANDROID_3.4.5/fs/reiserfs/fix_node.c deleted file mode 100644 index 430e0658..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/fix_node.c +++ /dev/null @@ -1,2593 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -/** - ** old_item_num - ** old_entry_num - ** set_entry_sizes - ** create_virtual_node - ** check_left - ** check_right - ** directory_part_size - ** get_num_ver - ** set_parameters - ** is_leaf_removable - ** are_leaves_removable - ** get_empty_nodes - ** get_lfree - ** get_rfree - ** is_left_neighbor_in_cache - ** decrement_key - ** get_far_parent - ** get_parents - ** can_node_be_removed - ** ip_check_balance - ** dc_check_balance_internal - ** dc_check_balance_leaf - ** 
dc_check_balance - ** check_balance - ** get_direct_parent - ** get_neighbors - ** fix_nodes - ** - ** - **/ - -#include <linux/time.h> -#include <linux/slab.h> -#include <linux/string.h> -#include "reiserfs.h" -#include <linux/buffer_head.h> - -/* To make any changes in the tree we find a node, that contains item - to be changed/deleted or position in the node we insert a new item - to. We call this node S. To do balancing we need to decide what we - will shift to left/right neighbor, or to a new node, where new item - will be etc. To make this analysis simpler we build virtual - node. Virtual node is an array of items, that will replace items of - node S. (For instance if we are going to delete an item, virtual - node does not contain it). Virtual node keeps information about - item sizes and types, mergeability of first and last items, sizes - of all entries in directory item. We use this array of items when - calculating what we can shift to neighbors and how many nodes we - have to have if we do not any shiftings, if we shift to left/right - neighbor or to both. 
*/ - -/* taking item number in virtual node, returns number of item, that it has in source buffer */ -static inline int old_item_num(int new_num, int affected_item_num, int mode) -{ - if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num) - return new_num; - - if (mode == M_INSERT) { - - RFALSE(new_num == 0, - "vs-8005: for INSERT mode and item number of inserted item"); - - return new_num - 1; - } - - RFALSE(mode != M_DELETE, - "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'", - mode); - /* delete mode */ - return new_num + 1; -} - -static void create_virtual_node(struct tree_balance *tb, int h) -{ - struct item_head *ih; - struct virtual_node *vn = tb->tb_vn; - int new_num; - struct buffer_head *Sh; /* this comes from tb->S[h] */ - - Sh = PATH_H_PBUFFER(tb->tb_path, h); - - /* size of changed node */ - vn->vn_size = - MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h]; - - /* for internal nodes array if virtual items is not created */ - if (h) { - vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE); - return; - } - - /* number of items in virtual node */ - vn->vn_nr_item = - B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) - - ((vn->vn_mode == M_DELETE) ? 
1 : 0); - - /* first virtual item */ - vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1); - memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item)); - vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item); - - /* first item in the node */ - ih = B_N_PITEM_HEAD(Sh, 0); - - /* define the mergeability for 0-th item (if it is not being deleted) */ - if (op_is_left_mergeable(&(ih->ih_key), Sh->b_size) - && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num)) - vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE; - - /* go through all items those remain in the virtual node (except for the new (inserted) one) */ - for (new_num = 0; new_num < vn->vn_nr_item; new_num++) { - int j; - struct virtual_item *vi = vn->vn_vi + new_num; - int is_affected = - ((new_num != vn->vn_affected_item_num) ? 0 : 1); - - if (is_affected && vn->vn_mode == M_INSERT) - continue; - - /* get item number in source node */ - j = old_item_num(new_num, vn->vn_affected_item_num, - vn->vn_mode); - - vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE; - vi->vi_ih = ih + j; - vi->vi_item = B_I_PITEM(Sh, ih + j); - vi->vi_uarea = vn->vn_free_ptr; - - // FIXME: there is no check, that item operation did not - // consume too much memory - vn->vn_free_ptr += - op_create_vi(vn, vi, is_affected, tb->insert_size[0]); - if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr) - reiserfs_panic(tb->tb_sb, "vs-8030", - "virtual node space consumed"); - - if (!is_affected) - /* this is not being changed */ - continue; - - if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) { - vn->vn_vi[new_num].vi_item_len += tb->insert_size[0]; - vi->vi_new_data = vn->vn_data; // pointer to data which is going to be pasted - } - } - - /* virtual inserted item is not defined yet */ - if (vn->vn_mode == M_INSERT) { - struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num; - - RFALSE(vn->vn_ins_ih == NULL, - "vs-8040: item header of inserted item is not specified"); - vi->vi_item_len = tb->insert_size[0]; - 
vi->vi_ih = vn->vn_ins_ih; - vi->vi_item = vn->vn_data; - vi->vi_uarea = vn->vn_free_ptr; - - op_create_vi(vn, vi, 0 /*not pasted or cut */ , - tb->insert_size[0]); - } - - /* set right merge flag we take right delimiting key and check whether it is a mergeable item */ - if (tb->CFR[0]) { - struct reiserfs_key *key; - - key = B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]); - if (op_is_left_mergeable(key, Sh->b_size) - && (vn->vn_mode != M_DELETE - || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) - vn->vn_vi[vn->vn_nr_item - 1].vi_type |= - VI_TYPE_RIGHT_MERGEABLE; - -#ifdef CONFIG_REISERFS_CHECK - if (op_is_left_mergeable(key, Sh->b_size) && - !(vn->vn_mode != M_DELETE - || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) { - /* we delete last item and it could be merged with right neighbor's first item */ - if (! - (B_NR_ITEMS(Sh) == 1 - && is_direntry_le_ih(B_N_PITEM_HEAD(Sh, 0)) - && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) { - /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */ - print_block(Sh, 0, -1, -1); - reiserfs_panic(tb->tb_sb, "vs-8045", - "rdkey %k, affected item==%d " - "(mode==%c) Must be %c", - key, vn->vn_affected_item_num, - vn->vn_mode, M_DELETE); - } - } -#endif - - } -} - -/* using virtual node check, how many items can be shifted to left - neighbor */ -static void check_left(struct tree_balance *tb, int h, int cur_free) -{ - int i; - struct virtual_node *vn = tb->tb_vn; - struct virtual_item *vi; - int d_size, ih_size; - - RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free); - - /* internal level */ - if (h > 0) { - tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE); - return; - } - - /* leaf level */ - - if (!cur_free || !vn->vn_nr_item) { - /* no free space or nothing to move */ - tb->lnum[h] = 0; - tb->lbytes = -1; - return; - } - - RFALSE(!PATH_H_PPARENT(tb->tb_path, 0), - "vs-8055: parent does not exist or invalid"); - - vi = vn->vn_vi; - if ((unsigned int)cur_free >= - 
(vn->vn_size - - ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) { - /* all contents of S[0] fits into L[0] */ - - RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE, - "vs-8055: invalid mode or balance condition failed"); - - tb->lnum[0] = vn->vn_nr_item; - tb->lbytes = -1; - return; - } - - d_size = 0, ih_size = IH_SIZE; - - /* first item may be merge with last item in left neighbor */ - if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE) - d_size = -((int)IH_SIZE), ih_size = 0; - - tb->lnum[0] = 0; - for (i = 0; i < vn->vn_nr_item; - i++, ih_size = IH_SIZE, d_size = 0, vi++) { - d_size += vi->vi_item_len; - if (cur_free >= d_size) { - /* the item can be shifted entirely */ - cur_free -= d_size; - tb->lnum[0]++; - continue; - } - - /* the item cannot be shifted entirely, try to split it */ - /* check whether L[0] can hold ih and at least one byte of the item body */ - if (cur_free <= ih_size) { - /* cannot shift even a part of the current item */ - tb->lbytes = -1; - return; - } - cur_free -= ih_size; - - tb->lbytes = op_check_left(vi, cur_free, 0, 0); - if (tb->lbytes != -1) - /* count partially shifted item */ - tb->lnum[0]++; - - break; - } - - return; -} - -/* using virtual node check, how many items can be shifted to right - neighbor */ -static void check_right(struct tree_balance *tb, int h, int cur_free) -{ - int i; - struct virtual_node *vn = tb->tb_vn; - struct virtual_item *vi; - int d_size, ih_size; - - RFALSE(cur_free < 0, "vs-8070: cur_free < 0"); - - /* internal level */ - if (h > 0) { - tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE); - return; - } - - /* leaf level */ - - if (!cur_free || !vn->vn_nr_item) { - /* no free space */ - tb->rnum[h] = 0; - tb->rbytes = -1; - return; - } - - RFALSE(!PATH_H_PPARENT(tb->tb_path, 0), - "vs-8075: parent does not exist or invalid"); - - vi = vn->vn_vi + vn->vn_nr_item - 1; - if ((unsigned int)cur_free >= - (vn->vn_size - - ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? 
IH_SIZE : 0))) { - /* all contents of S[0] fits into R[0] */ - - RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE, - "vs-8080: invalid mode or balance condition failed"); - - tb->rnum[h] = vn->vn_nr_item; - tb->rbytes = -1; - return; - } - - d_size = 0, ih_size = IH_SIZE; - - /* last item may be merge with first item in right neighbor */ - if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) - d_size = -(int)IH_SIZE, ih_size = 0; - - tb->rnum[0] = 0; - for (i = vn->vn_nr_item - 1; i >= 0; - i--, d_size = 0, ih_size = IH_SIZE, vi--) { - d_size += vi->vi_item_len; - if (cur_free >= d_size) { - /* the item can be shifted entirely */ - cur_free -= d_size; - tb->rnum[0]++; - continue; - } - - /* check whether R[0] can hold ih and at least one byte of the item body */ - if (cur_free <= ih_size) { /* cannot shift even a part of the current item */ - tb->rbytes = -1; - return; - } - - /* R[0] can hold the header of the item and at least one byte of its body */ - cur_free -= ih_size; /* cur_free is still > 0 */ - - tb->rbytes = op_check_right(vi, cur_free); - if (tb->rbytes != -1) - /* count partially shifted item */ - tb->rnum[0]++; - - break; - } - - return; -} - -/* - * from - number of items, which are shifted to left neighbor entirely - * to - number of item, which are shifted to right neighbor entirely - * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor - * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */ -static int get_num_ver(int mode, struct tree_balance *tb, int h, - int from, int from_bytes, - int to, int to_bytes, short *snum012, int flow) -{ - int i; - int cur_free; - // int bytes; - int units; - struct virtual_node *vn = tb->tb_vn; - // struct virtual_item * vi; - - int total_node_size, max_node_size, current_item_size; - int needed_nodes; - int start_item, /* position of item we start filling node from */ - end_item, /* position of item we finish 
filling node by */ - start_bytes, /* number of first bytes (entries for directory) of start_item-th item - we do not include into node that is being filled */ - end_bytes; /* number of last bytes (entries for directory) of end_item-th item - we do node include into node that is being filled */ - int split_item_positions[2]; /* these are positions in virtual item of - items, that are split between S[0] and - S1new and S1new and S2new */ - - split_item_positions[0] = -1; - split_item_positions[1] = -1; - - /* We only create additional nodes if we are in insert or paste mode - or we are in replace mode at the internal level. If h is 0 and - the mode is M_REPLACE then in fix_nodes we change the mode to - paste or insert before we get here in the code. */ - RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE), - "vs-8100: insert_size < 0 in overflow"); - - max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h)); - - /* snum012 [0-2] - number of items, that lay - to S[0], first new node and second new node */ - snum012[3] = -1; /* s1bytes */ - snum012[4] = -1; /* s2bytes */ - - /* internal level */ - if (h > 0) { - i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE); - if (i == max_node_size) - return 1; - return (i / max_node_size + 1); - } - - /* leaf level */ - needed_nodes = 1; - total_node_size = 0; - cur_free = max_node_size; - - // start from 'from'-th item - start_item = from; - // skip its first 'start_bytes' units - start_bytes = ((from_bytes != -1) ? from_bytes : 0); - - // last included item is the 'end_item'-th one - end_item = vn->vn_nr_item - to - 1; - // do not count last 'end_bytes' units of 'end_item'-th item - end_bytes = (to_bytes != -1) ? to_bytes : 0; - - /* go through all item beginning from the start_item-th item and ending by - the end_item-th item. 
Do not count first 'start_bytes' units of - 'start_item'-th item and last 'end_bytes' of 'end_item'-th item */ - - for (i = start_item; i <= end_item; i++) { - struct virtual_item *vi = vn->vn_vi + i; - int skip_from_end = ((i == end_item) ? end_bytes : 0); - - RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed"); - - /* get size of current item */ - current_item_size = vi->vi_item_len; - - /* do not take in calculation head part (from_bytes) of from-th item */ - current_item_size -= - op_part_size(vi, 0 /*from start */ , start_bytes); - - /* do not take in calculation tail part of last item */ - current_item_size -= - op_part_size(vi, 1 /*from end */ , skip_from_end); - - /* if item fits into current node entierly */ - if (total_node_size + current_item_size <= max_node_size) { - snum012[needed_nodes - 1]++; - total_node_size += current_item_size; - start_bytes = 0; - continue; - } - - if (current_item_size > max_node_size) { - /* virtual item length is longer, than max size of item in - a node. It is impossible for direct item */ - RFALSE(is_direct_le_ih(vi->vi_ih), - "vs-8110: " - "direct item length is %d. 
It can not be longer than %d", - current_item_size, max_node_size); - /* we will try to split it */ - flow = 1; - } - - if (!flow) { - /* as we do not split items, take new node and continue */ - needed_nodes++; - i--; - total_node_size = 0; - continue; - } - // calculate number of item units which fit into node being - // filled - { - int free_space; - - free_space = max_node_size - total_node_size - IH_SIZE; - units = - op_check_left(vi, free_space, start_bytes, - skip_from_end); - if (units == -1) { - /* nothing fits into current node, take new node and continue */ - needed_nodes++, i--, total_node_size = 0; - continue; - } - } - - /* something fits into the current node */ - //if (snum012[3] != -1 || needed_nodes != 1) - // reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required"); - //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units; - start_bytes += units; - snum012[needed_nodes - 1 + 3] = units; - - if (needed_nodes > 2) - reiserfs_warning(tb->tb_sb, "vs-8111", - "split_item_position is out of range"); - snum012[needed_nodes - 1]++; - split_item_positions[needed_nodes - 1] = i; - needed_nodes++; - /* continue from the same item with start_bytes != -1 */ - start_item = i; - i--; - total_node_size = 0; - } - - // sum012[4] (if it is not -1) contains number of units of which - // are to be in S1new, snum012[3] - to be in S0. They are supposed - // to be S1bytes and S2bytes correspondingly, so recalculate - if (snum012[4] > 0) { - int split_item_num; - int bytes_to_r, bytes_to_l; - int bytes_to_S1new; - - split_item_num = split_item_positions[1]; - bytes_to_l = - ((from == split_item_num - && from_bytes != -1) ? from_bytes : 0); - bytes_to_r = - ((end_item == split_item_num - && end_bytes != -1) ? end_bytes : 0); - bytes_to_S1new = - ((split_item_positions[0] == - split_item_positions[1]) ? 
snum012[3] : 0); - - // s2bytes - snum012[4] = - op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] - - bytes_to_r - bytes_to_l - bytes_to_S1new; - - if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY && - vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT) - reiserfs_warning(tb->tb_sb, "vs-8115", - "not directory or indirect item"); - } - - /* now we know S2bytes, calculate S1bytes */ - if (snum012[3] > 0) { - int split_item_num; - int bytes_to_r, bytes_to_l; - int bytes_to_S2new; - - split_item_num = split_item_positions[0]; - bytes_to_l = - ((from == split_item_num - && from_bytes != -1) ? from_bytes : 0); - bytes_to_r = - ((end_item == split_item_num - && end_bytes != -1) ? end_bytes : 0); - bytes_to_S2new = - ((split_item_positions[0] == split_item_positions[1] - && snum012[4] != -1) ? snum012[4] : 0); - - // s1bytes - snum012[3] = - op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] - - bytes_to_r - bytes_to_l - bytes_to_S2new; - } - - return needed_nodes; -} - - -/* Set parameters for balancing. - * Performs write of results of analysis of balancing into structure tb, - * where it will later be used by the functions that actually do the balancing. - * Parameters: - * tb tree_balance structure; - * h current level of the node; - * lnum number of items from S[h] that must be shifted to L[h]; - * rnum number of items from S[h] that must be shifted to R[h]; - * blk_num number of blocks that S[h] will be splitted into; - * s012 number of items that fall into splitted nodes. 
- * lbytes number of bytes which flow to the left neighbor from the item that is not - * not shifted entirely - * rbytes number of bytes which flow to the right neighbor from the item that is not - * not shifted entirely - * s1bytes number of bytes which flow to the first new node when S[0] splits (this number is contained in s012 array) - */ - -static void set_parameters(struct tree_balance *tb, int h, int lnum, - int rnum, int blk_num, short *s012, int lb, int rb) -{ - - tb->lnum[h] = lnum; - tb->rnum[h] = rnum; - tb->blknum[h] = blk_num; - - if (h == 0) { /* only for leaf level */ - if (s012 != NULL) { - tb->s0num = *s012++, - tb->s1num = *s012++, tb->s2num = *s012++; - tb->s1bytes = *s012++; - tb->s2bytes = *s012; - } - tb->lbytes = lb; - tb->rbytes = rb; - } - PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum); - PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum); - - PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb); - PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb); -} - -/* check, does node disappear if we shift tb->lnum[0] items to left - neighbor and tb->rnum[0] to the right one. */ -static int is_leaf_removable(struct tree_balance *tb) -{ - struct virtual_node *vn = tb->tb_vn; - int to_left, to_right; - int size; - int remain_items; - - /* number of items, that will be shifted to left (right) neighbor - entirely */ - to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0); - to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 
1 : 0); - remain_items = vn->vn_nr_item; - - /* how many items remain in S[0] after shiftings to neighbors */ - remain_items -= (to_left + to_right); - - if (remain_items < 1) { - /* all content of node can be shifted to neighbors */ - set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0, - NULL, -1, -1); - return 1; - } - - if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1) - /* S[0] is not removable */ - return 0; - - /* check, whether we can divide 1 remaining item between neighbors */ - - /* get size of remaining item (in item units) */ - size = op_unit_num(&(vn->vn_vi[to_left])); - - if (tb->lbytes + tb->rbytes >= size) { - set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL, - tb->lbytes, -1); - return 1; - } - - return 0; -} - -/* check whether L, S, R can be joined in one node */ -static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree) -{ - struct virtual_node *vn = tb->tb_vn; - int ih_size; - struct buffer_head *S0; - - S0 = PATH_H_PBUFFER(tb->tb_path, 0); - - ih_size = 0; - if (vn->vn_nr_item) { - if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE) - ih_size += IH_SIZE; - - if (vn->vn_vi[vn->vn_nr_item - 1]. - vi_type & VI_TYPE_RIGHT_MERGEABLE) - ih_size += IH_SIZE; - } else { - /* there was only one item and it will be deleted */ - struct item_head *ih; - - RFALSE(B_NR_ITEMS(S0) != 1, - "vs-8125: item number must be 1: it is %d", - B_NR_ITEMS(S0)); - - ih = B_N_PITEM_HEAD(S0, 0); - if (tb->CFR[0] - && !comp_short_le_keys(&(ih->ih_key), - B_N_PDELIM_KEY(tb->CFR[0], - tb->rkey[0]))) - if (is_direntry_le_ih(ih)) { - /* Directory must be in correct state here: that is - somewhere at the left side should exist first directory - item. But the item being deleted can not be that first - one because its right neighbor is item of the same - directory. (But first item always gets deleted in last - turn). 
So, neighbors of deleted item can be merged, so - we can save ih_size */ - ih_size = IH_SIZE; - - /* we might check that left neighbor exists and is of the - same directory */ - RFALSE(le_ih_k_offset(ih) == DOT_OFFSET, - "vs-8130: first directory item can not be removed until directory is not empty"); - } - - } - - if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) { - set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1); - PROC_INFO_INC(tb->tb_sb, leaves_removable); - return 1; - } - return 0; - -} - -/* when we do not split item, lnum and rnum are numbers of entire items */ -#define SET_PAR_SHIFT_LEFT \ -if (h)\ -{\ - int to_l;\ - \ - to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\ - (MAX_NR_KEY(Sh) + 1 - lpar);\ - \ - set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\ -}\ -else \ -{\ - if (lset==LEFT_SHIFT_FLOW)\ - set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\ - tb->lbytes, -1);\ - else\ - set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\ - -1, -1);\ -} - -#define SET_PAR_SHIFT_RIGHT \ -if (h)\ -{\ - int to_r;\ - \ - to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\ - \ - set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\ -}\ -else \ -{\ - if (rset==RIGHT_SHIFT_FLOW)\ - set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\ - -1, tb->rbytes);\ - else\ - set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\ - -1, -1);\ -} - -static void free_buffers_in_tb(struct tree_balance *tb) -{ - int i; - - pathrelse(tb->tb_path); - - for (i = 0; i < MAX_HEIGHT; i++) { - brelse(tb->L[i]); - brelse(tb->R[i]); - brelse(tb->FL[i]); - brelse(tb->FR[i]); - brelse(tb->CFL[i]); - brelse(tb->CFR[i]); - - tb->L[i] = NULL; - tb->R[i] = NULL; - tb->FL[i] = NULL; - tb->FR[i] = NULL; - tb->CFL[i] = NULL; - tb->CFR[i] = NULL; - } -} - -/* Get new buffers for storing new nodes that are created while balancing. 
- * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; - * CARRY_ON - schedule didn't occur while the function worked; - * NO_DISK_SPACE - no disk space. - */ -/* The function is NOT SCHEDULE-SAFE! */ -static int get_empty_nodes(struct tree_balance *tb, int h) -{ - struct buffer_head *new_bh, - *Sh = PATH_H_PBUFFER(tb->tb_path, h); - b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, }; - int counter, number_of_freeblk, amount_needed, /* number of needed empty blocks */ - retval = CARRY_ON; - struct super_block *sb = tb->tb_sb; - - /* number_of_freeblk is the number of empty blocks which have been - acquired for use by the balancing algorithm minus the number of - empty blocks used in the previous levels of the analysis, - number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs - after empty blocks are acquired, and the balancing analysis is - then restarted, amount_needed is the number needed by this level - (h) of the balancing analysis. - - Note that for systems with many processes writing, it would be - more layout optimal to calculate the total number needed by all - levels and then to run reiserfs_new_blocks to get all of them at once. */ - - /* Initiate number_of_freeblk to the amount acquired prior to the restart of - the analysis or 0 if not restarted, then subtract the amount needed - by all of the levels of the tree below h. */ - /* blknum includes S[h], so we subtract 1 in this calculation */ - for (counter = 0, number_of_freeblk = tb->cur_blknum; - counter < h; counter++) - number_of_freeblk -= - (tb->blknum[counter]) ? (tb->blknum[counter] - - 1) : 0; - - /* Allocate missing empty blocks. */ - /* if Sh == 0 then we are getting a new root */ - amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1; - /* Amount_needed = the amount that we need more than the amount that we have. 
*/ - if (amount_needed > number_of_freeblk) - amount_needed -= number_of_freeblk; - else /* If we have enough already then there is nothing to do. */ - return CARRY_ON; - - /* No need to check quota - is not allocated for blocks used for formatted nodes */ - if (reiserfs_new_form_blocknrs(tb, blocknrs, - amount_needed) == NO_DISK_SPACE) - return NO_DISK_SPACE; - - /* for each blocknumber we just got, get a buffer and stick it on FEB */ - for (blocknr = blocknrs, counter = 0; - counter < amount_needed; blocknr++, counter++) { - - RFALSE(!*blocknr, - "PAP-8135: reiserfs_new_blocknrs failed when got new blocks"); - - new_bh = sb_getblk(sb, *blocknr); - RFALSE(buffer_dirty(new_bh) || - buffer_journaled(new_bh) || - buffer_journal_dirty(new_bh), - "PAP-8140: journaled or dirty buffer %b for the new block", - new_bh); - - /* Put empty buffers into the array. */ - RFALSE(tb->FEB[tb->cur_blknum], - "PAP-8141: busy slot for new buffer"); - - set_buffer_journal_new(new_bh); - tb->FEB[tb->cur_blknum++] = new_bh; - } - - if (retval == CARRY_ON && FILESYSTEM_CHANGED_TB(tb)) - retval = REPEAT_SEARCH; - - return retval; -} - -/* Get free space of the left neighbor, which is stored in the parent - * node of the left neighbor. */ -static int get_lfree(struct tree_balance *tb, int h) -{ - struct buffer_head *l, *f; - int order; - - if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL || - (l = tb->FL[h]) == NULL) - return 0; - - if (f == l) - order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1; - else { - order = B_NR_ITEMS(l); - f = l; - } - - return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); -} - -/* Get free space of the right neighbor, - * which is stored in the parent node of the right neighbor. 
- */ -static int get_rfree(struct tree_balance *tb, int h) -{ - struct buffer_head *r, *f; - int order; - - if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL || - (r = tb->FR[h]) == NULL) - return 0; - - if (f == r) - order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1; - else { - order = 0; - f = r; - } - - return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); - -} - -/* Check whether left neighbor is in memory. */ -static int is_left_neighbor_in_cache(struct tree_balance *tb, int h) -{ - struct buffer_head *father, *left; - struct super_block *sb = tb->tb_sb; - b_blocknr_t left_neighbor_blocknr; - int left_neighbor_position; - - /* Father of the left neighbor does not exist. */ - if (!tb->FL[h]) - return 0; - - /* Calculate father of the node to be balanced. */ - father = PATH_H_PBUFFER(tb->tb_path, h + 1); - - RFALSE(!father || - !B_IS_IN_TREE(father) || - !B_IS_IN_TREE(tb->FL[h]) || - !buffer_uptodate(father) || - !buffer_uptodate(tb->FL[h]), - "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", - father, tb->FL[h]); - - /* Get position of the pointer to the left neighbor into the left father. */ - left_neighbor_position = (father == tb->FL[h]) ? - tb->lkey[h] : B_NR_ITEMS(tb->FL[h]); - /* Get left neighbor block number. */ - left_neighbor_blocknr = - B_N_CHILD_NUM(tb->FL[h], left_neighbor_position); - /* Look for the left neighbor in the cache. */ - if ((left = sb_find_get_block(sb, left_neighbor_blocknr))) { - - RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left), - "vs-8170: left neighbor (%b %z) is not in the tree", - left, left); - put_bh(left); - return 1; - } - - return 0; -} - -#define LEFT_PARENTS 'l' -#define RIGHT_PARENTS 'r' - -static void decrement_key(struct cpu_key *key) -{ - // call item specific function for this key - item_ops[cpu_key_k_type(key)]->decrement_key(key); -} - -/* Calculate far left/right parent of the left/right neighbor of the current node, that - * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h]. 
- * Calculate left/right common parent of the current node and L[h]/R[h]. - * Calculate left/right delimiting key position. - * Returns: PATH_INCORRECT - path in the tree is not correct; - SCHEDULE_OCCURRED - schedule occurred while the function worked; - * CARRY_ON - schedule didn't occur while the function worked; - */ -static int get_far_parent(struct tree_balance *tb, - int h, - struct buffer_head **pfather, - struct buffer_head **pcom_father, char c_lr_par) -{ - struct buffer_head *parent; - INITIALIZE_PATH(s_path_to_neighbor_father); - struct treepath *path = tb->tb_path; - struct cpu_key s_lr_father_key; - int counter, - position = INT_MAX, - first_last_position = 0, - path_offset = PATH_H_PATH_OFFSET(path, h); - - /* Starting from F[h] go upwards in the tree, and look for the common - ancestor of F[h], and its neighbor l/r, that should be obtained. */ - - counter = path_offset; - - RFALSE(counter < FIRST_PATH_ELEMENT_OFFSET, - "PAP-8180: invalid path length"); - - for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) { - /* Check whether parent of the current buffer in the path is really parent in the tree. */ - if (!B_IS_IN_TREE - (parent = PATH_OFFSET_PBUFFER(path, counter - 1))) - return REPEAT_SEARCH; - /* Check whether position in the parent is correct. */ - if ((position = - PATH_OFFSET_POSITION(path, - counter - 1)) > - B_NR_ITEMS(parent)) - return REPEAT_SEARCH; - /* Check whether parent at the path really points to the child. */ - if (B_N_CHILD_NUM(parent, position) != - PATH_OFFSET_PBUFFER(path, counter)->b_blocknr) - return REPEAT_SEARCH; - /* Return delimiting key if position in the parent is not equal to first/last one. 
*/ - if (c_lr_par == RIGHT_PARENTS) - first_last_position = B_NR_ITEMS(parent); - if (position != first_last_position) { - *pcom_father = parent; - get_bh(*pcom_father); - /*(*pcom_father = parent)->b_count++; */ - break; - } - } - - /* if we are in the root of the tree, then there is no common father */ - if (counter == FIRST_PATH_ELEMENT_OFFSET) { - /* Check whether first buffer in the path is the root of the tree. */ - if (PATH_OFFSET_PBUFFER - (tb->tb_path, - FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == - SB_ROOT_BLOCK(tb->tb_sb)) { - *pfather = *pcom_father = NULL; - return CARRY_ON; - } - return REPEAT_SEARCH; - } - - RFALSE(B_LEVEL(*pcom_father) <= DISK_LEAF_NODE_LEVEL, - "PAP-8185: (%b %z) level too small", - *pcom_father, *pcom_father); - - /* Check whether the common parent is locked. */ - - if (buffer_locked(*pcom_father)) { - - /* Release the write lock while the buffer is busy */ - reiserfs_write_unlock(tb->tb_sb); - __wait_on_buffer(*pcom_father); - reiserfs_write_lock(tb->tb_sb); - if (FILESYSTEM_CHANGED_TB(tb)) { - brelse(*pcom_father); - return REPEAT_SEARCH; - } - } - - /* So, we got common parent of the current node and its left/right neighbor. - Now we are geting the parent of the left/right neighbor. */ - - /* Form key to get parent of the left/right neighbor. */ - le_key2cpu_key(&s_lr_father_key, - B_N_PDELIM_KEY(*pcom_father, - (c_lr_par == - LEFT_PARENTS) ? 
(tb->lkey[h - 1] = - position - - 1) : (tb->rkey[h - - 1] = - position))); - - if (c_lr_par == LEFT_PARENTS) - decrement_key(&s_lr_father_key); - - if (search_by_key - (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, - h + 1) == IO_ERROR) - // path is released - return IO_ERROR; - - if (FILESYSTEM_CHANGED_TB(tb)) { - pathrelse(&s_path_to_neighbor_father); - brelse(*pcom_father); - return REPEAT_SEARCH; - } - - *pfather = PATH_PLAST_BUFFER(&s_path_to_neighbor_father); - - RFALSE(B_LEVEL(*pfather) != h + 1, - "PAP-8190: (%b %z) level too small", *pfather, *pfather); - RFALSE(s_path_to_neighbor_father.path_length < - FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small"); - - s_path_to_neighbor_father.path_length--; - pathrelse(&s_path_to_neighbor_father); - return CARRY_ON; -} - -/* Get parents of neighbors of node in the path(S[path_offset]) and common parents of - * S[path_offset] and L[path_offset]/R[path_offset]: F[path_offset], FL[path_offset], - * FR[path_offset], CFL[path_offset], CFR[path_offset]. - * Calculate numbers of left and right delimiting keys position: lkey[path_offset], rkey[path_offset]. - * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; - * CARRY_ON - schedule didn't occur while the function worked; - */ -static int get_parents(struct tree_balance *tb, int h) -{ - struct treepath *path = tb->tb_path; - int position, - ret, - path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h); - struct buffer_head *curf, *curcf; - - /* Current node is the root of the tree or will be root of the tree */ - if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) { - /* The root can not have parents. - Release nodes which previously were obtained as parents of the current node neighbors. */ - brelse(tb->FL[h]); - brelse(tb->CFL[h]); - brelse(tb->FR[h]); - brelse(tb->CFR[h]); - tb->FL[h] = NULL; - tb->CFL[h] = NULL; - tb->FR[h] = NULL; - tb->CFR[h] = NULL; - return CARRY_ON; - } - - /* Get parent FL[path_offset] of L[path_offset]. 
*/ - position = PATH_OFFSET_POSITION(path, path_offset - 1); - if (position) { - /* Current node is not the first child of its parent. */ - curf = PATH_OFFSET_PBUFFER(path, path_offset - 1); - curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1); - get_bh(curf); - get_bh(curf); - tb->lkey[h] = position - 1; - } else { - /* Calculate current parent of L[path_offset], which is the left neighbor of the current node. - Calculate current common parent of L[path_offset] and the current node. Note that - CFL[path_offset] not equal FL[path_offset] and CFL[path_offset] not equal F[path_offset]. - Calculate lkey[path_offset]. */ - if ((ret = get_far_parent(tb, h + 1, &curf, - &curcf, - LEFT_PARENTS)) != CARRY_ON) - return ret; - } - - brelse(tb->FL[h]); - tb->FL[h] = curf; /* New initialization of FL[h]. */ - brelse(tb->CFL[h]); - tb->CFL[h] = curcf; /* New initialization of CFL[h]. */ - - RFALSE((curf && !B_IS_IN_TREE(curf)) || - (curcf && !B_IS_IN_TREE(curcf)), - "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf); - -/* Get parent FR[h] of R[h]. */ - -/* Current node is the last child of F[h]. FR[h] != F[h]. */ - if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) { -/* Calculate current parent of R[h], which is the right neighbor of F[h]. - Calculate current common parent of R[h] and current node. Note that CFR[h] - not equal FR[path_offset] and CFR[h] not equal F[h]. */ - if ((ret = - get_far_parent(tb, h + 1, &curf, &curcf, - RIGHT_PARENTS)) != CARRY_ON) - return ret; - } else { -/* Current node is not the last child of its parent F[h]. */ - curf = PATH_OFFSET_PBUFFER(path, path_offset - 1); - curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1); - get_bh(curf); - get_bh(curf); - tb->rkey[h] = position; - } - - brelse(tb->FR[h]); - /* New initialization of FR[path_offset]. */ - tb->FR[h] = curf; - - brelse(tb->CFR[h]); - /* New initialization of CFR[path_offset]. 
*/ - tb->CFR[h] = curcf; - - RFALSE((curf && !B_IS_IN_TREE(curf)) || - (curcf && !B_IS_IN_TREE(curcf)), - "PAP-8205: FR (%b) or CFR (%b) is invalid", curf, curcf); - - return CARRY_ON; -} - -/* it is possible to remove node as result of shiftings to - neighbors even when we insert or paste item. */ -static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree, - struct tree_balance *tb, int h) -{ - struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h); - int levbytes = tb->insert_size[h]; - struct item_head *ih; - struct reiserfs_key *r_key = NULL; - - ih = B_N_PITEM_HEAD(Sh, 0); - if (tb->CFR[h]) - r_key = B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]); - - if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes - /* shifting may merge items which might save space */ - - - ((!h - && op_is_left_mergeable(&(ih->ih_key), Sh->b_size)) ? IH_SIZE : 0) - - - ((!h && r_key - && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0) - + ((h) ? KEY_SIZE : 0)) { - /* node can not be removed */ - if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */ - if (!h) - tb->s0num = - B_NR_ITEMS(Sh) + - ((mode == M_INSERT) ? 1 : 0); - set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; - } - } - PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]); - return !NO_BALANCING_NEEDED; -} - -/* Check whether current node S[h] is balanced when increasing its size by - * Inserting or Pasting. - * Calculate parameters for balancing for current level h. - * Parameters: - * tb tree_balance structure; - * h current level of the node; - * inum item number in S[h]; - * mode i - insert, p - paste; - * Returns: 1 - schedule occurred; - * 0 - balancing for higher levels needed; - * -1 - no balancing for higher levels needed; - * -2 - no disk space. 
- */ -/* ip means Inserting or Pasting: check_balance() calls this when - * tb->insert_size[h] > 0. - * Returns NO_BALANCING_NEEDED, CARRY_ON, the NO_DISK_SPACE or - * REPEAT_SEARCH result of get_empty_nodes(), or the non-CARRY_ON - * result of get_parents(). */ -static int ip_check_balance(struct tree_balance *tb, int h) -{ - struct virtual_node *vn = tb->tb_vn; - int levbytes, /* Number of bytes that must be inserted into (value - is negative if bytes are deleted) buffer which - contains node being balanced. The mnemonic is - that the attempted change in node space used level - is levbytes bytes. */ - ret; - - int lfree, sfree, rfree /* free space in L, S and R */ ; - - /* nver is short for number of vertices, and lnver is the number if - we shift to the left, rnver is the number if we shift to the - right, and lrnver is the number if we shift in both directions. - The goal is to minimize first the number of vertices, and second, - the number of vertices whose contents are changed by shifting, - and third the number of uncached vertices whose contents are - changed by shifting and must be read from disk. */ - int nver, lnver, rnver, lrnver; - - /* used at leaf level only, S0 = S[0] is the node being balanced, - sInum [ I = 0,1,2 ] is the number of items that will - remain in node SI after balancing. S1 and S2 are new - nodes that might be created. */ - - /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters, - where the 4th parameter is s1bytes and the 5th is s2bytes - */ - short snum012[40] = { 0, }; /* s0num, s1num, s2num for 8 cases - 0,1 - do not shift and do not shift but bottle - 2 - shift only whole item to left - 3 - shift to left and bottle as much as possible - 4,5 - shift to right (whole items and as much as possible) - 6,7 - shift to both directions (whole items and as much as possible) - */ - - /* Sh is the node whose balance is currently being checked */ - struct buffer_head *Sh; - - Sh = PATH_H_PBUFFER(tb->tb_path, h); - levbytes = tb->insert_size[h]; - - /* Calculate balance parameters for creating new root. 
*/ - if (!Sh) { - if (!h) - reiserfs_panic(tb->tb_sb, "vs-8210", - "S[0] can not be 0"); - switch (ret = get_empty_nodes(tb, h)) { - case CARRY_ON: - set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ - - case NO_DISK_SPACE: - case REPEAT_SEARCH: - return ret; - default: - reiserfs_panic(tb->tb_sb, "vs-8215", "incorrect " - "return value of get_empty_nodes"); - } - } - - if ((ret = get_parents(tb, h)) != CARRY_ON) /* get parents of S[h] neighbors. */ - return ret; - - sfree = B_FREE_SPACE(Sh); - - /* get free space of neighbors */ - rfree = get_rfree(tb, h); - lfree = get_lfree(tb, h); - - if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) == - NO_BALANCING_NEEDED) - /* and new item fits into node S[h] without any shifting */ - return NO_BALANCING_NEEDED; - - create_virtual_node(tb, h); - - /* - determine maximal number of items we can shift to the left neighbor (in tb structure) - and the maximal number of bytes that can flow to the left neighbor - from the left most liquid item that cannot be shifted from S[0] entirely (returned value) - */ - check_left(tb, h, lfree); - - /* - determine maximal number of items we can shift to the right neighbor (in tb structure) - and the maximal number of bytes that can flow to the right neighbor - from the right most liquid item that cannot be shifted from S[0] entirely (returned value) - */ - check_right(tb, h, rfree); - - /* all contents of internal node S[h] can be moved into its - neighbors, S[h] will be removed after balancing */ - if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) { - int to_r; - - /* Since we are working on internal nodes, and our internal - nodes have fixed size entries, then we can balance by the - number of items rather than the space they consume. In this - routine we set the left node equal to the right node, - allowing a difference of less than or equal to 1 child - pointer. 
*/ - to_r = - ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + - vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - - tb->rnum[h]); - set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, - -1, -1); - return CARRY_ON; - } - - /* this checks balance condition, that any two neighboring nodes can not fit in one node */ - RFALSE(h && - (tb->lnum[h] >= vn->vn_nr_item + 1 || - tb->rnum[h] >= vn->vn_nr_item + 1), - "vs-8220: tree is not balanced on internal level"); - RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) || - (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))), - "vs-8225: tree is not balanced on leaf level"); - - /* all contents of S[0] can be moved into its neighbors - S[0] will be removed after balancing. */ - if (!h && is_leaf_removable(tb)) - return CARRY_ON; - - /* why do we perform this check here rather than earlier?? - Answer: we can win 1 node in some cases above. Moreover we - checked it above, when we checked, that S[0] is not removable - in principle */ - if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */ - if (!h) - tb->s0num = vn->vn_nr_item; - set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; - } - - { - int lpar, rpar, nset, lset, rset, lrset; - /* - * regular overflowing of the node - */ - - /* get_num_ver works in 2 modes (FLOW & NO_FLOW) - lpar, rpar - number of items we can shift to left/right neighbor (including splitting item) - nset, lset, rset, lrset - shows, whether flowing items give better packing - */ -#define FLOW 1 -#define NO_FLOW 0 /* do not do any splitting */ - - /* we choose one of the following */ -#define NOTHING_SHIFT_NO_FLOW 0 -#define NOTHING_SHIFT_FLOW 5 -#define LEFT_SHIFT_NO_FLOW 10 -#define LEFT_SHIFT_FLOW 15 -#define RIGHT_SHIFT_NO_FLOW 20 -#define RIGHT_SHIFT_FLOW 25 -#define LR_SHIFT_NO_FLOW 30 -#define LR_SHIFT_FLOW 35 - - lpar = tb->lnum[h]; - rpar = tb->rnum[h]; - - /* calculate number of blocks S[h] must be split into when 
- nothing is shifted to the neighbors, - as well as number of items in each part of the split node (s012 numbers), - and number of bytes (s1bytes) of the shared drop which flow to S1 if any */ - nset = NOTHING_SHIFT_NO_FLOW; - nver = get_num_ver(vn->vn_mode, tb, h, - 0, -1, h ? vn->vn_nr_item : 0, -1, - snum012, NO_FLOW); - - if (!h) { - int nver1; - - /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */ - nver1 = get_num_ver(vn->vn_mode, tb, h, - 0, -1, 0, -1, - snum012 + NOTHING_SHIFT_FLOW, FLOW); - if (nver > nver1) - nset = NOTHING_SHIFT_FLOW, nver = nver1; - } - - /* calculate number of blocks S[h] must be split into when - l_shift_num first items and l_shift_bytes of the right most - liquid item to be shifted are shifted to the left neighbor, - as well as number of items in each part of the split node (s012 numbers), - and number of bytes (s1bytes) of the shared drop which flow to S1 if any - */ - lset = LEFT_SHIFT_NO_FLOW; - lnver = get_num_ver(vn->vn_mode, tb, h, - lpar - ((h || tb->lbytes == -1) ? 0 : 1), - -1, h ? vn->vn_nr_item : 0, -1, - snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW); - if (!h) { - int lnver1; - - lnver1 = get_num_ver(vn->vn_mode, tb, h, - lpar - - ((tb->lbytes != -1) ? 1 : 0), - tb->lbytes, 0, -1, - snum012 + LEFT_SHIFT_FLOW, FLOW); - if (lnver > lnver1) - lset = LEFT_SHIFT_FLOW, lnver = lnver1; - } - - /* calculate number of blocks S[h] must be split into when - r_shift_num first items and r_shift_bytes of the left most - liquid item to be shifted are shifted to the right neighbor, - as well as number of items in each part of the split node (s012 numbers), - and number of bytes (s1bytes) of the shared drop which flow to S1 if any - */ - rset = RIGHT_SHIFT_NO_FLOW; - rnver = get_num_ver(vn->vn_mode, tb, h, - 0, -1, - h ? (vn->vn_nr_item - rpar) : (rpar - - ((tb-> - rbytes != - -1) ? 
1 : - 0)), -1, - snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW); - if (!h) { - int rnver1; - - rnver1 = get_num_ver(vn->vn_mode, tb, h, - 0, -1, - (rpar - - ((tb->rbytes != -1) ? 1 : 0)), - tb->rbytes, - snum012 + RIGHT_SHIFT_FLOW, FLOW); - - if (rnver > rnver1) - rset = RIGHT_SHIFT_FLOW, rnver = rnver1; - } - - /* calculate number of blocks S[h] must be split into when - items are shifted in both directions, - as well as number of items in each part of the split node (s012 numbers), - and number of bytes (s1bytes) of the shared drop which flow to S1 if any - */ - lrset = LR_SHIFT_NO_FLOW; - lrnver = get_num_ver(vn->vn_mode, tb, h, - lpar - ((h || tb->lbytes == -1) ? 0 : 1), - -1, - h ? (vn->vn_nr_item - rpar) : (rpar - - ((tb-> - rbytes != - -1) ? 1 : - 0)), -1, - snum012 + LR_SHIFT_NO_FLOW, NO_FLOW); - if (!h) { - int lrnver1; - - lrnver1 = get_num_ver(vn->vn_mode, tb, h, - lpar - - ((tb->lbytes != -1) ? 1 : 0), - tb->lbytes, - (rpar - - ((tb->rbytes != -1) ? 1 : 0)), - tb->rbytes, - snum012 + LR_SHIFT_FLOW, FLOW); - if (lrnver > lrnver1) - lrset = LR_SHIFT_FLOW, lrnver = lrnver1; - } - - /* Our general shifting strategy is: - 1) to minimize the number of new nodes; - 2) to minimize the number of neighbors involved in shifting; - 3) to minimize the number of disk reads; */ - - /* we can win TWO or ONE nodes by shifting in both directions */ - if (lrnver < lnver && lrnver < rnver) { - RFALSE(h && - (tb->lnum[h] != 1 || - tb->rnum[h] != 1 || - lrnver != 1 || rnver != 2 || lnver != 2 - || h != 1), "vs-8230: bad h"); - if (lrset == LR_SHIFT_FLOW) - set_parameters(tb, h, tb->lnum[h], tb->rnum[h], - lrnver, snum012 + lrset, - tb->lbytes, tb->rbytes); - else - set_parameters(tb, h, - tb->lnum[h] - - ((tb->lbytes == -1) ? 0 : 1), - tb->rnum[h] - - ((tb->rbytes == -1) ? 
0 : 1), - lrnver, snum012 + lrset, -1, -1); - - return CARRY_ON; - } - - /* if shifting doesn't lead to better packing then don't shift */ - if (nver == lrnver) { - set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1, - -1); - return CARRY_ON; - } - - /* now we know that for better packing shifting in only one - direction either to the left or to the right is required */ - - /* if shifting to the left is better than shifting to the right */ - if (lnver < rnver) { - SET_PAR_SHIFT_LEFT; - return CARRY_ON; - } - - /* if shifting to the right is better than shifting to the left */ - if (lnver > rnver) { - SET_PAR_SHIFT_RIGHT; - return CARRY_ON; - } - - /* now shifting in either direction gives the same number - of nodes and we can make use of the cached neighbors */ - if (is_left_neighbor_in_cache(tb, h)) { - SET_PAR_SHIFT_LEFT; - return CARRY_ON; - } - - /* shift to the right regardless of whether the right neighbor is in cache or not */ - SET_PAR_SHIFT_RIGHT; - return CARRY_ON; - } -} - -/* Check whether current node S[h] is balanced when Decreasing its size by - * Deleting or Cutting for INTERNAL node of S+tree. - * Calculate parameters for balancing for current level h. - * Parameters: - * tb tree_balance structure; - * h current level of the node. - * Returns: CARRY_ON: balancing parameters for level h are set; - * NO_BALANCING_NEEDED: no balancing for higher levels needed; - * any other value comes straight from get_parents(). - * - * Note: Items of internal nodes have fixed size, so the balance condition for - * the internal part of S+tree is as for the B-trees. - */ -static int dc_check_balance_internal(struct tree_balance *tb, int h) -{ - struct virtual_node *vn = tb->tb_vn; - - /* Sh is the node whose balance is currently being checked, - and Fh is its father. 
*/ - struct buffer_head *Sh, *Fh; - int maxsize, ret; - int lfree, rfree /* free space in L and R */ ; - - Sh = PATH_H_PBUFFER(tb->tb_path, h); - Fh = PATH_H_PPARENT(tb->tb_path, h); - - maxsize = MAX_CHILD_SIZE(Sh); /* not referenced again in this function */ - -/* using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */ -/* new_nr_item = number of items node would have if operation is */ -/* performed without balancing (new_nr_item); */ - create_virtual_node(tb, h); - - if (!Fh) { /* S[h] is the root. */ - if (vn->vn_nr_item > 0) { - set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ - } - /* new_nr_item == 0. - * Current root will be deleted resulting in - * decrementing the tree height. */ - set_parameters(tb, h, 0, 0, 0, NULL, -1, -1); - return CARRY_ON; - } - - if ((ret = get_parents(tb, h)) != CARRY_ON) - return ret; - - /* get free space of neighbors */ - rfree = get_rfree(tb, h); - lfree = get_lfree(tb, h); - - /* determine maximal number of items we can fit into neighbors */ - check_left(tb, h, lfree); - check_right(tb, h, rfree); - - if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { /* Balance condition for the internal node is valid. - * In this case we balance only if it leads to better packing. */ - if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { /* Here we join S[h] with one of its neighbors, - * which is impossible with greater values of new_nr_item. */ - if (tb->lnum[h] >= vn->vn_nr_item + 1) { - /* All contents of S[h] can be moved to L[h]. */ - int n; - int order_L; - - order_L = - ((n = - PATH_H_B_ITEM_ORDER(tb->tb_path, - h)) == - 0) ? B_NR_ITEMS(tb->FL[h]) : n - 1; - n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / - (DC_SIZE + KEY_SIZE); - set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, - -1); - return CARRY_ON; - } - - if (tb->rnum[h] >= vn->vn_nr_item + 1) { - /* All contents of S[h] can be moved to R[h]. 
*/ - int n; - int order_R; - - order_R = - ((n = - PATH_H_B_ITEM_ORDER(tb->tb_path, - h)) == - B_NR_ITEMS(Fh)) ? 0 : n + 1; - n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / - (DC_SIZE + KEY_SIZE); - set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, - -1); - return CARRY_ON; - } - } - - if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { - /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */ - int to_r; - - to_r = - ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - - tb->rnum[h] + vn->vn_nr_item + 1) / 2 - - (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]); - set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, - 0, NULL, -1, -1); - return CARRY_ON; - } - - /* Balancing does not lead to better packing. */ - set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; - } - - /* Current node contains an insufficient number of items. Balancing is required. */ - /* Check whether we can merge S[h] with left neighbor. */ - if (tb->lnum[h] >= vn->vn_nr_item + 1) - if (is_left_neighbor_in_cache(tb, h) - || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) { - int n; - int order_L; - - order_L = - ((n = - PATH_H_B_ITEM_ORDER(tb->tb_path, - h)) == - 0) ? B_NR_ITEMS(tb->FL[h]) : n - 1; - n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / (DC_SIZE + - KEY_SIZE); - set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1); - return CARRY_ON; - } - - /* Check whether we can merge S[h] with right neighbor. */ - if (tb->rnum[h] >= vn->vn_nr_item + 1) { - int n; - int order_R; - - order_R = - ((n = - PATH_H_B_ITEM_ORDER(tb->tb_path, - h)) == B_NR_ITEMS(Fh)) ? 0 : (n + 1); - n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / (DC_SIZE + - KEY_SIZE); - set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1); - return CARRY_ON; - } - - /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). 
*/ - if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { - int to_r; - - to_r = - ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + - vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - - tb->rnum[h]); - set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, - -1, -1); - return CARRY_ON; - } - - /* For internal nodes try to borrow item from a neighbor */ - RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root"); - - /* Borrow from the left neighbor when it is in cache or when there is no FR[h] */ - if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) { - int from_l; - - from_l = - (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item + - 1) / 2 - (vn->vn_nr_item + 1); - set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1); - return CARRY_ON; - } - - /* otherwise borrow from the right neighbor */ - set_parameters(tb, h, 0, - -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item + - 1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1); - return CARRY_ON; -} - -/* Check whether current node S[h] is balanced when Decreasing its size by - * Deleting or Truncating for LEAF node of S+tree. - * Calculate parameters for balancing for current level h. - * Parameters: - * tb tree_balance structure; - * h current level of the node. - * Returns: CARRY_ON: balancing parameters for level h are set; - * NO_BALANCING_NEEDED: no balancing for higher levels needed; - * any other value comes straight from get_parents(). - */ -static int dc_check_balance_leaf(struct tree_balance *tb, int h) -{ - struct virtual_node *vn = tb->tb_vn; - - /* Number of bytes that must be deleted from - (value is negative if bytes are deleted) buffer which - contains node being balanced. The mnemonic is that the - attempted change in node space used level is levbytes bytes. */ - int levbytes; - /* the maximal item size */ - int maxsize, ret; - /* S0 is the node whose balance is currently being checked, - and F0 is its father. 
*/ - struct buffer_head *S0, *F0; - int lfree, rfree /* free space in L and R */ ; - - S0 = PATH_H_PBUFFER(tb->tb_path, 0); - F0 = PATH_H_PPARENT(tb->tb_path, 0); - - levbytes = tb->insert_size[h]; - - maxsize = MAX_CHILD_SIZE(S0); /* maximal possible size of an item */ - - if (!F0) { /* S[0] is the root now. */ - - RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0), - "vs-8240: attempt to create empty buffer tree"); - - set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; - } - - if ((ret = get_parents(tb, h)) != CARRY_ON) - return ret; - - /* get free space of neighbors */ - rfree = get_rfree(tb, h); - lfree = get_lfree(tb, h); - - create_virtual_node(tb, h); - - /* if 3 leaves can be merged into one, set parameters and return */ - if (are_leaves_removable(tb, lfree, rfree)) - return CARRY_ON; - - /* determine maximal number of items we can shift to the left/right neighbor - and the maximal number of bytes that can flow to the left/right neighbor - from the left/right most liquid item that cannot be shifted from S[0] entirely - */ - check_left(tb, h, lfree); - check_right(tb, h, rfree); - - /* check whether we can merge S with left neighbor. */ - if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1) - if (is_left_neighbor_in_cache(tb, h) || ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */ - !tb->FR[h]) { - - RFALSE(!tb->FL[h], - "vs-8245: dc_check_balance_leaf: FL[h] must exist"); - - /* set parameter to merge S[0] with its left neighbor */ - set_parameters(tb, h, -1, 0, 0, NULL, -1, -1); - return CARRY_ON; - } - - /* check whether we can merge S[0] with right neighbor. */ - if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) { - set_parameters(tb, h, 0, -1, 0, NULL, -1, -1); - return CARRY_ON; - } - - /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */ - if (is_leaf_removable(tb)) - return CARRY_ON; - - /* Balancing is not required. 
*/ - tb->s0num = vn->vn_nr_item; - set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; -} - -/* Check whether current node S[h] is balanced when Decreasing its size by - * Deleting or Cutting. - * Calculate parameters for balancing for current level h. - * Parameters: - * tb tree_balance structure; - * h current level of the node (0 selects the leaf variant, - * any other value the internal-node variant). - * Returns: whatever dc_check_balance_leaf() or - * dc_check_balance_internal() returns. - */ -static int dc_check_balance(struct tree_balance *tb, int h) -{ - RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)), - "vs-8250: S is not initialized"); - - if (h) - return dc_check_balance_internal(tb, h); - else - return dc_check_balance_leaf(tb, h); -} - -/* Check whether current node S[h] is balanced. - * Calculate parameters for balancing for current level h. - * Parameters: - * - * tb tree_balance structure: - * - * tb is a large structure that must be read about in the header file - * at the same time as this procedure if the reader is to successfully - * understand this procedure - * - * h current level of the node; - * inum item number in S[h]; - * mode i - insert, p - paste, d - delete, c - cut; - * pos_in_item, ins_ih and data are stored in the virtual node here. - * Returns: the result of ip_check_balance() when tb->insert_size[h] > 0, - * otherwise the result of dc_check_balance(). 
- */ -static int check_balance(int mode, - struct tree_balance *tb, - int h, - int inum, - int pos_in_item, - struct item_head *ins_ih, const void *data) -{ - struct virtual_node *vn; - - vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf); - vn->vn_free_ptr = (char *)(tb->tb_vn + 1); - vn->vn_mode = mode; - vn->vn_affected_item_num = inum; - vn->vn_pos_in_item = pos_in_item; - vn->vn_ins_ih = ins_ih; - vn->vn_data = data; - - RFALSE(mode == M_INSERT && !vn->vn_ins_ih, - "vs-8255: ins_ih can not be 0 in insert mode"); - - if (tb->insert_size[h] > 0) - /* Calculate balance parameters when size of node is increasing. */ - return ip_check_balance(tb, h); - - /* Calculate balance parameters when size of node is decreasing. */ - return dc_check_balance(tb, h); -} - -/* Check whether the parent recorded in the path is really the parent of the current node. - * Returns CARRY_ON if it is, REPEAT_SEARCH if the path must be recalculated. */ -static int get_direct_parent(struct tree_balance *tb, int h) -{ - struct buffer_head *bh; - struct treepath *path = tb->tb_path; - int position, - path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h); - - /* We are in the root or in the new root. */ - if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) { - - RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET - 1, - "PAP-8260: invalid offset in the path"); - - if (PATH_OFFSET_PBUFFER(path, FIRST_PATH_ELEMENT_OFFSET)-> - b_blocknr == SB_ROOT_BLOCK(tb->tb_sb)) { - /* Root is not changed. */ - PATH_OFFSET_PBUFFER(path, path_offset - 1) = NULL; - PATH_OFFSET_POSITION(path, path_offset - 1) = 0; - return CARRY_ON; - } - return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */ - } - - if (!B_IS_IN_TREE - (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1))) - return REPEAT_SEARCH; /* Parent in the path is not in the tree. 
*/ - - if ((position = - PATH_OFFSET_POSITION(path, - path_offset - 1)) > B_NR_ITEMS(bh)) - return REPEAT_SEARCH; - - if (B_N_CHILD_NUM(bh, position) != - PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr) - /* Parent in the path is not parent of the current node in the tree. */ - return REPEAT_SEARCH; - - if (buffer_locked(bh)) { - reiserfs_write_unlock(tb->tb_sb); - __wait_on_buffer(bh); - reiserfs_write_lock(tb->tb_sb); - if (FILESYSTEM_CHANGED_TB(tb)) - return REPEAT_SEARCH; - } - - return CARRY_ON; /* Parent in the path is unlocked and is really the parent of the current node. */ -} - -/* Using lnum[h] and rnum[h] we should determine what neighbors - * of S[h] we - * need in order to balance S[h], and get them if necessary. - * Returns: IO_ERROR: reading a neighbor with sb_bread() failed; - * REPEAT_SEARCH: FILESYSTEM_CHANGED_TB() reported a change after - * the write lock was dropped for the read; - * CARRY_ON: every needed neighbor is now in tb->L[h] / tb->R[h]. - */ -static int get_neighbors(struct tree_balance *tb, int h) -{ - int child_position, - path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h + 1); - unsigned long son_number; - struct super_block *sb = tb->tb_sb; - struct buffer_head *bh; - - PROC_INFO_INC(sb, get_neighbors[h]); - - if (tb->lnum[h]) { - /* We need left neighbor to balance S[h]. */ - PROC_INFO_INC(sb, need_l_neighbor[h]); - bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset); - - RFALSE(bh == tb->FL[h] && - !PATH_OFFSET_POSITION(tb->tb_path, path_offset), - "PAP-8270: invalid position in the parent"); - - child_position = - (bh == - tb->FL[h]) ? 
tb->lkey[h] : B_NR_ITEMS(tb-> - FL[h]); - son_number = B_N_CHILD_NUM(tb->FL[h], child_position); - /* the write lock is dropped while the block is read */ - reiserfs_write_unlock(sb); - bh = sb_bread(sb, son_number); - reiserfs_write_lock(sb); - if (!bh) - return IO_ERROR; - if (FILESYSTEM_CHANGED_TB(tb)) { - brelse(bh); - PROC_INFO_INC(sb, get_neighbors_restart[h]); - return REPEAT_SEARCH; - } - - RFALSE(!B_IS_IN_TREE(tb->FL[h]) || - child_position > B_NR_ITEMS(tb->FL[h]) || - B_N_CHILD_NUM(tb->FL[h], child_position) != - bh->b_blocknr, "PAP-8275: invalid parent"); - RFALSE(!B_IS_IN_TREE(bh), "PAP-8280: invalid child"); - RFALSE(!h && - B_FREE_SPACE(bh) != - MAX_CHILD_SIZE(bh) - - dc_size(B_N_CHILD(tb->FL[0], child_position)), - "PAP-8290: invalid child size of left neighbor"); - - brelse(tb->L[h]); - tb->L[h] = bh; - } - - /* We need right neighbor to balance S[h]. */ - if (tb->rnum[h]) { - PROC_INFO_INC(sb, need_r_neighbor[h]); - bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset); - - RFALSE(bh == tb->FR[h] && - PATH_OFFSET_POSITION(tb->tb_path, - path_offset) >= - B_NR_ITEMS(bh), - "PAP-8295: invalid position in the parent"); - - child_position = - (bh == tb->FR[h]) ? 
tb->rkey[h] + 1 : 0; - son_number = B_N_CHILD_NUM(tb->FR[h], child_position); - /* the write lock is dropped while the block is read */ - reiserfs_write_unlock(sb); - bh = sb_bread(sb, son_number); - reiserfs_write_lock(sb); - if (!bh) - return IO_ERROR; - if (FILESYSTEM_CHANGED_TB(tb)) { - brelse(bh); - PROC_INFO_INC(sb, get_neighbors_restart[h]); - return REPEAT_SEARCH; - } - brelse(tb->R[h]); - tb->R[h] = bh; - - RFALSE(!h - && B_FREE_SPACE(bh) != - MAX_CHILD_SIZE(bh) - - dc_size(B_N_CHILD(tb->FR[0], child_position)), - "PAP-8300: invalid child size of right neighbor (%d != %d - %d)", - B_FREE_SPACE(bh), MAX_CHILD_SIZE(bh), - dc_size(B_N_CHILD(tb->FR[0], child_position))); - - } - return CARRY_ON; -} - -/* Upper bound, in bytes, on the memory a virtual node can need for a block - * of sb->s_blocksize bytes: struct virtual_node plus the larger of - * (a) one struct virtual_item per item, for as many minimum-length items - * as fit in the block, and - * (b) one struct virtual_item plus a struct direntry_uarea extended by one - * __u16 per directory entry, for as many minimum-length entries as fit. - * The bh argument is not used. */ -static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh) -{ - int max_num_of_items; - int max_num_of_entries; - unsigned long blocksize = sb->s_blocksize; - -#define MIN_NAME_LEN 1 - - max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN); - max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) / - (DEH_SIZE + MIN_NAME_LEN); - - return sizeof(struct virtual_node) + - max(max_num_of_items * sizeof(struct virtual_item), - sizeof(struct virtual_item) + sizeof(struct direntry_uarea) + - (max_num_of_entries - 1) * sizeof(__u16)); -} - -/* maybe we should fail balancing we are going to perform when kmalloc - fails several times. 
But now it will loop until kmalloc gets - required memory */ -/* Returns CARRY_ON when tb->vn_buf already is, or has just been made, at - * least get_virtual_node_size() bytes large. Returns REPEAT_SEARCH when the - * GFP_ATOMIC allocation failed (the tb buffers are released and a GFP_NOFS - * allocation is attempted before returning), or when an old buffer was - * freed and FILESYSTEM_CHANGED_TB() reports a change. */ -static int get_mem_for_virtual_node(struct tree_balance *tb) -{ - int check_fs = 0; - int size; - char *buf; - - size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path)); - - if (size > tb->vn_buf_size) { - /* we have to allocate more memory for virtual node */ - if (tb->vn_buf) { - /* free memory allocated before */ - kfree(tb->vn_buf); - /* this is not needed if kfree is atomic */ - check_fs = 1; - } - - /* virtual node requires now more memory */ - tb->vn_buf_size = size; - - /* get memory for virtual item */ - buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN); - if (!buf) { - /* getting memory with GFP_KERNEL priority may involve - balancing now (due to indirect_to_direct conversion on - dcache shrinking). So, release path and collected - resources here - (note: the retry below actually passes GFP_NOFS) */ - free_buffers_in_tb(tb); - buf = kmalloc(size, GFP_NOFS); - if (!buf) { - tb->vn_buf_size = 0; - } - tb->vn_buf = buf; - schedule(); - return REPEAT_SEARCH; - } - - tb->vn_buf = buf; - } - - if (check_fs && FILESYSTEM_CHANGED_TB(tb)) - return REPEAT_SEARCH; - - return CARRY_ON; -} - -#ifdef CONFIG_REISERFS_CHECK -/* Panics unless bh (when non-NULL) has a positive reference count, is up to - * date, is in the tree, belongs to sb's device, has sb's block size and a - * block number not above SB_BLOCK_COUNT(sb). descr and level only label - * the buffer in the panic message. 
*/ -static void tb_buffer_sanity_check(struct super_block *sb, - struct buffer_head *bh, - const char *descr, int level) -{ - if (bh) { - if (atomic_read(&(bh->b_count)) <= 0) - - reiserfs_panic(sb, "jmacd-1", "negative or zero " - "reference counter for buffer %s[%d] " - "(%b)", descr, level, bh); - - if (!buffer_uptodate(bh)) - reiserfs_panic(sb, "jmacd-2", "buffer is not up " - "to date %s[%d] (%b)", - descr, level, bh); - - if (!B_IS_IN_TREE(bh)) - reiserfs_panic(sb, "jmacd-3", "buffer is not " - "in tree %s[%d] (%b)", - descr, level, bh); - - if (bh->b_bdev != sb->s_bdev) - reiserfs_panic(sb, "jmacd-4", "buffer has wrong " - "device %s[%d] (%b)", - descr, level, bh); - - if (bh->b_size != sb->s_blocksize) - reiserfs_panic(sb, "jmacd-5", "buffer has wrong " - "blocksize %s[%d] (%b)", - descr, level, bh); - - if (bh->b_blocknr 
> SB_BLOCK_COUNT(sb)) - reiserfs_panic(sb, "jmacd-6", "buffer block " - "number too high %s[%d] (%b)", - descr, level, bh); - } -} -#else -/* CONFIG_REISERFS_CHECK is off: the sanity check is a no-op */ -static void tb_buffer_sanity_check(struct super_block *sb, - struct buffer_head *bh, - const char *descr, int level) -{; -} -#endif - -/* Wrapper for reiserfs_prepare_for_journal(s, bh, 0). - * wait_tb_buffers_until_unlocked() treats a zero result as "bh is still locked". - * NOTE(review): the final 0 presumably means "do not wait" - confirm against - * reiserfs_prepare_for_journal(). */ -static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh) -{ - return reiserfs_prepare_for_journal(s, bh, 0); -} - -/* Runs clear_all_dirty_bits() on the path buffers, on L/FL/CFL and R/FR/CFR - * of every level with a non-zero insert_size, and on the FEB list. When one - * of them reports zero, the write lock is dropped, that buffer is waited - * for, and the whole pass is repeated. - * Returns CARRY_ON once a pass finds no such buffer, REPEAT_SEARCH when - * FILESYSTEM_CHANGED_TB() reports a change after waiting. */ -static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) -{ - struct buffer_head *locked; -#ifdef CONFIG_REISERFS_CHECK - int repeat_counter = 0; -#endif - int i; - - do { - - locked = NULL; - - for (i = tb->tb_path->path_length; - !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) { - if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) { - /* if I understand correctly, we can only be sure the last buffer - ** in the path is in the tree --clm - */ -#ifdef CONFIG_REISERFS_CHECK - if (PATH_PLAST_BUFFER(tb->tb_path) == - PATH_OFFSET_PBUFFER(tb->tb_path, i)) - tb_buffer_sanity_check(tb->tb_sb, - PATH_OFFSET_PBUFFER - (tb->tb_path, - i), "S", - tb->tb_path-> - path_length - i); -#endif - if (!clear_all_dirty_bits(tb->tb_sb, - PATH_OFFSET_PBUFFER - (tb->tb_path, - i))) { - locked = - PATH_OFFSET_PBUFFER(tb->tb_path, - i); - } - } - } - - for (i = 0; !locked && i < MAX_HEIGHT && tb->insert_size[i]; - i++) { - - if (tb->lnum[i]) { - - if (tb->L[i]) { - tb_buffer_sanity_check(tb->tb_sb, - tb->L[i], - "L", i); - if (!clear_all_dirty_bits - (tb->tb_sb, tb->L[i])) - locked = tb->L[i]; - } - - if (!locked && tb->FL[i]) { - tb_buffer_sanity_check(tb->tb_sb, - tb->FL[i], - "FL", i); - if (!clear_all_dirty_bits - (tb->tb_sb, tb->FL[i])) - locked = tb->FL[i]; - } - - if (!locked && tb->CFL[i]) { - tb_buffer_sanity_check(tb->tb_sb, - tb->CFL[i], - "CFL", i); - if (!clear_all_dirty_bits - (tb->tb_sb, tb->CFL[i])) - locked = tb->CFL[i]; - } - - } - - if (!locked && (tb->rnum[i])) { - - if (tb->R[i]) { - tb_buffer_sanity_check(tb->tb_sb, - tb->R[i], - "R", i); - if 
(!clear_all_dirty_bits - (tb->tb_sb, tb->R[i])) - locked = tb->R[i]; - } - - if (!locked && tb->FR[i]) { - tb_buffer_sanity_check(tb->tb_sb, - tb->FR[i], - "FR", i); - if (!clear_all_dirty_bits - (tb->tb_sb, tb->FR[i])) - locked = tb->FR[i]; - } - - if (!locked && tb->CFR[i]) { - tb_buffer_sanity_check(tb->tb_sb, - tb->CFR[i], - "CFR", i); - if (!clear_all_dirty_bits - (tb->tb_sb, tb->CFR[i])) - locked = tb->CFR[i]; - } - } - } - /* as far as I can tell, this is not required. The FEB list seems - ** to be full of newly allocated nodes, which will never be locked, - ** dirty, or anything else. - ** To be safe, I'm putting the checks and waits in. For the moment, - ** they are needed to keep the code in journal.c from complaining - ** about the buffer. That code is inside CONFIG_REISERFS_CHECK as well. - ** --clm - */ - for (i = 0; !locked && i < MAX_FEB_SIZE; i++) { - if (tb->FEB[i]) { - if (!clear_all_dirty_bits - (tb->tb_sb, tb->FEB[i])) - locked = tb->FEB[i]; - } - } - - if (locked) { -#ifdef CONFIG_REISERFS_CHECK - repeat_counter++; - if ((repeat_counter % 10000) == 0) { - reiserfs_warning(tb->tb_sb, "reiserfs-8200", - "too many iterations waiting " - "for buffer to unlock " - "(%b)", locked); - - /* Don't loop forever. Try to recover from possible error. */ - - return (FILESYSTEM_CHANGED_TB(tb)) ? - REPEAT_SEARCH : CARRY_ON; - } -#endif - reiserfs_write_unlock(tb->tb_sb); - __wait_on_buffer(locked); - reiserfs_write_lock(tb->tb_sb); - if (FILESYSTEM_CHANGED_TB(tb)) - return REPEAT_SEARCH; - } - - } while (locked); - - return CARRY_ON; -} - -/* Prepare for balancing, that is - * get all necessary parents, and neighbors; - * analyze what and where should be moved; - * get sufficient number of new nodes; - * Balancing will start only after all resources will be collected at a time. 
- * - * When ported to SMP kernels, only at the last moment after all needed nodes - * are collected in cache, will the resources be locked using the usual - * textbook ordered lock acquisition algorithms. Note that ensuring that - * this code neither write locks what it does not need to write lock nor locks out of order - * will be a pain in the butt that could have been avoided. Grumble grumble. -Hans - * - * fix is meant in the sense of render unchanging - * - * Latency might be improved by first gathering a list of what buffers are needed - * and then getting as many of them in parallel as possible? -Hans - * - * Parameters: - * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append) - * tb tree_balance structure; - * ins_ih item head of item being inserted - * data inserted item or data to be pasted - * The item number and the position in the item are not parameters; they - * are read from tb->tb_path (PATH_LAST_POSITION() and ->pos_in_item). - * Returns: CARRY_ON when analysis and resource gathering completed; - * otherwise the code reported by the failing step (REPEAT_SEARCH, - * NO_DISK_SPACE and IO_ERROR are the ones visible in this file). - */ - -int fix_nodes(int op_mode, struct tree_balance *tb, - struct item_head *ins_ih, const void *data) -{ - int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path); - int pos_in_item; - - /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared - ** during wait_tb_buffers_until_unlocked - */ - int wait_tb_buffers_run = 0; - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - - ++REISERFS_SB(tb->tb_sb)->s_fix_nodes; - - pos_in_item = tb->tb_path->pos_in_item; - - tb->fs_gen = get_generation(tb->tb_sb); - - /* we prepare and log the super here so it will already be in the - ** transaction when do_balance needs to change it. 
- ** This way do_balance won't have to schedule when trying to prepare - ** the super for logging - */ - reiserfs_prepare_for_journal(tb->tb_sb, - SB_BUFFER_WITH_SB(tb->tb_sb), 1); - journal_mark_dirty(tb->transaction_handle, tb->tb_sb, - SB_BUFFER_WITH_SB(tb->tb_sb)); - if (FILESYSTEM_CHANGED_TB(tb)) - return REPEAT_SEARCH; - - /* it is possible in indirect_to_direct conversion */ - if (buffer_locked(tbS0)) { - reiserfs_write_unlock(tb->tb_sb); - __wait_on_buffer(tbS0); - reiserfs_write_lock(tb->tb_sb); - if (FILESYSTEM_CHANGED_TB(tb)) - return REPEAT_SEARCH; - } -#ifdef CONFIG_REISERFS_CHECK - if (REISERFS_SB(tb->tb_sb)->cur_tb) { - print_cur_tb("fix_nodes"); - reiserfs_panic(tb->tb_sb, "PAP-8305", - "there is pending do_balance"); - } - - if (!buffer_uptodate(tbS0) || !B_IS_IN_TREE(tbS0)) - reiserfs_panic(tb->tb_sb, "PAP-8320", "S[0] (%b %z) is " - "not uptodate at the beginning of fix_nodes " - "or not in tree (mode %c)", - tbS0, tbS0, op_mode); - - /* Check parameters. */ - switch (op_mode) { - case M_INSERT: - if (item_num <= 0 || item_num > B_NR_ITEMS(tbS0)) - reiserfs_panic(tb->tb_sb, "PAP-8330", "Incorrect " - "item number %d (in S0 - %d) in case " - "of insert", item_num, - B_NR_ITEMS(tbS0)); - break; - case M_PASTE: - case M_DELETE: - case M_CUT: - if (item_num < 0 || item_num >= B_NR_ITEMS(tbS0)) { - print_block(tbS0, 0, -1, -1); - reiserfs_panic(tb->tb_sb, "PAP-8335", "Incorrect " - "item number(%d); mode = %c " - "insert_size = %d", - item_num, op_mode, - tb->insert_size[0]); - } - break; - default: - reiserfs_panic(tb->tb_sb, "PAP-8340", "Incorrect mode " - "of operation"); - } -#endif - - if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH) - // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat - return REPEAT_SEARCH; - - /* Starting from the leaf level; for all levels h of the tree. 
*/ - for (h = 0; h < MAX_HEIGHT && tb->insert_size[h]; h++) { - ret = get_direct_parent(tb, h); - if (ret != CARRY_ON) - goto repeat; - - ret = check_balance(op_mode, tb, h, item_num, - pos_in_item, ins_ih, data); - if (ret != CARRY_ON) { - if (ret == NO_BALANCING_NEEDED) { - /* No balancing for higher levels needed. */ - ret = get_neighbors(tb, h); - if (ret != CARRY_ON) - goto repeat; - if (h != MAX_HEIGHT - 1) - tb->insert_size[h + 1] = 0; - /* ok, analysis and resource gathering are complete */ - break; - } - goto repeat; - } - - ret = get_neighbors(tb, h); - if (ret != CARRY_ON) - goto repeat; - - /* get_empty_nodes() fails when there is no disk space, or when a - * schedule occurred and the analysis may be invalid and needs - * to be redone. */ - ret = get_empty_nodes(tb, h); - if (ret != CARRY_ON) - goto repeat; - - if (!PATH_H_PBUFFER(tb->tb_path, h)) { - /* We have a positive insert size but no nodes exist on this - level, this means that we are creating a new root. */ - - RFALSE(tb->blknum[h] != 1, - "PAP-8350: creating new empty root"); - - if (h < MAX_HEIGHT - 1) - tb->insert_size[h + 1] = 0; - } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) { - if (tb->blknum[h] > 1) { - /* The tree needs to be grown, so this node S[h] - which is the root node is split into two nodes, - and a new node (S[h+1]) will be created to - become the root node. 
*/ - - RFALSE(h == MAX_HEIGHT - 1, - "PAP-8355: attempt to create too high of a tree"); - - tb->insert_size[h + 1] = - (DC_SIZE + - KEY_SIZE) * (tb->blknum[h] - 1) + - DC_SIZE; - } else if (h < MAX_HEIGHT - 1) - tb->insert_size[h + 1] = 0; - } else - tb->insert_size[h + 1] = - (DC_SIZE + KEY_SIZE) * (tb->blknum[h] - 1); - } - - ret = wait_tb_buffers_until_unlocked(tb); - if (ret == CARRY_ON) { - if (FILESYSTEM_CHANGED_TB(tb)) { - wait_tb_buffers_run = 1; - ret = REPEAT_SEARCH; - goto repeat; - } else { - return CARRY_ON; - } - } else { - wait_tb_buffers_run = 1; - goto repeat; - } - - repeat: - // fix_nodes was unable to perform its calculation due to - // filesystem got changed under us, lack of free disk space or i/o - // failure. If the first is the case - the search will be - // repeated. For now - free all resources acquired so far except - // for the new allocated nodes - { - int i; - - /* Release path buffers. */ - if (wait_tb_buffers_run) { - pathrelse_and_restore(tb->tb_sb, tb->tb_path); - } else { - pathrelse(tb->tb_path); - } - /* brelse all resources collected for balancing */ - for (i = 0; i < MAX_HEIGHT; i++) { - if (wait_tb_buffers_run) { - reiserfs_restore_prepared_buffer(tb->tb_sb, - tb->L[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, - tb->R[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, - tb->FL[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, - tb->FR[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, - tb-> - CFL[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, - tb-> - CFR[i]); - } - - brelse(tb->L[i]); - brelse(tb->R[i]); - brelse(tb->FL[i]); - brelse(tb->FR[i]); - brelse(tb->CFL[i]); - brelse(tb->CFR[i]); - - tb->L[i] = NULL; - tb->R[i] = NULL; - tb->FL[i] = NULL; - tb->FR[i] = NULL; - tb->CFL[i] = NULL; - tb->CFR[i] = NULL; - } - - if (wait_tb_buffers_run) { - for (i = 0; i < MAX_FEB_SIZE; i++) { - if (tb->FEB[i]) - reiserfs_restore_prepared_buffer - (tb->tb_sb, tb->FEB[i]); - } - } - return ret; - } - -} - -/* Anatoly will 
probably forgive me renaming tb to tb. I just - wanted to make lines shorter */ -void unfix_nodes(struct tree_balance *tb) -{ - int i; - - /* Release path buffers. */ - pathrelse_and_restore(tb->tb_sb, tb->tb_path); - - /* brelse all resources collected for balancing */ - for (i = 0; i < MAX_HEIGHT; i++) { - reiserfs_restore_prepared_buffer(tb->tb_sb, tb->L[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, tb->R[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FL[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FR[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFL[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFR[i]); - - brelse(tb->L[i]); - brelse(tb->R[i]); - brelse(tb->FL[i]); - brelse(tb->FR[i]); - brelse(tb->CFL[i]); - brelse(tb->CFR[i]); - } - - /* deal with list of allocated (used and unused) nodes */ - for (i = 0; i < MAX_FEB_SIZE; i++) { - if (tb->FEB[i]) { - b_blocknr_t blocknr = tb->FEB[i]->b_blocknr; - /* de-allocated block which was not used by balancing and - bforget about buffer for it */ - brelse(tb->FEB[i]); - reiserfs_free_block(tb->transaction_handle, NULL, - blocknr, 0); - } - if (tb->used[i]) { - /* release used as new nodes including a new root */ - brelse(tb->used[i]); - } - } - - kfree(tb->vn_buf); - -} diff --git a/ANDROID_3.4.5/fs/reiserfs/hashes.c b/ANDROID_3.4.5/fs/reiserfs/hashes.c deleted file mode 100644 index 91b0cc12..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/hashes.c +++ /dev/null @@ -1,182 +0,0 @@ - -/* - * Keyed 32-bit hash function using TEA in a Davis-Meyer function - * H0 = Key - * Hi = E Mi(Hi-1) + Hi-1 - * - * (see Applied Cryptography, 2nd edition, p448). - * - * Jeremy Fitzhardinge <jeremy@zip.com.au> 1998 - * - * Jeremy has agreed to the contents of reiserfs/README. 
-Hans - * Yura's function is added (04/07/2000) - */ - -// -// keyed_hash -// yura_hash -// r5_hash -// - -#include <linux/kernel.h> -#include "reiserfs.h" -#include <asm/types.h> - -#define DELTA 0x9E3779B9 -#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */ -#define PARTROUNDS 6 /* 6 gets complete mixing */ - -/* a, b, c, d - data; h0, h1 - accumulated hash */ -#define TEACORE(rounds) \ - do { \ - u32 sum = 0; \ - int n = rounds; \ - u32 b0, b1; \ - \ - b0 = h0; \ - b1 = h1; \ - \ - do \ - { \ - sum += DELTA; \ - b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); \ - b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); \ - } while(--n); \ - \ - h0 += b0; \ - h1 += b1; \ - } while(0) - -u32 keyed_hash(const signed char *msg, int len) -{ - u32 k[] = { 0x9464a485, 0x542e1a94, 0x3e846bff, 0xb75bcfc3 }; - - u32 h0 = k[0], h1 = k[1]; - u32 a, b, c, d; - u32 pad; - int i; - - // assert(len >= 0 && len < 256); - - pad = (u32) len | ((u32) len << 8); - pad |= pad << 16; - - while (len >= 16) { - a = (u32) msg[0] | - (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24; - b = (u32) msg[4] | - (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24; - c = (u32) msg[8] | - (u32) msg[9] << 8 | - (u32) msg[10] << 16 | (u32) msg[11] << 24; - d = (u32) msg[12] | - (u32) msg[13] << 8 | - (u32) msg[14] << 16 | (u32) msg[15] << 24; - - TEACORE(PARTROUNDS); - - len -= 16; - msg += 16; - } - - if (len >= 12) { - a = (u32) msg[0] | - (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24; - b = (u32) msg[4] | - (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24; - c = (u32) msg[8] | - (u32) msg[9] << 8 | - (u32) msg[10] << 16 | (u32) msg[11] << 24; - - d = pad; - for (i = 12; i < len; i++) { - d <<= 8; - d |= msg[i]; - } - } else if (len >= 8) { - a = (u32) msg[0] | - (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24; - b = (u32) msg[4] | - (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24; - - c = d = pad; - for (i = 8; i < len; i++) 
{ - c <<= 8; - c |= msg[i]; - } - } else if (len >= 4) { - a = (u32) msg[0] | - (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24; - - b = c = d = pad; - for (i = 4; i < len; i++) { - b <<= 8; - b |= msg[i]; - } - } else { - a = b = c = d = pad; - for (i = 0; i < len; i++) { - a <<= 8; - a |= msg[i]; - } - } - - TEACORE(FULLROUNDS); - -/* return 0;*/ - return h0 ^ h1; -} - -/* What follows in this file is copyright 2000 by Hans Reiser, and the - * licensing of what follows is governed by reiserfs/README */ - -u32 yura_hash(const signed char *msg, int len) -{ - int j, pow; - u32 a, c; - int i; - - for (pow = 1, i = 1; i < len; i++) - pow = pow * 10; - - if (len == 1) - a = msg[0] - 48; - else - a = (msg[0] - 48) * pow; - - for (i = 1; i < len; i++) { - c = msg[i] - 48; - for (pow = 1, j = i; j < len - 1; j++) - pow = pow * 10; - a = a + c * pow; - } - - for (; i < 40; i++) { - c = '0' - 48; - for (pow = 1, j = i; j < len - 1; j++) - pow = pow * 10; - a = a + c * pow; - } - - for (; i < 256; i++) { - c = i; - for (pow = 1, j = i; j < len - 1; j++) - pow = pow * 10; - a = a + c * pow; - } - - a = a << 7; - return a; -} - -u32 r5_hash(const signed char *msg, int len) -{ - u32 a = 0; - while (*msg) { - a += *msg << 4; - a += *msg >> 4; - a *= 11; - msg++; - } - return a; -} diff --git a/ANDROID_3.4.5/fs/reiserfs/ibalance.c b/ANDROID_3.4.5/fs/reiserfs/ibalance.c deleted file mode 100644 index e1978fd8..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/ibalance.c +++ /dev/null @@ -1,1089 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include <asm/uaccess.h> -#include <linux/string.h> -#include <linux/time.h> -#include "reiserfs.h" -#include <linux/buffer_head.h> - -/* this is one and only function that is used outside (do_balance.c) */ -int balance_internal(struct tree_balance *, - int, int, struct item_head *, struct buffer_head **); - -/* modes of internal_shift_left, internal_shift_right and internal_insert_childs */ 
-#define INTERNAL_SHIFT_FROM_S_TO_L 0 -#define INTERNAL_SHIFT_FROM_R_TO_S 1 -#define INTERNAL_SHIFT_FROM_L_TO_S 2 -#define INTERNAL_SHIFT_FROM_S_TO_R 3 -#define INTERNAL_INSERT_TO_S 4 -#define INTERNAL_INSERT_TO_L 5 -#define INTERNAL_INSERT_TO_R 6 - -static void internal_define_dest_src_infos(int shift_mode, - struct tree_balance *tb, - int h, - struct buffer_info *dest_bi, - struct buffer_info *src_bi, - int *d_key, struct buffer_head **cf) -{ - memset(dest_bi, 0, sizeof(struct buffer_info)); - memset(src_bi, 0, sizeof(struct buffer_info)); - /* define dest, src, dest parent, dest position */ - switch (shift_mode) { - case INTERNAL_SHIFT_FROM_S_TO_L: /* used in internal_shift_left */ - src_bi->tb = tb; - src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); - src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); - src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); - dest_bi->tb = tb; - dest_bi->bi_bh = tb->L[h]; - dest_bi->bi_parent = tb->FL[h]; - dest_bi->bi_position = get_left_neighbor_position(tb, h); - *d_key = tb->lkey[h]; - *cf = tb->CFL[h]; - break; - case INTERNAL_SHIFT_FROM_L_TO_S: - src_bi->tb = tb; - src_bi->bi_bh = tb->L[h]; - src_bi->bi_parent = tb->FL[h]; - src_bi->bi_position = get_left_neighbor_position(tb, h); - dest_bi->tb = tb; - dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); - dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); - dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); /* dest position is analog of dest->b_item_order */ - *d_key = tb->lkey[h]; - *cf = tb->CFL[h]; - break; - - case INTERNAL_SHIFT_FROM_R_TO_S: /* used in internal_shift_left */ - src_bi->tb = tb; - src_bi->bi_bh = tb->R[h]; - src_bi->bi_parent = tb->FR[h]; - src_bi->bi_position = get_right_neighbor_position(tb, h); - dest_bi->tb = tb; - dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); - dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); - dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); - *d_key = tb->rkey[h]; - *cf = tb->CFR[h]; - break; - - 
case INTERNAL_SHIFT_FROM_S_TO_R: - src_bi->tb = tb; - src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); - src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); - src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); - dest_bi->tb = tb; - dest_bi->bi_bh = tb->R[h]; - dest_bi->bi_parent = tb->FR[h]; - dest_bi->bi_position = get_right_neighbor_position(tb, h); - *d_key = tb->rkey[h]; - *cf = tb->CFR[h]; - break; - - case INTERNAL_INSERT_TO_L: - dest_bi->tb = tb; - dest_bi->bi_bh = tb->L[h]; - dest_bi->bi_parent = tb->FL[h]; - dest_bi->bi_position = get_left_neighbor_position(tb, h); - break; - - case INTERNAL_INSERT_TO_S: - dest_bi->tb = tb; - dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); - dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); - dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); - break; - - case INTERNAL_INSERT_TO_R: - dest_bi->tb = tb; - dest_bi->bi_bh = tb->R[h]; - dest_bi->bi_parent = tb->FR[h]; - dest_bi->bi_position = get_right_neighbor_position(tb, h); - break; - - default: - reiserfs_panic(tb->tb_sb, "ibalance-1", - "shift type is unknown (%d)", - shift_mode); - } -} - -/* Insert count node pointers into buffer cur before position to + 1. - * Insert count items into buffer cur before position to. - * Items and node pointers are specified by inserted and bh respectively. 
- */ -static void internal_insert_childs(struct buffer_info *cur_bi, - int to, int count, - struct item_head *inserted, - struct buffer_head **bh) -{ - struct buffer_head *cur = cur_bi->bi_bh; - struct block_head *blkh; - int nr; - struct reiserfs_key *ih; - struct disk_child new_dc[2]; - struct disk_child *dc; - int i; - - if (count <= 0) - return; - - blkh = B_BLK_HEAD(cur); - nr = blkh_nr_item(blkh); - - RFALSE(count > 2, "too many children (%d) are to be inserted", count); - RFALSE(B_FREE_SPACE(cur) < count * (KEY_SIZE + DC_SIZE), - "no enough free space (%d), needed %d bytes", - B_FREE_SPACE(cur), count * (KEY_SIZE + DC_SIZE)); - - /* prepare space for count disk_child */ - dc = B_N_CHILD(cur, to + 1); - - memmove(dc + count, dc, (nr + 1 - (to + 1)) * DC_SIZE); - - /* copy to_be_insert disk children */ - for (i = 0; i < count; i++) { - put_dc_size(&(new_dc[i]), - MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE(bh[i])); - put_dc_block_number(&(new_dc[i]), bh[i]->b_blocknr); - } - memcpy(dc, new_dc, DC_SIZE * count); - - /* prepare space for count items */ - ih = B_N_PDELIM_KEY(cur, ((to == -1) ? 
0 : to)); - - memmove(ih + count, ih, - (nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE); - - /* copy item headers (keys) */ - memcpy(ih, inserted, KEY_SIZE); - if (count > 1) - memcpy(ih + 1, inserted + 1, KEY_SIZE); - - /* sizes, item number */ - set_blkh_nr_item(blkh, blkh_nr_item(blkh) + count); - set_blkh_free_space(blkh, - blkh_free_space(blkh) - count * (DC_SIZE + - KEY_SIZE)); - - do_balance_mark_internal_dirty(cur_bi->tb, cur, 0); - - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - check_internal(cur); - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - - if (cur_bi->bi_parent) { - struct disk_child *t_dc = - B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position); - put_dc_size(t_dc, - dc_size(t_dc) + (count * (DC_SIZE + KEY_SIZE))); - do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent, - 0); - - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - check_internal(cur_bi->bi_parent); - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - } - -} - -/* Delete del_num items and node pointers from buffer cur starting from * - * the first_i'th item and first_p'th pointers respectively. 
*/ -static void internal_delete_pointers_items(struct buffer_info *cur_bi, - int first_p, - int first_i, int del_num) -{ - struct buffer_head *cur = cur_bi->bi_bh; - int nr; - struct block_head *blkh; - struct reiserfs_key *key; - struct disk_child *dc; - - RFALSE(cur == NULL, "buffer is 0"); - RFALSE(del_num < 0, - "negative number of items (%d) can not be deleted", del_num); - RFALSE(first_p < 0 || first_p + del_num > B_NR_ITEMS(cur) + 1 - || first_i < 0, - "first pointer order (%d) < 0 or " - "no so many pointers (%d), only (%d) or " - "first key order %d < 0", first_p, first_p + del_num, - B_NR_ITEMS(cur) + 1, first_i); - if (del_num == 0) - return; - - blkh = B_BLK_HEAD(cur); - nr = blkh_nr_item(blkh); - - if (first_p == 0 && del_num == nr + 1) { - RFALSE(first_i != 0, - "1st deleted key must have order 0, not %d", first_i); - make_empty_node(cur_bi); - return; - } - - RFALSE(first_i + del_num > B_NR_ITEMS(cur), - "first_i = %d del_num = %d " - "no so many keys (%d) in the node (%b)(%z)", - first_i, del_num, first_i + del_num, cur, cur); - - /* deleting */ - dc = B_N_CHILD(cur, first_p); - - memmove(dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE); - key = B_N_PDELIM_KEY(cur, first_i); - memmove(key, key + del_num, - (nr - first_i - del_num) * KEY_SIZE + (nr + 1 - - del_num) * DC_SIZE); - - /* sizes, item number */ - set_blkh_nr_item(blkh, blkh_nr_item(blkh) - del_num); - set_blkh_free_space(blkh, - blkh_free_space(blkh) + - (del_num * (KEY_SIZE + DC_SIZE))); - - do_balance_mark_internal_dirty(cur_bi->tb, cur, 0); - /*&&&&&&&&&&&&&&&&&&&&&&& */ - check_internal(cur); - /*&&&&&&&&&&&&&&&&&&&&&&& */ - - if (cur_bi->bi_parent) { - struct disk_child *t_dc; - t_dc = B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position); - put_dc_size(t_dc, - dc_size(t_dc) - (del_num * (KEY_SIZE + DC_SIZE))); - - do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent, - 0); - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - check_internal(cur_bi->bi_parent); - /*&&&&&&&&&&&&&&&&&&&&&&&& 
*/ - } -} - -/* delete n node pointers and items starting from given position */ -static void internal_delete_childs(struct buffer_info *cur_bi, int from, int n) -{ - int i_from; - - i_from = (from == 0) ? from : from - 1; - - /* delete n pointers starting from `from' position in CUR; - delete n keys starting from 'i_from' position in CUR; - */ - internal_delete_pointers_items(cur_bi, from, i_from, n); -} - -/* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest -* last_first == FIRST_TO_LAST means, that we copy first items from src to tail of dest - * last_first == LAST_TO_FIRST means, that we copy last items from src to head of dest - */ -static void internal_copy_pointers_items(struct buffer_info *dest_bi, - struct buffer_head *src, - int last_first, int cpy_num) -{ - /* ATTENTION! Number of node pointers in DEST is equal to number of items in DEST * - * as delimiting key have already inserted to buffer dest.*/ - struct buffer_head *dest = dest_bi->bi_bh; - int nr_dest, nr_src; - int dest_order, src_order; - struct block_head *blkh; - struct reiserfs_key *key; - struct disk_child *dc; - - nr_src = B_NR_ITEMS(src); - - RFALSE(dest == NULL || src == NULL, - "src (%p) or dest (%p) buffer is 0", src, dest); - RFALSE(last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST, - "invalid last_first parameter (%d)", last_first); - RFALSE(nr_src < cpy_num - 1, - "no so many items (%d) in src (%d)", cpy_num, nr_src); - RFALSE(cpy_num < 0, "cpy_num less than 0 (%d)", cpy_num); - RFALSE(cpy_num - 1 + B_NR_ITEMS(dest) > (int)MAX_NR_KEY(dest), - "cpy_num (%d) + item number in dest (%d) can not be > MAX_NR_KEY(%d)", - cpy_num, B_NR_ITEMS(dest), MAX_NR_KEY(dest)); - - if (cpy_num == 0) - return; - - /* coping */ - blkh = B_BLK_HEAD(dest); - nr_dest = blkh_nr_item(blkh); - - /*dest_order = (last_first == LAST_TO_FIRST) ? 0 : nr_dest; */ - /*src_order = (last_first == LAST_TO_FIRST) ? (nr_src - cpy_num + 1) : 0; */ - (last_first == LAST_TO_FIRST) ? 
(dest_order = 0, src_order = - nr_src - cpy_num + 1) : (dest_order = - nr_dest, - src_order = - 0); - - /* prepare space for cpy_num pointers */ - dc = B_N_CHILD(dest, dest_order); - - memmove(dc + cpy_num, dc, (nr_dest - dest_order) * DC_SIZE); - - /* insert pointers */ - memcpy(dc, B_N_CHILD(src, src_order), DC_SIZE * cpy_num); - - /* prepare space for cpy_num - 1 item headers */ - key = B_N_PDELIM_KEY(dest, dest_order); - memmove(key + cpy_num - 1, key, - KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest + - cpy_num)); - - /* insert headers */ - memcpy(key, B_N_PDELIM_KEY(src, src_order), KEY_SIZE * (cpy_num - 1)); - - /* sizes, item number */ - set_blkh_nr_item(blkh, blkh_nr_item(blkh) + (cpy_num - 1)); - set_blkh_free_space(blkh, - blkh_free_space(blkh) - (KEY_SIZE * (cpy_num - 1) + - DC_SIZE * cpy_num)); - - do_balance_mark_internal_dirty(dest_bi->tb, dest, 0); - - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - check_internal(dest); - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - - if (dest_bi->bi_parent) { - struct disk_child *t_dc; - t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position); - put_dc_size(t_dc, - dc_size(t_dc) + (KEY_SIZE * (cpy_num - 1) + - DC_SIZE * cpy_num)); - - do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent, - 0); - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - check_internal(dest_bi->bi_parent); - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - } - -} - -/* Copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest. - * Delete cpy_num - del_par items and node pointers from buffer src. - * last_first == FIRST_TO_LAST means, that we copy/delete first items from src. - * last_first == LAST_TO_FIRST means, that we copy/delete last items from src. 
- */ -static void internal_move_pointers_items(struct buffer_info *dest_bi, - struct buffer_info *src_bi, - int last_first, int cpy_num, - int del_par) -{ - int first_pointer; - int first_item; - - internal_copy_pointers_items(dest_bi, src_bi->bi_bh, last_first, - cpy_num); - - if (last_first == FIRST_TO_LAST) { /* shift_left occurs */ - first_pointer = 0; - first_item = 0; - /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer, - for key - with first_item */ - internal_delete_pointers_items(src_bi, first_pointer, - first_item, cpy_num - del_par); - } else { /* shift_right occurs */ - int i, j; - - i = (cpy_num - del_par == - (j = - B_NR_ITEMS(src_bi->bi_bh)) + 1) ? 0 : j - cpy_num + - del_par; - - internal_delete_pointers_items(src_bi, - j + 1 - cpy_num + del_par, i, - cpy_num - del_par); - } -} - -/* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */ -static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_before, /* insert key before key with n_dest number */ - struct buffer_head *src, int src_position) -{ - struct buffer_head *dest = dest_bi->bi_bh; - int nr; - struct block_head *blkh; - struct reiserfs_key *key; - - RFALSE(dest == NULL || src == NULL, - "source(%p) or dest(%p) buffer is 0", src, dest); - RFALSE(dest_position_before < 0 || src_position < 0, - "source(%d) or dest(%d) key number less than 0", - src_position, dest_position_before); - RFALSE(dest_position_before > B_NR_ITEMS(dest) || - src_position >= B_NR_ITEMS(src), - "invalid position in dest (%d (key number %d)) or in src (%d (key number %d))", - dest_position_before, B_NR_ITEMS(dest), - src_position, B_NR_ITEMS(src)); - RFALSE(B_FREE_SPACE(dest) < KEY_SIZE, - "no enough free space (%d) in dest buffer", B_FREE_SPACE(dest)); - - blkh = B_BLK_HEAD(dest); - nr = blkh_nr_item(blkh); - - /* prepare space for inserting key */ - key = B_N_PDELIM_KEY(dest, dest_position_before); - memmove(key + 1, key, - (nr - 
dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE); - - /* insert key */ - memcpy(key, B_N_PDELIM_KEY(src, src_position), KEY_SIZE); - - /* Change dirt, free space, item number fields. */ - - set_blkh_nr_item(blkh, blkh_nr_item(blkh) + 1); - set_blkh_free_space(blkh, blkh_free_space(blkh) - KEY_SIZE); - - do_balance_mark_internal_dirty(dest_bi->tb, dest, 0); - - if (dest_bi->bi_parent) { - struct disk_child *t_dc; - t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position); - put_dc_size(t_dc, dc_size(t_dc) + KEY_SIZE); - - do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent, - 0); - } -} - -/* Insert d_key'th (delimiting) key from buffer cfl to tail of dest. - * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest. - * Replace d_key'th key in buffer cfl. - * Delete pointer_amount items and node pointers from buffer src. - */ -/* this can be invoked both to shift from S to L and from R to S */ -static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S */ - struct tree_balance *tb, - int h, int pointer_amount) -{ - struct buffer_info dest_bi, src_bi; - struct buffer_head *cf; - int d_key_position; - - internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi, - &d_key_position, &cf); - - /*printk("pointer_amount = %d\n",pointer_amount); */ - - if (pointer_amount) { - /* insert delimiting key from common father of dest and src to node dest into position B_NR_ITEM(dest) */ - internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, - d_key_position); - - if (B_NR_ITEMS(src_bi.bi_bh) == pointer_amount - 1) { - if (src_bi.bi_position /*src->b_item_order */ == 0) - replace_key(tb, cf, d_key_position, - src_bi. 
- bi_parent /*src->b_parent */ , 0); - } else - replace_key(tb, cf, d_key_position, src_bi.bi_bh, - pointer_amount - 1); - } - /* last parameter is del_parameter */ - internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST, - pointer_amount, 0); - -} - -/* Insert delimiting key to L[h]. - * Copy n node pointers and n - 1 items from buffer S[h] to L[h]. - * Delete n - 1 items and node pointers from buffer S[h]. - */ -/* it always shifts from S[h] to L[h] */ -static void internal_shift1_left(struct tree_balance *tb, - int h, int pointer_amount) -{ - struct buffer_info dest_bi, src_bi; - struct buffer_head *cf; - int d_key_position; - - internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, - &dest_bi, &src_bi, &d_key_position, &cf); - - if (pointer_amount > 0) /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */ - internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, - d_key_position); - /* internal_insert_key (tb->L[h], B_NR_ITEM(tb->L[h]), tb->CFL[h], tb->lkey[h]); */ - - /* last parameter is del_parameter */ - internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST, - pointer_amount, 1); - /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1); */ -} - -/* Insert d_key'th (delimiting) key from buffer cfr to head of dest. - * Copy n node pointers and n - 1 items from buffer src to buffer dest. - * Replace d_key'th key in buffer cfr. - * Delete n items and node pointers from buffer src. 
- */ -static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S */ - struct tree_balance *tb, - int h, int pointer_amount) -{ - struct buffer_info dest_bi, src_bi; - struct buffer_head *cf; - int d_key_position; - int nr; - - internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi, - &d_key_position, &cf); - - nr = B_NR_ITEMS(src_bi.bi_bh); - - if (pointer_amount > 0) { - /* insert delimiting key from common father of dest and src to dest node into position 0 */ - internal_insert_key(&dest_bi, 0, cf, d_key_position); - if (nr == pointer_amount - 1) { - RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ || - dest_bi.bi_bh != tb->R[h], - "src (%p) must be == tb->S[h](%p) when it disappears", - src_bi.bi_bh, PATH_H_PBUFFER(tb->tb_path, h)); - /* when S[h] disappers replace left delemiting key as well */ - if (tb->CFL[h]) - replace_key(tb, cf, d_key_position, tb->CFL[h], - tb->lkey[h]); - } else - replace_key(tb, cf, d_key_position, src_bi.bi_bh, - nr - pointer_amount); - } - - /* last parameter is del_parameter */ - internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST, - pointer_amount, 0); -} - -/* Insert delimiting key to R[h]. - * Copy n node pointers and n - 1 items from buffer S[h] to R[h]. - * Delete n - 1 items and node pointers from buffer S[h]. 
- */ -/* it always shift from S[h] to R[h] */ -static void internal_shift1_right(struct tree_balance *tb, - int h, int pointer_amount) -{ - struct buffer_info dest_bi, src_bi; - struct buffer_head *cf; - int d_key_position; - - internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, - &dest_bi, &src_bi, &d_key_position, &cf); - - if (pointer_amount > 0) /* insert rkey from CFR[h] to right neighbor R[h] */ - internal_insert_key(&dest_bi, 0, cf, d_key_position); - /* internal_insert_key (tb->R[h], 0, tb->CFR[h], tb->rkey[h]); */ - - /* last parameter is del_parameter */ - internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST, - pointer_amount, 1); - /* internal_move_pointers_items (tb->R[h], tb->S[h], LAST_TO_FIRST, pointer_amount, 1); */ -} - -/* Delete insert_num node pointers together with their left items - * and balance current node.*/ -static void balance_internal_when_delete(struct tree_balance *tb, - int h, int child_pos) -{ - int insert_num; - int n; - struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h); - struct buffer_info bi; - - insert_num = tb->insert_size[h] / ((int)(DC_SIZE + KEY_SIZE)); - - /* delete child-node-pointer(s) together with their left item(s) */ - bi.tb = tb; - bi.bi_bh = tbSh; - bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h); - bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1); - - internal_delete_childs(&bi, child_pos, -insert_num); - - RFALSE(tb->blknum[h] > 1, - "tb->blknum[%d]=%d when insert_size < 0", h, tb->blknum[h]); - - n = B_NR_ITEMS(tbSh); - - if (tb->lnum[h] == 0 && tb->rnum[h] == 0) { - if (tb->blknum[h] == 0) { - /* node S[h] (root of the tree) is empty now */ - struct buffer_head *new_root; - - RFALSE(n - || B_FREE_SPACE(tbSh) != - MAX_CHILD_SIZE(tbSh) - DC_SIZE, - "buffer must have only 0 keys (%d)", n); - RFALSE(bi.bi_parent, "root has parent (%p)", - bi.bi_parent); - - /* choose a new root */ - if (!tb->L[h - 1] || !B_NR_ITEMS(tb->L[h - 1])) - new_root = tb->R[h - 1]; - else - new_root = 
tb->L[h - 1]; - /* switch super block's tree root block number to the new value */ - PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr); - //REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --; - PUT_SB_TREE_HEIGHT(tb->tb_sb, - SB_TREE_HEIGHT(tb->tb_sb) - 1); - - do_balance_mark_sb_dirty(tb, - REISERFS_SB(tb->tb_sb)->s_sbh, - 1); - /*&&&&&&&&&&&&&&&&&&&&&& */ - if (h > 1) - /* use check_internal if new root is an internal node */ - check_internal(new_root); - /*&&&&&&&&&&&&&&&&&&&&&& */ - - /* do what is needed for buffer thrown from tree */ - reiserfs_invalidate_buffer(tb, tbSh); - return; - } - return; - } - - if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) { /* join S[h] with L[h] */ - - RFALSE(tb->rnum[h] != 0, - "invalid tb->rnum[%d]==%d when joining S[h] with L[h]", - h, tb->rnum[h]); - - internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, n + 1); - reiserfs_invalidate_buffer(tb, tbSh); - - return; - } - - if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) { /* join S[h] with R[h] */ - RFALSE(tb->lnum[h] != 0, - "invalid tb->lnum[%d]==%d when joining S[h] with R[h]", - h, tb->lnum[h]); - - internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, n + 1); - - reiserfs_invalidate_buffer(tb, tbSh); - return; - } - - if (tb->lnum[h] < 0) { /* borrow from left neighbor L[h] */ - RFALSE(tb->rnum[h] != 0, - "wrong tb->rnum[%d]==%d when borrow from L[h]", h, - tb->rnum[h]); - /*internal_shift_right (tb, h, tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], -tb->lnum[h]); */ - internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h, - -tb->lnum[h]); - return; - } - - if (tb->rnum[h] < 0) { /* borrow from right neighbor R[h] */ - RFALSE(tb->lnum[h] != 0, - "invalid tb->lnum[%d]==%d when borrow from R[h]", - h, tb->lnum[h]); - internal_shift_left(INTERNAL_SHIFT_FROM_R_TO_S, tb, h, -tb->rnum[h]); /*tb->S[h], tb->CFR[h], tb->rkey[h], tb->R[h], -tb->rnum[h]); */ - return; - } - - if (tb->lnum[h] > 0) { /* split S[h] into two parts and put them into neighbors */ - 
RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1, - "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them", - h, tb->lnum[h], h, tb->rnum[h], n); - - internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]); /*tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], tb->lnum[h]); */ - internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, - tb->rnum[h]); - - reiserfs_invalidate_buffer(tb, tbSh); - - return; - } - reiserfs_panic(tb->tb_sb, "ibalance-2", - "unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d", - h, tb->lnum[h], h, tb->rnum[h]); -} - -/* Replace delimiting key of buffers L[h] and S[h] by the given key.*/ -static void replace_lkey(struct tree_balance *tb, int h, struct item_head *key) -{ - RFALSE(tb->L[h] == NULL || tb->CFL[h] == NULL, - "L[h](%p) and CFL[h](%p) must exist in replace_lkey", - tb->L[h], tb->CFL[h]); - - if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0) - return; - - memcpy(B_N_PDELIM_KEY(tb->CFL[h], tb->lkey[h]), key, KEY_SIZE); - - do_balance_mark_internal_dirty(tb, tb->CFL[h], 0); -} - -/* Replace delimiting key of buffers S[h] and R[h] by the given key.*/ -static void replace_rkey(struct tree_balance *tb, int h, struct item_head *key) -{ - RFALSE(tb->R[h] == NULL || tb->CFR[h] == NULL, - "R[h](%p) and CFR[h](%p) must exist in replace_rkey", - tb->R[h], tb->CFR[h]); - RFALSE(B_NR_ITEMS(tb->R[h]) == 0, - "R[h] can not be empty if it exists (item number=%d)", - B_NR_ITEMS(tb->R[h])); - - memcpy(B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]), key, KEY_SIZE); - - do_balance_mark_internal_dirty(tb, tb->CFR[h], 0); -} - -int balance_internal(struct tree_balance *tb, /* tree_balance structure */ - int h, /* level of the tree */ - int child_pos, struct item_head *insert_key, /* key for insertion on higher level */ - struct buffer_head **insert_ptr /* node for insertion on higher level */ - ) - /* if inserting/pasting - { - child_pos is the position of the node-pointer in S[h] that * - pointed to 
S[h-1] before balancing of the h-1 level; * - this means that new pointers and items must be inserted AFTER * - child_pos - } - else - { - it is the position of the leftmost pointer that must be deleted (together with - its corresponding key to the left of the pointer) - as a result of the previous level's balancing. - } - */ -{ - struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h); - struct buffer_info bi; - int order; /* we return this: it is 0 if there is no S[h], else it is tb->S[h]->b_item_order */ - int insert_num, n, k; - struct buffer_head *S_new; - struct item_head new_insert_key; - struct buffer_head *new_insert_ptr = NULL; - struct item_head *new_insert_key_addr = insert_key; - - RFALSE(h < 1, "h (%d) can not be < 1 on internal level", h); - - PROC_INFO_INC(tb->tb_sb, balance_at[h]); - - order = - (tbSh) ? PATH_H_POSITION(tb->tb_path, - h + 1) /*tb->S[h]->b_item_order */ : 0; - - /* Using insert_size[h] calculate the number insert_num of items - that must be inserted to or deleted from S[h]. */ - insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE)); - - /* Check whether insert_num is proper * */ - RFALSE(insert_num < -2 || insert_num > 2, - "incorrect number of items inserted to the internal node (%d)", - insert_num); - RFALSE(h > 1 && (insert_num > 1 || insert_num < -1), - "incorrect number of items (%d) inserted to the internal node on a level (h=%d) higher than last internal level", - insert_num, h); - - /* Make balance in case insert_num < 0 */ - if (insert_num < 0) { - balance_internal_when_delete(tb, h, child_pos); - return order; - } - - k = 0; - if (tb->lnum[h] > 0) { - /* shift lnum[h] items from S[h] to the left neighbor L[h]. 
- check how many of new items fall into L[h] or CFL[h] after - shifting */ - n = B_NR_ITEMS(tb->L[h]); /* number of items in L[h] */ - if (tb->lnum[h] <= child_pos) { - /* new items don't fall into L[h] or CFL[h] */ - internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, - tb->lnum[h]); - /*internal_shift_left (tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,tb->lnum[h]); */ - child_pos -= tb->lnum[h]; - } else if (tb->lnum[h] > child_pos + insert_num) { - /* all new items fall into L[h] */ - internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, - tb->lnum[h] - insert_num); - /* internal_shift_left(tb->L[h],tb->CFL[h],tb->lkey[h],tbSh, - tb->lnum[h]-insert_num); - */ - /* insert insert_num keys and node-pointers into L[h] */ - bi.tb = tb; - bi.bi_bh = tb->L[h]; - bi.bi_parent = tb->FL[h]; - bi.bi_position = get_left_neighbor_position(tb, h); - internal_insert_childs(&bi, - /*tb->L[h], tb->S[h-1]->b_next */ - n + child_pos + 1, - insert_num, insert_key, - insert_ptr); - - insert_num = 0; - } else { - struct disk_child *dc; - - /* some items fall into L[h] or CFL[h], but some don't fall */ - internal_shift1_left(tb, h, child_pos + 1); - /* calculate number of new items that fall into L[h] */ - k = tb->lnum[h] - child_pos - 1; - bi.tb = tb; - bi.bi_bh = tb->L[h]; - bi.bi_parent = tb->FL[h]; - bi.bi_position = get_left_neighbor_position(tb, h); - internal_insert_childs(&bi, - /*tb->L[h], tb->S[h-1]->b_next, */ - n + child_pos + 1, k, - insert_key, insert_ptr); - - replace_lkey(tb, h, insert_key + k); - - /* replace the first node-ptr in S[h] by node-ptr to insert_ptr[k] */ - dc = B_N_CHILD(tbSh, 0); - put_dc_size(dc, - MAX_CHILD_SIZE(insert_ptr[k]) - - B_FREE_SPACE(insert_ptr[k])); - put_dc_block_number(dc, insert_ptr[k]->b_blocknr); - - do_balance_mark_internal_dirty(tb, tbSh, 0); - - k++; - insert_key += k; - insert_ptr += k; - insert_num -= k; - child_pos = 0; - } - } - /* tb->lnum[h] > 0 */ - if (tb->rnum[h] > 0) { - /*shift rnum[h] items from S[h] to the right neighbor 
R[h] */ - /* check how many of new items fall into R or CFR after shifting */ - n = B_NR_ITEMS(tbSh); /* number of items in S[h] */ - if (n - tb->rnum[h] >= child_pos) - /* new items fall into S[h] */ - /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],tb->rnum[h]); */ - internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, - tb->rnum[h]); - else if (n + insert_num - tb->rnum[h] < child_pos) { - /* all new items fall into R[h] */ - /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h], - tb->rnum[h] - insert_num); */ - internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, - tb->rnum[h] - insert_num); - - /* insert insert_num keys and node-pointers into R[h] */ - bi.tb = tb; - bi.bi_bh = tb->R[h]; - bi.bi_parent = tb->FR[h]; - bi.bi_position = get_right_neighbor_position(tb, h); - internal_insert_childs(&bi, - /*tb->R[h],tb->S[h-1]->b_next */ - child_pos - n - insert_num + - tb->rnum[h] - 1, - insert_num, insert_key, - insert_ptr); - insert_num = 0; - } else { - struct disk_child *dc; - - /* one of the items falls into CFR[h] */ - internal_shift1_right(tb, h, n - child_pos + 1); - /* calculate number of new items that fall into R[h] */ - k = tb->rnum[h] - n + child_pos - 1; - bi.tb = tb; - bi.bi_bh = tb->R[h]; - bi.bi_parent = tb->FR[h]; - bi.bi_position = get_right_neighbor_position(tb, h); - internal_insert_childs(&bi, - /*tb->R[h], tb->R[h]->b_child, */ - 0, k, insert_key + 1, - insert_ptr + 1); - - replace_rkey(tb, h, insert_key + insert_num - k - 1); - - /* replace the first node-ptr in R[h] by node-ptr insert_ptr[insert_num-k-1] */ - dc = B_N_CHILD(tb->R[h], 0); - put_dc_size(dc, - MAX_CHILD_SIZE(insert_ptr - [insert_num - k - 1]) - - B_FREE_SPACE(insert_ptr - [insert_num - k - 1])); - put_dc_block_number(dc, - insert_ptr[insert_num - k - - 1]->b_blocknr); - - do_balance_mark_internal_dirty(tb, tb->R[h], 0); - - insert_num -= (k + 1); - } - } - - /** Fill new node that appears instead of S[h] **/ - RFALSE(tb->blknum[h] > 2, 
"blknum can not be > 2 for internal level"); - RFALSE(tb->blknum[h] < 0, "blknum can not be < 0"); - - if (!tb->blknum[h]) { /* node S[h] is empty now */ - RFALSE(!tbSh, "S[h] is equal NULL"); - - /* do what is needed for buffer thrown from tree */ - reiserfs_invalidate_buffer(tb, tbSh); - return order; - } - - if (!tbSh) { - /* create new root */ - struct disk_child *dc; - struct buffer_head *tbSh_1 = PATH_H_PBUFFER(tb->tb_path, h - 1); - struct block_head *blkh; - - if (tb->blknum[h] != 1) - reiserfs_panic(NULL, "ibalance-3", "One new node " - "required for creating the new root"); - /* S[h] = empty buffer from the list FEB. */ - tbSh = get_FEB(tb); - blkh = B_BLK_HEAD(tbSh); - set_blkh_level(blkh, h + 1); - - /* Put the unique node-pointer to S[h] that points to S[h-1]. */ - - dc = B_N_CHILD(tbSh, 0); - put_dc_block_number(dc, tbSh_1->b_blocknr); - put_dc_size(dc, - (MAX_CHILD_SIZE(tbSh_1) - B_FREE_SPACE(tbSh_1))); - - tb->insert_size[h] -= DC_SIZE; - set_blkh_free_space(blkh, blkh_free_space(blkh) - DC_SIZE); - - do_balance_mark_internal_dirty(tb, tbSh, 0); - - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - check_internal(tbSh); - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - - /* put new root into path structure */ - PATH_OFFSET_PBUFFER(tb->tb_path, ILLEGAL_PATH_ELEMENT_OFFSET) = - tbSh; - - /* Change root in structure super block. 
*/ - PUT_SB_ROOT_BLOCK(tb->tb_sb, tbSh->b_blocknr); - PUT_SB_TREE_HEIGHT(tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) + 1); - do_balance_mark_sb_dirty(tb, REISERFS_SB(tb->tb_sb)->s_sbh, 1); - } - - if (tb->blknum[h] == 2) { - int snum; - struct buffer_info dest_bi, src_bi; - - /* S_new = free buffer from list FEB */ - S_new = get_FEB(tb); - - set_blkh_level(B_BLK_HEAD(S_new), h + 1); - - dest_bi.tb = tb; - dest_bi.bi_bh = S_new; - dest_bi.bi_parent = NULL; - dest_bi.bi_position = 0; - src_bi.tb = tb; - src_bi.bi_bh = tbSh; - src_bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h); - src_bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1); - - n = B_NR_ITEMS(tbSh); /* number of items in S[h] */ - snum = (insert_num + n + 1) / 2; - if (n - snum >= child_pos) { - /* new items don't fall into S_new */ - /* store the delimiting key for the next level */ - /* new_insert_key = (n - snum)'th key in S[h] */ - memcpy(&new_insert_key, B_N_PDELIM_KEY(tbSh, n - snum), - KEY_SIZE); - /* last parameter is del_par */ - internal_move_pointers_items(&dest_bi, &src_bi, - LAST_TO_FIRST, snum, 0); - /* internal_move_pointers_items(S_new, tbSh, LAST_TO_FIRST, snum, 0); */ - } else if (n + insert_num - snum < child_pos) { - /* all new items fall into S_new */ - /* store the delimiting key for the next level */ - /* new_insert_key = (n + insert_item - snum)'th key in S[h] */ - memcpy(&new_insert_key, - B_N_PDELIM_KEY(tbSh, n + insert_num - snum), - KEY_SIZE); - /* last parameter is del_par */ - internal_move_pointers_items(&dest_bi, &src_bi, - LAST_TO_FIRST, - snum - insert_num, 0); - /* internal_move_pointers_items(S_new,tbSh,1,snum - insert_num,0); */ - - /* insert insert_num keys and node-pointers into S_new */ - internal_insert_childs(&dest_bi, - /*S_new,tb->S[h-1]->b_next, */ - child_pos - n - insert_num + - snum - 1, - insert_num, insert_key, - insert_ptr); - - insert_num = 0; - } else { - struct disk_child *dc; - - /* some items fall into S_new, but some don't fall */ - /* last parameter is 
del_par */ - internal_move_pointers_items(&dest_bi, &src_bi, - LAST_TO_FIRST, - n - child_pos + 1, 1); - /* internal_move_pointers_items(S_new,tbSh,1,n - child_pos + 1,1); */ - /* calculate number of new items that fall into S_new */ - k = snum - n + child_pos - 1; - - internal_insert_childs(&dest_bi, /*S_new, */ 0, k, - insert_key + 1, insert_ptr + 1); - - /* new_insert_key = insert_key[insert_num - k - 1] */ - memcpy(&new_insert_key, insert_key + insert_num - k - 1, - KEY_SIZE); - /* replace first node-ptr in S_new by node-ptr to insert_ptr[insert_num-k-1] */ - - dc = B_N_CHILD(S_new, 0); - put_dc_size(dc, - (MAX_CHILD_SIZE - (insert_ptr[insert_num - k - 1]) - - B_FREE_SPACE(insert_ptr - [insert_num - k - 1]))); - put_dc_block_number(dc, - insert_ptr[insert_num - k - - 1]->b_blocknr); - - do_balance_mark_internal_dirty(tb, S_new, 0); - - insert_num -= (k + 1); - } - /* new_insert_ptr = node_pointer to S_new */ - new_insert_ptr = S_new; - - RFALSE(!buffer_journaled(S_new) || buffer_journal_dirty(S_new) - || buffer_dirty(S_new), "cm-00001: bad S_new (%b)", - S_new); - - // S_new is released in unfix_nodes - } - - n = B_NR_ITEMS(tbSh); /*number of items in S[h] */ - - if (0 <= child_pos && child_pos <= n && insert_num > 0) { - bi.tb = tb; - bi.bi_bh = tbSh; - bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h); - bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1); - internal_insert_childs(&bi, /*tbSh, */ - /* ( tb->S[h-1]->b_parent == tb->S[h] ) ? 
tb->S[h-1]->b_next : tb->S[h]->b_child->b_next, */ - child_pos, insert_num, insert_key, - insert_ptr); - } - - memcpy(new_insert_key_addr, &new_insert_key, KEY_SIZE); - insert_ptr[0] = new_insert_ptr; - - return order; -} diff --git a/ANDROID_3.4.5/fs/reiserfs/inode.c b/ANDROID_3.4.5/fs/reiserfs/inode.c deleted file mode 100644 index 494c315c..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/inode.c +++ /dev/null @@ -1,3232 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include <linux/time.h> -#include <linux/fs.h> -#include "reiserfs.h" -#include "acl.h" -#include "xattr.h" -#include <linux/exportfs.h> -#include <linux/pagemap.h> -#include <linux/highmem.h> -#include <linux/slab.h> -#include <asm/uaccess.h> -#include <asm/unaligned.h> -#include <linux/buffer_head.h> -#include <linux/mpage.h> -#include <linux/writeback.h> -#include <linux/quotaops.h> -#include <linux/swap.h> - -int reiserfs_commit_write(struct file *f, struct page *page, - unsigned from, unsigned to); - -void reiserfs_evict_inode(struct inode *inode) -{ - /* We need blocks for transaction + (user+group) quota update (possibly delete) */ - int jbegin_count = - JOURNAL_PER_BALANCE_CNT * 2 + - 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); - struct reiserfs_transaction_handle th; - int depth; - int err; - - if (!inode->i_nlink && !is_bad_inode(inode)) - dquot_initialize(inode); - - truncate_inode_pages(&inode->i_data, 0); - if (inode->i_nlink) - goto no_delete; - - depth = reiserfs_write_lock_once(inode->i_sb); - - /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. 
*/ - if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ - reiserfs_delete_xattrs(inode); - - if (journal_begin(&th, inode->i_sb, jbegin_count)) - goto out; - reiserfs_update_inode_transaction(inode); - - reiserfs_discard_prealloc(&th, inode); - - err = reiserfs_delete_object(&th, inode); - - /* Do quota update inside a transaction for journaled quotas. We must do that - * after delete_object so that quota updates go into the same transaction as - * stat data deletion */ - if (!err) - dquot_free_inode(inode); - - if (journal_end(&th, inode->i_sb, jbegin_count)) - goto out; - - /* check return value from reiserfs_delete_object after - * ending the transaction - */ - if (err) - goto out; - - /* all items of file are deleted, so we can remove "save" link */ - remove_save_link(inode, 0 /* not truncate */ ); /* we can't do anything - * about an error here */ - } else { - /* no object items are in the tree */ - ; - } - out: - end_writeback(inode); /* note this must go after the journal_end to prevent deadlock */ - dquot_drop(inode); - inode->i_blocks = 0; - reiserfs_write_unlock_once(inode->i_sb, depth); - return; - -no_delete: - end_writeback(inode); - dquot_drop(inode); -} - -static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, - __u32 objectid, loff_t offset, int type, int length) -{ - key->version = version; - - key->on_disk_key.k_dir_id = dirid; - key->on_disk_key.k_objectid = objectid; - set_cpu_key_k_offset(key, offset); - set_cpu_key_k_type(key, type); - key->key_length = length; -} - -/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set - offset and type of key */ -void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset, - int type, int length) -{ - _make_cpu_key(key, get_inode_item_key_version(inode), - le32_to_cpu(INODE_PKEY(inode)->k_dir_id), - le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type, - length); -} - 
-// -// when key is 0, do not set version and short key -// -inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, - int version, - loff_t offset, int type, int length, - int entry_count /*or ih_free_space */ ) -{ - if (key) { - ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id); - ih->ih_key.k_objectid = - cpu_to_le32(key->on_disk_key.k_objectid); - } - put_ih_version(ih, version); - set_le_ih_k_offset(ih, offset); - set_le_ih_k_type(ih, type); - put_ih_item_len(ih, length); - /* set_ih_free_space (ih, 0); */ - // for directory items it is entry count, for directs and stat - // datas - 0xffff, for indirects - 0 - put_ih_entry_count(ih, entry_count); -} - -// -// FIXME: we might cache recently accessed indirect item - -// Ugh. Not too eager for that.... -// I cut the code until such time as I see a convincing argument (benchmark). -// I don't want a bloated inode struct..., and I don't like code complexity.... - -/* cutting the code is fine, since it really isn't in use yet and is easy -** to add back in. But, Vladimir has a really good idea here. Think -** about what happens for reading a file. For each page, -** The VFS layer calls reiserfs_readpage, who searches the tree to find -** an indirect item. This indirect item has X number of pointers, where -** X is a big number if we've done the block allocation right. But, -** we only use one or two of these pointers during each call to readpage, -** needlessly researching again later on. -** -** The size of the cache could be dynamic based on the size of the file. -** -** I'd also like to see us cache the location the stat data item, since -** we are needlessly researching for that frequently. -** -** --chris -*/ - -/* If this page has a file tail in it, and -** it was read in by get_block_create_0, the page data is valid, -** but tail is still sitting in a direct item, and we can't write to -** it. 
So, look through this page, and check all the mapped buffers -** to make sure they have valid block numbers. Any that don't need -** to be unmapped, so that __block_write_begin will correctly call -** reiserfs_get_block to convert the tail into an unformatted node -*/ -static inline void fix_tail_page_for_writing(struct page *page) -{ - struct buffer_head *head, *next, *bh; - - if (page && page_has_buffers(page)) { - head = page_buffers(page); - bh = head; - do { - next = bh->b_this_page; - if (buffer_mapped(bh) && bh->b_blocknr == 0) { - reiserfs_unmap_buffer(bh); - } - bh = next; - } while (bh != head); - } -} - -/* reiserfs_get_block does not need to allocate a block only if it has been - done already or non-hole position has been found in the indirect item */ -static inline int allocation_needed(int retval, b_blocknr_t allocated, - struct item_head *ih, - __le32 * item, int pos_in_item) -{ - if (allocated) - return 0; - if (retval == POSITION_FOUND && is_indirect_le_ih(ih) && - get_block_num(item, pos_in_item)) - return 0; - return 1; -} - -static inline int indirect_item_found(int retval, struct item_head *ih) -{ - return (retval == POSITION_FOUND) && is_indirect_le_ih(ih); -} - -static inline void set_block_dev_mapped(struct buffer_head *bh, - b_blocknr_t block, struct inode *inode) -{ - map_bh(bh, inode->i_sb, block); -} - -// -// files which were created in the earlier version can not be longer, -// than 2 gb -// -static int file_capable(struct inode *inode, sector_t block) -{ - if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file. 
- block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb - return 1; - - return 0; -} - -static int restart_transaction(struct reiserfs_transaction_handle *th, - struct inode *inode, struct treepath *path) -{ - struct super_block *s = th->t_super; - int len = th->t_blocks_allocated; - int err; - - BUG_ON(!th->t_trans_id); - BUG_ON(!th->t_refcount); - - pathrelse(path); - - /* we cannot restart while nested */ - if (th->t_refcount > 1) { - return 0; - } - reiserfs_update_sd(th, inode); - err = journal_end(th, s, len); - if (!err) { - err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6); - if (!err) - reiserfs_update_inode_transaction(inode); - } - return err; -} - -// it is called by get_block when create == 0. Returns block number -// for 'block'-th logical block of file. When it hits direct item it -// returns 0 (being called from bmap) or read direct item into piece -// of page (bh_result) - -// Please improve the english/clarity in the comment above, as it is -// hard to understand. - -static int _get_block_create_0(struct inode *inode, sector_t block, - struct buffer_head *bh_result, int args) -{ - INITIALIZE_PATH(path); - struct cpu_key key; - struct buffer_head *bh; - struct item_head *ih, tmp_ih; - b_blocknr_t blocknr; - char *p = NULL; - int chars; - int ret; - int result; - int done = 0; - unsigned long offset; - - // prepare the key to look for the 'block'-th block of file - make_cpu_key(&key, inode, - (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY, - 3); - - result = search_for_position_by_key(inode->i_sb, &key, &path); - if (result != POSITION_FOUND) { - pathrelse(&path); - if (p) - kunmap(bh_result->b_page); - if (result == IO_ERROR) - return -EIO; - // We do not return -ENOENT if there is a hole but page is uptodate, because it means - // That there is some MMAPED data associated with it that is yet to be written to disk. 
- if ((args & GET_BLOCK_NO_HOLE) - && !PageUptodate(bh_result->b_page)) { - return -ENOENT; - } - return 0; - } - // - bh = get_last_bh(&path); - ih = get_ih(&path); - if (is_indirect_le_ih(ih)) { - __le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih); - - /* FIXME: here we could cache indirect item or part of it in - the inode to avoid search_by_key in case of subsequent - access to file */ - blocknr = get_block_num(ind_item, path.pos_in_item); - ret = 0; - if (blocknr) { - map_bh(bh_result, inode->i_sb, blocknr); - if (path.pos_in_item == - ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) { - set_buffer_boundary(bh_result); - } - } else - // We do not return -ENOENT if there is a hole but page is uptodate, because it means - // That there is some MMAPED data associated with it that is yet to be written to disk. - if ((args & GET_BLOCK_NO_HOLE) - && !PageUptodate(bh_result->b_page)) { - ret = -ENOENT; - } - - pathrelse(&path); - if (p) - kunmap(bh_result->b_page); - return ret; - } - // requested data are in direct item(s) - if (!(args & GET_BLOCK_READ_DIRECT)) { - // we are called by bmap. FIXME: we can not map block of file - // when it is stored in direct item(s) - pathrelse(&path); - if (p) - kunmap(bh_result->b_page); - return -ENOENT; - } - - /* if we've got a direct item, and the buffer or page was uptodate, - ** we don't want to pull data off disk again. skip to the - ** end, where we map the buffer and return - */ - if (buffer_uptodate(bh_result)) { - goto finished; - } else - /* - ** grab_tail_page can trigger calls to reiserfs_get_block on up to date - ** pages without any buffers. If the page is up to date, we don't want - ** read old data off disk. 
Set the up to date bit on the buffer instead - ** and jump to the end - */ - if (!bh_result->b_page || PageUptodate(bh_result->b_page)) { - set_buffer_uptodate(bh_result); - goto finished; - } - // read file tail into part of page - offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1); - copy_item_head(&tmp_ih, ih); - - /* we only want to kmap if we are reading the tail into the page. - ** this is not the common case, so we don't kmap until we are - ** sure we need to. But, this means the item might move if - ** kmap schedules - */ - if (!p) - p = (char *)kmap(bh_result->b_page); - - p += offset; - memset(p, 0, inode->i_sb->s_blocksize); - do { - if (!is_direct_le_ih(ih)) { - BUG(); - } - /* make sure we don't read more bytes than actually exist in - ** the file. This can happen in odd cases where i_size isn't - ** correct, and when direct item padding results in a few - ** extra bytes at the end of the direct item - */ - if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) - break; - if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) { - chars = - inode->i_size - (le_ih_k_offset(ih) - 1) - - path.pos_in_item; - done = 1; - } else { - chars = ih_item_len(ih) - path.pos_in_item; - } - memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars); - - if (done) - break; - - p += chars; - - if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1)) - // we done, if read direct item is not the last item of - // node FIXME: we could try to check right delimiting key - // to see whether direct item continues in the right - // neighbor or rely on i_size - break; - - // update key to look for the next piece - set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars); - result = search_for_position_by_key(inode->i_sb, &key, &path); - if (result != POSITION_FOUND) - // i/o error most likely - break; - bh = get_last_bh(&path); - ih = get_ih(&path); - } while (1); - - flush_dcache_page(bh_result->b_page); - kunmap(bh_result->b_page); - - finished: - 
pathrelse(&path); - - if (result == IO_ERROR) - return -EIO; - - /* this buffer has valid data, but isn't valid for io. mapping it to - * block #0 tells the rest of reiserfs it just has a tail in it - */ - map_bh(bh_result, inode->i_sb, 0); - set_buffer_uptodate(bh_result); - return 0; -} - -// this is called to create file map. So, _get_block_create_0 will not -// read direct item -static int reiserfs_bmap(struct inode *inode, sector_t block, - struct buffer_head *bh_result, int create) -{ - if (!file_capable(inode, block)) - return -EFBIG; - - reiserfs_write_lock(inode->i_sb); - /* do not read the direct item */ - _get_block_create_0(inode, block, bh_result, 0); - reiserfs_write_unlock(inode->i_sb); - return 0; -} - -/* special version of get_block that is only used by grab_tail_page right -** now. It is sent to __block_write_begin, and when you try to get a -** block past the end of the file (or a block from a hole) it returns -** -ENOENT instead of a valid buffer. __block_write_begin expects to -** be able to do i/o on the buffers returned, unless an error value -** is also returned. -** -** So, this allows __block_write_begin to be used for reading a single block -** in a page. Where it does not produce a valid page for holes, or past the -** end of the file. This turns out to be exactly what we need for reading -** tails for conversion. -** -** The point of the wrapper is forcing a certain value for create, even -** though the VFS layer is calling this function with create==1. If you -** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, -** don't use this function. -*/ -static int reiserfs_get_block_create_0(struct inode *inode, sector_t block, - struct buffer_head *bh_result, - int create) -{ - return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE); -} - -/* This is special helper for reiserfs_get_block in case we are executing - direct_IO request. 
*/ -static int reiserfs_get_blocks_direct_io(struct inode *inode, - sector_t iblock, - struct buffer_head *bh_result, - int create) -{ - int ret; - - bh_result->b_page = NULL; - - /* We set the b_size before reiserfs_get_block call since it is - referenced in convert_tail_for_hole() that may be called from - reiserfs_get_block() */ - bh_result->b_size = (1 << inode->i_blkbits); - - ret = reiserfs_get_block(inode, iblock, bh_result, - create | GET_BLOCK_NO_DANGLE); - if (ret) - goto out; - - /* don't allow direct io onto tail pages */ - if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { - /* make sure future calls to the direct io funcs for this offset - ** in the file fail by unmapping the buffer - */ - clear_buffer_mapped(bh_result); - ret = -EINVAL; - } - /* Possible unpacked tail. Flush the data before pages have - disappeared */ - if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { - int err; - - reiserfs_write_lock(inode->i_sb); - - err = reiserfs_commit_for_inode(inode); - REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; - - reiserfs_write_unlock(inode->i_sb); - - if (err < 0) - ret = err; - } - out: - return ret; -} - -/* -** helper function for when reiserfs_get_block is called for a hole -** but the file tail is still in a direct item -** bh_result is the buffer head for the hole -** tail_offset is the offset of the start of the tail in the file -** -** This calls prepare_write, which will start a new transaction -** you should not be in a transaction, or have any paths held when you -** call this. 
-*/ -static int convert_tail_for_hole(struct inode *inode, - struct buffer_head *bh_result, - loff_t tail_offset) -{ - unsigned long index; - unsigned long tail_end; - unsigned long tail_start; - struct page *tail_page; - struct page *hole_page = bh_result->b_page; - int retval = 0; - - if ((tail_offset & (bh_result->b_size - 1)) != 1) - return -EIO; - - /* always try to read until the end of the block */ - tail_start = tail_offset & (PAGE_CACHE_SIZE - 1); - tail_end = (tail_start | (bh_result->b_size - 1)) + 1; - - index = tail_offset >> PAGE_CACHE_SHIFT; - /* hole_page can be zero in case of direct_io, we are sure - that we cannot get here if we write with O_DIRECT into - tail page */ - if (!hole_page || index != hole_page->index) { - tail_page = grab_cache_page(inode->i_mapping, index); - retval = -ENOMEM; - if (!tail_page) { - goto out; - } - } else { - tail_page = hole_page; - } - - /* we don't have to make sure the conversion did not happen while - ** we were locking the page because anyone that could convert - ** must first take i_mutex. - ** - ** We must fix the tail page for writing because it might have buffers - ** that are mapped, but have a block number of 0. This indicates tail - ** data that has been read directly into the page, and - ** __block_write_begin won't trigger a get_block in this case. 
- */ - fix_tail_page_for_writing(tail_page); - retval = __reiserfs_write_begin(tail_page, tail_start, - tail_end - tail_start); - if (retval) - goto unlock; - - /* tail conversion might change the data in the page */ - flush_dcache_page(tail_page); - - retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end); - - unlock: - if (tail_page != hole_page) { - unlock_page(tail_page); - page_cache_release(tail_page); - } - out: - return retval; -} - -static inline int _allocate_block(struct reiserfs_transaction_handle *th, - sector_t block, - struct inode *inode, - b_blocknr_t * allocated_block_nr, - struct treepath *path, int flags) -{ - BUG_ON(!th->t_trans_id); - -#ifdef REISERFS_PREALLOCATE - if (!(flags & GET_BLOCK_NO_IMUX)) { - return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, - path, block); - } -#endif - return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path, - block); -} - -int reiserfs_get_block(struct inode *inode, sector_t block, - struct buffer_head *bh_result, int create) -{ - int repeat, retval = 0; - b_blocknr_t allocated_block_nr = 0; // b_blocknr_t is (unsigned) 32 bit int - INITIALIZE_PATH(path); - int pos_in_item; - struct cpu_key key; - struct buffer_head *bh, *unbh = NULL; - struct item_head *ih, tmp_ih; - __le32 *item; - int done; - int fs_gen; - int lock_depth; - struct reiserfs_transaction_handle *th = NULL; - /* space reserved in transaction batch: - . 3 balancings in direct->indirect conversion - . 1 block involved into reiserfs_update_sd() - XXX in practically impossible worst case direct2indirect() - can incur (much) more than 3 balancings. 
- quota update for user, group */ - int jbegin_count = - JOURNAL_PER_BALANCE_CNT * 3 + 1 + - 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); - int version; - int dangle = 1; - loff_t new_offset = - (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1; - - lock_depth = reiserfs_write_lock_once(inode->i_sb); - version = get_inode_item_key_version(inode); - - if (!file_capable(inode, block)) { - reiserfs_write_unlock_once(inode->i_sb, lock_depth); - return -EFBIG; - } - - /* if !create, we aren't changing the FS, so we don't need to - ** log anything, so we don't need to start a transaction - */ - if (!(create & GET_BLOCK_CREATE)) { - int ret; - /* find number of block-th logical block of the file */ - ret = _get_block_create_0(inode, block, bh_result, - create | GET_BLOCK_READ_DIRECT); - reiserfs_write_unlock_once(inode->i_sb, lock_depth); - return ret; - } - /* - * if we're already in a transaction, make sure to close - * any new transactions we start in this func - */ - if ((create & GET_BLOCK_NO_DANGLE) || - reiserfs_transaction_running(inode->i_sb)) - dangle = 0; - - /* If file is of such a size, that it might have a tail and tails are enabled - ** we should mark it as possibly needing tail packing on close - */ - if ((have_large_tails(inode->i_sb) - && inode->i_size < i_block_size(inode) * 4) - || (have_small_tails(inode->i_sb) - && inode->i_size < i_block_size(inode))) - REISERFS_I(inode)->i_flags |= i_pack_on_close_mask; - - /* set the key of the first byte in the 'block'-th block of file */ - make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ ); - if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) { - start_trans: - th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count); - if (!th) { - retval = -ENOMEM; - goto failure; - } - reiserfs_update_inode_transaction(inode); - } - research: - - retval = search_for_position_by_key(inode->i_sb, &key, &path); - if (retval == IO_ERROR) { - retval = -EIO; - goto failure; - } - - bh = 
get_last_bh(&path); - ih = get_ih(&path); - item = get_item(&path); - pos_in_item = path.pos_in_item; - - fs_gen = get_generation(inode->i_sb); - copy_item_head(&tmp_ih, ih); - - if (allocation_needed - (retval, allocated_block_nr, ih, item, pos_in_item)) { - /* we have to allocate block for the unformatted node */ - if (!th) { - pathrelse(&path); - goto start_trans; - } - - repeat = - _allocate_block(th, block, inode, &allocated_block_nr, - &path, create); - - if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { - /* restart the transaction to give the journal a chance to free - ** some blocks. releases the path, so we have to go back to - ** research if we succeed on the second try - */ - SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1; - retval = restart_transaction(th, inode, &path); - if (retval) - goto failure; - repeat = - _allocate_block(th, block, inode, - &allocated_block_nr, NULL, create); - - if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) { - goto research; - } - if (repeat == QUOTA_EXCEEDED) - retval = -EDQUOT; - else - retval = -ENOSPC; - goto failure; - } - - if (fs_changed(fs_gen, inode->i_sb) - && item_moved(&tmp_ih, &path)) { - goto research; - } - } - - if (indirect_item_found(retval, ih)) { - b_blocknr_t unfm_ptr; - /* 'block'-th block is in the file already (there is - corresponding cell in some indirect item). 
But it may be - zero unformatted node pointer (hole) */ - unfm_ptr = get_block_num(item, pos_in_item); - if (unfm_ptr == 0) { - /* use allocated block to plug the hole */ - reiserfs_prepare_for_journal(inode->i_sb, bh, 1); - if (fs_changed(fs_gen, inode->i_sb) - && item_moved(&tmp_ih, &path)) { - reiserfs_restore_prepared_buffer(inode->i_sb, - bh); - goto research; - } - set_buffer_new(bh_result); - if (buffer_dirty(bh_result) - && reiserfs_data_ordered(inode->i_sb)) - reiserfs_add_ordered_list(inode, bh_result); - put_block_num(item, pos_in_item, allocated_block_nr); - unfm_ptr = allocated_block_nr; - journal_mark_dirty(th, inode->i_sb, bh); - reiserfs_update_sd(th, inode); - } - set_block_dev_mapped(bh_result, unfm_ptr, inode); - pathrelse(&path); - retval = 0; - if (!dangle && th) - retval = reiserfs_end_persistent_transaction(th); - - reiserfs_write_unlock_once(inode->i_sb, lock_depth); - - /* the item was found, so new blocks were not added to the file - ** there is no need to make sure the inode is updated with this - ** transaction - */ - return retval; - } - - if (!th) { - pathrelse(&path); - goto start_trans; - } - - /* desired position is not found or is in the direct item. We have - to append file with holes up to 'block'-th block converting - direct items to indirect one if necessary */ - done = 0; - do { - if (is_statdata_le_ih(ih)) { - __le32 unp = 0; - struct cpu_key tmp_key; - - /* indirect item has to be inserted */ - make_le_item_head(&tmp_ih, &key, version, 1, - TYPE_INDIRECT, UNFM_P_SIZE, - 0 /* free_space */ ); - - if (cpu_key_k_offset(&key) == 1) { - /* we are going to add 'block'-th block to the file. 
Use - allocated block for that */ - unp = cpu_to_le32(allocated_block_nr); - set_block_dev_mapped(bh_result, - allocated_block_nr, inode); - set_buffer_new(bh_result); - done = 1; - } - tmp_key = key; // ;) - set_cpu_key_k_offset(&tmp_key, 1); - PATH_LAST_POSITION(&path)++; - - retval = - reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih, - inode, (char *)&unp); - if (retval) { - reiserfs_free_block(th, inode, - allocated_block_nr, 1); - goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST - } - //mark_tail_converted (inode); - } else if (is_direct_le_ih(ih)) { - /* direct item has to be converted */ - loff_t tail_offset; - - tail_offset = - ((le_ih_k_offset(ih) - - 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; - if (tail_offset == cpu_key_k_offset(&key)) { - /* direct item we just found fits into block we have - to map. Convert it into unformatted node: use - bh_result for the conversion */ - set_block_dev_mapped(bh_result, - allocated_block_nr, inode); - unbh = bh_result; - done = 1; - } else { - /* we have to padd file tail stored in direct item(s) - up to block size and convert it to unformatted - node. 
FIXME: this should also get into page cache */ - - pathrelse(&path); - /* - * ugly, but we can only end the transaction if - * we aren't nested - */ - BUG_ON(!th->t_refcount); - if (th->t_refcount == 1) { - retval = - reiserfs_end_persistent_transaction - (th); - th = NULL; - if (retval) - goto failure; - } - - retval = - convert_tail_for_hole(inode, bh_result, - tail_offset); - if (retval) { - if (retval != -ENOSPC) - reiserfs_error(inode->i_sb, - "clm-6004", - "convert tail failed " - "inode %lu, error %d", - inode->i_ino, - retval); - if (allocated_block_nr) { - /* the bitmap, the super, and the stat data == 3 */ - if (!th) - th = reiserfs_persistent_transaction(inode->i_sb, 3); - if (th) - reiserfs_free_block(th, - inode, - allocated_block_nr, - 1); - } - goto failure; - } - goto research; - } - retval = - direct2indirect(th, inode, &path, unbh, - tail_offset); - if (retval) { - reiserfs_unmap_buffer(unbh); - reiserfs_free_block(th, inode, - allocated_block_nr, 1); - goto failure; - } - /* it is important the set_buffer_uptodate is done after - ** the direct2indirect. The buffer might contain valid - ** data newer than the data on disk (read by readpage, changed, - ** and then sent here by writepage). direct2indirect needs - ** to know if unbh was already up to date, so it can decide - ** if the data in unbh needs to be replaced with data from - ** the disk - */ - set_buffer_uptodate(unbh); - - /* unbh->b_page == NULL in case of DIRECT_IO request, this means - buffer will disappear shortly, so it should not be added to - */ - if (unbh->b_page) { - /* we've converted the tail, so we must - ** flush unbh before the transaction commits - */ - reiserfs_add_tail_list(inode, unbh); - - /* mark it dirty now to prevent commit_write from adding - ** this buffer to the inode's dirty buffer list - */ - /* - * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty(). 
- * It's still atomic, but it sets the page dirty too, - * which makes it eligible for writeback at any time by the - * VM (which was also the case with __mark_buffer_dirty()) - */ - mark_buffer_dirty(unbh); - } - } else { - /* append indirect item with holes if needed, when appending - pointer to 'block'-th block use block, which is already - allocated */ - struct cpu_key tmp_key; - unp_t unf_single = 0; // We use this in case we need to allocate only - // one block which is a fastpath - unp_t *un; - __u64 max_to_insert = - MAX_ITEM_LEN(inode->i_sb->s_blocksize) / - UNFM_P_SIZE; - __u64 blocks_needed; - - RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, - "vs-804: invalid position for append"); - /* indirect item has to be appended, set up key of that position */ - make_cpu_key(&tmp_key, inode, - le_key_k_offset(version, - &(ih->ih_key)) + - op_bytes_number(ih, - inode->i_sb->s_blocksize), - //pos_in_item * inode->i_sb->s_blocksize, - TYPE_INDIRECT, 3); // key type is unimportant - - RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key), - "green-805: invalid offset"); - blocks_needed = - 1 + - ((cpu_key_k_offset(&key) - - cpu_key_k_offset(&tmp_key)) >> inode->i_sb-> - s_blocksize_bits); - - if (blocks_needed == 1) { - un = &unf_single; - } else { - un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_NOFS); - if (!un) { - un = &unf_single; - blocks_needed = 1; - max_to_insert = 0; - } - } - if (blocks_needed <= max_to_insert) { - /* we are going to add target block to the file. Use allocated - block for that */ - un[blocks_needed - 1] = - cpu_to_le32(allocated_block_nr); - set_block_dev_mapped(bh_result, - allocated_block_nr, inode); - set_buffer_new(bh_result); - done = 1; - } else { - /* paste hole to the indirect item */ - /* If kmalloc failed, max_to_insert becomes zero and it means we - only have space for one block */ - blocks_needed = - max_to_insert ? 
max_to_insert : 1; - } - retval = - reiserfs_paste_into_item(th, &path, &tmp_key, inode, - (char *)un, - UNFM_P_SIZE * - blocks_needed); - - if (blocks_needed != 1) - kfree(un); - - if (retval) { - reiserfs_free_block(th, inode, - allocated_block_nr, 1); - goto failure; - } - if (!done) { - /* We need to mark new file size in case this function will be - interrupted/aborted later on. And we may do this only for - holes. */ - inode->i_size += - inode->i_sb->s_blocksize * blocks_needed; - } - } - - if (done == 1) - break; - - /* this loop could log more blocks than we had originally asked - ** for. So, we have to allow the transaction to end if it is - ** too big or too full. Update the inode so things are - ** consistent if we crash before the function returns - ** - ** release the path so that anybody waiting on the path before - ** ending their transaction will be able to continue. - */ - if (journal_transaction_should_end(th, th->t_blocks_allocated)) { - retval = restart_transaction(th, inode, &path); - if (retval) - goto failure; - } - /* - * inserting indirect pointers for a hole can take a - * long time. reschedule if needed and also release the write - * lock for others. 
- */ - if (need_resched()) { - reiserfs_write_unlock_once(inode->i_sb, lock_depth); - schedule(); - lock_depth = reiserfs_write_lock_once(inode->i_sb); - } - - retval = search_for_position_by_key(inode->i_sb, &key, &path); - if (retval == IO_ERROR) { - retval = -EIO; - goto failure; - } - if (retval == POSITION_FOUND) { - reiserfs_warning(inode->i_sb, "vs-825", - "%K should not be found", &key); - retval = -EEXIST; - if (allocated_block_nr) - reiserfs_free_block(th, inode, - allocated_block_nr, 1); - pathrelse(&path); - goto failure; - } - bh = get_last_bh(&path); - ih = get_ih(&path); - item = get_item(&path); - pos_in_item = path.pos_in_item; - } while (1); - - retval = 0; - - failure: - if (th && (!dangle || (retval && !th->t_trans_id))) { - int err; - if (th->t_trans_id) - reiserfs_update_sd(th, inode); - err = reiserfs_end_persistent_transaction(th); - if (err) - retval = err; - } - - reiserfs_write_unlock_once(inode->i_sb, lock_depth); - reiserfs_check_path(&path); - return retval; -} - -static int -reiserfs_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) -{ - return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); -} - -/* Compute real number of used bytes by file - * Following three functions can go away when we'll have enough space in stat item - */ -static int real_space_diff(struct inode *inode, int sd_size) -{ - int bytes; - loff_t blocksize = inode->i_sb->s_blocksize; - - if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) - return sd_size; - - /* End of file is also in full block with indirect reference, so round - ** up to the next block. - ** - ** there is just no way to know if the tail is actually packed - ** on the file, so we have to assume it isn't. 
When we pack the - ** tail, we add 4 bytes to pretend there really is an unformatted - ** node pointer - */ - bytes = - ((inode->i_size + - (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE + - sd_size; - return bytes; -} - -static inline loff_t to_real_used_space(struct inode *inode, ulong blocks, - int sd_size) -{ - if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { - return inode->i_size + - (loff_t) (real_space_diff(inode, sd_size)); - } - return ((loff_t) real_space_diff(inode, sd_size)) + - (((loff_t) blocks) << 9); -} - -/* Compute number of blocks used by file in ReiserFS counting */ -static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size) -{ - loff_t bytes = inode_get_bytes(inode); - loff_t real_space = real_space_diff(inode, sd_size); - - /* keeps fsck and non-quota versions of reiserfs happy */ - if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { - bytes += (loff_t) 511; - } - - /* files from before the quota patch might i_blocks such that - ** bytes < real_space. Deal with that here to prevent it from - ** going negative. - */ - if (bytes < real_space) - return 0; - return (bytes - real_space) >> 9; -} - -// -// BAD: new directories have stat data of new type and all other items -// of old type. 
Version stored in the inode says about body items, so -// in update_stat_data we can not rely on inode, but have to check -// item version directly -// - -// called by read_locked_inode -static void init_inode(struct inode *inode, struct treepath *path) -{ - struct buffer_head *bh; - struct item_head *ih; - __u32 rdev; - //int version = ITEM_VERSION_1; - - bh = PATH_PLAST_BUFFER(path); - ih = PATH_PITEM_HEAD(path); - - copy_key(INODE_PKEY(inode), &(ih->ih_key)); - - INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list)); - REISERFS_I(inode)->i_flags = 0; - REISERFS_I(inode)->i_prealloc_block = 0; - REISERFS_I(inode)->i_prealloc_count = 0; - REISERFS_I(inode)->i_trans_id = 0; - REISERFS_I(inode)->i_jl = NULL; - reiserfs_init_xattr_rwsem(inode); - - if (stat_data_v1(ih)) { - struct stat_data_v1 *sd = - (struct stat_data_v1 *)B_I_PITEM(bh, ih); - unsigned long blocks; - - set_inode_item_key_version(inode, KEY_FORMAT_3_5); - set_inode_sd_version(inode, STAT_DATA_V1); - inode->i_mode = sd_v1_mode(sd); - set_nlink(inode, sd_v1_nlink(sd)); - inode->i_uid = sd_v1_uid(sd); - inode->i_gid = sd_v1_gid(sd); - inode->i_size = sd_v1_size(sd); - inode->i_atime.tv_sec = sd_v1_atime(sd); - inode->i_mtime.tv_sec = sd_v1_mtime(sd); - inode->i_ctime.tv_sec = sd_v1_ctime(sd); - inode->i_atime.tv_nsec = 0; - inode->i_ctime.tv_nsec = 0; - inode->i_mtime.tv_nsec = 0; - - inode->i_blocks = sd_v1_blocks(sd); - inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); - blocks = (inode->i_size + 511) >> 9; - blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9); - if (inode->i_blocks > blocks) { - // there was a bug in <=3.5.23 when i_blocks could take negative - // values. Starting from 3.5.17 this value could even be stored in - // stat data. For such files we set i_blocks based on file - // size. Just 2 notes: this can be wrong for sparce files. 
On-disk value will be - // only updated if file's inode will ever change - inode->i_blocks = blocks; - } - - rdev = sd_v1_rdev(sd); - REISERFS_I(inode)->i_first_direct_byte = - sd_v1_first_direct_byte(sd); - /* an early bug in the quota code can give us an odd number for the - ** block count. This is incorrect, fix it here. - */ - if (inode->i_blocks & 1) { - inode->i_blocks++; - } - inode_set_bytes(inode, - to_real_used_space(inode, inode->i_blocks, - SD_V1_SIZE)); - /* nopack is initially zero for v1 objects. For v2 objects, - nopack is initialised from sd_attrs */ - REISERFS_I(inode)->i_flags &= ~i_nopack_mask; - } else { - // new stat data found, but object may have old items - // (directories and symlinks) - struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih); - - inode->i_mode = sd_v2_mode(sd); - set_nlink(inode, sd_v2_nlink(sd)); - inode->i_uid = sd_v2_uid(sd); - inode->i_size = sd_v2_size(sd); - inode->i_gid = sd_v2_gid(sd); - inode->i_mtime.tv_sec = sd_v2_mtime(sd); - inode->i_atime.tv_sec = sd_v2_atime(sd); - inode->i_ctime.tv_sec = sd_v2_ctime(sd); - inode->i_ctime.tv_nsec = 0; - inode->i_mtime.tv_nsec = 0; - inode->i_atime.tv_nsec = 0; - inode->i_blocks = sd_v2_blocks(sd); - rdev = sd_v2_rdev(sd); - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - inode->i_generation = - le32_to_cpu(INODE_PKEY(inode)->k_dir_id); - else - inode->i_generation = sd_v2_generation(sd); - - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - set_inode_item_key_version(inode, KEY_FORMAT_3_5); - else - set_inode_item_key_version(inode, KEY_FORMAT_3_6); - REISERFS_I(inode)->i_first_direct_byte = 0; - set_inode_sd_version(inode, STAT_DATA_V2); - inode_set_bytes(inode, - to_real_used_space(inode, inode->i_blocks, - SD_V2_SIZE)); - /* read persistent inode attributes from sd and initialise - generic inode flags from them */ - REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd); - sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); - } - - pathrelse(path); - if 
(S_ISREG(inode->i_mode)) { - inode->i_op = &reiserfs_file_inode_operations; - inode->i_fop = &reiserfs_file_operations; - inode->i_mapping->a_ops = &reiserfs_address_space_operations; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &reiserfs_dir_inode_operations; - inode->i_fop = &reiserfs_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { - inode->i_op = &reiserfs_symlink_inode_operations; - inode->i_mapping->a_ops = &reiserfs_address_space_operations; - } else { - inode->i_blocks = 0; - inode->i_op = &reiserfs_special_inode_operations; - init_special_inode(inode, inode->i_mode, new_decode_dev(rdev)); - } -} - -// update new stat data with inode fields -static void inode2sd(void *sd, struct inode *inode, loff_t size) -{ - struct stat_data *sd_v2 = (struct stat_data *)sd; - __u16 flags; - - set_sd_v2_mode(sd_v2, inode->i_mode); - set_sd_v2_nlink(sd_v2, inode->i_nlink); - set_sd_v2_uid(sd_v2, inode->i_uid); - set_sd_v2_size(sd_v2, size); - set_sd_v2_gid(sd_v2, inode->i_gid); - set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); - set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); - set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); - set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE)); - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev)); - else - set_sd_v2_generation(sd_v2, inode->i_generation); - flags = REISERFS_I(inode)->i_attrs; - i_attrs_to_sd_attrs(inode, &flags); - set_sd_v2_attrs(sd_v2, flags); -} - -// used to copy inode's fields to old stat data -static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) -{ - struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; - - set_sd_v1_mode(sd_v1, inode->i_mode); - set_sd_v1_uid(sd_v1, inode->i_uid); - set_sd_v1_gid(sd_v1, inode->i_gid); - set_sd_v1_nlink(sd_v1, inode->i_nlink); - set_sd_v1_size(sd_v1, size); - set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec); - set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec); - set_sd_v1_mtime(sd_v1, 
inode->i_mtime.tv_sec); - - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev)); - else - set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); - - // Sigh. i_first_direct_byte is back - set_sd_v1_first_direct_byte(sd_v1, - REISERFS_I(inode)->i_first_direct_byte); -} - -/* NOTE, you must prepare the buffer head before sending it here, -** and then log it after the call -*/ -static void update_stat_data(struct treepath *path, struct inode *inode, - loff_t size) -{ - struct buffer_head *bh; - struct item_head *ih; - - bh = PATH_PLAST_BUFFER(path); - ih = PATH_PITEM_HEAD(path); - - if (!is_statdata_le_ih(ih)) - reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h", - INODE_PKEY(inode), ih); - - if (stat_data_v1(ih)) { - // path points to old stat data - inode2sd_v1(B_I_PITEM(bh, ih), inode, size); - } else { - inode2sd(B_I_PITEM(bh, ih), inode, size); - } - - return; -} - -void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, - struct inode *inode, loff_t size) -{ - struct cpu_key key; - INITIALIZE_PATH(path); - struct buffer_head *bh; - int fs_gen; - struct item_head *ih, tmp_ih; - int retval; - - BUG_ON(!th->t_trans_id); - - make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3); //key type is unimportant - - for (;;) { - int pos; - /* look for the object's stat data */ - retval = search_item(inode->i_sb, &key, &path); - if (retval == IO_ERROR) { - reiserfs_error(inode->i_sb, "vs-13050", - "i/o failure occurred trying to " - "update %K stat data", &key); - return; - } - if (retval == ITEM_NOT_FOUND) { - pos = PATH_LAST_POSITION(&path); - pathrelse(&path); - if (inode->i_nlink == 0) { - /*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */ - return; - } - reiserfs_warning(inode->i_sb, "vs-13060", - "stat data of object %k (nlink == %d) " - "not found (pos %d)", - INODE_PKEY(inode), inode->i_nlink, - pos); - 
reiserfs_check_path(&path); - return; - } - - /* sigh, prepare_for_journal might schedule. When it schedules the - ** FS might change. We have to detect that, and loop back to the - ** search if the stat data item has moved - */ - bh = get_last_bh(&path); - ih = get_ih(&path); - copy_item_head(&tmp_ih, ih); - fs_gen = get_generation(inode->i_sb); - reiserfs_prepare_for_journal(inode->i_sb, bh, 1); - if (fs_changed(fs_gen, inode->i_sb) - && item_moved(&tmp_ih, &path)) { - reiserfs_restore_prepared_buffer(inode->i_sb, bh); - continue; /* Stat_data item has been moved after scheduling. */ - } - break; - } - update_stat_data(&path, inode, size); - journal_mark_dirty(th, th->t_super, bh); - pathrelse(&path); - return; -} - -/* reiserfs_read_locked_inode is called to read the inode off disk, and it -** does a make_bad_inode when things go wrong. But, we need to make sure -** and clear the key in the private portion of the inode, otherwise a -** corresponding iput might try to delete whatever object the inode last -** represented. 
-*/ -static void reiserfs_make_bad_inode(struct inode *inode) -{ - memset(INODE_PKEY(inode), 0, KEY_SIZE); - make_bad_inode(inode); -} - -// -// initially this function was derived from minix or ext2's analog and -// evolved as the prototype did -// - -int reiserfs_init_locked_inode(struct inode *inode, void *p) -{ - struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p; - inode->i_ino = args->objectid; - INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid); - return 0; -} - -/* looks for stat data in the tree, and fills up the fields of in-core - inode stat data fields */ -void reiserfs_read_locked_inode(struct inode *inode, - struct reiserfs_iget_args *args) -{ - INITIALIZE_PATH(path_to_sd); - struct cpu_key key; - unsigned long dirino; - int retval; - - dirino = args->dirid; - - /* set version 1, version 2 could be used too, because stat data - key is the same in both versions */ - key.version = KEY_FORMAT_3_5; - key.on_disk_key.k_dir_id = dirino; - key.on_disk_key.k_objectid = inode->i_ino; - key.on_disk_key.k_offset = 0; - key.on_disk_key.k_type = 0; - - /* look for the object's stat data */ - retval = search_item(inode->i_sb, &key, &path_to_sd); - if (retval == IO_ERROR) { - reiserfs_error(inode->i_sb, "vs-13070", - "i/o failure occurred trying to find " - "stat data of %K", &key); - reiserfs_make_bad_inode(inode); - return; - } - if (retval != ITEM_FOUND) { - /* a stale NFS handle can trigger this without it being an error */ - pathrelse(&path_to_sd); - reiserfs_make_bad_inode(inode); - clear_nlink(inode); - return; - } - - init_inode(inode, &path_to_sd); - - /* It is possible that knfsd is trying to access inode of a file - that is being removed from the disk by some other thread. As we - update sd on unlink all that is required is to check for nlink - here. This bug was first found by Sizif when debugging - SquidNG/Butterfly, forgotten, and found again after Philippe - Gramoulle <philippe.gramoulle@mmania.com> reproduced it. 
- - More logical fix would require changes in fs/inode.c:iput() to - remove inode from hash-table _after_ fs cleaned disk stuff up and - in iget() to return NULL if I_FREEING inode is found in - hash-table. */ - /* Currently there is one place where it's ok to meet inode with - nlink==0: processing of open-unlinked and half-truncated files - during mount (fs/reiserfs/super.c:finish_unfinished()). */ - if ((inode->i_nlink == 0) && - !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) { - reiserfs_warning(inode->i_sb, "vs-13075", - "dead inode read from disk %K. " - "This is likely to be race with knfsd. Ignore", - &key); - reiserfs_make_bad_inode(inode); - } - - reiserfs_check_path(&path_to_sd); /* init inode should be relsing */ - - /* - * Stat data v1 doesn't support ACLs. - */ - if (get_inode_sd_version(inode) == STAT_DATA_V1) - cache_no_acl(inode); -} - -/** - * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked(). - * - * @inode: inode from hash table to check - * @opaque: "cookie" passed to iget5_locked(). This is &reiserfs_iget_args. - * - * This function is called by iget5_locked() to distinguish reiserfs inodes - * having the same inode numbers. Such inodes can only exist due to some - * error condition. One of them should be bad. Inodes with identical - * inode numbers (objectids) are distinguished by parent directory ids. 
- * - */ -int reiserfs_find_actor(struct inode *inode, void *opaque) -{ - struct reiserfs_iget_args *args; - - args = opaque; - /* args is already in CPU order */ - return (inode->i_ino == args->objectid) && - (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid); -} - -struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key) -{ - struct inode *inode; - struct reiserfs_iget_args args; - - args.objectid = key->on_disk_key.k_objectid; - args.dirid = key->on_disk_key.k_dir_id; - reiserfs_write_unlock(s); - inode = iget5_locked(s, key->on_disk_key.k_objectid, - reiserfs_find_actor, reiserfs_init_locked_inode, - (void *)(&args)); - reiserfs_write_lock(s); - if (!inode) - return ERR_PTR(-ENOMEM); - - if (inode->i_state & I_NEW) { - reiserfs_read_locked_inode(inode, &args); - unlock_new_inode(inode); - } - - if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) { - /* either due to i/o error or a stale NFS handle */ - iput(inode); - inode = NULL; - } - return inode; -} - -static struct dentry *reiserfs_get_dentry(struct super_block *sb, - u32 objectid, u32 dir_id, u32 generation) - -{ - struct cpu_key key; - struct inode *inode; - - key.on_disk_key.k_objectid = objectid; - key.on_disk_key.k_dir_id = dir_id; - reiserfs_write_lock(sb); - inode = reiserfs_iget(sb, &key); - if (inode && !IS_ERR(inode) && generation != 0 && - generation != inode->i_generation) { - iput(inode); - inode = NULL; - } - reiserfs_write_unlock(sb); - - return d_obtain_alias(inode); -} - -struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) -{ - /* fhtype happens to reflect the number of u32s encoded. - * due to a bug in earlier code, fhtype might indicate there - * are more u32s then actually fitted. - * so if fhtype seems to be more than len, reduce fhtype. 
- * Valid types are: - * 2 - objectid + dir_id - legacy support - * 3 - objectid + dir_id + generation - * 4 - objectid + dir_id + objectid and dirid of parent - legacy - * 5 - objectid + dir_id + generation + objectid and dirid of parent - * 6 - as above plus generation of directory - * 6 does not fit in NFSv2 handles - */ - if (fh_type > fh_len) { - if (fh_type != 6 || fh_len != 5) - reiserfs_warning(sb, "reiserfs-13077", - "nfsd/reiserfs, fhtype=%d, len=%d - odd", - fh_type, fh_len); - fh_type = 5; - } - - return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1], - (fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0); -} - -struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) -{ - if (fh_type < 4) - return NULL; - - return reiserfs_get_dentry(sb, - (fh_type >= 5) ? fid->raw[3] : fid->raw[2], - (fh_type >= 5) ? fid->raw[4] : fid->raw[3], - (fh_type == 6) ? fid->raw[5] : 0); -} - -int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, - int need_parent) -{ - struct inode *inode = dentry->d_inode; - int maxlen = *lenp; - - if (need_parent && (maxlen < 5)) { - *lenp = 5; - return 255; - } else if (maxlen < 3) { - *lenp = 3; - return 255; - } - - data[0] = inode->i_ino; - data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); - data[2] = inode->i_generation; - *lenp = 3; - /* no room for directory info? return what we've stored so far */ - if (maxlen < 5 || !need_parent) - return 3; - - spin_lock(&dentry->d_lock); - inode = dentry->d_parent->d_inode; - data[3] = inode->i_ino; - data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); - *lenp = 5; - if (maxlen >= 6) { - data[5] = inode->i_generation; - *lenp = 6; - } - spin_unlock(&dentry->d_lock); - return *lenp; -} - -/* looks for stat data, then copies fields to it, marks the buffer - containing stat data as dirty */ -/* reiserfs inodes are never really dirty, since the dirty inode call -** always logs them. 
This call allows the VFS inode marking routines -** to properly mark inodes for datasync and such, but only actually -** does something when called for a synchronous update. -*/ -int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) -{ - struct reiserfs_transaction_handle th; - int jbegin_count = 1; - - if (inode->i_sb->s_flags & MS_RDONLY) - return -EROFS; - /* memory pressure can sometimes initiate write_inode calls with sync == 1, - ** these cases are just when the system needs ram, not when the - ** inode needs to reach disk for safety, and they can safely be - ** ignored because the altered inode has already been logged. - */ - if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) { - reiserfs_write_lock(inode->i_sb); - if (!journal_begin(&th, inode->i_sb, jbegin_count)) { - reiserfs_update_sd(&th, inode); - journal_end_sync(&th, inode->i_sb, jbegin_count); - } - reiserfs_write_unlock(inode->i_sb); - } - return 0; -} - -/* stat data of new object is inserted already, this inserts the item - containing "." and ".." entries */ -static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, - struct inode *inode, - struct item_head *ih, struct treepath *path, - struct inode *dir) -{ - struct super_block *sb = th->t_super; - char empty_dir[EMPTY_DIR_SIZE]; - char *body = empty_dir; - struct cpu_key key; - int retval; - - BUG_ON(!th->t_trans_id); - - _make_cpu_key(&key, KEY_FORMAT_3_5, le32_to_cpu(ih->ih_key.k_dir_id), - le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET, - TYPE_DIRENTRY, 3 /*key length */ ); - - /* compose item head for new item. Directories consist of items of - old type (ITEM_VERSION_1). 
Do not set key (second arg is 0), it - is done by reiserfs_new_inode */ - if (old_format_only(sb)) { - make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, - TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); - - make_empty_dir_item_v1(body, ih->ih_key.k_dir_id, - ih->ih_key.k_objectid, - INODE_PKEY(dir)->k_dir_id, - INODE_PKEY(dir)->k_objectid); - } else { - make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, - TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2); - - make_empty_dir_item(body, ih->ih_key.k_dir_id, - ih->ih_key.k_objectid, - INODE_PKEY(dir)->k_dir_id, - INODE_PKEY(dir)->k_objectid); - } - - /* look for place in the tree for new item */ - retval = search_item(sb, &key, path); - if (retval == IO_ERROR) { - reiserfs_error(sb, "vs-13080", - "i/o failure occurred creating new directory"); - return -EIO; - } - if (retval == ITEM_FOUND) { - pathrelse(path); - reiserfs_warning(sb, "vs-13070", - "object with this key exists (%k)", - &(ih->ih_key)); - return -EEXIST; - } - - /* insert item, that is empty directory item */ - return reiserfs_insert_item(th, path, &key, ih, inode, body); -} - -/* stat data of object has been inserted, this inserts the item - containing the body of symlink */ -static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode of symlink */ - struct item_head *ih, - struct treepath *path, const char *symname, - int item_len) -{ - struct super_block *sb = th->t_super; - struct cpu_key key; - int retval; - - BUG_ON(!th->t_trans_id); - - _make_cpu_key(&key, KEY_FORMAT_3_5, - le32_to_cpu(ih->ih_key.k_dir_id), - le32_to_cpu(ih->ih_key.k_objectid), - 1, TYPE_DIRECT, 3 /*key length */ ); - - make_le_item_head(ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len, - 0 /*free_space */ ); - - /* look for place in the tree for new item */ - retval = search_item(sb, &key, path); - if (retval == IO_ERROR) { - reiserfs_error(sb, "vs-13080", - "i/o failure occurred creating new symlink"); - return -EIO; - } - if (retval == ITEM_FOUND) { 
- pathrelse(path); - reiserfs_warning(sb, "vs-13080", - "object with this key exists (%k)", - &(ih->ih_key)); - return -EEXIST; - } - - /* insert item, that is body of symlink */ - return reiserfs_insert_item(th, path, &key, ih, inode, symname); -} - -/* inserts the stat data into the tree, and then calls - reiserfs_new_directory (to insert ".", ".." item if new object is - directory) or reiserfs_new_symlink (to insert symlink body if new - object is symlink) or nothing (if new object is regular file) - - NOTE! uid and gid must already be set in the inode. If we return - non-zero due to an error, we have to drop the quota previously allocated - for the fresh inode. This can only be done outside a transaction, so - if we return non-zero, we also end the transaction. */ -int reiserfs_new_inode(struct reiserfs_transaction_handle *th, - struct inode *dir, umode_t mode, const char *symname, - /* 0 for regular, EMTRY_DIR_SIZE for dirs, - strlen (symname) for symlinks) */ - loff_t i_size, struct dentry *dentry, - struct inode *inode, - struct reiserfs_security_handle *security) -{ - struct super_block *sb; - struct reiserfs_iget_args args; - INITIALIZE_PATH(path_to_key); - struct cpu_key key; - struct item_head ih; - struct stat_data sd; - int retval; - int err; - - BUG_ON(!th->t_trans_id); - - dquot_initialize(inode); - err = dquot_alloc_inode(inode); - if (err) - goto out_end_trans; - if (!dir->i_nlink) { - err = -EPERM; - goto out_bad_inode; - } - - sb = dir->i_sb; - - /* item head of new item */ - ih.ih_key.k_dir_id = reiserfs_choose_packing(dir); - ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th)); - if (!ih.ih_key.k_objectid) { - err = -ENOMEM; - goto out_bad_inode; - } - args.objectid = inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid); - if (old_format_only(sb)) - make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET, - TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT); - else - make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET, - TYPE_STAT_DATA, 
SD_SIZE, MAX_US_INT); - memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE); - args.dirid = le32_to_cpu(ih.ih_key.k_dir_id); - if (insert_inode_locked4(inode, args.objectid, - reiserfs_find_actor, &args) < 0) { - err = -EINVAL; - goto out_bad_inode; - } - if (old_format_only(sb)) - /* not a perfect generation count, as object ids can be reused, but - ** this is as good as reiserfs can do right now. - ** note that the private part of inode isn't filled in yet, we have - ** to use the directory. - */ - inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid); - else -#if defined( USE_INODE_GENERATION_COUNTER ) - inode->i_generation = - le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation); -#else - inode->i_generation = ++event; -#endif - - /* fill stat data */ - set_nlink(inode, (S_ISDIR(mode) ? 2 : 1)); - - /* uid and gid must already be set by the caller for quota init */ - - /* symlink cannot be immutable or append only, right? */ - if (S_ISLNK(inode->i_mode)) - inode->i_flags &= ~(S_IMMUTABLE | S_APPEND); - - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - inode->i_size = i_size; - inode->i_blocks = 0; - inode->i_bytes = 0; - REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 
1 : - U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ; - - INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list)); - REISERFS_I(inode)->i_flags = 0; - REISERFS_I(inode)->i_prealloc_block = 0; - REISERFS_I(inode)->i_prealloc_count = 0; - REISERFS_I(inode)->i_trans_id = 0; - REISERFS_I(inode)->i_jl = NULL; - REISERFS_I(inode)->i_attrs = - REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; - sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); - reiserfs_init_xattr_rwsem(inode); - - /* key to search for correct place for new stat data */ - _make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id), - le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET, - TYPE_STAT_DATA, 3 /*key length */ ); - - /* find proper place for inserting of stat data */ - retval = search_item(sb, &key, &path_to_key); - if (retval == IO_ERROR) { - err = -EIO; - goto out_bad_inode; - } - if (retval == ITEM_FOUND) { - pathrelse(&path_to_key); - err = -EEXIST; - goto out_bad_inode; - } - if (old_format_only(sb)) { - if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) { - pathrelse(&path_to_key); - /* i_uid or i_gid is too big to be stored in stat data v3.5 */ - err = -EINVAL; - goto out_bad_inode; - } - inode2sd_v1(&sd, inode, inode->i_size); - } else { - inode2sd(&sd, inode, inode->i_size); - } - // store in in-core inode the key of stat data and version all - // object items will have (directory items will have old offset - // format, other new objects will consist of new items) - if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode)) - set_inode_item_key_version(inode, KEY_FORMAT_3_5); - else - set_inode_item_key_version(inode, KEY_FORMAT_3_6); - if (old_format_only(sb)) - set_inode_sd_version(inode, STAT_DATA_V1); - else - set_inode_sd_version(inode, STAT_DATA_V2); - - /* insert the stat data into the tree */ -#ifdef DISPLACE_NEW_PACKING_LOCALITIES - if (REISERFS_I(dir)->new_packing_locality) - th->displace_new_blocks = 1; -#endif - retval = - reiserfs_insert_item(th, &path_to_key, &key, &ih, 
inode, - (char *)(&sd)); - if (retval) { - err = retval; - reiserfs_check_path(&path_to_key); - goto out_bad_inode; - } -#ifdef DISPLACE_NEW_PACKING_LOCALITIES - if (!th->displace_new_blocks) - REISERFS_I(dir)->new_packing_locality = 0; -#endif - if (S_ISDIR(mode)) { - /* insert item with "." and ".." */ - retval = - reiserfs_new_directory(th, inode, &ih, &path_to_key, dir); - } - - if (S_ISLNK(mode)) { - /* insert body of symlink */ - if (!old_format_only(sb)) - i_size = ROUND_UP(i_size); - retval = - reiserfs_new_symlink(th, inode, &ih, &path_to_key, symname, - i_size); - } - if (retval) { - err = retval; - reiserfs_check_path(&path_to_key); - journal_end(th, th->t_super, th->t_blocks_allocated); - goto out_inserted_sd; - } - - if (reiserfs_posixacl(inode->i_sb)) { - retval = reiserfs_inherit_default_acl(th, dir, dentry, inode); - if (retval) { - err = retval; - reiserfs_check_path(&path_to_key); - journal_end(th, th->t_super, th->t_blocks_allocated); - goto out_inserted_sd; - } - } else if (inode->i_sb->s_flags & MS_POSIXACL) { - reiserfs_warning(inode->i_sb, "jdm-13090", - "ACLs aren't enabled in the fs, " - "but vfs thinks they are!"); - } else if (IS_PRIVATE(dir)) - inode->i_flags |= S_PRIVATE; - - if (security->name) { - retval = reiserfs_security_write(th, inode, security); - if (retval) { - err = retval; - reiserfs_check_path(&path_to_key); - retval = journal_end(th, th->t_super, - th->t_blocks_allocated); - if (retval) - err = retval; - goto out_inserted_sd; - } - } - - reiserfs_update_sd(th, inode); - reiserfs_check_path(&path_to_key); - - return 0; - -/* it looks like you can easily compress these two goto targets into - * one. Keeping it like this doesn't actually hurt anything, and they - * are place holders for what the quota code actually needs. 
- */ - out_bad_inode: - /* Invalidate the object, nothing was inserted yet */ - INODE_PKEY(inode)->k_objectid = 0; - - /* Quota change must be inside a transaction for journaling */ - dquot_free_inode(inode); - - out_end_trans: - journal_end(th, th->t_super, th->t_blocks_allocated); - /* Drop can be outside and it needs more credits so it's better to have it outside */ - dquot_drop(inode); - inode->i_flags |= S_NOQUOTA; - make_bad_inode(inode); - - out_inserted_sd: - clear_nlink(inode); - th->t_trans_id = 0; /* so the caller can't use this handle later */ - unlock_new_inode(inode); /* OK to do even if we hadn't locked it */ - iput(inode); - return err; -} - -/* -** finds the tail page in the page cache, -** reads the last block in. -** -** On success, page_result is set to a locked, pinned page, and bh_result -** is set to an up to date buffer for the last block in the file. returns 0. -** -** tail conversion is not done, so bh_result might not be valid for writing -** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before -** trying to write the block. -** -** on failure, nonzero is returned, page_result and bh_result are untouched. -*/ -static int grab_tail_page(struct inode *inode, - struct page **page_result, - struct buffer_head **bh_result) -{ - - /* we want the page with the last byte in the file, - ** not the page that will hold the next byte for appending - */ - unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; - unsigned long pos = 0; - unsigned long start = 0; - unsigned long blocksize = inode->i_sb->s_blocksize; - unsigned long offset = (inode->i_size) & (PAGE_CACHE_SIZE - 1); - struct buffer_head *bh; - struct buffer_head *head; - struct page *page; - int error; - - /* we know that we are only called with inode->i_size > 0. - ** we also know that a file tail can never be as big as a block - ** If i_size % blocksize == 0, our file is currently block aligned - ** and it won't need converting or zeroing after a truncate. 
- */ - if ((offset & (blocksize - 1)) == 0) { - return -ENOENT; - } - page = grab_cache_page(inode->i_mapping, index); - error = -ENOMEM; - if (!page) { - goto out; - } - /* start within the page of the last block in the file */ - start = (offset / blocksize) * blocksize; - - error = __block_write_begin(page, start, offset - start, - reiserfs_get_block_create_0); - if (error) - goto unlock; - - head = page_buffers(page); - bh = head; - do { - if (pos >= start) { - break; - } - bh = bh->b_this_page; - pos += blocksize; - } while (bh != head); - - if (!buffer_uptodate(bh)) { - /* note, this should never happen, prepare_write should - ** be taking care of this for us. If the buffer isn't up to date, - ** I've screwed up the code to find the buffer, or the code to - ** call prepare_write - */ - reiserfs_error(inode->i_sb, "clm-6000", - "error reading block %lu", bh->b_blocknr); - error = -EIO; - goto unlock; - } - *bh_result = bh; - *page_result = page; - - out: - return error; - - unlock: - unlock_page(page); - page_cache_release(page); - return error; -} - -/* -** vfs version of truncate file. Must NOT be called with -** a transaction already started. -** -** some code taken from block_truncate_page -*/ -int reiserfs_truncate_file(struct inode *inode, int update_timestamps) -{ - struct reiserfs_transaction_handle th; - /* we want the offset for the first byte after the end of the file */ - unsigned long offset = inode->i_size & (PAGE_CACHE_SIZE - 1); - unsigned blocksize = inode->i_sb->s_blocksize; - unsigned length; - struct page *page = NULL; - int error; - struct buffer_head *bh = NULL; - int err2; - int lock_depth; - - lock_depth = reiserfs_write_lock_once(inode->i_sb); - - if (inode->i_size > 0) { - error = grab_tail_page(inode, &page, &bh); - if (error) { - // -ENOENT means we truncated past the end of the file, - // and get_block_create_0 could not find a block to read in, - // which is ok. 
- if (error != -ENOENT) - reiserfs_error(inode->i_sb, "clm-6001", - "grab_tail_page failed %d", - error); - page = NULL; - bh = NULL; - } - } - - /* so, if page != NULL, we have a buffer head for the offset at - ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0, - ** then we have an unformatted node. Otherwise, we have a direct item, - ** and no zeroing is required on disk. We zero after the truncate, - ** because the truncate might pack the item anyway - ** (it will unmap bh if it packs). - */ - /* it is enough to reserve space in transaction for 2 balancings: - one for "save" link adding and another for the first - cut_from_item. 1 is for update_sd */ - error = journal_begin(&th, inode->i_sb, - JOURNAL_PER_BALANCE_CNT * 2 + 1); - if (error) - goto out; - reiserfs_update_inode_transaction(inode); - if (update_timestamps) - /* we are doing real truncate: if the system crashes before the last - transaction of truncating gets committed - on reboot the file - either appears truncated properly or not truncated at all */ - add_save_link(&th, inode, 1); - err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps); - error = - journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1); - if (error) - goto out; - - /* check reiserfs_do_truncate after ending the transaction */ - if (err2) { - error = err2; - goto out; - } - - if (update_timestamps) { - error = remove_save_link(inode, 1 /* truncate */); - if (error) - goto out; - } - - if (page) { - length = offset & (blocksize - 1); - /* if we are not on a block boundary */ - if (length) { - length = blocksize - length; - zero_user(page, offset, length); - if (buffer_mapped(bh) && bh->b_blocknr != 0) { - mark_buffer_dirty(bh); - } - } - unlock_page(page); - page_cache_release(page); - } - - reiserfs_write_unlock_once(inode->i_sb, lock_depth); - - return 0; - out: - if (page) { - unlock_page(page); - page_cache_release(page); - } - - reiserfs_write_unlock_once(inode->i_sb, lock_depth); - - return 
error; -} - -static int map_block_for_writepage(struct inode *inode, - struct buffer_head *bh_result, - unsigned long block) -{ - struct reiserfs_transaction_handle th; - int fs_gen; - struct item_head tmp_ih; - struct item_head *ih; - struct buffer_head *bh; - __le32 *item; - struct cpu_key key; - INITIALIZE_PATH(path); - int pos_in_item; - int jbegin_count = JOURNAL_PER_BALANCE_CNT; - loff_t byte_offset = ((loff_t)block << inode->i_sb->s_blocksize_bits)+1; - int retval; - int use_get_block = 0; - int bytes_copied = 0; - int copy_size; - int trans_running = 0; - - /* catch places below that try to log something without starting a trans */ - th.t_trans_id = 0; - - if (!buffer_uptodate(bh_result)) { - return -EIO; - } - - kmap(bh_result->b_page); - start_over: - reiserfs_write_lock(inode->i_sb); - make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3); - - research: - retval = search_for_position_by_key(inode->i_sb, &key, &path); - if (retval != POSITION_FOUND) { - use_get_block = 1; - goto out; - } - - bh = get_last_bh(&path); - ih = get_ih(&path); - item = get_item(&path); - pos_in_item = path.pos_in_item; - - /* we've found an unformatted node */ - if (indirect_item_found(retval, ih)) { - if (bytes_copied > 0) { - reiserfs_warning(inode->i_sb, "clm-6002", - "bytes_copied %d", bytes_copied); - } - if (!get_block_num(item, pos_in_item)) { - /* crap, we are writing to a hole */ - use_get_block = 1; - goto out; - } - set_block_dev_mapped(bh_result, - get_block_num(item, pos_in_item), inode); - } else if (is_direct_le_ih(ih)) { - char *p; - p = page_address(bh_result->b_page); - p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1); - copy_size = ih_item_len(ih) - pos_in_item; - - fs_gen = get_generation(inode->i_sb); - copy_item_head(&tmp_ih, ih); - - if (!trans_running) { - /* vs-3050 is gone, no need to drop the path */ - retval = journal_begin(&th, inode->i_sb, jbegin_count); - if (retval) - goto out; - reiserfs_update_inode_transaction(inode); - trans_running = 1; - if 
(fs_changed(fs_gen, inode->i_sb) - && item_moved(&tmp_ih, &path)) { - reiserfs_restore_prepared_buffer(inode->i_sb, - bh); - goto research; - } - } - - reiserfs_prepare_for_journal(inode->i_sb, bh, 1); - - if (fs_changed(fs_gen, inode->i_sb) - && item_moved(&tmp_ih, &path)) { - reiserfs_restore_prepared_buffer(inode->i_sb, bh); - goto research; - } - - memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, - copy_size); - - journal_mark_dirty(&th, inode->i_sb, bh); - bytes_copied += copy_size; - set_block_dev_mapped(bh_result, 0, inode); - - /* are there still bytes left? */ - if (bytes_copied < bh_result->b_size && - (byte_offset + bytes_copied) < inode->i_size) { - set_cpu_key_k_offset(&key, - cpu_key_k_offset(&key) + - copy_size); - goto research; - } - } else { - reiserfs_warning(inode->i_sb, "clm-6003", - "bad item inode %lu", inode->i_ino); - retval = -EIO; - goto out; - } - retval = 0; - - out: - pathrelse(&path); - if (trans_running) { - int err = journal_end(&th, inode->i_sb, jbegin_count); - if (err) - retval = err; - trans_running = 0; - } - reiserfs_write_unlock(inode->i_sb); - - /* this is where we fill in holes in the file. */ - if (use_get_block) { - retval = reiserfs_get_block(inode, block, bh_result, - GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX - | GET_BLOCK_NO_DANGLE); - if (!retval) { - if (!buffer_mapped(bh_result) - || bh_result->b_blocknr == 0) { - /* get_block failed to find a mapped unformatted node. */ - use_get_block = 0; - goto start_over; - } - } - } - kunmap(bh_result->b_page); - - if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { - /* we've copied data from the page into the direct item, so the - * buffer in the page is now clean, mark it to reflect that. 
- */ - lock_buffer(bh_result); - clear_buffer_dirty(bh_result); - unlock_buffer(bh_result); - } - return retval; -} - -/* - * mason@suse.com: updated in 2.5.54 to follow the same general io - * start/recovery path as __block_write_full_page, along with special - * code to handle reiserfs tails. - */ -static int reiserfs_write_full_page(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; - int error = 0; - unsigned long block; - sector_t last_block; - struct buffer_head *head, *bh; - int partial = 0; - int nr = 0; - int checked = PageChecked(page); - struct reiserfs_transaction_handle th; - struct super_block *s = inode->i_sb; - int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; - th.t_trans_id = 0; - - /* no logging allowed when nonblocking or from PF_MEMALLOC */ - if (checked && (current->flags & PF_MEMALLOC)) { - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; - } - - /* The page dirty bit is cleared before writepage is called, which - * means we have to tell create_empty_buffers to make dirty buffers - * The page really should be up to date at this point, so tossing - * in the BH_Uptodate is just a sanity check. 
- */ - if (!page_has_buffers(page)) { - create_empty_buffers(page, s->s_blocksize, - (1 << BH_Dirty) | (1 << BH_Uptodate)); - } - head = page_buffers(page); - - /* last page in the file, zero out any contents past the - ** last byte in the file - */ - if (page->index >= end_index) { - unsigned last_offset; - - last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1); - /* no file contents in this page */ - if (page->index >= end_index + 1 || !last_offset) { - unlock_page(page); - return 0; - } - zero_user_segment(page, last_offset, PAGE_CACHE_SIZE); - } - bh = head; - block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); - last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; - /* first map all the buffers, logging any direct items we find */ - do { - if (block > last_block) { - /* - * This can happen when the block size is less than - * the page size. The corresponding bytes in the page - * were zero filled above - */ - clear_buffer_dirty(bh); - set_buffer_uptodate(bh); - } else if ((checked || buffer_dirty(bh)) && - (!buffer_mapped(bh) || (buffer_mapped(bh) - && bh->b_blocknr == - 0))) { - /* not mapped yet, or it points to a direct item, search - * the btree for the mapping info, and log any direct - * items found - */ - if ((error = map_block_for_writepage(inode, bh, block))) { - goto fail; - } - } - bh = bh->b_this_page; - block++; - } while (bh != head); - - /* - * we start the transaction after map_block_for_writepage, - * because it can create holes in the file (an unbounded operation). 
- * starting it here, we can make a reliable estimate for how many - * blocks we're going to log - */ - if (checked) { - ClearPageChecked(page); - reiserfs_write_lock(s); - error = journal_begin(&th, s, bh_per_page + 1); - if (error) { - reiserfs_write_unlock(s); - goto fail; - } - reiserfs_update_inode_transaction(inode); - } - /* now go through and lock any dirty buffers on the page */ - do { - get_bh(bh); - if (!buffer_mapped(bh)) - continue; - if (buffer_mapped(bh) && bh->b_blocknr == 0) - continue; - - if (checked) { - reiserfs_prepare_for_journal(s, bh, 1); - journal_mark_dirty(&th, s, bh); - continue; - } - /* from this point on, we know the buffer is mapped to a - * real block and not a direct item - */ - if (wbc->sync_mode != WB_SYNC_NONE) { - lock_buffer(bh); - } else { - if (!trylock_buffer(bh)) { - redirty_page_for_writepage(wbc, page); - continue; - } - } - if (test_clear_buffer_dirty(bh)) { - mark_buffer_async_write(bh); - } else { - unlock_buffer(bh); - } - } while ((bh = bh->b_this_page) != head); - - if (checked) { - error = journal_end(&th, s, bh_per_page + 1); - reiserfs_write_unlock(s); - if (error) - goto fail; - } - BUG_ON(PageWriteback(page)); - set_page_writeback(page); - unlock_page(page); - - /* - * since any buffer might be the only dirty buffer on the page, - * the first submit_bh can bring the page out of writeback. - * be careful with the buffers. - */ - do { - struct buffer_head *next = bh->b_this_page; - if (buffer_async_write(bh)) { - submit_bh(WRITE, bh); - nr++; - } - put_bh(bh); - bh = next; - } while (bh != head); - - error = 0; - done: - if (nr == 0) { - /* - * if this page only had a direct item, it is very possible for - * no io to be required without there being an error. 
Or, - * someone else could have locked them and sent them down the - * pipe without locking the page - */ - bh = head; - do { - if (!buffer_uptodate(bh)) { - partial = 1; - break; - } - bh = bh->b_this_page; - } while (bh != head); - if (!partial) - SetPageUptodate(page); - end_page_writeback(page); - } - return error; - - fail: - /* catches various errors, we need to make sure any valid dirty blocks - * get to the media. The page is currently locked and not marked for - * writeback - */ - ClearPageUptodate(page); - bh = head; - do { - get_bh(bh); - if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) { - lock_buffer(bh); - mark_buffer_async_write(bh); - } else { - /* - * clear any dirty bits that might have come from getting - * attached to a dirty page - */ - clear_buffer_dirty(bh); - } - bh = bh->b_this_page; - } while (bh != head); - SetPageError(page); - BUG_ON(PageWriteback(page)); - set_page_writeback(page); - unlock_page(page); - do { - struct buffer_head *next = bh->b_this_page; - if (buffer_async_write(bh)) { - clear_buffer_dirty(bh); - submit_bh(WRITE, bh); - nr++; - } - put_bh(bh); - bh = next; - } while (bh != head); - goto done; -} - -static int reiserfs_readpage(struct file *f, struct page *page) -{ - return block_read_full_page(page, reiserfs_get_block); -} - -static int reiserfs_writepage(struct page *page, struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - reiserfs_wait_on_write_block(inode->i_sb); - return reiserfs_write_full_page(page, wbc); -} - -static void reiserfs_truncate_failed_write(struct inode *inode) -{ - truncate_inode_pages(inode->i_mapping, inode->i_size); - reiserfs_truncate_file(inode, 0); -} - -static int reiserfs_write_begin(struct file *file, - struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) -{ - struct inode *inode; - struct page *page; - pgoff_t index; - int ret; - int old_ref = 0; - - inode = mapping->host; - *fsdata = 0; - 
if (flags & AOP_FLAG_CONT_EXPAND && - (pos & (inode->i_sb->s_blocksize - 1)) == 0) { - pos ++; - *fsdata = (void *)(unsigned long)flags; - } - - index = pos >> PAGE_CACHE_SHIFT; - page = grab_cache_page_write_begin(mapping, index, flags); - if (!page) - return -ENOMEM; - *pagep = page; - - reiserfs_wait_on_write_block(inode->i_sb); - fix_tail_page_for_writing(page); - if (reiserfs_transaction_running(inode->i_sb)) { - struct reiserfs_transaction_handle *th; - th = (struct reiserfs_transaction_handle *)current-> - journal_info; - BUG_ON(!th->t_refcount); - BUG_ON(!th->t_trans_id); - old_ref = th->t_refcount; - th->t_refcount++; - } - ret = __block_write_begin(page, pos, len, reiserfs_get_block); - if (ret && reiserfs_transaction_running(inode->i_sb)) { - struct reiserfs_transaction_handle *th = current->journal_info; - /* this gets a little ugly. If reiserfs_get_block returned an - * error and left a transacstion running, we've got to close it, - * and we've got to free handle if it was a persistent transaction. - * - * But, if we had nested into an existing transaction, we need - * to just drop the ref count on the handle. - * - * If old_ref == 0, the transaction is from reiserfs_get_block, - * and it was a persistent trans. Otherwise, it was nested above. 
- */ - if (th->t_refcount > old_ref) { - if (old_ref) - th->t_refcount--; - else { - int err; - reiserfs_write_lock(inode->i_sb); - err = reiserfs_end_persistent_transaction(th); - reiserfs_write_unlock(inode->i_sb); - if (err) - ret = err; - } - } - } - if (ret) { - unlock_page(page); - page_cache_release(page); - /* Truncate allocated blocks */ - reiserfs_truncate_failed_write(inode); - } - return ret; -} - -int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len) -{ - struct inode *inode = page->mapping->host; - int ret; - int old_ref = 0; - - reiserfs_write_unlock(inode->i_sb); - reiserfs_wait_on_write_block(inode->i_sb); - reiserfs_write_lock(inode->i_sb); - - fix_tail_page_for_writing(page); - if (reiserfs_transaction_running(inode->i_sb)) { - struct reiserfs_transaction_handle *th; - th = (struct reiserfs_transaction_handle *)current-> - journal_info; - BUG_ON(!th->t_refcount); - BUG_ON(!th->t_trans_id); - old_ref = th->t_refcount; - th->t_refcount++; - } - - ret = __block_write_begin(page, from, len, reiserfs_get_block); - if (ret && reiserfs_transaction_running(inode->i_sb)) { - struct reiserfs_transaction_handle *th = current->journal_info; - /* this gets a little ugly. If reiserfs_get_block returned an - * error and left a transacstion running, we've got to close it, - * and we've got to free handle if it was a persistent transaction. - * - * But, if we had nested into an existing transaction, we need - * to just drop the ref count on the handle. - * - * If old_ref == 0, the transaction is from reiserfs_get_block, - * and it was a persistent trans. Otherwise, it was nested above. 
- */ - if (th->t_refcount > old_ref) { - if (old_ref) - th->t_refcount--; - else { - int err; - reiserfs_write_lock(inode->i_sb); - err = reiserfs_end_persistent_transaction(th); - reiserfs_write_unlock(inode->i_sb); - if (err) - ret = err; - } - } - } - return ret; - -} - -static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block) -{ - return generic_block_bmap(as, block, reiserfs_bmap); -} - -static int reiserfs_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) -{ - struct inode *inode = page->mapping->host; - int ret = 0; - int update_sd = 0; - struct reiserfs_transaction_handle *th; - unsigned start; - int lock_depth = 0; - bool locked = false; - - if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND) - pos ++; - - reiserfs_wait_on_write_block(inode->i_sb); - if (reiserfs_transaction_running(inode->i_sb)) - th = current->journal_info; - else - th = NULL; - - start = pos & (PAGE_CACHE_SIZE - 1); - if (unlikely(copied < len)) { - if (!PageUptodate(page)) - copied = 0; - - page_zero_new_buffers(page, start + copied, start + len); - } - flush_dcache_page(page); - - reiserfs_commit_page(inode, page, start, start + copied); - - /* generic_commit_write does this for us, but does not update the - ** transaction tracking stuff when the size changes. So, we have - ** to do the i_size updates here. 
- */ - if (pos + copied > inode->i_size) { - struct reiserfs_transaction_handle myth; - lock_depth = reiserfs_write_lock_once(inode->i_sb); - locked = true; - /* If the file have grown beyond the border where it - can have a tail, unmark it as needing a tail - packing */ - if ((have_large_tails(inode->i_sb) - && inode->i_size > i_block_size(inode) * 4) - || (have_small_tails(inode->i_sb) - && inode->i_size > i_block_size(inode))) - REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; - - ret = journal_begin(&myth, inode->i_sb, 1); - if (ret) - goto journal_error; - - reiserfs_update_inode_transaction(inode); - inode->i_size = pos + copied; - /* - * this will just nest into our transaction. It's important - * to use mark_inode_dirty so the inode gets pushed around on the - * dirty lists, and so that O_SYNC works as expected - */ - mark_inode_dirty(inode); - reiserfs_update_sd(&myth, inode); - update_sd = 1; - ret = journal_end(&myth, inode->i_sb, 1); - if (ret) - goto journal_error; - } - if (th) { - if (!locked) { - lock_depth = reiserfs_write_lock_once(inode->i_sb); - locked = true; - } - if (!update_sd) - mark_inode_dirty(inode); - ret = reiserfs_end_persistent_transaction(th); - if (ret) - goto out; - } - - out: - if (locked) - reiserfs_write_unlock_once(inode->i_sb, lock_depth); - unlock_page(page); - page_cache_release(page); - - if (pos + len > inode->i_size) - reiserfs_truncate_failed_write(inode); - - return ret == 0 ? 
copied : ret; - - journal_error: - reiserfs_write_unlock_once(inode->i_sb, lock_depth); - locked = false; - if (th) { - if (!update_sd) - reiserfs_update_sd(th, inode); - ret = reiserfs_end_persistent_transaction(th); - } - goto out; -} - -int reiserfs_commit_write(struct file *f, struct page *page, - unsigned from, unsigned to) -{ - struct inode *inode = page->mapping->host; - loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to; - int ret = 0; - int update_sd = 0; - struct reiserfs_transaction_handle *th = NULL; - - reiserfs_write_unlock(inode->i_sb); - reiserfs_wait_on_write_block(inode->i_sb); - reiserfs_write_lock(inode->i_sb); - - if (reiserfs_transaction_running(inode->i_sb)) { - th = current->journal_info; - } - reiserfs_commit_page(inode, page, from, to); - - /* generic_commit_write does this for us, but does not update the - ** transaction tracking stuff when the size changes. So, we have - ** to do the i_size updates here. - */ - if (pos > inode->i_size) { - struct reiserfs_transaction_handle myth; - /* If the file have grown beyond the border where it - can have a tail, unmark it as needing a tail - packing */ - if ((have_large_tails(inode->i_sb) - && inode->i_size > i_block_size(inode) * 4) - || (have_small_tails(inode->i_sb) - && inode->i_size > i_block_size(inode))) - REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; - - ret = journal_begin(&myth, inode->i_sb, 1); - if (ret) - goto journal_error; - - reiserfs_update_inode_transaction(inode); - inode->i_size = pos; - /* - * this will just nest into our transaction. 
It's important - * to use mark_inode_dirty so the inode gets pushed around on the - * dirty lists, and so that O_SYNC works as expected - */ - mark_inode_dirty(inode); - reiserfs_update_sd(&myth, inode); - update_sd = 1; - ret = journal_end(&myth, inode->i_sb, 1); - if (ret) - goto journal_error; - } - if (th) { - if (!update_sd) - mark_inode_dirty(inode); - ret = reiserfs_end_persistent_transaction(th); - if (ret) - goto out; - } - - out: - return ret; - - journal_error: - if (th) { - if (!update_sd) - reiserfs_update_sd(th, inode); - ret = reiserfs_end_persistent_transaction(th); - } - - return ret; -} - -void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode) -{ - if (reiserfs_attrs(inode->i_sb)) { - if (sd_attrs & REISERFS_SYNC_FL) - inode->i_flags |= S_SYNC; - else - inode->i_flags &= ~S_SYNC; - if (sd_attrs & REISERFS_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; - else - inode->i_flags &= ~S_IMMUTABLE; - if (sd_attrs & REISERFS_APPEND_FL) - inode->i_flags |= S_APPEND; - else - inode->i_flags &= ~S_APPEND; - if (sd_attrs & REISERFS_NOATIME_FL) - inode->i_flags |= S_NOATIME; - else - inode->i_flags &= ~S_NOATIME; - if (sd_attrs & REISERFS_NOTAIL_FL) - REISERFS_I(inode)->i_flags |= i_nopack_mask; - else - REISERFS_I(inode)->i_flags &= ~i_nopack_mask; - } -} - -void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs) -{ - if (reiserfs_attrs(inode->i_sb)) { - if (inode->i_flags & S_IMMUTABLE) - *sd_attrs |= REISERFS_IMMUTABLE_FL; - else - *sd_attrs &= ~REISERFS_IMMUTABLE_FL; - if (inode->i_flags & S_SYNC) - *sd_attrs |= REISERFS_SYNC_FL; - else - *sd_attrs &= ~REISERFS_SYNC_FL; - if (inode->i_flags & S_NOATIME) - *sd_attrs |= REISERFS_NOATIME_FL; - else - *sd_attrs &= ~REISERFS_NOATIME_FL; - if (REISERFS_I(inode)->i_flags & i_nopack_mask) - *sd_attrs |= REISERFS_NOTAIL_FL; - else - *sd_attrs &= ~REISERFS_NOTAIL_FL; - } -} - -/* decide if this buffer needs to stay around for data logging or ordered -** write purposes -*/ -static int 
invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) -{ - int ret = 1; - struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); - - lock_buffer(bh); - spin_lock(&j->j_dirty_buffers_lock); - if (!buffer_mapped(bh)) { - goto free_jh; - } - /* the page is locked, and the only places that log a data buffer - * also lock the page. - */ - if (reiserfs_file_data_log(inode)) { - /* - * very conservative, leave the buffer pinned if - * anyone might need it. - */ - if (buffer_journaled(bh) || buffer_journal_dirty(bh)) { - ret = 0; - } - } else if (buffer_dirty(bh)) { - struct reiserfs_journal_list *jl; - struct reiserfs_jh *jh = bh->b_private; - - /* why is this safe? - * reiserfs_setattr updates i_size in the on disk - * stat data before allowing vmtruncate to be called. - * - * If buffer was put onto the ordered list for this - * transaction, we know for sure either this transaction - * or an older one already has updated i_size on disk, - * and this ordered data won't be referenced in the file - * if we crash. - * - * if the buffer was put onto the ordered list for an older - * transaction, we need to leave it around - */ - if (jh && (jl = jh->jl) - && jl != SB_JOURNAL(inode->i_sb)->j_current_jl) - ret = 0; - } - free_jh: - if (ret && bh->b_private) { - reiserfs_free_jh(bh); - } - spin_unlock(&j->j_dirty_buffers_lock); - unlock_buffer(bh); - return ret; -} - -/* clm -- taken from fs/buffer.c:block_invalidate_page */ -static void reiserfs_invalidatepage(struct page *page, unsigned long offset) -{ - struct buffer_head *head, *bh, *next; - struct inode *inode = page->mapping->host; - unsigned int curr_off = 0; - int ret = 1; - - BUG_ON(!PageLocked(page)); - - if (offset == 0) - ClearPageChecked(page); - - if (!page_has_buffers(page)) - goto out; - - head = page_buffers(page); - bh = head; - do { - unsigned int next_off = curr_off + bh->b_size; - next = bh->b_this_page; - - /* - * is this block fully invalidated? 
- */ - if (offset <= curr_off) { - if (invalidatepage_can_drop(inode, bh)) - reiserfs_unmap_buffer(bh); - else - ret = 0; - } - curr_off = next_off; - bh = next; - } while (bh != head); - - /* - * We release buffers only if the entire page is being invalidated. - * The get_block cached value has been unconditionally invalidated, - * so real IO is not possible anymore. - */ - if (!offset && ret) { - ret = try_to_release_page(page, 0); - /* maybe should BUG_ON(!ret); - neilb */ - } - out: - return; -} - -static int reiserfs_set_page_dirty(struct page *page) -{ - struct inode *inode = page->mapping->host; - if (reiserfs_file_data_log(inode)) { - SetPageChecked(page); - return __set_page_dirty_nobuffers(page); - } - return __set_page_dirty_buffers(page); -} - -/* - * Returns 1 if the page's buffers were dropped. The page is locked. - * - * Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads - * in the buffers at page_buffers(page). - * - * even in -o notail mode, we can't be sure an old mount without -o notail - * didn't create files with tails. - */ -static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags) -{ - struct inode *inode = page->mapping->host; - struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); - struct buffer_head *head; - struct buffer_head *bh; - int ret = 1; - - WARN_ON(PageChecked(page)); - spin_lock(&j->j_dirty_buffers_lock); - head = page_buffers(page); - bh = head; - do { - if (bh->b_private) { - if (!buffer_dirty(bh) && !buffer_locked(bh)) { - reiserfs_free_jh(bh); - } else { - ret = 0; - break; - } - } - bh = bh->b_this_page; - } while (bh != head); - if (ret) - ret = try_to_free_buffers(page); - spin_unlock(&j->j_dirty_buffers_lock); - return ret; -} - -/* We thank Mingming Cao for helping us understand in great detail what - to do in this section of the code. 
*/ -static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - ssize_t ret; - - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - reiserfs_get_blocks_direct_io); - - /* - * In case of error extending write may have instantiated a few - * blocks outside i_size. Trim these off again. - */ - if (unlikely((rw & WRITE) && ret < 0)) { - loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); - - if (end > isize) - vmtruncate(inode, isize); - } - - return ret; -} - -int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) -{ - struct inode *inode = dentry->d_inode; - unsigned int ia_valid; - int depth; - int error; - - error = inode_change_ok(inode, attr); - if (error) - return error; - - /* must be turned off for recursive notify_change calls */ - ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); - - depth = reiserfs_write_lock_once(inode->i_sb); - if (is_quota_modification(inode, attr)) - dquot_initialize(inode); - - if (attr->ia_valid & ATTR_SIZE) { - /* version 2 items will be caught by the s_maxbytes check - ** done for us in vmtruncate - */ - if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && - attr->ia_size > MAX_NON_LFS) { - error = -EFBIG; - goto out; - } - - inode_dio_wait(inode); - - /* fill in hole pointers in the expanding truncate case. 
*/ - if (attr->ia_size > inode->i_size) { - error = generic_cont_expand_simple(inode, attr->ia_size); - if (REISERFS_I(inode)->i_prealloc_count > 0) { - int err; - struct reiserfs_transaction_handle th; - /* we're changing at most 2 bitmaps, inode + super */ - err = journal_begin(&th, inode->i_sb, 4); - if (!err) { - reiserfs_discard_prealloc(&th, inode); - err = journal_end(&th, inode->i_sb, 4); - } - if (err) - error = err; - } - if (error) - goto out; - /* - * file size is changed, ctime and mtime are - * to be updated - */ - attr->ia_valid |= (ATTR_MTIME | ATTR_CTIME); - } - } - - if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) || - ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) && - (get_inode_sd_version(inode) == STAT_DATA_V1)) { - /* stat data of format v3.5 has 16 bit uid and gid */ - error = -EINVAL; - goto out; - } - - if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { - struct reiserfs_transaction_handle th; - int jbegin_count = - 2 * - (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) + - REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) + - 2; - - error = reiserfs_chown_xattrs(inode, attr); - - if (error) - return error; - - /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ - error = journal_begin(&th, inode->i_sb, jbegin_count); - if (error) - goto out; - error = dquot_transfer(inode, attr); - if (error) { - journal_end(&th, inode->i_sb, jbegin_count); - goto out; - } - - /* Update corresponding info in inode so that everything is in - * one transaction */ - if (attr->ia_valid & ATTR_UID) - inode->i_uid = attr->ia_uid; - if (attr->ia_valid & ATTR_GID) - inode->i_gid = attr->ia_gid; - mark_inode_dirty(inode); - error = journal_end(&th, inode->i_sb, jbegin_count); - if (error) - goto out; - } - - /* - * Relax the lock here, as it might truncate the - * inode pages and wait for inode pages locks. 
- * To release such page lock, the owner needs the - * reiserfs lock - */ - reiserfs_write_unlock_once(inode->i_sb, depth); - if ((attr->ia_valid & ATTR_SIZE) && - attr->ia_size != i_size_read(inode)) - error = vmtruncate(inode, attr->ia_size); - - if (!error) { - setattr_copy(inode, attr); - mark_inode_dirty(inode); - } - depth = reiserfs_write_lock_once(inode->i_sb); - - if (!error && reiserfs_posixacl(inode->i_sb)) { - if (attr->ia_valid & ATTR_MODE) - error = reiserfs_acl_chmod(inode); - } - - out: - reiserfs_write_unlock_once(inode->i_sb, depth); - - return error; -} - -const struct address_space_operations reiserfs_address_space_operations = { - .writepage = reiserfs_writepage, - .readpage = reiserfs_readpage, - .readpages = reiserfs_readpages, - .releasepage = reiserfs_releasepage, - .invalidatepage = reiserfs_invalidatepage, - .write_begin = reiserfs_write_begin, - .write_end = reiserfs_write_end, - .bmap = reiserfs_aop_bmap, - .direct_IO = reiserfs_direct_IO, - .set_page_dirty = reiserfs_set_page_dirty, -}; diff --git a/ANDROID_3.4.5/fs/reiserfs/ioctl.c b/ANDROID_3.4.5/fs/reiserfs/ioctl.c deleted file mode 100644 index 0c218504..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/ioctl.c +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include <linux/capability.h> -#include <linux/fs.h> -#include <linux/mount.h> -#include "reiserfs.h" -#include <linux/time.h> -#include <asm/uaccess.h> -#include <linux/pagemap.h> -#include <linux/compat.h> - -/* - * reiserfs_ioctl - handler for ioctl for inode - * supported commands: - * 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect - * and prevent packing file (argument arg has to be non-zero) - * 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION - * 3) That's all for a while ... 
- */ -long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - unsigned int flags; - int err = 0; - - reiserfs_write_lock(inode->i_sb); - - switch (cmd) { - case REISERFS_IOC_UNPACK: - if (S_ISREG(inode->i_mode)) { - if (arg) - err = reiserfs_unpack(inode, filp); - } else - err = -ENOTTY; - break; - /* - * following two cases are taken from fs/ext2/ioctl.c by Remy - * Card (card@masi.ibp.fr) - */ - case REISERFS_IOC_GETFLAGS: - if (!reiserfs_attrs(inode->i_sb)) { - err = -ENOTTY; - break; - } - - flags = REISERFS_I(inode)->i_attrs; - i_attrs_to_sd_attrs(inode, (__u16 *) & flags); - err = put_user(flags, (int __user *)arg); - break; - case REISERFS_IOC_SETFLAGS:{ - if (!reiserfs_attrs(inode->i_sb)) { - err = -ENOTTY; - break; - } - - err = mnt_want_write_file(filp); - if (err) - break; - - if (!inode_owner_or_capable(inode)) { - err = -EPERM; - goto setflags_out; - } - if (get_user(flags, (int __user *)arg)) { - err = -EFAULT; - goto setflags_out; - } - /* - * Is it quota file? 
Do not allow user to mess with it - */ - if (IS_NOQUOTA(inode)) { - err = -EPERM; - goto setflags_out; - } - if (((flags ^ REISERFS_I(inode)-> - i_attrs) & (REISERFS_IMMUTABLE_FL | - REISERFS_APPEND_FL)) - && !capable(CAP_LINUX_IMMUTABLE)) { - err = -EPERM; - goto setflags_out; - } - if ((flags & REISERFS_NOTAIL_FL) && - S_ISREG(inode->i_mode)) { - int result; - - result = reiserfs_unpack(inode, filp); - if (result) { - err = result; - goto setflags_out; - } - } - sd_attrs_to_i_attrs(flags, inode); - REISERFS_I(inode)->i_attrs = flags; - inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); -setflags_out: - mnt_drop_write_file(filp); - break; - } - case REISERFS_IOC_GETVERSION: - err = put_user(inode->i_generation, (int __user *)arg); - break; - case REISERFS_IOC_SETVERSION: - if (!inode_owner_or_capable(inode)) { - err = -EPERM; - break; - } - err = mnt_want_write_file(filp); - if (err) - break; - if (get_user(inode->i_generation, (int __user *)arg)) { - err = -EFAULT; - goto setversion_out; - } - inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); -setversion_out: - mnt_drop_write_file(filp); - break; - default: - err = -ENOTTY; - } - - reiserfs_write_unlock(inode->i_sb); - - return err; -} - -#ifdef CONFIG_COMPAT -long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - /* These are just misnamed, they actually get/put from/to user an int */ - switch (cmd) { - case REISERFS_IOC32_UNPACK: - cmd = REISERFS_IOC_UNPACK; - break; - case REISERFS_IOC32_GETFLAGS: - cmd = REISERFS_IOC_GETFLAGS; - break; - case REISERFS_IOC32_SETFLAGS: - cmd = REISERFS_IOC_SETFLAGS; - break; - case REISERFS_IOC32_GETVERSION: - cmd = REISERFS_IOC_GETVERSION; - break; - case REISERFS_IOC32_SETVERSION: - cmd = REISERFS_IOC_SETVERSION; - break; - default: - return -ENOIOCTLCMD; - } - - return reiserfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); -} -#endif - -int reiserfs_commit_write(struct file *f, struct page *page, - unsigned from, 
unsigned to); -/* -** reiserfs_unpack -** Function try to convert tail from direct item into indirect. -** It set up nopack attribute in the REISERFS_I(inode)->nopack -*/ -int reiserfs_unpack(struct inode *inode, struct file *filp) -{ - int retval = 0; - int depth; - int index; - struct page *page; - struct address_space *mapping; - unsigned long write_from; - unsigned long blocksize = inode->i_sb->s_blocksize; - - if (inode->i_size == 0) { - REISERFS_I(inode)->i_flags |= i_nopack_mask; - return 0; - } - /* ioctl already done */ - if (REISERFS_I(inode)->i_flags & i_nopack_mask) { - return 0; - } - - depth = reiserfs_write_lock_once(inode->i_sb); - - /* we need to make sure nobody is changing the file size beneath us */ - reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb); - - write_from = inode->i_size & (blocksize - 1); - /* if we are on a block boundary, we are already unpacked. */ - if (write_from == 0) { - REISERFS_I(inode)->i_flags |= i_nopack_mask; - goto out; - } - - /* we unpack by finding the page with the tail, and calling - ** __reiserfs_write_begin on that page. This will force a - ** reiserfs_get_block to unpack the tail for us. 
- */ - index = inode->i_size >> PAGE_CACHE_SHIFT; - mapping = inode->i_mapping; - page = grab_cache_page(mapping, index); - retval = -ENOMEM; - if (!page) { - goto out; - } - retval = __reiserfs_write_begin(page, write_from, 0); - if (retval) - goto out_unlock; - - /* conversion can change page contents, must flush */ - flush_dcache_page(page); - retval = reiserfs_commit_write(NULL, page, write_from, write_from); - REISERFS_I(inode)->i_flags |= i_nopack_mask; - - out_unlock: - unlock_page(page); - page_cache_release(page); - - out: - mutex_unlock(&inode->i_mutex); - reiserfs_write_unlock_once(inode->i_sb, depth); - return retval; -} diff --git a/ANDROID_3.4.5/fs/reiserfs/item_ops.c b/ANDROID_3.4.5/fs/reiserfs/item_ops.c deleted file mode 100644 index ee382ef3..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/item_ops.c +++ /dev/null @@ -1,756 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include <linux/time.h> -#include "reiserfs.h" - -// this contains item handlers for old item types: sd, direct, -// indirect, directory - -/* and where are the comments? how about saying where we can find an - explanation of each item handler method? 
-Hans */ - -////////////////////////////////////////////////////////////////////////////// -// stat data functions -// -static int sd_bytes_number(struct item_head *ih, int block_size) -{ - return 0; -} - -static void sd_decrement_key(struct cpu_key *key) -{ - key->on_disk_key.k_objectid--; - set_cpu_key_k_type(key, TYPE_ANY); - set_cpu_key_k_offset(key, (loff_t)(~0ULL >> 1)); -} - -static int sd_is_left_mergeable(struct reiserfs_key *key, unsigned long bsize) -{ - return 0; -} - -static char *print_time(time_t t) -{ - static char timebuf[256]; - - sprintf(timebuf, "%ld", t); - return timebuf; -} - -static void sd_print_item(struct item_head *ih, char *item) -{ - printk("\tmode | size | nlinks | first direct | mtime\n"); - if (stat_data_v1(ih)) { - struct stat_data_v1 *sd = (struct stat_data_v1 *)item; - - printk("\t0%-6o | %6u | %2u | %d | %s\n", sd_v1_mode(sd), - sd_v1_size(sd), sd_v1_nlink(sd), - sd_v1_first_direct_byte(sd), - print_time(sd_v1_mtime(sd))); - } else { - struct stat_data *sd = (struct stat_data *)item; - - printk("\t0%-6o | %6Lu | %2u | %d | %s\n", sd_v2_mode(sd), - (unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd), - sd_v2_rdev(sd), print_time(sd_v2_mtime(sd))); - } -} - -static void sd_check_item(struct item_head *ih, char *item) -{ - // FIXME: type something here! -} - -static int sd_create_vi(struct virtual_node *vn, - struct virtual_item *vi, - int is_affected, int insert_size) -{ - vi->vi_index = TYPE_STAT_DATA; - //vi->vi_type |= VI_TYPE_STAT_DATA;// not needed? 
- return 0; -} - -static int sd_check_left(struct virtual_item *vi, int free, - int start_skip, int end_skip) -{ - BUG_ON(start_skip || end_skip); - return -1; -} - -static int sd_check_right(struct virtual_item *vi, int free) -{ - return -1; -} - -static int sd_part_size(struct virtual_item *vi, int first, int count) -{ - BUG_ON(count); - return 0; -} - -static int sd_unit_num(struct virtual_item *vi) -{ - return vi->vi_item_len - IH_SIZE; -} - -static void sd_print_vi(struct virtual_item *vi) -{ - reiserfs_warning(NULL, "reiserfs-16100", - "STATDATA, index %d, type 0x%x, %h", - vi->vi_index, vi->vi_type, vi->vi_ih); -} - -static struct item_operations stat_data_ops = { - .bytes_number = sd_bytes_number, - .decrement_key = sd_decrement_key, - .is_left_mergeable = sd_is_left_mergeable, - .print_item = sd_print_item, - .check_item = sd_check_item, - - .create_vi = sd_create_vi, - .check_left = sd_check_left, - .check_right = sd_check_right, - .part_size = sd_part_size, - .unit_num = sd_unit_num, - .print_vi = sd_print_vi -}; - -////////////////////////////////////////////////////////////////////////////// -// direct item functions -// -static int direct_bytes_number(struct item_head *ih, int block_size) -{ - return ih_item_len(ih); -} - -// FIXME: this should probably switch to indirect as well -static void direct_decrement_key(struct cpu_key *key) -{ - cpu_key_k_offset_dec(key); - if (cpu_key_k_offset(key) == 0) - set_cpu_key_k_type(key, TYPE_STAT_DATA); -} - -static int direct_is_left_mergeable(struct reiserfs_key *key, - unsigned long bsize) -{ - int version = le_key_version(key); - return ((le_key_k_offset(version, key) & (bsize - 1)) != 1); -} - -static void direct_print_item(struct item_head *ih, char *item) -{ - int j = 0; - -// return; - printk("\""); - while (j < ih_item_len(ih)) - printk("%c", item[j++]); - printk("\"\n"); -} - -static void direct_check_item(struct item_head *ih, char *item) -{ - // FIXME: type something here! 
-} - -static int direct_create_vi(struct virtual_node *vn, - struct virtual_item *vi, - int is_affected, int insert_size) -{ - vi->vi_index = TYPE_DIRECT; - //vi->vi_type |= VI_TYPE_DIRECT; - return 0; -} - -static int direct_check_left(struct virtual_item *vi, int free, - int start_skip, int end_skip) -{ - int bytes; - - bytes = free - free % 8; - return bytes ? : -1; -} - -static int direct_check_right(struct virtual_item *vi, int free) -{ - return direct_check_left(vi, free, 0, 0); -} - -static int direct_part_size(struct virtual_item *vi, int first, int count) -{ - return count; -} - -static int direct_unit_num(struct virtual_item *vi) -{ - return vi->vi_item_len - IH_SIZE; -} - -static void direct_print_vi(struct virtual_item *vi) -{ - reiserfs_warning(NULL, "reiserfs-16101", - "DIRECT, index %d, type 0x%x, %h", - vi->vi_index, vi->vi_type, vi->vi_ih); -} - -static struct item_operations direct_ops = { - .bytes_number = direct_bytes_number, - .decrement_key = direct_decrement_key, - .is_left_mergeable = direct_is_left_mergeable, - .print_item = direct_print_item, - .check_item = direct_check_item, - - .create_vi = direct_create_vi, - .check_left = direct_check_left, - .check_right = direct_check_right, - .part_size = direct_part_size, - .unit_num = direct_unit_num, - .print_vi = direct_print_vi -}; - -////////////////////////////////////////////////////////////////////////////// -// indirect item functions -// - -static int indirect_bytes_number(struct item_head *ih, int block_size) -{ - return ih_item_len(ih) / UNFM_P_SIZE * block_size; //- get_ih_free_space (ih); -} - -// decrease offset, if it becomes 0, change type to stat data -static void indirect_decrement_key(struct cpu_key *key) -{ - cpu_key_k_offset_dec(key); - if (cpu_key_k_offset(key) == 0) - set_cpu_key_k_type(key, TYPE_STAT_DATA); -} - -// if it is not first item of the body, then it is mergeable -static int indirect_is_left_mergeable(struct reiserfs_key *key, - unsigned long bsize) -{ - int 
version = le_key_version(key); - return (le_key_k_offset(version, key) != 1); -} - -// printing of indirect item -static void start_new_sequence(__u32 * start, int *len, __u32 new) -{ - *start = new; - *len = 1; -} - -static int sequence_finished(__u32 start, int *len, __u32 new) -{ - if (start == INT_MAX) - return 1; - - if (start == 0 && new == 0) { - (*len)++; - return 0; - } - if (start != 0 && (start + *len) == new) { - (*len)++; - return 0; - } - return 1; -} - -static void print_sequence(__u32 start, int len) -{ - if (start == INT_MAX) - return; - - if (len == 1) - printk(" %d", start); - else - printk(" %d(%d)", start, len); -} - -static void indirect_print_item(struct item_head *ih, char *item) -{ - int j; - __le32 *unp; - __u32 prev = INT_MAX; - int num = 0; - - unp = (__le32 *) item; - - if (ih_item_len(ih) % UNFM_P_SIZE) - reiserfs_warning(NULL, "reiserfs-16102", "invalid item len"); - - printk("%d pointers\n[ ", (int)I_UNFM_NUM(ih)); - for (j = 0; j < I_UNFM_NUM(ih); j++) { - if (sequence_finished(prev, &num, get_block_num(unp, j))) { - print_sequence(prev, num); - start_new_sequence(&prev, &num, get_block_num(unp, j)); - } - } - print_sequence(prev, num); - printk("]\n"); -} - -static void indirect_check_item(struct item_head *ih, char *item) -{ - // FIXME: type something here! -} - -static int indirect_create_vi(struct virtual_node *vn, - struct virtual_item *vi, - int is_affected, int insert_size) -{ - vi->vi_index = TYPE_INDIRECT; - //vi->vi_type |= VI_TYPE_INDIRECT; - return 0; -} - -static int indirect_check_left(struct virtual_item *vi, int free, - int start_skip, int end_skip) -{ - int bytes; - - bytes = free - free % UNFM_P_SIZE; - return bytes ? : -1; -} - -static int indirect_check_right(struct virtual_item *vi, int free) -{ - return indirect_check_left(vi, free, 0, 0); -} - -// return size in bytes of 'units' units. 
If first == 0 - calculate from the head (left), otherwise - from tail (right) -static int indirect_part_size(struct virtual_item *vi, int first, int units) -{ - // unit of indirect item is byte (yet) - return units; -} - -static int indirect_unit_num(struct virtual_item *vi) -{ - // unit of indirect item is byte (yet) - return vi->vi_item_len - IH_SIZE; -} - -static void indirect_print_vi(struct virtual_item *vi) -{ - reiserfs_warning(NULL, "reiserfs-16103", - "INDIRECT, index %d, type 0x%x, %h", - vi->vi_index, vi->vi_type, vi->vi_ih); -} - -static struct item_operations indirect_ops = { - .bytes_number = indirect_bytes_number, - .decrement_key = indirect_decrement_key, - .is_left_mergeable = indirect_is_left_mergeable, - .print_item = indirect_print_item, - .check_item = indirect_check_item, - - .create_vi = indirect_create_vi, - .check_left = indirect_check_left, - .check_right = indirect_check_right, - .part_size = indirect_part_size, - .unit_num = indirect_unit_num, - .print_vi = indirect_print_vi -}; - -////////////////////////////////////////////////////////////////////////////// -// direntry functions -// - -static int direntry_bytes_number(struct item_head *ih, int block_size) -{ - reiserfs_warning(NULL, "vs-16090", - "bytes number is asked for direntry"); - return 0; -} - -static void direntry_decrement_key(struct cpu_key *key) -{ - cpu_key_k_offset_dec(key); - if (cpu_key_k_offset(key) == 0) - set_cpu_key_k_type(key, TYPE_STAT_DATA); -} - -static int direntry_is_left_mergeable(struct reiserfs_key *key, - unsigned long bsize) -{ - if (le32_to_cpu(key->u.k_offset_v1.k_offset) == DOT_OFFSET) - return 0; - return 1; - -} - -static void direntry_print_item(struct item_head *ih, char *item) -{ - int i; - int namelen; - struct reiserfs_de_head *deh; - char *name; - static char namebuf[80]; - - printk("\n # %-15s%-30s%-15s%-15s%-15s\n", "Name", - "Key of pointed object", "Hash", "Gen number", "Status"); - - deh = (struct reiserfs_de_head *)item; - - for (i = 0; 
i < I_ENTRY_COUNT(ih); i++, deh++) { - namelen = - (i ? (deh_location(deh - 1)) : ih_item_len(ih)) - - deh_location(deh); - name = item + deh_location(deh); - if (name[namelen - 1] == 0) - namelen = strlen(name); - namebuf[0] = '"'; - if (namelen > sizeof(namebuf) - 3) { - strncpy(namebuf + 1, name, sizeof(namebuf) - 3); - namebuf[sizeof(namebuf) - 2] = '"'; - namebuf[sizeof(namebuf) - 1] = 0; - } else { - memcpy(namebuf + 1, name, namelen); - namebuf[namelen + 1] = '"'; - namebuf[namelen + 2] = 0; - } - - printk("%d: %-15s%-15d%-15d%-15Ld%-15Ld(%s)\n", - i, namebuf, - deh_dir_id(deh), deh_objectid(deh), - GET_HASH_VALUE(deh_offset(deh)), - GET_GENERATION_NUMBER((deh_offset(deh))), - (de_hidden(deh)) ? "HIDDEN" : "VISIBLE"); - } -} - -static void direntry_check_item(struct item_head *ih, char *item) -{ - int i; - struct reiserfs_de_head *deh; - - // FIXME: type something here! - deh = (struct reiserfs_de_head *)item; - for (i = 0; i < I_ENTRY_COUNT(ih); i++, deh++) { - ; - } -} - -#define DIRENTRY_VI_FIRST_DIRENTRY_ITEM 1 - -/* - * function returns old entry number in directory item in real node - * using new entry number in virtual item in virtual node */ -static inline int old_entry_num(int is_affected, int virtual_entry_num, - int pos_in_item, int mode) -{ - if (mode == M_INSERT || mode == M_DELETE) - return virtual_entry_num; - - if (!is_affected) - /* cut or paste is applied to another item */ - return virtual_entry_num; - - if (virtual_entry_num < pos_in_item) - return virtual_entry_num; - - if (mode == M_CUT) - return virtual_entry_num + 1; - - RFALSE(mode != M_PASTE || virtual_entry_num == 0, - "vs-8015: old_entry_num: mode must be M_PASTE (mode = \'%c\'", - mode); - - return virtual_entry_num - 1; -} - -/* Create an array of sizes of directory entries for virtual - item. Return space used by an item. 
FIXME: no control over - consuming of space used by this item handler */ -static int direntry_create_vi(struct virtual_node *vn, - struct virtual_item *vi, - int is_affected, int insert_size) -{ - struct direntry_uarea *dir_u = vi->vi_uarea; - int i, j; - int size = sizeof(struct direntry_uarea); - struct reiserfs_de_head *deh; - - vi->vi_index = TYPE_DIRENTRY; - - BUG_ON(!(vi->vi_ih) || !vi->vi_item); - - dir_u->flags = 0; - if (le_ih_k_offset(vi->vi_ih) == DOT_OFFSET) - dir_u->flags |= DIRENTRY_VI_FIRST_DIRENTRY_ITEM; - - deh = (struct reiserfs_de_head *)(vi->vi_item); - - /* virtual directory item have this amount of entry after */ - dir_u->entry_count = ih_entry_count(vi->vi_ih) + - ((is_affected) ? ((vn->vn_mode == M_CUT) ? -1 : - (vn->vn_mode == M_PASTE ? 1 : 0)) : 0); - - for (i = 0; i < dir_u->entry_count; i++) { - j = old_entry_num(is_affected, i, vn->vn_pos_in_item, - vn->vn_mode); - dir_u->entry_sizes[i] = - (j ? deh_location(&(deh[j - 1])) : ih_item_len(vi->vi_ih)) - - deh_location(&(deh[j])) + DEH_SIZE; - } - - size += (dir_u->entry_count * sizeof(short)); - - /* set size of pasted entry */ - if (is_affected && vn->vn_mode == M_PASTE) - dir_u->entry_sizes[vn->vn_pos_in_item] = insert_size; - -#ifdef CONFIG_REISERFS_CHECK - /* compare total size of entries with item length */ - { - int k, l; - - l = 0; - for (k = 0; k < dir_u->entry_count; k++) - l += dir_u->entry_sizes[k]; - - if (l + IH_SIZE != vi->vi_item_len + - ((is_affected - && (vn->vn_mode == M_PASTE - || vn->vn_mode == M_CUT)) ? 
insert_size : 0)) { - reiserfs_panic(NULL, "vs-8025", "(mode==%c, " - "insert_size==%d), invalid length of " - "directory item", - vn->vn_mode, insert_size); - } - } -#endif - - return size; - -} - -// -// return number of entries which may fit into specified amount of -// free space, or -1 if free space is not enough even for 1 entry -// -static int direntry_check_left(struct virtual_item *vi, int free, - int start_skip, int end_skip) -{ - int i; - int entries = 0; - struct direntry_uarea *dir_u = vi->vi_uarea; - - for (i = start_skip; i < dir_u->entry_count - end_skip; i++) { - if (dir_u->entry_sizes[i] > free) - /* i-th entry doesn't fit into the remaining free space */ - break; - - free -= dir_u->entry_sizes[i]; - entries++; - } - - if (entries == dir_u->entry_count) { - reiserfs_panic(NULL, "item_ops-1", - "free space %d, entry_count %d", free, - dir_u->entry_count); - } - - /* "." and ".." can not be separated from each other */ - if (start_skip == 0 && (dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) - && entries < 2) - entries = 0; - - return entries ? : -1; -} - -static int direntry_check_right(struct virtual_item *vi, int free) -{ - int i; - int entries = 0; - struct direntry_uarea *dir_u = vi->vi_uarea; - - for (i = dir_u->entry_count - 1; i >= 0; i--) { - if (dir_u->entry_sizes[i] > free) - /* i-th entry doesn't fit into the remaining free space */ - break; - - free -= dir_u->entry_sizes[i]; - entries++; - } - BUG_ON(entries == dir_u->entry_count); - - /* "." and ".." can not be separated from each other */ - if ((dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) - && entries > dir_u->entry_count - 2) - entries = dir_u->entry_count - 2; - - return entries ? 
: -1; -} - -/* sum of entry sizes between from-th and to-th entries including both edges */ -static int direntry_part_size(struct virtual_item *vi, int first, int count) -{ - int i, retval; - int from, to; - struct direntry_uarea *dir_u = vi->vi_uarea; - - retval = 0; - if (first == 0) - from = 0; - else - from = dir_u->entry_count - count; - to = from + count - 1; - - for (i = from; i <= to; i++) - retval += dir_u->entry_sizes[i]; - - return retval; -} - -static int direntry_unit_num(struct virtual_item *vi) -{ - struct direntry_uarea *dir_u = vi->vi_uarea; - - return dir_u->entry_count; -} - -static void direntry_print_vi(struct virtual_item *vi) -{ - int i; - struct direntry_uarea *dir_u = vi->vi_uarea; - - reiserfs_warning(NULL, "reiserfs-16104", - "DIRENTRY, index %d, type 0x%x, %h, flags 0x%x", - vi->vi_index, vi->vi_type, vi->vi_ih, dir_u->flags); - printk("%d entries: ", dir_u->entry_count); - for (i = 0; i < dir_u->entry_count; i++) - printk("%d ", dir_u->entry_sizes[i]); - printk("\n"); -} - -static struct item_operations direntry_ops = { - .bytes_number = direntry_bytes_number, - .decrement_key = direntry_decrement_key, - .is_left_mergeable = direntry_is_left_mergeable, - .print_item = direntry_print_item, - .check_item = direntry_check_item, - - .create_vi = direntry_create_vi, - .check_left = direntry_check_left, - .check_right = direntry_check_right, - .part_size = direntry_part_size, - .unit_num = direntry_unit_num, - .print_vi = direntry_print_vi -}; - -////////////////////////////////////////////////////////////////////////////// -// Error catching functions to catch errors caused by incorrect item types. 
-// -static int errcatch_bytes_number(struct item_head *ih, int block_size) -{ - reiserfs_warning(NULL, "green-16001", - "Invalid item type observed, run fsck ASAP"); - return 0; -} - -static void errcatch_decrement_key(struct cpu_key *key) -{ - reiserfs_warning(NULL, "green-16002", - "Invalid item type observed, run fsck ASAP"); -} - -static int errcatch_is_left_mergeable(struct reiserfs_key *key, - unsigned long bsize) -{ - reiserfs_warning(NULL, "green-16003", - "Invalid item type observed, run fsck ASAP"); - return 0; -} - -static void errcatch_print_item(struct item_head *ih, char *item) -{ - reiserfs_warning(NULL, "green-16004", - "Invalid item type observed, run fsck ASAP"); -} - -static void errcatch_check_item(struct item_head *ih, char *item) -{ - reiserfs_warning(NULL, "green-16005", - "Invalid item type observed, run fsck ASAP"); -} - -static int errcatch_create_vi(struct virtual_node *vn, - struct virtual_item *vi, - int is_affected, int insert_size) -{ - reiserfs_warning(NULL, "green-16006", - "Invalid item type observed, run fsck ASAP"); - return 0; // We might return -1 here as well, but it won't help as create_virtual_node() from where - // this operation is called from is of return type void. 
-} - -static int errcatch_check_left(struct virtual_item *vi, int free, - int start_skip, int end_skip) -{ - reiserfs_warning(NULL, "green-16007", - "Invalid item type observed, run fsck ASAP"); - return -1; -} - -static int errcatch_check_right(struct virtual_item *vi, int free) -{ - reiserfs_warning(NULL, "green-16008", - "Invalid item type observed, run fsck ASAP"); - return -1; -} - -static int errcatch_part_size(struct virtual_item *vi, int first, int count) -{ - reiserfs_warning(NULL, "green-16009", - "Invalid item type observed, run fsck ASAP"); - return 0; -} - -static int errcatch_unit_num(struct virtual_item *vi) -{ - reiserfs_warning(NULL, "green-16010", - "Invalid item type observed, run fsck ASAP"); - return 0; -} - -static void errcatch_print_vi(struct virtual_item *vi) -{ - reiserfs_warning(NULL, "green-16011", - "Invalid item type observed, run fsck ASAP"); -} - -static struct item_operations errcatch_ops = { - errcatch_bytes_number, - errcatch_decrement_key, - errcatch_is_left_mergeable, - errcatch_print_item, - errcatch_check_item, - - errcatch_create_vi, - errcatch_check_left, - errcatch_check_right, - errcatch_part_size, - errcatch_unit_num, - errcatch_print_vi -}; - -////////////////////////////////////////////////////////////////////////////// -// -// -#if ! (TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3) -#error Item types must use disk-format assigned values. 
-#endif - -struct item_operations *item_ops[TYPE_ANY + 1] = { - &stat_data_ops, - &indirect_ops, - &direct_ops, - &direntry_ops, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - &errcatch_ops /* This is to catch errors with invalid type (15th entry for TYPE_ANY) */ -}; diff --git a/ANDROID_3.4.5/fs/reiserfs/journal.c b/ANDROID_3.4.5/fs/reiserfs/journal.c deleted file mode 100644 index b1a08573..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/journal.c +++ /dev/null @@ -1,4286 +0,0 @@ -/* -** Write ahead logging implementation copyright Chris Mason 2000 -** -** The background commits make this code very interrelated, and -** overly complex. I need to rethink things a bit....The major players: -** -** journal_begin -- call with the number of blocks you expect to log. -** If the current transaction is too -** old, it will block until the current transaction is -** finished, and then start a new one. -** Usually, your transaction will get joined in with -** previous ones for speed. -** -** journal_join -- same as journal_begin, but won't block on the current -** transaction regardless of age. Don't ever call -** this. Ever. There are only two places it should be -** called from, and they are both inside this file. -** -** journal_mark_dirty -- adds blocks into this transaction. clears any flags -** that might make them get sent to disk -** and then marks them BH_JDirty. Puts the buffer head -** into the current transaction hash. -** -** journal_end -- if the current transaction is batchable, it does nothing -** otherwise, it could do an async/synchronous commit, or -** a full flush of all log and real blocks in the -** transaction. -** -** flush_old_commits -- if the current transaction is too old, it is ended and -** commit blocks are sent to disk. Forces commit blocks -** to disk for all backgrounded commits that have been -** around too long. 
-** -- Note, if you call this as an immediate flush from -** from within kupdate, it will ignore the immediate flag -*/ - -#include <linux/time.h> -#include <linux/semaphore.h> -#include <linux/vmalloc.h> -#include "reiserfs.h" -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/fcntl.h> -#include <linux/stat.h> -#include <linux/string.h> -#include <linux/buffer_head.h> -#include <linux/workqueue.h> -#include <linux/writeback.h> -#include <linux/blkdev.h> -#include <linux/backing-dev.h> -#include <linux/uaccess.h> -#include <linux/slab.h> - - -/* gets a struct reiserfs_journal_list * from a list head */ -#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ - j_list)) -#define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ - j_working_list)) - -/* the number of mounted filesystems. This is used to decide when to -** start and kill the commit workqueue -*/ -static int reiserfs_mounted_fs_count; - -static struct workqueue_struct *commit_wq; - -#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit - structs at 4k */ -#define BUFNR 64 /*read ahead */ - -/* cnode stat bits. Move these into reiserfs_fs.h */ - -#define BLOCK_FREED 2 /* this block was freed, and can't be written. 
*/ -#define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ - -#define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ -#define BLOCK_DIRTIED 5 - -/* journal list state bits */ -#define LIST_TOUCHED 1 -#define LIST_DIRTY 2 -#define LIST_COMMIT_PENDING 4 /* someone will commit this list */ - -/* flags for do_journal_end */ -#define FLUSH_ALL 1 /* flush commit and real blocks */ -#define COMMIT_NOW 2 /* end and commit this transaction */ -#define WAIT 4 /* wait for the log blocks to hit the disk */ - -static int do_journal_end(struct reiserfs_transaction_handle *, - struct super_block *, unsigned long nblocks, - int flags); -static int flush_journal_list(struct super_block *s, - struct reiserfs_journal_list *jl, int flushall); -static int flush_commit_list(struct super_block *s, - struct reiserfs_journal_list *jl, int flushall); -static int can_dirty(struct reiserfs_journal_cnode *cn); -static int journal_join(struct reiserfs_transaction_handle *th, - struct super_block *sb, unsigned long nblocks); -static int release_journal_dev(struct super_block *super, - struct reiserfs_journal *journal); -static int dirty_one_transaction(struct super_block *s, - struct reiserfs_journal_list *jl); -static void flush_async_commits(struct work_struct *work); -static void queue_log_writer(struct super_block *s); - -/* values for join in do_journal_begin_r */ -enum { - JBEGIN_REG = 0, /* regular journal begin */ - JBEGIN_JOIN = 1, /* join the running transaction if at all possible */ - JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */ -}; - -static int do_journal_begin_r(struct reiserfs_transaction_handle *th, - struct super_block *sb, - unsigned long nblocks, int join); - -static void init_journal_hash(struct super_block *sb) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - memset(journal->j_hash_table, 0, - JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); -} - -/* -** clears BH_Dirty and 
sticks the buffer on the clean list. Called because I can't allow refile_buffer to -** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for -** more details. -*/ -static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) -{ - if (bh) { - clear_buffer_dirty(bh); - clear_buffer_journal_test(bh); - } - return 0; -} - -static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block - *sb) -{ - struct reiserfs_bitmap_node *bn; - static int id; - - bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS); - if (!bn) { - return NULL; - } - bn->data = kzalloc(sb->s_blocksize, GFP_NOFS); - if (!bn->data) { - kfree(bn); - return NULL; - } - bn->id = id++; - INIT_LIST_HEAD(&bn->list); - return bn; -} - -static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_bitmap_node *bn = NULL; - struct list_head *entry = journal->j_bitmap_nodes.next; - - journal->j_used_bitmap_nodes++; - repeat: - - if (entry != &journal->j_bitmap_nodes) { - bn = list_entry(entry, struct reiserfs_bitmap_node, list); - list_del(entry); - memset(bn->data, 0, sb->s_blocksize); - journal->j_free_bitmap_nodes--; - return bn; - } - bn = allocate_bitmap_node(sb); - if (!bn) { - yield(); - goto repeat; - } - return bn; -} -static inline void free_bitmap_node(struct super_block *sb, - struct reiserfs_bitmap_node *bn) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - journal->j_used_bitmap_nodes--; - if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) { - kfree(bn->data); - kfree(bn); - } else { - list_add(&bn->list, &journal->j_bitmap_nodes); - journal->j_free_bitmap_nodes++; - } -} - -static void allocate_bitmap_nodes(struct super_block *sb) -{ - int i; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_bitmap_node *bn = NULL; - for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) { - bn = 
allocate_bitmap_node(sb); - if (bn) { - list_add(&bn->list, &journal->j_bitmap_nodes); - journal->j_free_bitmap_nodes++; - } else { - break; /* this is ok, we'll try again when more are needed */ - } - } -} - -static int set_bit_in_list_bitmap(struct super_block *sb, - b_blocknr_t block, - struct reiserfs_list_bitmap *jb) -{ - unsigned int bmap_nr = block / (sb->s_blocksize << 3); - unsigned int bit_nr = block % (sb->s_blocksize << 3); - - if (!jb->bitmaps[bmap_nr]) { - jb->bitmaps[bmap_nr] = get_bitmap_node(sb); - } - set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data); - return 0; -} - -static void cleanup_bitmap_list(struct super_block *sb, - struct reiserfs_list_bitmap *jb) -{ - int i; - if (jb->bitmaps == NULL) - return; - - for (i = 0; i < reiserfs_bmap_count(sb); i++) { - if (jb->bitmaps[i]) { - free_bitmap_node(sb, jb->bitmaps[i]); - jb->bitmaps[i] = NULL; - } - } -} - -/* -** only call this on FS unmount. -*/ -static int free_list_bitmaps(struct super_block *sb, - struct reiserfs_list_bitmap *jb_array) -{ - int i; - struct reiserfs_list_bitmap *jb; - for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { - jb = jb_array + i; - jb->journal_list = NULL; - cleanup_bitmap_list(sb, jb); - vfree(jb->bitmaps); - jb->bitmaps = NULL; - } - return 0; -} - -static int free_bitmap_nodes(struct super_block *sb) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct list_head *next = journal->j_bitmap_nodes.next; - struct reiserfs_bitmap_node *bn; - - while (next != &journal->j_bitmap_nodes) { - bn = list_entry(next, struct reiserfs_bitmap_node, list); - list_del(next); - kfree(bn->data); - kfree(bn); - next = journal->j_bitmap_nodes.next; - journal->j_free_bitmap_nodes--; - } - - return 0; -} - -/* -** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. -** jb_array is the array to be filled in. 
-*/ -int reiserfs_allocate_list_bitmaps(struct super_block *sb, - struct reiserfs_list_bitmap *jb_array, - unsigned int bmap_nr) -{ - int i; - int failed = 0; - struct reiserfs_list_bitmap *jb; - int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *); - - for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { - jb = jb_array + i; - jb->journal_list = NULL; - jb->bitmaps = vzalloc(mem); - if (!jb->bitmaps) { - reiserfs_warning(sb, "clm-2000", "unable to " - "allocate bitmaps for journal lists"); - failed = 1; - break; - } - } - if (failed) { - free_list_bitmaps(sb, jb_array); - return -1; - } - return 0; -} - -/* -** find an available list bitmap. If you can't find one, flush a commit list -** and try again -*/ -static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb, - struct reiserfs_journal_list - *jl) -{ - int i, j; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_list_bitmap *jb = NULL; - - for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) { - i = journal->j_list_bitmap_index; - journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS; - jb = journal->j_list_bitmap + i; - if (journal->j_list_bitmap[i].journal_list) { - flush_commit_list(sb, - journal->j_list_bitmap[i]. - journal_list, 1); - if (!journal->j_list_bitmap[i].journal_list) { - break; - } - } else { - break; - } - } - if (jb->journal_list) { /* double check to make sure if flushed correctly */ - return NULL; - } - jb->journal_list = jl; - return jb; -} - -/* -** allocates a new chunk of X nodes, and links them all together as a list. 
-** Uses the cnode->next and cnode->prev pointers -** returns NULL on failure -*/ -static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) -{ - struct reiserfs_journal_cnode *head; - int i; - if (num_cnodes <= 0) { - return NULL; - } - head = vzalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)); - if (!head) { - return NULL; - } - head[0].prev = NULL; - head[0].next = head + 1; - for (i = 1; i < num_cnodes; i++) { - head[i].prev = head + (i - 1); - head[i].next = head + (i + 1); /* if last one, overwrite it after the if */ - } - head[num_cnodes - 1].next = NULL; - return head; -} - -/* -** pulls a cnode off the free list, or returns NULL on failure -*/ -static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb) -{ - struct reiserfs_journal_cnode *cn; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - - reiserfs_check_lock_depth(sb, "get_cnode"); - - if (journal->j_cnode_free <= 0) { - return NULL; - } - journal->j_cnode_used++; - journal->j_cnode_free--; - cn = journal->j_cnode_free_list; - if (!cn) { - return cn; - } - if (cn->next) { - cn->next->prev = NULL; - } - journal->j_cnode_free_list = cn->next; - memset(cn, 0, sizeof(struct reiserfs_journal_cnode)); - return cn; -} - -/* -** returns a cnode to the free list -*/ -static void free_cnode(struct super_block *sb, - struct reiserfs_journal_cnode *cn) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - - reiserfs_check_lock_depth(sb, "free_cnode"); - - journal->j_cnode_used--; - journal->j_cnode_free++; - /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */ - cn->next = journal->j_cnode_free_list; - if (journal->j_cnode_free_list) { - journal->j_cnode_free_list->prev = cn; - } - cn->prev = NULL; /* not needed with the memset, but I might kill the memset, and forget to do this */ - journal->j_cnode_free_list = cn; -} - -static void clear_prepared_bits(struct buffer_head *bh) -{ - clear_buffer_journal_prepared(bh); - clear_buffer_journal_restore_dirty(bh); -} - 
-/* return a cnode with same dev, block number and size in table, or null if not found */ -static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct - super_block - *sb, - struct - reiserfs_journal_cnode - **table, - long bl) -{ - struct reiserfs_journal_cnode *cn; - cn = journal_hash(table, sb, bl); - while (cn) { - if (cn->blocknr == bl && cn->sb == sb) - return cn; - cn = cn->hnext; - } - return (struct reiserfs_journal_cnode *)0; -} - -/* -** this actually means 'can this block be reallocated yet?'. If you set search_all, a block can only be allocated -** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever -** being overwritten by a replay after crashing. -** -** If you don't set search_all, a block can only be allocated if it is not in the current transaction. Since deleting -** a block removes it from the current transaction, this case should never happen. If you don't set search_all, make -** sure you never write the block without logging it. -** -** next_zero_bit is a suggestion about the next block to try for find_forward. -** when bl is rejected because it is set in a journal list bitmap, we search -** for the next zero bit in the bitmap that rejected bl. Then, we return that -** through next_zero_bit for find_forward to try. -** -** Just because we return something in next_zero_bit does not mean we won't -** reject it on the next call to reiserfs_in_journal -** -*/ -int reiserfs_in_journal(struct super_block *sb, - unsigned int bmap_nr, int bit_nr, int search_all, - b_blocknr_t * next_zero_bit) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_journal_cnode *cn; - struct reiserfs_list_bitmap *jb; - int i; - unsigned long bl; - - *next_zero_bit = 0; /* always start this at zero. */ - - PROC_INFO_INC(sb, journal.in_journal); - /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. 
- ** if we crash before the transaction that freed it commits, this transaction won't - ** have committed either, and the block will never be written - */ - if (search_all) { - for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { - PROC_INFO_INC(sb, journal.in_journal_bitmap); - jb = journal->j_list_bitmap + i; - if (jb->journal_list && jb->bitmaps[bmap_nr] && - test_bit(bit_nr, - (unsigned long *)jb->bitmaps[bmap_nr]-> - data)) { - *next_zero_bit = - find_next_zero_bit((unsigned long *) - (jb->bitmaps[bmap_nr]-> - data), - sb->s_blocksize << 3, - bit_nr + 1); - return 1; - } - } - } - - bl = bmap_nr * (sb->s_blocksize << 3) + bit_nr; - /* is it in any old transactions? */ - if (search_all - && (cn = - get_journal_hash_dev(sb, journal->j_list_hash_table, bl))) { - return 1; - } - - /* is it in the current transaction. This should never happen */ - if ((cn = get_journal_hash_dev(sb, journal->j_hash_table, bl))) { - BUG(); - return 1; - } - - PROC_INFO_INC(sb, journal.in_journal_reusable); - /* safe for reuse */ - return 0; -} - -/* insert cn into table -*/ -static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, - struct reiserfs_journal_cnode *cn) -{ - struct reiserfs_journal_cnode *cn_orig; - - cn_orig = journal_hash(table, cn->sb, cn->blocknr); - cn->hnext = cn_orig; - cn->hprev = NULL; - if (cn_orig) { - cn_orig->hprev = cn; - } - journal_hash(table, cn->sb, cn->blocknr) = cn; -} - -/* lock the current transaction */ -static inline void lock_journal(struct super_block *sb) -{ - PROC_INFO_INC(sb, journal.lock_journal); - - reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb); -} - -/* unlock the current transaction */ -static inline void unlock_journal(struct super_block *sb) -{ - mutex_unlock(&SB_JOURNAL(sb)->j_mutex); -} - -static inline void get_journal_list(struct reiserfs_journal_list *jl) -{ - jl->j_refcount++; -} - -static inline void put_journal_list(struct super_block *s, - struct reiserfs_journal_list *jl) -{ - if (jl->j_refcount < 1) { - 
reiserfs_panic(s, "journal-2", "trans id %u, refcount at %d", - jl->j_trans_id, jl->j_refcount); - } - if (--jl->j_refcount == 0) - kfree(jl); -} - -/* -** this used to be much more involved, and I'm keeping it just in case things get ugly again. -** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a -** transaction. -*/ -static void cleanup_freed_for_journal_list(struct super_block *sb, - struct reiserfs_journal_list *jl) -{ - - struct reiserfs_list_bitmap *jb = jl->j_list_bitmap; - if (jb) { - cleanup_bitmap_list(sb, jb); - } - jl->j_list_bitmap->journal_list = NULL; - jl->j_list_bitmap = NULL; -} - -static int journal_list_still_alive(struct super_block *s, - unsigned int trans_id) -{ - struct reiserfs_journal *journal = SB_JOURNAL(s); - struct list_head *entry = &journal->j_journal_list; - struct reiserfs_journal_list *jl; - - if (!list_empty(entry)) { - jl = JOURNAL_LIST_ENTRY(entry->next); - if (jl->j_trans_id <= trans_id) { - return 1; - } - } - return 0; -} - -/* - * If page->mapping was null, we failed to truncate this page for - * some reason. Most likely because it was truncated after being - * logged via data=journal. - * - * This does a check to see if the buffer belongs to one of these - * lost pages before doing the final put_bh. If page->mapping was - * null, it tries to free buffers on the page, which should make the - * final page_cache_release drop the page from the lru. 
- */ -static void release_buffer_page(struct buffer_head *bh) -{ - struct page *page = bh->b_page; - if (!page->mapping && trylock_page(page)) { - page_cache_get(page); - put_bh(bh); - if (!page->mapping) - try_to_free_buffers(page); - unlock_page(page); - page_cache_release(page); - } else { - put_bh(bh); - } -} - -static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) -{ - char b[BDEVNAME_SIZE]; - - if (buffer_journaled(bh)) { - reiserfs_warning(NULL, "clm-2084", - "pinned buffer %lu:%s sent to disk", - bh->b_blocknr, bdevname(bh->b_bdev, b)); - } - if (uptodate) - set_buffer_uptodate(bh); - else - clear_buffer_uptodate(bh); - - unlock_buffer(bh); - release_buffer_page(bh); -} - -static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate) -{ - if (uptodate) - set_buffer_uptodate(bh); - else - clear_buffer_uptodate(bh); - unlock_buffer(bh); - put_bh(bh); -} - -static void submit_logged_buffer(struct buffer_head *bh) -{ - get_bh(bh); - bh->b_end_io = reiserfs_end_buffer_io_sync; - clear_buffer_journal_new(bh); - clear_buffer_dirty(bh); - if (!test_clear_buffer_journal_test(bh)) - BUG(); - if (!buffer_uptodate(bh)) - BUG(); - submit_bh(WRITE, bh); -} - -static void submit_ordered_buffer(struct buffer_head *bh) -{ - get_bh(bh); - bh->b_end_io = reiserfs_end_ordered_io; - clear_buffer_dirty(bh); - if (!buffer_uptodate(bh)) - BUG(); - submit_bh(WRITE, bh); -} - -#define CHUNK_SIZE 32 -struct buffer_chunk { - struct buffer_head *bh[CHUNK_SIZE]; - int nr; -}; - -static void write_chunk(struct buffer_chunk *chunk) -{ - int i; - for (i = 0; i < chunk->nr; i++) { - submit_logged_buffer(chunk->bh[i]); - } - chunk->nr = 0; -} - -static void write_ordered_chunk(struct buffer_chunk *chunk) -{ - int i; - for (i = 0; i < chunk->nr; i++) { - submit_ordered_buffer(chunk->bh[i]); - } - chunk->nr = 0; -} - -static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, - spinlock_t * lock, void (fn) (struct buffer_chunk *)) -{ - int 
ret = 0; - BUG_ON(chunk->nr >= CHUNK_SIZE); - chunk->bh[chunk->nr++] = bh; - if (chunk->nr >= CHUNK_SIZE) { - ret = 1; - if (lock) - spin_unlock(lock); - fn(chunk); - if (lock) - spin_lock(lock); - } - return ret; -} - -static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0); -static struct reiserfs_jh *alloc_jh(void) -{ - struct reiserfs_jh *jh; - while (1) { - jh = kmalloc(sizeof(*jh), GFP_NOFS); - if (jh) { - atomic_inc(&nr_reiserfs_jh); - return jh; - } - yield(); - } -} - -/* - * we want to free the jh when the buffer has been written - * and waited on - */ -void reiserfs_free_jh(struct buffer_head *bh) -{ - struct reiserfs_jh *jh; - - jh = bh->b_private; - if (jh) { - bh->b_private = NULL; - jh->bh = NULL; - list_del_init(&jh->list); - kfree(jh); - if (atomic_read(&nr_reiserfs_jh) <= 0) - BUG(); - atomic_dec(&nr_reiserfs_jh); - put_bh(bh); - } -} - -static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh, - int tail) -{ - struct reiserfs_jh *jh; - - if (bh->b_private) { - spin_lock(&j->j_dirty_buffers_lock); - if (!bh->b_private) { - spin_unlock(&j->j_dirty_buffers_lock); - goto no_jh; - } - jh = bh->b_private; - list_del_init(&jh->list); - } else { - no_jh: - get_bh(bh); - jh = alloc_jh(); - spin_lock(&j->j_dirty_buffers_lock); - /* buffer must be locked for __add_jh, should be able to have - * two adds at the same time - */ - BUG_ON(bh->b_private); - jh->bh = bh; - bh->b_private = jh; - } - jh->jl = j->j_current_jl; - if (tail) - list_add_tail(&jh->list, &jh->jl->j_tail_bh_list); - else { - list_add_tail(&jh->list, &jh->jl->j_bh_list); - } - spin_unlock(&j->j_dirty_buffers_lock); - return 0; -} - -int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh) -{ - return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1); -} -int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh) -{ - return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0); -} - -#define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list) -static int 
write_ordered_buffers(spinlock_t * lock, - struct reiserfs_journal *j, - struct reiserfs_journal_list *jl, - struct list_head *list) -{ - struct buffer_head *bh; - struct reiserfs_jh *jh; - int ret = j->j_errno; - struct buffer_chunk chunk; - struct list_head tmp; - INIT_LIST_HEAD(&tmp); - - chunk.nr = 0; - spin_lock(lock); - while (!list_empty(list)) { - jh = JH_ENTRY(list->next); - bh = jh->bh; - get_bh(bh); - if (!trylock_buffer(bh)) { - if (!buffer_dirty(bh)) { - list_move(&jh->list, &tmp); - goto loop_next; - } - spin_unlock(lock); - if (chunk.nr) - write_ordered_chunk(&chunk); - wait_on_buffer(bh); - cond_resched(); - spin_lock(lock); - goto loop_next; - } - /* in theory, dirty non-uptodate buffers should never get here, - * but the upper layer io error paths still have a few quirks. - * Handle them here as gracefully as we can - */ - if (!buffer_uptodate(bh) && buffer_dirty(bh)) { - clear_buffer_dirty(bh); - ret = -EIO; - } - if (buffer_dirty(bh)) { - list_move(&jh->list, &tmp); - add_to_chunk(&chunk, bh, lock, write_ordered_chunk); - } else { - reiserfs_free_jh(bh); - unlock_buffer(bh); - } - loop_next: - put_bh(bh); - cond_resched_lock(lock); - } - if (chunk.nr) { - spin_unlock(lock); - write_ordered_chunk(&chunk); - spin_lock(lock); - } - while (!list_empty(&tmp)) { - jh = JH_ENTRY(tmp.prev); - bh = jh->bh; - get_bh(bh); - reiserfs_free_jh(bh); - - if (buffer_locked(bh)) { - spin_unlock(lock); - wait_on_buffer(bh); - spin_lock(lock); - } - if (!buffer_uptodate(bh)) { - ret = -EIO; - } - /* ugly interaction with invalidatepage here. - * reiserfs_invalidate_page will pin any buffer that has a valid - * journal head from an older transaction. If someone else sets - * our buffer dirty after we write it in the first loop, and - * then someone truncates the page away, nobody will ever write - * the buffer. We're safe if we write the page one last time - * after freeing the journal header. 
- */ - if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) { - spin_unlock(lock); - ll_rw_block(WRITE, 1, &bh); - spin_lock(lock); - } - put_bh(bh); - cond_resched_lock(lock); - } - spin_unlock(lock); - return ret; -} - -static int flush_older_commits(struct super_block *s, - struct reiserfs_journal_list *jl) -{ - struct reiserfs_journal *journal = SB_JOURNAL(s); - struct reiserfs_journal_list *other_jl; - struct reiserfs_journal_list *first_jl; - struct list_head *entry; - unsigned int trans_id = jl->j_trans_id; - unsigned int other_trans_id; - unsigned int first_trans_id; - - find_first: - /* - * first we walk backwards to find the oldest uncommitted transation - */ - first_jl = jl; - entry = jl->j_list.prev; - while (1) { - other_jl = JOURNAL_LIST_ENTRY(entry); - if (entry == &journal->j_journal_list || - atomic_read(&other_jl->j_older_commits_done)) - break; - - first_jl = other_jl; - entry = other_jl->j_list.prev; - } - - /* if we didn't find any older uncommitted transactions, return now */ - if (first_jl == jl) { - return 0; - } - - first_trans_id = first_jl->j_trans_id; - - entry = &first_jl->j_list; - while (1) { - other_jl = JOURNAL_LIST_ENTRY(entry); - other_trans_id = other_jl->j_trans_id; - - if (other_trans_id < trans_id) { - if (atomic_read(&other_jl->j_commit_left) != 0) { - flush_commit_list(s, other_jl, 0); - - /* list we were called with is gone, return */ - if (!journal_list_still_alive(s, trans_id)) - return 1; - - /* the one we just flushed is gone, this means all - * older lists are also gone, so first_jl is no longer - * valid either. Go back to the beginning. 
- */ - if (!journal_list_still_alive - (s, other_trans_id)) { - goto find_first; - } - } - entry = entry->next; - if (entry == &journal->j_journal_list) - return 0; - } else { - return 0; - } - } - return 0; -} - -static int reiserfs_async_progress_wait(struct super_block *s) -{ - struct reiserfs_journal *j = SB_JOURNAL(s); - - if (atomic_read(&j->j_async_throttle)) { - reiserfs_write_unlock(s); - congestion_wait(BLK_RW_ASYNC, HZ / 10); - reiserfs_write_lock(s); - } - - return 0; -} - -/* -** if this journal list still has commit blocks unflushed, send them to disk. -** -** log areas must be flushed in order (transaction 2 can't commit before transaction 1) -** Before the commit block can by written, every other log block must be safely on disk -** -*/ -static int flush_commit_list(struct super_block *s, - struct reiserfs_journal_list *jl, int flushall) -{ - int i; - b_blocknr_t bn; - struct buffer_head *tbh = NULL; - unsigned int trans_id = jl->j_trans_id; - struct reiserfs_journal *journal = SB_JOURNAL(s); - int retval = 0; - int write_len; - - reiserfs_check_lock_depth(s, "flush_commit_list"); - - if (atomic_read(&jl->j_older_commits_done)) { - return 0; - } - - /* before we can put our commit blocks on disk, we have to make sure everyone older than - ** us is on disk too - */ - BUG_ON(jl->j_len <= 0); - BUG_ON(trans_id == journal->j_trans_id); - - get_journal_list(jl); - if (flushall) { - if (flush_older_commits(s, jl) == 1) { - /* list disappeared during flush_older_commits. 
return */ - goto put_jl; - } - } - - /* make sure nobody is trying to flush this one at the same time */ - reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s); - - if (!journal_list_still_alive(s, trans_id)) { - mutex_unlock(&jl->j_commit_mutex); - goto put_jl; - } - BUG_ON(jl->j_trans_id == 0); - - /* this commit is done, exit */ - if (atomic_read(&(jl->j_commit_left)) <= 0) { - if (flushall) { - atomic_set(&(jl->j_older_commits_done), 1); - } - mutex_unlock(&jl->j_commit_mutex); - goto put_jl; - } - - if (!list_empty(&jl->j_bh_list)) { - int ret; - - /* - * We might sleep in numerous places inside - * write_ordered_buffers. Relax the write lock. - */ - reiserfs_write_unlock(s); - ret = write_ordered_buffers(&journal->j_dirty_buffers_lock, - journal, jl, &jl->j_bh_list); - if (ret < 0 && retval == 0) - retval = ret; - reiserfs_write_lock(s); - } - BUG_ON(!list_empty(&jl->j_bh_list)); - /* - * for the description block and all the log blocks, submit any buffers - * that haven't already reached the disk. Try to write at least 256 - * log blocks. later on, we will only wait on blocks that correspond - * to this transaction, but while we're unplugging we might as well - * get a chunk of data on there. 
- */ - atomic_inc(&journal->j_async_throttle); - write_len = jl->j_len + 1; - if (write_len < 256) - write_len = 256; - for (i = 0 ; i < write_len ; i++) { - bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) % - SB_ONDISK_JOURNAL_SIZE(s); - tbh = journal_find_get_block(s, bn); - if (tbh) { - if (buffer_dirty(tbh)) { - reiserfs_write_unlock(s); - ll_rw_block(WRITE, 1, &tbh); - reiserfs_write_lock(s); - } - put_bh(tbh) ; - } - } - atomic_dec(&journal->j_async_throttle); - - for (i = 0; i < (jl->j_len + 1); i++) { - bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + - (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); - tbh = journal_find_get_block(s, bn); - - reiserfs_write_unlock(s); - wait_on_buffer(tbh); - reiserfs_write_lock(s); - // since we're using ll_rw_blk above, it might have skipped over - // a locked buffer. Double check here - // - /* redundant, sync_dirty_buffer() checks */ - if (buffer_dirty(tbh)) { - reiserfs_write_unlock(s); - sync_dirty_buffer(tbh); - reiserfs_write_lock(s); - } - if (unlikely(!buffer_uptodate(tbh))) { -#ifdef CONFIG_REISERFS_CHECK - reiserfs_warning(s, "journal-601", - "buffer write failed"); -#endif - retval = -EIO; - } - put_bh(tbh); /* once for journal_find_get_block */ - put_bh(tbh); /* once due to original getblk in do_journal_end */ - atomic_dec(&(jl->j_commit_left)); - } - - BUG_ON(atomic_read(&(jl->j_commit_left)) != 1); - - /* If there was a write error in the journal - we can't commit - * this transaction - it will be invalid and, if successful, - * will just end up propagating the write error out to - * the file system. 
*/ - if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { - if (buffer_dirty(jl->j_commit_bh)) - BUG(); - mark_buffer_dirty(jl->j_commit_bh) ; - reiserfs_write_unlock(s); - if (reiserfs_barrier_flush(s)) - __sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA); - else - sync_dirty_buffer(jl->j_commit_bh); - reiserfs_write_lock(s); - } - - /* If there was a write error in the journal - we can't commit this - * transaction - it will be invalid and, if successful, will just end - * up propagating the write error out to the filesystem. */ - if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { -#ifdef CONFIG_REISERFS_CHECK - reiserfs_warning(s, "journal-615", "buffer write failed"); -#endif - retval = -EIO; - } - bforget(jl->j_commit_bh); - if (journal->j_last_commit_id != 0 && - (jl->j_trans_id - journal->j_last_commit_id) != 1) { - reiserfs_warning(s, "clm-2200", "last commit %lu, current %lu", - journal->j_last_commit_id, jl->j_trans_id); - } - journal->j_last_commit_id = jl->j_trans_id; - - /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ - cleanup_freed_for_journal_list(s, jl); - - retval = retval ? retval : journal->j_errno; - - /* mark the metadata dirty */ - if (!retval) - dirty_one_transaction(s, jl); - atomic_dec(&(jl->j_commit_left)); - - if (flushall) { - atomic_set(&(jl->j_older_commits_done), 1); - } - mutex_unlock(&jl->j_commit_mutex); - put_jl: - put_journal_list(s, jl); - - if (retval) - reiserfs_abort(s, retval, "Journal write error in %s", - __func__); - return retval; -} - -/* -** flush_journal_list frequently needs to find a newer transaction for a given block. 
This does that, or -** returns NULL if it can't find anything -*/ -static struct reiserfs_journal_list *find_newer_jl_for_cn(struct - reiserfs_journal_cnode - *cn) -{ - struct super_block *sb = cn->sb; - b_blocknr_t blocknr = cn->blocknr; - - cn = cn->hprev; - while (cn) { - if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) { - return cn->jlist; - } - cn = cn->hprev; - } - return NULL; -} - -static int newer_jl_done(struct reiserfs_journal_cnode *cn) -{ - struct super_block *sb = cn->sb; - b_blocknr_t blocknr = cn->blocknr; - - cn = cn->hprev; - while (cn) { - if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist && - atomic_read(&cn->jlist->j_commit_left) != 0) - return 0; - cn = cn->hprev; - } - return 1; -} - -static void remove_journal_hash(struct super_block *, - struct reiserfs_journal_cnode **, - struct reiserfs_journal_list *, unsigned long, - int); - -/* -** once all the real blocks have been flushed, it is safe to remove them from the -** journal list for this transaction. Aside from freeing the cnode, this also allows the -** block to be reallocated for data blocks if it had been deleted. -*/ -static void remove_all_from_journal_list(struct super_block *sb, - struct reiserfs_journal_list *jl, - int debug) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_journal_cnode *cn, *last; - cn = jl->j_realblock; - - /* which is better, to lock once around the whole loop, or - ** to lock for each call to remove_journal_hash? - */ - while (cn) { - if (cn->blocknr != 0) { - if (debug) { - reiserfs_warning(sb, "reiserfs-2201", - "block %u, bh is %d, state %ld", - cn->blocknr, cn->bh ? 1 : 0, - cn->state); - } - cn->state = 0; - remove_journal_hash(sb, journal->j_list_hash_table, - jl, cn->blocknr, 1); - } - last = cn; - cn = cn->next; - free_cnode(sb, last); - } - jl->j_realblock = NULL; -} - -/* -** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block. 
-** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start -** releasing blocks in this transaction for reuse as data blocks. -** called by flush_journal_list, before it calls remove_all_from_journal_list -** -*/ -static int _update_journal_header_block(struct super_block *sb, - unsigned long offset, - unsigned int trans_id) -{ - struct reiserfs_journal_header *jh; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - - if (reiserfs_is_journal_aborted(journal)) - return -EIO; - - if (trans_id >= journal->j_last_flush_trans_id) { - if (buffer_locked((journal->j_header_bh))) { - reiserfs_write_unlock(sb); - wait_on_buffer((journal->j_header_bh)); - reiserfs_write_lock(sb); - if (unlikely(!buffer_uptodate(journal->j_header_bh))) { -#ifdef CONFIG_REISERFS_CHECK - reiserfs_warning(sb, "journal-699", - "buffer write failed"); -#endif - return -EIO; - } - } - journal->j_last_flush_trans_id = trans_id; - journal->j_first_unflushed_offset = offset; - jh = (struct reiserfs_journal_header *)(journal->j_header_bh-> - b_data); - jh->j_last_flush_trans_id = cpu_to_le32(trans_id); - jh->j_first_unflushed_offset = cpu_to_le32(offset); - jh->j_mount_id = cpu_to_le32(journal->j_mount_id); - - set_buffer_dirty(journal->j_header_bh); - reiserfs_write_unlock(sb); - - if (reiserfs_barrier_flush(sb)) - __sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA); - else - sync_dirty_buffer(journal->j_header_bh); - - reiserfs_write_lock(sb); - if (!buffer_uptodate(journal->j_header_bh)) { - reiserfs_warning(sb, "journal-837", - "IO error during journal replay"); - return -EIO; - } - } - return 0; -} - -static int update_journal_header_block(struct super_block *sb, - unsigned long offset, - unsigned int trans_id) -{ - return _update_journal_header_block(sb, offset, trans_id); -} - -/* -** flush any and all journal lists older than you are -** can only be called from flush_journal_list -*/ -static int flush_older_journal_lists(struct 
super_block *sb, - struct reiserfs_journal_list *jl) -{ - struct list_head *entry; - struct reiserfs_journal_list *other_jl; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - unsigned int trans_id = jl->j_trans_id; - - /* we know we are the only ones flushing things, no extra race - * protection is required. - */ - restart: - entry = journal->j_journal_list.next; - /* Did we wrap? */ - if (entry == &journal->j_journal_list) - return 0; - other_jl = JOURNAL_LIST_ENTRY(entry); - if (other_jl->j_trans_id < trans_id) { - BUG_ON(other_jl->j_refcount <= 0); - /* do not flush all */ - flush_journal_list(sb, other_jl, 0); - - /* other_jl is now deleted from the list */ - goto restart; - } - return 0; -} - -static void del_from_work_list(struct super_block *s, - struct reiserfs_journal_list *jl) -{ - struct reiserfs_journal *journal = SB_JOURNAL(s); - if (!list_empty(&jl->j_working_list)) { - list_del_init(&jl->j_working_list); - journal->j_num_work_lists--; - } -} - -/* flush a journal list, both commit and real blocks -** -** always set flushall to 1, unless you are calling from inside -** flush_journal_list -** -** IMPORTANT. This can only be called while there are no journal writers, -** and the journal is locked. 
That means it can only be called from -** do_journal_end, or by journal_release -*/ -static int flush_journal_list(struct super_block *s, - struct reiserfs_journal_list *jl, int flushall) -{ - struct reiserfs_journal_list *pjl; - struct reiserfs_journal_cnode *cn, *last; - int count; - int was_jwait = 0; - int was_dirty = 0; - struct buffer_head *saved_bh; - unsigned long j_len_saved = jl->j_len; - struct reiserfs_journal *journal = SB_JOURNAL(s); - int err = 0; - - BUG_ON(j_len_saved <= 0); - - if (atomic_read(&journal->j_wcount) != 0) { - reiserfs_warning(s, "clm-2048", "called with wcount %d", - atomic_read(&journal->j_wcount)); - } - BUG_ON(jl->j_trans_id == 0); - - /* if flushall == 0, the lock is already held */ - if (flushall) { - reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s); - } else if (mutex_trylock(&journal->j_flush_mutex)) { - BUG(); - } - - count = 0; - if (j_len_saved > journal->j_trans_max) { - reiserfs_panic(s, "journal-715", "length is %lu, trans id %lu", - j_len_saved, jl->j_trans_id); - return 0; - } - - /* if all the work is already done, get out of here */ - if (atomic_read(&(jl->j_nonzerolen)) <= 0 && - atomic_read(&(jl->j_commit_left)) <= 0) { - goto flush_older_and_return; - } - - /* start by putting the commit list on disk. This will also flush - ** the commit lists of any olders transactions - */ - flush_commit_list(s, jl, 1); - - if (!(jl->j_state & LIST_DIRTY) - && !reiserfs_is_journal_aborted(journal)) - BUG(); - - /* are we done now? 
*/ - if (atomic_read(&(jl->j_nonzerolen)) <= 0 && - atomic_read(&(jl->j_commit_left)) <= 0) { - goto flush_older_and_return; - } - - /* loop through each cnode, see if we need to write it, - ** or wait on a more recent transaction, or just ignore it - */ - if (atomic_read(&(journal->j_wcount)) != 0) { - reiserfs_panic(s, "journal-844", "journal list is flushing, " - "wcount is not 0"); - } - cn = jl->j_realblock; - while (cn) { - was_jwait = 0; - was_dirty = 0; - saved_bh = NULL; - /* blocknr of 0 is no longer in the hash, ignore it */ - if (cn->blocknr == 0) { - goto free_cnode; - } - - /* This transaction failed commit. Don't write out to the disk */ - if (!(jl->j_state & LIST_DIRTY)) - goto free_cnode; - - pjl = find_newer_jl_for_cn(cn); - /* the order is important here. We check pjl to make sure we - ** don't clear BH_JDirty_wait if we aren't the one writing this - ** block to disk - */ - if (!pjl && cn->bh) { - saved_bh = cn->bh; - - /* we do this to make sure nobody releases the buffer while - ** we are working with it - */ - get_bh(saved_bh); - - if (buffer_journal_dirty(saved_bh)) { - BUG_ON(!can_dirty(cn)); - was_jwait = 1; - was_dirty = 1; - } else if (can_dirty(cn)) { - /* everything with !pjl && jwait should be writable */ - BUG(); - } - } - - /* if someone has this block in a newer transaction, just make - ** sure they are committed, and don't try writing it to disk - */ - if (pjl) { - if (atomic_read(&pjl->j_commit_left)) - flush_commit_list(s, pjl, 1); - goto free_cnode; - } - - /* bh == NULL when the block got to disk on its own, OR, - ** the block got freed in a future transaction - */ - if (saved_bh == NULL) { - goto free_cnode; - } - - /* this should never happen. kupdate_one_transaction has this list - ** locked while it works, so we should never see a buffer here that - ** is not marked JDirty_wait - */ - if ((!was_jwait) && !buffer_locked(saved_bh)) { - reiserfs_warning(s, "journal-813", - "BAD! 
buffer %llu %cdirty %cjwait, " - "not in a newer tranasction", - (unsigned long long)saved_bh-> - b_blocknr, was_dirty ? ' ' : '!', - was_jwait ? ' ' : '!'); - } - if (was_dirty) { - /* we inc again because saved_bh gets decremented at free_cnode */ - get_bh(saved_bh); - set_bit(BLOCK_NEEDS_FLUSH, &cn->state); - lock_buffer(saved_bh); - BUG_ON(cn->blocknr != saved_bh->b_blocknr); - if (buffer_dirty(saved_bh)) - submit_logged_buffer(saved_bh); - else - unlock_buffer(saved_bh); - count++; - } else { - reiserfs_warning(s, "clm-2082", - "Unable to flush buffer %llu in %s", - (unsigned long long)saved_bh-> - b_blocknr, __func__); - } - free_cnode: - last = cn; - cn = cn->next; - if (saved_bh) { - /* we incremented this to keep others from taking the buffer head away */ - put_bh(saved_bh); - if (atomic_read(&(saved_bh->b_count)) < 0) { - reiserfs_warning(s, "journal-945", - "saved_bh->b_count < 0"); - } - } - } - if (count > 0) { - cn = jl->j_realblock; - while (cn) { - if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { - if (!cn->bh) { - reiserfs_panic(s, "journal-1011", - "cn->bh is NULL"); - } - - reiserfs_write_unlock(s); - wait_on_buffer(cn->bh); - reiserfs_write_lock(s); - - if (!cn->bh) { - reiserfs_panic(s, "journal-1012", - "cn->bh is NULL"); - } - if (unlikely(!buffer_uptodate(cn->bh))) { -#ifdef CONFIG_REISERFS_CHECK - reiserfs_warning(s, "journal-949", - "buffer write failed"); -#endif - err = -EIO; - } - /* note, we must clear the JDirty_wait bit after the up to date - ** check, otherwise we race against our flushpage routine - */ - BUG_ON(!test_clear_buffer_journal_dirty - (cn->bh)); - - /* drop one ref for us */ - put_bh(cn->bh); - /* drop one ref for journal_mark_dirty */ - release_buffer_page(cn->bh); - } - cn = cn->next; - } - } - - if (err) - reiserfs_abort(s, -EIO, - "Write error while pushing transaction to disk in %s", - __func__); - flush_older_and_return: - - /* before we can update the journal header block, we _must_ flush all - ** real blocks from 
all older transactions to disk. This is because - ** once the header block is updated, this transaction will not be - ** replayed after a crash - */ - if (flushall) { - flush_older_journal_lists(s, jl); - } - - err = journal->j_errno; - /* before we can remove everything from the hash tables for this - ** transaction, we must make sure it can never be replayed - ** - ** since we are only called from do_journal_end, we know for sure there - ** are no allocations going on while we are flushing journal lists. So, - ** we only need to update the journal header block for the last list - ** being flushed - */ - if (!err && flushall) { - err = - update_journal_header_block(s, - (jl->j_start + jl->j_len + - 2) % SB_ONDISK_JOURNAL_SIZE(s), - jl->j_trans_id); - if (err) - reiserfs_abort(s, -EIO, - "Write error while updating journal header in %s", - __func__); - } - remove_all_from_journal_list(s, jl, 0); - list_del_init(&jl->j_list); - journal->j_num_lists--; - del_from_work_list(s, jl); - - if (journal->j_last_flush_id != 0 && - (jl->j_trans_id - journal->j_last_flush_id) != 1) { - reiserfs_warning(s, "clm-2201", "last flush %lu, current %lu", - journal->j_last_flush_id, jl->j_trans_id); - } - journal->j_last_flush_id = jl->j_trans_id; - - /* not strictly required since we are freeing the list, but it should - * help find code using dead lists later on - */ - jl->j_len = 0; - atomic_set(&(jl->j_nonzerolen), 0); - jl->j_start = 0; - jl->j_realblock = NULL; - jl->j_commit_bh = NULL; - jl->j_trans_id = 0; - jl->j_state = 0; - put_journal_list(s, jl); - if (flushall) - mutex_unlock(&journal->j_flush_mutex); - return err; -} - -static int test_transaction(struct super_block *s, - struct reiserfs_journal_list *jl) -{ - struct reiserfs_journal_cnode *cn; - - if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) - return 1; - - cn = jl->j_realblock; - while (cn) { - /* if the blocknr == 0, this has been cleared from the hash, - ** skip it - */ - if (cn->blocknr == 0) { - goto 
next; - } - if (cn->bh && !newer_jl_done(cn)) - return 0; - next: - cn = cn->next; - cond_resched(); - } - return 0; -} - -static int write_one_transaction(struct super_block *s, - struct reiserfs_journal_list *jl, - struct buffer_chunk *chunk) -{ - struct reiserfs_journal_cnode *cn; - int ret = 0; - - jl->j_state |= LIST_TOUCHED; - del_from_work_list(s, jl); - if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) { - return 0; - } - - cn = jl->j_realblock; - while (cn) { - /* if the blocknr == 0, this has been cleared from the hash, - ** skip it - */ - if (cn->blocknr == 0) { - goto next; - } - if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) { - struct buffer_head *tmp_bh; - /* we can race against journal_mark_freed when we try - * to lock_buffer(cn->bh), so we have to inc the buffer - * count, and recheck things after locking - */ - tmp_bh = cn->bh; - get_bh(tmp_bh); - lock_buffer(tmp_bh); - if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) { - if (!buffer_journal_dirty(tmp_bh) || - buffer_journal_prepared(tmp_bh)) - BUG(); - add_to_chunk(chunk, tmp_bh, NULL, write_chunk); - ret++; - } else { - /* note, cn->bh might be null now */ - unlock_buffer(tmp_bh); - } - put_bh(tmp_bh); - } - next: - cn = cn->next; - cond_resched(); - } - return ret; -} - -/* used by flush_commit_list */ -static int dirty_one_transaction(struct super_block *s, - struct reiserfs_journal_list *jl) -{ - struct reiserfs_journal_cnode *cn; - struct reiserfs_journal_list *pjl; - int ret = 0; - - jl->j_state |= LIST_DIRTY; - cn = jl->j_realblock; - while (cn) { - /* look for a more recent transaction that logged this - ** buffer. Only the most recent transaction with a buffer in - ** it is allowed to send that buffer to disk - */ - pjl = find_newer_jl_for_cn(cn); - if (!pjl && cn->blocknr && cn->bh - && buffer_journal_dirty(cn->bh)) { - BUG_ON(!can_dirty(cn)); - /* if the buffer is prepared, it will either be logged - * or restored. 
If restored, we need to make sure - * it actually gets marked dirty - */ - clear_buffer_journal_new(cn->bh); - if (buffer_journal_prepared(cn->bh)) { - set_buffer_journal_restore_dirty(cn->bh); - } else { - set_buffer_journal_test(cn->bh); - mark_buffer_dirty(cn->bh); - } - } - cn = cn->next; - } - return ret; -} - -static int kupdate_transactions(struct super_block *s, - struct reiserfs_journal_list *jl, - struct reiserfs_journal_list **next_jl, - unsigned int *next_trans_id, - int num_blocks, int num_trans) -{ - int ret = 0; - int written = 0; - int transactions_flushed = 0; - unsigned int orig_trans_id = jl->j_trans_id; - struct buffer_chunk chunk; - struct list_head *entry; - struct reiserfs_journal *journal = SB_JOURNAL(s); - chunk.nr = 0; - - reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s); - if (!journal_list_still_alive(s, orig_trans_id)) { - goto done; - } - - /* we've got j_flush_mutex held, nobody is going to delete any - * of these lists out from underneath us - */ - while ((num_trans && transactions_flushed < num_trans) || - (!num_trans && written < num_blocks)) { - - if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) || - atomic_read(&jl->j_commit_left) - || !(jl->j_state & LIST_DIRTY)) { - del_from_work_list(s, jl); - break; - } - ret = write_one_transaction(s, jl, &chunk); - - if (ret < 0) - goto done; - transactions_flushed++; - written += ret; - entry = jl->j_list.next; - - /* did we wrap? */ - if (entry == &journal->j_journal_list) { - break; - } - jl = JOURNAL_LIST_ENTRY(entry); - - /* don't bother with older transactions */ - if (jl->j_trans_id <= orig_trans_id) - break; - } - if (chunk.nr) { - write_chunk(&chunk); - } - - done: - mutex_unlock(&journal->j_flush_mutex); - return ret; -} - -/* for o_sync and fsync heavy applications, they tend to use -** all the journa list slots with tiny transactions. These -** trigger lots and lots of calls to update the header block, which -** adds seeks and slows things down. 
-** -** This function tries to clear out a large chunk of the journal lists -** at once, which makes everything faster since only the newest journal -** list updates the header block -*/ -static int flush_used_journal_lists(struct super_block *s, - struct reiserfs_journal_list *jl) -{ - unsigned long len = 0; - unsigned long cur_len; - int ret; - int i; - int limit = 256; - struct reiserfs_journal_list *tjl; - struct reiserfs_journal_list *flush_jl; - unsigned int trans_id; - struct reiserfs_journal *journal = SB_JOURNAL(s); - - flush_jl = tjl = jl; - - /* in data logging mode, try harder to flush a lot of blocks */ - if (reiserfs_data_log(s)) - limit = 1024; - /* flush for 256 transactions or limit blocks, whichever comes first */ - for (i = 0; i < 256 && len < limit; i++) { - if (atomic_read(&tjl->j_commit_left) || - tjl->j_trans_id < jl->j_trans_id) { - break; - } - cur_len = atomic_read(&tjl->j_nonzerolen); - if (cur_len > 0) { - tjl->j_state &= ~LIST_TOUCHED; - } - len += cur_len; - flush_jl = tjl; - if (tjl->j_list.next == &journal->j_journal_list) - break; - tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next); - } - /* try to find a group of blocks we can flush across all the - ** transactions, but only bother if we've actually spanned - ** across multiple lists - */ - if (flush_jl != jl) { - ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); - } - flush_journal_list(s, flush_jl, 1); - return 0; -} - -/* -** removes any nodes in table with name block and dev as bh. -** only touchs the hnext and hprev pointers. 
-*/ -void remove_journal_hash(struct super_block *sb, - struct reiserfs_journal_cnode **table, - struct reiserfs_journal_list *jl, - unsigned long block, int remove_freed) -{ - struct reiserfs_journal_cnode *cur; - struct reiserfs_journal_cnode **head; - - head = &(journal_hash(table, sb, block)); - if (!head) { - return; - } - cur = *head; - while (cur) { - if (cur->blocknr == block && cur->sb == sb - && (jl == NULL || jl == cur->jlist) - && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) { - if (cur->hnext) { - cur->hnext->hprev = cur->hprev; - } - if (cur->hprev) { - cur->hprev->hnext = cur->hnext; - } else { - *head = cur->hnext; - } - cur->blocknr = 0; - cur->sb = NULL; - cur->state = 0; - if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */ - atomic_dec(&(cur->jlist->j_nonzerolen)); - cur->bh = NULL; - cur->jlist = NULL; - } - cur = cur->hnext; - } -} - -static void free_journal_ram(struct super_block *sb) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - kfree(journal->j_current_jl); - journal->j_num_lists--; - - vfree(journal->j_cnode_free_orig); - free_list_bitmaps(sb, journal->j_list_bitmap); - free_bitmap_nodes(sb); /* must be after free_list_bitmaps */ - if (journal->j_header_bh) { - brelse(journal->j_header_bh); - } - /* j_header_bh is on the journal dev, make sure not to release the journal - * dev until we brelse j_header_bh - */ - release_journal_dev(sb, journal); - vfree(journal); -} - -/* -** call on unmount. Only set error to 1 if you haven't made your way out -** of read_super() yet. Any other caller must keep error at 0. 
-*/ -static int do_journal_release(struct reiserfs_transaction_handle *th, - struct super_block *sb, int error) -{ - struct reiserfs_transaction_handle myth; - int flushed = 0; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - - /* we only want to flush out transactions if we were called with error == 0 - */ - if (!error && !(sb->s_flags & MS_RDONLY)) { - /* end the current trans */ - BUG_ON(!th->t_trans_id); - do_journal_end(th, sb, 10, FLUSH_ALL); - - /* make sure something gets logged to force our way into the flush code */ - if (!journal_join(&myth, sb, 1)) { - reiserfs_prepare_for_journal(sb, - SB_BUFFER_WITH_SB(sb), - 1); - journal_mark_dirty(&myth, sb, - SB_BUFFER_WITH_SB(sb)); - do_journal_end(&myth, sb, 1, FLUSH_ALL); - flushed = 1; - } - } - - /* this also catches errors during the do_journal_end above */ - if (!error && reiserfs_is_journal_aborted(journal)) { - memset(&myth, 0, sizeof(myth)); - if (!journal_join_abort(&myth, sb, 1)) { - reiserfs_prepare_for_journal(sb, - SB_BUFFER_WITH_SB(sb), - 1); - journal_mark_dirty(&myth, sb, - SB_BUFFER_WITH_SB(sb)); - do_journal_end(&myth, sb, 1, FLUSH_ALL); - } - } - - reiserfs_mounted_fs_count--; - /* wait for all commits to finish */ - cancel_delayed_work(&SB_JOURNAL(sb)->j_work); - - /* - * We must release the write lock here because - * the workqueue job (flush_async_commit) needs this lock - */ - reiserfs_write_unlock(sb); - flush_workqueue(commit_wq); - - if (!reiserfs_mounted_fs_count) { - destroy_workqueue(commit_wq); - commit_wq = NULL; - } - - free_journal_ram(sb); - - reiserfs_write_lock(sb); - - return 0; -} - -/* -** call on unmount. flush all journal trans, release all alloc'd ram -*/ -int journal_release(struct reiserfs_transaction_handle *th, - struct super_block *sb) -{ - return do_journal_release(th, sb, 0); -} - -/* -** only call from an error condition inside reiserfs_read_super! 
-*/ -int journal_release_error(struct reiserfs_transaction_handle *th, - struct super_block *sb) -{ - return do_journal_release(th, sb, 1); -} - -/* compares description block with commit block. returns 1 if they differ, 0 if they are the same */ -static int journal_compare_desc_commit(struct super_block *sb, - struct reiserfs_journal_desc *desc, - struct reiserfs_journal_commit *commit) -{ - if (get_commit_trans_id(commit) != get_desc_trans_id(desc) || - get_commit_trans_len(commit) != get_desc_trans_len(desc) || - get_commit_trans_len(commit) > SB_JOURNAL(sb)->j_trans_max || - get_commit_trans_len(commit) <= 0) { - return 1; - } - return 0; -} - -/* returns 0 if it did not find a description block -** returns -1 if it found a corrupt commit block -** returns 1 if both desc and commit were valid -*/ -static int journal_transaction_is_valid(struct super_block *sb, - struct buffer_head *d_bh, - unsigned int *oldest_invalid_trans_id, - unsigned long *newest_mount_id) -{ - struct reiserfs_journal_desc *desc; - struct reiserfs_journal_commit *commit; - struct buffer_head *c_bh; - unsigned long offset; - - if (!d_bh) - return 0; - - desc = (struct reiserfs_journal_desc *)d_bh->b_data; - if (get_desc_trans_len(desc) > 0 - && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) { - if (oldest_invalid_trans_id && *oldest_invalid_trans_id - && get_desc_trans_id(desc) > *oldest_invalid_trans_id) { - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-986: transaction " - "is valid returning because trans_id %d is greater than " - "oldest_invalid %lu", - get_desc_trans_id(desc), - *oldest_invalid_trans_id); - return 0; - } - if (newest_mount_id - && *newest_mount_id > get_desc_mount_id(desc)) { - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1087: transaction " - "is valid returning because mount_id %d is less than " - "newest_mount_id %lu", - get_desc_mount_id(desc), - *newest_mount_id); - return -1; - } - if (get_desc_trans_len(desc) > 
SB_JOURNAL(sb)->j_trans_max) { - reiserfs_warning(sb, "journal-2018", - "Bad transaction length %d " - "encountered, ignoring transaction", - get_desc_trans_len(desc)); - return -1; - } - offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb); - - /* ok, we have a journal description block, lets see if the transaction was valid */ - c_bh = - journal_bread(sb, - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - ((offset + get_desc_trans_len(desc) + - 1) % SB_ONDISK_JOURNAL_SIZE(sb))); - if (!c_bh) - return 0; - commit = (struct reiserfs_journal_commit *)c_bh->b_data; - if (journal_compare_desc_commit(sb, desc, commit)) { - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal_transaction_is_valid, commit offset %ld had bad " - "time %d or length %d", - c_bh->b_blocknr - - SB_ONDISK_JOURNAL_1st_BLOCK(sb), - get_commit_trans_id(commit), - get_commit_trans_len(commit)); - brelse(c_bh); - if (oldest_invalid_trans_id) { - *oldest_invalid_trans_id = - get_desc_trans_id(desc); - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1004: " - "transaction_is_valid setting oldest invalid trans_id " - "to %d", - get_desc_trans_id(desc)); - } - return -1; - } - brelse(c_bh); - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1006: found valid " - "transaction start offset %llu, len %d id %d", - d_bh->b_blocknr - - SB_ONDISK_JOURNAL_1st_BLOCK(sb), - get_desc_trans_len(desc), - get_desc_trans_id(desc)); - return 1; - } else { - return 0; - } -} - -static void brelse_array(struct buffer_head **heads, int num) -{ - int i; - for (i = 0; i < num; i++) { - brelse(heads[i]); - } -} - -/* -** given the start, and values for the oldest acceptable transactions, -** this either reads in a replays a transaction, or returns because the transaction -** is invalid, or too old. 
-*/ -static int journal_read_transaction(struct super_block *sb, - unsigned long cur_dblock, - unsigned long oldest_start, - unsigned int oldest_trans_id, - unsigned long newest_mount_id) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_journal_desc *desc; - struct reiserfs_journal_commit *commit; - unsigned int trans_id = 0; - struct buffer_head *c_bh; - struct buffer_head *d_bh; - struct buffer_head **log_blocks = NULL; - struct buffer_head **real_blocks = NULL; - unsigned int trans_offset; - int i; - int trans_half; - - d_bh = journal_bread(sb, cur_dblock); - if (!d_bh) - return 1; - desc = (struct reiserfs_journal_desc *)d_bh->b_data; - trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb); - reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1037: " - "journal_read_transaction, offset %llu, len %d mount_id %d", - d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb), - get_desc_trans_len(desc), get_desc_mount_id(desc)); - if (get_desc_trans_id(desc) < oldest_trans_id) { - reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1039: " - "journal_read_trans skipping because %lu is too old", - cur_dblock - - SB_ONDISK_JOURNAL_1st_BLOCK(sb)); - brelse(d_bh); - return 1; - } - if (get_desc_mount_id(desc) != newest_mount_id) { - reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1146: " - "journal_read_trans skipping because %d is != " - "newest_mount_id %lu", get_desc_mount_id(desc), - newest_mount_id); - brelse(d_bh); - return 1; - } - c_bh = journal_bread(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - ((trans_offset + get_desc_trans_len(desc) + 1) % - SB_ONDISK_JOURNAL_SIZE(sb))); - if (!c_bh) { - brelse(d_bh); - return 1; - } - commit = (struct reiserfs_journal_commit *)c_bh->b_data; - if (journal_compare_desc_commit(sb, desc, commit)) { - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal_read_transaction, " - "commit offset %llu had bad time %d or length %d", - c_bh->b_blocknr - - SB_ONDISK_JOURNAL_1st_BLOCK(sb), - get_commit_trans_id(commit), - 
get_commit_trans_len(commit)); - brelse(c_bh); - brelse(d_bh); - return 1; - } - - if (bdev_read_only(sb->s_bdev)) { - reiserfs_warning(sb, "clm-2076", - "device is readonly, unable to replay log"); - brelse(c_bh); - brelse(d_bh); - return -EROFS; - } - - trans_id = get_desc_trans_id(desc); - /* now we know we've got a good transaction, and it was inside the valid time ranges */ - log_blocks = kmalloc(get_desc_trans_len(desc) * - sizeof(struct buffer_head *), GFP_NOFS); - real_blocks = kmalloc(get_desc_trans_len(desc) * - sizeof(struct buffer_head *), GFP_NOFS); - if (!log_blocks || !real_blocks) { - brelse(c_bh); - brelse(d_bh); - kfree(log_blocks); - kfree(real_blocks); - reiserfs_warning(sb, "journal-1169", - "kmalloc failed, unable to mount FS"); - return -1; - } - /* get all the buffer heads */ - trans_half = journal_trans_half(sb->s_blocksize); - for (i = 0; i < get_desc_trans_len(desc); i++) { - log_blocks[i] = - journal_getblk(sb, - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - (trans_offset + 1 + - i) % SB_ONDISK_JOURNAL_SIZE(sb)); - if (i < trans_half) { - real_blocks[i] = - sb_getblk(sb, - le32_to_cpu(desc->j_realblock[i])); - } else { - real_blocks[i] = - sb_getblk(sb, - le32_to_cpu(commit-> - j_realblock[i - trans_half])); - } - if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(sb)) { - reiserfs_warning(sb, "journal-1207", - "REPLAY FAILURE fsck required! " - "Block to replay is outside of " - "filesystem"); - goto abort_replay; - } - /* make sure we don't try to replay onto log or reserved area */ - if (is_block_in_log_or_reserved_area - (sb, real_blocks[i]->b_blocknr)) { - reiserfs_warning(sb, "journal-1204", - "REPLAY FAILURE fsck required! 
" - "Trying to replay onto a log block"); - abort_replay: - brelse_array(log_blocks, i); - brelse_array(real_blocks, i); - brelse(c_bh); - brelse(d_bh); - kfree(log_blocks); - kfree(real_blocks); - return -1; - } - } - /* read in the log blocks, memcpy to the corresponding real block */ - ll_rw_block(READ, get_desc_trans_len(desc), log_blocks); - for (i = 0; i < get_desc_trans_len(desc); i++) { - - reiserfs_write_unlock(sb); - wait_on_buffer(log_blocks[i]); - reiserfs_write_lock(sb); - - if (!buffer_uptodate(log_blocks[i])) { - reiserfs_warning(sb, "journal-1212", - "REPLAY FAILURE fsck required! " - "buffer write failed"); - brelse_array(log_blocks + i, - get_desc_trans_len(desc) - i); - brelse_array(real_blocks, get_desc_trans_len(desc)); - brelse(c_bh); - brelse(d_bh); - kfree(log_blocks); - kfree(real_blocks); - return -1; - } - memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data, - real_blocks[i]->b_size); - set_buffer_uptodate(real_blocks[i]); - brelse(log_blocks[i]); - } - /* flush out the real blocks */ - for (i = 0; i < get_desc_trans_len(desc); i++) { - set_buffer_dirty(real_blocks[i]); - write_dirty_buffer(real_blocks[i], WRITE); - } - for (i = 0; i < get_desc_trans_len(desc); i++) { - wait_on_buffer(real_blocks[i]); - if (!buffer_uptodate(real_blocks[i])) { - reiserfs_warning(sb, "journal-1226", - "REPLAY FAILURE, fsck required! " - "buffer write failed"); - brelse_array(real_blocks + i, - get_desc_trans_len(desc) - i); - brelse(c_bh); - brelse(d_bh); - kfree(log_blocks); - kfree(real_blocks); - return -1; - } - brelse(real_blocks[i]); - } - cur_dblock = - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - ((trans_offset + get_desc_trans_len(desc) + - 2) % SB_ONDISK_JOURNAL_SIZE(sb)); - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1095: setting journal " "start to offset %ld", - cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb)); - - /* init starting values for the first transaction, in case this is the last transaction to be replayed. 
*/ - journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb); - journal->j_last_flush_trans_id = trans_id; - journal->j_trans_id = trans_id + 1; - /* check for trans_id overflow */ - if (journal->j_trans_id == 0) - journal->j_trans_id = 10; - brelse(c_bh); - brelse(d_bh); - kfree(log_blocks); - kfree(real_blocks); - return 0; -} - -/* This function reads blocks starting from block and to max_block of bufsize - size (but no more than BUFNR blocks at a time). This proved to improve - mounting speed on self-rebuilding raid5 arrays at least. - Right now it is only used from journal code. But later we might use it - from other places. - Note: Do not use journal_getblk/sb_getblk functions here! */ -static struct buffer_head *reiserfs_breada(struct block_device *dev, - b_blocknr_t block, int bufsize, - b_blocknr_t max_block) -{ - struct buffer_head *bhlist[BUFNR]; - unsigned int blocks = BUFNR; - struct buffer_head *bh; - int i, j; - - bh = __getblk(dev, block, bufsize); - if (buffer_uptodate(bh)) - return (bh); - - if (block + BUFNR > max_block) { - blocks = max_block - block; - } - bhlist[0] = bh; - j = 1; - for (i = 1; i < blocks; i++) { - bh = __getblk(dev, block + i, bufsize); - if (buffer_uptodate(bh)) { - brelse(bh); - break; - } else - bhlist[j++] = bh; - } - ll_rw_block(READ, j, bhlist); - for (i = 1; i < j; i++) - brelse(bhlist[i]); - bh = bhlist[0]; - wait_on_buffer(bh); - if (buffer_uptodate(bh)) - return bh; - brelse(bh); - return NULL; -} - -/* -** read and replay the log -** on a clean unmount, the journal header's next unflushed pointer will be to an invalid -** transaction. This tests that before finding all the transactions in the log, which makes normal mount times fast. -** -** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid. -** -** On exit, it sets things up so the first transaction will work correctly. 
-*/ -static int journal_read(struct super_block *sb) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_journal_desc *desc; - unsigned int oldest_trans_id = 0; - unsigned int oldest_invalid_trans_id = 0; - time_t start; - unsigned long oldest_start = 0; - unsigned long cur_dblock = 0; - unsigned long newest_mount_id = 9; - struct buffer_head *d_bh; - struct reiserfs_journal_header *jh; - int valid_journal_header = 0; - int replay_count = 0; - int continue_replay = 1; - int ret; - char b[BDEVNAME_SIZE]; - - cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb); - reiserfs_info(sb, "checking transaction log (%s)\n", - bdevname(journal->j_dev_bd, b)); - start = get_seconds(); - - /* step 1, read in the journal header block. Check the transaction it says - ** is the first unflushed, and if that transaction is not valid, - ** replay is done - */ - journal->j_header_bh = journal_bread(sb, - SB_ONDISK_JOURNAL_1st_BLOCK(sb) - + SB_ONDISK_JOURNAL_SIZE(sb)); - if (!journal->j_header_bh) { - return 1; - } - jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data); - if (le32_to_cpu(jh->j_first_unflushed_offset) < - SB_ONDISK_JOURNAL_SIZE(sb) - && le32_to_cpu(jh->j_last_flush_trans_id) > 0) { - oldest_start = - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - le32_to_cpu(jh->j_first_unflushed_offset); - oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; - newest_mount_id = le32_to_cpu(jh->j_mount_id); - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1153: found in " - "header: first_unflushed_offset %d, last_flushed_trans_id " - "%lu", le32_to_cpu(jh->j_first_unflushed_offset), - le32_to_cpu(jh->j_last_flush_trans_id)); - valid_journal_header = 1; - - /* now, we try to read the first unflushed offset. If it is not valid, - ** there is nothing more we can do, and it makes no sense to read - ** through the whole log. 
- */ - d_bh = - journal_bread(sb, - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - le32_to_cpu(jh->j_first_unflushed_offset)); - ret = journal_transaction_is_valid(sb, d_bh, NULL, NULL); - if (!ret) { - continue_replay = 0; - } - brelse(d_bh); - goto start_log_replay; - } - - /* ok, there are transactions that need to be replayed. start with the first log block, find - ** all the valid transactions, and pick out the oldest. - */ - while (continue_replay - && cur_dblock < - (SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - SB_ONDISK_JOURNAL_SIZE(sb))) { - /* Note that it is required for blocksize of primary fs device and journal - device to be the same */ - d_bh = - reiserfs_breada(journal->j_dev_bd, cur_dblock, - sb->s_blocksize, - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - SB_ONDISK_JOURNAL_SIZE(sb)); - ret = - journal_transaction_is_valid(sb, d_bh, - &oldest_invalid_trans_id, - &newest_mount_id); - if (ret == 1) { - desc = (struct reiserfs_journal_desc *)d_bh->b_data; - if (oldest_start == 0) { /* init all oldest_ values */ - oldest_trans_id = get_desc_trans_id(desc); - oldest_start = d_bh->b_blocknr; - newest_mount_id = get_desc_mount_id(desc); - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1179: Setting " - "oldest_start to offset %llu, trans_id %lu", - oldest_start - - SB_ONDISK_JOURNAL_1st_BLOCK - (sb), oldest_trans_id); - } else if (oldest_trans_id > get_desc_trans_id(desc)) { - /* one we just read was older */ - oldest_trans_id = get_desc_trans_id(desc); - oldest_start = d_bh->b_blocknr; - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1180: Resetting " - "oldest_start to offset %lu, trans_id %lu", - oldest_start - - SB_ONDISK_JOURNAL_1st_BLOCK - (sb), oldest_trans_id); - } - if (newest_mount_id < get_desc_mount_id(desc)) { - newest_mount_id = get_desc_mount_id(desc); - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1299: Setting " - "newest_mount_id to %d", - get_desc_mount_id(desc)); - } - cur_dblock += get_desc_trans_len(desc) + 2; - } else { - cur_dblock++; - } - 
brelse(d_bh); - } - - start_log_replay: - cur_dblock = oldest_start; - if (oldest_trans_id) { - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1206: Starting replay " - "from offset %llu, trans_id %lu", - cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb), - oldest_trans_id); - - } - replay_count = 0; - while (continue_replay && oldest_trans_id > 0) { - ret = - journal_read_transaction(sb, cur_dblock, oldest_start, - oldest_trans_id, newest_mount_id); - if (ret < 0) { - return ret; - } else if (ret != 0) { - break; - } - cur_dblock = - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + journal->j_start; - replay_count++; - if (cur_dblock == oldest_start) - break; - } - - if (oldest_trans_id == 0) { - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1225: No valid " "transactions found"); - } - /* j_start does not get set correctly if we don't replay any transactions. - ** if we had a valid journal_header, set j_start to the first unflushed transaction value, - ** copy the trans_id from the header - */ - if (valid_journal_header && replay_count == 0) { - journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset); - journal->j_trans_id = - le32_to_cpu(jh->j_last_flush_trans_id) + 1; - /* check for trans_id overflow */ - if (journal->j_trans_id == 0) - journal->j_trans_id = 10; - journal->j_last_flush_trans_id = - le32_to_cpu(jh->j_last_flush_trans_id); - journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1; - } else { - journal->j_mount_id = newest_mount_id + 1; - } - reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " - "newest_mount_id to %lu", journal->j_mount_id); - journal->j_first_unflushed_offset = journal->j_start; - if (replay_count > 0) { - reiserfs_info(sb, - "replayed %d transactions in %lu seconds\n", - replay_count, get_seconds() - start); - } - if (!bdev_read_only(sb->s_bdev) && - _update_journal_header_block(sb, journal->j_start, - journal->j_last_flush_trans_id)) { - /* replay failed, caller must call free_journal_ram and abort - ** the mount - */ - 
return -1; - } - return 0; -} - -static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s) -{ - struct reiserfs_journal_list *jl; - jl = kzalloc(sizeof(struct reiserfs_journal_list), - GFP_NOFS | __GFP_NOFAIL); - INIT_LIST_HEAD(&jl->j_list); - INIT_LIST_HEAD(&jl->j_working_list); - INIT_LIST_HEAD(&jl->j_tail_bh_list); - INIT_LIST_HEAD(&jl->j_bh_list); - mutex_init(&jl->j_commit_mutex); - SB_JOURNAL(s)->j_num_lists++; - get_journal_list(jl); - return jl; -} - -static void journal_list_init(struct super_block *sb) -{ - SB_JOURNAL(sb)->j_current_jl = alloc_journal_list(sb); -} - -static int release_journal_dev(struct super_block *super, - struct reiserfs_journal *journal) -{ - int result; - - result = 0; - - if (journal->j_dev_bd != NULL) { - result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode); - journal->j_dev_bd = NULL; - } - - if (result != 0) { - reiserfs_warning(super, "sh-457", - "Cannot release journal device: %i", result); - } - return result; -} - -static int journal_init_dev(struct super_block *super, - struct reiserfs_journal *journal, - const char *jdev_name) -{ - int result; - dev_t jdev; - fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL; - char b[BDEVNAME_SIZE]; - - result = 0; - - journal->j_dev_bd = NULL; - jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? 
- new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; - - if (bdev_read_only(super->s_bdev)) - blkdev_mode = FMODE_READ; - - /* there is no "jdev" option and journal is on separate device */ - if ((!jdev_name || !jdev_name[0])) { - if (jdev == super->s_dev) - blkdev_mode &= ~FMODE_EXCL; - journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode, - journal); - journal->j_dev_mode = blkdev_mode; - if (IS_ERR(journal->j_dev_bd)) { - result = PTR_ERR(journal->j_dev_bd); - journal->j_dev_bd = NULL; - reiserfs_warning(super, "sh-458", - "cannot init journal device '%s': %i", - __bdevname(jdev, b), result); - return result; - } else if (jdev != super->s_dev) - set_blocksize(journal->j_dev_bd, super->s_blocksize); - - return 0; - } - - journal->j_dev_mode = blkdev_mode; - journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal); - if (IS_ERR(journal->j_dev_bd)) { - result = PTR_ERR(journal->j_dev_bd); - journal->j_dev_bd = NULL; - reiserfs_warning(super, - "journal_init_dev: Cannot open '%s': %i", - jdev_name, result); - return result; - } - - set_blocksize(journal->j_dev_bd, super->s_blocksize); - reiserfs_info(super, - "journal_init_dev: journal device: %s\n", - bdevname(journal->j_dev_bd, b)); - return 0; -} - -/** - * When creating/tuning a file system user can assign some - * journal params within boundaries which depend on the ratio - * blocksize/standard_blocksize. - * - * For blocks >= standard_blocksize transaction size should - * be not less then JOURNAL_TRANS_MIN_DEFAULT, and not more - * then JOURNAL_TRANS_MAX_DEFAULT. - * - * For blocks < standard_blocksize these boundaries should be - * decreased proportionally. - */ -#define REISERFS_STANDARD_BLKSIZE (4096) - -static int check_advise_trans_params(struct super_block *sb, - struct reiserfs_journal *journal) -{ - if (journal->j_trans_max) { - /* Non-default journal params. - Do sanity check for them. 
*/ - int ratio = 1; - if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE) - ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize; - - if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio || - journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio || - SB_ONDISK_JOURNAL_SIZE(sb) / journal->j_trans_max < - JOURNAL_MIN_RATIO) { - reiserfs_warning(sb, "sh-462", - "bad transaction max size (%u). " - "FSCK?", journal->j_trans_max); - return 1; - } - if (journal->j_max_batch != (journal->j_trans_max) * - JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) { - reiserfs_warning(sb, "sh-463", - "bad transaction max batch (%u). " - "FSCK?", journal->j_max_batch); - return 1; - } - } else { - /* Default journal params. - The file system was created by old version - of mkreiserfs, so some fields contain zeros, - and we need to advise proper values for them */ - if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) { - reiserfs_warning(sb, "sh-464", "bad blocksize (%u)", - sb->s_blocksize); - return 1; - } - journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT; - journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT; - journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE; - } - return 0; -} - -/* -** must be called once on fs mount. 
calls journal_read for you -*/ -int journal_init(struct super_block *sb, const char *j_dev_name, - int old_format, unsigned int commit_max_age) -{ - int num_cnodes = SB_ONDISK_JOURNAL_SIZE(sb) * 2; - struct buffer_head *bhjh; - struct reiserfs_super_block *rs; - struct reiserfs_journal_header *jh; - struct reiserfs_journal *journal; - struct reiserfs_journal_list *jl; - char b[BDEVNAME_SIZE]; - int ret; - - journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal)); - if (!journal) { - reiserfs_warning(sb, "journal-1256", - "unable to get memory for journal structure"); - return 1; - } - INIT_LIST_HEAD(&journal->j_bitmap_nodes); - INIT_LIST_HEAD(&journal->j_prealloc_list); - INIT_LIST_HEAD(&journal->j_working_list); - INIT_LIST_HEAD(&journal->j_journal_list); - journal->j_persistent_trans = 0; - if (reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap, - reiserfs_bmap_count(sb))) - goto free_and_return; - - allocate_bitmap_nodes(sb); - - /* reserved for journal area support */ - SB_JOURNAL_1st_RESERVED_BLOCK(sb) = (old_format ? - REISERFS_OLD_DISK_OFFSET_IN_BYTES - / sb->s_blocksize + - reiserfs_bmap_count(sb) + - 1 : - REISERFS_DISK_OFFSET_IN_BYTES / - sb->s_blocksize + 2); - - /* Sanity check to see is the standard journal fitting within first bitmap - (actual for small blocksizes) */ - if (!SB_ONDISK_JOURNAL_DEVICE(sb) && - (SB_JOURNAL_1st_RESERVED_BLOCK(sb) + - SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) { - reiserfs_warning(sb, "journal-1393", - "journal does not fit for area addressed " - "by first of bitmap blocks. It starts at " - "%u and its size is %u. 
Block size %ld", - SB_JOURNAL_1st_RESERVED_BLOCK(sb), - SB_ONDISK_JOURNAL_SIZE(sb), - sb->s_blocksize); - goto free_and_return; - } - - if (journal_init_dev(sb, journal, j_dev_name) != 0) { - reiserfs_warning(sb, "sh-462", - "unable to initialize jornal device"); - goto free_and_return; - } - - rs = SB_DISK_SUPER_BLOCK(sb); - - /* read journal header */ - bhjh = journal_bread(sb, - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - SB_ONDISK_JOURNAL_SIZE(sb)); - if (!bhjh) { - reiserfs_warning(sb, "sh-459", - "unable to read journal header"); - goto free_and_return; - } - jh = (struct reiserfs_journal_header *)(bhjh->b_data); - - /* make sure that journal matches to the super block */ - if (is_reiserfs_jr(rs) - && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != - sb_jp_journal_magic(rs))) { - reiserfs_warning(sb, "sh-460", - "journal header magic %x (device %s) does " - "not match to magic found in super block %x", - jh->jh_journal.jp_journal_magic, - bdevname(journal->j_dev_bd, b), - sb_jp_journal_magic(rs)); - brelse(bhjh); - goto free_and_return; - } - - journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max); - journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch); - journal->j_max_commit_age = - le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age); - journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; - - if (check_advise_trans_params(sb, journal) != 0) - goto free_and_return; - journal->j_default_max_commit_age = journal->j_max_commit_age; - - if (commit_max_age != 0) { - journal->j_max_commit_age = commit_max_age; - journal->j_max_trans_age = commit_max_age; - } - - reiserfs_info(sb, "journal params: device %s, size %u, " - "journal first block %u, max trans len %u, max batch %u, " - "max commit age %u, max trans age %u\n", - bdevname(journal->j_dev_bd, b), - SB_ONDISK_JOURNAL_SIZE(sb), - SB_ONDISK_JOURNAL_1st_BLOCK(sb), - journal->j_trans_max, - journal->j_max_batch, - journal->j_max_commit_age, journal->j_max_trans_age); - - 
brelse(bhjh); - - journal->j_list_bitmap_index = 0; - journal_list_init(sb); - - memset(journal->j_list_hash_table, 0, - JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); - - INIT_LIST_HEAD(&journal->j_dirty_buffers); - spin_lock_init(&journal->j_dirty_buffers_lock); - - journal->j_start = 0; - journal->j_len = 0; - journal->j_len_alloc = 0; - atomic_set(&(journal->j_wcount), 0); - atomic_set(&(journal->j_async_throttle), 0); - journal->j_bcount = 0; - journal->j_trans_start_time = 0; - journal->j_last = NULL; - journal->j_first = NULL; - init_waitqueue_head(&(journal->j_join_wait)); - mutex_init(&journal->j_mutex); - mutex_init(&journal->j_flush_mutex); - - journal->j_trans_id = 10; - journal->j_mount_id = 10; - journal->j_state = 0; - atomic_set(&(journal->j_jlock), 0); - journal->j_cnode_free_list = allocate_cnodes(num_cnodes); - journal->j_cnode_free_orig = journal->j_cnode_free_list; - journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; - journal->j_cnode_used = 0; - journal->j_must_wait = 0; - - if (journal->j_cnode_free == 0) { - reiserfs_warning(sb, "journal-2004", "Journal cnode memory " - "allocation failed (%ld bytes). Journal is " - "too large for available memory. Usually " - "this is due to a journal that is too large.", - sizeof (struct reiserfs_journal_cnode) * num_cnodes); - goto free_and_return; - } - - init_journal_hash(sb); - jl = journal->j_current_jl; - - /* - * get_list_bitmap() may call flush_commit_list() which - * requires the lock. Calling flush_commit_list() shouldn't happen - * this early but I like to be paranoid. - */ - reiserfs_write_lock(sb); - jl->j_list_bitmap = get_list_bitmap(sb, jl); - reiserfs_write_unlock(sb); - if (!jl->j_list_bitmap) { - reiserfs_warning(sb, "journal-2005", - "get_list_bitmap failed for journal list 0"); - goto free_and_return; - } - - /* - * Journal_read needs to be inspected in order to push down - * the lock further inside (or even remove it). 
- */ - reiserfs_write_lock(sb); - ret = journal_read(sb); - reiserfs_write_unlock(sb); - if (ret < 0) { - reiserfs_warning(sb, "reiserfs-2006", - "Replay Failure, unable to mount"); - goto free_and_return; - } - - reiserfs_mounted_fs_count++; - if (reiserfs_mounted_fs_count <= 1) - commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0); - - INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); - journal->j_work_sb = sb; - return 0; - free_and_return: - free_journal_ram(sb); - return 1; -} - -/* -** test for a polite end of the current transaction. Used by file_write, and should -** be used by delete to make sure they don't write more than can fit inside a single -** transaction -*/ -int journal_transaction_should_end(struct reiserfs_transaction_handle *th, - int new_alloc) -{ - struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); - time_t now = get_seconds(); - /* cannot restart while nested */ - BUG_ON(!th->t_trans_id); - if (th->t_refcount > 1) - return 0; - if (journal->j_must_wait > 0 || - (journal->j_len_alloc + new_alloc) >= journal->j_max_batch || - atomic_read(&(journal->j_jlock)) || - (now - journal->j_trans_start_time) > journal->j_max_trans_age || - journal->j_cnode_free < (journal->j_trans_max * 3)) { - return 1; - } - - journal->j_len_alloc += new_alloc; - th->t_blocks_allocated += new_alloc ; - return 0; -} - -/* this must be called inside a transaction -*/ -void reiserfs_block_writes(struct reiserfs_transaction_handle *th) -{ - struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); - BUG_ON(!th->t_trans_id); - journal->j_must_wait = 1; - set_bit(J_WRITERS_BLOCKED, &journal->j_state); - return; -} - -/* this must be called without a transaction started -*/ -void reiserfs_allow_writes(struct super_block *s) -{ - struct reiserfs_journal *journal = SB_JOURNAL(s); - clear_bit(J_WRITERS_BLOCKED, &journal->j_state); - wake_up(&journal->j_join_wait); -} - -/* this must be called without a transaction started -*/ -void 
reiserfs_wait_on_write_block(struct super_block *s) -{ - struct reiserfs_journal *journal = SB_JOURNAL(s); - wait_event(journal->j_join_wait, - !test_bit(J_WRITERS_BLOCKED, &journal->j_state)); -} - -static void queue_log_writer(struct super_block *s) -{ - wait_queue_t wait; - struct reiserfs_journal *journal = SB_JOURNAL(s); - set_bit(J_WRITERS_QUEUED, &journal->j_state); - - /* - * we don't want to use wait_event here because - * we only want to wait once. - */ - init_waitqueue_entry(&wait, current); - add_wait_queue(&journal->j_join_wait, &wait); - set_current_state(TASK_UNINTERRUPTIBLE); - if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) { - reiserfs_write_unlock(s); - schedule(); - reiserfs_write_lock(s); - } - __set_current_state(TASK_RUNNING); - remove_wait_queue(&journal->j_join_wait, &wait); -} - -static void wake_queued_writers(struct super_block *s) -{ - struct reiserfs_journal *journal = SB_JOURNAL(s); - if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state)) - wake_up(&journal->j_join_wait); -} - -static void let_transaction_grow(struct super_block *sb, unsigned int trans_id) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - unsigned long bcount = journal->j_bcount; - while (1) { - reiserfs_write_unlock(sb); - schedule_timeout_uninterruptible(1); - reiserfs_write_lock(sb); - journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; - while ((atomic_read(&journal->j_wcount) > 0 || - atomic_read(&journal->j_jlock)) && - journal->j_trans_id == trans_id) { - queue_log_writer(sb); - } - if (journal->j_trans_id != trans_id) - break; - if (bcount == journal->j_bcount) - break; - bcount = journal->j_bcount; - } -} - -/* join == true if you must join an existing transaction. -** join == false if you can deal with waiting for others to finish -** -** this will block until the transaction is joinable. send the number of blocks you -** expect to use in nblocks. 
-*/ -static int do_journal_begin_r(struct reiserfs_transaction_handle *th, - struct super_block *sb, unsigned long nblocks, - int join) -{ - time_t now = get_seconds(); - unsigned int old_trans_id; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_transaction_handle myth; - int sched_count = 0; - int retval; - - reiserfs_check_lock_depth(sb, "journal_begin"); - BUG_ON(nblocks > journal->j_trans_max); - - PROC_INFO_INC(sb, journal.journal_being); - /* set here for journal_join */ - th->t_refcount = 1; - th->t_super = sb; - - relock: - lock_journal(sb); - if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) { - unlock_journal(sb); - retval = journal->j_errno; - goto out_fail; - } - journal->j_bcount++; - - if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { - unlock_journal(sb); - reiserfs_write_unlock(sb); - reiserfs_wait_on_write_block(sb); - reiserfs_write_lock(sb); - PROC_INFO_INC(sb, journal.journal_relock_writers); - goto relock; - } - now = get_seconds(); - - /* if there is no room in the journal OR - ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning - ** we don't sleep if there aren't other writers - */ - - if ((!join && journal->j_must_wait > 0) || - (!join - && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch) - || (!join && atomic_read(&journal->j_wcount) > 0 - && journal->j_trans_start_time > 0 - && (now - journal->j_trans_start_time) > - journal->j_max_trans_age) || (!join - && atomic_read(&journal->j_jlock)) - || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { - - old_trans_id = journal->j_trans_id; - unlock_journal(sb); /* allow others to finish this transaction */ - - if (!join && (journal->j_len_alloc + nblocks + 2) >= - journal->j_max_batch && - ((journal->j_len + nblocks + 2) * 100) < - (journal->j_len_alloc * 75)) { - if (atomic_read(&journal->j_wcount) > 10) { - sched_count++; - queue_log_writer(sb); - goto relock; - } - } - /* 
don't mess with joining the transaction if all we have to do is - * wait for someone else to do a commit - */ - if (atomic_read(&journal->j_jlock)) { - while (journal->j_trans_id == old_trans_id && - atomic_read(&journal->j_jlock)) { - queue_log_writer(sb); - } - goto relock; - } - retval = journal_join(&myth, sb, 1); - if (retval) - goto out_fail; - - /* someone might have ended the transaction while we joined */ - if (old_trans_id != journal->j_trans_id) { - retval = do_journal_end(&myth, sb, 1, 0); - } else { - retval = do_journal_end(&myth, sb, 1, COMMIT_NOW); - } - - if (retval) - goto out_fail; - - PROC_INFO_INC(sb, journal.journal_relock_wcount); - goto relock; - } - /* we are the first writer, set trans_id */ - if (journal->j_trans_start_time == 0) { - journal->j_trans_start_time = get_seconds(); - } - atomic_inc(&(journal->j_wcount)); - journal->j_len_alloc += nblocks; - th->t_blocks_logged = 0; - th->t_blocks_allocated = nblocks; - th->t_trans_id = journal->j_trans_id; - unlock_journal(sb); - INIT_LIST_HEAD(&th->t_list); - return 0; - - out_fail: - memset(th, 0, sizeof(*th)); - /* Re-set th->t_super, so we can properly keep track of how many - * persistent transactions there are. We need to do this so if this - * call is part of a failed restart_transaction, we can free it later */ - th->t_super = sb; - return retval; -} - -struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct - super_block - *s, - int nblocks) -{ - int ret; - struct reiserfs_transaction_handle *th; - - /* if we're nesting into an existing transaction. 
It will be - ** persistent on its own - */ - if (reiserfs_transaction_running(s)) { - th = current->journal_info; - th->t_refcount++; - BUG_ON(th->t_refcount < 2); - - return th; - } - th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); - if (!th) - return NULL; - ret = journal_begin(th, s, nblocks); - if (ret) { - kfree(th); - return NULL; - } - - SB_JOURNAL(s)->j_persistent_trans++; - return th; -} - -int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th) -{ - struct super_block *s = th->t_super; - int ret = 0; - if (th->t_trans_id) - ret = journal_end(th, th->t_super, th->t_blocks_allocated); - else - ret = -EIO; - if (th->t_refcount == 0) { - SB_JOURNAL(s)->j_persistent_trans--; - kfree(th); - } - return ret; -} - -static int journal_join(struct reiserfs_transaction_handle *th, - struct super_block *sb, unsigned long nblocks) -{ - struct reiserfs_transaction_handle *cur_th = current->journal_info; - - /* this keeps do_journal_end from NULLing out the current->journal_info - ** pointer - */ - th->t_handle_save = cur_th; - BUG_ON(cur_th && cur_th->t_refcount > 1); - return do_journal_begin_r(th, sb, nblocks, JBEGIN_JOIN); -} - -int journal_join_abort(struct reiserfs_transaction_handle *th, - struct super_block *sb, unsigned long nblocks) -{ - struct reiserfs_transaction_handle *cur_th = current->journal_info; - - /* this keeps do_journal_end from NULLing out the current->journal_info - ** pointer - */ - th->t_handle_save = cur_th; - BUG_ON(cur_th && cur_th->t_refcount > 1); - return do_journal_begin_r(th, sb, nblocks, JBEGIN_ABORT); -} - -int journal_begin(struct reiserfs_transaction_handle *th, - struct super_block *sb, unsigned long nblocks) -{ - struct reiserfs_transaction_handle *cur_th = current->journal_info; - int ret; - - th->t_handle_save = NULL; - if (cur_th) { - /* we are nesting into the current transaction */ - if (cur_th->t_super == sb) { - BUG_ON(!cur_th->t_refcount); - cur_th->t_refcount++; - memcpy(th, 
cur_th, sizeof(*th)); - if (th->t_refcount <= 1) - reiserfs_warning(sb, "reiserfs-2005", - "BAD: refcount <= 1, but " - "journal_info != 0"); - return 0; - } else { - /* we've ended up with a handle from a different filesystem. - ** save it and restore on journal_end. This should never - ** really happen... - */ - reiserfs_warning(sb, "clm-2100", - "nesting info a different FS"); - th->t_handle_save = current->journal_info; - current->journal_info = th; - } - } else { - current->journal_info = th; - } - ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG); - BUG_ON(current->journal_info != th); - - /* I guess this boils down to being the reciprocal of clm-2100 above. - * If do_journal_begin_r fails, we need to put it back, since journal_end - * won't be called to do it. */ - if (ret) - current->journal_info = th->t_handle_save; - else - BUG_ON(!th->t_refcount); - - return ret; -} - -/* -** puts bh into the current transaction. If it was already there, reorders removes the -** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order). -** -** if it was dirty, cleans and files onto the clean list. I can't let it be dirty again until the -** transaction is committed. -** -** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. 
-*/ -int journal_mark_dirty(struct reiserfs_transaction_handle *th, - struct super_block *sb, struct buffer_head *bh) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_journal_cnode *cn = NULL; - int count_already_incd = 0; - int prepared = 0; - BUG_ON(!th->t_trans_id); - - PROC_INFO_INC(sb, journal.mark_dirty); - if (th->t_trans_id != journal->j_trans_id) { - reiserfs_panic(th->t_super, "journal-1577", - "handle trans id %ld != current trans id %ld", - th->t_trans_id, journal->j_trans_id); - } - - sb->s_dirt = 1; - - prepared = test_clear_buffer_journal_prepared(bh); - clear_buffer_journal_restore_dirty(bh); - /* already in this transaction, we are done */ - if (buffer_journaled(bh)) { - PROC_INFO_INC(sb, journal.mark_dirty_already); - return 0; - } - - /* this must be turned into a panic instead of a warning. We can't allow - ** a dirty or journal_dirty or locked buffer to be logged, as some changes - ** could get to disk too early. NOT GOOD. - */ - if (!prepared || buffer_dirty(bh)) { - reiserfs_warning(sb, "journal-1777", - "buffer %llu bad state " - "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT", - (unsigned long long)bh->b_blocknr, - prepared ? ' ' : '!', - buffer_locked(bh) ? ' ' : '!', - buffer_dirty(bh) ? ' ' : '!', - buffer_journal_dirty(bh) ? ' ' : '!'); - } - - if (atomic_read(&(journal->j_wcount)) <= 0) { - reiserfs_warning(sb, "journal-1409", - "returning because j_wcount was %d", - atomic_read(&(journal->j_wcount))); - return 1; - } - /* this error means I've screwed up, and we've overflowed the transaction. - ** Nothing can be done here, except make the FS readonly or panic. 
- */ - if (journal->j_len >= journal->j_trans_max) { - reiserfs_panic(th->t_super, "journal-1413", - "j_len (%lu) is too big", - journal->j_len); - } - - if (buffer_journal_dirty(bh)) { - count_already_incd = 1; - PROC_INFO_INC(sb, journal.mark_dirty_notjournal); - clear_buffer_journal_dirty(bh); - } - - if (journal->j_len > journal->j_len_alloc) { - journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT; - } - - set_buffer_journaled(bh); - - /* now put this guy on the end */ - if (!cn) { - cn = get_cnode(sb); - if (!cn) { - reiserfs_panic(sb, "journal-4", "get_cnode failed!"); - } - - if (th->t_blocks_logged == th->t_blocks_allocated) { - th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT; - journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT; - } - th->t_blocks_logged++; - journal->j_len++; - - cn->bh = bh; - cn->blocknr = bh->b_blocknr; - cn->sb = sb; - cn->jlist = NULL; - insert_journal_hash(journal->j_hash_table, cn); - if (!count_already_incd) { - get_bh(bh); - } - } - cn->next = NULL; - cn->prev = journal->j_last; - cn->bh = bh; - if (journal->j_last) { - journal->j_last->next = cn; - journal->j_last = cn; - } else { - journal->j_first = cn; - journal->j_last = cn; - } - return 0; -} - -int journal_end(struct reiserfs_transaction_handle *th, - struct super_block *sb, unsigned long nblocks) -{ - if (!current->journal_info && th->t_refcount > 1) - reiserfs_warning(sb, "REISER-NESTING", - "th NULL, refcount %d", th->t_refcount); - - if (!th->t_trans_id) { - WARN_ON(1); - return -EIO; - } - - th->t_refcount--; - if (th->t_refcount > 0) { - struct reiserfs_transaction_handle *cur_th = - current->journal_info; - - /* we aren't allowed to close a nested transaction on a different - ** filesystem from the one in the task struct - */ - BUG_ON(cur_th->t_super != th->t_super); - - if (th != cur_th) { - memcpy(current->journal_info, th, sizeof(*th)); - th->t_trans_id = 0; - } - return 0; - } else { - return do_journal_end(th, sb, nblocks, 0); - } -} - -/* removes 
from the current transaction, relsing and descrementing any counters. -** also files the removed buffer directly onto the clean list -** -** called by journal_mark_freed when a block has been deleted -** -** returns 1 if it cleaned and relsed the buffer. 0 otherwise -*/ -static int remove_from_transaction(struct super_block *sb, - b_blocknr_t blocknr, int already_cleaned) -{ - struct buffer_head *bh; - struct reiserfs_journal_cnode *cn; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - int ret = 0; - - cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr); - if (!cn || !cn->bh) { - return ret; - } - bh = cn->bh; - if (cn->prev) { - cn->prev->next = cn->next; - } - if (cn->next) { - cn->next->prev = cn->prev; - } - if (cn == journal->j_first) { - journal->j_first = cn->next; - } - if (cn == journal->j_last) { - journal->j_last = cn->prev; - } - if (bh) - remove_journal_hash(sb, journal->j_hash_table, NULL, - bh->b_blocknr, 0); - clear_buffer_journaled(bh); /* don't log this one */ - - if (!already_cleaned) { - clear_buffer_journal_dirty(bh); - clear_buffer_dirty(bh); - clear_buffer_journal_test(bh); - put_bh(bh); - if (atomic_read(&(bh->b_count)) < 0) { - reiserfs_warning(sb, "journal-1752", - "b_count < 0"); - } - ret = 1; - } - journal->j_len--; - journal->j_len_alloc--; - free_cnode(sb, cn); - return ret; -} - -/* -** for any cnode in a journal list, it can only be dirtied of all the -** transactions that include it are committed to disk. -** this checks through each transaction, and returns 1 if you are allowed to dirty, -** and 0 if you aren't -** -** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log -** blocks for a given transaction on disk -** -*/ -static int can_dirty(struct reiserfs_journal_cnode *cn) -{ - struct super_block *sb = cn->sb; - b_blocknr_t blocknr = cn->blocknr; - struct reiserfs_journal_cnode *cur = cn->hprev; - int can_dirty = 1; - - /* first test hprev. 
These are all newer than cn, so any node here - ** with the same block number and dev means this node can't be sent - ** to disk right now. - */ - while (cur && can_dirty) { - if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && - cur->blocknr == blocknr) { - can_dirty = 0; - } - cur = cur->hprev; - } - /* then test hnext. These are all older than cn. As long as they - ** are committed to the log, it is safe to write cn to disk - */ - cur = cn->hnext; - while (cur && can_dirty) { - if (cur->jlist && cur->jlist->j_len > 0 && - atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh && - cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) { - can_dirty = 0; - } - cur = cur->hnext; - } - return can_dirty; -} - -/* syncs the commit blocks, but does not force the real buffers to disk -** will wait until the current transaction is done/committed before returning -*/ -int journal_end_sync(struct reiserfs_transaction_handle *th, - struct super_block *sb, unsigned long nblocks) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - - BUG_ON(!th->t_trans_id); - /* you can sync while nested, very, very bad */ - BUG_ON(th->t_refcount > 1); - if (journal->j_len == 0) { - reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), - 1); - journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb)); - } - return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT); -} - -/* -** writeback the pending async commits to disk -*/ -static void flush_async_commits(struct work_struct *work) -{ - struct reiserfs_journal *journal = - container_of(work, struct reiserfs_journal, j_work.work); - struct super_block *sb = journal->j_work_sb; - struct reiserfs_journal_list *jl; - struct list_head *entry; - - reiserfs_write_lock(sb); - if (!list_empty(&journal->j_journal_list)) { - /* last entry is the youngest, commit it and you get everything */ - entry = journal->j_journal_list.prev; - jl = JOURNAL_LIST_ENTRY(entry); - flush_commit_list(sb, jl, 1); - } - reiserfs_write_unlock(sb); -} - -/* 
-** flushes any old transactions to disk -** ends the current transaction if it is too old -*/ -int reiserfs_flush_old_commits(struct super_block *sb) -{ - time_t now; - struct reiserfs_transaction_handle th; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - - now = get_seconds(); - /* safety check so we don't flush while we are replaying the log during - * mount - */ - if (list_empty(&journal->j_journal_list)) { - return 0; - } - - /* check the current transaction. If there are no writers, and it is - * too old, finish it, and force the commit blocks to disk - */ - if (atomic_read(&journal->j_wcount) <= 0 && - journal->j_trans_start_time > 0 && - journal->j_len > 0 && - (now - journal->j_trans_start_time) > journal->j_max_trans_age) { - if (!journal_join(&th, sb, 1)) { - reiserfs_prepare_for_journal(sb, - SB_BUFFER_WITH_SB(sb), - 1); - journal_mark_dirty(&th, sb, - SB_BUFFER_WITH_SB(sb)); - - /* we're only being called from kreiserfsd, it makes no sense to do - ** an async commit so that kreiserfsd can do it later - */ - do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT); - } - } - return sb->s_dirt; -} - -/* -** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit -** -** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all -** the writers are done. By the time it wakes up, the transaction it was called has already ended, so it just -** flushes the commit list and returns 0. -** -** Won't batch when flush or commit_now is set. Also won't batch when others are waiting on j_join_wait. -** -** Note, we can't allow the journal_end to proceed while there are still writers in the log. 
-*/ -static int check_journal_end(struct reiserfs_transaction_handle *th, - struct super_block *sb, unsigned long nblocks, - int flags) -{ - - time_t now; - int flush = flags & FLUSH_ALL; - int commit_now = flags & COMMIT_NOW; - int wait_on_commit = flags & WAIT; - struct reiserfs_journal_list *jl; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - - BUG_ON(!th->t_trans_id); - - if (th->t_trans_id != journal->j_trans_id) { - reiserfs_panic(th->t_super, "journal-1577", - "handle trans id %ld != current trans id %ld", - th->t_trans_id, journal->j_trans_id); - } - - journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged); - if (atomic_read(&(journal->j_wcount)) > 0) { /* <= 0 is allowed. unmounting might not call begin */ - atomic_dec(&(journal->j_wcount)); - } - - /* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released - ** will be dealt with by next transaction that actually writes something, but should be taken - ** care of in this trans - */ - BUG_ON(journal->j_len == 0); - - /* if wcount > 0, and we are called to with flush or commit_now, - ** we wait on j_join_wait. We will wake up when the last writer has - ** finished the transaction, and started it on its way to the disk. - ** Then, we flush the commit or journal list, and just return 0 - ** because the rest of journal end was already done for this transaction. 
- */ - if (atomic_read(&(journal->j_wcount)) > 0) { - if (flush || commit_now) { - unsigned trans_id; - - jl = journal->j_current_jl; - trans_id = jl->j_trans_id; - if (wait_on_commit) - jl->j_state |= LIST_COMMIT_PENDING; - atomic_set(&(journal->j_jlock), 1); - if (flush) { - journal->j_next_full_flush = 1; - } - unlock_journal(sb); - - /* sleep while the current transaction is still j_jlocked */ - while (journal->j_trans_id == trans_id) { - if (atomic_read(&journal->j_jlock)) { - queue_log_writer(sb); - } else { - lock_journal(sb); - if (journal->j_trans_id == trans_id) { - atomic_set(&(journal->j_jlock), - 1); - } - unlock_journal(sb); - } - } - BUG_ON(journal->j_trans_id == trans_id); - - if (commit_now - && journal_list_still_alive(sb, trans_id) - && wait_on_commit) { - flush_commit_list(sb, jl, 1); - } - return 0; - } - unlock_journal(sb); - return 0; - } - - /* deal with old transactions where we are the last writers */ - now = get_seconds(); - if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) { - commit_now = 1; - journal->j_next_async_flush = 1; - } - /* don't batch when someone is waiting on j_join_wait */ - /* don't batch when syncing the commit or flushing the whole trans */ - if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock))) - && !flush && !commit_now && (journal->j_len < journal->j_max_batch) - && journal->j_len_alloc < journal->j_max_batch - && journal->j_cnode_free > (journal->j_trans_max * 3)) { - journal->j_bcount++; - unlock_journal(sb); - return 0; - } - - if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(sb)) { - reiserfs_panic(sb, "journal-003", - "j_start (%ld) is too high", - journal->j_start); - } - return 1; -} - -/* -** Does all the work that makes deleting blocks safe. -** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on. -** -** otherwise: -** set a bit for the block in the journal bitmap. 
That will prevent it from being allocated for unformatted nodes -** before this transaction has finished. -** -** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers. That will prevent any old transactions with -** this block from trying to flush to the real location. Since we aren't removing the cnode from the journal_list_hash, -** the block can't be reallocated yet. -** -** Then remove it from the current transaction, decrementing any counters and filing it on the clean list. -*/ -int journal_mark_freed(struct reiserfs_transaction_handle *th, - struct super_block *sb, b_blocknr_t blocknr) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_journal_cnode *cn = NULL; - struct buffer_head *bh = NULL; - struct reiserfs_list_bitmap *jb = NULL; - int cleaned = 0; - BUG_ON(!th->t_trans_id); - - cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr); - if (cn && cn->bh) { - bh = cn->bh; - get_bh(bh); - } - /* if it is journal new, we just remove it from this transaction */ - if (bh && buffer_journal_new(bh)) { - clear_buffer_journal_new(bh); - clear_prepared_bits(bh); - reiserfs_clean_and_file_buffer(bh); - cleaned = remove_from_transaction(sb, blocknr, cleaned); - } else { - /* set the bit for this block in the journal bitmap for this transaction */ - jb = journal->j_current_jl->j_list_bitmap; - if (!jb) { - reiserfs_panic(sb, "journal-1702", - "journal_list_bitmap is NULL"); - } - set_bit_in_list_bitmap(sb, blocknr, jb); - - /* Note, the entire while loop is not allowed to schedule. 
*/ - - if (bh) { - clear_prepared_bits(bh); - reiserfs_clean_and_file_buffer(bh); - } - cleaned = remove_from_transaction(sb, blocknr, cleaned); - - /* find all older transactions with this block, make sure they don't try to write it out */ - cn = get_journal_hash_dev(sb, journal->j_list_hash_table, - blocknr); - while (cn) { - if (sb == cn->sb && blocknr == cn->blocknr) { - set_bit(BLOCK_FREED, &cn->state); - if (cn->bh) { - if (!cleaned) { - /* remove_from_transaction will brelse the buffer if it was - ** in the current trans - */ - clear_buffer_journal_dirty(cn-> - bh); - clear_buffer_dirty(cn->bh); - clear_buffer_journal_test(cn-> - bh); - cleaned = 1; - put_bh(cn->bh); - if (atomic_read - (&(cn->bh->b_count)) < 0) { - reiserfs_warning(sb, - "journal-2138", - "cn->bh->b_count < 0"); - } - } - if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */ - atomic_dec(& - (cn->jlist-> - j_nonzerolen)); - } - cn->bh = NULL; - } - } - cn = cn->hnext; - } - } - - if (bh) - release_buffer_page(bh); /* get_hash grabs the buffer */ - return 0; -} - -void reiserfs_update_inode_transaction(struct inode *inode) -{ - struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb); - REISERFS_I(inode)->i_jl = journal->j_current_jl; - REISERFS_I(inode)->i_trans_id = journal->j_trans_id; -} - -/* - * returns -1 on error, 0 if no commits/barriers were done and 1 - * if a transaction was actually committed and the barrier was done - */ -static int __commit_trans_jl(struct inode *inode, unsigned long id, - struct reiserfs_journal_list *jl) -{ - struct reiserfs_transaction_handle th; - struct super_block *sb = inode->i_sb; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - int ret = 0; - - /* is it from the current transaction, or from an unknown transaction? 
*/ - if (id == journal->j_trans_id) { - jl = journal->j_current_jl; - /* try to let other writers come in and grow this transaction */ - let_transaction_grow(sb, id); - if (journal->j_trans_id != id) { - goto flush_commit_only; - } - - ret = journal_begin(&th, sb, 1); - if (ret) - return ret; - - /* someone might have ended this transaction while we joined */ - if (journal->j_trans_id != id) { - reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), - 1); - journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb)); - ret = journal_end(&th, sb, 1); - goto flush_commit_only; - } - - ret = journal_end_sync(&th, sb, 1); - if (!ret) - ret = 1; - - } else { - /* this gets tricky, we have to make sure the journal list in - * the inode still exists. We know the list is still around - * if we've got a larger transaction id than the oldest list - */ - flush_commit_only: - if (journal_list_still_alive(inode->i_sb, id)) { - /* - * we only set ret to 1 when we know for sure - * the barrier hasn't been started yet on the commit - * block. - */ - if (atomic_read(&jl->j_commit_left) > 1) - ret = 1; - flush_commit_list(sb, jl, 1); - if (journal->j_errno) - ret = journal->j_errno; - } - } - /* otherwise the list is gone, and long since committed */ - return ret; -} - -int reiserfs_commit_for_inode(struct inode *inode) -{ - unsigned int id = REISERFS_I(inode)->i_trans_id; - struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; - - /* for the whole inode, assume unset id means it was - * changed in the current transaction. 
More conservative - */ - if (!id || !jl) { - reiserfs_update_inode_transaction(inode); - id = REISERFS_I(inode)->i_trans_id; - /* jl will be updated in __commit_trans_jl */ - } - - return __commit_trans_jl(inode, id, jl); -} - -void reiserfs_restore_prepared_buffer(struct super_block *sb, - struct buffer_head *bh) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - PROC_INFO_INC(sb, journal.restore_prepared); - if (!bh) { - return; - } - if (test_clear_buffer_journal_restore_dirty(bh) && - buffer_journal_dirty(bh)) { - struct reiserfs_journal_cnode *cn; - cn = get_journal_hash_dev(sb, - journal->j_list_hash_table, - bh->b_blocknr); - if (cn && can_dirty(cn)) { - set_buffer_journal_test(bh); - mark_buffer_dirty(bh); - } - } - clear_buffer_journal_prepared(bh); -} - -extern struct tree_balance *cur_tb; -/* -** before we can change a metadata block, we have to make sure it won't -** be written to disk while we are altering it. So, we must: -** clean it -** wait on it. -** -*/ -int reiserfs_prepare_for_journal(struct super_block *sb, - struct buffer_head *bh, int wait) -{ - PROC_INFO_INC(sb, journal.prepare); - - if (!trylock_buffer(bh)) { - if (!wait) - return 0; - lock_buffer(bh); - } - set_buffer_journal_prepared(bh); - if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) { - clear_buffer_journal_test(bh); - set_buffer_journal_restore_dirty(bh); - } - unlock_buffer(bh); - return 1; -} - -static void flush_old_journal_lists(struct super_block *s) -{ - struct reiserfs_journal *journal = SB_JOURNAL(s); - struct reiserfs_journal_list *jl; - struct list_head *entry; - time_t now = get_seconds(); - - while (!list_empty(&journal->j_journal_list)) { - entry = journal->j_journal_list.next; - jl = JOURNAL_LIST_ENTRY(entry); - /* this check should always be run, to send old lists to disk */ - if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) && - atomic_read(&jl->j_commit_left) == 0 && - test_transaction(s, jl)) { - flush_used_journal_lists(s, jl); - } 
else { - break; - } - } -} - -/* -** long and ugly. If flush, will not return until all commit -** blocks and all real buffers in the trans are on disk. -** If no_async, won't return until all commit blocks are on disk. -** -** keep reading, there are comments as you go along -** -** If the journal is aborted, we just clean up. Things like flushing -** journal lists, etc just won't happen. -*/ -static int do_journal_end(struct reiserfs_transaction_handle *th, - struct super_block *sb, unsigned long nblocks, - int flags) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_journal_cnode *cn, *next, *jl_cn; - struct reiserfs_journal_cnode *last_cn = NULL; - struct reiserfs_journal_desc *desc; - struct reiserfs_journal_commit *commit; - struct buffer_head *c_bh; /* commit bh */ - struct buffer_head *d_bh; /* desc bh */ - int cur_write_start = 0; /* start index of current log write */ - int old_start; - int i; - int flush; - int wait_on_commit; - struct reiserfs_journal_list *jl, *temp_jl; - struct list_head *entry, *safe; - unsigned long jindex; - unsigned int commit_trans_id; - int trans_half; - - BUG_ON(th->t_refcount > 1); - BUG_ON(!th->t_trans_id); - - /* protect flush_older_commits from doing mistakes if the - transaction ID counter gets overflowed. 
*/ - if (th->t_trans_id == ~0U) - flags |= FLUSH_ALL | COMMIT_NOW | WAIT; - flush = flags & FLUSH_ALL; - wait_on_commit = flags & WAIT; - - current->journal_info = th->t_handle_save; - reiserfs_check_lock_depth(sb, "journal end"); - if (journal->j_len == 0) { - reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), - 1); - journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb)); - } - - lock_journal(sb); - if (journal->j_next_full_flush) { - flags |= FLUSH_ALL; - flush = 1; - } - if (journal->j_next_async_flush) { - flags |= COMMIT_NOW | WAIT; - wait_on_commit = 1; - } - - /* check_journal_end locks the journal, and unlocks if it does not return 1 - ** it tells us if we should continue with the journal_end, or just return - */ - if (!check_journal_end(th, sb, nblocks, flags)) { - sb->s_dirt = 1; - wake_queued_writers(sb); - reiserfs_async_progress_wait(sb); - goto out; - } - - /* check_journal_end might set these, check again */ - if (journal->j_next_full_flush) { - flush = 1; - } - - /* - ** j must wait means we have to flush the log blocks, and the real blocks for - ** this transaction - */ - if (journal->j_must_wait > 0) { - flush = 1; - } -#ifdef REISERFS_PREALLOCATE - /* quota ops might need to nest, setup the journal_info pointer for them - * and raise the refcount so that it is > 0. */ - current->journal_info = th; - th->t_refcount++; - reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into - * the transaction */ - th->t_refcount--; - current->journal_info = th->t_handle_save; -#endif - - /* setup description block */ - d_bh = - journal_getblk(sb, - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - journal->j_start); - set_buffer_uptodate(d_bh); - desc = (struct reiserfs_journal_desc *)(d_bh)->b_data; - memset(d_bh->b_data, 0, d_bh->b_size); - memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8); - set_desc_trans_id(desc, journal->j_trans_id); - - /* setup commit block. 
Don't write (keep it clean too) this one until after everyone else is written */ - c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - ((journal->j_start + journal->j_len + - 1) % SB_ONDISK_JOURNAL_SIZE(sb))); - commit = (struct reiserfs_journal_commit *)c_bh->b_data; - memset(c_bh->b_data, 0, c_bh->b_size); - set_commit_trans_id(commit, journal->j_trans_id); - set_buffer_uptodate(c_bh); - - /* init this journal list */ - jl = journal->j_current_jl; - - /* we lock the commit before doing anything because - * we want to make sure nobody tries to run flush_commit_list until - * the new transaction is fully setup, and we've already flushed the - * ordered bh list - */ - reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb); - - /* save the transaction id in case we need to commit it later */ - commit_trans_id = jl->j_trans_id; - - atomic_set(&jl->j_older_commits_done, 0); - jl->j_trans_id = journal->j_trans_id; - jl->j_timestamp = journal->j_trans_start_time; - jl->j_commit_bh = c_bh; - jl->j_start = journal->j_start; - jl->j_len = journal->j_len; - atomic_set(&jl->j_nonzerolen, journal->j_len); - atomic_set(&jl->j_commit_left, journal->j_len + 2); - jl->j_realblock = NULL; - - /* The ENTIRE FOR LOOP MUST not cause schedule to occur. 
- ** for each real block, add it to the journal list hash, - ** copy into real block index array in the commit or desc block - */ - trans_half = journal_trans_half(sb->s_blocksize); - for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) { - if (buffer_journaled(cn->bh)) { - jl_cn = get_cnode(sb); - if (!jl_cn) { - reiserfs_panic(sb, "journal-1676", - "get_cnode returned NULL"); - } - if (i == 0) { - jl->j_realblock = jl_cn; - } - jl_cn->prev = last_cn; - jl_cn->next = NULL; - if (last_cn) { - last_cn->next = jl_cn; - } - last_cn = jl_cn; - /* make sure the block we are trying to log is not a block - of journal or reserved area */ - - if (is_block_in_log_or_reserved_area - (sb, cn->bh->b_blocknr)) { - reiserfs_panic(sb, "journal-2332", - "Trying to log block %lu, " - "which is a log block", - cn->bh->b_blocknr); - } - jl_cn->blocknr = cn->bh->b_blocknr; - jl_cn->state = 0; - jl_cn->sb = sb; - jl_cn->bh = cn->bh; - jl_cn->jlist = jl; - insert_journal_hash(journal->j_list_hash_table, jl_cn); - if (i < trans_half) { - desc->j_realblock[i] = - cpu_to_le32(cn->bh->b_blocknr); - } else { - commit->j_realblock[i - trans_half] = - cpu_to_le32(cn->bh->b_blocknr); - } - } else { - i--; - } - } - set_desc_trans_len(desc, journal->j_len); - set_desc_mount_id(desc, journal->j_mount_id); - set_desc_trans_id(desc, journal->j_trans_id); - set_commit_trans_len(commit, journal->j_len); - - /* special check in case all buffers in the journal were marked for not logging */ - BUG_ON(journal->j_len == 0); - - /* we're about to dirty all the log blocks, mark the description block - * dirty now too. 
Don't mark the commit block dirty until all the - * others are on disk - */ - mark_buffer_dirty(d_bh); - - /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */ - cur_write_start = journal->j_start; - cn = journal->j_first; - jindex = 1; /* start at one so we don't get the desc again */ - while (cn) { - clear_buffer_journal_new(cn->bh); - /* copy all the real blocks into log area. dirty log blocks */ - if (buffer_journaled(cn->bh)) { - struct buffer_head *tmp_bh; - char *addr; - struct page *page; - tmp_bh = - journal_getblk(sb, - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - ((cur_write_start + - jindex) % - SB_ONDISK_JOURNAL_SIZE(sb))); - set_buffer_uptodate(tmp_bh); - page = cn->bh->b_page; - addr = kmap(page); - memcpy(tmp_bh->b_data, - addr + offset_in_page(cn->bh->b_data), - cn->bh->b_size); - kunmap(page); - mark_buffer_dirty(tmp_bh); - jindex++; - set_buffer_journal_dirty(cn->bh); - clear_buffer_journaled(cn->bh); - } else { - /* JDirty cleared sometime during transaction. don't log this one */ - reiserfs_warning(sb, "journal-2048", - "BAD, buffer in journal hash, " - "but not JDirty!"); - brelse(cn->bh); - } - next = cn->next; - free_cnode(sb, cn); - cn = next; - reiserfs_write_unlock(sb); - cond_resched(); - reiserfs_write_lock(sb); - } - - /* we are done with both the c_bh and d_bh, but - ** c_bh must be written after all other commit blocks, - ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. 
- */ - - journal->j_current_jl = alloc_journal_list(sb); - - /* now it is safe to insert this transaction on the main list */ - list_add_tail(&jl->j_list, &journal->j_journal_list); - list_add_tail(&jl->j_working_list, &journal->j_working_list); - journal->j_num_work_lists++; - - /* reset journal values for the next transaction */ - old_start = journal->j_start; - journal->j_start = - (journal->j_start + journal->j_len + - 2) % SB_ONDISK_JOURNAL_SIZE(sb); - atomic_set(&(journal->j_wcount), 0); - journal->j_bcount = 0; - journal->j_last = NULL; - journal->j_first = NULL; - journal->j_len = 0; - journal->j_trans_start_time = 0; - /* check for trans_id overflow */ - if (++journal->j_trans_id == 0) - journal->j_trans_id = 10; - journal->j_current_jl->j_trans_id = journal->j_trans_id; - journal->j_must_wait = 0; - journal->j_len_alloc = 0; - journal->j_next_full_flush = 0; - journal->j_next_async_flush = 0; - init_journal_hash(sb); - - // make sure reiserfs_add_jh sees the new current_jl before we - // write out the tails - smp_mb(); - - /* tail conversion targets have to hit the disk before we end the - * transaction. Otherwise a later transaction might repack the tail - * before this transaction commits, leaving the data block unflushed and - * clean, if we crash before the later transaction commits, the data block - * is lost. - */ - if (!list_empty(&jl->j_tail_bh_list)) { - reiserfs_write_unlock(sb); - write_ordered_buffers(&journal->j_dirty_buffers_lock, - journal, jl, &jl->j_tail_bh_list); - reiserfs_write_lock(sb); - } - BUG_ON(!list_empty(&jl->j_tail_bh_list)); - mutex_unlock(&jl->j_commit_mutex); - - /* honor the flush wishes from the caller, simple commits can - ** be done outside the journal lock, they are done below - ** - ** if we don't flush the commit list right now, we put it into - ** the work queue so the people waiting on the async progress work - ** queue don't wait for this proc to flush journal lists and such. 
- */ - if (flush) { - flush_commit_list(sb, jl, 1); - flush_journal_list(sb, jl, 1); - } else if (!(jl->j_state & LIST_COMMIT_PENDING)) - queue_delayed_work(commit_wq, &journal->j_work, HZ / 10); - - /* if the next transaction has any chance of wrapping, flush - ** transactions that might get overwritten. If any journal lists are very - ** old flush them as well. - */ - first_jl: - list_for_each_safe(entry, safe, &journal->j_journal_list) { - temp_jl = JOURNAL_LIST_ENTRY(entry); - if (journal->j_start <= temp_jl->j_start) { - if ((journal->j_start + journal->j_trans_max + 1) >= - temp_jl->j_start) { - flush_used_journal_lists(sb, temp_jl); - goto first_jl; - } else if ((journal->j_start + - journal->j_trans_max + 1) < - SB_ONDISK_JOURNAL_SIZE(sb)) { - /* if we don't cross into the next transaction and we don't - * wrap, there is no way we can overlap any later transactions - * break now - */ - break; - } - } else if ((journal->j_start + - journal->j_trans_max + 1) > - SB_ONDISK_JOURNAL_SIZE(sb)) { - if (((journal->j_start + journal->j_trans_max + 1) % - SB_ONDISK_JOURNAL_SIZE(sb)) >= - temp_jl->j_start) { - flush_used_journal_lists(sb, temp_jl); - goto first_jl; - } else { - /* we don't overlap anything from out start to the end of the - * log, and our wrapped portion doesn't overlap anything at - * the start of the log. We can break - */ - break; - } - } - } - flush_old_journal_lists(sb); - - journal->j_current_jl->j_list_bitmap = - get_list_bitmap(sb, journal->j_current_jl); - - if (!(journal->j_current_jl->j_list_bitmap)) { - reiserfs_panic(sb, "journal-1996", - "could not get a list bitmap"); - } - - atomic_set(&(journal->j_jlock), 0); - unlock_journal(sb); - /* wake up any body waiting to join. 
*/ - clear_bit(J_WRITERS_QUEUED, &journal->j_state); - wake_up(&(journal->j_join_wait)); - - if (!flush && wait_on_commit && - journal_list_still_alive(sb, commit_trans_id)) { - flush_commit_list(sb, jl, 1); - } - out: - reiserfs_check_lock_depth(sb, "journal end2"); - - memset(th, 0, sizeof(*th)); - /* Re-set th->t_super, so we can properly keep track of how many - * persistent transactions there are. We need to do this so if this - * call is part of a failed restart_transaction, we can free it later */ - th->t_super = sb; - - return journal->j_errno; -} - -/* Send the file system read only and refuse new transactions */ -void reiserfs_abort_journal(struct super_block *sb, int errno) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - if (test_bit(J_ABORTED, &journal->j_state)) - return; - - if (!journal->j_errno) - journal->j_errno = errno; - - sb->s_flags |= MS_RDONLY; - set_bit(J_ABORTED, &journal->j_state); - -#ifdef CONFIG_REISERFS_CHECK - dump_stack(); -#endif -} diff --git a/ANDROID_3.4.5/fs/reiserfs/lbalance.c b/ANDROID_3.4.5/fs/reiserfs/lbalance.c deleted file mode 100644 index 79e5a8b4..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/lbalance.c +++ /dev/null @@ -1,1311 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include <asm/uaccess.h> -#include <linux/string.h> -#include <linux/time.h> -#include "reiserfs.h" -#include <linux/buffer_head.h> - -/* these are used in do_balance.c */ - -/* leaf_move_items - leaf_shift_left - leaf_shift_right - leaf_delete_items - leaf_insert_into_buf - leaf_paste_in_buffer - leaf_cut_from_buffer - leaf_paste_entries - */ - -/* copy copy_count entries from source directory item to dest buffer (creating new item if needed) */ -static void leaf_copy_dir_entries(struct buffer_info *dest_bi, - struct buffer_head *source, int last_first, - int item_num, int from, int copy_count) -{ - struct buffer_head *dest = dest_bi->bi_bh; - int item_num_in_dest; /* either the number of target 
item, - or if we must create a new item, - the number of the item we will - create it next to */ - struct item_head *ih; - struct reiserfs_de_head *deh; - int copy_records_len; /* length of all records in item to be copied */ - char *records; - - ih = B_N_PITEM_HEAD(source, item_num); - - RFALSE(!is_direntry_le_ih(ih), "vs-10000: item must be directory item"); - - /* length of all record to be copied and first byte of the last of them */ - deh = B_I_DEH(source, ih); - if (copy_count) { - copy_records_len = (from ? deh_location(&(deh[from - 1])) : - ih_item_len(ih)) - - deh_location(&(deh[from + copy_count - 1])); - records = - source->b_data + ih_location(ih) + - deh_location(&(deh[from + copy_count - 1])); - } else { - copy_records_len = 0; - records = NULL; - } - - /* when copy last to first, dest buffer can contain 0 items */ - item_num_in_dest = - (last_first == - LAST_TO_FIRST) ? ((B_NR_ITEMS(dest)) ? 0 : -1) : (B_NR_ITEMS(dest) - - 1); - - /* if there are no items in dest or the first/last item in dest is not item of the same directory */ - if ((item_num_in_dest == -1) || - (last_first == FIRST_TO_LAST && le_ih_k_offset(ih) == DOT_OFFSET) || - (last_first == LAST_TO_FIRST - && comp_short_le_keys /*COMP_SHORT_KEYS */ (&ih->ih_key, - B_N_PKEY(dest, - item_num_in_dest)))) - { - /* create new item in dest */ - struct item_head new_ih; - - /* form item header */ - memcpy(&new_ih.ih_key, &ih->ih_key, KEY_SIZE); - put_ih_version(&new_ih, KEY_FORMAT_3_5); - /* calculate item len */ - put_ih_item_len(&new_ih, - DEH_SIZE * copy_count + copy_records_len); - put_ih_entry_count(&new_ih, 0); - - if (last_first == LAST_TO_FIRST) { - /* form key by the following way */ - if (from < I_ENTRY_COUNT(ih)) { - set_le_ih_k_offset(&new_ih, - deh_offset(&(deh[from]))); - /*memcpy (&new_ih.ih_key.k_offset, &deh[from].deh_offset, SHORT_KEY_SIZE); */ - } else { - /* no entries will be copied to this item in this function */ - set_le_ih_k_offset(&new_ih, U32_MAX); - /* this item is not 
yet valid, but we want I_IS_DIRECTORY_ITEM to return 1 for it, so we -1 */ - } - set_le_key_k_type(KEY_FORMAT_3_5, &(new_ih.ih_key), - TYPE_DIRENTRY); - } - - /* insert item into dest buffer */ - leaf_insert_into_buf(dest_bi, - (last_first == - LAST_TO_FIRST) ? 0 : B_NR_ITEMS(dest), - &new_ih, NULL, 0); - } else { - /* prepare space for entries */ - leaf_paste_in_buffer(dest_bi, - (last_first == - FIRST_TO_LAST) ? (B_NR_ITEMS(dest) - - 1) : 0, MAX_US_INT, - DEH_SIZE * copy_count + copy_records_len, - records, 0); - } - - item_num_in_dest = - (last_first == FIRST_TO_LAST) ? (B_NR_ITEMS(dest) - 1) : 0; - - leaf_paste_entries(dest_bi, item_num_in_dest, - (last_first == - FIRST_TO_LAST) ? I_ENTRY_COUNT(B_N_PITEM_HEAD(dest, - item_num_in_dest)) - : 0, copy_count, deh + from, records, - DEH_SIZE * copy_count + copy_records_len); -} - -/* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or - part of it or nothing (see the return 0 below) from SOURCE to the end - (if last_first) or beginning (!last_first) of the DEST */ -/* returns 1 if anything was copied, else 0 */ -static int leaf_copy_boundary_item(struct buffer_info *dest_bi, - struct buffer_head *src, int last_first, - int bytes_or_entries) -{ - struct buffer_head *dest = dest_bi->bi_bh; - int dest_nr_item, src_nr_item; /* number of items in the source and destination buffers */ - struct item_head *ih; - struct item_head *dih; - - dest_nr_item = B_NR_ITEMS(dest); - - if (last_first == FIRST_TO_LAST) { - /* if ( DEST is empty or first item of SOURCE and last item of DEST are the items of different objects - or of different types ) then there is no need to treat this item differently from the other items - that we copy, so we return */ - ih = B_N_PITEM_HEAD(src, 0); - dih = B_N_PITEM_HEAD(dest, dest_nr_item - 1); - if (!dest_nr_item - || (!op_is_left_mergeable(&(ih->ih_key), src->b_size))) - /* there is nothing to merge */ - return 0; - - RFALSE(!ih_item_len(ih), - "vs-10010: 
item can not have empty length"); - - if (is_direntry_le_ih(ih)) { - if (bytes_or_entries == -1) - /* copy all entries to dest */ - bytes_or_entries = ih_entry_count(ih); - leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, 0, 0, - bytes_or_entries); - return 1; - } - - /* copy part of the body of the first item of SOURCE to the end of the body of the last item of the DEST - part defined by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body; don't create new item header - */ - if (bytes_or_entries == -1) - bytes_or_entries = ih_item_len(ih); - -#ifdef CONFIG_REISERFS_CHECK - else { - if (bytes_or_entries == ih_item_len(ih) - && is_indirect_le_ih(ih)) - if (get_ih_free_space(ih)) - reiserfs_panic(sb_from_bi(dest_bi), - "vs-10020", - "last unformatted node " - "must be filled " - "entirely (%h)", ih); - } -#endif - - /* merge first item (or its part) of src buffer with the last - item of dest buffer. Both are of the same file */ - leaf_paste_in_buffer(dest_bi, - dest_nr_item - 1, ih_item_len(dih), - bytes_or_entries, B_I_PITEM(src, ih), 0); - - if (is_indirect_le_ih(dih)) { - RFALSE(get_ih_free_space(dih), - "vs-10030: merge to left: last unformatted node of non-last indirect item %h must have zerto free space", - ih); - if (bytes_or_entries == ih_item_len(ih)) - set_ih_free_space(dih, get_ih_free_space(ih)); - } - - return 1; - } - - /* copy boundary item to right (last_first == LAST_TO_FIRST) */ - - /* ( DEST is empty or last item of SOURCE and first item of DEST - are the items of different object or of different types ) - */ - src_nr_item = B_NR_ITEMS(src); - ih = B_N_PITEM_HEAD(src, src_nr_item - 1); - dih = B_N_PITEM_HEAD(dest, 0); - - if (!dest_nr_item || !op_is_left_mergeable(&(dih->ih_key), src->b_size)) - return 0; - - if (is_direntry_le_ih(ih)) { - if (bytes_or_entries == -1) - /* bytes_or_entries = entries number in last item body of SOURCE */ - bytes_or_entries = ih_entry_count(ih); - - leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, - 
src_nr_item - 1, - ih_entry_count(ih) - bytes_or_entries, - bytes_or_entries); - return 1; - } - - /* copy part of the body of the last item of SOURCE to the begin of the body of the first item of the DEST; - part defined by 'bytes_or_entries'; if byte_or_entriess == -1 copy whole body; change first item key of the DEST; - don't create new item header - */ - - RFALSE(is_indirect_le_ih(ih) && get_ih_free_space(ih), - "vs-10040: merge to right: last unformatted node of non-last indirect item must be filled entirely (%h)", - ih); - - if (bytes_or_entries == -1) { - /* bytes_or_entries = length of last item body of SOURCE */ - bytes_or_entries = ih_item_len(ih); - - RFALSE(le_ih_k_offset(dih) != - le_ih_k_offset(ih) + op_bytes_number(ih, src->b_size), - "vs-10050: items %h and %h do not match", ih, dih); - - /* change first item key of the DEST */ - set_le_ih_k_offset(dih, le_ih_k_offset(ih)); - - /* item becomes non-mergeable */ - /* or mergeable if left item was */ - set_le_ih_k_type(dih, le_ih_k_type(ih)); - } else { - /* merge to right only part of item */ - RFALSE(ih_item_len(ih) <= bytes_or_entries, - "vs-10060: no so much bytes %lu (needed %lu)", - (unsigned long)ih_item_len(ih), - (unsigned long)bytes_or_entries); - - /* change first item key of the DEST */ - if (is_direct_le_ih(dih)) { - RFALSE(le_ih_k_offset(dih) <= - (unsigned long)bytes_or_entries, - "vs-10070: dih %h, bytes_or_entries(%d)", dih, - bytes_or_entries); - set_le_ih_k_offset(dih, - le_ih_k_offset(dih) - - bytes_or_entries); - } else { - RFALSE(le_ih_k_offset(dih) <= - (bytes_or_entries / UNFM_P_SIZE) * dest->b_size, - "vs-10080: dih %h, bytes_or_entries(%d)", - dih, - (bytes_or_entries / UNFM_P_SIZE) * dest->b_size); - set_le_ih_k_offset(dih, - le_ih_k_offset(dih) - - ((bytes_or_entries / UNFM_P_SIZE) * - dest->b_size)); - } - } - - leaf_paste_in_buffer(dest_bi, 0, 0, bytes_or_entries, - B_I_PITEM(src, - ih) + ih_item_len(ih) - bytes_or_entries, - 0); - return 1; -} - -/* copy cpy_mun items 
from buffer src to buffer dest
 * last_first == FIRST_TO_LAST means, that we copy cpy_num items beginning from first-th item in src to tail of dest
 * last_first == LAST_TO_FIRST means, that we copy cpy_num items beginning from first-th item in src to head of dest
 */
/*
 * leaf_copy_items_entirely - copy cpy_num whole items (headers and bodies)
 * from leaf @src, starting at item index @first, into the leaf of @dest_bi.
 *
 * @dest_bi:    destination buffer info; bi_bh is the destination leaf,
 *              bi_parent/bi_position identify its disk_child slot (optional).
 * @src:        source leaf buffer.
 * @last_first: LAST_TO_FIRST inserts before item 0 of dest,
 *              FIRST_TO_LAST appends after the last item of dest.
 * @first:      index in @src of the first item to copy.
 * @cpy_num:    number of items to copy; 0 is a no-op.
 *
 * The item count and free space of the destination block head are updated,
 * the destination leaf is marked dirty, and when a parent is given the
 * disk_child size is increased by the copied bytes and the parent is marked
 * dirty as well.
 */
static void leaf_copy_items_entirely(struct buffer_info *dest_bi,
				     struct buffer_head *src, int last_first,
				     int first, int cpy_num)
{
	struct buffer_head *dest;
	int nr, free_space;
	int dest_before;
	int last_loc, last_inserted_loc, location;
	int unmoved_loc;	/* offset the new bodies are stacked below */
	int bodies_len;		/* total length of the copied item bodies */
	int i;
	struct block_head *blkh;
	struct item_head *ih;

	RFALSE(last_first != LAST_TO_FIRST && last_first != FIRST_TO_LAST,
	       "vs-10090: bad last_first parameter %d", last_first);
	RFALSE(B_NR_ITEMS(src) - first < cpy_num,
	       "vs-10100: too few items in source %d, required %d from %d",
	       B_NR_ITEMS(src), cpy_num, first);
	RFALSE(cpy_num < 0, "vs-10110: can not copy negative amount of items");
	/* the two checks below guard pointers, so say so in the message */
	RFALSE(!dest_bi, "vs-10120: dest_bi is not defined");

	dest = dest_bi->bi_bh;

	RFALSE(!dest, "vs-10130: dest buffer is not defined");

	if (cpy_num == 0)
		return;

	blkh = B_BLK_HEAD(dest);
	nr = blkh_nr_item(blkh);
	free_space = blkh_free_space(blkh);

	/* new items go before the 0-th or the nr-th item of dest, depending on last_first */
	dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr;

	/* header slot of the first new item */
	ih = B_N_PITEM_HEAD(dest, dest_before);

	RFALSE(blkh_free_space(blkh) < cpy_num * IH_SIZE,
	       "vs-10140: not enough free space for headers %d (needed %d)",
	       B_FREE_SPACE(dest), cpy_num * IH_SIZE);

	/* open a gap of cpy_num headers (regions overlap, hence memmove) */
	memmove(ih + cpy_num, ih, (nr - dest_before) * IH_SIZE);

	/* copy item headers */
	memcpy(ih, B_N_PITEM_HEAD(src, first), cpy_num * IH_SIZE);

	free_space -= (IH_SIZE * cpy_num);
	set_blkh_free_space(blkh, free_space);

	/*
	 * Recompute the body location of every header from dest_before on:
	 * each body sits directly below the previous one, starting from the
	 * body of the preceding (unmoved) item or from the end of the block.
	 */
	unmoved_loc = location =
	    (dest_before == 0) ? dest->b_size : ih_location(ih - 1);
	for (i = dest_before; i < nr + cpy_num; i++) {
		location -= ih_item_len(ih + i - dest_before);
		put_ih_location(ih + i - dest_before, location);
	}

	/* new location of the last item in the node, and of the last copied item */
	last_loc = ih_location(&(ih[nr + cpy_num - 1 - dest_before]));
	last_inserted_loc = ih_location(&(ih[cpy_num - 1]));
	bodies_len = unmoved_loc - last_inserted_loc;

	/* check free space */
	RFALSE(free_space < bodies_len,
	       "vs-10150: not enough free space for items %d (needed %d)",
	       free_space, bodies_len);

	/* shift the bodies of the items that follow the insertion point */
	memmove(dest->b_data + last_loc,
		dest->b_data + last_loc + bodies_len,
		last_inserted_loc - last_loc);

	/* copy item bodies; the source address is the body of the last copied item */
	memcpy(dest->b_data + last_inserted_loc,
	       B_N_PITEM(src, (first + cpy_num - 1)), bodies_len);

	/* sizes, item number */
	set_blkh_nr_item(blkh, nr + cpy_num);
	set_blkh_free_space(blkh, free_space - bodies_len);

	do_balance_mark_leaf_dirty(dest_bi->tb, dest, 0);

	if (dest_bi->bi_parent) {
		struct disk_child *t_dc;

		t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position);
		RFALSE(dc_block_number(t_dc) != dest->b_blocknr,
		       "vs-10160: block number in bh does not match to field in disk_child structure %lu and %lu",
		       (long unsigned)dest->b_blocknr,
		       (long unsigned)dc_block_number(t_dc));
		put_dc_size(t_dc,
			    dc_size(t_dc) + (bodies_len + IH_SIZE * cpy_num));

		do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent,
					       0);
	}
}

/* This function splits the (liquid) item into two items (useful when
   shifting part of an item into another node.)
*/ -static void leaf_item_bottle(struct buffer_info *dest_bi, - struct buffer_head *src, int last_first, - int item_num, int cpy_bytes) -{ - struct buffer_head *dest = dest_bi->bi_bh; - struct item_head *ih; - - RFALSE(cpy_bytes == -1, - "vs-10170: bytes == - 1 means: do not split item"); - - if (last_first == FIRST_TO_LAST) { - /* if ( if item in position item_num in buffer SOURCE is directory item ) */ - ih = B_N_PITEM_HEAD(src, item_num); - if (is_direntry_le_ih(ih)) - leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, - item_num, 0, cpy_bytes); - else { - struct item_head n_ih; - - /* copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST - part defined by 'cpy_bytes'; create new item header; change old item_header (????); - n_ih = new item_header; - */ - memcpy(&n_ih, ih, IH_SIZE); - put_ih_item_len(&n_ih, cpy_bytes); - if (is_indirect_le_ih(ih)) { - RFALSE(cpy_bytes == ih_item_len(ih) - && get_ih_free_space(ih), - "vs-10180: when whole indirect item is bottle to left neighbor, it must have free_space==0 (not %lu)", - (long unsigned)get_ih_free_space(ih)); - set_ih_free_space(&n_ih, 0); - } - - RFALSE(op_is_left_mergeable(&(ih->ih_key), src->b_size), - "vs-10190: bad mergeability of item %h", ih); - n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ - leaf_insert_into_buf(dest_bi, B_NR_ITEMS(dest), &n_ih, - B_N_PITEM(src, item_num), 0); - } - } else { - /* if ( if item in position item_num in buffer SOURCE is directory item ) */ - ih = B_N_PITEM_HEAD(src, item_num); - if (is_direntry_le_ih(ih)) - leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, - item_num, - I_ENTRY_COUNT(ih) - cpy_bytes, - cpy_bytes); - else { - struct item_head n_ih; - - /* copy part of the body of the item number 'item_num' of SOURCE to the begin of the DEST - part defined by 'cpy_bytes'; create new item header; - n_ih = new item_header; - */ - memcpy(&n_ih, ih, SHORT_KEY_SIZE); - - n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le 
*/ - - if (is_direct_le_ih(ih)) { - set_le_ih_k_offset(&n_ih, - le_ih_k_offset(ih) + - ih_item_len(ih) - cpy_bytes); - set_le_ih_k_type(&n_ih, TYPE_DIRECT); - set_ih_free_space(&n_ih, MAX_US_INT); - } else { - /* indirect item */ - RFALSE(!cpy_bytes && get_ih_free_space(ih), - "vs-10200: ih->ih_free_space must be 0 when indirect item will be appended"); - set_le_ih_k_offset(&n_ih, - le_ih_k_offset(ih) + - (ih_item_len(ih) - - cpy_bytes) / UNFM_P_SIZE * - dest->b_size); - set_le_ih_k_type(&n_ih, TYPE_INDIRECT); - set_ih_free_space(&n_ih, get_ih_free_space(ih)); - } - - /* set item length */ - put_ih_item_len(&n_ih, cpy_bytes); - - n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ - - leaf_insert_into_buf(dest_bi, 0, &n_ih, - B_N_PITEM(src, - item_num) + - ih_item_len(ih) - cpy_bytes, 0); - } - } -} - -/* If cpy_bytes equals minus one than copy cpy_num whole items from SOURCE to DEST. - If cpy_bytes not equal to minus one than copy cpy_num-1 whole items from SOURCE to DEST. - From last item copy cpy_num bytes for regular item and cpy_num directory entries for - directory item. */ -static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src, - int last_first, int cpy_num, int cpy_bytes) -{ - struct buffer_head *dest; - int pos, i, src_nr_item, bytes; - - dest = dest_bi->bi_bh; - RFALSE(!dest || !src, "vs-10210: !dest || !src"); - RFALSE(last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST, - "vs-10220:last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST"); - RFALSE(B_NR_ITEMS(src) < cpy_num, - "vs-10230: No enough items: %d, req. 
%d", B_NR_ITEMS(src), - cpy_num); - RFALSE(cpy_num < 0, "vs-10240: cpy_num < 0 (%d)", cpy_num); - - if (cpy_num == 0) - return 0; - - if (last_first == FIRST_TO_LAST) { - /* copy items to left */ - pos = 0; - if (cpy_num == 1) - bytes = cpy_bytes; - else - bytes = -1; - - /* copy the first item or it part or nothing to the end of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes)) */ - i = leaf_copy_boundary_item(dest_bi, src, FIRST_TO_LAST, bytes); - cpy_num -= i; - if (cpy_num == 0) - return i; - pos += i; - if (cpy_bytes == -1) - /* copy first cpy_num items starting from position 'pos' of SOURCE to end of DEST */ - leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, - pos, cpy_num); - else { - /* copy first cpy_num-1 items starting from position 'pos-1' of the SOURCE to the end of the DEST */ - leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, - pos, cpy_num - 1); - - /* copy part of the item which number is cpy_num+pos-1 to the end of the DEST */ - leaf_item_bottle(dest_bi, src, FIRST_TO_LAST, - cpy_num + pos - 1, cpy_bytes); - } - } else { - /* copy items to right */ - src_nr_item = B_NR_ITEMS(src); - if (cpy_num == 1) - bytes = cpy_bytes; - else - bytes = -1; - - /* copy the last item or it part or nothing to the begin of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes)); */ - i = leaf_copy_boundary_item(dest_bi, src, LAST_TO_FIRST, bytes); - - cpy_num -= i; - if (cpy_num == 0) - return i; - - pos = src_nr_item - cpy_num - i; - if (cpy_bytes == -1) { - /* starting from position 'pos' copy last cpy_num items of SOURCE to begin of DEST */ - leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, - pos, cpy_num); - } else { - /* copy last cpy_num-1 items starting from position 'pos+1' of the SOURCE to the begin of the DEST; */ - leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, - pos + 1, cpy_num - 1); - - /* copy part of the item which number is pos to the begin of the DEST */ - leaf_item_bottle(dest_bi, src, LAST_TO_FIRST, pos, - 
cpy_bytes); - } - } - return i; -} - -/* there are types of coping: from S[0] to L[0], from S[0] to R[0], - from R[0] to L[0]. for each of these we have to define parent and - positions of destination and source buffers */ -static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb, - struct buffer_info *dest_bi, - struct buffer_info *src_bi, - int *first_last, - struct buffer_head *Snew) -{ - memset(dest_bi, 0, sizeof(struct buffer_info)); - memset(src_bi, 0, sizeof(struct buffer_info)); - - /* define dest, src, dest parent, dest position */ - switch (shift_mode) { - case LEAF_FROM_S_TO_L: /* it is used in leaf_shift_left */ - src_bi->tb = tb; - src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); - src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); - src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0); /* src->b_item_order */ - dest_bi->tb = tb; - dest_bi->bi_bh = tb->L[0]; - dest_bi->bi_parent = tb->FL[0]; - dest_bi->bi_position = get_left_neighbor_position(tb, 0); - *first_last = FIRST_TO_LAST; - break; - - case LEAF_FROM_S_TO_R: /* it is used in leaf_shift_right */ - src_bi->tb = tb; - src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); - src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); - src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0); - dest_bi->tb = tb; - dest_bi->bi_bh = tb->R[0]; - dest_bi->bi_parent = tb->FR[0]; - dest_bi->bi_position = get_right_neighbor_position(tb, 0); - *first_last = LAST_TO_FIRST; - break; - - case LEAF_FROM_R_TO_L: /* it is used in balance_leaf_when_delete */ - src_bi->tb = tb; - src_bi->bi_bh = tb->R[0]; - src_bi->bi_parent = tb->FR[0]; - src_bi->bi_position = get_right_neighbor_position(tb, 0); - dest_bi->tb = tb; - dest_bi->bi_bh = tb->L[0]; - dest_bi->bi_parent = tb->FL[0]; - dest_bi->bi_position = get_left_neighbor_position(tb, 0); - *first_last = FIRST_TO_LAST; - break; - - case LEAF_FROM_L_TO_R: /* it is used in balance_leaf_when_delete */ - src_bi->tb = tb; - src_bi->bi_bh = tb->L[0]; - 
src_bi->bi_parent = tb->FL[0]; - src_bi->bi_position = get_left_neighbor_position(tb, 0); - dest_bi->tb = tb; - dest_bi->bi_bh = tb->R[0]; - dest_bi->bi_parent = tb->FR[0]; - dest_bi->bi_position = get_right_neighbor_position(tb, 0); - *first_last = LAST_TO_FIRST; - break; - - case LEAF_FROM_S_TO_SNEW: - src_bi->tb = tb; - src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); - src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); - src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0); - dest_bi->tb = tb; - dest_bi->bi_bh = Snew; - dest_bi->bi_parent = NULL; - dest_bi->bi_position = 0; - *first_last = LAST_TO_FIRST; - break; - - default: - reiserfs_panic(sb_from_bi(src_bi), "vs-10250", - "shift type is unknown (%d)", shift_mode); - } - RFALSE(!src_bi->bi_bh || !dest_bi->bi_bh, - "vs-10260: mode==%d, source (%p) or dest (%p) buffer is initialized incorrectly", - shift_mode, src_bi->bi_bh, dest_bi->bi_bh); -} - -/* copy mov_num items and mov_bytes of the (mov_num-1)th item to - neighbor. Delete them from source */ -int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num, - int mov_bytes, struct buffer_head *Snew) -{ - int ret_value; - struct buffer_info dest_bi, src_bi; - int first_last; - - leaf_define_dest_src_infos(shift_mode, tb, &dest_bi, &src_bi, - &first_last, Snew); - - ret_value = - leaf_copy_items(&dest_bi, src_bi.bi_bh, first_last, mov_num, - mov_bytes); - - leaf_delete_items(&src_bi, first_last, - (first_last == - FIRST_TO_LAST) ? 
0 : (B_NR_ITEMS(src_bi.bi_bh) - - mov_num), mov_num, mov_bytes); - - return ret_value; -} - -/* Shift shift_num items (and shift_bytes of last shifted item if shift_bytes != -1) - from S[0] to L[0] and replace the delimiting key */ -int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes) -{ - struct buffer_head *S0 = PATH_PLAST_BUFFER(tb->tb_path); - int i; - - /* move shift_num (and shift_bytes bytes) items from S[0] to left neighbor L[0] */ - i = leaf_move_items(LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL); - - if (shift_num) { - if (B_NR_ITEMS(S0) == 0) { /* number of items in S[0] == 0 */ - - RFALSE(shift_bytes != -1, - "vs-10270: S0 is empty now, but shift_bytes != -1 (%d)", - shift_bytes); -#ifdef CONFIG_REISERFS_CHECK - if (tb->tb_mode == M_PASTE || tb->tb_mode == M_INSERT) { - print_cur_tb("vs-10275"); - reiserfs_panic(tb->tb_sb, "vs-10275", - "balance condition corrupted " - "(%c)", tb->tb_mode); - } -#endif - - if (PATH_H_POSITION(tb->tb_path, 1) == 0) - replace_key(tb, tb->CFL[0], tb->lkey[0], - PATH_H_PPARENT(tb->tb_path, 0), 0); - - } else { - /* replace lkey in CFL[0] by 0-th key from S[0]; */ - replace_key(tb, tb->CFL[0], tb->lkey[0], S0, 0); - - RFALSE((shift_bytes != -1 && - !(is_direntry_le_ih(B_N_PITEM_HEAD(S0, 0)) - && !I_ENTRY_COUNT(B_N_PITEM_HEAD(S0, 0)))) && - (!op_is_left_mergeable - (B_N_PKEY(S0, 0), S0->b_size)), - "vs-10280: item must be mergeable"); - } - } - - return i; -} - -/* CLEANING STOPPED HERE */ - -/* Shift shift_num (shift_bytes) items from S[0] to the right neighbor, and replace the delimiting key */ -int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes) -{ - // struct buffer_head * S0 = PATH_PLAST_BUFFER (tb->tb_path); - int ret_value; - - /* move shift_num (and shift_bytes) items from S[0] to right neighbor R[0] */ - ret_value = - leaf_move_items(LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, NULL); - - /* replace rkey in CFR[0] by the 0-th key from R[0] */ - if (shift_num) 
{ - replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); - - } - - return ret_value; -} - -static void leaf_delete_items_entirely(struct buffer_info *bi, - int first, int del_num); -/* If del_bytes == -1, starting from position 'first' delete del_num items in whole in buffer CUR. - If not. - If last_first == 0. Starting from position 'first' delete del_num-1 items in whole. Delete part of body of - the first item. Part defined by del_bytes. Don't delete first item header - If last_first == 1. Starting from position 'first+1' delete del_num-1 items in whole. Delete part of body of - the last item . Part defined by del_bytes. Don't delete last item header. -*/ -void leaf_delete_items(struct buffer_info *cur_bi, int last_first, - int first, int del_num, int del_bytes) -{ - struct buffer_head *bh; - int item_amount = B_NR_ITEMS(bh = cur_bi->bi_bh); - - RFALSE(!bh, "10155: bh is not defined"); - RFALSE(del_num < 0, "10160: del_num can not be < 0. del_num==%d", - del_num); - RFALSE(first < 0 - || first + del_num > item_amount, - "10165: invalid number of first item to be deleted (%d) or " - "no so much items (%d) to delete (only %d)", first, - first + del_num, item_amount); - - if (del_num == 0) - return; - - if (first == 0 && del_num == item_amount && del_bytes == -1) { - make_empty_node(cur_bi); - do_balance_mark_leaf_dirty(cur_bi->tb, bh, 0); - return; - } - - if (del_bytes == -1) - /* delete del_num items beginning from item in position first */ - leaf_delete_items_entirely(cur_bi, first, del_num); - else { - if (last_first == FIRST_TO_LAST) { - /* delete del_num-1 items beginning from item in position first */ - leaf_delete_items_entirely(cur_bi, first, del_num - 1); - - /* delete the part of the first item of the bh - do not delete item header - */ - leaf_cut_from_buffer(cur_bi, 0, 0, del_bytes); - } else { - struct item_head *ih; - int len; - - /* delete del_num-1 items beginning from item in position first+1 */ - leaf_delete_items_entirely(cur_bi, first + 1, - 
del_num - 1); - - ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1); - if (is_direntry_le_ih(ih)) - /* the last item is directory */ - /* len = numbers of directory entries in this item */ - len = ih_entry_count(ih); - else - /* len = body len of item */ - len = ih_item_len(ih); - - /* delete the part of the last item of the bh - do not delete item header - */ - leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1, - len - del_bytes, del_bytes); - } - } -} - -/* insert item into the leaf node in position before */ -void leaf_insert_into_buf(struct buffer_info *bi, int before, - struct item_head *inserted_item_ih, - const char *inserted_item_body, int zeros_number) -{ - struct buffer_head *bh = bi->bi_bh; - int nr, free_space; - struct block_head *blkh; - struct item_head *ih; - int i; - int last_loc, unmoved_loc; - char *to; - - blkh = B_BLK_HEAD(bh); - nr = blkh_nr_item(blkh); - free_space = blkh_free_space(blkh); - - /* check free space */ - RFALSE(free_space < ih_item_len(inserted_item_ih) + IH_SIZE, - "vs-10170: not enough free space in block %z, new item %h", - bh, inserted_item_ih); - RFALSE(zeros_number > ih_item_len(inserted_item_ih), - "vs-10172: zero number == %d, item length == %d", - zeros_number, ih_item_len(inserted_item_ih)); - - /* get item new item must be inserted before */ - ih = B_N_PITEM_HEAD(bh, before); - - /* prepare space for the body of new item */ - last_loc = nr ? ih_location(&(ih[nr - before - 1])) : bh->b_size; - unmoved_loc = before ? 
ih_location(ih - 1) : bh->b_size; - - memmove(bh->b_data + last_loc - ih_item_len(inserted_item_ih), - bh->b_data + last_loc, unmoved_loc - last_loc); - - to = bh->b_data + unmoved_loc - ih_item_len(inserted_item_ih); - memset(to, 0, zeros_number); - to += zeros_number; - - /* copy body to prepared space */ - if (inserted_item_body) - memmove(to, inserted_item_body, - ih_item_len(inserted_item_ih) - zeros_number); - else - memset(to, '\0', ih_item_len(inserted_item_ih) - zeros_number); - - /* insert item header */ - memmove(ih + 1, ih, IH_SIZE * (nr - before)); - memmove(ih, inserted_item_ih, IH_SIZE); - - /* change locations */ - for (i = before; i < nr + 1; i++) { - unmoved_loc -= ih_item_len(&(ih[i - before])); - put_ih_location(&(ih[i - before]), unmoved_loc); - } - - /* sizes, free space, item number */ - set_blkh_nr_item(blkh, blkh_nr_item(blkh) + 1); - set_blkh_free_space(blkh, - free_space - (IH_SIZE + - ih_item_len(inserted_item_ih))); - do_balance_mark_leaf_dirty(bi->tb, bh, 1); - - if (bi->bi_parent) { - struct disk_child *t_dc; - t_dc = B_N_CHILD(bi->bi_parent, bi->bi_position); - put_dc_size(t_dc, - dc_size(t_dc) + (IH_SIZE + - ih_item_len(inserted_item_ih))); - do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0); - } -} - -/* paste paste_size bytes to affected_item_num-th item. 
- When item is a directory, this only prepare space for new entries */ -void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num, - int pos_in_item, int paste_size, - const char *body, int zeros_number) -{ - struct buffer_head *bh = bi->bi_bh; - int nr, free_space; - struct block_head *blkh; - struct item_head *ih; - int i; - int last_loc, unmoved_loc; - - blkh = B_BLK_HEAD(bh); - nr = blkh_nr_item(blkh); - free_space = blkh_free_space(blkh); - - /* check free space */ - RFALSE(free_space < paste_size, - "vs-10175: not enough free space: needed %d, available %d", - paste_size, free_space); - -#ifdef CONFIG_REISERFS_CHECK - if (zeros_number > paste_size) { - struct super_block *sb = NULL; - if (bi && bi->tb) - sb = bi->tb->tb_sb; - print_cur_tb("10177"); - reiserfs_panic(sb, "vs-10177", - "zeros_number == %d, paste_size == %d", - zeros_number, paste_size); - } -#endif /* CONFIG_REISERFS_CHECK */ - - /* item to be appended */ - ih = B_N_PITEM_HEAD(bh, affected_item_num); - - last_loc = ih_location(&(ih[nr - affected_item_num - 1])); - unmoved_loc = affected_item_num ? 
ih_location(ih - 1) : bh->b_size; - - /* prepare space */ - memmove(bh->b_data + last_loc - paste_size, bh->b_data + last_loc, - unmoved_loc - last_loc); - - /* change locations */ - for (i = affected_item_num; i < nr; i++) - put_ih_location(&(ih[i - affected_item_num]), - ih_location(&(ih[i - affected_item_num])) - - paste_size); - - if (body) { - if (!is_direntry_le_ih(ih)) { - if (!pos_in_item) { - /* shift data to right */ - memmove(bh->b_data + ih_location(ih) + - paste_size, - bh->b_data + ih_location(ih), - ih_item_len(ih)); - /* paste data in the head of item */ - memset(bh->b_data + ih_location(ih), 0, - zeros_number); - memcpy(bh->b_data + ih_location(ih) + - zeros_number, body, - paste_size - zeros_number); - } else { - memset(bh->b_data + unmoved_loc - paste_size, 0, - zeros_number); - memcpy(bh->b_data + unmoved_loc - paste_size + - zeros_number, body, - paste_size - zeros_number); - } - } - } else - memset(bh->b_data + unmoved_loc - paste_size, '\0', paste_size); - - put_ih_item_len(ih, ih_item_len(ih) + paste_size); - - /* change free space */ - set_blkh_free_space(blkh, free_space - paste_size); - - do_balance_mark_leaf_dirty(bi->tb, bh, 0); - - if (bi->bi_parent) { - struct disk_child *t_dc = - B_N_CHILD(bi->bi_parent, bi->bi_position); - put_dc_size(t_dc, dc_size(t_dc) + paste_size); - do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0); - } -} - -/* cuts DEL_COUNT entries beginning from FROM-th entry. Directory item - does not have free space, so it moves DEHs and remaining records as - necessary. Return value is size of removed part of directory item - in bytes. 
*/ -static int leaf_cut_entries(struct buffer_head *bh, - struct item_head *ih, int from, int del_count) -{ - char *item; - struct reiserfs_de_head *deh; - int prev_record_offset; /* offset of record, that is (from-1)th */ - char *prev_record; /* */ - int cut_records_len; /* length of all removed records */ - int i; - - /* make sure, that item is directory and there are enough entries to - remove */ - RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item"); - RFALSE(I_ENTRY_COUNT(ih) < from + del_count, - "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d", - I_ENTRY_COUNT(ih), from, del_count); - - if (del_count == 0) - return 0; - - /* first byte of item */ - item = bh->b_data + ih_location(ih); - - /* entry head array */ - deh = B_I_DEH(bh, ih); - - /* first byte of remaining entries, those are BEFORE cut entries - (prev_record) and length of all removed records (cut_records_len) */ - prev_record_offset = - (from ? deh_location(&(deh[from - 1])) : ih_item_len(ih)); - cut_records_len = prev_record_offset /*from_record */ - - deh_location(&(deh[from + del_count - 1])); - prev_record = item + prev_record_offset; - - /* adjust locations of remaining entries */ - for (i = I_ENTRY_COUNT(ih) - 1; i > from + del_count - 1; i--) - put_deh_location(&(deh[i]), - deh_location(&deh[i]) - - (DEH_SIZE * del_count)); - - for (i = 0; i < from; i++) - put_deh_location(&(deh[i]), - deh_location(&deh[i]) - (DEH_SIZE * del_count + - cut_records_len)); - - put_ih_entry_count(ih, ih_entry_count(ih) - del_count); - - /* shift entry head array and entries those are AFTER removed entries */ - memmove((char *)(deh + from), - deh + from + del_count, - prev_record - cut_records_len - (char *)(deh + from + - del_count)); - - /* shift records, those are BEFORE removed entries */ - memmove(prev_record - cut_records_len - DEH_SIZE * del_count, - prev_record, item + ih_item_len(ih) - prev_record); - - return DEH_SIZE * del_count + cut_records_len; 
-} - -/* when cut item is part of regular file - pos_in_item - first byte that must be cut - cut_size - number of bytes to be cut beginning from pos_in_item - - when cut item is part of directory - pos_in_item - number of first deleted entry - cut_size - count of deleted entries - */ -void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, - int pos_in_item, int cut_size) -{ - int nr; - struct buffer_head *bh = bi->bi_bh; - struct block_head *blkh; - struct item_head *ih; - int last_loc, unmoved_loc; - int i; - - blkh = B_BLK_HEAD(bh); - nr = blkh_nr_item(blkh); - - /* item head of truncated item */ - ih = B_N_PITEM_HEAD(bh, cut_item_num); - - if (is_direntry_le_ih(ih)) { - /* first cut entry () */ - cut_size = leaf_cut_entries(bh, ih, pos_in_item, cut_size); - if (pos_in_item == 0) { - /* change key */ - RFALSE(cut_item_num, - "when 0-th enrty of item is cut, that item must be first in the node, not %d-th", - cut_item_num); - /* change item key by key of first entry in the item */ - set_le_ih_k_offset(ih, deh_offset(B_I_DEH(bh, ih))); - /*memcpy (&ih->ih_key.k_offset, &(B_I_DEH (bh, ih)->deh_offset), SHORT_KEY_SIZE); */ - } - } else { - /* item is direct or indirect */ - RFALSE(is_statdata_le_ih(ih), "10195: item is stat data"); - RFALSE(pos_in_item && pos_in_item + cut_size != ih_item_len(ih), - "10200: invalid offset (%lu) or trunc_size (%lu) or ih_item_len (%lu)", - (long unsigned)pos_in_item, (long unsigned)cut_size, - (long unsigned)ih_item_len(ih)); - - /* shift item body to left if cut is from the head of item */ - if (pos_in_item == 0) { - memmove(bh->b_data + ih_location(ih), - bh->b_data + ih_location(ih) + cut_size, - ih_item_len(ih) - cut_size); - - /* change key of item */ - if (is_direct_le_ih(ih)) - set_le_ih_k_offset(ih, - le_ih_k_offset(ih) + - cut_size); - else { - set_le_ih_k_offset(ih, - le_ih_k_offset(ih) + - (cut_size / UNFM_P_SIZE) * - bh->b_size); - RFALSE(ih_item_len(ih) == cut_size - && get_ih_free_space(ih), - "10205: invalid 
ih_free_space (%h)", ih); - } - } - } - - /* location of the last item */ - last_loc = ih_location(&(ih[nr - cut_item_num - 1])); - - /* location of the item, which is remaining at the same place */ - unmoved_loc = cut_item_num ? ih_location(ih - 1) : bh->b_size; - - /* shift */ - memmove(bh->b_data + last_loc + cut_size, bh->b_data + last_loc, - unmoved_loc - last_loc - cut_size); - - /* change item length */ - put_ih_item_len(ih, ih_item_len(ih) - cut_size); - - if (is_indirect_le_ih(ih)) { - if (pos_in_item) - set_ih_free_space(ih, 0); - } - - /* change locations */ - for (i = cut_item_num; i < nr; i++) - put_ih_location(&(ih[i - cut_item_num]), - ih_location(&ih[i - cut_item_num]) + cut_size); - - /* size, free space */ - set_blkh_free_space(blkh, blkh_free_space(blkh) + cut_size); - - do_balance_mark_leaf_dirty(bi->tb, bh, 0); - - if (bi->bi_parent) { - struct disk_child *t_dc; - t_dc = B_N_CHILD(bi->bi_parent, bi->bi_position); - put_dc_size(t_dc, dc_size(t_dc) - cut_size); - do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0); - } -} - -/* delete del_num items from buffer starting from the first'th item */ -static void leaf_delete_items_entirely(struct buffer_info *bi, - int first, int del_num) -{ - struct buffer_head *bh = bi->bi_bh; - int nr; - int i, j; - int last_loc, last_removed_loc; - struct block_head *blkh; - struct item_head *ih; - - RFALSE(bh == NULL, "10210: buffer is 0"); - RFALSE(del_num < 0, "10215: del_num less than 0 (%d)", del_num); - - if (del_num == 0) - return; - - blkh = B_BLK_HEAD(bh); - nr = blkh_nr_item(blkh); - - RFALSE(first < 0 || first + del_num > nr, - "10220: first=%d, number=%d, there is %d items", first, del_num, - nr); - - if (first == 0 && del_num == nr) { - /* this does not work */ - make_empty_node(bi); - - do_balance_mark_leaf_dirty(bi->tb, bh, 0); - return; - } - - ih = B_N_PITEM_HEAD(bh, first); - - /* location of unmovable item */ - j = (first == 0) ? 
bh->b_size : ih_location(ih - 1); - - /* delete items */ - last_loc = ih_location(&(ih[nr - 1 - first])); - last_removed_loc = ih_location(&(ih[del_num - 1])); - - memmove(bh->b_data + last_loc + j - last_removed_loc, - bh->b_data + last_loc, last_removed_loc - last_loc); - - /* delete item headers */ - memmove(ih, ih + del_num, (nr - first - del_num) * IH_SIZE); - - /* change item location */ - for (i = first; i < nr - del_num; i++) - put_ih_location(&(ih[i - first]), - ih_location(&(ih[i - first])) + (j - - last_removed_loc)); - - /* sizes, item number */ - set_blkh_nr_item(blkh, blkh_nr_item(blkh) - del_num); - set_blkh_free_space(blkh, - blkh_free_space(blkh) + (j - last_removed_loc + - IH_SIZE * del_num)); - - do_balance_mark_leaf_dirty(bi->tb, bh, 0); - - if (bi->bi_parent) { - struct disk_child *t_dc = - B_N_CHILD(bi->bi_parent, bi->bi_position); - put_dc_size(t_dc, - dc_size(t_dc) - (j - last_removed_loc + - IH_SIZE * del_num)); - do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0); - } -} - -/* paste new_entry_count entries (new_dehs, records) into position before to item_num-th item */ -void leaf_paste_entries(struct buffer_info *bi, - int item_num, - int before, - int new_entry_count, - struct reiserfs_de_head *new_dehs, - const char *records, int paste_size) -{ - struct item_head *ih; - char *item; - struct reiserfs_de_head *deh; - char *insert_point; - int i, old_entry_num; - struct buffer_head *bh = bi->bi_bh; - - if (new_entry_count == 0) - return; - - ih = B_N_PITEM_HEAD(bh, item_num); - - /* make sure, that item is directory, and there are enough records in it */ - RFALSE(!is_direntry_le_ih(ih), "10225: item is not directory item"); - RFALSE(I_ENTRY_COUNT(ih) < before, - "10230: there are no entry we paste entries before. 
entry_count = %d, before = %d", - I_ENTRY_COUNT(ih), before); - - /* first byte of dest item */ - item = bh->b_data + ih_location(ih); - - /* entry head array */ - deh = B_I_DEH(bh, ih); - - /* new records will be pasted at this point */ - insert_point = - item + - (before ? deh_location(&(deh[before - 1])) - : (ih_item_len(ih) - paste_size)); - - /* adjust locations of records that will be AFTER new records */ - for (i = I_ENTRY_COUNT(ih) - 1; i >= before; i--) - put_deh_location(&(deh[i]), - deh_location(&(deh[i])) + - (DEH_SIZE * new_entry_count)); - - /* adjust locations of records that will be BEFORE new records */ - for (i = 0; i < before; i++) - put_deh_location(&(deh[i]), - deh_location(&(deh[i])) + paste_size); - - old_entry_num = I_ENTRY_COUNT(ih); - put_ih_entry_count(ih, ih_entry_count(ih) + new_entry_count); - - /* prepare space for pasted records */ - memmove(insert_point + paste_size, insert_point, - item + (ih_item_len(ih) - paste_size) - insert_point); - - /* copy new records */ - memcpy(insert_point + DEH_SIZE * new_entry_count, records, - paste_size - DEH_SIZE * new_entry_count); - - /* prepare space for new entry heads */ - deh += before; - memmove((char *)(deh + new_entry_count), deh, - insert_point - (char *)deh); - - /* copy new entry heads */ - deh = (struct reiserfs_de_head *)((char *)deh); - memcpy(deh, new_dehs, DEH_SIZE * new_entry_count); - - /* set locations of new records */ - for (i = 0; i < new_entry_count; i++) { - put_deh_location(&(deh[i]), - deh_location(&(deh[i])) + - (-deh_location - (&(new_dehs[new_entry_count - 1])) + - insert_point + DEH_SIZE * new_entry_count - - item)); - } - - /* change item key if necessary (when we paste before 0-th entry */ - if (!before) { - set_le_ih_k_offset(ih, deh_offset(new_dehs)); -/* memcpy (&ih->ih_key.k_offset, - &new_dehs->deh_offset, SHORT_KEY_SIZE);*/ - } -#ifdef CONFIG_REISERFS_CHECK - { - int prev, next; - /* check record locations */ - deh = B_I_DEH(bh, ih); - for (i = 0; i < 
I_ENTRY_COUNT(ih); i++) { - next = - (i < - I_ENTRY_COUNT(ih) - - 1) ? deh_location(&(deh[i + 1])) : 0; - prev = (i != 0) ? deh_location(&(deh[i - 1])) : 0; - - if (prev && prev <= deh_location(&(deh[i]))) - reiserfs_error(sb_from_bi(bi), "vs-10240", - "directory item (%h) " - "corrupted (prev %a, " - "cur(%d) %a)", - ih, deh + i - 1, i, deh + i); - if (next && next >= deh_location(&(deh[i]))) - reiserfs_error(sb_from_bi(bi), "vs-10250", - "directory item (%h) " - "corrupted (cur(%d) %a, " - "next %a)", - ih, i, deh + i, deh + i + 1); - } - } -#endif - -} diff --git a/ANDROID_3.4.5/fs/reiserfs/lock.c b/ANDROID_3.4.5/fs/reiserfs/lock.c deleted file mode 100644 index d735bc84..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/lock.c +++ /dev/null @@ -1,97 +0,0 @@ -#include "reiserfs.h" -#include <linux/mutex.h> - -/* - * The previous reiserfs locking scheme was heavily based on - * the tricky properties of the Bkl: - * - * - it was acquired recursively by a same task - * - the performances relied on the release-while-schedule() property - * - * Now that we replace it by a mutex, we still want to keep the same - * recursive property to avoid big changes in the code structure. - * We use our own lock_owner here because the owner field on a mutex - * is only available in SMP or mutex debugging, also we only need this field - * for this mutex, no need for a system wide mutex facility. - * - * Also this lock is often released before a call that could block because - * reiserfs performances were partially based on the release while schedule() - * property of the Bkl. 
- */ -void reiserfs_write_lock(struct super_block *s) -{ - struct reiserfs_sb_info *sb_i = REISERFS_SB(s); - - if (sb_i->lock_owner != current) { - mutex_lock(&sb_i->lock); - sb_i->lock_owner = current; - } - - /* No need to protect it, only the current task touches it */ - sb_i->lock_depth++; -} - -void reiserfs_write_unlock(struct super_block *s) -{ - struct reiserfs_sb_info *sb_i = REISERFS_SB(s); - - /* - * Are we unlocking without even holding the lock? - * Such a situation must raise a BUG() if we don't want - * to corrupt the data. - */ - BUG_ON(sb_i->lock_owner != current); - - if (--sb_i->lock_depth == -1) { - sb_i->lock_owner = NULL; - mutex_unlock(&sb_i->lock); - } -} - -/* - * If we already own the lock, just exit and don't increase the depth. - * Useful when we don't want to lock more than once. - * - * We always return the lock_depth we had before calling - * this function. - */ -int reiserfs_write_lock_once(struct super_block *s) -{ - struct reiserfs_sb_info *sb_i = REISERFS_SB(s); - - if (sb_i->lock_owner != current) { - mutex_lock(&sb_i->lock); - sb_i->lock_owner = current; - return sb_i->lock_depth++; - } - - return sb_i->lock_depth; -} - -void reiserfs_write_unlock_once(struct super_block *s, int lock_depth) -{ - if (lock_depth == -1) - reiserfs_write_unlock(s); -} - -/* - * Utility function to force a BUG if it is called without the superblock - * write lock held. 
caller is the string printed just before calling BUG() - */ -void reiserfs_check_lock_depth(struct super_block *sb, char *caller) -{ - struct reiserfs_sb_info *sb_i = REISERFS_SB(sb); - - if (sb_i->lock_depth < 0) - reiserfs_panic(sb, "%s called without kernel lock held %d", - caller); -} - -#ifdef CONFIG_REISERFS_CHECK -void reiserfs_lock_check_recursive(struct super_block *sb) -{ - struct reiserfs_sb_info *sb_i = REISERFS_SB(sb); - - WARN_ONCE((sb_i->lock_depth > 0), "Unwanted recursive reiserfs lock!\n"); -} -#endif diff --git a/ANDROID_3.4.5/fs/reiserfs/namei.c b/ANDROID_3.4.5/fs/reiserfs/namei.c deleted file mode 100644 index 84e8a69c..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/namei.c +++ /dev/null @@ -1,1564 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - * - * Trivial changes by Alan Cox to remove EHASHCOLLISION for compatibility - * - * Trivial Changes: - * Rights granted to Hans Reiser to redistribute under other terms providing - * he accepts all liability including but not limited to patent, fitness - * for purpose, and direct or indirect claims arising from failure to perform. - * - * NO WARRANTY - */ - -#include <linux/time.h> -#include <linux/bitops.h> -#include <linux/slab.h> -#include "reiserfs.h" -#include "acl.h" -#include "xattr.h" -#include <linux/quotaops.h> - -#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); } -#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); - -// directory item contains array of entry headers. 
This performs -// binary search through that array -static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off) -{ - struct item_head *ih = de->de_ih; - struct reiserfs_de_head *deh = de->de_deh; - int rbound, lbound, j; - - lbound = 0; - rbound = I_ENTRY_COUNT(ih) - 1; - - for (j = (rbound + lbound) / 2; lbound <= rbound; - j = (rbound + lbound) / 2) { - if (off < deh_offset(deh + j)) { - rbound = j - 1; - continue; - } - if (off > deh_offset(deh + j)) { - lbound = j + 1; - continue; - } - // this is not name found, but matched third key component - de->de_entry_num = j; - return NAME_FOUND; - } - - de->de_entry_num = lbound; - return NAME_NOT_FOUND; -} - -// comment? maybe something like set de to point to what the path points to? -static inline void set_de_item_location(struct reiserfs_dir_entry *de, - struct treepath *path) -{ - de->de_bh = get_last_bh(path); - de->de_ih = get_ih(path); - de->de_deh = B_I_DEH(de->de_bh, de->de_ih); - de->de_item_num = PATH_LAST_POSITION(path); -} - -// de_bh, de_ih, de_deh (points to first element of array), de_item_num is set -inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de) -{ - struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; - - BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih)); - - de->de_entrylen = entry_length(de->de_bh, de->de_ih, de->de_entry_num); - de->de_namelen = de->de_entrylen - (de_with_sd(deh) ? 
SD_SIZE : 0); - de->de_name = B_I_PITEM(de->de_bh, de->de_ih) + deh_location(deh); - if (de->de_name[de->de_namelen - 1] == 0) - de->de_namelen = strlen(de->de_name); -} - -// what entry points to -static inline void set_de_object_key(struct reiserfs_dir_entry *de) -{ - BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih)); - de->de_dir_id = deh_dir_id(&(de->de_deh[de->de_entry_num])); - de->de_objectid = deh_objectid(&(de->de_deh[de->de_entry_num])); -} - -static inline void store_de_entry_key(struct reiserfs_dir_entry *de) -{ - struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; - - BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih)); - - /* store key of the found entry */ - de->de_entry_key.version = KEY_FORMAT_3_5; - de->de_entry_key.on_disk_key.k_dir_id = - le32_to_cpu(de->de_ih->ih_key.k_dir_id); - de->de_entry_key.on_disk_key.k_objectid = - le32_to_cpu(de->de_ih->ih_key.k_objectid); - set_cpu_key_k_offset(&(de->de_entry_key), deh_offset(deh)); - set_cpu_key_k_type(&(de->de_entry_key), TYPE_DIRENTRY); -} - -/* We assign a key to each directory item, and place multiple entries -in a single directory item. A directory item has a key equal to the -key of the first directory entry in it. - -This function first calls search_by_key, then, if item whose first -entry matches is not found it looks for the entry inside directory -item found by search_by_key. Fills the path to the entry, and to the -entry position in the item - -*/ - -/* The function is NOT SCHEDULE-SAFE! 
*/ -int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, - struct treepath *path, struct reiserfs_dir_entry *de) -{ - int retval; - - retval = search_item(sb, key, path); - switch (retval) { - case ITEM_NOT_FOUND: - if (!PATH_LAST_POSITION(path)) { - reiserfs_error(sb, "vs-7000", "search_by_key " - "returned item position == 0"); - pathrelse(path); - return IO_ERROR; - } - PATH_LAST_POSITION(path)--; - - case ITEM_FOUND: - break; - - case IO_ERROR: - return retval; - - default: - pathrelse(path); - reiserfs_error(sb, "vs-7002", "no path to here"); - return IO_ERROR; - } - - set_de_item_location(de, path); - -#ifdef CONFIG_REISERFS_CHECK - if (!is_direntry_le_ih(de->de_ih) || - COMP_SHORT_KEYS(&(de->de_ih->ih_key), key)) { - print_block(de->de_bh, 0, -1, -1); - reiserfs_panic(sb, "vs-7005", "found item %h is not directory " - "item or does not belong to the same directory " - "as key %K", de->de_ih, key); - } -#endif /* CONFIG_REISERFS_CHECK */ - - /* binary search in directory item by third componen t of the - key. sets de->de_entry_num of de */ - retval = bin_search_in_dir_item(de, cpu_key_k_offset(key)); - path->pos_in_item = de->de_entry_num; - if (retval != NAME_NOT_FOUND) { - // ugly, but rename needs de_bh, de_deh, de_name, de_namelen, de_objectid set - set_de_name_and_namelen(de); - set_de_object_key(de); - } - return retval; -} - -/* Keyed 32-bit hash function using TEA in a Davis-Meyer function */ - -/* The third component is hashed, and you can choose from more than - one hash function. Per directory hashes are not yet implemented - but are thought about. This function should be moved to hashes.c - Jedi, please do so. -Hans */ - -static __u32 get_third_component(struct super_block *s, - const char *name, int len) -{ - __u32 res; - - if (!len || (len == 1 && name[0] == '.')) - return DOT_OFFSET; - if (len == 2 && name[0] == '.' 
&& name[1] == '.') - return DOT_DOT_OFFSET; - - res = REISERFS_SB(s)->s_hash_function(name, len); - - // take bits from 7-th to 30-th including both bounds - res = GET_HASH_VALUE(res); - if (res == 0) - // needed to have no names before "." and ".." those have hash - // value == 0 and generation conters 1 and 2 accordingly - res = 128; - return res + MAX_GENERATION_NUMBER; -} - -static int reiserfs_match(struct reiserfs_dir_entry *de, - const char *name, int namelen) -{ - int retval = NAME_NOT_FOUND; - - if ((namelen == de->de_namelen) && - !memcmp(de->de_name, name, de->de_namelen)) - retval = - (de_visible(de->de_deh + de->de_entry_num) ? NAME_FOUND : - NAME_FOUND_INVISIBLE); - - return retval; -} - -/* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */ - - /* used when hash collisions exist */ - -static int linear_search_in_dir_item(struct cpu_key *key, - struct reiserfs_dir_entry *de, - const char *name, int namelen) -{ - struct reiserfs_de_head *deh = de->de_deh; - int retval; - int i; - - i = de->de_entry_num; - - if (i == I_ENTRY_COUNT(de->de_ih) || - GET_HASH_VALUE(deh_offset(deh + i)) != - GET_HASH_VALUE(cpu_key_k_offset(key))) { - i--; - } - - RFALSE(de->de_deh != B_I_DEH(de->de_bh, de->de_ih), - "vs-7010: array of entry headers not found"); - - deh += i; - - for (; i >= 0; i--, deh--) { - if (GET_HASH_VALUE(deh_offset(deh)) != - GET_HASH_VALUE(cpu_key_k_offset(key))) { - // hash value does not match, no need to check whole name - return NAME_NOT_FOUND; - } - - /* mark, that this generation number is used */ - if (de->de_gen_number_bit_string) - set_bit(GET_GENERATION_NUMBER(deh_offset(deh)), - de->de_gen_number_bit_string); - - // calculate pointer to name and namelen - de->de_entry_num = i; - set_de_name_and_namelen(de); - - if ((retval = - reiserfs_match(de, name, namelen)) != NAME_NOT_FOUND) { - // de's de_name, de_namelen, de_recordlen are set. 
Fill the rest: - - // key of pointed object - set_de_object_key(de); - - store_de_entry_key(de); - - // retval can be NAME_FOUND or NAME_FOUND_INVISIBLE - return retval; - } - } - - if (GET_GENERATION_NUMBER(le_ih_k_offset(de->de_ih)) == 0) - /* we have reached left most entry in the node. In common we - have to go to the left neighbor, but if generation counter - is 0 already, we know for sure, that there is no name with - the same hash value */ - // FIXME: this work correctly only because hash value can not - // be 0. Btw, in case of Yura's hash it is probably possible, - // so, this is a bug - return NAME_NOT_FOUND; - - RFALSE(de->de_item_num, - "vs-7015: two diritems of the same directory in one node?"); - - return GOTO_PREVIOUS_ITEM; -} - -// may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND -// FIXME: should add something like IOERROR -static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen, - struct treepath *path_to_entry, - struct reiserfs_dir_entry *de) -{ - struct cpu_key key_to_search; - int retval; - - if (namelen > REISERFS_MAX_NAME(dir->i_sb->s_blocksize)) - return NAME_NOT_FOUND; - - /* we will search for this key in the tree */ - make_cpu_key(&key_to_search, dir, - get_third_component(dir->i_sb, name, namelen), - TYPE_DIRENTRY, 3); - - while (1) { - retval = - search_by_entry_key(dir->i_sb, &key_to_search, - path_to_entry, de); - if (retval == IO_ERROR) { - reiserfs_error(dir->i_sb, "zam-7001", "io error"); - return IO_ERROR; - } - - /* compare names for all entries having given hash value */ - retval = - linear_search_in_dir_item(&key_to_search, de, name, - namelen); - if (retval != GOTO_PREVIOUS_ITEM) { - /* there is no need to scan directory anymore. 
Given entry found or does not exist */ - path_to_entry->pos_in_item = de->de_entry_num; - return retval; - } - - /* there is left neighboring item of this directory and given entry can be there */ - set_cpu_key_k_offset(&key_to_search, - le_ih_k_offset(de->de_ih) - 1); - pathrelse(path_to_entry); - - } /* while (1) */ -} - -static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) -{ - int retval; - int lock_depth; - struct inode *inode = NULL; - struct reiserfs_dir_entry de; - INITIALIZE_PATH(path_to_entry); - - if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len) - return ERR_PTR(-ENAMETOOLONG); - - /* - * Might be called with or without the write lock, must be careful - * to not recursively hold it in case we want to release the lock - * before rescheduling. - */ - lock_depth = reiserfs_write_lock_once(dir->i_sb); - - de.de_gen_number_bit_string = NULL; - retval = - reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, - &path_to_entry, &de); - pathrelse(&path_to_entry); - if (retval == NAME_FOUND) { - inode = reiserfs_iget(dir->i_sb, - (struct cpu_key *)&(de.de_dir_id)); - if (!inode || IS_ERR(inode)) { - reiserfs_write_unlock_once(dir->i_sb, lock_depth); - return ERR_PTR(-EACCES); - } - - /* Propagate the private flag so we know we're - * in the priv tree */ - if (IS_PRIVATE(dir)) - inode->i_flags |= S_PRIVATE; - } - reiserfs_write_unlock_once(dir->i_sb, lock_depth); - if (retval == IO_ERROR) { - return ERR_PTR(-EIO); - } - - return d_splice_alias(inode, dentry); -} - -/* -** looks up the dentry of the parent directory for child. 
-** taken from ext2_get_parent -*/ -struct dentry *reiserfs_get_parent(struct dentry *child) -{ - int retval; - struct inode *inode = NULL; - struct reiserfs_dir_entry de; - INITIALIZE_PATH(path_to_entry); - struct inode *dir = child->d_inode; - - if (dir->i_nlink == 0) { - return ERR_PTR(-ENOENT); - } - de.de_gen_number_bit_string = NULL; - - reiserfs_write_lock(dir->i_sb); - retval = reiserfs_find_entry(dir, "..", 2, &path_to_entry, &de); - pathrelse(&path_to_entry); - if (retval != NAME_FOUND) { - reiserfs_write_unlock(dir->i_sb); - return ERR_PTR(-ENOENT); - } - inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); - reiserfs_write_unlock(dir->i_sb); - - return d_obtain_alias(inode); -} - -/* add entry to the directory (entry can be hidden). - -insert definition of when hidden directories are used here -Hans - - Does not mark dir inode dirty, do it after successesfull call to it */ - -static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, - struct inode *dir, const char *name, int namelen, - struct inode *inode, int visible) -{ - struct cpu_key entry_key; - struct reiserfs_de_head *deh; - INITIALIZE_PATH(path); - struct reiserfs_dir_entry de; - DECLARE_BITMAP(bit_string, MAX_GENERATION_NUMBER + 1); - int gen_number; - char small_buf[32 + DEH_SIZE]; /* 48 bytes now and we avoid kmalloc - if we create file with short name */ - char *buffer; - int buflen, paste_size; - int retval; - - BUG_ON(!th->t_trans_id); - - /* cannot allow items to be added into a busy deleted directory */ - if (!namelen) - return -EINVAL; - - if (namelen > REISERFS_MAX_NAME(dir->i_sb->s_blocksize)) - return -ENAMETOOLONG; - - /* each entry has unique key. 
compose it */ - make_cpu_key(&entry_key, dir, - get_third_component(dir->i_sb, name, namelen), - TYPE_DIRENTRY, 3); - - /* get memory for composing the entry */ - buflen = DEH_SIZE + ROUND_UP(namelen); - if (buflen > sizeof(small_buf)) { - buffer = kmalloc(buflen, GFP_NOFS); - if (!buffer) - return -ENOMEM; - } else - buffer = small_buf; - - paste_size = - (get_inode_sd_version(dir) == - STAT_DATA_V1) ? (DEH_SIZE + namelen) : buflen; - - /* fill buffer : directory entry head, name[, dir objectid | , stat data | ,stat data, dir objectid ] */ - deh = (struct reiserfs_de_head *)buffer; - deh->deh_location = 0; /* JDM Endian safe if 0 */ - put_deh_offset(deh, cpu_key_k_offset(&entry_key)); - deh->deh_state = 0; /* JDM Endian safe if 0 */ - /* put key (ino analog) to de */ - deh->deh_dir_id = INODE_PKEY(inode)->k_dir_id; /* safe: k_dir_id is le */ - deh->deh_objectid = INODE_PKEY(inode)->k_objectid; /* safe: k_objectid is le */ - - /* copy name */ - memcpy((char *)(deh + 1), name, namelen); - /* padd by 0s to the 4 byte boundary */ - padd_item((char *)(deh + 1), ROUND_UP(namelen), namelen); - - /* entry is ready to be pasted into tree, set 'visibility' and 'stat data in entry' attributes */ - mark_de_without_sd(deh); - visible ? 
mark_de_visible(deh) : mark_de_hidden(deh); - - /* find the proper place for the new entry */ - memset(bit_string, 0, sizeof(bit_string)); - de.de_gen_number_bit_string = bit_string; - retval = reiserfs_find_entry(dir, name, namelen, &path, &de); - if (retval != NAME_NOT_FOUND) { - if (buffer != small_buf) - kfree(buffer); - pathrelse(&path); - - if (retval == IO_ERROR) { - return -EIO; - } - - if (retval != NAME_FOUND) { - reiserfs_error(dir->i_sb, "zam-7002", - "reiserfs_find_entry() returned " - "unexpected value (%d)", retval); - } - - return -EEXIST; - } - - gen_number = - find_first_zero_bit(bit_string, - MAX_GENERATION_NUMBER + 1); - if (gen_number > MAX_GENERATION_NUMBER) { - /* there is no free generation number */ - reiserfs_warning(dir->i_sb, "reiserfs-7010", - "Congratulations! we have got hash function " - "screwed up"); - if (buffer != small_buf) - kfree(buffer); - pathrelse(&path); - return -EBUSY; - } - /* adjust offset of directory enrty */ - put_deh_offset(deh, SET_GENERATION_NUMBER(deh_offset(deh), gen_number)); - set_cpu_key_k_offset(&entry_key, deh_offset(deh)); - - /* update max-hash-collisions counter in reiserfs_sb_info */ - PROC_INFO_MAX(th->t_super, max_hash_collisions, gen_number); - - if (gen_number != 0) { /* we need to re-search for the insertion point */ - if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) != - NAME_NOT_FOUND) { - reiserfs_warning(dir->i_sb, "vs-7032", - "entry with this key (%K) already " - "exists", &entry_key); - - if (buffer != small_buf) - kfree(buffer); - pathrelse(&path); - return -EBUSY; - } - } - - /* perform the insertion of the entry that we have prepared */ - retval = - reiserfs_paste_into_item(th, &path, &entry_key, dir, buffer, - paste_size); - if (buffer != small_buf) - kfree(buffer); - if (retval) { - reiserfs_check_path(&path); - return retval; - } - - dir->i_size += paste_size; - dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; - if (!S_ISDIR(inode->i_mode) && visible) - // reiserfs_mkdir or 
reiserfs_rename will do that by itself - reiserfs_update_sd(th, dir); - - reiserfs_check_path(&path); - return 0; -} - -/* quota utility function, call if you've had to abort after calling -** new_inode_init, and have not called reiserfs_new_inode yet. -** This should only be called on inodes that do not have stat data -** inserted into the tree yet. -*/ -static int drop_new_inode(struct inode *inode) -{ - dquot_drop(inode); - make_bad_inode(inode); - inode->i_flags |= S_NOQUOTA; - iput(inode); - return 0; -} - -/* utility function that does setup for reiserfs_new_inode. -** dquot_initialize needs lots of credits so it's better to have it -** outside of a transaction, so we had to pull some bits of -** reiserfs_new_inode out into this func. -*/ -static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode) -{ - /* Make inode invalid - just in case we are going to drop it before - * the initialization happens */ - INODE_PKEY(inode)->k_objectid = 0; - /* the quota init calls have to know who to charge the quota to, so - ** we have to set uid and gid here - */ - inode_init_owner(inode, dir, mode); - dquot_initialize(inode); - return 0; -} - -static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) -{ - int retval; - struct inode *inode; - /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ - int jbegin_count = - JOURNAL_PER_BALANCE_CNT * 2 + - 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + - REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); - struct reiserfs_transaction_handle th; - struct reiserfs_security_handle security; - - dquot_initialize(dir); - - if (!(inode = new_inode(dir->i_sb))) { - return -ENOMEM; - } - new_inode_init(inode, dir, mode); - - jbegin_count += reiserfs_cache_default_acl(dir); - retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security); - if (retval < 0) { - drop_new_inode(inode); - return retval; - } - 
jbegin_count += retval; - reiserfs_write_lock(dir->i_sb); - - retval = journal_begin(&th, dir->i_sb, jbegin_count); - if (retval) { - drop_new_inode(inode); - goto out_failed; - } - - retval = - reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry, - inode, &security); - if (retval) - goto out_failed; - - inode->i_op = &reiserfs_file_inode_operations; - inode->i_fop = &reiserfs_file_operations; - inode->i_mapping->a_ops = &reiserfs_address_space_operations; - - retval = - reiserfs_add_entry(&th, dir, dentry->d_name.name, - dentry->d_name.len, inode, 1 /*visible */ ); - if (retval) { - int err; - drop_nlink(inode); - reiserfs_update_sd(&th, inode); - err = journal_end(&th, dir->i_sb, jbegin_count); - if (err) - retval = err; - unlock_new_inode(inode); - iput(inode); - goto out_failed; - } - reiserfs_update_inode_transaction(inode); - reiserfs_update_inode_transaction(dir); - - d_instantiate(dentry, inode); - unlock_new_inode(inode); - retval = journal_end(&th, dir->i_sb, jbegin_count); - - out_failed: - reiserfs_write_unlock(dir->i_sb); - return retval; -} - -static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, - dev_t rdev) -{ - int retval; - struct inode *inode; - struct reiserfs_transaction_handle th; - struct reiserfs_security_handle security; - /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ - int jbegin_count = - JOURNAL_PER_BALANCE_CNT * 3 + - 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + - REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); - - if (!new_valid_dev(rdev)) - return -EINVAL; - - dquot_initialize(dir); - - if (!(inode = new_inode(dir->i_sb))) { - return -ENOMEM; - } - new_inode_init(inode, dir, mode); - - jbegin_count += reiserfs_cache_default_acl(dir); - retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security); - if (retval < 0) { - drop_new_inode(inode); - return retval; - } - jbegin_count += retval; - reiserfs_write_lock(dir->i_sb); - - 
retval = journal_begin(&th, dir->i_sb, jbegin_count); - if (retval) { - drop_new_inode(inode); - goto out_failed; - } - - retval = - reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry, - inode, &security); - if (retval) { - goto out_failed; - } - - inode->i_op = &reiserfs_special_inode_operations; - init_special_inode(inode, inode->i_mode, rdev); - - //FIXME: needed for block and char devices only - reiserfs_update_sd(&th, inode); - - reiserfs_update_inode_transaction(inode); - reiserfs_update_inode_transaction(dir); - - retval = - reiserfs_add_entry(&th, dir, dentry->d_name.name, - dentry->d_name.len, inode, 1 /*visible */ ); - if (retval) { - int err; - drop_nlink(inode); - reiserfs_update_sd(&th, inode); - err = journal_end(&th, dir->i_sb, jbegin_count); - if (err) - retval = err; - unlock_new_inode(inode); - iput(inode); - goto out_failed; - } - - d_instantiate(dentry, inode); - unlock_new_inode(inode); - retval = journal_end(&th, dir->i_sb, jbegin_count); - - out_failed: - reiserfs_write_unlock(dir->i_sb); - return retval; -} - -static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) -{ - int retval; - struct inode *inode; - struct reiserfs_transaction_handle th; - struct reiserfs_security_handle security; - int lock_depth; - /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ - int jbegin_count = - JOURNAL_PER_BALANCE_CNT * 3 + - 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + - REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); - - dquot_initialize(dir); - -#ifdef DISPLACE_NEW_PACKING_LOCALITIES - /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */ - REISERFS_I(dir)->new_packing_locality = 1; -#endif - mode = S_IFDIR | mode; - if (!(inode = new_inode(dir->i_sb))) { - return -ENOMEM; - } - new_inode_init(inode, dir, mode); - - jbegin_count += reiserfs_cache_default_acl(dir); - retval = 
reiserfs_security_init(dir, inode, &dentry->d_name, &security); - if (retval < 0) { - drop_new_inode(inode); - return retval; - } - jbegin_count += retval; - lock_depth = reiserfs_write_lock_once(dir->i_sb); - - retval = journal_begin(&th, dir->i_sb, jbegin_count); - if (retval) { - drop_new_inode(inode); - goto out_failed; - } - - /* inc the link count now, so another writer doesn't overflow it while - ** we sleep later on. - */ - INC_DIR_INODE_NLINK(dir) - - retval = reiserfs_new_inode(&th, dir, mode, NULL /*symlink */ , - old_format_only(dir->i_sb) ? - EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE, - dentry, inode, &security); - if (retval) { - DEC_DIR_INODE_NLINK(dir) - goto out_failed; - } - - reiserfs_update_inode_transaction(inode); - reiserfs_update_inode_transaction(dir); - - inode->i_op = &reiserfs_dir_inode_operations; - inode->i_fop = &reiserfs_dir_operations; - - // note, _this_ add_entry will not update dir's stat data - retval = - reiserfs_add_entry(&th, dir, dentry->d_name.name, - dentry->d_name.len, inode, 1 /*visible */ ); - if (retval) { - int err; - clear_nlink(inode); - DEC_DIR_INODE_NLINK(dir); - reiserfs_update_sd(&th, inode); - err = journal_end(&th, dir->i_sb, jbegin_count); - if (err) - retval = err; - unlock_new_inode(inode); - iput(inode); - goto out_failed; - } - // the above add_entry did not update dir's stat data - reiserfs_update_sd(&th, dir); - - d_instantiate(dentry, inode); - unlock_new_inode(inode); - retval = journal_end(&th, dir->i_sb, jbegin_count); -out_failed: - reiserfs_write_unlock_once(dir->i_sb, lock_depth); - return retval; -} - -static inline int reiserfs_empty_dir(struct inode *inode) -{ - /* we can cheat because an old format dir cannot have - ** EMPTY_DIR_SIZE, and a new format dir cannot have - ** EMPTY_DIR_SIZE_V1. So, if the inode is either size, - ** regardless of disk format version, the directory is empty. 
- */ - if (inode->i_size != EMPTY_DIR_SIZE && - inode->i_size != EMPTY_DIR_SIZE_V1) { - return 0; - } - return 1; -} - -static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) -{ - int retval, err; - struct inode *inode; - struct reiserfs_transaction_handle th; - int jbegin_count; - INITIALIZE_PATH(path); - struct reiserfs_dir_entry de; - - /* we will be doing 2 balancings and update 2 stat data, we change quotas - * of the owner of the directory and of the owner of the parent directory. - * The quota structure is possibly deleted only on last iput => outside - * of this transaction */ - jbegin_count = - JOURNAL_PER_BALANCE_CNT * 2 + 2 + - 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); - - dquot_initialize(dir); - - reiserfs_write_lock(dir->i_sb); - retval = journal_begin(&th, dir->i_sb, jbegin_count); - if (retval) - goto out_rmdir; - - de.de_gen_number_bit_string = NULL; - if ((retval = - reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, - &path, &de)) == NAME_NOT_FOUND) { - retval = -ENOENT; - goto end_rmdir; - } else if (retval == IO_ERROR) { - retval = -EIO; - goto end_rmdir; - } - - inode = dentry->d_inode; - - reiserfs_update_inode_transaction(inode); - reiserfs_update_inode_transaction(dir); - - if (de.de_objectid != inode->i_ino) { - // FIXME: compare key of an object and a key found in the - // entry - retval = -EIO; - goto end_rmdir; - } - if (!reiserfs_empty_dir(inode)) { - retval = -ENOTEMPTY; - goto end_rmdir; - } - - /* cut entry from dir directory */ - retval = reiserfs_cut_from_item(&th, &path, &(de.de_entry_key), dir, NULL, /* page */ - 0 /*new file size - not used here */ ); - if (retval < 0) - goto end_rmdir; - - if (inode->i_nlink != 2 && inode->i_nlink != 1) - reiserfs_error(inode->i_sb, "reiserfs-7040", - "empty directory has nlink != 2 (%d)", - inode->i_nlink); - - clear_nlink(inode); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; - reiserfs_update_sd(&th, inode); - - DEC_DIR_INODE_NLINK(dir) - 
dir->i_size -= (DEH_SIZE + de.de_entrylen); - reiserfs_update_sd(&th, dir); - - /* prevent empty directory from getting lost */ - add_save_link(&th, inode, 0 /* not truncate */ ); - - retval = journal_end(&th, dir->i_sb, jbegin_count); - reiserfs_check_path(&path); - out_rmdir: - reiserfs_write_unlock(dir->i_sb); - return retval; - - end_rmdir: - /* we must release path, because we did not call - reiserfs_cut_from_item, or reiserfs_cut_from_item does not - release path if operation was not complete */ - pathrelse(&path); - err = journal_end(&th, dir->i_sb, jbegin_count); - reiserfs_write_unlock(dir->i_sb); - return err ? err : retval; -} - -static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) -{ - int retval, err; - struct inode *inode; - struct reiserfs_dir_entry de; - INITIALIZE_PATH(path); - struct reiserfs_transaction_handle th; - int jbegin_count; - unsigned long savelink; - int depth; - - dquot_initialize(dir); - - inode = dentry->d_inode; - - /* in this transaction we can be doing at max two balancings and update - * two stat datas, we change quotas of the owner of the directory and of - * the owner of the parent directory. 
The quota structure is possibly - * deleted only on iput => outside of this transaction */ - jbegin_count = - JOURNAL_PER_BALANCE_CNT * 2 + 2 + - 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); - - depth = reiserfs_write_lock_once(dir->i_sb); - retval = journal_begin(&th, dir->i_sb, jbegin_count); - if (retval) - goto out_unlink; - - de.de_gen_number_bit_string = NULL; - if ((retval = - reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, - &path, &de)) == NAME_NOT_FOUND) { - retval = -ENOENT; - goto end_unlink; - } else if (retval == IO_ERROR) { - retval = -EIO; - goto end_unlink; - } - - reiserfs_update_inode_transaction(inode); - reiserfs_update_inode_transaction(dir); - - if (de.de_objectid != inode->i_ino) { - // FIXME: compare key of an object and a key found in the - // entry - retval = -EIO; - goto end_unlink; - } - - if (!inode->i_nlink) { - reiserfs_warning(inode->i_sb, "reiserfs-7042", - "deleting nonexistent file (%lu), %d", - inode->i_ino, inode->i_nlink); - set_nlink(inode, 1); - } - - drop_nlink(inode); - - /* - * we schedule before doing the add_save_link call, save the link - * count so we don't race - */ - savelink = inode->i_nlink; - - retval = - reiserfs_cut_from_item(&th, &path, &(de.de_entry_key), dir, NULL, - 0); - if (retval < 0) { - inc_nlink(inode); - goto end_unlink; - } - inode->i_ctime = CURRENT_TIME_SEC; - reiserfs_update_sd(&th, inode); - - dir->i_size -= (de.de_entrylen + DEH_SIZE); - dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; - reiserfs_update_sd(&th, dir); - - if (!savelink) - /* prevent file from getting lost */ - add_save_link(&th, inode, 0 /* not truncate */ ); - - retval = journal_end(&th, dir->i_sb, jbegin_count); - reiserfs_check_path(&path); - reiserfs_write_unlock_once(dir->i_sb, depth); - return retval; - - end_unlink: - pathrelse(&path); - err = journal_end(&th, dir->i_sb, jbegin_count); - reiserfs_check_path(&path); - if (err) - retval = err; - out_unlink: - reiserfs_write_unlock_once(dir->i_sb, depth); - 
return retval; -} - -static int reiserfs_symlink(struct inode *parent_dir, - struct dentry *dentry, const char *symname) -{ - int retval; - struct inode *inode; - char *name; - int item_len; - struct reiserfs_transaction_handle th; - struct reiserfs_security_handle security; - int mode = S_IFLNK | S_IRWXUGO; - /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ - int jbegin_count = - JOURNAL_PER_BALANCE_CNT * 3 + - 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) + - REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb)); - - dquot_initialize(parent_dir); - - if (!(inode = new_inode(parent_dir->i_sb))) { - return -ENOMEM; - } - new_inode_init(inode, parent_dir, mode); - - retval = reiserfs_security_init(parent_dir, inode, &dentry->d_name, - &security); - if (retval < 0) { - drop_new_inode(inode); - return retval; - } - jbegin_count += retval; - - reiserfs_write_lock(parent_dir->i_sb); - item_len = ROUND_UP(strlen(symname)); - if (item_len > MAX_DIRECT_ITEM_LEN(parent_dir->i_sb->s_blocksize)) { - retval = -ENAMETOOLONG; - drop_new_inode(inode); - goto out_failed; - } - - name = kmalloc(item_len, GFP_NOFS); - if (!name) { - drop_new_inode(inode); - retval = -ENOMEM; - goto out_failed; - } - memcpy(name, symname, strlen(symname)); - padd_item(name, item_len, strlen(symname)); - - retval = journal_begin(&th, parent_dir->i_sb, jbegin_count); - if (retval) { - drop_new_inode(inode); - kfree(name); - goto out_failed; - } - - retval = - reiserfs_new_inode(&th, parent_dir, mode, name, strlen(symname), - dentry, inode, &security); - kfree(name); - if (retval) { /* reiserfs_new_inode iputs for us */ - goto out_failed; - } - - reiserfs_update_inode_transaction(inode); - reiserfs_update_inode_transaction(parent_dir); - - inode->i_op = &reiserfs_symlink_inode_operations; - inode->i_mapping->a_ops = &reiserfs_address_space_operations; - - // must be sure this inode is written with this transaction - // - //reiserfs_update_sd 
(&th, inode, READ_BLOCKS); - - retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name, - dentry->d_name.len, inode, 1 /*visible */ ); - if (retval) { - int err; - drop_nlink(inode); - reiserfs_update_sd(&th, inode); - err = journal_end(&th, parent_dir->i_sb, jbegin_count); - if (err) - retval = err; - unlock_new_inode(inode); - iput(inode); - goto out_failed; - } - - d_instantiate(dentry, inode); - unlock_new_inode(inode); - retval = journal_end(&th, parent_dir->i_sb, jbegin_count); - out_failed: - reiserfs_write_unlock(parent_dir->i_sb); - return retval; -} - -static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, - struct dentry *dentry) -{ - int retval; - struct inode *inode = old_dentry->d_inode; - struct reiserfs_transaction_handle th; - /* We need blocks for transaction + update of quotas for the owners of the directory */ - int jbegin_count = - JOURNAL_PER_BALANCE_CNT * 3 + - 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); - - dquot_initialize(dir); - - reiserfs_write_lock(dir->i_sb); - if (inode->i_nlink >= REISERFS_LINK_MAX) { - //FIXME: sd_nlink is 32 bit for new files - reiserfs_write_unlock(dir->i_sb); - return -EMLINK; - } - - /* inc before scheduling so reiserfs_unlink knows we are here */ - inc_nlink(inode); - - retval = journal_begin(&th, dir->i_sb, jbegin_count); - if (retval) { - drop_nlink(inode); - reiserfs_write_unlock(dir->i_sb); - return retval; - } - - /* create new entry */ - retval = - reiserfs_add_entry(&th, dir, dentry->d_name.name, - dentry->d_name.len, inode, 1 /*visible */ ); - - reiserfs_update_inode_transaction(inode); - reiserfs_update_inode_transaction(dir); - - if (retval) { - int err; - drop_nlink(inode); - err = journal_end(&th, dir->i_sb, jbegin_count); - reiserfs_write_unlock(dir->i_sb); - return err ? 
err : retval; - } - - inode->i_ctime = CURRENT_TIME_SEC; - reiserfs_update_sd(&th, inode); - - ihold(inode); - d_instantiate(dentry, inode); - retval = journal_end(&th, dir->i_sb, jbegin_count); - reiserfs_write_unlock(dir->i_sb); - return retval; -} - -/* de contains information pointing to an entry which */ -static int de_still_valid(const char *name, int len, - struct reiserfs_dir_entry *de) -{ - struct reiserfs_dir_entry tmp = *de; - - // recalculate pointer to name and name length - set_de_name_and_namelen(&tmp); - // FIXME: could check more - if (tmp.de_namelen != len || memcmp(name, de->de_name, len)) - return 0; - return 1; -} - -static int entry_points_to_object(const char *name, int len, - struct reiserfs_dir_entry *de, - struct inode *inode) -{ - if (!de_still_valid(name, len, de)) - return 0; - - if (inode) { - if (!de_visible(de->de_deh + de->de_entry_num)) - reiserfs_panic(inode->i_sb, "vs-7042", - "entry must be visible"); - return (de->de_objectid == inode->i_ino) ? 1 : 0; - } - - /* this must be added hidden entry */ - if (de_visible(de->de_deh + de->de_entry_num)) - reiserfs_panic(NULL, "vs-7043", "entry must be visible"); - - return 1; -} - -/* sets key of objectid the entry has to point to */ -static void set_ino_in_dir_entry(struct reiserfs_dir_entry *de, - struct reiserfs_key *key) -{ - /* JDM These operations are endian safe - both are le */ - de->de_deh[de->de_entry_num].deh_dir_id = key->k_dir_id; - de->de_deh[de->de_entry_num].deh_objectid = key->k_objectid; -} - -/* - * process, that is going to call fix_nodes/do_balance must hold only - * one path. 
If it holds 2 or more, it can get into endless waiting in - * get_empty_nodes or its clones - */ -static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) -{ - int retval; - INITIALIZE_PATH(old_entry_path); - INITIALIZE_PATH(new_entry_path); - INITIALIZE_PATH(dot_dot_entry_path); - struct item_head new_entry_ih, old_entry_ih, dot_dot_ih; - struct reiserfs_dir_entry old_de, new_de, dot_dot_de; - struct inode *old_inode, *new_dentry_inode; - struct reiserfs_transaction_handle th; - int jbegin_count; - umode_t old_inode_mode; - unsigned long savelink = 1; - struct timespec ctime; - - /* three balancings: (1) old name removal, (2) new name insertion - and (3) maybe "save" link insertion - stat data updates: (1) old directory, - (2) new directory and (3) maybe old object stat data (when it is - directory) and (4) maybe stat data of object to which new entry - pointed initially and (5) maybe block containing ".." of - renamed directory - quota updates: two parent directories */ - jbegin_count = - JOURNAL_PER_BALANCE_CNT * 3 + 5 + - 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb); - - dquot_initialize(old_dir); - dquot_initialize(new_dir); - - old_inode = old_dentry->d_inode; - new_dentry_inode = new_dentry->d_inode; - - // make sure, that oldname still exists and points to an object we - // are going to rename - old_de.de_gen_number_bit_string = NULL; - reiserfs_write_lock(old_dir->i_sb); - retval = - reiserfs_find_entry(old_dir, old_dentry->d_name.name, - old_dentry->d_name.len, &old_entry_path, - &old_de); - pathrelse(&old_entry_path); - if (retval == IO_ERROR) { - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; - } - - if (retval != NAME_FOUND || old_de.de_objectid != old_inode->i_ino) { - reiserfs_write_unlock(old_dir->i_sb); - return -ENOENT; - } - - old_inode_mode = old_inode->i_mode; - if (S_ISDIR(old_inode_mode)) { - // make sure, that directory being renamed has correct ".." 
- // and that its new parent directory has not too many links - // already - - if (new_dentry_inode) { - if (!reiserfs_empty_dir(new_dentry_inode)) { - reiserfs_write_unlock(old_dir->i_sb); - return -ENOTEMPTY; - } - } - - /* directory is renamed, its parent directory will be changed, - ** so find ".." entry - */ - dot_dot_de.de_gen_number_bit_string = NULL; - retval = - reiserfs_find_entry(old_inode, "..", 2, &dot_dot_entry_path, - &dot_dot_de); - pathrelse(&dot_dot_entry_path); - if (retval != NAME_FOUND) { - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; - } - - /* inode number of .. must equal old_dir->i_ino */ - if (dot_dot_de.de_objectid != old_dir->i_ino) { - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; - } - } - - retval = journal_begin(&th, old_dir->i_sb, jbegin_count); - if (retval) { - reiserfs_write_unlock(old_dir->i_sb); - return retval; - } - - /* add new entry (or find the existing one) */ - retval = - reiserfs_add_entry(&th, new_dir, new_dentry->d_name.name, - new_dentry->d_name.len, old_inode, 0); - if (retval == -EEXIST) { - if (!new_dentry_inode) { - reiserfs_panic(old_dir->i_sb, "vs-7050", - "new entry is found, new inode == 0"); - } - } else if (retval) { - int err = journal_end(&th, old_dir->i_sb, jbegin_count); - reiserfs_write_unlock(old_dir->i_sb); - return err ? 
err : retval; - } - - reiserfs_update_inode_transaction(old_dir); - reiserfs_update_inode_transaction(new_dir); - - /* this makes it so an fsync on an open fd for the old name will - ** commit the rename operation - */ - reiserfs_update_inode_transaction(old_inode); - - if (new_dentry_inode) - reiserfs_update_inode_transaction(new_dentry_inode); - - while (1) { - // look for old name using corresponding entry key (found by reiserfs_find_entry) - if ((retval = - search_by_entry_key(new_dir->i_sb, &old_de.de_entry_key, - &old_entry_path, - &old_de)) != NAME_FOUND) { - pathrelse(&old_entry_path); - journal_end(&th, old_dir->i_sb, jbegin_count); - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; - } - - copy_item_head(&old_entry_ih, get_ih(&old_entry_path)); - - reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1); - - // look for new name by reiserfs_find_entry - new_de.de_gen_number_bit_string = NULL; - retval = - reiserfs_find_entry(new_dir, new_dentry->d_name.name, - new_dentry->d_name.len, &new_entry_path, - &new_de); - // reiserfs_add_entry should not return IO_ERROR, because it is called with essentially same parameters from - // reiserfs_add_entry above, and we'll catch any i/o errors before we get here. 
- if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) { - pathrelse(&new_entry_path); - pathrelse(&old_entry_path); - journal_end(&th, old_dir->i_sb, jbegin_count); - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; - } - - copy_item_head(&new_entry_ih, get_ih(&new_entry_path)); - - reiserfs_prepare_for_journal(old_inode->i_sb, new_de.de_bh, 1); - - if (S_ISDIR(old_inode->i_mode)) { - if ((retval = - search_by_entry_key(new_dir->i_sb, - &dot_dot_de.de_entry_key, - &dot_dot_entry_path, - &dot_dot_de)) != NAME_FOUND) { - pathrelse(&dot_dot_entry_path); - pathrelse(&new_entry_path); - pathrelse(&old_entry_path); - journal_end(&th, old_dir->i_sb, jbegin_count); - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; - } - copy_item_head(&dot_dot_ih, - get_ih(&dot_dot_entry_path)); - // node containing ".." gets into transaction - reiserfs_prepare_for_journal(old_inode->i_sb, - dot_dot_de.de_bh, 1); - } - /* we should check seals here, not do - this stuff, yes? Then, having - gathered everything into RAM we - should lock the buffers, yes? -Hans */ - /* probably. our rename needs to hold more - ** than one path at once. The seals would - ** have to be written to deal with multi-path - ** issues -chris - */ - /* sanity checking before doing the rename - avoid races many - ** of the above checks could have scheduled. We have to be - ** sure our items haven't been shifted by another process. - */ - if (item_moved(&new_entry_ih, &new_entry_path) || - !entry_points_to_object(new_dentry->d_name.name, - new_dentry->d_name.len, - &new_de, new_dentry_inode) || - item_moved(&old_entry_ih, &old_entry_path) || - !entry_points_to_object(old_dentry->d_name.name, - old_dentry->d_name.len, - &old_de, old_inode)) { - reiserfs_restore_prepared_buffer(old_inode->i_sb, - new_de.de_bh); - reiserfs_restore_prepared_buffer(old_inode->i_sb, - old_de.de_bh); - if (S_ISDIR(old_inode_mode)) - reiserfs_restore_prepared_buffer(old_inode-> - i_sb, - dot_dot_de. 
- de_bh); - continue; - } - if (S_ISDIR(old_inode_mode)) { - if (item_moved(&dot_dot_ih, &dot_dot_entry_path) || - !entry_points_to_object("..", 2, &dot_dot_de, - old_dir)) { - reiserfs_restore_prepared_buffer(old_inode-> - i_sb, - old_de.de_bh); - reiserfs_restore_prepared_buffer(old_inode-> - i_sb, - new_de.de_bh); - reiserfs_restore_prepared_buffer(old_inode-> - i_sb, - dot_dot_de. - de_bh); - continue; - } - } - - RFALSE(S_ISDIR(old_inode_mode) && - !buffer_journal_prepared(dot_dot_de.de_bh), ""); - - break; - } - - /* ok, all the changes can be done in one fell swoop when we - have claimed all the buffers needed. */ - - mark_de_visible(new_de.de_deh + new_de.de_entry_num); - set_ino_in_dir_entry(&new_de, INODE_PKEY(old_inode)); - journal_mark_dirty(&th, old_dir->i_sb, new_de.de_bh); - - mark_de_hidden(old_de.de_deh + old_de.de_entry_num); - journal_mark_dirty(&th, old_dir->i_sb, old_de.de_bh); - ctime = CURRENT_TIME_SEC; - old_dir->i_ctime = old_dir->i_mtime = ctime; - new_dir->i_ctime = new_dir->i_mtime = ctime; - /* thanks to Alex Adriaanse <alex_a@caltech.edu> for patch which adds ctime update of - renamed object */ - old_inode->i_ctime = ctime; - - if (new_dentry_inode) { - // adjust link number of the victim - if (S_ISDIR(new_dentry_inode->i_mode)) { - clear_nlink(new_dentry_inode); - } else { - drop_nlink(new_dentry_inode); - } - new_dentry_inode->i_ctime = ctime; - savelink = new_dentry_inode->i_nlink; - } - - if (S_ISDIR(old_inode_mode)) { - /* adjust ".." of renamed directory */ - set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir)); - journal_mark_dirty(&th, new_dir->i_sb, dot_dot_de.de_bh); - - if (!new_dentry_inode) - /* there (in new_dir) was no directory, so it got new link - (".." of renamed directory) */ - INC_DIR_INODE_NLINK(new_dir); - - /* old directory lost one link - ".. " of renamed directory */ - DEC_DIR_INODE_NLINK(old_dir); - } - // looks like in 2.3.99pre3 brelse is atomic. 
so we can use pathrelse - pathrelse(&new_entry_path); - pathrelse(&dot_dot_entry_path); - - // FIXME: this reiserfs_cut_from_item's return value may screw up - // anybody, but it will panic if will not be able to find the - // entry. This needs one more clean up - if (reiserfs_cut_from_item - (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL, - 0) < 0) - reiserfs_error(old_dir->i_sb, "vs-7060", - "couldn't not cut old name. Fsck later?"); - - old_dir->i_size -= DEH_SIZE + old_de.de_entrylen; - - reiserfs_update_sd(&th, old_dir); - reiserfs_update_sd(&th, new_dir); - reiserfs_update_sd(&th, old_inode); - - if (new_dentry_inode) { - if (savelink == 0) - add_save_link(&th, new_dentry_inode, - 0 /* not truncate */ ); - reiserfs_update_sd(&th, new_dentry_inode); - } - - retval = journal_end(&th, old_dir->i_sb, jbegin_count); - reiserfs_write_unlock(old_dir->i_sb); - return retval; -} - -/* - * directories can handle most operations... - */ -const struct inode_operations reiserfs_dir_inode_operations = { - //&reiserfs_dir_operations, /* default_file_ops */ - .create = reiserfs_create, - .lookup = reiserfs_lookup, - .link = reiserfs_link, - .unlink = reiserfs_unlink, - .symlink = reiserfs_symlink, - .mkdir = reiserfs_mkdir, - .rmdir = reiserfs_rmdir, - .mknod = reiserfs_mknod, - .rename = reiserfs_rename, - .setattr = reiserfs_setattr, - .setxattr = reiserfs_setxattr, - .getxattr = reiserfs_getxattr, - .listxattr = reiserfs_listxattr, - .removexattr = reiserfs_removexattr, - .permission = reiserfs_permission, - .get_acl = reiserfs_get_acl, -}; - -/* - * symlink operations.. 
same as page_symlink_inode_operations, with xattr - * stuff added - */ -const struct inode_operations reiserfs_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = page_follow_link_light, - .put_link = page_put_link, - .setattr = reiserfs_setattr, - .setxattr = reiserfs_setxattr, - .getxattr = reiserfs_getxattr, - .listxattr = reiserfs_listxattr, - .removexattr = reiserfs_removexattr, - .permission = reiserfs_permission, - .get_acl = reiserfs_get_acl, - -}; - -/* - * special file operations.. just xattr/acl stuff - */ -const struct inode_operations reiserfs_special_inode_operations = { - .setattr = reiserfs_setattr, - .setxattr = reiserfs_setxattr, - .getxattr = reiserfs_getxattr, - .listxattr = reiserfs_listxattr, - .removexattr = reiserfs_removexattr, - .permission = reiserfs_permission, - .get_acl = reiserfs_get_acl, -}; diff --git a/ANDROID_3.4.5/fs/reiserfs/objectid.c b/ANDROID_3.4.5/fs/reiserfs/objectid.c deleted file mode 100644 index f732d6a5..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/objectid.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include <linux/string.h> -#include <linux/random.h> -#include <linux/time.h> -#include "reiserfs.h" - -// find where objectid map starts -#define objectid_map(s,rs) (old_format_only (s) ? \ - (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\ - (__le32 *)((rs) + 1)) - -#ifdef CONFIG_REISERFS_CHECK - -static void check_objectid_map(struct super_block *s, __le32 * map) -{ - if (le32_to_cpu(map[0]) != 1) - reiserfs_panic(s, "vs-15010", "map corrupted: %lx", - (long unsigned int)le32_to_cpu(map[0])); - - // FIXME: add something else here -} - -#else -static void check_objectid_map(struct super_block *s, __le32 * map) -{; -} -#endif - -/* When we allocate objectids we allocate the first unused objectid. 
- Each sequence of objectids in use (the odd sequences) is followed - by a sequence of objectids not in use (the even sequences). We - only need to record the last objectid in each of these sequences - (both the odd and even sequences) in order to fully define the - boundaries of the sequences. A consequence of allocating the first - objectid not in use is that under most conditions this scheme is - extremely compact. The exception is immediately after a sequence - of operations which deletes a large number of objects of - non-sequential objectids, and even then it will become compact - again as soon as more objects are created. Note that many - interesting optimizations of layout could result from complicating - objectid assignment, but we have deferred making them for now. */ - -/* get unique object identifier */ -__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th) -{ - struct super_block *s = th->t_super; - struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s); - __le32 *map = objectid_map(s, rs); - __u32 unused_objectid; - - BUG_ON(!th->t_trans_id); - - check_objectid_map(s, map); - - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); - /* comment needed -Hans */ - unused_objectid = le32_to_cpu(map[1]); - if (unused_objectid == U32_MAX) { - reiserfs_warning(s, "reiserfs-15100", "no more object ids"); - reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s)); - return 0; - } - - /* This incrementation allocates the first unused objectid. That - is to say, the first entry on the objectid map is the first - unused objectid, and by incrementing it we use it. See below - where we check to see if we eliminated a sequence of unused - objectids.... */ - map[1] = cpu_to_le32(unused_objectid + 1); - - /* Now we check to see if we eliminated the last remaining member of - the first even sequence (and can eliminate the sequence by - eliminating its last objectid from oids), and can collapse the - first two odd sequences into one sequence. 
If so, then the net - result is to eliminate a pair of objectids from oids. We do this - by shifting the entire map to the left. */ - if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) { - memmove(map + 1, map + 3, - (sb_oid_cursize(rs) - 3) * sizeof(__u32)); - set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2); - } - - journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s)); - return unused_objectid; -} - -/* makes object identifier unused */ -void reiserfs_release_objectid(struct reiserfs_transaction_handle *th, - __u32 objectid_to_release) -{ - struct super_block *s = th->t_super; - struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s); - __le32 *map = objectid_map(s, rs); - int i = 0; - - BUG_ON(!th->t_trans_id); - //return; - check_objectid_map(s, map); - - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); - journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s)); - - /* start at the beginning of the objectid map (i = 0) and go to - the end of it (i = disk_sb->s_oid_cursize). Linear search is - what we use, though it is possible that binary search would be - more efficient after performing lots of deletions (which is - when oids is large.) We only check even i's. */ - while (i < sb_oid_cursize(rs)) { - if (objectid_to_release == le32_to_cpu(map[i])) { - /* This incrementation unallocates the objectid. */ - //map[i]++; - le32_add_cpu(&map[i], 1); - - /* Did we unallocate the last member of an odd sequence, and can shrink oids? 
*/ - if (map[i] == map[i + 1]) { - /* shrink objectid map */ - memmove(map + i, map + i + 2, - (sb_oid_cursize(rs) - i - - 2) * sizeof(__u32)); - //disk_sb->s_oid_cursize -= 2; - set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2); - - RFALSE(sb_oid_cursize(rs) < 2 || - sb_oid_cursize(rs) > sb_oid_maxsize(rs), - "vs-15005: objectid map corrupted cur_size == %d (max == %d)", - sb_oid_cursize(rs), sb_oid_maxsize(rs)); - } - return; - } - - if (objectid_to_release > le32_to_cpu(map[i]) && - objectid_to_release < le32_to_cpu(map[i + 1])) { - /* size of objectid map is not changed */ - if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) { - //objectid_map[i+1]--; - le32_add_cpu(&map[i + 1], -1); - return; - } - - /* JDM comparing two little-endian values for equality -- safe */ - if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) { - /* objectid map must be expanded, but there is no space */ - PROC_INFO_INC(s, leaked_oid); - return; - } - - /* expand the objectid map */ - memmove(map + i + 3, map + i + 1, - (sb_oid_cursize(rs) - i - 1) * sizeof(__u32)); - map[i + 1] = cpu_to_le32(objectid_to_release); - map[i + 2] = cpu_to_le32(objectid_to_release + 1); - set_sb_oid_cursize(rs, sb_oid_cursize(rs) + 2); - return; - } - i += 2; - } - - reiserfs_error(s, "vs-15011", "tried to free free object id (%lu)", - (long unsigned)objectid_to_release); -} - -int reiserfs_convert_objectid_map_v1(struct super_block *s) -{ - struct reiserfs_super_block *disk_sb = SB_DISK_SUPER_BLOCK(s); - int cur_size = sb_oid_cursize(disk_sb); - int new_size = (s->s_blocksize - SB_SIZE) / sizeof(__u32) / 2 * 2; - int old_max = sb_oid_maxsize(disk_sb); - struct reiserfs_super_block_v1 *disk_sb_v1; - __le32 *objectid_map, *new_objectid_map; - int i; - - disk_sb_v1 = - (struct reiserfs_super_block_v1 *)(SB_BUFFER_WITH_SB(s)->b_data); - objectid_map = (__le32 *) (disk_sb_v1 + 1); - new_objectid_map = (__le32 *) (disk_sb + 1); - - if (cur_size > new_size) { - /* mark everyone used that was listed as free at the 
end of the objectid - ** map - */ - objectid_map[new_size - 1] = objectid_map[cur_size - 1]; - set_sb_oid_cursize(disk_sb, new_size); - } - /* move the smaller objectid map past the end of the new super */ - for (i = new_size - 1; i >= 0; i--) { - objectid_map[i + (old_max - new_size)] = objectid_map[i]; - } - - /* set the max size so we don't overflow later */ - set_sb_oid_maxsize(disk_sb, new_size); - - /* Zero out label and generate random UUID */ - memset(disk_sb->s_label, 0, sizeof(disk_sb->s_label)); - generate_random_uuid(disk_sb->s_uuid); - - /* finally, zero out the unused chunk of the new super */ - memset(disk_sb->s_unused, 0, sizeof(disk_sb->s_unused)); - return 0; -} diff --git a/ANDROID_3.4.5/fs/reiserfs/prints.c b/ANDROID_3.4.5/fs/reiserfs/prints.c deleted file mode 100644 index c0b1112a..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/prints.c +++ /dev/null @@ -1,768 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include <linux/time.h> -#include <linux/fs.h> -#include "reiserfs.h" -#include <linux/string.h> -#include <linux/buffer_head.h> - -#include <stdarg.h> - -static char error_buf[1024]; -static char fmt_buf[1024]; -static char off_buf[80]; - -static char *reiserfs_cpu_offset(struct cpu_key *key) -{ - if (cpu_key_k_type(key) == TYPE_DIRENTRY) - sprintf(off_buf, "%Lu(%Lu)", - (unsigned long long) - GET_HASH_VALUE(cpu_key_k_offset(key)), - (unsigned long long) - GET_GENERATION_NUMBER(cpu_key_k_offset(key))); - else - sprintf(off_buf, "0x%Lx", - (unsigned long long)cpu_key_k_offset(key)); - return off_buf; -} - -static char *le_offset(struct reiserfs_key *key) -{ - int version; - - version = le_key_version(key); - if (le_key_k_type(version, key) == TYPE_DIRENTRY) - sprintf(off_buf, "%Lu(%Lu)", - (unsigned long long) - GET_HASH_VALUE(le_key_k_offset(version, key)), - (unsigned long long) - GET_GENERATION_NUMBER(le_key_k_offset(version, key))); - else - sprintf(off_buf, "0x%Lx", - (unsigned long 
long)le_key_k_offset(version, key)); - return off_buf; -} - -static char *cpu_type(struct cpu_key *key) -{ - if (cpu_key_k_type(key) == TYPE_STAT_DATA) - return "SD"; - if (cpu_key_k_type(key) == TYPE_DIRENTRY) - return "DIR"; - if (cpu_key_k_type(key) == TYPE_DIRECT) - return "DIRECT"; - if (cpu_key_k_type(key) == TYPE_INDIRECT) - return "IND"; - return "UNKNOWN"; -} - -static char *le_type(struct reiserfs_key *key) -{ - int version; - - version = le_key_version(key); - - if (le_key_k_type(version, key) == TYPE_STAT_DATA) - return "SD"; - if (le_key_k_type(version, key) == TYPE_DIRENTRY) - return "DIR"; - if (le_key_k_type(version, key) == TYPE_DIRECT) - return "DIRECT"; - if (le_key_k_type(version, key) == TYPE_INDIRECT) - return "IND"; - return "UNKNOWN"; -} - -/* %k */ -static void sprintf_le_key(char *buf, struct reiserfs_key *key) -{ - if (key) - sprintf(buf, "[%d %d %s %s]", le32_to_cpu(key->k_dir_id), - le32_to_cpu(key->k_objectid), le_offset(key), - le_type(key)); - else - sprintf(buf, "[NULL]"); -} - -/* %K */ -static void sprintf_cpu_key(char *buf, struct cpu_key *key) -{ - if (key) - sprintf(buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id, - key->on_disk_key.k_objectid, reiserfs_cpu_offset(key), - cpu_type(key)); - else - sprintf(buf, "[NULL]"); -} - -static void sprintf_de_head(char *buf, struct reiserfs_de_head *deh) -{ - if (deh) - sprintf(buf, - "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", - deh_offset(deh), deh_dir_id(deh), deh_objectid(deh), - deh_location(deh), deh_state(deh)); - else - sprintf(buf, "[NULL]"); - -} - -static void sprintf_item_head(char *buf, struct item_head *ih) -{ - if (ih) { - strcpy(buf, - (ih_version(ih) == KEY_FORMAT_3_6) ? 
"*3.6* " : "*3.5*"); - sprintf_le_key(buf + strlen(buf), &(ih->ih_key)); - sprintf(buf + strlen(buf), ", item_len %d, item_location %d, " - "free_space(entry_count) %d", - ih_item_len(ih), ih_location(ih), ih_free_space(ih)); - } else - sprintf(buf, "[NULL]"); -} - -static void sprintf_direntry(char *buf, struct reiserfs_dir_entry *de) -{ - char name[20]; - - memcpy(name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen); - name[de->de_namelen > 19 ? 19 : de->de_namelen] = 0; - sprintf(buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid); -} - -static void sprintf_block_head(char *buf, struct buffer_head *bh) -{ - sprintf(buf, "level=%d, nr_items=%d, free_space=%d rdkey ", - B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh)); -} - -static void sprintf_buffer_head(char *buf, struct buffer_head *bh) -{ - char b[BDEVNAME_SIZE]; - - sprintf(buf, - "dev %s, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", - bdevname(bh->b_bdev, b), bh->b_size, - (unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)), - bh->b_state, bh->b_page, - buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE", - buffer_dirty(bh) ? "DIRTY" : "CLEAN", - buffer_locked(bh) ? "LOCKED" : "UNLOCKED"); -} - -static void sprintf_disk_child(char *buf, struct disk_child *dc) -{ - sprintf(buf, "[dc_number=%d, dc_size=%u]", dc_block_number(dc), - dc_size(dc)); -} - -static char *is_there_reiserfs_struct(char *fmt, int *what) -{ - char *k = fmt; - - while ((k = strchr(k, '%')) != NULL) { - if (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' || - k[1] == 'z' || k[1] == 'b' || k[1] == 'y' || k[1] == 'a') { - *what = k[1]; - break; - } - k++; - } - return k; -} - -/* debugging reiserfs we used to print out a lot of different - variables, like keys, item headers, buffer heads etc. Values of - most fields matter. So it took a long time just to write - appropriative printk. 
With this reiserfs_warning you can use format - specification for complex structures like you used to do with - printfs for integers, doubles and pointers. For instance, to print - out key structure you have to write just: - reiserfs_warning ("bad key %k", key); - instead of - printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid, - key->k_offset, key->k_uniqueness); -*/ -static DEFINE_SPINLOCK(error_lock); -static void prepare_error_buf(const char *fmt, va_list args) -{ - char *fmt1 = fmt_buf; - char *k; - char *p = error_buf; - int what; - - spin_lock(&error_lock); - - strcpy(fmt1, fmt); - - while ((k = is_there_reiserfs_struct(fmt1, &what)) != NULL) { - *k = 0; - - p += vsprintf(p, fmt1, args); - - switch (what) { - case 'k': - sprintf_le_key(p, va_arg(args, struct reiserfs_key *)); - break; - case 'K': - sprintf_cpu_key(p, va_arg(args, struct cpu_key *)); - break; - case 'h': - sprintf_item_head(p, va_arg(args, struct item_head *)); - break; - case 't': - sprintf_direntry(p, - va_arg(args, - struct reiserfs_dir_entry *)); - break; - case 'y': - sprintf_disk_child(p, - va_arg(args, struct disk_child *)); - break; - case 'z': - sprintf_block_head(p, - va_arg(args, struct buffer_head *)); - break; - case 'b': - sprintf_buffer_head(p, - va_arg(args, struct buffer_head *)); - break; - case 'a': - sprintf_de_head(p, - va_arg(args, - struct reiserfs_de_head *)); - break; - } - - p += strlen(p); - fmt1 = k + 2; - } - vsprintf(p, fmt1, args); - spin_unlock(&error_lock); - -} - -/* in addition to usual conversion specifiers this accepts reiserfs - specific conversion specifiers: - %k to print little endian key, - %K to print cpu key, - %h to print item_head, - %t to print directory entry - %z to print block head (arg must be struct buffer_head * - %b to print buffer_head -*/ - -#define do_reiserfs_warning(fmt)\ -{\ - va_list args;\ - va_start( args, fmt );\ - prepare_error_buf( fmt, args );\ - va_end( args );\ -} - -void __reiserfs_warning(struct super_block 
*sb, const char *id, - const char *function, const char *fmt, ...) -{ - do_reiserfs_warning(fmt); - if (sb) - printk(KERN_WARNING "REISERFS warning (device %s): %s%s%s: " - "%s\n", sb->s_id, id ? id : "", id ? " " : "", - function, error_buf); - else - printk(KERN_WARNING "REISERFS warning: %s%s%s: %s\n", - id ? id : "", id ? " " : "", function, error_buf); -} - -/* No newline.. reiserfs_info calls can be followed by printk's */ -void reiserfs_info(struct super_block *sb, const char *fmt, ...) -{ - do_reiserfs_warning(fmt); - if (sb) - printk(KERN_NOTICE "REISERFS (device %s): %s", - sb->s_id, error_buf); - else - printk(KERN_NOTICE "REISERFS %s:", error_buf); -} - -/* No newline.. reiserfs_printk calls can be followed by printk's */ -static void reiserfs_printk(const char *fmt, ...) -{ - do_reiserfs_warning(fmt); - printk(error_buf); -} - -void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...) -{ -#ifdef CONFIG_REISERFS_CHECK - do_reiserfs_warning(fmt); - if (s) - printk(KERN_DEBUG "REISERFS debug (device %s): %s\n", - s->s_id, error_buf); - else - printk(KERN_DEBUG "REISERFS debug: %s\n", error_buf); -#endif -} - -/* The format: - - maintainer-errorid: [function-name:] message - - where errorid is unique to the maintainer and function-name is - optional, is recommended, so that anyone can easily find the bug - with a simple grep for the short to type string - maintainer-errorid. Don't bother with reusing errorids, there are - lots of numbers out there. - - Example: - - reiserfs_panic( - p_sb, "reiser-29: reiserfs_new_blocknrs: " - "one of search_start or rn(%d) is equal to MAX_B_NUM," - "which means that we are optimizing location based on the bogus location of a temp buffer (%p).", - rn, bh - ); - - Regular panic()s sometimes clear the screen before the message can - be read, thus the need for the while loop. 
- - Numbering scheme for panic used by Vladimir and Anatoly( Hans completely ignores this scheme, and considers it - pointless complexity): - - panics in reiserfs.h have numbers from 1000 to 1999 - super.c 2000 to 2999 - preserve.c (unused) 3000 to 3999 - bitmap.c 4000 to 4999 - stree.c 5000 to 5999 - prints.c 6000 to 6999 - namei.c 7000 to 7999 - fix_nodes.c 8000 to 8999 - dir.c 9000 to 9999 - lbalance.c 10000 to 10999 - ibalance.c 11000 to 11999 not ready - do_balan.c 12000 to 12999 - inode.c 13000 to 13999 - file.c 14000 to 14999 - objectid.c 15000 - 15999 - buffer.c 16000 - 16999 - symlink.c 17000 - 17999 - - . */ - -void __reiserfs_panic(struct super_block *sb, const char *id, - const char *function, const char *fmt, ...) -{ - do_reiserfs_warning(fmt); - -#ifdef CONFIG_REISERFS_CHECK - dump_stack(); -#endif - if (sb) - panic(KERN_WARNING "REISERFS panic (device %s): %s%s%s: %s\n", - sb->s_id, id ? id : "", id ? " " : "", - function, error_buf); - else - panic(KERN_WARNING "REISERFS panic: %s%s%s: %s\n", - id ? id : "", id ? " " : "", function, error_buf); -} - -void __reiserfs_error(struct super_block *sb, const char *id, - const char *function, const char *fmt, ...) -{ - do_reiserfs_warning(fmt); - - BUG_ON(sb == NULL); - - if (reiserfs_error_panic(sb)) - __reiserfs_panic(sb, id, function, error_buf); - - if (id && id[0]) - printk(KERN_CRIT "REISERFS error (device %s): %s %s: %s\n", - sb->s_id, id, function, error_buf); - else - printk(KERN_CRIT "REISERFS error (device %s): %s: %s\n", - sb->s_id, function, error_buf); - - if (sb->s_flags & MS_RDONLY) - return; - - reiserfs_info(sb, "Remounting filesystem read-only\n"); - sb->s_flags |= MS_RDONLY; - reiserfs_abort_journal(sb, -EIO); -} - -void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...) 
-{ - do_reiserfs_warning(fmt); - - if (reiserfs_error_panic(sb)) { - panic(KERN_CRIT "REISERFS panic (device %s): %s\n", sb->s_id, - error_buf); - } - - if (reiserfs_is_journal_aborted(SB_JOURNAL(sb))) - return; - - printk(KERN_CRIT "REISERFS abort (device %s): %s\n", sb->s_id, - error_buf); - - sb->s_flags |= MS_RDONLY; - reiserfs_abort_journal(sb, errno); -} - -/* this prints internal nodes (4 keys/items in line) (dc_number, - dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number, - dc_size)...*/ -static int print_internal(struct buffer_head *bh, int first, int last) -{ - struct reiserfs_key *key; - struct disk_child *dc; - int i; - int from, to; - - if (!B_IS_KEYS_LEVEL(bh)) - return 1; - - check_internal(bh); - - if (first == -1) { - from = 0; - to = B_NR_ITEMS(bh); - } else { - from = first; - to = last < B_NR_ITEMS(bh) ? last : B_NR_ITEMS(bh); - } - - reiserfs_printk("INTERNAL NODE (%ld) contains %z\n", bh->b_blocknr, bh); - - dc = B_N_CHILD(bh, from); - reiserfs_printk("PTR %d: %y ", from, dc); - - for (i = from, key = B_N_PDELIM_KEY(bh, from), dc++; i < to; - i++, key++, dc++) { - reiserfs_printk("KEY %d: %k PTR %d: %y ", i, key, i + 1, dc); - if (i && i % 4 == 0) - printk("\n"); - } - printk("\n"); - return 0; -} - -static int print_leaf(struct buffer_head *bh, int print_mode, int first, - int last) -{ - struct block_head *blkh; - struct item_head *ih; - int i, nr; - int from, to; - - if (!B_IS_ITEMS_LEVEL(bh)) - return 1; - - check_leaf(bh); - - blkh = B_BLK_HEAD(bh); - ih = B_N_PITEM_HEAD(bh, 0); - nr = blkh_nr_item(blkh); - - printk - ("\n===================================================================\n"); - reiserfs_printk("LEAF NODE (%ld) contains %z\n", bh->b_blocknr, bh); - - if (!(print_mode & PRINT_LEAF_ITEMS)) { - reiserfs_printk("FIRST ITEM_KEY: %k, LAST ITEM KEY: %k\n", - &(ih->ih_key), &((ih + nr - 1)->ih_key)); - return 0; - } - - if (first < 0 || first > nr - 1) - from = 0; - else - from = first; - - if (last < 0 || last > nr) 
- to = nr; - else - to = last; - - ih += from; - printk - ("-------------------------------------------------------------------------------\n"); - printk - ("|##| type | key | ilen | free_space | version | loc |\n"); - for (i = from; i < to; i++, ih++) { - printk - ("-------------------------------------------------------------------------------\n"); - reiserfs_printk("|%2d| %h |\n", i, ih); - if (print_mode & PRINT_LEAF_ITEMS) - op_print_item(ih, B_I_PITEM(bh, ih)); - } - - printk - ("===================================================================\n"); - - return 0; -} - -char *reiserfs_hashname(int code) -{ - if (code == YURA_HASH) - return "rupasov"; - if (code == TEA_HASH) - return "tea"; - if (code == R5_HASH) - return "r5"; - - return "unknown"; -} - -/* return 1 if this is not super block */ -static int print_super_block(struct buffer_head *bh) -{ - struct reiserfs_super_block *rs = - (struct reiserfs_super_block *)(bh->b_data); - int skipped, data_blocks; - char *version; - char b[BDEVNAME_SIZE]; - - if (is_reiserfs_3_5(rs)) { - version = "3.5"; - } else if (is_reiserfs_3_6(rs)) { - version = "3.6"; - } else if (is_reiserfs_jr(rs)) { - version = ((sb_version(rs) == REISERFS_VERSION_2) ? - "3.6" : "3.5"); - } else { - return 1; - } - - printk("%s\'s super block is in block %llu\n", bdevname(bh->b_bdev, b), - (unsigned long long)bh->b_blocknr); - printk("Reiserfs version %s\n", version); - printk("Block count %u\n", sb_block_count(rs)); - printk("Blocksize %d\n", sb_blocksize(rs)); - printk("Free blocks %u\n", sb_free_blocks(rs)); - // FIXME: this would be confusing if - // someone stores reiserfs super block in some data block ;) -// skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs); - skipped = bh->b_blocknr; - data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) - - (!is_reiserfs_jr(rs) ? 
sb_jp_journal_size(rs) + - 1 : sb_reserved_for_journal(rs)) - sb_free_blocks(rs); - printk - ("Busy blocks (skipped %d, bitmaps - %d, journal (or reserved) blocks - %d\n" - "1 super block, %d data blocks\n", skipped, sb_bmap_nr(rs), - (!is_reiserfs_jr(rs) ? (sb_jp_journal_size(rs) + 1) : - sb_reserved_for_journal(rs)), data_blocks); - printk("Root block %u\n", sb_root_block(rs)); - printk("Journal block (first) %d\n", sb_jp_journal_1st_block(rs)); - printk("Journal dev %d\n", sb_jp_journal_dev(rs)); - printk("Journal orig size %d\n", sb_jp_journal_size(rs)); - printk("FS state %d\n", sb_fs_state(rs)); - printk("Hash function \"%s\"\n", - reiserfs_hashname(sb_hash_function_code(rs))); - - printk("Tree height %d\n", sb_tree_height(rs)); - return 0; -} - -static int print_desc_block(struct buffer_head *bh) -{ - struct reiserfs_journal_desc *desc; - - if (memcmp(get_journal_desc_magic(bh), JOURNAL_DESC_MAGIC, 8)) - return 1; - - desc = (struct reiserfs_journal_desc *)(bh->b_data); - printk("Desc block %llu (j_trans_id %d, j_mount_id %d, j_len %d)", - (unsigned long long)bh->b_blocknr, get_desc_trans_id(desc), - get_desc_mount_id(desc), get_desc_trans_len(desc)); - - return 0; -} - -void print_block(struct buffer_head *bh, ...) 
//int print_mode, int first, int last) -{ - va_list args; - int mode, first, last; - - if (!bh) { - printk("print_block: buffer is NULL\n"); - return; - } - - va_start(args, bh); - - mode = va_arg(args, int); - first = va_arg(args, int); - last = va_arg(args, int); - if (print_leaf(bh, mode, first, last)) - if (print_internal(bh, first, last)) - if (print_super_block(bh)) - if (print_desc_block(bh)) - printk - ("Block %llu contains unformatted data\n", - (unsigned long long)bh->b_blocknr); - - va_end(args); -} - -static char print_tb_buf[2048]; - -/* this stores initial state of tree balance in the print_tb_buf */ -void store_print_tb(struct tree_balance *tb) -{ - int h = 0; - int i; - struct buffer_head *tbSh, *tbFh; - - if (!tb) - return; - - sprintf(print_tb_buf, "\n" - "BALANCING %d\n" - "MODE=%c, ITEM_POS=%d POS_IN_ITEM=%d\n" - "=====================================================================\n" - "* h * S * L * R * F * FL * FR * CFL * CFR *\n", - REISERFS_SB(tb->tb_sb)->s_do_balance, - tb->tb_mode, PATH_LAST_POSITION(tb->tb_path), - tb->tb_path->pos_in_item); - - for (h = 0; h < ARRAY_SIZE(tb->insert_size); h++) { - if (PATH_H_PATH_OFFSET(tb->tb_path, h) <= - tb->tb_path->path_length - && PATH_H_PATH_OFFSET(tb->tb_path, - h) > ILLEGAL_PATH_ELEMENT_OFFSET) { - tbSh = PATH_H_PBUFFER(tb->tb_path, h); - tbFh = PATH_H_PPARENT(tb->tb_path, h); - } else { - tbSh = NULL; - tbFh = NULL; - } - sprintf(print_tb_buf + strlen(print_tb_buf), - "* %d * %3lld(%2d) * %3lld(%2d) * %3lld(%2d) * %5lld * %5lld * %5lld * %5lld * %5lld *\n", - h, - (tbSh) ? (long long)(tbSh->b_blocknr) : (-1LL), - (tbSh) ? atomic_read(&(tbSh->b_count)) : -1, - (tb->L[h]) ? (long long)(tb->L[h]->b_blocknr) : (-1LL), - (tb->L[h]) ? atomic_read(&(tb->L[h]->b_count)) : -1, - (tb->R[h]) ? (long long)(tb->R[h]->b_blocknr) : (-1LL), - (tb->R[h]) ? atomic_read(&(tb->R[h]->b_count)) : -1, - (tbFh) ? (long long)(tbFh->b_blocknr) : (-1LL), - (tb->FL[h]) ? 
(long long)(tb->FL[h]-> - b_blocknr) : (-1LL), - (tb->FR[h]) ? (long long)(tb->FR[h]-> - b_blocknr) : (-1LL), - (tb->CFL[h]) ? (long long)(tb->CFL[h]-> - b_blocknr) : (-1LL), - (tb->CFR[h]) ? (long long)(tb->CFR[h]-> - b_blocknr) : (-1LL)); - } - - sprintf(print_tb_buf + strlen(print_tb_buf), - "=====================================================================\n" - "* h * size * ln * lb * rn * rb * blkn * s0 * s1 * s1b * s2 * s2b * curb * lk * rk *\n" - "* 0 * %4d * %2d * %2d * %2d * %2d * %4d * %2d * %2d * %3d * %2d * %3d * %4d * %2d * %2d *\n", - tb->insert_size[0], tb->lnum[0], tb->lbytes, tb->rnum[0], - tb->rbytes, tb->blknum[0], tb->s0num, tb->s1num, tb->s1bytes, - tb->s2num, tb->s2bytes, tb->cur_blknum, tb->lkey[0], - tb->rkey[0]); - - /* this prints balance parameters for non-leaf levels */ - h = 0; - do { - h++; - sprintf(print_tb_buf + strlen(print_tb_buf), - "* %d * %4d * %2d * * %2d * * %2d *\n", - h, tb->insert_size[h], tb->lnum[h], tb->rnum[h], - tb->blknum[h]); - } while (tb->insert_size[h]); - - sprintf(print_tb_buf + strlen(print_tb_buf), - "=====================================================================\n" - "FEB list: "); - - /* print FEB list (list of buffers in form (bh (b_blocknr, b_count), that will be used for new nodes) */ - h = 0; - for (i = 0; i < ARRAY_SIZE(tb->FEB); i++) - sprintf(print_tb_buf + strlen(print_tb_buf), - "%p (%llu %d)%s", tb->FEB[i], - tb->FEB[i] ? (unsigned long long)tb->FEB[i]-> - b_blocknr : 0ULL, - tb->FEB[i] ? atomic_read(&(tb->FEB[i]->b_count)) : 0, - (i == ARRAY_SIZE(tb->FEB) - 1) ? 
"\n" : ", "); - - sprintf(print_tb_buf + strlen(print_tb_buf), - "======================== the end ====================================\n"); -} - -void print_cur_tb(char *mes) -{ - printk("%s\n%s", mes, print_tb_buf); -} - -static void check_leaf_block_head(struct buffer_head *bh) -{ - struct block_head *blkh; - int nr; - - blkh = B_BLK_HEAD(bh); - nr = blkh_nr_item(blkh); - if (nr > (bh->b_size - BLKH_SIZE) / IH_SIZE) - reiserfs_panic(NULL, "vs-6010", "invalid item number %z", - bh); - if (blkh_free_space(blkh) > bh->b_size - BLKH_SIZE - IH_SIZE * nr) - reiserfs_panic(NULL, "vs-6020", "invalid free space %z", - bh); - -} - -static void check_internal_block_head(struct buffer_head *bh) -{ - struct block_head *blkh; - - blkh = B_BLK_HEAD(bh); - if (!(B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL(bh) <= MAX_HEIGHT)) - reiserfs_panic(NULL, "vs-6025", "invalid level %z", bh); - - if (B_NR_ITEMS(bh) > (bh->b_size - BLKH_SIZE) / IH_SIZE) - reiserfs_panic(NULL, "vs-6030", "invalid item number %z", bh); - - if (B_FREE_SPACE(bh) != - bh->b_size - BLKH_SIZE - KEY_SIZE * B_NR_ITEMS(bh) - - DC_SIZE * (B_NR_ITEMS(bh) + 1)) - reiserfs_panic(NULL, "vs-6040", "invalid free space %z", bh); - -} - -void check_leaf(struct buffer_head *bh) -{ - int i; - struct item_head *ih; - - if (!bh) - return; - check_leaf_block_head(bh); - for (i = 0, ih = B_N_PITEM_HEAD(bh, 0); i < B_NR_ITEMS(bh); i++, ih++) - op_check_item(ih, B_I_PITEM(bh, ih)); -} - -void check_internal(struct buffer_head *bh) -{ - if (!bh) - return; - check_internal_block_head(bh); -} - -void print_statistics(struct super_block *s) -{ - - /* - printk ("reiserfs_put_super: session statistics: balances %d, fix_nodes %d, \ - bmap with search %d, without %d, dir2ind %d, ind2dir %d\n", - REISERFS_SB(s)->s_do_balance, REISERFS_SB(s)->s_fix_nodes, - REISERFS_SB(s)->s_bmaps, REISERFS_SB(s)->s_bmaps_without_search, - REISERFS_SB(s)->s_direct2indirect, REISERFS_SB(s)->s_indirect2direct); - */ - -} diff --git 
a/ANDROID_3.4.5/fs/reiserfs/procfs.c b/ANDROID_3.4.5/fs/reiserfs/procfs.c deleted file mode 100644 index 2c1ade69..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/procfs.c +++ /dev/null @@ -1,575 +0,0 @@ -/* -*- linux-c -*- */ - -/* fs/reiserfs/procfs.c */ - -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -/* proc info support a la one created by Sizif@Botik.RU for PGC */ - -#include <linux/module.h> -#include <linux/time.h> -#include <linux/seq_file.h> -#include <asm/uaccess.h> -#include "reiserfs.h" -#include <linux/init.h> -#include <linux/proc_fs.h> - -/* - * LOCKING: - * - * We rely on new Alexander Viro's super-block locking. - * - */ - -static int show_version(struct seq_file *m, struct super_block *sb) -{ - char *format; - - if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6)) { - format = "3.6"; - } else if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_5)) { - format = "3.5"; - } else { - format = "unknown"; - } - - seq_printf(m, "%s format\twith checks %s\n", format, -#if defined( CONFIG_REISERFS_CHECK ) - "on" -#else - "off" -#endif - ); - return 0; -} - -#define SF( x ) ( r -> x ) -#define SFP( x ) SF( s_proc_info_data.x ) -#define SFPL( x ) SFP( x[ level ] ) -#define SFPF( x ) SFP( scan_bitmap.x ) -#define SFPJ( x ) SFP( journal.x ) - -#define D2C( x ) le16_to_cpu( x ) -#define D4C( x ) le32_to_cpu( x ) -#define DF( x ) D2C( rs -> s_v1.x ) -#define DFL( x ) D4C( rs -> s_v1.x ) - -#define objectid_map( s, rs ) (old_format_only (s) ? 
\ - (__le32 *)((struct reiserfs_super_block_v1 *)rs + 1) : \ - (__le32 *)(rs + 1)) -#define MAP( i ) D4C( objectid_map( sb, rs )[ i ] ) - -#define DJF( x ) le32_to_cpu( rs -> x ) -#define DJV( x ) le32_to_cpu( s_v1 -> x ) -#define DJP( x ) le32_to_cpu( jp -> x ) -#define JF( x ) ( r -> s_journal -> x ) - -static int show_super(struct seq_file *m, struct super_block *sb) -{ - struct reiserfs_sb_info *r = REISERFS_SB(sb); - - seq_printf(m, "state: \t%s\n" - "mount options: \t%s%s%s%s%s%s%s%s%s%s%s\n" - "gen. counter: \t%i\n" - "s_disk_reads: \t%i\n" - "s_disk_writes: \t%i\n" - "s_fix_nodes: \t%i\n" - "s_do_balance: \t%i\n" - "s_unneeded_left_neighbor: \t%i\n" - "s_good_search_by_key_reada: \t%i\n" - "s_bmaps: \t%i\n" - "s_bmaps_without_search: \t%i\n" - "s_direct2indirect: \t%i\n" - "s_indirect2direct: \t%i\n" - "\n" - "max_hash_collisions: \t%i\n" - "breads: \t%lu\n" - "bread_misses: \t%lu\n" - "search_by_key: \t%lu\n" - "search_by_key_fs_changed: \t%lu\n" - "search_by_key_restarted: \t%lu\n" - "insert_item_restarted: \t%lu\n" - "paste_into_item_restarted: \t%lu\n" - "cut_from_item_restarted: \t%lu\n" - "delete_solid_item_restarted: \t%lu\n" - "delete_item_restarted: \t%lu\n" - "leaked_oid: \t%lu\n" - "leaves_removable: \t%lu\n", - SF(s_mount_state) == REISERFS_VALID_FS ? - "REISERFS_VALID_FS" : "REISERFS_ERROR_FS", - reiserfs_r5_hash(sb) ? "FORCE_R5 " : "", - reiserfs_rupasov_hash(sb) ? "FORCE_RUPASOV " : "", - reiserfs_tea_hash(sb) ? "FORCE_TEA " : "", - reiserfs_hash_detect(sb) ? "DETECT_HASH " : "", - reiserfs_no_border(sb) ? "NO_BORDER " : "BORDER ", - reiserfs_no_unhashed_relocation(sb) ? - "NO_UNHASHED_RELOCATION " : "", - reiserfs_hashed_relocation(sb) ? "UNHASHED_RELOCATION " : "", - reiserfs_test4(sb) ? "TEST4 " : "", - have_large_tails(sb) ? "TAILS " : have_small_tails(sb) ? - "SMALL_TAILS " : "NO_TAILS ", - replay_only(sb) ? "REPLAY_ONLY " : "", - convert_reiserfs(sb) ? 
"CONV " : "", - atomic_read(&r->s_generation_counter), - SF(s_disk_reads), SF(s_disk_writes), SF(s_fix_nodes), - SF(s_do_balance), SF(s_unneeded_left_neighbor), - SF(s_good_search_by_key_reada), SF(s_bmaps), - SF(s_bmaps_without_search), SF(s_direct2indirect), - SF(s_indirect2direct), SFP(max_hash_collisions), SFP(breads), - SFP(bread_miss), SFP(search_by_key), - SFP(search_by_key_fs_changed), SFP(search_by_key_restarted), - SFP(insert_item_restarted), SFP(paste_into_item_restarted), - SFP(cut_from_item_restarted), - SFP(delete_solid_item_restarted), SFP(delete_item_restarted), - SFP(leaked_oid), SFP(leaves_removable)); - - return 0; -} - -static int show_per_level(struct seq_file *m, struct super_block *sb) -{ - struct reiserfs_sb_info *r = REISERFS_SB(sb); - int level; - - seq_printf(m, "level\t" - " balances" - " [sbk: reads" - " fs_changed" - " restarted]" - " free space" - " items" - " can_remove" - " lnum" - " rnum" - " lbytes" - " rbytes" - " get_neig" - " get_neig_res" " need_l_neig" " need_r_neig" "\n"); - - for (level = 0; level < MAX_HEIGHT; ++level) { - seq_printf(m, "%i\t" - " %12lu" - " %12lu" - " %12lu" - " %12lu" - " %12lu" - " %12lu" - " %12lu" - " %12li" - " %12li" - " %12li" - " %12li" - " %12lu" - " %12lu" - " %12lu" - " %12lu" - "\n", - level, - SFPL(balance_at), - SFPL(sbk_read_at), - SFPL(sbk_fs_changed), - SFPL(sbk_restarted), - SFPL(free_at), - SFPL(items_at), - SFPL(can_node_be_removed), - SFPL(lnum), - SFPL(rnum), - SFPL(lbytes), - SFPL(rbytes), - SFPL(get_neighbors), - SFPL(get_neighbors_restart), - SFPL(need_l_neighbor), SFPL(need_r_neighbor) - ); - } - return 0; -} - -static int show_bitmap(struct seq_file *m, struct super_block *sb) -{ - struct reiserfs_sb_info *r = REISERFS_SB(sb); - - seq_printf(m, "free_block: %lu\n" - " scan_bitmap:" - " wait" - " bmap" - " retry" - " stolen" - " journal_hint" - "journal_nohint" - "\n" - " %14lu" - " %14lu" - " %14lu" - " %14lu" - " %14lu" - " %14lu" - " %14lu" - "\n", - SFP(free_block), - 
SFPF(call), - SFPF(wait), - SFPF(bmap), - SFPF(retry), - SFPF(stolen), - SFPF(in_journal_hint), SFPF(in_journal_nohint)); - - return 0; -} - -static int show_on_disk_super(struct seq_file *m, struct super_block *sb) -{ - struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); - struct reiserfs_super_block *rs = sb_info->s_rs; - int hash_code = DFL(s_hash_function_code); - __u32 flags = DJF(s_flags); - - seq_printf(m, "block_count: \t%i\n" - "free_blocks: \t%i\n" - "root_block: \t%i\n" - "blocksize: \t%i\n" - "oid_maxsize: \t%i\n" - "oid_cursize: \t%i\n" - "umount_state: \t%i\n" - "magic: \t%10.10s\n" - "fs_state: \t%i\n" - "hash: \t%s\n" - "tree_height: \t%i\n" - "bmap_nr: \t%i\n" - "version: \t%i\n" - "flags: \t%x[%s]\n" - "reserved_for_journal: \t%i\n", - DFL(s_block_count), - DFL(s_free_blocks), - DFL(s_root_block), - DF(s_blocksize), - DF(s_oid_maxsize), - DF(s_oid_cursize), - DF(s_umount_state), - rs->s_v1.s_magic, - DF(s_fs_state), - hash_code == TEA_HASH ? "tea" : - (hash_code == YURA_HASH) ? "rupasov" : - (hash_code == R5_HASH) ? "r5" : - (hash_code == UNSET_HASH) ? "unset" : "unknown", - DF(s_tree_height), - DF(s_bmap_nr), - DF(s_version), flags, (flags & reiserfs_attrs_cleared) - ? "attrs_cleared" : "", DF(s_reserved_for_journal)); - - return 0; -} - -static int show_oidmap(struct seq_file *m, struct super_block *sb) -{ - struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); - struct reiserfs_super_block *rs = sb_info->s_rs; - unsigned int mapsize = le16_to_cpu(rs->s_v1.s_oid_cursize); - unsigned long total_used = 0; - int i; - - for (i = 0; i < mapsize; ++i) { - __u32 right; - - right = (i == mapsize - 1) ? MAX_KEY_OBJECTID : MAP(i + 1); - seq_printf(m, "%s: [ %x .. %x )\n", - (i & 1) ? 
"free" : "used", MAP(i), right); - if (!(i & 1)) { - total_used += right - MAP(i); - } - } -#if defined( REISERFS_USE_OIDMAPF ) - if (sb_info->oidmap.use_file && (sb_info->oidmap.mapf != NULL)) { - loff_t size = sb_info->oidmap.mapf->f_path.dentry->d_inode->i_size; - total_used += size / sizeof(reiserfs_oidinterval_d_t); - } -#endif - seq_printf(m, "total: \t%i [%i/%i] used: %lu [exact]\n", - mapsize, - mapsize, le16_to_cpu(rs->s_v1.s_oid_maxsize), total_used); - return 0; -} - -static int show_journal(struct seq_file *m, struct super_block *sb) -{ - struct reiserfs_sb_info *r = REISERFS_SB(sb); - struct reiserfs_super_block *rs = r->s_rs; - struct journal_params *jp = &rs->s_v1.s_journal; - char b[BDEVNAME_SIZE]; - - seq_printf(m, /* on-disk fields */ - "jp_journal_1st_block: \t%i\n" - "jp_journal_dev: \t%s[%x]\n" - "jp_journal_size: \t%i\n" - "jp_journal_trans_max: \t%i\n" - "jp_journal_magic: \t%i\n" - "jp_journal_max_batch: \t%i\n" - "jp_journal_max_commit_age: \t%i\n" - "jp_journal_max_trans_age: \t%i\n" - /* incore fields */ - "j_1st_reserved_block: \t%i\n" - "j_state: \t%li\n" - "j_trans_id: \t%u\n" - "j_mount_id: \t%lu\n" - "j_start: \t%lu\n" - "j_len: \t%lu\n" - "j_len_alloc: \t%lu\n" - "j_wcount: \t%i\n" - "j_bcount: \t%lu\n" - "j_first_unflushed_offset: \t%lu\n" - "j_last_flush_trans_id: \t%u\n" - "j_trans_start_time: \t%li\n" - "j_list_bitmap_index: \t%i\n" - "j_must_wait: \t%i\n" - "j_next_full_flush: \t%i\n" - "j_next_async_flush: \t%i\n" - "j_cnode_used: \t%i\n" "j_cnode_free: \t%i\n" "\n" - /* reiserfs_proc_info_data_t.journal fields */ - "in_journal: \t%12lu\n" - "in_journal_bitmap: \t%12lu\n" - "in_journal_reusable: \t%12lu\n" - "lock_journal: \t%12lu\n" - "lock_journal_wait: \t%12lu\n" - "journal_begin: \t%12lu\n" - "journal_relock_writers: \t%12lu\n" - "journal_relock_wcount: \t%12lu\n" - "mark_dirty: \t%12lu\n" - "mark_dirty_already: \t%12lu\n" - "mark_dirty_notjournal: \t%12lu\n" - "restore_prepared: \t%12lu\n" - "prepare: \t%12lu\n" - 
"prepare_retry: \t%12lu\n", - DJP(jp_journal_1st_block), - bdevname(SB_JOURNAL(sb)->j_dev_bd, b), - DJP(jp_journal_dev), - DJP(jp_journal_size), - DJP(jp_journal_trans_max), - DJP(jp_journal_magic), - DJP(jp_journal_max_batch), - SB_JOURNAL(sb)->j_max_commit_age, - DJP(jp_journal_max_trans_age), - JF(j_1st_reserved_block), - JF(j_state), - JF(j_trans_id), - JF(j_mount_id), - JF(j_start), - JF(j_len), - JF(j_len_alloc), - atomic_read(&r->s_journal->j_wcount), - JF(j_bcount), - JF(j_first_unflushed_offset), - JF(j_last_flush_trans_id), - JF(j_trans_start_time), - JF(j_list_bitmap_index), - JF(j_must_wait), - JF(j_next_full_flush), - JF(j_next_async_flush), - JF(j_cnode_used), - JF(j_cnode_free), - SFPJ(in_journal), - SFPJ(in_journal_bitmap), - SFPJ(in_journal_reusable), - SFPJ(lock_journal), - SFPJ(lock_journal_wait), - SFPJ(journal_being), - SFPJ(journal_relock_writers), - SFPJ(journal_relock_wcount), - SFPJ(mark_dirty), - SFPJ(mark_dirty_already), - SFPJ(mark_dirty_notjournal), - SFPJ(restore_prepared), SFPJ(prepare), SFPJ(prepare_retry) - ); - return 0; -} - -/* iterator */ -static int test_sb(struct super_block *sb, void *data) -{ - return data == sb; -} - -static int set_sb(struct super_block *sb, void *data) -{ - return -ENOENT; -} - -static void *r_start(struct seq_file *m, loff_t * pos) -{ - struct proc_dir_entry *de = m->private; - struct super_block *s = de->parent->data; - loff_t l = *pos; - - if (l) - return NULL; - - if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, s))) - return NULL; - - up_write(&s->s_umount); - return s; -} - -static void *r_next(struct seq_file *m, void *v, loff_t * pos) -{ - ++*pos; - if (v) - deactivate_super(v); - return NULL; -} - -static void r_stop(struct seq_file *m, void *v) -{ - if (v) - deactivate_super(v); -} - -static int r_show(struct seq_file *m, void *v) -{ - struct proc_dir_entry *de = m->private; - int (*show) (struct seq_file *, struct super_block *) = de->data; - return show(m, v); -} - -static const struct 
seq_operations r_ops = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = r_show, -}; - -static int r_open(struct inode *inode, struct file *file) -{ - int ret = seq_open(file, &r_ops); - - if (!ret) { - struct seq_file *m = file->private_data; - m->private = PDE(inode); - } - return ret; -} - -static const struct file_operations r_file_operations = { - .open = r_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, - .owner = THIS_MODULE, -}; - -static struct proc_dir_entry *proc_info_root = NULL; -static const char proc_info_root_name[] = "fs/reiserfs"; - -static void add_file(struct super_block *sb, char *name, - int (*func) (struct seq_file *, struct super_block *)) -{ - proc_create_data(name, 0, REISERFS_SB(sb)->procdir, - &r_file_operations, func); -} - -int reiserfs_proc_info_init(struct super_block *sb) -{ - char b[BDEVNAME_SIZE]; - char *s; - - /* Some block devices use /'s */ - strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE); - s = strchr(b, '/'); - if (s) - *s = '!'; - - spin_lock_init(&__PINFO(sb).lock); - REISERFS_SB(sb)->procdir = proc_mkdir(b, proc_info_root); - if (REISERFS_SB(sb)->procdir) { - REISERFS_SB(sb)->procdir->data = sb; - add_file(sb, "version", show_version); - add_file(sb, "super", show_super); - add_file(sb, "per-level", show_per_level); - add_file(sb, "bitmap", show_bitmap); - add_file(sb, "on-disk-super", show_on_disk_super); - add_file(sb, "oidmap", show_oidmap); - add_file(sb, "journal", show_journal); - return 0; - } - reiserfs_warning(sb, "cannot create /proc/%s/%s", - proc_info_root_name, b); - return 1; -} - -int reiserfs_proc_info_done(struct super_block *sb) -{ - struct proc_dir_entry *de = REISERFS_SB(sb)->procdir; - char b[BDEVNAME_SIZE]; - char *s; - - /* Some block devices use /'s */ - strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE); - s = strchr(b, '/'); - if (s) - *s = '!'; - - if (de) { - remove_proc_entry("journal", de); - remove_proc_entry("oidmap", de); - 
remove_proc_entry("on-disk-super", de); - remove_proc_entry("bitmap", de); - remove_proc_entry("per-level", de); - remove_proc_entry("super", de); - remove_proc_entry("version", de); - } - spin_lock(&__PINFO(sb).lock); - __PINFO(sb).exiting = 1; - spin_unlock(&__PINFO(sb).lock); - if (proc_info_root) { - remove_proc_entry(b, proc_info_root); - REISERFS_SB(sb)->procdir = NULL; - } - return 0; -} - -int reiserfs_proc_info_global_init(void) -{ - if (proc_info_root == NULL) { - proc_info_root = proc_mkdir(proc_info_root_name, NULL); - if (!proc_info_root) { - reiserfs_warning(NULL, "cannot create /proc/%s", - proc_info_root_name); - return 1; - } - } - return 0; -} - -int reiserfs_proc_info_global_done(void) -{ - if (proc_info_root != NULL) { - proc_info_root = NULL; - remove_proc_entry(proc_info_root_name, NULL); - } - return 0; -} -/* - * Revision 1.1.8.2 2001/07/15 17:08:42 god - * . use get_super() in procfs.c - * . remove remove_save_link() from reiserfs_do_truncate() - * - * I accept terms and conditions stated in the Legal Agreement - * (available at http://www.namesys.com/legalese.html) - * - * Revision 1.1.8.1 2001/07/11 16:48:50 god - * proc info support - * - * I accept terms and conditions stated in the Legal Agreement - * (available at http://www.namesys.com/legalese.html) - * - */ - -/* - * Make Linus happy. 
- * Local variables: - * c-indentation-style: "K&R" - * mode-name: "LC" - * c-basic-offset: 8 - * tab-width: 8 - * End: - */ diff --git a/ANDROID_3.4.5/fs/reiserfs/reiserfs.h b/ANDROID_3.4.5/fs/reiserfs/reiserfs.h deleted file mode 100644 index a59d2712..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/reiserfs.h +++ /dev/null @@ -1,2923 +0,0 @@ -/* - * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details - */ - -#include <linux/reiserfs_fs.h> - -#include <linux/slab.h> -#include <linux/interrupt.h> -#include <linux/sched.h> -#include <linux/bug.h> -#include <linux/workqueue.h> -#include <asm/unaligned.h> -#include <linux/bitops.h> -#include <linux/proc_fs.h> -#include <linux/buffer_head.h> - -/* the 32 bit compat definitions with int argument */ -#define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int) -#define REISERFS_IOC32_GETFLAGS FS_IOC32_GETFLAGS -#define REISERFS_IOC32_SETFLAGS FS_IOC32_SETFLAGS -#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION -#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION - -struct reiserfs_journal_list; - -/** bitmasks for i_flags field in reiserfs-specific part of inode */ -typedef enum { - /** this says what format of key do all items (but stat data) of - an object have. If this is set, that format is 3.6 otherwise - - 3.5 */ - i_item_key_version_mask = 0x0001, - /** If this is unset, object has 3.5 stat data, otherwise, it has - 3.6 stat data with 64bit size, 32bit nlink etc. */ - i_stat_data_version_mask = 0x0002, - /** file might need tail packing on close */ - i_pack_on_close_mask = 0x0004, - /** don't pack tail of file */ - i_nopack_mask = 0x0008, - /** If those is set, "safe link" was created for this file during - truncate or unlink. Safe link is used to avoid leakage of disk - space on crash with some files open, but unlinked. 
*/ - i_link_saved_unlink_mask = 0x0010, - i_link_saved_truncate_mask = 0x0020, - i_has_xattr_dir = 0x0040, - i_data_log = 0x0080, -} reiserfs_inode_flags; - -struct reiserfs_inode_info { - __u32 i_key[4]; /* key is still 4 32 bit integers */ - /** transient inode flags that are never stored on disk. Bitmasks - for this field are defined above. */ - __u32 i_flags; - - __u32 i_first_direct_byte; // offset of first byte stored in direct item. - - /* copy of persistent inode flags read from sd_attrs. */ - __u32 i_attrs; - - int i_prealloc_block; /* first unused block of a sequence of unused blocks */ - int i_prealloc_count; /* length of that sequence */ - struct list_head i_prealloc_list; /* per-transaction list of inodes which - * have preallocated blocks */ - - unsigned new_packing_locality:1; /* new_packig_locality is created; new blocks - * for the contents of this directory should be - * displaced */ - - /* we use these for fsync or O_SYNC to decide which transaction - ** needs to be committed in order for this inode to be properly - ** flushed */ - unsigned int i_trans_id; - struct reiserfs_journal_list *i_jl; - atomic_t openers; - struct mutex tailpack; -#ifdef CONFIG_REISERFS_FS_XATTR - struct rw_semaphore i_xattr_sem; -#endif - struct inode vfs_inode; -}; - -typedef enum { - reiserfs_attrs_cleared = 0x00000001, -} reiserfs_super_block_flags; - -/* struct reiserfs_super_block accessors/mutators - * since this is a disk structure, it will always be in - * little endian format. 
*/ -#define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count)) -#define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v)) -#define sb_free_blocks(sbp) (le32_to_cpu((sbp)->s_v1.s_free_blocks)) -#define set_sb_free_blocks(sbp,v) ((sbp)->s_v1.s_free_blocks = cpu_to_le32(v)) -#define sb_root_block(sbp) (le32_to_cpu((sbp)->s_v1.s_root_block)) -#define set_sb_root_block(sbp,v) ((sbp)->s_v1.s_root_block = cpu_to_le32(v)) - -#define sb_jp_journal_1st_block(sbp) \ - (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_1st_block)) -#define set_sb_jp_journal_1st_block(sbp,v) \ - ((sbp)->s_v1.s_journal.jp_journal_1st_block = cpu_to_le32(v)) -#define sb_jp_journal_dev(sbp) \ - (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_dev)) -#define set_sb_jp_journal_dev(sbp,v) \ - ((sbp)->s_v1.s_journal.jp_journal_dev = cpu_to_le32(v)) -#define sb_jp_journal_size(sbp) \ - (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_size)) -#define set_sb_jp_journal_size(sbp,v) \ - ((sbp)->s_v1.s_journal.jp_journal_size = cpu_to_le32(v)) -#define sb_jp_journal_trans_max(sbp) \ - (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_trans_max)) -#define set_sb_jp_journal_trans_max(sbp,v) \ - ((sbp)->s_v1.s_journal.jp_journal_trans_max = cpu_to_le32(v)) -#define sb_jp_journal_magic(sbp) \ - (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_magic)) -#define set_sb_jp_journal_magic(sbp,v) \ - ((sbp)->s_v1.s_journal.jp_journal_magic = cpu_to_le32(v)) -#define sb_jp_journal_max_batch(sbp) \ - (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_batch)) -#define set_sb_jp_journal_max_batch(sbp,v) \ - ((sbp)->s_v1.s_journal.jp_journal_max_batch = cpu_to_le32(v)) -#define sb_jp_jourmal_max_commit_age(sbp) \ - (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_commit_age)) -#define set_sb_jp_journal_max_commit_age(sbp,v) \ - ((sbp)->s_v1.s_journal.jp_journal_max_commit_age = cpu_to_le32(v)) - -#define sb_blocksize(sbp) (le16_to_cpu((sbp)->s_v1.s_blocksize)) -#define set_sb_blocksize(sbp,v) 
((sbp)->s_v1.s_blocksize = cpu_to_le16(v)) -#define sb_oid_maxsize(sbp) (le16_to_cpu((sbp)->s_v1.s_oid_maxsize)) -#define set_sb_oid_maxsize(sbp,v) ((sbp)->s_v1.s_oid_maxsize = cpu_to_le16(v)) -#define sb_oid_cursize(sbp) (le16_to_cpu((sbp)->s_v1.s_oid_cursize)) -#define set_sb_oid_cursize(sbp,v) ((sbp)->s_v1.s_oid_cursize = cpu_to_le16(v)) -#define sb_umount_state(sbp) (le16_to_cpu((sbp)->s_v1.s_umount_state)) -#define set_sb_umount_state(sbp,v) ((sbp)->s_v1.s_umount_state = cpu_to_le16(v)) -#define sb_fs_state(sbp) (le16_to_cpu((sbp)->s_v1.s_fs_state)) -#define set_sb_fs_state(sbp,v) ((sbp)->s_v1.s_fs_state = cpu_to_le16(v)) -#define sb_hash_function_code(sbp) \ - (le32_to_cpu((sbp)->s_v1.s_hash_function_code)) -#define set_sb_hash_function_code(sbp,v) \ - ((sbp)->s_v1.s_hash_function_code = cpu_to_le32(v)) -#define sb_tree_height(sbp) (le16_to_cpu((sbp)->s_v1.s_tree_height)) -#define set_sb_tree_height(sbp,v) ((sbp)->s_v1.s_tree_height = cpu_to_le16(v)) -#define sb_bmap_nr(sbp) (le16_to_cpu((sbp)->s_v1.s_bmap_nr)) -#define set_sb_bmap_nr(sbp,v) ((sbp)->s_v1.s_bmap_nr = cpu_to_le16(v)) -#define sb_version(sbp) (le16_to_cpu((sbp)->s_v1.s_version)) -#define set_sb_version(sbp,v) ((sbp)->s_v1.s_version = cpu_to_le16(v)) - -#define sb_mnt_count(sbp) (le16_to_cpu((sbp)->s_mnt_count)) -#define set_sb_mnt_count(sbp, v) ((sbp)->s_mnt_count = cpu_to_le16(v)) - -#define sb_reserved_for_journal(sbp) \ - (le16_to_cpu((sbp)->s_v1.s_reserved_for_journal)) -#define set_sb_reserved_for_journal(sbp,v) \ - ((sbp)->s_v1.s_reserved_for_journal = cpu_to_le16(v)) - -/* LOGGING -- */ - -/* These all interelate for performance. -** -** If the journal block count is smaller than n transactions, you lose speed. -** I don't know what n is yet, I'm guessing 8-16. -** -** typical transaction size depends on the application, how often fsync is -** called, and how many metadata blocks you dirty in a 30 second period. 
-** The more small files (<16k) you use, the larger your transactions will -** be. -** -** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal -** to wrap, which slows things down. If you need high speed meta data updates, the journal should be big enough -** to prevent wrapping before dirty meta blocks get to disk. -** -** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal -** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping. -** -** The large the batch max age, the better the speed, and the more meta data changes you'll lose after a crash. -** -*/ - -/* don't mess with these for a while */ - /* we have a node size define somewhere in reiserfs_fs.h. -Hans */ -#define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */ -#define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */ -#define JOURNAL_HASH_SIZE 8192 -#define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating. Must be >= 2 */ - -/* One of these for every block in every transaction -** Each one is in two hash tables. First, a hash of the current transaction, and after journal_end, a -** hash of all the in memory transactions. -** next and prev are used by the current transaction (journal_hash). -** hnext and hprev are used by journal_list_hash. If a block is in more than one transaction, the journal_list_hash -** links it in multiple times. This allows flush_journal_list to remove just the cnode belonging -** to a given transaction. 
-*/ -struct reiserfs_journal_cnode { - struct buffer_head *bh; /* real buffer head */ - struct super_block *sb; /* dev of real buffer head */ - __u32 blocknr; /* block number of real buffer head, == 0 when buffer on disk */ - unsigned long state; - struct reiserfs_journal_list *jlist; /* journal list this cnode lives in */ - struct reiserfs_journal_cnode *next; /* next in transaction list */ - struct reiserfs_journal_cnode *prev; /* prev in transaction list */ - struct reiserfs_journal_cnode *hprev; /* prev in hash list */ - struct reiserfs_journal_cnode *hnext; /* next in hash list */ -}; - -struct reiserfs_bitmap_node { - int id; - char *data; - struct list_head list; -}; - -struct reiserfs_list_bitmap { - struct reiserfs_journal_list *journal_list; - struct reiserfs_bitmap_node **bitmaps; -}; - -/* -** one of these for each transaction. The most important part here is the j_realblock. -** this list of cnodes is used to hash all the blocks in all the commits, to mark all the -** real buffer heads dirty once all the commits hit the disk, -** and to make sure every real block in a transaction is on disk before allowing the log area -** to be overwritten */ -struct reiserfs_journal_list { - unsigned long j_start; - unsigned long j_state; - unsigned long j_len; - atomic_t j_nonzerolen; - atomic_t j_commit_left; - atomic_t j_older_commits_done; /* all commits older than this on disk */ - struct mutex j_commit_mutex; - unsigned int j_trans_id; - time_t j_timestamp; - struct reiserfs_list_bitmap *j_list_bitmap; - struct buffer_head *j_commit_bh; /* commit buffer head */ - struct reiserfs_journal_cnode *j_realblock; - struct reiserfs_journal_cnode *j_freedlist; /* list of buffers that were freed during this trans. 
free each of these on flush */ - /* time ordered list of all active transactions */ - struct list_head j_list; - - /* time ordered list of all transactions we haven't tried to flush yet */ - struct list_head j_working_list; - - /* list of tail conversion targets in need of flush before commit */ - struct list_head j_tail_bh_list; - /* list of data=ordered buffers in need of flush before commit */ - struct list_head j_bh_list; - int j_refcount; -}; - -struct reiserfs_journal { - struct buffer_head **j_ap_blocks; /* journal blocks on disk */ - struct reiserfs_journal_cnode *j_last; /* newest journal block */ - struct reiserfs_journal_cnode *j_first; /* oldest journal block. start here for traverse */ - - struct block_device *j_dev_bd; - fmode_t j_dev_mode; - int j_1st_reserved_block; /* first block on s_dev of reserved area journal */ - - unsigned long j_state; - unsigned int j_trans_id; - unsigned long j_mount_id; - unsigned long j_start; /* start of current waiting commit (index into j_ap_blocks) */ - unsigned long j_len; /* length of current waiting commit */ - unsigned long j_len_alloc; /* number of buffers requested by journal_begin() */ - atomic_t j_wcount; /* count of writers for current commit */ - unsigned long j_bcount; /* batch count. allows turning X transactions into 1 */ - unsigned long j_first_unflushed_offset; /* first unflushed transactions offset */ - unsigned j_last_flush_trans_id; /* last fully flushed journal timestamp */ - struct buffer_head *j_header_bh; - - time_t j_trans_start_time; /* time this transaction started */ - struct mutex j_mutex; - struct mutex j_flush_mutex; - wait_queue_head_t j_join_wait; /* wait for current transaction to finish before starting new one */ - atomic_t j_jlock; /* lock for j_join_wait */ - int j_list_bitmap_index; /* number of next list bitmap to use */ - int j_must_wait; /* no more journal begins allowed. 
MUST sleep on j_join_wait */ - int j_next_full_flush; /* next journal_end will flush all journal list */ - int j_next_async_flush; /* next journal_end will flush all async commits */ - - int j_cnode_used; /* number of cnodes on the used list */ - int j_cnode_free; /* number of cnodes on the free list */ - - unsigned int j_trans_max; /* max number of blocks in a transaction. */ - unsigned int j_max_batch; /* max number of blocks to batch into a trans */ - unsigned int j_max_commit_age; /* in seconds, how old can an async commit be */ - unsigned int j_max_trans_age; /* in seconds, how old can a transaction be */ - unsigned int j_default_max_commit_age; /* the default for the max commit age */ - - struct reiserfs_journal_cnode *j_cnode_free_list; - struct reiserfs_journal_cnode *j_cnode_free_orig; /* orig pointer returned from vmalloc */ - - struct reiserfs_journal_list *j_current_jl; - int j_free_bitmap_nodes; - int j_used_bitmap_nodes; - - int j_num_lists; /* total number of active transactions */ - int j_num_work_lists; /* number that need attention from kreiserfsd */ - - /* debugging to make sure things are flushed in order */ - unsigned int j_last_flush_id; - - /* debugging to make sure things are committed in order */ - unsigned int j_last_commit_id; - - struct list_head j_bitmap_nodes; - struct list_head j_dirty_buffers; - spinlock_t j_dirty_buffers_lock; /* protects j_dirty_buffers */ - - /* list of all active transactions */ - struct list_head j_journal_list; - /* lists that haven't been touched by writeback attempts */ - struct list_head j_working_list; - - struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS]; /* array of bitmaps to record the deleted blocks */ - struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE]; /* hash table for real buffer heads in current trans */ - struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE]; /* hash table for all the real buffer heads in all - the transactions */ - struct list_head 
j_prealloc_list; /* list of inodes which have preallocated blocks */ - int j_persistent_trans; - unsigned long j_max_trans_size; - unsigned long j_max_batch_size; - - int j_errno; - - /* when flushing ordered buffers, throttle new ordered writers */ - struct delayed_work j_work; - struct super_block *j_work_sb; - atomic_t j_async_throttle; -}; - -enum journal_state_bits { - J_WRITERS_BLOCKED = 1, /* set when new writers not allowed */ - J_WRITERS_QUEUED, /* set when log is full due to too many writers */ - J_ABORTED, /* set when log is aborted */ -}; - -#define JOURNAL_DESC_MAGIC "ReIsErLB" /* ick. magic string to find desc blocks in the journal */ - -typedef __u32(*hashf_t) (const signed char *, int); - -struct reiserfs_bitmap_info { - __u32 free_count; -}; - -struct proc_dir_entry; - -#if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO ) -typedef unsigned long int stat_cnt_t; -typedef struct reiserfs_proc_info_data { - spinlock_t lock; - int exiting; - int max_hash_collisions; - - stat_cnt_t breads; - stat_cnt_t bread_miss; - stat_cnt_t search_by_key; - stat_cnt_t search_by_key_fs_changed; - stat_cnt_t search_by_key_restarted; - - stat_cnt_t insert_item_restarted; - stat_cnt_t paste_into_item_restarted; - stat_cnt_t cut_from_item_restarted; - stat_cnt_t delete_solid_item_restarted; - stat_cnt_t delete_item_restarted; - - stat_cnt_t leaked_oid; - stat_cnt_t leaves_removable; - - /* balances per level. Use explicit 5 as MAX_HEIGHT is not visible yet. 
*/ - stat_cnt_t balance_at[5]; /* XXX */ - /* sbk == search_by_key */ - stat_cnt_t sbk_read_at[5]; /* XXX */ - stat_cnt_t sbk_fs_changed[5]; - stat_cnt_t sbk_restarted[5]; - stat_cnt_t items_at[5]; /* XXX */ - stat_cnt_t free_at[5]; /* XXX */ - stat_cnt_t can_node_be_removed[5]; /* XXX */ - long int lnum[5]; /* XXX */ - long int rnum[5]; /* XXX */ - long int lbytes[5]; /* XXX */ - long int rbytes[5]; /* XXX */ - stat_cnt_t get_neighbors[5]; - stat_cnt_t get_neighbors_restart[5]; - stat_cnt_t need_l_neighbor[5]; - stat_cnt_t need_r_neighbor[5]; - - stat_cnt_t free_block; - struct __scan_bitmap_stats { - stat_cnt_t call; - stat_cnt_t wait; - stat_cnt_t bmap; - stat_cnt_t retry; - stat_cnt_t in_journal_hint; - stat_cnt_t in_journal_nohint; - stat_cnt_t stolen; - } scan_bitmap; - struct __journal_stats { - stat_cnt_t in_journal; - stat_cnt_t in_journal_bitmap; - stat_cnt_t in_journal_reusable; - stat_cnt_t lock_journal; - stat_cnt_t lock_journal_wait; - stat_cnt_t journal_being; - stat_cnt_t journal_relock_writers; - stat_cnt_t journal_relock_wcount; - stat_cnt_t mark_dirty; - stat_cnt_t mark_dirty_already; - stat_cnt_t mark_dirty_notjournal; - stat_cnt_t restore_prepared; - stat_cnt_t prepare; - stat_cnt_t prepare_retry; - } journal; -} reiserfs_proc_info_data_t; -#else -typedef struct reiserfs_proc_info_data { -} reiserfs_proc_info_data_t; -#endif - -/* reiserfs union of in-core super block data */ -struct reiserfs_sb_info { - struct buffer_head *s_sbh; /* Buffer containing the super block */ - /* both the comment and the choice of - name are unclear for s_rs -Hans */ - struct reiserfs_super_block *s_rs; /* Pointer to the super block in the buffer */ - struct reiserfs_bitmap_info *s_ap_bitmap; - struct reiserfs_journal *s_journal; /* pointer to journal information */ - unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ - - /* Serialize writers access, replace the old bkl */ - struct mutex lock; - /* Owner of the lock (can be recursive) */ - struct 
task_struct *lock_owner; - /* Depth of the lock, start from -1 like the bkl */ - int lock_depth; - - /* Comment? -Hans */ - void (*end_io_handler) (struct buffer_head *, int); - hashf_t s_hash_function; /* pointer to function which is used - to sort names in directory. Set on - mount */ - unsigned long s_mount_opt; /* reiserfs's mount options are set - here (currently - NOTAIL, NOLOG, - REPLAYONLY) */ - - struct { /* This is a structure that describes block allocator options */ - unsigned long bits; /* Bitfield for enable/disable kind of options */ - unsigned long large_file_size; /* size started from which we consider file to be a large one(in blocks) */ - int border; /* percentage of disk, border takes */ - int preallocmin; /* Minimal file size (in blocks) starting from which we do preallocations */ - int preallocsize; /* Number of blocks we try to prealloc when file - reaches preallocmin size (in blocks) or - prealloc_list is empty. */ - } s_alloc_options; - - /* Comment? -Hans */ - wait_queue_head_t s_wait; - /* To be obsoleted soon by per buffer seals.. -Hans */ - atomic_t s_generation_counter; // increased by one every time the - // tree gets re-balanced - unsigned long s_properties; /* File system properties. Currently holds - on-disk FS format */ - - /* session statistics */ - int s_disk_reads; - int s_disk_writes; - int s_fix_nodes; - int s_do_balance; - int s_unneeded_left_neighbor; - int s_good_search_by_key_reada; - int s_bmaps; - int s_bmaps_without_search; - int s_direct2indirect; - int s_indirect2direct; - /* set up when it's ok for reiserfs_read_inode2() to read from - disk inode with nlink==0. 
Currently this is only used during - finish_unfinished() processing at mount time */ - int s_is_unlinked_ok; - reiserfs_proc_info_data_t s_proc_info_data; - struct proc_dir_entry *procdir; - int reserved_blocks; /* amount of blocks reserved for further allocations */ - spinlock_t bitmap_lock; /* this lock on now only used to protect reserved_blocks variable */ - struct dentry *priv_root; /* root of /.reiserfs_priv */ - struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ - int j_errno; -#ifdef CONFIG_QUOTA - char *s_qf_names[MAXQUOTAS]; - int s_jquota_fmt; -#endif - char *s_jdev; /* Stored jdev for mount option showing */ -#ifdef CONFIG_REISERFS_CHECK - - struct tree_balance *cur_tb; /* - * Detects whether more than one - * copy of tb exists per superblock - * as a means of checking whether - * do_balance is executing concurrently - * against another tree reader/writer - * on a same mount point. - */ -#endif -}; - -/* Definitions of reiserfs on-disk properties: */ -#define REISERFS_3_5 0 -#define REISERFS_3_6 1 -#define REISERFS_OLD_FORMAT 2 - -enum reiserfs_mount_options { -/* Mount options */ - REISERFS_LARGETAIL, /* large tails will be created in a session */ - REISERFS_SMALLTAIL, /* small (for files less than block size) tails will be created in a session */ - REPLAYONLY, /* replay journal and return 0. Use by fsck */ - REISERFS_CONVERT, /* -o conv: causes conversion of old - format super block to the new - format. If not specified - old - partition will be dealt with in a - manner of 3.5.x */ - -/* -o hash={tea, rupasov, r5, detect} is meant for properly mounting -** reiserfs disks from 3.5.19 or earlier. 99% of the time, this option -** is not required. If the normal autodection code can't determine which -** hash to use (because both hashes had the same value for a file) -** use this option to force a specific hash. 
It won't allow you to override -** the existing hash on the FS, so if you have a tea hash disk, and mount -** with -o hash=rupasov, the mount will fail. -*/ - FORCE_TEA_HASH, /* try to force tea hash on mount */ - FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */ - FORCE_R5_HASH, /* try to force rupasov hash on mount */ - FORCE_HASH_DETECT, /* try to detect hash function on mount */ - - REISERFS_DATA_LOG, - REISERFS_DATA_ORDERED, - REISERFS_DATA_WRITEBACK, - -/* used for testing experimental features, makes benchmarking new - features with and without more convenient, should never be used by - users in any code shipped to users (ideally) */ - - REISERFS_NO_BORDER, - REISERFS_NO_UNHASHED_RELOCATION, - REISERFS_HASHED_RELOCATION, - REISERFS_ATTRS, - REISERFS_XATTRS_USER, - REISERFS_POSIXACL, - REISERFS_EXPOSE_PRIVROOT, - REISERFS_BARRIER_NONE, - REISERFS_BARRIER_FLUSH, - - /* Actions on error */ - REISERFS_ERROR_PANIC, - REISERFS_ERROR_RO, - REISERFS_ERROR_CONTINUE, - - REISERFS_USRQUOTA, /* User quota option specified */ - REISERFS_GRPQUOTA, /* Group quota option specified */ - - REISERFS_TEST1, - REISERFS_TEST2, - REISERFS_TEST3, - REISERFS_TEST4, - REISERFS_UNSUPPORTED_OPT, -}; - -#define reiserfs_r5_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_R5_HASH)) -#define reiserfs_rupasov_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_RUPASOV_HASH)) -#define reiserfs_tea_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_TEA_HASH)) -#define reiserfs_hash_detect(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_HASH_DETECT)) -#define reiserfs_no_border(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_BORDER)) -#define reiserfs_no_unhashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION)) -#define reiserfs_hashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_HASHED_RELOCATION)) -#define reiserfs_test4(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TEST4)) - -#define have_large_tails(s) 
(REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_LARGETAIL)) -#define have_small_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_SMALLTAIL)) -#define replay_only(s) (REISERFS_SB(s)->s_mount_opt & (1 << REPLAYONLY)) -#define reiserfs_attrs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ATTRS)) -#define old_format_only(s) (REISERFS_SB(s)->s_properties & (1 << REISERFS_3_5)) -#define convert_reiserfs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_CONVERT)) -#define reiserfs_data_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_LOG)) -#define reiserfs_data_ordered(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_ORDERED)) -#define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK)) -#define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER)) -#define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL)) -#define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT)) -#define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s)) -#define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE)) -#define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH)) - -#define reiserfs_error_panic(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_PANIC)) -#define reiserfs_error_ro(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_RO)) - -void reiserfs_file_buffer(struct buffer_head *bh, int list); -extern struct file_system_type reiserfs_fs_type; -int reiserfs_resize(struct super_block *, unsigned long); - -#define CARRY_ON 0 -#define SCHEDULE_OCCURRED 1 - -#define SB_BUFFER_WITH_SB(s) (REISERFS_SB(s)->s_sbh) -#define SB_JOURNAL(s) (REISERFS_SB(s)->s_journal) -#define SB_JOURNAL_1st_RESERVED_BLOCK(s) (SB_JOURNAL(s)->j_1st_reserved_block) -#define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free) -#define SB_AP_BITMAP(s) 
(REISERFS_SB(s)->s_ap_bitmap) - -#define SB_DISK_JOURNAL_HEAD(s) (SB_JOURNAL(s)->j_header_bh->) - -/* A safe version of the "bdevname", which returns the "s_id" field of - * a superblock or else "Null superblock" if the super block is NULL. - */ -static inline char *reiserfs_bdevname(struct super_block *s) -{ - return (s == NULL) ? "Null superblock" : s->s_id; -} - -#define reiserfs_is_journal_aborted(journal) (unlikely (__reiserfs_is_journal_aborted (journal))) -static inline int __reiserfs_is_journal_aborted(struct reiserfs_journal - *journal) -{ - return test_bit(J_ABORTED, &journal->j_state); -} - -/* - * Locking primitives. The write lock is a per superblock - * special mutex that has properties close to the Big Kernel Lock - * which was used in the previous locking scheme. - */ -void reiserfs_write_lock(struct super_block *s); -void reiserfs_write_unlock(struct super_block *s); -int reiserfs_write_lock_once(struct super_block *s); -void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); - -#ifdef CONFIG_REISERFS_CHECK -void reiserfs_lock_check_recursive(struct super_block *s); -#else -static inline void reiserfs_lock_check_recursive(struct super_block *s) { } -#endif - -/* - * Several mutexes depend on the write lock. - * However sometimes we want to relax the write lock while we hold - * these mutexes, according to the release/reacquire on schedule() - * properties of the Bkl that were used. - * Reiserfs performances and locking were based on this scheme. 
- * Now that the write lock is a mutex and not the bkl anymore, doing so - * may result in a deadlock: - * - * A acquire write_lock - * A acquire j_commit_mutex - * A release write_lock and wait for something - * B acquire write_lock - * B can't acquire j_commit_mutex and sleep - * A can't acquire write lock anymore - * deadlock - * - * What we do here is avoiding such deadlock by playing the same game - * than the Bkl: if we can't acquire a mutex that depends on the write lock, - * we release the write lock, wait a bit and then retry. - * - * The mutexes concerned by this hack are: - * - The commit mutex of a journal list - * - The flush mutex - * - The journal lock - * - The inode mutex - */ -static inline void reiserfs_mutex_lock_safe(struct mutex *m, - struct super_block *s) -{ - reiserfs_lock_check_recursive(s); - reiserfs_write_unlock(s); - mutex_lock(m); - reiserfs_write_lock(s); -} - -static inline void -reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, - struct super_block *s) -{ - reiserfs_lock_check_recursive(s); - reiserfs_write_unlock(s); - mutex_lock_nested(m, subclass); - reiserfs_write_lock(s); -} - -static inline void -reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s) -{ - reiserfs_lock_check_recursive(s); - reiserfs_write_unlock(s); - down_read(sem); - reiserfs_write_lock(s); -} - -/* - * When we schedule, we usually want to also release the write lock, - * according to the previous bkl based locking scheme of reiserfs. 
- */ -static inline void reiserfs_cond_resched(struct super_block *s) -{ - if (need_resched()) { - reiserfs_write_unlock(s); - schedule(); - reiserfs_write_lock(s); - } -} - -struct fid; - -/* in reading the #defines, it may help to understand that they employ - the following abbreviations: - - B = Buffer - I = Item header - H = Height within the tree (should be changed to LEV) - N = Number of the item in the node - STAT = stat data - DEH = Directory Entry Header - EC = Entry Count - E = Entry number - UL = Unsigned Long - BLKH = BLocK Header - UNFM = UNForMatted node - DC = Disk Child - P = Path - - These #defines are named by concatenating these abbreviations, - where first comes the arguments, and last comes the return value, - of the macro. - -*/ - -#define USE_INODE_GENERATION_COUNTER - -#define REISERFS_PREALLOCATE -#define DISPLACE_NEW_PACKING_LOCALITIES -#define PREALLOCATION_SIZE 9 - -/* n must be power of 2 */ -#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u)) - -// to be ok for alpha and others we have to align structures to 8 byte -// boundary. -// FIXME: do not change 4 by anything else: there is code which relies on that -#define ROUND_UP(x) _ROUND_UP(x,8LL) - -/* debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug -** messages. -*/ -#define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */ - -void __reiserfs_warning(struct super_block *s, const char *id, - const char *func, const char *fmt, ...); -#define reiserfs_warning(s, id, fmt, args...) \ - __reiserfs_warning(s, id, __func__, fmt, ##args) -/* assertions handling */ - -/** always check a condition and panic if it's false. */ -#define __RASSERT(cond, scond, format, args...) \ -do { \ - if (!(cond)) \ - reiserfs_panic(NULL, "assertion failure", "(" #cond ") at " \ - __FILE__ ":%i:%s: " format "\n", \ - in_interrupt() ? -1 : task_pid_nr(current), \ - __LINE__, __func__ , ##args); \ -} while (0) - -#define RASSERT(cond, format, args...) 
__RASSERT(cond, #cond, format, ##args) - -#if defined( CONFIG_REISERFS_CHECK ) -#define RFALSE(cond, format, args...) __RASSERT(!(cond), "!(" #cond ")", format, ##args) -#else -#define RFALSE( cond, format, args... ) do {;} while( 0 ) -#endif - -#define CONSTF __attribute_const__ -/* - * Disk Data Structures - */ - -/***************************************************************************/ -/* SUPER BLOCK */ -/***************************************************************************/ - -/* - * Structure of super block on disk, a version of which in RAM is often accessed as REISERFS_SB(s)->s_rs - * the version in RAM is part of a larger structure containing fields never written to disk. - */ -#define UNSET_HASH 0 // read_super will guess about, what hash names - // in directories were sorted with -#define TEA_HASH 1 -#define YURA_HASH 2 -#define R5_HASH 3 -#define DEFAULT_HASH R5_HASH - -struct journal_params { - __le32 jp_journal_1st_block; /* where does journal start from on its - * device */ - __le32 jp_journal_dev; /* journal device st_rdev */ - __le32 jp_journal_size; /* size of the journal */ - __le32 jp_journal_trans_max; /* max number of blocks in a transaction. 
*/ - __le32 jp_journal_magic; /* random value made on fs creation (this - * was sb_journal_block_count) */ - __le32 jp_journal_max_batch; /* max number of blocks to batch into a - * trans */ - __le32 jp_journal_max_commit_age; /* in seconds, how old can an async - * commit be */ - __le32 jp_journal_max_trans_age; /* in seconds, how old can a transaction - * be */ -}; - -/* this is the super from 3.5.X, where X >= 10 */ -struct reiserfs_super_block_v1 { - __le32 s_block_count; /* blocks count */ - __le32 s_free_blocks; /* free blocks count */ - __le32 s_root_block; /* root block number */ - struct journal_params s_journal; - __le16 s_blocksize; /* block size */ - __le16 s_oid_maxsize; /* max size of object id array, see - * get_objectid() commentary */ - __le16 s_oid_cursize; /* current size of object id array */ - __le16 s_umount_state; /* this is set to 1 when filesystem was - * umounted, to 2 - when not */ - char s_magic[10]; /* reiserfs magic string indicates that - * file system is reiserfs: - * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */ - __le16 s_fs_state; /* it is set to used by fsck to mark which - * phase of rebuilding is done */ - __le32 s_hash_function_code; /* indicate, what hash function is being use - * to sort names in a directory*/ - __le16 s_tree_height; /* height of disk tree */ - __le16 s_bmap_nr; /* amount of bitmap blocks needed to address - * each block of file system */ - __le16 s_version; /* this field is only reliable on filesystem - * with non-standard journal */ - __le16 s_reserved_for_journal; /* size in blocks of journal area on main - * device, we need to keep after - * making fs with non-standard journal */ -} __attribute__ ((__packed__)); - -#define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) - -/* this is the on disk super block */ -struct reiserfs_super_block { - struct reiserfs_super_block_v1 s_v1; - __le32 s_inode_generation; - __le32 s_flags; /* Right now used only by inode-attributes, if enabled */ - unsigned char 
s_uuid[16]; /* filesystem unique identifier */ - unsigned char s_label[16]; /* filesystem volume label */ - __le16 s_mnt_count; /* Count of mounts since last fsck */ - __le16 s_max_mnt_count; /* Maximum mounts before check */ - __le32 s_lastcheck; /* Timestamp of last fsck */ - __le32 s_check_interval; /* Interval between checks */ - char s_unused[76]; /* zero filled by mkreiserfs and - * reiserfs_convert_objectid_map_v1() - * so any additions must be updated - * there as well. */ -} __attribute__ ((__packed__)); - -#define SB_SIZE (sizeof(struct reiserfs_super_block)) - -#define REISERFS_VERSION_1 0 -#define REISERFS_VERSION_2 2 - -// on-disk super block fields converted to cpu form -#define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs) -#define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1)) -#define SB_BLOCKSIZE(s) \ - le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_blocksize)) -#define SB_BLOCK_COUNT(s) \ - le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_block_count)) -#define SB_FREE_BLOCKS(s) \ - le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks)) -#define SB_REISERFS_MAGIC(s) \ - (SB_V1_DISK_SUPER_BLOCK(s)->s_magic) -#define SB_ROOT_BLOCK(s) \ - le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_root_block)) -#define SB_TREE_HEIGHT(s) \ - le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height)) -#define SB_REISERFS_STATE(s) \ - le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state)) -#define SB_VERSION(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_version)) -#define SB_BMAP_NR(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr)) - -#define PUT_SB_BLOCK_COUNT(s, val) \ - do { SB_V1_DISK_SUPER_BLOCK(s)->s_block_count = cpu_to_le32(val); } while (0) -#define PUT_SB_FREE_BLOCKS(s, val) \ - do { SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks = cpu_to_le32(val); } while (0) -#define PUT_SB_ROOT_BLOCK(s, val) \ - do { SB_V1_DISK_SUPER_BLOCK(s)->s_root_block = cpu_to_le32(val); } while (0) -#define PUT_SB_TREE_HEIGHT(s, val) \ - do { 
SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height = cpu_to_le16(val); } while (0) -#define PUT_SB_REISERFS_STATE(s, val) \ - do { SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state = cpu_to_le16(val); } while (0) -#define PUT_SB_VERSION(s, val) \ - do { SB_V1_DISK_SUPER_BLOCK(s)->s_version = cpu_to_le16(val); } while (0) -#define PUT_SB_BMAP_NR(s, val) \ - do { SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr = cpu_to_le16 (val); } while (0) - -#define SB_ONDISK_JP(s) (&SB_V1_DISK_SUPER_BLOCK(s)->s_journal) -#define SB_ONDISK_JOURNAL_SIZE(s) \ - le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_size)) -#define SB_ONDISK_JOURNAL_1st_BLOCK(s) \ - le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_1st_block)) -#define SB_ONDISK_JOURNAL_DEVICE(s) \ - le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_dev)) -#define SB_ONDISK_RESERVED_FOR_JOURNAL(s) \ - le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_reserved_for_journal)) - -#define is_block_in_log_or_reserved_area(s, block) \ - block >= SB_JOURNAL_1st_RESERVED_BLOCK(s) \ - && block < SB_JOURNAL_1st_RESERVED_BLOCK(s) + \ - ((!is_reiserfs_jr(SB_DISK_SUPER_BLOCK(s)) ? \ - SB_ONDISK_JOURNAL_SIZE(s) + 1 : SB_ONDISK_RESERVED_FOR_JOURNAL(s))) - -int is_reiserfs_3_5(struct reiserfs_super_block *rs); -int is_reiserfs_3_6(struct reiserfs_super_block *rs); -int is_reiserfs_jr(struct reiserfs_super_block *rs); - -/* ReiserFS leaves the first 64k unused, so that partition labels have - enough space. If someone wants to write a fancy bootloader that - needs more than 64k, let us know, and this will be increased in size. - This number must be larger than than the largest block size on any - platform, or code will break. 
-Hans */ -#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024) -#define REISERFS_FIRST_BLOCK unused_define -#define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES - -/* the spot for the super in versions 3.5 - 3.5.10 (inclusive) */ -#define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024) - -/* reiserfs internal error code (used by search_by_key and fix_nodes)) */ -#define CARRY_ON 0 -#define REPEAT_SEARCH -1 -#define IO_ERROR -2 -#define NO_DISK_SPACE -3 -#define NO_BALANCING_NEEDED (-4) -#define NO_MORE_UNUSED_CONTIGUOUS_BLOCKS (-5) -#define QUOTA_EXCEEDED -6 - -typedef __u32 b_blocknr_t; -typedef __le32 unp_t; - -struct unfm_nodeinfo { - unp_t unfm_nodenum; - unsigned short unfm_freespace; -}; - -/* there are two formats of keys: 3.5 and 3.6 - */ -#define KEY_FORMAT_3_5 0 -#define KEY_FORMAT_3_6 1 - -/* there are two stat datas */ -#define STAT_DATA_V1 0 -#define STAT_DATA_V2 1 - -static inline struct reiserfs_inode_info *REISERFS_I(const struct inode *inode) -{ - return container_of(inode, struct reiserfs_inode_info, vfs_inode); -} - -static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb) -{ - return sb->s_fs_info; -} - -/* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16 - * which overflows on large file systems. */ -static inline __u32 reiserfs_bmap_count(struct super_block *sb) -{ - return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1; -} - -static inline int bmap_would_wrap(unsigned bmap_nr) -{ - return bmap_nr > ((1LL << 16) - 1); -} - -/** this says about version of key of all items (but stat data) the - object consists of */ -#define get_inode_item_key_version( inode ) \ - ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? 
KEY_FORMAT_3_6 : KEY_FORMAT_3_5) - -#define set_inode_item_key_version( inode, version ) \ - ({ if((version)==KEY_FORMAT_3_6) \ - REISERFS_I(inode)->i_flags |= i_item_key_version_mask; \ - else \ - REISERFS_I(inode)->i_flags &= ~i_item_key_version_mask; }) - -#define get_inode_sd_version(inode) \ - ((REISERFS_I(inode)->i_flags & i_stat_data_version_mask) ? STAT_DATA_V2 : STAT_DATA_V1) - -#define set_inode_sd_version(inode, version) \ - ({ if((version)==STAT_DATA_V2) \ - REISERFS_I(inode)->i_flags |= i_stat_data_version_mask; \ - else \ - REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; }) - -/* This is an aggressive tail suppression policy, I am hoping it - improves our benchmarks. The principle behind it is that percentage - space saving is what matters, not absolute space saving. This is - non-intuitive, but it helps to understand it if you consider that the - cost to access 4 blocks is not much more than the cost to access 1 - block, if you have to do a seek and rotate. A tail risks a - non-linear disk access that is significant as a percentage of total - time cost for a 4 block file and saves an amount of space that is - less significant as a percentage of space, or so goes the hypothesis. - -Hans */ -#define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \ -(\ - (!(n_tail_size)) || \ - (((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \ - ( (n_file_size) >= (n_block_size) * 4 ) || \ - ( ( (n_file_size) >= (n_block_size) * 3 ) && \ - ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/4) ) || \ - ( ( (n_file_size) >= (n_block_size) * 2 ) && \ - ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/2) ) || \ - ( ( (n_file_size) >= (n_block_size) ) && \ - ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \ -) - -/* Another strategy for tails, this one means only create a tail if all the - file would fit into one DIRECT item. - Primary intention for this one is to increase performance by decreasing - seeking. 
-*/ -#define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \ -(\ - (!(n_tail_size)) || \ - (((n_file_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) ) \ -) - -/* - * values for s_umount_state field - */ -#define REISERFS_VALID_FS 1 -#define REISERFS_ERROR_FS 2 - -// -// there are 5 item types currently -// -#define TYPE_STAT_DATA 0 -#define TYPE_INDIRECT 1 -#define TYPE_DIRECT 2 -#define TYPE_DIRENTRY 3 -#define TYPE_MAXTYPE 3 -#define TYPE_ANY 15 // FIXME: comment is required - -/***************************************************************************/ -/* KEY & ITEM HEAD */ -/***************************************************************************/ - -// -// directories use this key as well as old files -// -struct offset_v1 { - __le32 k_offset; - __le32 k_uniqueness; -} __attribute__ ((__packed__)); - -struct offset_v2 { - __le64 v; -} __attribute__ ((__packed__)); - -static inline __u16 offset_v2_k_type(const struct offset_v2 *v2) -{ - __u8 type = le64_to_cpu(v2->v) >> 60; - return (type <= TYPE_MAXTYPE) ? 
type : TYPE_ANY; -} - -static inline void set_offset_v2_k_type(struct offset_v2 *v2, int type) -{ - v2->v = - (v2->v & cpu_to_le64(~0ULL >> 4)) | cpu_to_le64((__u64) type << 60); -} - -static inline loff_t offset_v2_k_offset(const struct offset_v2 *v2) -{ - return le64_to_cpu(v2->v) & (~0ULL >> 4); -} - -static inline void set_offset_v2_k_offset(struct offset_v2 *v2, loff_t offset) -{ - offset &= (~0ULL >> 4); - v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset); -} - -/* Key of an item determines its location in the S+tree, and - is composed of 4 components */ -struct reiserfs_key { - __le32 k_dir_id; /* packing locality: by default parent - directory object id */ - __le32 k_objectid; /* object identifier */ - union { - struct offset_v1 k_offset_v1; - struct offset_v2 k_offset_v2; - } __attribute__ ((__packed__)) u; -} __attribute__ ((__packed__)); - -struct in_core_key { - __u32 k_dir_id; /* packing locality: by default parent - directory object id */ - __u32 k_objectid; /* object identifier */ - __u64 k_offset; - __u8 k_type; -}; - -struct cpu_key { - struct in_core_key on_disk_key; - int version; - int key_length; /* 3 in all cases but direct2indirect and - indirect2direct conversion */ -}; - -/* Our function for comparing keys can compare keys of different - lengths. It takes as a parameter the length of the keys it is to - compare. These defines are used in determining what is to be passed - to it as that parameter. 
*/ -#define REISERFS_FULL_KEY_LEN 4 -#define REISERFS_SHORT_KEY_LEN 2 - -/* The result of the key compare */ -#define FIRST_GREATER 1 -#define SECOND_GREATER -1 -#define KEYS_IDENTICAL 0 -#define KEY_FOUND 1 -#define KEY_NOT_FOUND 0 - -#define KEY_SIZE (sizeof(struct reiserfs_key)) -#define SHORT_KEY_SIZE (sizeof (__u32) + sizeof (__u32)) - -/* return values for search_by_key and clones */ -#define ITEM_FOUND 1 -#define ITEM_NOT_FOUND 0 -#define ENTRY_FOUND 1 -#define ENTRY_NOT_FOUND 0 -#define DIRECTORY_NOT_FOUND -1 -#define REGULAR_FILE_FOUND -2 -#define DIRECTORY_FOUND -3 -#define BYTE_FOUND 1 -#define BYTE_NOT_FOUND 0 -#define FILE_NOT_FOUND -1 - -#define POSITION_FOUND 1 -#define POSITION_NOT_FOUND 0 - -// return values for reiserfs_find_entry and search_by_entry_key -#define NAME_FOUND 1 -#define NAME_NOT_FOUND 0 -#define GOTO_PREVIOUS_ITEM 2 -#define NAME_FOUND_INVISIBLE 3 - -/* Everything in the filesystem is stored as a set of items. The - item head contains the key of the item, its free space (for - indirect items) and specifies the location of the item itself - within the block. */ - -struct item_head { - /* Everything in the tree is found by searching for it based on - * its key.*/ - struct reiserfs_key ih_key; - union { - /* The free space in the last unformatted node of an - indirect item if this is an indirect item. This - equals 0xFFFF iff this is a direct item or stat data - item. Note that the key, not this field, is used to - determine the item type, and thus which field this - union contains. */ - __le16 ih_free_space_reserved; - /* Iff this is a directory item, this field equals the - number of directory entries in the directory item. */ - __le16 ih_entry_count; - } __attribute__ ((__packed__)) u; - __le16 ih_item_len; /* total size of the item body */ - __le16 ih_item_location; /* an offset to the item body - * within the block */ - __le16 ih_version; /* 0 for all old items, 2 for new - ones. 
Highest bit is set by fsck - temporary, cleaned after all - done */ -} __attribute__ ((__packed__)); -/* size of item header */ -#define IH_SIZE (sizeof(struct item_head)) - -#define ih_free_space(ih) le16_to_cpu((ih)->u.ih_free_space_reserved) -#define ih_version(ih) le16_to_cpu((ih)->ih_version) -#define ih_entry_count(ih) le16_to_cpu((ih)->u.ih_entry_count) -#define ih_location(ih) le16_to_cpu((ih)->ih_item_location) -#define ih_item_len(ih) le16_to_cpu((ih)->ih_item_len) - -#define put_ih_free_space(ih, val) do { (ih)->u.ih_free_space_reserved = cpu_to_le16(val); } while(0) -#define put_ih_version(ih, val) do { (ih)->ih_version = cpu_to_le16(val); } while (0) -#define put_ih_entry_count(ih, val) do { (ih)->u.ih_entry_count = cpu_to_le16(val); } while (0) -#define put_ih_location(ih, val) do { (ih)->ih_item_location = cpu_to_le16(val); } while (0) -#define put_ih_item_len(ih, val) do { (ih)->ih_item_len = cpu_to_le16(val); } while (0) - -#define unreachable_item(ih) (ih_version(ih) & (1 << 15)) - -#define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih)) -#define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val))) - -/* these operate on indirect items, where you've got an array of ints -** at a possibly unaligned location. These are a noop on ia32 -** -** p is the array of __u32, i is the index into the array, v is the value -** to store there. 
-*/ -#define get_block_num(p, i) get_unaligned_le32((p) + (i)) -#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i)) - -// -// in old version uniqueness field shows key type -// -#define V1_SD_UNIQUENESS 0 -#define V1_INDIRECT_UNIQUENESS 0xfffffffe -#define V1_DIRECT_UNIQUENESS 0xffffffff -#define V1_DIRENTRY_UNIQUENESS 500 -#define V1_ANY_UNIQUENESS 555 // FIXME: comment is required - -// -// here are conversion routines -// -static inline int uniqueness2type(__u32 uniqueness) CONSTF; -static inline int uniqueness2type(__u32 uniqueness) -{ - switch ((int)uniqueness) { - case V1_SD_UNIQUENESS: - return TYPE_STAT_DATA; - case V1_INDIRECT_UNIQUENESS: - return TYPE_INDIRECT; - case V1_DIRECT_UNIQUENESS: - return TYPE_DIRECT; - case V1_DIRENTRY_UNIQUENESS: - return TYPE_DIRENTRY; - case V1_ANY_UNIQUENESS: - default: - return TYPE_ANY; - } -} - -static inline __u32 type2uniqueness(int type) CONSTF; -static inline __u32 type2uniqueness(int type) -{ - switch (type) { - case TYPE_STAT_DATA: - return V1_SD_UNIQUENESS; - case TYPE_INDIRECT: - return V1_INDIRECT_UNIQUENESS; - case TYPE_DIRECT: - return V1_DIRECT_UNIQUENESS; - case TYPE_DIRENTRY: - return V1_DIRENTRY_UNIQUENESS; - case TYPE_ANY: - default: - return V1_ANY_UNIQUENESS; - } -} - -// -// key is pointer to on disk key which is stored in le, result is cpu, -// there is no way to get version of object from key, so, provide -// version to these defines -// -static inline loff_t le_key_k_offset(int version, - const struct reiserfs_key *key) -{ - return (version == KEY_FORMAT_3_5) ? - le32_to_cpu(key->u.k_offset_v1.k_offset) : - offset_v2_k_offset(&(key->u.k_offset_v2)); -} - -static inline loff_t le_ih_k_offset(const struct item_head *ih) -{ - return le_key_k_offset(ih_version(ih), &(ih->ih_key)); -} - -static inline loff_t le_key_k_type(int version, const struct reiserfs_key *key) -{ - return (version == KEY_FORMAT_3_5) ? 
- uniqueness2type(le32_to_cpu(key->u.k_offset_v1.k_uniqueness)) : - offset_v2_k_type(&(key->u.k_offset_v2)); -} - -static inline loff_t le_ih_k_type(const struct item_head *ih) -{ - return le_key_k_type(ih_version(ih), &(ih->ih_key)); -} - -static inline void set_le_key_k_offset(int version, struct reiserfs_key *key, - loff_t offset) -{ - (version == KEY_FORMAT_3_5) ? (void)(key->u.k_offset_v1.k_offset = cpu_to_le32(offset)) : /* jdm check */ - (void)(set_offset_v2_k_offset(&(key->u.k_offset_v2), offset)); -} - -static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset) -{ - set_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset); -} - -static inline void set_le_key_k_type(int version, struct reiserfs_key *key, - int type) -{ - (version == KEY_FORMAT_3_5) ? - (void)(key->u.k_offset_v1.k_uniqueness = - cpu_to_le32(type2uniqueness(type))) - : (void)(set_offset_v2_k_type(&(key->u.k_offset_v2), type)); -} - -static inline void set_le_ih_k_type(struct item_head *ih, int type) -{ - set_le_key_k_type(ih_version(ih), &(ih->ih_key), type); -} - -static inline int is_direntry_le_key(int version, struct reiserfs_key *key) -{ - return le_key_k_type(version, key) == TYPE_DIRENTRY; -} - -static inline int is_direct_le_key(int version, struct reiserfs_key *key) -{ - return le_key_k_type(version, key) == TYPE_DIRECT; -} - -static inline int is_indirect_le_key(int version, struct reiserfs_key *key) -{ - return le_key_k_type(version, key) == TYPE_INDIRECT; -} - -static inline int is_statdata_le_key(int version, struct reiserfs_key *key) -{ - return le_key_k_type(version, key) == TYPE_STAT_DATA; -} - -// -// item header has version. 
-// -static inline int is_direntry_le_ih(struct item_head *ih) -{ - return is_direntry_le_key(ih_version(ih), &ih->ih_key); -} - -static inline int is_direct_le_ih(struct item_head *ih) -{ - return is_direct_le_key(ih_version(ih), &ih->ih_key); -} - -static inline int is_indirect_le_ih(struct item_head *ih) -{ - return is_indirect_le_key(ih_version(ih), &ih->ih_key); -} - -static inline int is_statdata_le_ih(struct item_head *ih) -{ - return is_statdata_le_key(ih_version(ih), &ih->ih_key); -} - -// -// key is pointer to cpu key, result is cpu -// -static inline loff_t cpu_key_k_offset(const struct cpu_key *key) -{ - return key->on_disk_key.k_offset; -} - -static inline loff_t cpu_key_k_type(const struct cpu_key *key) -{ - return key->on_disk_key.k_type; -} - -static inline void set_cpu_key_k_offset(struct cpu_key *key, loff_t offset) -{ - key->on_disk_key.k_offset = offset; -} - -static inline void set_cpu_key_k_type(struct cpu_key *key, int type) -{ - key->on_disk_key.k_type = type; -} - -static inline void cpu_key_k_offset_dec(struct cpu_key *key) -{ - key->on_disk_key.k_offset--; -} - -#define is_direntry_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRENTRY) -#define is_direct_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRECT) -#define is_indirect_cpu_key(key) (cpu_key_k_type (key) == TYPE_INDIRECT) -#define is_statdata_cpu_key(key) (cpu_key_k_type (key) == TYPE_STAT_DATA) - -/* are these used ? 
*/ -#define is_direntry_cpu_ih(ih) (is_direntry_cpu_key (&((ih)->ih_key))) -#define is_direct_cpu_ih(ih) (is_direct_cpu_key (&((ih)->ih_key))) -#define is_indirect_cpu_ih(ih) (is_indirect_cpu_key (&((ih)->ih_key))) -#define is_statdata_cpu_ih(ih) (is_statdata_cpu_key (&((ih)->ih_key))) - -#define I_K_KEY_IN_ITEM(ih, key, n_blocksize) \ - (!COMP_SHORT_KEYS(ih, key) && \ - I_OFF_BYTE_IN_ITEM(ih, k_offset(key), n_blocksize)) - -/* maximal length of item */ -#define MAX_ITEM_LEN(block_size) (block_size - BLKH_SIZE - IH_SIZE) -#define MIN_ITEM_LEN 1 - -/* object identifier for root dir */ -#define REISERFS_ROOT_OBJECTID 2 -#define REISERFS_ROOT_PARENT_OBJECTID 1 - -extern struct reiserfs_key root_key; - -/* - * Picture represents a leaf of the S+tree - * ______________________________________________________ - * | | Array of | | | - * |Block | Object-Item | F r e e | Objects- | - * | head | Headers | S p a c e | Items | - * |______|_______________|___________________|___________| - */ - -/* Header of a disk block. More precisely, header of a formatted leaf - or internal node, and not the header of an unformatted node. */ -struct block_head { - __le16 blk_level; /* Level of a block in the tree. */ - __le16 blk_nr_item; /* Number of keys/items in a block. */ - __le16 blk_free_space; /* Block free space in bytes. 
*/ - __le16 blk_reserved; - /* dump this in v4/planA */ - struct reiserfs_key blk_right_delim_key; /* kept only for compatibility */ -}; - -#define BLKH_SIZE (sizeof(struct block_head)) -#define blkh_level(p_blkh) (le16_to_cpu((p_blkh)->blk_level)) -#define blkh_nr_item(p_blkh) (le16_to_cpu((p_blkh)->blk_nr_item)) -#define blkh_free_space(p_blkh) (le16_to_cpu((p_blkh)->blk_free_space)) -#define blkh_reserved(p_blkh) (le16_to_cpu((p_blkh)->blk_reserved)) -#define set_blkh_level(p_blkh,val) ((p_blkh)->blk_level = cpu_to_le16(val)) -#define set_blkh_nr_item(p_blkh,val) ((p_blkh)->blk_nr_item = cpu_to_le16(val)) -#define set_blkh_free_space(p_blkh,val) ((p_blkh)->blk_free_space = cpu_to_le16(val)) -#define set_blkh_reserved(p_blkh,val) ((p_blkh)->blk_reserved = cpu_to_le16(val)) -#define blkh_right_delim_key(p_blkh) ((p_blkh)->blk_right_delim_key) -#define set_blkh_right_delim_key(p_blkh,val) ((p_blkh)->blk_right_delim_key = val) - -/* - * values for blk_level field of the struct block_head - */ - -#define FREE_LEVEL 0 /* when node gets removed from the tree its - blk_level is set to FREE_LEVEL. It is then - used to see whether the node is still in the - tree */ - -#define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */ - -/* Given the buffer head of a formatted node, resolve to the block head of that node. */ -#define B_BLK_HEAD(bh) ((struct block_head *)((bh)->b_data)) -/* Number of items that are in buffer. */ -#define B_NR_ITEMS(bh) (blkh_nr_item(B_BLK_HEAD(bh))) -#define B_LEVEL(bh) (blkh_level(B_BLK_HEAD(bh))) -#define B_FREE_SPACE(bh) (blkh_free_space(B_BLK_HEAD(bh))) - -#define PUT_B_NR_ITEMS(bh, val) do { set_blkh_nr_item(B_BLK_HEAD(bh), val); } while (0) -#define PUT_B_LEVEL(bh, val) do { set_blkh_level(B_BLK_HEAD(bh), val); } while (0) -#define PUT_B_FREE_SPACE(bh, val) do { set_blkh_free_space(B_BLK_HEAD(bh), val); } while (0) - -/* Get right delimiting key. 
-- little endian */ -#define B_PRIGHT_DELIM_KEY(bh) (&(blk_right_delim_key(B_BLK_HEAD(bh)))) - -/* Does the buffer contain a disk leaf. */ -#define B_IS_ITEMS_LEVEL(bh) (B_LEVEL(bh) == DISK_LEAF_NODE_LEVEL) - -/* Does the buffer contain a disk internal node */ -#define B_IS_KEYS_LEVEL(bh) (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \ - && B_LEVEL(bh) <= MAX_HEIGHT) - -/***************************************************************************/ -/* STAT DATA */ -/***************************************************************************/ - -// -// old stat data is 32 bytes long. We are going to distinguish new one by -// different size -// -struct stat_data_v1 { - __le16 sd_mode; /* file type, permissions */ - __le16 sd_nlink; /* number of hard links */ - __le16 sd_uid; /* owner */ - __le16 sd_gid; /* group */ - __le32 sd_size; /* file size */ - __le32 sd_atime; /* time of last access */ - __le32 sd_mtime; /* time file was last modified */ - __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ - union { - __le32 sd_rdev; - __le32 sd_blocks; /* number of blocks file uses */ - } __attribute__ ((__packed__)) u; - __le32 sd_first_direct_byte; /* first byte of file which is stored - in a direct item: except that if it - equals 1 it is a symlink and if it - equals ~(__u32)0 there is no - direct item. The existence of this - field really grates on me. Let's - replace it with a macro based on - sd_size and our tail suppression - policy. Someday. 
-Hans */ -} __attribute__ ((__packed__)); - -#define SD_V1_SIZE (sizeof(struct stat_data_v1)) -#define stat_data_v1(ih) (ih_version (ih) == KEY_FORMAT_3_5) -#define sd_v1_mode(sdp) (le16_to_cpu((sdp)->sd_mode)) -#define set_sd_v1_mode(sdp,v) ((sdp)->sd_mode = cpu_to_le16(v)) -#define sd_v1_nlink(sdp) (le16_to_cpu((sdp)->sd_nlink)) -#define set_sd_v1_nlink(sdp,v) ((sdp)->sd_nlink = cpu_to_le16(v)) -#define sd_v1_uid(sdp) (le16_to_cpu((sdp)->sd_uid)) -#define set_sd_v1_uid(sdp,v) ((sdp)->sd_uid = cpu_to_le16(v)) -#define sd_v1_gid(sdp) (le16_to_cpu((sdp)->sd_gid)) -#define set_sd_v1_gid(sdp,v) ((sdp)->sd_gid = cpu_to_le16(v)) -#define sd_v1_size(sdp) (le32_to_cpu((sdp)->sd_size)) -#define set_sd_v1_size(sdp,v) ((sdp)->sd_size = cpu_to_le32(v)) -#define sd_v1_atime(sdp) (le32_to_cpu((sdp)->sd_atime)) -#define set_sd_v1_atime(sdp,v) ((sdp)->sd_atime = cpu_to_le32(v)) -#define sd_v1_mtime(sdp) (le32_to_cpu((sdp)->sd_mtime)) -#define set_sd_v1_mtime(sdp,v) ((sdp)->sd_mtime = cpu_to_le32(v)) -#define sd_v1_ctime(sdp) (le32_to_cpu((sdp)->sd_ctime)) -#define set_sd_v1_ctime(sdp,v) ((sdp)->sd_ctime = cpu_to_le32(v)) -#define sd_v1_rdev(sdp) (le32_to_cpu((sdp)->u.sd_rdev)) -#define set_sd_v1_rdev(sdp,v) ((sdp)->u.sd_rdev = cpu_to_le32(v)) -#define sd_v1_blocks(sdp) (le32_to_cpu((sdp)->u.sd_blocks)) -#define set_sd_v1_blocks(sdp,v) ((sdp)->u.sd_blocks = cpu_to_le32(v)) -#define sd_v1_first_direct_byte(sdp) \ - (le32_to_cpu((sdp)->sd_first_direct_byte)) -#define set_sd_v1_first_direct_byte(sdp,v) \ - ((sdp)->sd_first_direct_byte = cpu_to_le32(v)) - -/* inode flags stored in sd_attrs (nee sd_reserved) */ - -/* we want common flags to have the same values as in ext2, - so chattr(1) will work without problems */ -#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL -#define REISERFS_APPEND_FL FS_APPEND_FL -#define REISERFS_SYNC_FL FS_SYNC_FL -#define REISERFS_NOATIME_FL FS_NOATIME_FL -#define REISERFS_NODUMP_FL FS_NODUMP_FL -#define REISERFS_SECRM_FL FS_SECRM_FL -#define REISERFS_UNRM_FL 
FS_UNRM_FL -#define REISERFS_COMPR_FL FS_COMPR_FL -#define REISERFS_NOTAIL_FL FS_NOTAIL_FL - -/* persistent flags that file inherits from the parent directory */ -#define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \ - REISERFS_SYNC_FL | \ - REISERFS_NOATIME_FL | \ - REISERFS_NODUMP_FL | \ - REISERFS_SECRM_FL | \ - REISERFS_COMPR_FL | \ - REISERFS_NOTAIL_FL ) - -/* Stat Data on disk (reiserfs version of UFS disk inode minus the - address blocks) */ -struct stat_data { - __le16 sd_mode; /* file type, permissions */ - __le16 sd_attrs; /* persistent inode flags */ - __le32 sd_nlink; /* number of hard links */ - __le64 sd_size; /* file size */ - __le32 sd_uid; /* owner */ - __le32 sd_gid; /* group */ - __le32 sd_atime; /* time of last access */ - __le32 sd_mtime; /* time file was last modified */ - __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ - __le32 sd_blocks; - union { - __le32 sd_rdev; - __le32 sd_generation; - //__le32 sd_first_direct_byte; - /* first byte of file which is stored in a - direct item: except that if it equals 1 - it is a symlink and if it equals - ~(__u32)0 there is no direct item. The - existence of this field really grates - on me. Let's replace it with a macro - based on sd_size and our tail - suppression policy? 
*/ - } __attribute__ ((__packed__)) u; -} __attribute__ ((__packed__)); -// -// this is 44 bytes long -// -#define SD_SIZE (sizeof(struct stat_data)) -#define SD_V2_SIZE SD_SIZE -#define stat_data_v2(ih) (ih_version (ih) == KEY_FORMAT_3_6) -#define sd_v2_mode(sdp) (le16_to_cpu((sdp)->sd_mode)) -#define set_sd_v2_mode(sdp,v) ((sdp)->sd_mode = cpu_to_le16(v)) -/* sd_reserved */ -/* set_sd_reserved */ -#define sd_v2_nlink(sdp) (le32_to_cpu((sdp)->sd_nlink)) -#define set_sd_v2_nlink(sdp,v) ((sdp)->sd_nlink = cpu_to_le32(v)) -#define sd_v2_size(sdp) (le64_to_cpu((sdp)->sd_size)) -#define set_sd_v2_size(sdp,v) ((sdp)->sd_size = cpu_to_le64(v)) -#define sd_v2_uid(sdp) (le32_to_cpu((sdp)->sd_uid)) -#define set_sd_v2_uid(sdp,v) ((sdp)->sd_uid = cpu_to_le32(v)) -#define sd_v2_gid(sdp) (le32_to_cpu((sdp)->sd_gid)) -#define set_sd_v2_gid(sdp,v) ((sdp)->sd_gid = cpu_to_le32(v)) -#define sd_v2_atime(sdp) (le32_to_cpu((sdp)->sd_atime)) -#define set_sd_v2_atime(sdp,v) ((sdp)->sd_atime = cpu_to_le32(v)) -#define sd_v2_mtime(sdp) (le32_to_cpu((sdp)->sd_mtime)) -#define set_sd_v2_mtime(sdp,v) ((sdp)->sd_mtime = cpu_to_le32(v)) -#define sd_v2_ctime(sdp) (le32_to_cpu((sdp)->sd_ctime)) -#define set_sd_v2_ctime(sdp,v) ((sdp)->sd_ctime = cpu_to_le32(v)) -#define sd_v2_blocks(sdp) (le32_to_cpu((sdp)->sd_blocks)) -#define set_sd_v2_blocks(sdp,v) ((sdp)->sd_blocks = cpu_to_le32(v)) -#define sd_v2_rdev(sdp) (le32_to_cpu((sdp)->u.sd_rdev)) -#define set_sd_v2_rdev(sdp,v) ((sdp)->u.sd_rdev = cpu_to_le32(v)) -#define sd_v2_generation(sdp) (le32_to_cpu((sdp)->u.sd_generation)) -#define set_sd_v2_generation(sdp,v) ((sdp)->u.sd_generation = cpu_to_le32(v)) -#define sd_v2_attrs(sdp) (le16_to_cpu((sdp)->sd_attrs)) -#define set_sd_v2_attrs(sdp,v) ((sdp)->sd_attrs = cpu_to_le16(v)) - -/***************************************************************************/ -/* DIRECTORY STRUCTURE */ -/***************************************************************************/ -/* - Picture represents the structure 
of directory items - ________________________________________________ - | Array of | | | | | | - | directory |N-1| N-2 | .... | 1st |0th| - | entry headers | | | | | | - |_______________|___|_____|________|_______|___| - <---- directory entries ------> - - First directory item has k_offset component 1. We store "." and ".." - in one item, always, we never split "." and ".." into differing - items. This makes, among other things, the code for removing - directories simpler. */ -#define SD_OFFSET 0 -#define SD_UNIQUENESS 0 -#define DOT_OFFSET 1 -#define DOT_DOT_OFFSET 2 -#define DIRENTRY_UNIQUENESS 500 - -/* */ -#define FIRST_ITEM_OFFSET 1 - -/* - Q: How to get key of object pointed to by entry from entry? - - A: Each directory entry has its header. This header has deh_dir_id and deh_objectid fields, those are key - of object, entry points to */ - -/* NOT IMPLEMENTED: - Directory will someday contain stat data of object */ - -struct reiserfs_de_head { - __le32 deh_offset; /* third component of the directory entry key */ - __le32 deh_dir_id; /* objectid of the parent directory of the object, that is referenced - by directory entry */ - __le32 deh_objectid; /* objectid of the object, that is referenced by directory entry */ - __le16 deh_location; /* offset of name in the whole item */ - __le16 deh_state; /* whether 1) entry contains stat data (for future), and 2) whether - entry is hidden (unlinked) */ -} __attribute__ ((__packed__)); -#define DEH_SIZE sizeof(struct reiserfs_de_head) -#define deh_offset(p_deh) (le32_to_cpu((p_deh)->deh_offset)) -#define deh_dir_id(p_deh) (le32_to_cpu((p_deh)->deh_dir_id)) -#define deh_objectid(p_deh) (le32_to_cpu((p_deh)->deh_objectid)) -#define deh_location(p_deh) (le16_to_cpu((p_deh)->deh_location)) -#define deh_state(p_deh) (le16_to_cpu((p_deh)->deh_state)) - -#define put_deh_offset(p_deh,v) ((p_deh)->deh_offset = cpu_to_le32((v))) -#define put_deh_dir_id(p_deh,v) ((p_deh)->deh_dir_id = cpu_to_le32((v))) -#define 
put_deh_objectid(p_deh,v) ((p_deh)->deh_objectid = cpu_to_le32((v))) -#define put_deh_location(p_deh,v) ((p_deh)->deh_location = cpu_to_le16((v))) -#define put_deh_state(p_deh,v) ((p_deh)->deh_state = cpu_to_le16((v))) - -/* empty directory contains two entries "." and ".." and their headers */ -#define EMPTY_DIR_SIZE \ -(DEH_SIZE * 2 + ROUND_UP (strlen (".")) + ROUND_UP (strlen (".."))) - -/* old format directories have this size when empty */ -#define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3) - -#define DEH_Statdata 0 /* not used now */ -#define DEH_Visible 2 - -/* 64 bit systems (and the S/390) need to be aligned explicitly -jdm */ -#if BITS_PER_LONG == 64 || defined(__s390__) || defined(__hppa__) -# define ADDR_UNALIGNED_BITS (3) -#endif - -/* These are only used to manipulate deh_state. - * Because of this, we'll use the ext2_ bit routines, - * since they are little endian */ -#ifdef ADDR_UNALIGNED_BITS - -# define aligned_address(addr) ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1))) -# define unaligned_offset(addr) (((int)((long)(addr) & ((1 << ADDR_UNALIGNED_BITS) - 1))) << 3) - -# define set_bit_unaligned(nr, addr) \ - __test_and_set_bit_le((nr) + unaligned_offset(addr), aligned_address(addr)) -# define clear_bit_unaligned(nr, addr) \ - __test_and_clear_bit_le((nr) + unaligned_offset(addr), aligned_address(addr)) -# define test_bit_unaligned(nr, addr) \ - test_bit_le((nr) + unaligned_offset(addr), aligned_address(addr)) - -#else - -# define set_bit_unaligned(nr, addr) __test_and_set_bit_le(nr, addr) -# define clear_bit_unaligned(nr, addr) __test_and_clear_bit_le(nr, addr) -# define test_bit_unaligned(nr, addr) test_bit_le(nr, addr) - -#endif - -#define mark_de_with_sd(deh) set_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) -#define mark_de_without_sd(deh) clear_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) -#define mark_de_visible(deh) set_bit_unaligned (DEH_Visible, &((deh)->deh_state)) -#define mark_de_hidden(deh) clear_bit_unaligned 
(DEH_Visible, &((deh)->deh_state)) - -#define de_with_sd(deh) test_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) -#define de_visible(deh) test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) -#define de_hidden(deh) !test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) - -extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid, - __le32 par_dirid, __le32 par_objid); -extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, - __le32 par_dirid, __le32 par_objid); - -/* array of the entry headers */ - /* get item body */ -#define B_I_PITEM(bh,ih) ( (bh)->b_data + ih_location(ih) ) -#define B_I_DEH(bh,ih) ((struct reiserfs_de_head *)(B_I_PITEM(bh,ih))) - -/* length of the directory entry in directory item. This define - calculates length of i-th directory entry using directory entry - locations from dir entry head. When it calculates length of 0-th - directory entry, it uses length of whole item in place of entry - location of the non-existent following entry in the calculation. - See picture above.*/ -/* -#define I_DEH_N_ENTRY_LENGTH(ih,deh,i) \ -((i) ? (deh_location((deh)-1) - deh_location((deh))) : (ih_item_len((ih)) - deh_location((deh)))) -*/ -static inline int entry_length(const struct buffer_head *bh, - const struct item_head *ih, int pos_in_item) -{ - struct reiserfs_de_head *deh; - - deh = B_I_DEH(bh, ih) + pos_in_item; - if (pos_in_item) - return deh_location(deh - 1) - deh_location(deh); - - return ih_item_len(ih) - deh_location(deh); -} - -/* number of entries in the directory item, depends on ENTRY_COUNT being at the start of directory dynamic data. */ -#define I_ENTRY_COUNT(ih) (ih_entry_count((ih))) - -/* name by bh, ih and entry_num */ -#define B_I_E_NAME(bh,ih,entry_num) ((char *)(bh->b_data + ih_location(ih) + deh_location(B_I_DEH(bh,ih)+(entry_num)))) - -// two entries per block (at least) -#define REISERFS_MAX_NAME(block_size) 255 - -/* this structure is used for operations on directory entries. 
It is - not a disk structure. */ -/* When reiserfs_find_entry or search_by_entry_key find directory - entry, they return filled reiserfs_dir_entry structure */ -struct reiserfs_dir_entry { - struct buffer_head *de_bh; - int de_item_num; - struct item_head *de_ih; - int de_entry_num; - struct reiserfs_de_head *de_deh; - int de_entrylen; - int de_namelen; - char *de_name; - unsigned long *de_gen_number_bit_string; - - __u32 de_dir_id; - __u32 de_objectid; - - struct cpu_key de_entry_key; -}; - -/* these defines are useful when a particular member of a reiserfs_dir_entry is needed */ - -/* pointer to file name, stored in entry */ -#define B_I_DEH_ENTRY_FILE_NAME(bh,ih,deh) (B_I_PITEM (bh, ih) + deh_location(deh)) - -/* length of name */ -#define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \ -(I_DEH_N_ENTRY_LENGTH (ih, deh, entry_num) - (de_with_sd (deh) ? SD_SIZE : 0)) - -/* hash value occupies bits from 7 up to 30 */ -#define GET_HASH_VALUE(offset) ((offset) & 0x7fffff80LL) -/* generation number occupies 7 bits starting from 0 up to 6 */ -#define GET_GENERATION_NUMBER(offset) ((offset) & 0x7fLL) -#define MAX_GENERATION_NUMBER 127 - -#define SET_GENERATION_NUMBER(offset,gen_number) (GET_HASH_VALUE(offset)|(gen_number)) - -/* - * Picture represents an internal node of the reiserfs tree - * ______________________________________________________ - * | | Array of | Array of | Free | - * |block | keys | pointers | space | - * | head | N | N+1 | | - * |______|_______________|___________________|___________| - */ - -/***************************************************************************/ -/* DISK CHILD */ -/***************************************************************************/ -/* Disk child pointer: The pointer from an internal node of the tree - to a node that is on disk. */ -struct disk_child { - __le32 dc_block_number; /* Disk child's block number. */ - __le16 dc_size; /* Disk child's used space. 
*/ - __le16 dc_reserved; -}; - -#define DC_SIZE (sizeof(struct disk_child)) -#define dc_block_number(dc_p) (le32_to_cpu((dc_p)->dc_block_number)) -#define dc_size(dc_p) (le16_to_cpu((dc_p)->dc_size)) -#define put_dc_block_number(dc_p, val) do { (dc_p)->dc_block_number = cpu_to_le32(val); } while(0) -#define put_dc_size(dc_p, val) do { (dc_p)->dc_size = cpu_to_le16(val); } while(0) - -/* Get disk child by buffer header and position in the tree node. */ -#define B_N_CHILD(bh, n_pos) ((struct disk_child *)\ -((bh)->b_data + BLKH_SIZE + B_NR_ITEMS(bh) * KEY_SIZE + DC_SIZE * (n_pos))) - -/* Get disk child number by buffer header and position in the tree node. */ -#define B_N_CHILD_NUM(bh, n_pos) (dc_block_number(B_N_CHILD(bh, n_pos))) -#define PUT_B_N_CHILD_NUM(bh, n_pos, val) \ - (put_dc_block_number(B_N_CHILD(bh, n_pos), val)) - - /* maximal value of field child_size in structure disk_child */ - /* child size is the combined size of all items and their headers */ -#define MAX_CHILD_SIZE(bh) ((int)( (bh)->b_size - BLKH_SIZE )) - -/* amount of used space in buffer (not including block head) */ -#define B_CHILD_SIZE(cur) (MAX_CHILD_SIZE(cur)-(B_FREE_SPACE(cur))) - -/* max and min number of keys in internal node */ -#define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) ) -#define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2) - -/***************************************************************************/ -/* PATH STRUCTURES AND DEFINES */ -/***************************************************************************/ - -/* Search_by_key fills up the path from the root to the leaf as it descends the tree looking for the - key. It uses reiserfs_bread to try to find buffers in the cache given their block number. If it - does not find them in the cache it reads them from disk. For each node search_by_key finds using - reiserfs_bread it then uses bin_search to look through that node. 
bin_search will find the - position of the block_number of the next node if it is looking through an internal node. If it - is looking through a leaf node bin_search will find the position of the item which has key either - equal to given key, or which is the maximal key less than the given key. */ - -struct path_element { - struct buffer_head *pe_buffer; /* Pointer to the buffer at the path in the tree. */ - int pe_position; /* Position in the tree node which is placed in the */ - /* buffer above. */ -}; - -#define MAX_HEIGHT 5 /* maximal height of a tree. don't change this without changing JOURNAL_PER_BALANCE_CNT */ -#define EXTENDED_MAX_HEIGHT 7 /* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */ -#define FIRST_PATH_ELEMENT_OFFSET 2 /* Must be equal to at least 2. */ - -#define ILLEGAL_PATH_ELEMENT_OFFSET 1 /* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */ -#define MAX_FEB_SIZE 6 /* this MUST be MAX_HEIGHT + 1. See about FEB below */ - -/* We need to keep track of who the ancestors of nodes are. When we - perform a search we record which nodes were visited while - descending the tree looking for the node we searched for. This list - of nodes is called the path. This information is used while - performing balancing. Note that this path information may become - invalid, and this means we must check it when using it to see if it - is still valid. You'll need to read search_by_key and the comments - in it, especially about decrement_counters_in_path(), to understand - this structure. - -Paths make the code so much harder to work with and debug.... An -enormous number of bugs are due to them, and trying to write or modify -code that uses them just makes my head hurt. They are based on an -excessive effort to avoid disturbing the precious VFS code.:-( The -gods only know how we are going to SMP the code that uses them. -znodes are the way! 
*/ - -#define PATH_READA 0x1 /* do read ahead */ -#define PATH_READA_BACK 0x2 /* read backwards */ - -struct treepath { - int path_length; /* Length of the array above. */ - int reada; - struct path_element path_elements[EXTENDED_MAX_HEIGHT]; /* Array of the path elements. */ - int pos_in_item; -}; - -#define pos_in_item(path) ((path)->pos_in_item) - -#define INITIALIZE_PATH(var) \ -struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,} - -/* Get path element by path and path position. */ -#define PATH_OFFSET_PELEMENT(path, n_offset) ((path)->path_elements + (n_offset)) - -/* Get buffer header at the path by path and path position. */ -#define PATH_OFFSET_PBUFFER(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_buffer) - -/* Get position in the element at the path by path and path position. */ -#define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position) - -#define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length)) - /* you know, to the person who didn't - write this the macro name does not - at first suggest what it does. - Maybe POSITION_FROM_PATH_END? Or - maybe we should just focus on - dumping paths... -Hans */ -#define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length)) - -#define PATH_PITEM_HEAD(path) B_N_PITEM_HEAD(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path)) - -/* in do_balance leaf has h == 0 in contrast with path structure, - where root has level == 0. 
That is why we need these defines */ -#define PATH_H_PBUFFER(path, h) PATH_OFFSET_PBUFFER (path, path->path_length - (h)) /* tb->S[h] */ -#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER (path, (h) + 1) /* tb->F[h] or tb->S[0]->b_parent */ -#define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION (path, path->path_length - (h)) -#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1) /* tb->S[h]->b_item_order */ - -#define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h)) - -#define get_last_bh(path) PATH_PLAST_BUFFER(path) -#define get_ih(path) PATH_PITEM_HEAD(path) -#define get_item_pos(path) PATH_LAST_POSITION(path) -#define get_item(path) ((void *)B_N_PITEM(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION (path))) -#define item_moved(ih,path) comp_items(ih, path) -#define path_changed(ih,path) comp_items (ih, path) - -/***************************************************************************/ -/* MISC */ -/***************************************************************************/ - -/* Size of pointer to the unformatted node. */ -#define UNFM_P_SIZE (sizeof(unp_t)) -#define UNFM_P_SHIFT 2 - -// in in-core inode key is stored on le form -#define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key)) - -#define MAX_UL_INT 0xffffffff -#define MAX_INT 0x7ffffff -#define MAX_US_INT 0xffff - -// reiserfs version 2 has max offset 60 bits. 
Version 1 - 32 bit offset -#define U32_MAX (~(__u32)0) - -static inline loff_t max_reiserfs_offset(struct inode *inode) -{ - if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5) - return (loff_t) U32_MAX; - - return (loff_t) ((~(__u64) 0) >> 4); -} - -/*#define MAX_KEY_UNIQUENESS MAX_UL_INT*/ -#define MAX_KEY_OBJECTID MAX_UL_INT - -#define MAX_B_NUM MAX_UL_INT -#define MAX_FC_NUM MAX_US_INT - -/* the purpose is to detect overflow of an unsigned short */ -#define REISERFS_LINK_MAX (MAX_US_INT - 1000) - -/* The following defines are used in reiserfs_insert_item and reiserfs_append_item */ -#define REISERFS_KERNEL_MEM 0 /* reiserfs kernel memory mode */ -#define REISERFS_USER_MEM 1 /* reiserfs user memory mode */ - -#define fs_generation(s) (REISERFS_SB(s)->s_generation_counter) -#define get_generation(s) atomic_read (&fs_generation(s)) -#define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen) -#define __fs_changed(gen,s) (gen != get_generation (s)) -#define fs_changed(gen,s) \ -({ \ - reiserfs_cond_resched(s); \ - __fs_changed(gen, s); \ -}) - -/***************************************************************************/ -/* FIXATE NODES */ -/***************************************************************************/ - -#define VI_TYPE_LEFT_MERGEABLE 1 -#define VI_TYPE_RIGHT_MERGEABLE 2 - -/* To make any changes in the tree we always first find node, that - contains item to be changed/deleted or place to insert a new - item. We call this node S. To do balancing we need to decide what - we will shift to left/right neighbor, or to a new node, where new - item will be etc. To make this analysis simpler we build virtual - node. Virtual node is an array of items, that will replace items of - node S. (For instance if we are going to delete an item, virtual - node does not contain it). Virtual node keeps information about - item sizes and types, mergeability of first and last items, sizes - of all entries in directory item. 
We use this array of items when - calculating what we can shift to neighbors and how many nodes we - have to have if we do not any shiftings, if we shift to left/right - neighbor or to both. */ -struct virtual_item { - int vi_index; // index in the array of item operations - unsigned short vi_type; // left/right mergeability - unsigned short vi_item_len; /* length of item that it will have after balancing */ - struct item_head *vi_ih; - const char *vi_item; // body of item (old or new) - const void *vi_new_data; // 0 always but paste mode - void *vi_uarea; // item specific area -}; - -struct virtual_node { - char *vn_free_ptr; /* this is a pointer to the free space in the buffer */ - unsigned short vn_nr_item; /* number of items in virtual node */ - short vn_size; /* size of node , that node would have if it has unlimited size and no balancing is performed */ - short vn_mode; /* mode of balancing (paste, insert, delete, cut) */ - short vn_affected_item_num; - short vn_pos_in_item; - struct item_head *vn_ins_ih; /* item header of inserted item, 0 for other modes */ - const void *vn_data; - struct virtual_item *vn_vi; /* array of items (including a new one, excluding item to be deleted) */ -}; - -/* used by directory items when creating virtual nodes */ -struct direntry_uarea { - int flags; - __u16 entry_count; - __u16 entry_sizes[1]; -} __attribute__ ((__packed__)); - -/***************************************************************************/ -/* TREE BALANCE */ -/***************************************************************************/ - -/* This temporary structure is used in tree balance algorithms, and - constructed as we go to the extent that its various parts are - needed. It contains arrays of nodes that can potentially be - involved in the balancing of node S, and parameters that define how - each of the nodes must be balanced. 
Note that in these algorithms - for balancing the worst case is to need to balance the current node - S and the left and right neighbors and all of their parents plus - create a new node. We implement S1 balancing for the leaf nodes - and S0 balancing for the internal nodes (S1 and S0 are defined in - our papers.)*/ - -#define MAX_FREE_BLOCK 7 /* size of the array of buffers to free at end of do_balance */ - -/* maximum number of FEB blocknrs on a single level */ -#define MAX_AMOUNT_NEEDED 2 - -/* someday somebody will prefix every field in this struct with tb_ */ -struct tree_balance { - int tb_mode; - int need_balance_dirty; - struct super_block *tb_sb; - struct reiserfs_transaction_handle *transaction_handle; - struct treepath *tb_path; - struct buffer_head *L[MAX_HEIGHT]; /* array of left neighbors of nodes in the path */ - struct buffer_head *R[MAX_HEIGHT]; /* array of right neighbors of nodes in the path */ - struct buffer_head *FL[MAX_HEIGHT]; /* array of fathers of the left neighbors */ - struct buffer_head *FR[MAX_HEIGHT]; /* array of fathers of the right neighbors */ - struct buffer_head *CFL[MAX_HEIGHT]; /* array of common parents of center node and its left neighbor */ - struct buffer_head *CFR[MAX_HEIGHT]; /* array of common parents of center node and its right neighbor */ - - struct buffer_head *FEB[MAX_FEB_SIZE]; /* array of empty buffers. Number of buffers in array equals - cur_blknum. */ - struct buffer_head *used[MAX_FEB_SIZE]; - struct buffer_head *thrown[MAX_FEB_SIZE]; - int lnum[MAX_HEIGHT]; /* array of number of items which must be - shifted to the left in order to balance the - current node; for leaves includes item that - will be partially shifted; for internal - nodes, it is the number of child pointers - rather than items. It includes the new item - being created. The code sometimes subtracts - one to get the number of wholly shifted - items for other purposes. 
*/ - int rnum[MAX_HEIGHT]; /* substitute right for left in comment above */ - int lkey[MAX_HEIGHT]; /* array indexed by height h mapping the key delimiting L[h] and - S[h] to its item number within the node CFL[h] */ - int rkey[MAX_HEIGHT]; /* substitute r for l in comment above */ - int insert_size[MAX_HEIGHT]; /* the number of bytes by we are trying to add or remove from - S[h]. A negative value means removing. */ - int blknum[MAX_HEIGHT]; /* number of nodes that will replace node S[h] after - balancing on the level h of the tree. If 0 then S is - being deleted, if 1 then S is remaining and no new nodes - are being created, if 2 or 3 then 1 or 2 new nodes is - being created */ - - /* fields that are used only for balancing leaves of the tree */ - int cur_blknum; /* number of empty blocks having been already allocated */ - int s0num; /* number of items that fall into left most node when S[0] splits */ - int s1num; /* number of items that fall into first new node when S[0] splits */ - int s2num; /* number of items that fall into second new node when S[0] splits */ - int lbytes; /* number of bytes which can flow to the left neighbor from the left */ - /* most liquid item that cannot be shifted from S[0] entirely */ - /* if -1 then nothing will be partially shifted */ - int rbytes; /* number of bytes which will flow to the right neighbor from the right */ - /* most liquid item that cannot be shifted from S[0] entirely */ - /* if -1 then nothing will be partially shifted */ - int s1bytes; /* number of bytes which flow to the first new node when S[0] splits */ - /* note: if S[0] splits into 3 nodes, then items do not need to be cut */ - int s2bytes; - struct buffer_head *buf_to_free[MAX_FREE_BLOCK]; /* buffers which are to be freed after do_balance finishes by unfix_nodes */ - char *vn_buf; /* kmalloced memory. 
Used to create - virtual node and keep map of - dirtied bitmap blocks */ - int vn_buf_size; /* size of the vn_buf */ - struct virtual_node *tb_vn; /* VN starts after bitmap of bitmap blocks */ - - int fs_gen; /* saved value of `reiserfs_generation' counter - see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */ -#ifdef DISPLACE_NEW_PACKING_LOCALITIES - struct in_core_key key; /* key pointer, to pass to block allocator or - another low-level subsystem */ -#endif -}; - -/* These are modes of balancing */ - -/* When inserting an item. */ -#define M_INSERT 'i' -/* When inserting into (directories only) or appending onto an already - existent item. */ -#define M_PASTE 'p' -/* When deleting an item. */ -#define M_DELETE 'd' -/* When truncating an item or removing an entry from a (directory) item. */ -#define M_CUT 'c' - -/* used when balancing on leaf level skipped (in reiserfsck) */ -#define M_INTERNAL 'n' - -/* When further balancing is not needed, then do_balance does not need - to be called. */ -#define M_SKIP_BALANCING 's' -#define M_CONVERT 'v' - -/* modes of leaf_move_items */ -#define LEAF_FROM_S_TO_L 0 -#define LEAF_FROM_S_TO_R 1 -#define LEAF_FROM_R_TO_L 2 -#define LEAF_FROM_L_TO_R 3 -#define LEAF_FROM_S_TO_SNEW 4 - -#define FIRST_TO_LAST 0 -#define LAST_TO_FIRST 1 - -/* used in do_balance for passing parent of node information that has - been gotten from tb struct */ -struct buffer_info { - struct tree_balance *tb; - struct buffer_head *bi_bh; - struct buffer_head *bi_parent; - int bi_position; -}; - -static inline struct super_block *sb_from_tb(struct tree_balance *tb) -{ - return tb ? tb->tb_sb : NULL; -} - -static inline struct super_block *sb_from_bi(struct buffer_info *bi) -{ - return bi ? sb_from_tb(bi->tb) : NULL; -} - -/* there are 4 types of items: stat data, directory item, indirect, direct. -+-------------------+------------+--------------+------------+ -| | k_offset | k_uniqueness | mergeable? 
| -+-------------------+------------+--------------+------------+ -| stat data | 0 | 0 | no | -+-------------------+------------+--------------+------------+ -| 1st directory item| DOT_OFFSET |DIRENTRY_UNIQUENESS| no | -| non 1st directory | hash value | | yes | -| item | | | | -+-------------------+------------+--------------+------------+ -| indirect item | offset + 1 |TYPE_INDIRECT | if this is not the first indirect item of the object -+-------------------+------------+--------------+------------+ -| direct item | offset + 1 |TYPE_DIRECT | if not this is not the first direct item of the object -+-------------------+------------+--------------+------------+ -*/ - -struct item_operations { - int (*bytes_number) (struct item_head * ih, int block_size); - void (*decrement_key) (struct cpu_key *); - int (*is_left_mergeable) (struct reiserfs_key * ih, - unsigned long bsize); - void (*print_item) (struct item_head *, char *item); - void (*check_item) (struct item_head *, char *item); - - int (*create_vi) (struct virtual_node * vn, struct virtual_item * vi, - int is_affected, int insert_size); - int (*check_left) (struct virtual_item * vi, int free, - int start_skip, int end_skip); - int (*check_right) (struct virtual_item * vi, int free); - int (*part_size) (struct virtual_item * vi, int from, int to); - int (*unit_num) (struct virtual_item * vi); - void (*print_vi) (struct virtual_item * vi); -}; - -extern struct item_operations *item_ops[TYPE_ANY + 1]; - -#define op_bytes_number(ih,bsize) item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize) -#define op_is_left_mergeable(key,bsize) item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize) -#define op_print_item(ih,item) item_ops[le_ih_k_type (ih)]->print_item (ih, item) -#define op_check_item(ih,item) item_ops[le_ih_k_type (ih)]->check_item (ih, item) -#define op_create_vi(vn,vi,is_affected,insert_size) item_ops[le_ih_k_type ((vi)->vi_ih)]->create_vi (vn,vi,is_affected,insert_size) -#define 
op_check_left(vi,free,start_skip,end_skip) item_ops[(vi)->vi_index]->check_left (vi, free, start_skip, end_skip) -#define op_check_right(vi,free) item_ops[(vi)->vi_index]->check_right (vi, free) -#define op_part_size(vi,from,to) item_ops[(vi)->vi_index]->part_size (vi, from, to) -#define op_unit_num(vi) item_ops[(vi)->vi_index]->unit_num (vi) -#define op_print_vi(vi) item_ops[(vi)->vi_index]->print_vi (vi) - -#define COMP_SHORT_KEYS comp_short_keys - -/* number of blocks pointed to by the indirect item */ -#define I_UNFM_NUM(ih) (ih_item_len(ih) / UNFM_P_SIZE) - -/* the used space within the unformatted node corresponding to pos within the item pointed to by ih */ -#define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size)) - -/* number of bytes contained by the direct item or the unformatted nodes the indirect item points to */ - -/* get the item header */ -#define B_N_PITEM_HEAD(bh,item_num) ( (struct item_head * )((bh)->b_data + BLKH_SIZE) + (item_num) ) - -/* get key */ -#define B_N_PDELIM_KEY(bh,item_num) ( (struct reiserfs_key * )((bh)->b_data + BLKH_SIZE) + (item_num) ) - -/* get the key */ -#define B_N_PKEY(bh,item_num) ( &(B_N_PITEM_HEAD(bh,item_num)->ih_key) ) - -/* get item body */ -#define B_N_PITEM(bh,item_num) ( (bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(item_num)))) - -/* get the stat data by the buffer header and the item order */ -#define B_N_STAT_DATA(bh,nr) \ -( (struct stat_data *)((bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(nr))) ) ) - - /* following defines use reiserfs buffer header and item header */ - -/* get stat-data */ -#define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) ) - -// this is 3976 for size==4096 -#define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE) - -/* indirect items consist of entries which contain blocknrs, pos - indicates which entry, and B_I_POS_UNFM_POINTER resolves to the - blocknr contained 
by the entry pos points to */ -#define B_I_POS_UNFM_POINTER(bh,ih,pos) le32_to_cpu(*(((unp_t *)B_I_PITEM(bh,ih)) + (pos))) -#define PUT_B_I_POS_UNFM_POINTER(bh,ih,pos, val) do {*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)) = cpu_to_le32(val); } while (0) - -struct reiserfs_iget_args { - __u32 objectid; - __u32 dirid; -}; - -/***************************************************************************/ -/* FUNCTION DECLARATIONS */ -/***************************************************************************/ - -#define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12) - -#define journal_trans_half(blocksize) \ - ((blocksize - sizeof (struct reiserfs_journal_desc) + sizeof (__u32) - 12) / sizeof (__u32)) - -/* journal.c see journal.c for all the comments here */ - -/* first block written in a commit. */ -struct reiserfs_journal_desc { - __le32 j_trans_id; /* id of commit */ - __le32 j_len; /* length of commit. len +1 is the commit block */ - __le32 j_mount_id; /* mount id of this trans */ - __le32 j_realblock[1]; /* real locations for each block */ -}; - -#define get_desc_trans_id(d) le32_to_cpu((d)->j_trans_id) -#define get_desc_trans_len(d) le32_to_cpu((d)->j_len) -#define get_desc_mount_id(d) le32_to_cpu((d)->j_mount_id) - -#define set_desc_trans_id(d,val) do { (d)->j_trans_id = cpu_to_le32 (val); } while (0) -#define set_desc_trans_len(d,val) do { (d)->j_len = cpu_to_le32 (val); } while (0) -#define set_desc_mount_id(d,val) do { (d)->j_mount_id = cpu_to_le32 (val); } while (0) - -/* last block written in a commit */ -struct reiserfs_journal_commit { - __le32 j_trans_id; /* must match j_trans_id from the desc block */ - __le32 j_len; /* ditto */ - __le32 j_realblock[1]; /* real locations for each block */ -}; - -#define get_commit_trans_id(c) le32_to_cpu((c)->j_trans_id) -#define get_commit_trans_len(c) le32_to_cpu((c)->j_len) -#define get_commit_mount_id(c) le32_to_cpu((c)->j_mount_id) - -#define set_commit_trans_id(c,val) do { (c)->j_trans_id = cpu_to_le32 
(val); } while (0) -#define set_commit_trans_len(c,val) do { (c)->j_len = cpu_to_le32 (val); } while (0) - -/* this header block gets written whenever a transaction is considered fully flushed, and is more recent than the -** last fully flushed transaction. fully flushed means all the log blocks and all the real blocks are on disk, -** and this transaction does not need to be replayed. -*/ -struct reiserfs_journal_header { - __le32 j_last_flush_trans_id; /* id of last fully flushed transaction */ - __le32 j_first_unflushed_offset; /* offset in the log of where to start replay after a crash */ - __le32 j_mount_id; - /* 12 */ struct journal_params jh_journal; -}; - -/* biggest tunable defines are right here */ -#define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */ -#define JOURNAL_TRANS_MAX_DEFAULT 1024 /* biggest possible single transaction, don't change for now (8/3/99) */ -#define JOURNAL_TRANS_MIN_DEFAULT 256 -#define JOURNAL_MAX_BATCH_DEFAULT 900 /* max blocks to batch into one transaction, don't make this any bigger than 900 */ -#define JOURNAL_MIN_RATIO 2 -#define JOURNAL_MAX_COMMIT_AGE 30 -#define JOURNAL_MAX_TRANS_AGE 30 -#define JOURNAL_PER_BALANCE_CNT (3 * (MAX_HEIGHT-2) + 9) -#define JOURNAL_BLOCKS_PER_OBJECT(sb) (JOURNAL_PER_BALANCE_CNT * 3 + \ - 2 * (REISERFS_QUOTA_INIT_BLOCKS(sb) + \ - REISERFS_QUOTA_TRANS_BLOCKS(sb))) - -#ifdef CONFIG_QUOTA -#define REISERFS_QUOTA_OPTS ((1 << REISERFS_USRQUOTA) | (1 << REISERFS_GRPQUOTA)) -/* We need to update data and inode (atime) */ -#define REISERFS_QUOTA_TRANS_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? 2 : 0) -/* 1 balancing, 1 bitmap, 1 data per write + stat data update */ -#define REISERFS_QUOTA_INIT_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \ -(DQUOT_INIT_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_INIT_REWRITE+1) : 0) -/* same as with INIT */ -#define REISERFS_QUOTA_DEL_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? 
\ -(DQUOT_DEL_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_DEL_REWRITE+1) : 0) -#else -#define REISERFS_QUOTA_TRANS_BLOCKS(s) 0 -#define REISERFS_QUOTA_INIT_BLOCKS(s) 0 -#define REISERFS_QUOTA_DEL_BLOCKS(s) 0 -#endif - -/* both of these can be as low as 1, or as high as you want. The min is the -** number of 4k bitmap nodes preallocated on mount. New nodes are allocated -** as needed, and released when transactions are committed. On release, if -** the current number of nodes is > max, the node is freed, otherwise, -** it is put on a free list for faster use later. -*/ -#define REISERFS_MIN_BITMAP_NODES 10 -#define REISERFS_MAX_BITMAP_NODES 100 - -#define JBH_HASH_SHIFT 13 /* these are based on journal hash size of 8192 */ -#define JBH_HASH_MASK 8191 - -#define _jhashfn(sb,block) \ - (((unsigned long)sb>>L1_CACHE_SHIFT) ^ \ - (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12)))) -#define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK]) - -// We need these to make journal.c code more readable -#define journal_find_get_block(s, block) __find_get_block(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) -#define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) -#define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) - -enum reiserfs_bh_state_bits { - BH_JDirty = BH_PrivateStart, /* buffer is in current transaction */ - BH_JDirty_wait, - BH_JNew, /* disk block was taken off free list before - * being in a finished transaction, or - * written to disk. Can be reused immed. 
*/ - BH_JPrepared, - BH_JRestore_dirty, - BH_JTest, // debugging only will go away -}; - -BUFFER_FNS(JDirty, journaled); -TAS_BUFFER_FNS(JDirty, journaled); -BUFFER_FNS(JDirty_wait, journal_dirty); -TAS_BUFFER_FNS(JDirty_wait, journal_dirty); -BUFFER_FNS(JNew, journal_new); -TAS_BUFFER_FNS(JNew, journal_new); -BUFFER_FNS(JPrepared, journal_prepared); -TAS_BUFFER_FNS(JPrepared, journal_prepared); -BUFFER_FNS(JRestore_dirty, journal_restore_dirty); -TAS_BUFFER_FNS(JRestore_dirty, journal_restore_dirty); -BUFFER_FNS(JTest, journal_test); -TAS_BUFFER_FNS(JTest, journal_test); - -/* -** transaction handle which is passed around for all journal calls -*/ -struct reiserfs_transaction_handle { - struct super_block *t_super; /* super for this FS when journal_begin was - called. saves calls to reiserfs_get_super - also used by nested transactions to make - sure they are nesting on the right FS - _must_ be first in the handle - */ - int t_refcount; - int t_blocks_logged; /* number of blocks this writer has logged */ - int t_blocks_allocated; /* number of blocks this writer allocated */ - unsigned int t_trans_id; /* sanity check, equals the current trans id */ - void *t_handle_save; /* save existing current->journal_info */ - unsigned displace_new_blocks:1; /* if new block allocation occurres, that block - should be displaced from others */ - struct list_head t_list; -}; - -/* used to keep track of ordered and tail writes, attached to the buffer - * head through b_journal_head. 
- */ -struct reiserfs_jh { - struct reiserfs_journal_list *jl; - struct buffer_head *bh; - struct list_head list; -}; - -void reiserfs_free_jh(struct buffer_head *bh); -int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh); -int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh); -int journal_mark_dirty(struct reiserfs_transaction_handle *, - struct super_block *, struct buffer_head *bh); - -static inline int reiserfs_file_data_log(struct inode *inode) -{ - if (reiserfs_data_log(inode->i_sb) || - (REISERFS_I(inode)->i_flags & i_data_log)) - return 1; - return 0; -} - -static inline int reiserfs_transaction_running(struct super_block *s) -{ - struct reiserfs_transaction_handle *th = current->journal_info; - if (th && th->t_super == s) - return 1; - if (th && th->t_super == NULL) - BUG(); - return 0; -} - -static inline int reiserfs_transaction_free_space(struct reiserfs_transaction_handle *th) -{ - return th->t_blocks_allocated - th->t_blocks_logged; -} - -struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct - super_block - *, - int count); -int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *); -int reiserfs_commit_page(struct inode *inode, struct page *page, - unsigned from, unsigned to); -int reiserfs_flush_old_commits(struct super_block *); -int reiserfs_commit_for_inode(struct inode *); -int reiserfs_inode_needs_commit(struct inode *); -void reiserfs_update_inode_transaction(struct inode *); -void reiserfs_wait_on_write_block(struct super_block *s); -void reiserfs_block_writes(struct reiserfs_transaction_handle *th); -void reiserfs_allow_writes(struct super_block *s); -void reiserfs_check_lock_depth(struct super_block *s, char *caller); -int reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh, - int wait); -void reiserfs_restore_prepared_buffer(struct super_block *, - struct buffer_head *bh); -int journal_init(struct super_block *, const char *j_dev_name, 
int old_format, - unsigned int); -int journal_release(struct reiserfs_transaction_handle *, struct super_block *); -int journal_release_error(struct reiserfs_transaction_handle *, - struct super_block *); -int journal_end(struct reiserfs_transaction_handle *, struct super_block *, - unsigned long); -int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *, - unsigned long); -int journal_mark_freed(struct reiserfs_transaction_handle *, - struct super_block *, b_blocknr_t blocknr); -int journal_transaction_should_end(struct reiserfs_transaction_handle *, int); -int reiserfs_in_journal(struct super_block *sb, unsigned int bmap_nr, - int bit_nr, int searchall, b_blocknr_t *next); -int journal_begin(struct reiserfs_transaction_handle *, - struct super_block *sb, unsigned long); -int journal_join_abort(struct reiserfs_transaction_handle *, - struct super_block *sb, unsigned long); -void reiserfs_abort_journal(struct super_block *sb, int errno); -void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...); -int reiserfs_allocate_list_bitmaps(struct super_block *s, - struct reiserfs_list_bitmap *, unsigned int); - -void add_save_link(struct reiserfs_transaction_handle *th, - struct inode *inode, int truncate); -int remove_save_link(struct inode *inode, int truncate); - -/* objectid.c */ -__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th); -void reiserfs_release_objectid(struct reiserfs_transaction_handle *th, - __u32 objectid_to_release); -int reiserfs_convert_objectid_map_v1(struct super_block *); - -/* stree.c */ -int B_IS_IN_TREE(const struct buffer_head *); -extern void copy_item_head(struct item_head *to, - const struct item_head *from); - -// first key is in cpu form, second - le -extern int comp_short_keys(const struct reiserfs_key *le_key, - const struct cpu_key *cpu_key); -extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from); - -// both are in le form -extern int 
comp_le_keys(const struct reiserfs_key *, - const struct reiserfs_key *); -extern int comp_short_le_keys(const struct reiserfs_key *, - const struct reiserfs_key *); - -// -// get key version from on disk key - kludge -// -static inline int le_key_version(const struct reiserfs_key *key) -{ - int type; - - type = offset_v2_k_type(&(key->u.k_offset_v2)); - if (type != TYPE_DIRECT && type != TYPE_INDIRECT - && type != TYPE_DIRENTRY) - return KEY_FORMAT_3_5; - - return KEY_FORMAT_3_6; - -} - -static inline void copy_key(struct reiserfs_key *to, - const struct reiserfs_key *from) -{ - memcpy(to, from, KEY_SIZE); -} - -int comp_items(const struct item_head *stored_ih, const struct treepath *path); -const struct reiserfs_key *get_rkey(const struct treepath *chk_path, - const struct super_block *sb); -int search_by_key(struct super_block *, const struct cpu_key *, - struct treepath *, int); -#define search_item(s,key,path) search_by_key (s, key, path, DISK_LEAF_NODE_LEVEL) -int search_for_position_by_key(struct super_block *sb, - const struct cpu_key *cpu_key, - struct treepath *search_path); -extern void decrement_bcount(struct buffer_head *bh); -void decrement_counters_in_path(struct treepath *search_path); -void pathrelse(struct treepath *search_path); -int reiserfs_check_path(struct treepath *p); -void pathrelse_and_restore(struct super_block *s, struct treepath *search_path); - -int reiserfs_insert_item(struct reiserfs_transaction_handle *th, - struct treepath *path, - const struct cpu_key *key, - struct item_head *ih, - struct inode *inode, const char *body); - -int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, - struct treepath *path, - const struct cpu_key *key, - struct inode *inode, - const char *body, int paste_size); - -int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, - struct treepath *path, - struct cpu_key *key, - struct inode *inode, - struct page *page, loff_t new_file_size); - -int reiserfs_delete_item(struct 
reiserfs_transaction_handle *th, - struct treepath *path, - const struct cpu_key *key, - struct inode *inode, struct buffer_head *un_bh); - -void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, - struct inode *inode, struct reiserfs_key *key); -int reiserfs_delete_object(struct reiserfs_transaction_handle *th, - struct inode *inode); -int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, - struct inode *inode, struct page *, - int update_timestamps); - -#define i_block_size(inode) ((inode)->i_sb->s_blocksize) -#define file_size(inode) ((inode)->i_size) -#define tail_size(inode) (file_size (inode) & (i_block_size (inode) - 1)) - -#define tail_has_to_be_packed(inode) (have_large_tails ((inode)->i_sb)?\ -!STORE_TAIL_IN_UNFM_S1(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):have_small_tails ((inode)->i_sb)?!STORE_TAIL_IN_UNFM_S2(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):0 ) - -void padd_item(char *item, int total_length, int length); - -/* inode.c */ -/* args for the create parameter of reiserfs_get_block */ -#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ -#define GET_BLOCK_CREATE 1 /* add anything you need to find block */ -#define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */ -#define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */ -#define GET_BLOCK_NO_IMUX 8 /* i_mutex is not held, don't preallocate */ -#define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */ - -void reiserfs_read_locked_inode(struct inode *inode, - struct reiserfs_iget_args *args); -int reiserfs_find_actor(struct inode *inode, void *p); -int reiserfs_init_locked_inode(struct inode *inode, void *p); -void reiserfs_evict_inode(struct inode *inode); -int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc); -int reiserfs_get_block(struct inode *inode, sector_t block, - struct buffer_head *bh_result, int create); -struct dentry 
*reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type); -struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type); -int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, - int connectable); - -int reiserfs_truncate_file(struct inode *, int update_timestamps); -void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset, - int type, int key_length); -void make_le_item_head(struct item_head *ih, const struct cpu_key *key, - int version, - loff_t offset, int type, int length, int entry_count); -struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key); - -struct reiserfs_security_handle; -int reiserfs_new_inode(struct reiserfs_transaction_handle *th, - struct inode *dir, umode_t mode, - const char *symname, loff_t i_size, - struct dentry *dentry, struct inode *inode, - struct reiserfs_security_handle *security); - -void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, - struct inode *inode, loff_t size); - -static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th, - struct inode *inode) -{ - reiserfs_update_sd_size(th, inode, inode->i_size); -} - -void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode); -void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs); -int reiserfs_setattr(struct dentry *dentry, struct iattr *attr); - -int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len); - -/* namei.c */ -void set_de_name_and_namelen(struct reiserfs_dir_entry *de); -int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, - struct treepath *path, struct reiserfs_dir_entry *de); -struct dentry *reiserfs_get_parent(struct dentry *); - -#ifdef CONFIG_REISERFS_PROC_INFO -int reiserfs_proc_info_init(struct super_block *sb); -int reiserfs_proc_info_done(struct super_block *sb); -int reiserfs_proc_info_global_init(void); -int 
reiserfs_proc_info_global_done(void); - -#define PROC_EXP( e ) e - -#define __PINFO( sb ) REISERFS_SB(sb) -> s_proc_info_data -#define PROC_INFO_MAX( sb, field, value ) \ - __PINFO( sb ).field = \ - max( REISERFS_SB( sb ) -> s_proc_info_data.field, value ) -#define PROC_INFO_INC( sb, field ) ( ++ ( __PINFO( sb ).field ) ) -#define PROC_INFO_ADD( sb, field, val ) ( __PINFO( sb ).field += ( val ) ) -#define PROC_INFO_BH_STAT( sb, bh, level ) \ - PROC_INFO_INC( sb, sbk_read_at[ ( level ) ] ); \ - PROC_INFO_ADD( sb, free_at[ ( level ) ], B_FREE_SPACE( bh ) ); \ - PROC_INFO_ADD( sb, items_at[ ( level ) ], B_NR_ITEMS( bh ) ) -#else -static inline int reiserfs_proc_info_init(struct super_block *sb) -{ - return 0; -} - -static inline int reiserfs_proc_info_done(struct super_block *sb) -{ - return 0; -} - -static inline int reiserfs_proc_info_global_init(void) -{ - return 0; -} - -static inline int reiserfs_proc_info_global_done(void) -{ - return 0; -} - -#define PROC_EXP( e ) -#define VOID_V ( ( void ) 0 ) -#define PROC_INFO_MAX( sb, field, value ) VOID_V -#define PROC_INFO_INC( sb, field ) VOID_V -#define PROC_INFO_ADD( sb, field, val ) VOID_V -#define PROC_INFO_BH_STAT(sb, bh, n_node_level) VOID_V -#endif - -/* dir.c */ -extern const struct inode_operations reiserfs_dir_inode_operations; -extern const struct inode_operations reiserfs_symlink_inode_operations; -extern const struct inode_operations reiserfs_special_inode_operations; -extern const struct file_operations reiserfs_dir_operations; -int reiserfs_readdir_dentry(struct dentry *, void *, filldir_t, loff_t *); - -/* tail_conversion.c */ -int direct2indirect(struct reiserfs_transaction_handle *, struct inode *, - struct treepath *, struct buffer_head *, loff_t); -int indirect2direct(struct reiserfs_transaction_handle *, struct inode *, - struct page *, struct treepath *, const struct cpu_key *, - loff_t, char *); -void reiserfs_unmap_buffer(struct buffer_head *); - -/* file.c */ -extern const struct inode_operations 
reiserfs_file_inode_operations; -extern const struct file_operations reiserfs_file_operations; -extern const struct address_space_operations reiserfs_address_space_operations; - -/* fix_nodes.c */ - -int fix_nodes(int n_op_mode, struct tree_balance *tb, - struct item_head *ins_ih, const void *); -void unfix_nodes(struct tree_balance *); - -/* prints.c */ -void __reiserfs_panic(struct super_block *s, const char *id, - const char *function, const char *fmt, ...) - __attribute__ ((noreturn)); -#define reiserfs_panic(s, id, fmt, args...) \ - __reiserfs_panic(s, id, __func__, fmt, ##args) -void __reiserfs_error(struct super_block *s, const char *id, - const char *function, const char *fmt, ...); -#define reiserfs_error(s, id, fmt, args...) \ - __reiserfs_error(s, id, __func__, fmt, ##args) -void reiserfs_info(struct super_block *s, const char *fmt, ...); -void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...); -void print_indirect_item(struct buffer_head *bh, int item_num); -void store_print_tb(struct tree_balance *tb); -void print_cur_tb(char *mes); -void print_de(struct reiserfs_dir_entry *de); -void print_bi(struct buffer_info *bi, char *mes); -#define PRINT_LEAF_ITEMS 1 /* print all items */ -#define PRINT_DIRECTORY_ITEMS 2 /* print directory items */ -#define PRINT_DIRECT_ITEMS 4 /* print contents of direct items */ -void print_block(struct buffer_head *bh, ...); -void print_bmap(struct super_block *s, int silent); -void print_bmap_block(int i, char *data, int size, int silent); -/*void print_super_block (struct super_block * s, char * mes);*/ -void print_objectid_map(struct super_block *s); -void print_block_head(struct buffer_head *bh, char *mes); -void check_leaf(struct buffer_head *bh); -void check_internal(struct buffer_head *bh); -void print_statistics(struct super_block *s); -char *reiserfs_hashname(int code); - -/* lbalance.c */ -int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num, - int mov_bytes, struct 
buffer_head *Snew); -int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes); -int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes); -void leaf_delete_items(struct buffer_info *cur_bi, int last_first, int first, - int del_num, int del_bytes); -void leaf_insert_into_buf(struct buffer_info *bi, int before, - struct item_head *inserted_item_ih, - const char *inserted_item_body, int zeros_number); -void leaf_paste_in_buffer(struct buffer_info *bi, int pasted_item_num, - int pos_in_item, int paste_size, const char *body, - int zeros_number); -void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, - int pos_in_item, int cut_size); -void leaf_paste_entries(struct buffer_info *bi, int item_num, int before, - int new_entry_count, struct reiserfs_de_head *new_dehs, - const char *records, int paste_size); -/* ibalance.c */ -int balance_internal(struct tree_balance *, int, int, struct item_head *, - struct buffer_head **); - -/* do_balance.c */ -void do_balance_mark_leaf_dirty(struct tree_balance *tb, - struct buffer_head *bh, int flag); -#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty -#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty - -void do_balance(struct tree_balance *tb, struct item_head *ih, - const char *body, int flag); -void reiserfs_invalidate_buffer(struct tree_balance *tb, - struct buffer_head *bh); - -int get_left_neighbor_position(struct tree_balance *tb, int h); -int get_right_neighbor_position(struct tree_balance *tb, int h); -void replace_key(struct tree_balance *tb, struct buffer_head *, int, - struct buffer_head *, int); -void make_empty_node(struct buffer_info *); -struct buffer_head *get_FEB(struct tree_balance *); - -/* bitmap.c */ - -/* structure contains hints for block allocator, and it is a container for - * arguments, such as node, search path, transaction_handle, etc. 
*/ -struct __reiserfs_blocknr_hint { - struct inode *inode; /* inode passed to allocator, if we allocate unf. nodes */ - sector_t block; /* file offset, in blocks */ - struct in_core_key key; - struct treepath *path; /* search path, used by allocator to deternine search_start by - * various ways */ - struct reiserfs_transaction_handle *th; /* transaction handle is needed to log super blocks and - * bitmap blocks changes */ - b_blocknr_t beg, end; - b_blocknr_t search_start; /* a field used to transfer search start value (block number) - * between different block allocator procedures - * (determine_search_start() and others) */ - int prealloc_size; /* is set in determine_prealloc_size() function, used by underlayed - * function that do actual allocation */ - - unsigned formatted_node:1; /* the allocator uses different polices for getting disk space for - * formatted/unformatted blocks with/without preallocation */ - unsigned preallocate:1; -}; - -typedef struct __reiserfs_blocknr_hint reiserfs_blocknr_hint_t; - -int reiserfs_parse_alloc_options(struct super_block *, char *); -void reiserfs_init_alloc_options(struct super_block *s); - -/* - * given a directory, this will tell you what packing locality - * to use for a new object underneat it. The locality is returned - * in disk byte order (le). 
- */ -__le32 reiserfs_choose_packing(struct inode *dir); - -int reiserfs_init_bitmap_cache(struct super_block *sb); -void reiserfs_free_bitmap_cache(struct super_block *sb); -void reiserfs_cache_bitmap_metadata(struct super_block *sb, struct buffer_head *bh, struct reiserfs_bitmap_info *info); -struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, unsigned int bitmap); -int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value); -void reiserfs_free_block(struct reiserfs_transaction_handle *th, struct inode *, - b_blocknr_t, int for_unformatted); -int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *, b_blocknr_t *, int, - int); -static inline int reiserfs_new_form_blocknrs(struct tree_balance *tb, - b_blocknr_t * new_blocknrs, - int amount_needed) -{ - reiserfs_blocknr_hint_t hint = { - .th = tb->transaction_handle, - .path = tb->tb_path, - .inode = NULL, - .key = tb->key, - .block = 0, - .formatted_node = 1 - }; - return reiserfs_allocate_blocknrs(&hint, new_blocknrs, amount_needed, - 0); -} - -static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle - *th, struct inode *inode, - b_blocknr_t * new_blocknrs, - struct treepath *path, - sector_t block) -{ - reiserfs_blocknr_hint_t hint = { - .th = th, - .path = path, - .inode = inode, - .block = block, - .formatted_node = 0, - .preallocate = 0 - }; - return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0); -} - -#ifdef REISERFS_PREALLOCATE -static inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle - *th, struct inode *inode, - b_blocknr_t * new_blocknrs, - struct treepath *path, - sector_t block) -{ - reiserfs_blocknr_hint_t hint = { - .th = th, - .path = path, - .inode = inode, - .block = block, - .formatted_node = 0, - .preallocate = 1 - }; - return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0); -} - -void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th, - struct inode *inode); -void 
reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th); -#endif - -/* hashes.c */ -__u32 keyed_hash(const signed char *msg, int len); -__u32 yura_hash(const signed char *msg, int len); -__u32 r5_hash(const signed char *msg, int len); - -#define reiserfs_set_le_bit __set_bit_le -#define reiserfs_test_and_set_le_bit __test_and_set_bit_le -#define reiserfs_clear_le_bit __clear_bit_le -#define reiserfs_test_and_clear_le_bit __test_and_clear_bit_le -#define reiserfs_test_le_bit test_bit_le -#define reiserfs_find_next_zero_le_bit find_next_zero_bit_le - -/* sometimes reiserfs_truncate may require to allocate few new blocks - to perform indirect2direct conversion. People probably used to - think, that truncate should work without problems on a filesystem - without free disk space. They may complain that they can not - truncate due to lack of free disk space. This spare space allows us - to not worry about it. 500 is probably too much, but it should be - absolutely safe */ -#define SPARE_SPACE 500 - -/* prototypes from ioctl.c */ -long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); -long reiserfs_compat_ioctl(struct file *filp, - unsigned int cmd, unsigned long arg); -int reiserfs_unpack(struct inode *inode, struct file *filp); diff --git a/ANDROID_3.4.5/fs/reiserfs/resize.c b/ANDROID_3.4.5/fs/reiserfs/resize.c deleted file mode 100644 index 9a17f63c..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/resize.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -/* - * Written by Alexander Zarochentcev. - * - * The kernel part of the (on-line) reiserfs resizer. 
- */ - -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/vmalloc.h> -#include <linux/string.h> -#include <linux/errno.h> -#include "reiserfs.h" -#include <linux/buffer_head.h> - -int reiserfs_resize(struct super_block *s, unsigned long block_count_new) -{ - int err = 0; - struct reiserfs_super_block *sb; - struct reiserfs_bitmap_info *bitmap; - struct reiserfs_bitmap_info *info; - struct reiserfs_bitmap_info *old_bitmap = SB_AP_BITMAP(s); - struct buffer_head *bh; - struct reiserfs_transaction_handle th; - unsigned int bmap_nr_new, bmap_nr; - unsigned int block_r_new, block_r; - - struct reiserfs_list_bitmap *jb; - struct reiserfs_list_bitmap jbitmap[JOURNAL_NUM_BITMAPS]; - - unsigned long int block_count, free_blocks; - int i; - int copy_size; - - sb = SB_DISK_SUPER_BLOCK(s); - - if (SB_BLOCK_COUNT(s) >= block_count_new) { - printk("can\'t shrink filesystem on-line\n"); - return -EINVAL; - } - - /* check the device size */ - bh = sb_bread(s, block_count_new - 1); - if (!bh) { - printk("reiserfs_resize: can\'t read last block\n"); - return -EINVAL; - } - bforget(bh); - - /* old disk layout detection; those partitions can be mounted, but - * cannot be resized */ - if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size - != REISERFS_DISK_OFFSET_IN_BYTES) { - printk - ("reiserfs_resize: unable to resize a reiserfs without distributed bitmap (fs version < 3.5.12)\n"); - return -ENOTSUPP; - } - - /* count used bits in last bitmap block */ - block_r = SB_BLOCK_COUNT(s) - - (reiserfs_bmap_count(s) - 1) * s->s_blocksize * 8; - - /* count bitmap blocks in new fs */ - bmap_nr_new = block_count_new / (s->s_blocksize * 8); - block_r_new = block_count_new - bmap_nr_new * s->s_blocksize * 8; - if (block_r_new) - bmap_nr_new++; - else - block_r_new = s->s_blocksize * 8; - - /* save old values */ - block_count = SB_BLOCK_COUNT(s); - bmap_nr = reiserfs_bmap_count(s); - - /* resizing of reiserfs bitmaps (journal and real), if needed */ - if (bmap_nr_new 
> bmap_nr) { - /* reallocate journal bitmaps */ - if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) { - printk - ("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); - return -ENOMEM; - } - /* the new journal bitmaps are zero filled, now we copy in the bitmap - ** node pointers from the old journal bitmap structs, and then - ** transfer the new data structures into the journal struct. - ** - ** using the copy_size var below allows this code to work for - ** both shrinking and expanding the FS. - */ - copy_size = bmap_nr_new < bmap_nr ? bmap_nr_new : bmap_nr; - copy_size = - copy_size * sizeof(struct reiserfs_list_bitmap_node *); - for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { - struct reiserfs_bitmap_node **node_tmp; - jb = SB_JOURNAL(s)->j_list_bitmap + i; - memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size); - - /* just in case vfree schedules on us, copy the new - ** pointer into the journal struct before freeing the - ** old one - */ - node_tmp = jb->bitmaps; - jb->bitmaps = jbitmap[i].bitmaps; - vfree(node_tmp); - } - - /* allocate additional bitmap blocks, reallocate array of bitmap - * block pointers */ - bitmap = - vzalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); - if (!bitmap) { - /* Journal bitmaps are still supersized, but the memory isn't - * leaked, so I guess it's ok */ - printk("reiserfs_resize: unable to allocate memory.\n"); - return -ENOMEM; - } - for (i = 0; i < bmap_nr; i++) - bitmap[i] = old_bitmap[i]; - - /* This doesn't go through the journal, but it doesn't have to. - * The changes are still atomic: We're synced up when the journal - * transaction begins, and the new bitmaps don't matter if the - * transaction fails. 
*/ - for (i = bmap_nr; i < bmap_nr_new; i++) { - /* don't use read_bitmap_block since it will cache - * the uninitialized bitmap */ - bh = sb_bread(s, i * s->s_blocksize * 8); - if (!bh) { - vfree(bitmap); - return -EIO; - } - memset(bh->b_data, 0, sb_blocksize(sb)); - reiserfs_set_le_bit(0, bh->b_data); - reiserfs_cache_bitmap_metadata(s, bh, bitmap + i); - - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - reiserfs_write_unlock(s); - sync_dirty_buffer(bh); - reiserfs_write_lock(s); - // update bitmap_info stuff - bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; - brelse(bh); - } - /* free old bitmap blocks array */ - SB_AP_BITMAP(s) = bitmap; - vfree(old_bitmap); - } - - /* begin transaction, if there was an error, it's fine. Yes, we have - * incorrect bitmaps now, but none of it is ever going to touch the - * disk anyway. */ - err = journal_begin(&th, s, 10); - if (err) - return err; - - /* Extend old last bitmap block - new blocks have been made available */ - info = SB_AP_BITMAP(s) + bmap_nr - 1; - bh = reiserfs_read_bitmap_block(s, bmap_nr - 1); - if (!bh) { - int jerr = journal_end(&th, s, 10); - if (jerr) - return jerr; - return -EIO; - } - - reiserfs_prepare_for_journal(s, bh, 1); - for (i = block_r; i < s->s_blocksize * 8; i++) - reiserfs_clear_le_bit(i, bh->b_data); - info->free_count += s->s_blocksize * 8 - block_r; - - journal_mark_dirty(&th, s, bh); - brelse(bh); - - /* Correct new last bitmap block - It may not be full */ - info = SB_AP_BITMAP(s) + bmap_nr_new - 1; - bh = reiserfs_read_bitmap_block(s, bmap_nr_new - 1); - if (!bh) { - int jerr = journal_end(&th, s, 10); - if (jerr) - return jerr; - return -EIO; - } - - reiserfs_prepare_for_journal(s, bh, 1); - for (i = block_r_new; i < s->s_blocksize * 8; i++) - reiserfs_set_le_bit(i, bh->b_data); - journal_mark_dirty(&th, s, bh); - brelse(bh); - - info->free_count -= s->s_blocksize * 8 - block_r_new; - /* update super */ - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); - free_blocks = 
SB_FREE_BLOCKS(s); - PUT_SB_FREE_BLOCKS(s, - free_blocks + (block_count_new - block_count - - (bmap_nr_new - bmap_nr))); - PUT_SB_BLOCK_COUNT(s, block_count_new); - PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? : bmap_nr_new); - s->s_dirt = 1; - - journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); - - SB_JOURNAL(s)->j_must_wait = 1; - return journal_end(&th, s, 10); -} diff --git a/ANDROID_3.4.5/fs/reiserfs/stree.c b/ANDROID_3.4.5/fs/reiserfs/stree.c deleted file mode 100644 index f8afa4b1..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/stree.c +++ /dev/null @@ -1,2120 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -/* - * Written by Anatoly P. Pinchuk pap@namesys.botik.ru - * Programm System Institute - * Pereslavl-Zalessky Russia - */ - -/* - * This file contains functions dealing with S+tree - * - * B_IS_IN_TREE - * copy_item_head - * comp_short_keys - * comp_keys - * comp_short_le_keys - * le_key2cpu_key - * comp_le_keys - * bin_search - * get_lkey - * get_rkey - * key_in_buffer - * decrement_bcount - * reiserfs_check_path - * pathrelse_and_restore - * pathrelse - * search_by_key_reada - * search_by_key - * search_for_position_by_key - * comp_items - * prepare_for_direct_item - * prepare_for_direntry_item - * prepare_for_delete_or_cut - * calc_deleted_bytes_number - * init_tb_struct - * padd_item - * reiserfs_delete_item - * reiserfs_delete_solid_item - * reiserfs_delete_object - * maybe_indirect_to_direct - * indirect_to_direct_roll_back - * reiserfs_cut_from_item - * truncate_directory - * reiserfs_do_truncate - * reiserfs_paste_into_item - * reiserfs_insert_item - */ - -#include <linux/time.h> -#include <linux/string.h> -#include <linux/pagemap.h> -#include "reiserfs.h" -#include <linux/buffer_head.h> -#include <linux/quotaops.h> - -/* Does the buffer contain a disk block which is in the tree. 
*/ -inline int B_IS_IN_TREE(const struct buffer_head *bh) -{ - - RFALSE(B_LEVEL(bh) > MAX_HEIGHT, - "PAP-1010: block (%b) has too big level (%z)", bh, bh); - - return (B_LEVEL(bh) != FREE_LEVEL); -} - -// -// to gets item head in le form -// -inline void copy_item_head(struct item_head *to, - const struct item_head *from) -{ - memcpy(to, from, IH_SIZE); -} - -/* k1 is pointer to on-disk structure which is stored in little-endian - form. k2 is pointer to cpu variable. For key of items of the same - object this returns 0. - Returns: -1 if key1 < key2 - 0 if key1 == key2 - 1 if key1 > key2 */ -inline int comp_short_keys(const struct reiserfs_key *le_key, - const struct cpu_key *cpu_key) -{ - __u32 n; - n = le32_to_cpu(le_key->k_dir_id); - if (n < cpu_key->on_disk_key.k_dir_id) - return -1; - if (n > cpu_key->on_disk_key.k_dir_id) - return 1; - n = le32_to_cpu(le_key->k_objectid); - if (n < cpu_key->on_disk_key.k_objectid) - return -1; - if (n > cpu_key->on_disk_key.k_objectid) - return 1; - return 0; -} - -/* k1 is pointer to on-disk structure which is stored in little-endian - form. k2 is pointer to cpu variable. - Compare keys using all 4 key fields. 
- Returns: -1 if key1 < key2 0 - if key1 = key2 1 if key1 > key2 */ -static inline int comp_keys(const struct reiserfs_key *le_key, - const struct cpu_key *cpu_key) -{ - int retval; - - retval = comp_short_keys(le_key, cpu_key); - if (retval) - return retval; - if (le_key_k_offset(le_key_version(le_key), le_key) < - cpu_key_k_offset(cpu_key)) - return -1; - if (le_key_k_offset(le_key_version(le_key), le_key) > - cpu_key_k_offset(cpu_key)) - return 1; - - if (cpu_key->key_length == 3) - return 0; - - /* this part is needed only when tail conversion is in progress */ - if (le_key_k_type(le_key_version(le_key), le_key) < - cpu_key_k_type(cpu_key)) - return -1; - - if (le_key_k_type(le_key_version(le_key), le_key) > - cpu_key_k_type(cpu_key)) - return 1; - - return 0; -} - -inline int comp_short_le_keys(const struct reiserfs_key *key1, - const struct reiserfs_key *key2) -{ - __u32 *k1_u32, *k2_u32; - int key_length = REISERFS_SHORT_KEY_LEN; - - k1_u32 = (__u32 *) key1; - k2_u32 = (__u32 *) key2; - for (; key_length--; ++k1_u32, ++k2_u32) { - if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32)) - return -1; - if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32)) - return 1; - } - return 0; -} - -inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from) -{ - int version; - to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id); - to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid); - - // find out version of the key - version = le_key_version(from); - to->version = version; - to->on_disk_key.k_offset = le_key_k_offset(version, from); - to->on_disk_key.k_type = le_key_k_type(version, from); -} - -// this does not say which one is bigger, it only returns 1 if keys -// are not equal, 0 otherwise -inline int comp_le_keys(const struct reiserfs_key *k1, - const struct reiserfs_key *k2) -{ - return memcmp(k1, k2, sizeof(struct reiserfs_key)); -} - -/************************************************************************** - * Binary search toolkit function 
* - * Search for an item in the array by the item key * - * Returns: 1 if found, 0 if not found; * - * *pos = number of the searched element if found, else the * - * number of the first element that is larger than key. * - **************************************************************************/ -/* For those not familiar with binary search: lbound is the leftmost item that it - could be, rbound the rightmost item that it could be. We examine the item - halfway between lbound and rbound, and that tells us either that we can increase - lbound, or decrease rbound, or that we have found it, or if lbound <= rbound that - there are no possible items, and we have not found it. With each examination we - cut the number of possible items it could be by one more than half rounded down, - or we find it. */ -static inline int bin_search(const void *key, /* Key to search for. */ - const void *base, /* First item in the array. */ - int num, /* Number of items in the array. */ - int width, /* Item size in the array. - searched. Lest the reader be - confused, note that this is crafted - as a general function, and when it - is applied specifically to the array - of item headers in a node, width - is actually the item header size not - the item size. */ - int *pos /* Number of the searched for element. */ - ) -{ - int rbound, lbound, j; - - for (j = ((rbound = num - 1) + (lbound = 0)) / 2; - lbound <= rbound; j = (rbound + lbound) / 2) - switch (comp_keys - ((struct reiserfs_key *)((char *)base + j * width), - (struct cpu_key *)key)) { - case -1: - lbound = j + 1; - continue; - case 1: - rbound = j - 1; - continue; - case 0: - *pos = j; - return ITEM_FOUND; /* Key found in the array. */ - } - - /* bin_search did not find given key, it returns position of key, - that is minimal and greater than the given one. */ - *pos = lbound; - return ITEM_NOT_FOUND; -} - - -/* Minimal possible key. It is never in the tree. 
*/ -const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} }; - -/* Maximal possible key. It is never in the tree. */ -static const struct reiserfs_key MAX_KEY = { - __constant_cpu_to_le32(0xffffffff), - __constant_cpu_to_le32(0xffffffff), - {{__constant_cpu_to_le32(0xffffffff), - __constant_cpu_to_le32(0xffffffff)},} -}; - -/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom - of the path, and going upwards. We must check the path's validity at each step. If the key is not in - the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this - case we return a special key, either MIN_KEY or MAX_KEY. */ -static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path, - const struct super_block *sb) -{ - int position, path_offset = chk_path->path_length; - struct buffer_head *parent; - - RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET, - "PAP-5010: invalid offset in the path"); - - /* While not higher in path than first element. */ - while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { - - RFALSE(!buffer_uptodate - (PATH_OFFSET_PBUFFER(chk_path, path_offset)), - "PAP-5020: parent is not uptodate"); - - /* Parent at the path is not in the tree now. */ - if (!B_IS_IN_TREE - (parent = - PATH_OFFSET_PBUFFER(chk_path, path_offset))) - return &MAX_KEY; - /* Check whether position in the parent is correct. */ - if ((position = - PATH_OFFSET_POSITION(chk_path, - path_offset)) > - B_NR_ITEMS(parent)) - return &MAX_KEY; - /* Check whether parent at the path really points to the child. */ - if (B_N_CHILD_NUM(parent, position) != - PATH_OFFSET_PBUFFER(chk_path, - path_offset + 1)->b_blocknr) - return &MAX_KEY; - /* Return delimiting key if position in the parent is not equal to zero. */ - if (position) - return B_N_PDELIM_KEY(parent, position - 1); - } - /* Return MIN_KEY if we are in the root of the buffer tree. 
*/ - if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> - b_blocknr == SB_ROOT_BLOCK(sb)) - return &MIN_KEY; - return &MAX_KEY; -} - -/* Get delimiting key of the buffer at the path and its right neighbor. */ -inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path, - const struct super_block *sb) -{ - int position, path_offset = chk_path->path_length; - struct buffer_head *parent; - - RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET, - "PAP-5030: invalid offset in the path"); - - while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { - - RFALSE(!buffer_uptodate - (PATH_OFFSET_PBUFFER(chk_path, path_offset)), - "PAP-5040: parent is not uptodate"); - - /* Parent at the path is not in the tree now. */ - if (!B_IS_IN_TREE - (parent = - PATH_OFFSET_PBUFFER(chk_path, path_offset))) - return &MIN_KEY; - /* Check whether position in the parent is correct. */ - if ((position = - PATH_OFFSET_POSITION(chk_path, - path_offset)) > - B_NR_ITEMS(parent)) - return &MIN_KEY; - /* Check whether parent at the path really points to the child. */ - if (B_N_CHILD_NUM(parent, position) != - PATH_OFFSET_PBUFFER(chk_path, - path_offset + 1)->b_blocknr) - return &MIN_KEY; - /* Return delimiting key if position in the parent is not the last one. */ - if (position != B_NR_ITEMS(parent)) - return B_N_PDELIM_KEY(parent, position); - } - /* Return MAX_KEY if we are in the root of the buffer tree. */ - if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> - b_blocknr == SB_ROOT_BLOCK(sb)) - return &MAX_KEY; - return &MIN_KEY; -} - -/* Check whether a key is contained in the tree rooted from a buffer at a path. */ -/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in - the path. These delimiting keys are stored at least one level above that buffer in the tree. 
If the - buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in - this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */ -static inline int key_in_buffer(struct treepath *chk_path, /* Path which should be checked. */ - const struct cpu_key *key, /* Key which should be checked. */ - struct super_block *sb - ) -{ - - RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET - || chk_path->path_length > MAX_HEIGHT, - "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)", - key, chk_path->path_length); - RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev, - "PAP-5060: device must not be NODEV"); - - if (comp_keys(get_lkey(chk_path, sb), key) == 1) - /* left delimiting key is bigger, that the key we look for */ - return 0; - /* if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */ - if (comp_keys(get_rkey(chk_path, sb), key) != 1) - /* key must be less than right delimitiing key */ - return 0; - return 1; -} - -int reiserfs_check_path(struct treepath *p) -{ - RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET, - "path not properly relsed"); - return 0; -} - -/* Drop the reference to each buffer in a path and restore - * dirty bits clean when preparing the buffer for the log. 
- * This version should only be called from fix_nodes() */ -void pathrelse_and_restore(struct super_block *sb, - struct treepath *search_path) -{ - int path_offset = search_path->path_length; - - RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, - "clm-4000: invalid path offset"); - - while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) { - struct buffer_head *bh; - bh = PATH_OFFSET_PBUFFER(search_path, path_offset--); - reiserfs_restore_prepared_buffer(sb, bh); - brelse(bh); - } - search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; -} - -/* Drop the reference to each buffer in a path */ -void pathrelse(struct treepath *search_path) -{ - int path_offset = search_path->path_length; - - RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, - "PAP-5090: invalid path offset"); - - while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) - brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--)); - - search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; -} - -static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) -{ - struct block_head *blkh; - struct item_head *ih; - int used_space; - int prev_location; - int i; - int nr; - - blkh = (struct block_head *)buf; - if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) { - reiserfs_warning(NULL, "reiserfs-5080", - "this should be caught earlier"); - return 0; - } - - nr = blkh_nr_item(blkh); - if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) { - /* item number is too big or too small */ - reiserfs_warning(NULL, "reiserfs-5081", - "nr_item seems wrong: %z", bh); - return 0; - } - ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; - used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih)); - if (used_space != blocksize - blkh_free_space(blkh)) { - /* free space does not match to calculated amount of use space */ - reiserfs_warning(NULL, "reiserfs-5082", - "free space seems wrong: %z", bh); - return 0; - } - // FIXME: it is_leaf will hit performance too much - we may have - // return 1 here - 
- /* check tables of item heads */ - ih = (struct item_head *)(buf + BLKH_SIZE); - prev_location = blocksize; - for (i = 0; i < nr; i++, ih++) { - if (le_ih_k_type(ih) == TYPE_ANY) { - reiserfs_warning(NULL, "reiserfs-5083", - "wrong item type for item %h", - ih); - return 0; - } - if (ih_location(ih) >= blocksize - || ih_location(ih) < IH_SIZE * nr) { - reiserfs_warning(NULL, "reiserfs-5084", - "item location seems wrong: %h", - ih); - return 0; - } - if (ih_item_len(ih) < 1 - || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) { - reiserfs_warning(NULL, "reiserfs-5085", - "item length seems wrong: %h", - ih); - return 0; - } - if (prev_location - ih_location(ih) != ih_item_len(ih)) { - reiserfs_warning(NULL, "reiserfs-5086", - "item location seems wrong " - "(second one): %h", ih); - return 0; - } - prev_location = ih_location(ih); - } - - // one may imagine much more checks - return 1; -} - -/* returns 1 if buf looks like an internal node, 0 otherwise */ -static int is_internal(char *buf, int blocksize, struct buffer_head *bh) -{ - struct block_head *blkh; - int nr; - int used_space; - - blkh = (struct block_head *)buf; - nr = blkh_level(blkh); - if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) { - /* this level is not possible for internal nodes */ - reiserfs_warning(NULL, "reiserfs-5087", - "this should be caught earlier"); - return 0; - } - - nr = blkh_nr_item(blkh); - if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { - /* for internal which is not root we might check min number of keys */ - reiserfs_warning(NULL, "reiserfs-5088", - "number of key seems wrong: %z", bh); - return 0; - } - - used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1); - if (used_space != blocksize - blkh_free_space(blkh)) { - reiserfs_warning(NULL, "reiserfs-5089", - "free space seems wrong: %z", bh); - return 0; - } - // one may imagine much more checks - return 1; -} - -// make sure that bh contains formatted node of reiserfs tree of -// 'level'-th level 
-static int is_tree_node(struct buffer_head *bh, int level) -{ - if (B_LEVEL(bh) != level) { - reiserfs_warning(NULL, "reiserfs-5090", "node level %d does " - "not match to the expected one %d", - B_LEVEL(bh), level); - return 0; - } - if (level == DISK_LEAF_NODE_LEVEL) - return is_leaf(bh->b_data, bh->b_size, bh); - - return is_internal(bh->b_data, bh->b_size, bh); -} - -#define SEARCH_BY_KEY_READA 16 - -/* - * The function is NOT SCHEDULE-SAFE! - * It might unlock the write lock if we needed to wait for a block - * to be read. Note that in this case it won't recover the lock to avoid - * high contention resulting from too much lock requests, especially - * the caller (search_by_key) will perform other schedule-unsafe - * operations just after calling this function. - * - * @return true if we have unlocked - */ -static bool search_by_key_reada(struct super_block *s, - struct buffer_head **bh, - b_blocknr_t *b, int num) -{ - int i, j; - bool unlocked = false; - - for (i = 0; i < num; i++) { - bh[i] = sb_getblk(s, b[i]); - } - /* - * We are going to read some blocks on which we - * have a reference. It's safe, though we might be - * reading blocks concurrently changed if we release - * the lock. 
But it's still fine because we check later - * if the tree changed - */ - for (j = 0; j < i; j++) { - /* - * note, this needs attention if we are getting rid of the BKL - * you have to make sure the prepared bit isn't set on this buffer - */ - if (!buffer_uptodate(bh[j])) { - if (!unlocked) { - reiserfs_write_unlock(s); - unlocked = true; - } - ll_rw_block(READA, 1, bh + j); - } - brelse(bh[j]); - } - return unlocked; -} - -/************************************************************************** - * Algorithm SearchByKey * - * look for item in the Disk S+Tree by its key * - * Input: sb - super block * - * key - pointer to the key to search * - * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR * - * search_path - path from the root to the needed leaf * - **************************************************************************/ - -/* This function fills up the path from the root to the leaf as it - descends the tree looking for the key. It uses reiserfs_bread to - try to find buffers in the cache given their block number. If it - does not find them in the cache it reads them from disk. For each - node search_by_key finds using reiserfs_bread it then uses - bin_search to look through that node. bin_search will find the - position of the block_number of the next node if it is looking - through an internal node. If it is looking through a leaf node - bin_search will find the position of the item which has key either - equal to given key, or which is the maximal key less than the given - key. search_by_key returns a path that must be checked for the - correctness of the top of the path but need not be checked for the - correctness of the bottom of the path */ -/* The function is NOT SCHEDULE-SAFE! */ -int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to search. */ - struct treepath *search_path,/* This structure was - allocated and initialized - by the calling - function. It is filled up - by this function. 
*/ - int stop_level /* How far down the tree to search. To - stop at leaf level - set to - DISK_LEAF_NODE_LEVEL */ - ) -{ - b_blocknr_t block_number; - int expected_level; - struct buffer_head *bh; - struct path_element *last_element; - int node_level, retval; - int right_neighbor_of_leaf_node; - int fs_gen; - struct buffer_head *reada_bh[SEARCH_BY_KEY_READA]; - b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA]; - int reada_count = 0; - -#ifdef CONFIG_REISERFS_CHECK - int repeat_counter = 0; -#endif - - PROC_INFO_INC(sb, search_by_key); - - /* As we add each node to a path we increase its count. This means that - we must be careful to release all nodes in a path before we either - discard the path struct or re-use the path struct, as we do here. */ - - pathrelse(search_path); - - right_neighbor_of_leaf_node = 0; - - /* With each iteration of this loop we search through the items in the - current node, and calculate the next current node(next path element) - for the next iteration of this loop.. */ - block_number = SB_ROOT_BLOCK(sb); - expected_level = -1; - while (1) { - -#ifdef CONFIG_REISERFS_CHECK - if (!(++repeat_counter % 50000)) - reiserfs_warning(sb, "PAP-5100", - "%s: there were %d iterations of " - "while loop looking for key %K", - current->comm, repeat_counter, - key); -#endif - - /* prep path to have another element added to it. */ - last_element = - PATH_OFFSET_PELEMENT(search_path, - ++search_path->path_length); - fs_gen = get_generation(sb); - - /* Read the next tree node, and set the last element in the path to - have a pointer to it. 
*/ - if ((bh = last_element->pe_buffer = - sb_getblk(sb, block_number))) { - bool unlocked = false; - - if (!buffer_uptodate(bh) && reada_count > 1) - /* may unlock the write lock */ - unlocked = search_by_key_reada(sb, reada_bh, - reada_blocks, reada_count); - /* - * If we haven't already unlocked the write lock, - * then we need to do that here before reading - * the current block - */ - if (!buffer_uptodate(bh) && !unlocked) { - reiserfs_write_unlock(sb); - unlocked = true; - } - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); - - if (unlocked) - reiserfs_write_lock(sb); - if (!buffer_uptodate(bh)) - goto io_error; - } else { - io_error: - search_path->path_length--; - pathrelse(search_path); - return IO_ERROR; - } - reada_count = 0; - if (expected_level == -1) - expected_level = SB_TREE_HEIGHT(sb); - expected_level--; - - /* It is possible that schedule occurred. We must check whether the key - to search is still in the tree rooted from the current buffer. If - not then repeat search from the root. */ - if (fs_changed(fs_gen, sb) && - (!B_IS_IN_TREE(bh) || - B_LEVEL(bh) != expected_level || - !key_in_buffer(search_path, key, sb))) { - PROC_INFO_INC(sb, search_by_key_fs_changed); - PROC_INFO_INC(sb, search_by_key_restarted); - PROC_INFO_INC(sb, - sbk_restarted[expected_level - 1]); - pathrelse(search_path); - - /* Get the root block number so that we can repeat the search - starting from the root. */ - block_number = SB_ROOT_BLOCK(sb); - expected_level = -1; - right_neighbor_of_leaf_node = 0; - - /* repeat search from the root */ - continue; - } - - /* only check that the key is in the buffer if key is not - equal to the MAX_KEY. Latter case is only possible in - "finish_unfinished()" processing during mount. 
*/ - RFALSE(comp_keys(&MAX_KEY, key) && - !key_in_buffer(search_path, key, sb), - "PAP-5130: key is not in the buffer"); -#ifdef CONFIG_REISERFS_CHECK - if (REISERFS_SB(sb)->cur_tb) { - print_cur_tb("5140"); - reiserfs_panic(sb, "PAP-5140", - "schedule occurred in do_balance!"); - } -#endif - - // make sure, that the node contents look like a node of - // certain level - if (!is_tree_node(bh, expected_level)) { - reiserfs_error(sb, "vs-5150", - "invalid format found in block %ld. " - "Fsck?", bh->b_blocknr); - pathrelse(search_path); - return IO_ERROR; - } - - /* ok, we have acquired next formatted node in the tree */ - node_level = B_LEVEL(bh); - - PROC_INFO_BH_STAT(sb, bh, node_level - 1); - - RFALSE(node_level < stop_level, - "vs-5152: tree level (%d) is less than stop level (%d)", - node_level, stop_level); - - retval = bin_search(key, B_N_PITEM_HEAD(bh, 0), - B_NR_ITEMS(bh), - (node_level == - DISK_LEAF_NODE_LEVEL) ? IH_SIZE : - KEY_SIZE, - &(last_element->pe_position)); - if (node_level == stop_level) { - return retval; - } - - /* we are not in the stop level */ - if (retval == ITEM_FOUND) - /* item has been found, so we choose the pointer which is to the right of the found one */ - last_element->pe_position++; - - /* if item was not found we choose the position which is to - the left of the found item. This requires no code, - bin_search did it already. */ - - /* So we have chosen a position in the current node which is - an internal node. Now we calculate child block number by - position in the node. 
*/ - block_number = - B_N_CHILD_NUM(bh, last_element->pe_position); - - /* if we are going to read leaf nodes, try for read ahead as well */ - if ((search_path->reada & PATH_READA) && - node_level == DISK_LEAF_NODE_LEVEL + 1) { - int pos = last_element->pe_position; - int limit = B_NR_ITEMS(bh); - struct reiserfs_key *le_key; - - if (search_path->reada & PATH_READA_BACK) - limit = 0; - while (reada_count < SEARCH_BY_KEY_READA) { - if (pos == limit) - break; - reada_blocks[reada_count++] = - B_N_CHILD_NUM(bh, pos); - if (search_path->reada & PATH_READA_BACK) - pos--; - else - pos++; - - /* - * check to make sure we're in the same object - */ - le_key = B_N_PDELIM_KEY(bh, pos); - if (le32_to_cpu(le_key->k_objectid) != - key->on_disk_key.k_objectid) { - break; - } - } - } - } -} - -/* Form the path to an item and position in this item which contains - file byte defined by key. If there is no such item - corresponding to the key, we point the path to the item with - maximal key less than key, and *pos_in_item is set to one - past the last entry/byte in the item. If searching for entry in a - directory item, and it is not found, *pos_in_item is set to one - entry more than the entry with maximal key which is less than the - sought key. - - Note that if there is no entry in this same node which is one more, - then we point to an imaginary entry. for direct items, the - position is in units of bytes, for indirect items the position is - in units of blocknr entries, for directory items the position is in - units of directory entries. */ - -/* The function is NOT SCHEDULE-SAFE! */ -int search_for_position_by_key(struct super_block *sb, /* Pointer to the super block. */ - const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */ - struct treepath *search_path /* Filled up by this function. 
*/ - ) -{ - struct item_head *p_le_ih; /* pointer to on-disk structure */ - int blk_size; - loff_t item_offset, offset; - struct reiserfs_dir_entry de; - int retval; - - /* If searching for directory entry. */ - if (is_direntry_cpu_key(p_cpu_key)) - return search_by_entry_key(sb, p_cpu_key, search_path, - &de); - - /* If not searching for directory entry. */ - - /* If item is found. */ - retval = search_item(sb, p_cpu_key, search_path); - if (retval == IO_ERROR) - return retval; - if (retval == ITEM_FOUND) { - - RFALSE(!ih_item_len - (B_N_PITEM_HEAD - (PATH_PLAST_BUFFER(search_path), - PATH_LAST_POSITION(search_path))), - "PAP-5165: item length equals zero"); - - pos_in_item(search_path) = 0; - return POSITION_FOUND; - } - - RFALSE(!PATH_LAST_POSITION(search_path), - "PAP-5170: position equals zero"); - - /* Item is not found. Set path to the previous item. */ - p_le_ih = - B_N_PITEM_HEAD(PATH_PLAST_BUFFER(search_path), - --PATH_LAST_POSITION(search_path)); - blk_size = sb->s_blocksize; - - if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) { - return FILE_NOT_FOUND; - } - // FIXME: quite ugly this far - - item_offset = le_ih_k_offset(p_le_ih); - offset = cpu_key_k_offset(p_cpu_key); - - /* Needed byte is contained in the item pointed to by the path. */ - if (item_offset <= offset && - item_offset + op_bytes_number(p_le_ih, blk_size) > offset) { - pos_in_item(search_path) = offset - item_offset; - if (is_indirect_le_ih(p_le_ih)) { - pos_in_item(search_path) /= blk_size; - } - return POSITION_FOUND; - } - - /* Needed byte is not contained in the item pointed to by the - path. Set pos_in_item out of the item. */ - if (is_indirect_le_ih(p_le_ih)) - pos_in_item(search_path) = - ih_item_len(p_le_ih) / UNFM_P_SIZE; - else - pos_in_item(search_path) = ih_item_len(p_le_ih); - - return POSITION_NOT_FOUND; -} - -/* Compare given item and item pointed to by the path. 
*/ -int comp_items(const struct item_head *stored_ih, const struct treepath *path) -{ - struct buffer_head *bh = PATH_PLAST_BUFFER(path); - struct item_head *ih; - - /* Last buffer at the path is not in the tree. */ - if (!B_IS_IN_TREE(bh)) - return 1; - - /* Last path position is invalid. */ - if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh)) - return 1; - - /* we need only to know, whether it is the same item */ - ih = get_ih(path); - return memcmp(stored_ih, ih, IH_SIZE); -} - -/* unformatted nodes are not logged anymore, ever. This is safe -** now -*/ -#define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1) - -// block can not be forgotten as it is in I/O or held by someone -#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh))) - -// prepare for delete or cut of direct item -static inline int prepare_for_direct_item(struct treepath *path, - struct item_head *le_ih, - struct inode *inode, - loff_t new_file_length, int *cut_size) -{ - loff_t round_len; - - if (new_file_length == max_reiserfs_offset(inode)) { - /* item has to be deleted */ - *cut_size = -(IH_SIZE + ih_item_len(le_ih)); - return M_DELETE; - } - // new file gets truncated - if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) { - // - round_len = ROUND_UP(new_file_length); - /* this was new_file_length < le_ih ... */ - if (round_len < le_ih_k_offset(le_ih)) { - *cut_size = -(IH_SIZE + ih_item_len(le_ih)); - return M_DELETE; /* Delete this item. */ - } - /* Calculate first position and size for cutting from item. */ - pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1); - *cut_size = -(ih_item_len(le_ih) - pos_in_item(path)); - - return M_CUT; /* Cut from this item. */ - } - - // old file: items may have any length - - if (new_file_length < le_ih_k_offset(le_ih)) { - *cut_size = -(IH_SIZE + ih_item_len(le_ih)); - return M_DELETE; /* Delete this item. */ - } - /* Calculate first position and size for cutting from item. 
*/ - *cut_size = -(ih_item_len(le_ih) - - (pos_in_item(path) = - new_file_length + 1 - le_ih_k_offset(le_ih))); - return M_CUT; /* Cut from this item. */ -} - -static inline int prepare_for_direntry_item(struct treepath *path, - struct item_head *le_ih, - struct inode *inode, - loff_t new_file_length, - int *cut_size) -{ - if (le_ih_k_offset(le_ih) == DOT_OFFSET && - new_file_length == max_reiserfs_offset(inode)) { - RFALSE(ih_entry_count(le_ih) != 2, - "PAP-5220: incorrect empty directory item (%h)", le_ih); - *cut_size = -(IH_SIZE + ih_item_len(le_ih)); - return M_DELETE; /* Delete the directory item containing "." and ".." entry. */ - } - - if (ih_entry_count(le_ih) == 1) { - /* Delete the directory item such as there is one record only - in this item */ - *cut_size = -(IH_SIZE + ih_item_len(le_ih)); - return M_DELETE; - } - - /* Cut one record from the directory item. */ - *cut_size = - -(DEH_SIZE + - entry_length(get_last_bh(path), le_ih, pos_in_item(path))); - return M_CUT; -} - -#define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1) - -/* If the path points to a directory or direct item, calculate mode and the size cut, for balance. - If the path points to an indirect item, remove some number of its unformatted nodes. - In case of file truncate calculate whether this item must be deleted/truncated or last - unformatted node of this item will be converted to a direct item. - This function returns a determination of what balance mode the calling function should employ. */ -static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *path, const struct cpu_key *item_key, int *removed, /* Number of unformatted nodes which were removed - from end of the file. */ - int *cut_size, unsigned long long new_file_length /* MAX_KEY_OFFSET in case of delete. 
*/ - ) -{ - struct super_block *sb = inode->i_sb; - struct item_head *p_le_ih = PATH_PITEM_HEAD(path); - struct buffer_head *bh = PATH_PLAST_BUFFER(path); - - BUG_ON(!th->t_trans_id); - - /* Stat_data item. */ - if (is_statdata_le_ih(p_le_ih)) { - - RFALSE(new_file_length != max_reiserfs_offset(inode), - "PAP-5210: mode must be M_DELETE"); - - *cut_size = -(IH_SIZE + ih_item_len(p_le_ih)); - return M_DELETE; - } - - /* Directory item. */ - if (is_direntry_le_ih(p_le_ih)) - return prepare_for_direntry_item(path, p_le_ih, inode, - new_file_length, - cut_size); - - /* Direct item. */ - if (is_direct_le_ih(p_le_ih)) - return prepare_for_direct_item(path, p_le_ih, inode, - new_file_length, cut_size); - - /* Case of an indirect item. */ - { - int blk_size = sb->s_blocksize; - struct item_head s_ih; - int need_re_search; - int delete = 0; - int result = M_CUT; - int pos = 0; - - if ( new_file_length == max_reiserfs_offset (inode) ) { - /* prepare_for_delete_or_cut() is called by - * reiserfs_delete_item() */ - new_file_length = 0; - delete = 1; - } - - do { - need_re_search = 0; - *cut_size = 0; - bh = PATH_PLAST_BUFFER(path); - copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); - pos = I_UNFM_NUM(&s_ih); - - while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) { - __le32 *unfm; - __u32 block; - - /* Each unformatted block deletion may involve one additional - * bitmap block into the transaction, thereby the initial - * journal space reservation might not be enough. 
*/ - if (!delete && (*cut_size) != 0 && - reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) - break; - - unfm = (__le32 *)B_I_PITEM(bh, &s_ih) + pos - 1; - block = get_block_num(unfm, 0); - - if (block != 0) { - reiserfs_prepare_for_journal(sb, bh, 1); - put_block_num(unfm, 0, 0); - journal_mark_dirty(th, sb, bh); - reiserfs_free_block(th, inode, block, 1); - } - - reiserfs_write_unlock(sb); - cond_resched(); - reiserfs_write_lock(sb); - - if (item_moved (&s_ih, path)) { - need_re_search = 1; - break; - } - - pos --; - (*removed)++; - (*cut_size) -= UNFM_P_SIZE; - - if (pos == 0) { - (*cut_size) -= IH_SIZE; - result = M_DELETE; - break; - } - } - /* a trick. If the buffer has been logged, this will do nothing. If - ** we've broken the loop without logging it, it will restore the - ** buffer */ - reiserfs_restore_prepared_buffer(sb, bh); - } while (need_re_search && - search_for_position_by_key(sb, item_key, path) == POSITION_FOUND); - pos_in_item(path) = pos * UNFM_P_SIZE; - - if (*cut_size == 0) { - /* Nothing were cut. maybe convert last unformatted node to the - * direct item? */ - result = M_CONVERT; - } - return result; - } -} - -/* Calculate number of bytes which will be deleted or cut during balance */ -static int calc_deleted_bytes_number(struct tree_balance *tb, char mode) -{ - int del_size; - struct item_head *p_le_ih = PATH_PITEM_HEAD(tb->tb_path); - - if (is_statdata_le_ih(p_le_ih)) - return 0; - - del_size = - (mode == - M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0]; - if (is_direntry_le_ih(p_le_ih)) { - /* return EMPTY_DIR_SIZE; We delete emty directoris only. - * we can't use EMPTY_DIR_SIZE, as old format dirs have a different - * empty size. ick. FIXME, is this right? 
*/ - return del_size; - } - - if (is_indirect_le_ih(p_le_ih)) - del_size = (del_size / UNFM_P_SIZE) * - (PATH_PLAST_BUFFER(tb->tb_path)->b_size); - return del_size; -} - -static void init_tb_struct(struct reiserfs_transaction_handle *th, - struct tree_balance *tb, - struct super_block *sb, - struct treepath *path, int size) -{ - - BUG_ON(!th->t_trans_id); - - memset(tb, '\0', sizeof(struct tree_balance)); - tb->transaction_handle = th; - tb->tb_sb = sb; - tb->tb_path = path; - PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL; - PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0; - tb->insert_size[0] = size; -} - -void padd_item(char *item, int total_length, int length) -{ - int i; - - for (i = total_length; i > length;) - item[--i] = 0; -} - -#ifdef REISERQUOTA_DEBUG -char key2type(struct reiserfs_key *ih) -{ - if (is_direntry_le_key(2, ih)) - return 'd'; - if (is_direct_le_key(2, ih)) - return 'D'; - if (is_indirect_le_key(2, ih)) - return 'i'; - if (is_statdata_le_key(2, ih)) - return 's'; - return 'u'; -} - -char head2type(struct item_head *ih) -{ - if (is_direntry_le_ih(ih)) - return 'd'; - if (is_direct_le_ih(ih)) - return 'D'; - if (is_indirect_le_ih(ih)) - return 'i'; - if (is_statdata_le_ih(ih)) - return 's'; - return 'u'; -} -#endif - -/* Delete object item. 
- * th - active transaction handle - * path - path to the deleted item - * item_key - key to search for the deleted item - * indode - used for updating i_blocks and quotas - * un_bh - NULL or unformatted node pointer - */ -int reiserfs_delete_item(struct reiserfs_transaction_handle *th, - struct treepath *path, const struct cpu_key *item_key, - struct inode *inode, struct buffer_head *un_bh) -{ - struct super_block *sb = inode->i_sb; - struct tree_balance s_del_balance; - struct item_head s_ih; - struct item_head *q_ih; - int quota_cut_bytes; - int ret_value, del_size, removed; - -#ifdef CONFIG_REISERFS_CHECK - char mode; - int iter = 0; -#endif - - BUG_ON(!th->t_trans_id); - - init_tb_struct(th, &s_del_balance, sb, path, - 0 /*size is unknown */ ); - - while (1) { - removed = 0; - -#ifdef CONFIG_REISERFS_CHECK - iter++; - mode = -#endif - prepare_for_delete_or_cut(th, inode, path, - item_key, &removed, - &del_size, - max_reiserfs_offset(inode)); - - RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE"); - - copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); - s_del_balance.insert_size[0] = del_size; - - ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL); - if (ret_value != REPEAT_SEARCH) - break; - - PROC_INFO_INC(sb, delete_item_restarted); - - // file system changed, repeat search - ret_value = - search_for_position_by_key(sb, item_key, path); - if (ret_value == IO_ERROR) - break; - if (ret_value == FILE_NOT_FOUND) { - reiserfs_warning(sb, "vs-5340", - "no items of the file %K found", - item_key); - break; - } - } /* while (1) */ - - if (ret_value != CARRY_ON) { - unfix_nodes(&s_del_balance); - return 0; - } - // reiserfs_delete_item returns item length when success - ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); - q_ih = get_ih(path); - quota_cut_bytes = ih_item_len(q_ih); - - /* hack so the quota code doesn't have to guess if the file - ** has a tail. On tail insert, we allocate quota for 1 unformatted node. 
- ** We test the offset because the tail might have been - ** split into multiple items, and we only want to decrement for - ** the unfm node once - */ - if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) { - if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) { - quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; - } else { - quota_cut_bytes = 0; - } - } - - if (un_bh) { - int off; - char *data; - - /* We are in direct2indirect conversion, so move tail contents - to the unformatted node */ - /* note, we do the copy before preparing the buffer because we - ** don't care about the contents of the unformatted node yet. - ** the only thing we really care about is the direct item's data - ** is in the unformatted node. - ** - ** Otherwise, we would have to call reiserfs_prepare_for_journal on - ** the unformatted node, which might schedule, meaning we'd have to - ** loop all the way back up to the start of the while loop. - ** - ** The unformatted node must be dirtied later on. We can't be - ** sure here if the entire tail has been deleted yet. - ** - ** un_bh is from the page cache (all unformatted nodes are - ** from the page cache) and might be a highmem page. So, we - ** can't use un_bh->b_data. - ** -clm - */ - - data = kmap_atomic(un_bh->b_page); - off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1)); - memcpy(data + off, - B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih), - ret_value); - kunmap_atomic(data); - } - /* Perform balancing after all resources have been collected at once. 
*/ - do_balance(&s_del_balance, NULL, NULL, M_DELETE); - -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "reiserquota delete_item(): freeing %u, id=%u type=%c", - quota_cut_bytes, inode->i_uid, head2type(&s_ih)); -#endif - dquot_free_space_nodirty(inode, quota_cut_bytes); - - /* Return deleted body length */ - return ret_value; -} - -/* Summary Of Mechanisms For Handling Collisions Between Processes: - - deletion of the body of the object is performed by iput(), with the - result that if multiple processes are operating on a file, the - deletion of the body of the file is deferred until the last process - that has an open inode performs its iput(). - - writes and truncates are protected from collisions by use of - semaphores. - - creates, linking, and mknod are protected from collisions with other - processes by making the reiserfs_add_entry() the last step in the - creation, and then rolling back all changes if there was a collision. - - Hans -*/ - -/* this deletes item which never gets split */ -void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, - struct inode *inode, struct reiserfs_key *key) -{ - struct tree_balance tb; - INITIALIZE_PATH(path); - int item_len = 0; - int tb_init = 0; - struct cpu_key cpu_key; - int retval; - int quota_cut_bytes = 0; - - BUG_ON(!th->t_trans_id); - - le_key2cpu_key(&cpu_key, key); - - while (1) { - retval = search_item(th->t_super, &cpu_key, &path); - if (retval == IO_ERROR) { - reiserfs_error(th->t_super, "vs-5350", - "i/o failure occurred trying " - "to delete %K", &cpu_key); - break; - } - if (retval != ITEM_FOUND) { - pathrelse(&path); - // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir - if (! 
- ((unsigned long long) - GET_HASH_VALUE(le_key_k_offset - (le_key_version(key), key)) == 0 - && (unsigned long long) - GET_GENERATION_NUMBER(le_key_k_offset - (le_key_version(key), - key)) == 1)) - reiserfs_warning(th->t_super, "vs-5355", - "%k not found", key); - break; - } - if (!tb_init) { - tb_init = 1; - item_len = ih_item_len(PATH_PITEM_HEAD(&path)); - init_tb_struct(th, &tb, th->t_super, &path, - -(IH_SIZE + item_len)); - } - quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path)); - - retval = fix_nodes(M_DELETE, &tb, NULL, NULL); - if (retval == REPEAT_SEARCH) { - PROC_INFO_INC(th->t_super, delete_solid_item_restarted); - continue; - } - - if (retval == CARRY_ON) { - do_balance(&tb, NULL, NULL, M_DELETE); - if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */ -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, - "reiserquota delete_solid_item(): freeing %u id=%u type=%c", - quota_cut_bytes, inode->i_uid, - key2type(key)); -#endif - dquot_free_space_nodirty(inode, - quota_cut_bytes); - } - break; - } - // IO_ERROR, NO_DISK_SPACE, etc - reiserfs_warning(th->t_super, "vs-5360", - "could not delete %K due to fix_nodes failure", - &cpu_key); - unfix_nodes(&tb); - break; - } - - reiserfs_check_path(&path); -} - -int reiserfs_delete_object(struct reiserfs_transaction_handle *th, - struct inode *inode) -{ - int err; - inode->i_size = 0; - BUG_ON(!th->t_trans_id); - - /* for directory this deletes item containing "." and ".." 
*/ - err = - reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ ); - if (err) - return err; - -#if defined( USE_INODE_GENERATION_COUNTER ) - if (!old_format_only(th->t_super)) { - __le32 *inode_generation; - - inode_generation = - &REISERFS_SB(th->t_super)->s_rs->s_inode_generation; - le32_add_cpu(inode_generation, 1); - } -/* USE_INODE_GENERATION_COUNTER */ -#endif - reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode)); - - return err; -} - -static void unmap_buffers(struct page *page, loff_t pos) -{ - struct buffer_head *bh; - struct buffer_head *head; - struct buffer_head *next; - unsigned long tail_index; - unsigned long cur_index; - - if (page) { - if (page_has_buffers(page)) { - tail_index = pos & (PAGE_CACHE_SIZE - 1); - cur_index = 0; - head = page_buffers(page); - bh = head; - do { - next = bh->b_this_page; - - /* we want to unmap the buffers that contain the tail, and - ** all the buffers after it (since the tail must be at the - ** end of the file). We don't want to unmap file data - ** before the tail, since it might be dirty and waiting to - ** reach disk - */ - cur_index += bh->b_size; - if (cur_index > tail_index) { - reiserfs_unmap_buffer(bh); - } - bh = next; - } while (bh != head); - } - } -} - -static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th, - struct inode *inode, - struct page *page, - struct treepath *path, - const struct cpu_key *item_key, - loff_t new_file_size, char *mode) -{ - struct super_block *sb = inode->i_sb; - int block_size = sb->s_blocksize; - int cut_bytes; - BUG_ON(!th->t_trans_id); - BUG_ON(new_file_size != inode->i_size); - - /* the page being sent in could be NULL if there was an i/o error - ** reading in the last block. 
The user will hit problems trying to - ** read the file, but for now we just skip the indirect2direct - */ - if (atomic_read(&inode->i_count) > 1 || - !tail_has_to_be_packed(inode) || - !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) { - /* leave tail in an unformatted node */ - *mode = M_SKIP_BALANCING; - cut_bytes = - block_size - (new_file_size & (block_size - 1)); - pathrelse(path); - return cut_bytes; - } - /* Perform the conversion to a direct_item. */ - /* return indirect_to_direct(inode, path, item_key, - new_file_size, mode); */ - return indirect2direct(th, inode, page, path, item_key, - new_file_size, mode); -} - -/* we did indirect_to_direct conversion. And we have inserted direct - item successesfully, but there were no disk space to cut unfm - pointer being converted. Therefore we have to delete inserted - direct item(s) */ -static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th, - struct inode *inode, struct treepath *path) -{ - struct cpu_key tail_key; - int tail_len; - int removed; - BUG_ON(!th->t_trans_id); - - make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4); // !!!! 
- tail_key.key_length = 4; - - tail_len = - (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1; - while (tail_len) { - /* look for the last byte of the tail */ - if (search_for_position_by_key(inode->i_sb, &tail_key, path) == - POSITION_NOT_FOUND) - reiserfs_panic(inode->i_sb, "vs-5615", - "found invalid item"); - RFALSE(path->pos_in_item != - ih_item_len(PATH_PITEM_HEAD(path)) - 1, - "vs-5616: appended bytes found"); - PATH_LAST_POSITION(path)--; - - removed = - reiserfs_delete_item(th, path, &tail_key, inode, - NULL /*unbh not needed */ ); - RFALSE(removed <= 0 - || removed > tail_len, - "vs-5617: there was tail %d bytes, removed item length %d bytes", - tail_len, removed); - tail_len -= removed; - set_cpu_key_k_offset(&tail_key, - cpu_key_k_offset(&tail_key) - removed); - } - reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct " - "conversion has been rolled back due to " - "lack of disk space"); - //mark_file_without_tail (inode); - mark_inode_dirty(inode); -} - -/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */ -int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, - struct treepath *path, - struct cpu_key *item_key, - struct inode *inode, - struct page *page, loff_t new_file_size) -{ - struct super_block *sb = inode->i_sb; - /* Every function which is going to call do_balance must first - create a tree_balance structure. Then it must fill up this - structure by using the init_tb_struct and fix_nodes functions. - After that we can make tree balancing. */ - struct tree_balance s_cut_balance; - struct item_head *p_le_ih; - int cut_size = 0, /* Amount to be cut. */ - ret_value = CARRY_ON, removed = 0, /* Number of the removed unformatted nodes. */ - is_inode_locked = 0; - char mode; /* Mode of the balance. 
*/ - int retval2 = -1; - int quota_cut_bytes; - loff_t tail_pos = 0; - - BUG_ON(!th->t_trans_id); - - init_tb_struct(th, &s_cut_balance, inode->i_sb, path, - cut_size); - - /* Repeat this loop until we either cut the item without needing - to balance, or we fix_nodes without schedule occurring */ - while (1) { - /* Determine the balance mode, position of the first byte to - be cut, and size to be cut. In case of the indirect item - free unformatted nodes which are pointed to by the cut - pointers. */ - - mode = - prepare_for_delete_or_cut(th, inode, path, - item_key, &removed, - &cut_size, new_file_size); - if (mode == M_CONVERT) { - /* convert last unformatted node to direct item or leave - tail in the unformatted node */ - RFALSE(ret_value != CARRY_ON, - "PAP-5570: can not convert twice"); - - ret_value = - maybe_indirect_to_direct(th, inode, page, - path, item_key, - new_file_size, &mode); - if (mode == M_SKIP_BALANCING) - /* tail has been left in the unformatted node */ - return ret_value; - - is_inode_locked = 1; - - /* removing of last unformatted node will change value we - have to return to truncate. Save it */ - retval2 = ret_value; - /*retval2 = sb->s_blocksize - (new_file_size & (sb->s_blocksize - 1)); */ - - /* So, we have performed the first part of the conversion: - inserting the new direct item. Now we are removing the - last unformatted node pointer. Set key to search for - it. 
*/ - set_cpu_key_k_type(item_key, TYPE_INDIRECT); - item_key->key_length = 4; - new_file_size -= - (new_file_size & (sb->s_blocksize - 1)); - tail_pos = new_file_size; - set_cpu_key_k_offset(item_key, new_file_size + 1); - if (search_for_position_by_key - (sb, item_key, - path) == POSITION_NOT_FOUND) { - print_block(PATH_PLAST_BUFFER(path), 3, - PATH_LAST_POSITION(path) - 1, - PATH_LAST_POSITION(path) + 1); - reiserfs_panic(sb, "PAP-5580", "item to " - "convert does not exist (%K)", - item_key); - } - continue; - } - if (cut_size == 0) { - pathrelse(path); - return 0; - } - - s_cut_balance.insert_size[0] = cut_size; - - ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL); - if (ret_value != REPEAT_SEARCH) - break; - - PROC_INFO_INC(sb, cut_from_item_restarted); - - ret_value = - search_for_position_by_key(sb, item_key, path); - if (ret_value == POSITION_FOUND) - continue; - - reiserfs_warning(sb, "PAP-5610", "item %K not found", - item_key); - unfix_nodes(&s_cut_balance); - return (ret_value == IO_ERROR) ? -EIO : -ENOENT; - } /* while */ - - // check fix_nodes results (IO_ERROR or NO_DISK_SPACE) - if (ret_value != CARRY_ON) { - if (is_inode_locked) { - // FIXME: this seems to be not needed: we are always able - // to cut item - indirect_to_direct_roll_back(th, inode, path); - } - if (ret_value == NO_DISK_SPACE) - reiserfs_warning(sb, "reiserfs-5092", - "NO_DISK_SPACE"); - unfix_nodes(&s_cut_balance); - return -EIO; - } - - /* go ahead and perform balancing */ - - RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode"); - - /* Calculate number of bytes that need to be cut from the item. */ - quota_cut_bytes = - (mode == - M_DELETE) ? ih_item_len(get_ih(path)) : -s_cut_balance. - insert_size[0]; - if (retval2 == -1) - ret_value = calc_deleted_bytes_number(&s_cut_balance, mode); - else - ret_value = retval2; - - /* For direct items, we only change the quota when deleting the last - ** item. 
- */ - p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path); - if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) { - if (mode == M_DELETE && - (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) == - 1) { - // FIXME: this is to keep 3.5 happy - REISERFS_I(inode)->i_first_direct_byte = U32_MAX; - quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; - } else { - quota_cut_bytes = 0; - } - } -#ifdef CONFIG_REISERFS_CHECK - if (is_inode_locked) { - struct item_head *le_ih = - PATH_PITEM_HEAD(s_cut_balance.tb_path); - /* we are going to complete indirect2direct conversion. Make - sure, that we exactly remove last unformatted node pointer - of the item */ - if (!is_indirect_le_ih(le_ih)) - reiserfs_panic(sb, "vs-5652", - "item must be indirect %h", le_ih); - - if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE) - reiserfs_panic(sb, "vs-5653", "completing " - "indirect2direct conversion indirect " - "item %h being deleted must be of " - "4 byte long", le_ih); - - if (mode == M_CUT - && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) { - reiserfs_panic(sb, "vs-5654", "can not complete " - "indirect2direct conversion of %h " - "(CUT, insert_size==%d)", - le_ih, s_cut_balance.insert_size[0]); - } - /* it would be useful to make sure, that right neighboring - item is direct item of this file */ - } -#endif - - do_balance(&s_cut_balance, NULL, NULL, mode); - if (is_inode_locked) { - /* we've done an indirect->direct conversion. 
when the data block - ** was freed, it was removed from the list of blocks that must - ** be flushed before the transaction commits, make sure to - ** unmap and invalidate it - */ - unmap_buffers(page, tail_pos); - REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; - } -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, - "reiserquota cut_from_item(): freeing %u id=%u type=%c", - quota_cut_bytes, inode->i_uid, '?'); -#endif - dquot_free_space_nodirty(inode, quota_cut_bytes); - return ret_value; -} - -static void truncate_directory(struct reiserfs_transaction_handle *th, - struct inode *inode) -{ - BUG_ON(!th->t_trans_id); - if (inode->i_nlink) - reiserfs_error(inode->i_sb, "vs-5655", "link count != 0"); - - set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET); - set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY); - reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode)); - reiserfs_update_sd(th, inode); - set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET); - set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA); -} - -/* Truncate file to the new size. Note, this must be called with a transaction - already started */ -int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, - struct inode *inode, /* ->i_size contains new size */ - struct page *page, /* up to date for last block */ - int update_timestamps /* when it is called by - file_release to convert - the tail - no timestamps - should be updated */ - ) -{ - INITIALIZE_PATH(s_search_path); /* Path to the current object item. */ - struct item_head *p_le_ih; /* Pointer to an item header. */ - struct cpu_key s_item_key; /* Key to search for a previous file item. */ - loff_t file_size, /* Old file size. */ - new_file_size; /* New file size. */ - int deleted; /* Number of deleted or truncated bytes. */ - int retval; - int err = 0; - - BUG_ON(!th->t_trans_id); - if (! 
- (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) - || S_ISLNK(inode->i_mode))) - return 0; - - if (S_ISDIR(inode->i_mode)) { - // deletion of directory - no need to update timestamps - truncate_directory(th, inode); - return 0; - } - - /* Get new file size. */ - new_file_size = inode->i_size; - - // FIXME: note, that key type is unimportant here - make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode), - TYPE_DIRECT, 3); - - retval = - search_for_position_by_key(inode->i_sb, &s_item_key, - &s_search_path); - if (retval == IO_ERROR) { - reiserfs_error(inode->i_sb, "vs-5657", - "i/o failure occurred trying to truncate %K", - &s_item_key); - err = -EIO; - goto out; - } - if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) { - reiserfs_error(inode->i_sb, "PAP-5660", - "wrong result %d of search for %K", retval, - &s_item_key); - - err = -EIO; - goto out; - } - - s_search_path.pos_in_item--; - - /* Get real file size (total length of all file items) */ - p_le_ih = PATH_PITEM_HEAD(&s_search_path); - if (is_statdata_le_ih(p_le_ih)) - file_size = 0; - else { - loff_t offset = le_ih_k_offset(p_le_ih); - int bytes = - op_bytes_number(p_le_ih, inode->i_sb->s_blocksize); - - /* this may mismatch with real file size: if last direct item - had no padding zeros and last unformatted node had no free - space, this file would have this file size */ - file_size = offset + bytes - 1; - } - /* - * are we doing a full truncate or delete, if so - * kick in the reada code - */ - if (new_file_size == 0) - s_search_path.reada = PATH_READA | PATH_READA_BACK; - - if (file_size == 0 || file_size < new_file_size) { - goto update_and_out; - } - - /* Update key to search for the last file item. */ - set_cpu_key_k_offset(&s_item_key, file_size); - - do { - /* Cut or delete file item. 
*/ - deleted = - reiserfs_cut_from_item(th, &s_search_path, &s_item_key, - inode, page, new_file_size); - if (deleted < 0) { - reiserfs_warning(inode->i_sb, "vs-5665", - "reiserfs_cut_from_item failed"); - reiserfs_check_path(&s_search_path); - return 0; - } - - RFALSE(deleted > file_size, - "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K", - deleted, file_size, &s_item_key); - - /* Change key to search the last file item. */ - file_size -= deleted; - - set_cpu_key_k_offset(&s_item_key, file_size); - - /* While there are bytes to truncate and previous file item is presented in the tree. */ - - /* - ** This loop could take a really long time, and could log - ** many more blocks than a transaction can hold. So, we do a polite - ** journal end here, and if the transaction needs ending, we make - ** sure the file is consistent before ending the current trans - ** and starting a new one - */ - if (journal_transaction_should_end(th, 0) || - reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { - int orig_len_alloc = th->t_blocks_allocated; - pathrelse(&s_search_path); - - if (update_timestamps) { - inode->i_mtime = CURRENT_TIME_SEC; - inode->i_ctime = CURRENT_TIME_SEC; - } - reiserfs_update_sd(th, inode); - - err = journal_end(th, inode->i_sb, orig_len_alloc); - if (err) - goto out; - err = journal_begin(th, inode->i_sb, - JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ; - if (err) - goto out; - reiserfs_update_inode_transaction(inode); - } - } while (file_size > ROUND_UP(new_file_size) && - search_for_position_by_key(inode->i_sb, &s_item_key, - &s_search_path) == POSITION_FOUND); - - RFALSE(file_size > ROUND_UP(new_file_size), - "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d", - new_file_size, file_size, s_item_key.on_disk_key.k_objectid); - - update_and_out: - if (update_timestamps) { - // this is truncate, not file closing - inode->i_mtime = 
CURRENT_TIME_SEC; - inode->i_ctime = CURRENT_TIME_SEC; - } - reiserfs_update_sd(th, inode); - - out: - pathrelse(&s_search_path); - return err; -} - -#ifdef CONFIG_REISERFS_CHECK -// this makes sure, that we __append__, not overwrite or add holes -static void check_research_for_paste(struct treepath *path, - const struct cpu_key *key) -{ - struct item_head *found_ih = get_ih(path); - - if (is_direct_le_ih(found_ih)) { - if (le_ih_k_offset(found_ih) + - op_bytes_number(found_ih, - get_last_bh(path)->b_size) != - cpu_key_k_offset(key) - || op_bytes_number(found_ih, - get_last_bh(path)->b_size) != - pos_in_item(path)) - reiserfs_panic(NULL, "PAP-5720", "found direct item " - "%h or position (%d) does not match " - "to key %K", found_ih, - pos_in_item(path), key); - } - if (is_indirect_le_ih(found_ih)) { - if (le_ih_k_offset(found_ih) + - op_bytes_number(found_ih, - get_last_bh(path)->b_size) != - cpu_key_k_offset(key) - || I_UNFM_NUM(found_ih) != pos_in_item(path) - || get_ih_free_space(found_ih) != 0) - reiserfs_panic(NULL, "PAP-5730", "found indirect " - "item (%h) or position (%d) does not " - "match to key (%K)", - found_ih, pos_in_item(path), key); - } -} -#endif /* config reiserfs check */ - -/* Paste bytes to the existing item. Returns bytes number pasted into the item. */ -int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *search_path, /* Path to the pasted item. */ - const struct cpu_key *key, /* Key to search for the needed item. */ - struct inode *inode, /* Inode item belongs to */ - const char *body, /* Pointer to the bytes to paste. */ - int pasted_size) -{ /* Size of pasted bytes. 
*/ - struct tree_balance s_paste_balance; - int retval; - int fs_gen; - - BUG_ON(!th->t_trans_id); - - fs_gen = get_generation(inode->i_sb); - -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, - "reiserquota paste_into_item(): allocating %u id=%u type=%c", - pasted_size, inode->i_uid, - key2type(&(key->on_disk_key))); -#endif - - retval = dquot_alloc_space_nodirty(inode, pasted_size); - if (retval) { - pathrelse(search_path); - return retval; - } - init_tb_struct(th, &s_paste_balance, th->t_super, search_path, - pasted_size); -#ifdef DISPLACE_NEW_PACKING_LOCALITIES - s_paste_balance.key = key->on_disk_key; -#endif - - /* DQUOT_* can schedule, must check before the fix_nodes */ - if (fs_changed(fs_gen, inode->i_sb)) { - goto search_again; - } - - while ((retval = - fix_nodes(M_PASTE, &s_paste_balance, NULL, - body)) == REPEAT_SEARCH) { - search_again: - /* file system changed while we were in the fix_nodes */ - PROC_INFO_INC(th->t_super, paste_into_item_restarted); - retval = - search_for_position_by_key(th->t_super, key, - search_path); - if (retval == IO_ERROR) { - retval = -EIO; - goto error_out; - } - if (retval == POSITION_FOUND) { - reiserfs_warning(inode->i_sb, "PAP-5710", - "entry or pasted byte (%K) exists", - key); - retval = -EEXIST; - goto error_out; - } -#ifdef CONFIG_REISERFS_CHECK - check_research_for_paste(search_path, key); -#endif - } - - /* Perform balancing after all resources are collected by fix_nodes, and - accessing them will not risk triggering schedule. */ - if (retval == CARRY_ON) { - do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE); - return 0; - } - retval = (retval == NO_DISK_SPACE) ? 
-ENOSPC : -EIO; - error_out: - /* this also releases the path */ - unfix_nodes(&s_paste_balance); -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, - "reiserquota paste_into_item(): freeing %u id=%u type=%c", - pasted_size, inode->i_uid, - key2type(&(key->on_disk_key))); -#endif - dquot_free_space_nodirty(inode, pasted_size); - return retval; -} - -/* Insert new item into the buffer at the path. - * th - active transaction handle - * path - path to the inserted item - * ih - pointer to the item header to insert - * body - pointer to the bytes to insert - */ -int reiserfs_insert_item(struct reiserfs_transaction_handle *th, - struct treepath *path, const struct cpu_key *key, - struct item_head *ih, struct inode *inode, - const char *body) -{ - struct tree_balance s_ins_balance; - int retval; - int fs_gen = 0; - int quota_bytes = 0; - - BUG_ON(!th->t_trans_id); - - if (inode) { /* Do we count quotas for item? */ - fs_gen = get_generation(inode->i_sb); - quota_bytes = ih_item_len(ih); - - /* hack so the quota code doesn't have to guess if the file has - ** a tail, links are always tails, so there's no guessing needed - */ - if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih)) - quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE; -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, - "reiserquota insert_item(): allocating %u id=%u type=%c", - quota_bytes, inode->i_uid, head2type(ih)); -#endif - /* We can't dirty inode here. It would be immediately written but - * appropriate stat item isn't inserted yet... 
*/ - retval = dquot_alloc_space_nodirty(inode, quota_bytes); - if (retval) { - pathrelse(path); - return retval; - } - } - init_tb_struct(th, &s_ins_balance, th->t_super, path, - IH_SIZE + ih_item_len(ih)); -#ifdef DISPLACE_NEW_PACKING_LOCALITIES - s_ins_balance.key = key->on_disk_key; -#endif - /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */ - if (inode && fs_changed(fs_gen, inode->i_sb)) { - goto search_again; - } - - while ((retval = - fix_nodes(M_INSERT, &s_ins_balance, ih, - body)) == REPEAT_SEARCH) { - search_again: - /* file system changed while we were in the fix_nodes */ - PROC_INFO_INC(th->t_super, insert_item_restarted); - retval = search_item(th->t_super, key, path); - if (retval == IO_ERROR) { - retval = -EIO; - goto error_out; - } - if (retval == ITEM_FOUND) { - reiserfs_warning(th->t_super, "PAP-5760", - "key %K already exists in the tree", - key); - retval = -EEXIST; - goto error_out; - } - } - - /* make balancing after all resources will be collected at a time */ - if (retval == CARRY_ON) { - do_balance(&s_ins_balance, ih, body, M_INSERT); - return 0; - } - - retval = (retval == NO_DISK_SPACE) ? 
-ENOSPC : -EIO; - error_out: - /* also releases the path */ - unfix_nodes(&s_ins_balance); -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, - "reiserquota insert_item(): freeing %u id=%u type=%c", - quota_bytes, inode->i_uid, head2type(ih)); -#endif - if (inode) - dquot_free_space_nodirty(inode, quota_bytes); - return retval; -} diff --git a/ANDROID_3.4.5/fs/reiserfs/super.c b/ANDROID_3.4.5/fs/reiserfs/super.c deleted file mode 100644 index 8b7616ef..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/super.c +++ /dev/null @@ -1,2369 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - * - * Trivial changes by Alan Cox to add the LFS fixes - * - * Trivial Changes: - * Rights granted to Hans Reiser to redistribute under other terms providing - * he accepts all liability including but not limited to patent, fitness - * for purpose, and direct or indirect claims arising from failure to perform. - * - * NO WARRANTY - */ - -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/time.h> -#include <asm/uaccess.h> -#include "reiserfs.h" -#include "acl.h" -#include "xattr.h" -#include <linux/init.h> -#include <linux/blkdev.h> -#include <linux/buffer_head.h> -#include <linux/exportfs.h> -#include <linux/quotaops.h> -#include <linux/vfs.h> -#include <linux/mount.h> -#include <linux/namei.h> -#include <linux/crc32.h> -#include <linux/seq_file.h> - -struct file_system_type reiserfs_fs_type; - -static const char reiserfs_3_5_magic_string[] = REISERFS_SUPER_MAGIC_STRING; -static const char reiserfs_3_6_magic_string[] = REISER2FS_SUPER_MAGIC_STRING; -static const char reiserfs_jr_magic_string[] = REISER2FS_JR_SUPER_MAGIC_STRING; - -int is_reiserfs_3_5(struct reiserfs_super_block *rs) -{ - return !strncmp(rs->s_v1.s_magic, reiserfs_3_5_magic_string, - strlen(reiserfs_3_5_magic_string)); -} - -int is_reiserfs_3_6(struct reiserfs_super_block *rs) -{ - return !strncmp(rs->s_v1.s_magic, 
reiserfs_3_6_magic_string, - strlen(reiserfs_3_6_magic_string)); -} - -int is_reiserfs_jr(struct reiserfs_super_block *rs) -{ - return !strncmp(rs->s_v1.s_magic, reiserfs_jr_magic_string, - strlen(reiserfs_jr_magic_string)); -} - -static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs) -{ - return (is_reiserfs_3_5(rs) || is_reiserfs_3_6(rs) || - is_reiserfs_jr(rs)); -} - -static int reiserfs_remount(struct super_block *s, int *flags, char *data); -static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf); -void show_alloc_options(struct seq_file *seq, struct super_block *s); - -static int reiserfs_sync_fs(struct super_block *s, int wait) -{ - struct reiserfs_transaction_handle th; - - reiserfs_write_lock(s); - if (!journal_begin(&th, s, 1)) - if (!journal_end_sync(&th, s, 1)) - reiserfs_flush_old_commits(s); - s->s_dirt = 0; /* Even if it's not true. - * We'll loop forever in sync_supers otherwise */ - reiserfs_write_unlock(s); - return 0; -} - -static void reiserfs_write_super(struct super_block *s) -{ - reiserfs_sync_fs(s, 1); -} - -static int reiserfs_freeze(struct super_block *s) -{ - struct reiserfs_transaction_handle th; - reiserfs_write_lock(s); - if (!(s->s_flags & MS_RDONLY)) { - int err = journal_begin(&th, s, 1); - if (err) { - reiserfs_block_writes(&th); - } else { - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), - 1); - journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); - reiserfs_block_writes(&th); - journal_end_sync(&th, s, 1); - } - } - s->s_dirt = 0; - reiserfs_write_unlock(s); - return 0; -} - -static int reiserfs_unfreeze(struct super_block *s) -{ - reiserfs_allow_writes(s); - return 0; -} - -extern const struct in_core_key MAX_IN_CORE_KEY; - -/* this is used to delete "save link" when there are no items of a - file it points to. 
It can either happen if unlink is completed but - "save unlink" removal, or if file has both unlink and truncate - pending and as unlink completes first (because key of "save link" - protecting unlink is bigger that a key lf "save link" which - protects truncate), so there left no items to make truncate - completion on */ -static int remove_save_link_only(struct super_block *s, - struct reiserfs_key *key, int oid_free) -{ - struct reiserfs_transaction_handle th; - int err; - - /* we are going to do one balancing */ - err = journal_begin(&th, s, JOURNAL_PER_BALANCE_CNT); - if (err) - return err; - - reiserfs_delete_solid_item(&th, NULL, key); - if (oid_free) - /* removals are protected by direct items */ - reiserfs_release_objectid(&th, le32_to_cpu(key->k_objectid)); - - return journal_end(&th, s, JOURNAL_PER_BALANCE_CNT); -} - -#ifdef CONFIG_QUOTA -static int reiserfs_quota_on_mount(struct super_block *, int); -#endif - -/* look for uncompleted unlinks and truncates and complete them */ -static int finish_unfinished(struct super_block *s) -{ - INITIALIZE_PATH(path); - struct cpu_key max_cpu_key, obj_key; - struct reiserfs_key save_link_key, last_inode_key; - int retval = 0; - struct item_head *ih; - struct buffer_head *bh; - int item_pos; - char *item; - int done; - struct inode *inode; - int truncate; -#ifdef CONFIG_QUOTA - int i; - int ms_active_set; - int quota_enabled[MAXQUOTAS]; -#endif - - /* compose key to look for "save" links */ - max_cpu_key.version = KEY_FORMAT_3_5; - max_cpu_key.on_disk_key.k_dir_id = ~0U; - max_cpu_key.on_disk_key.k_objectid = ~0U; - set_cpu_key_k_offset(&max_cpu_key, ~0U); - max_cpu_key.key_length = 3; - - memset(&last_inode_key, 0, sizeof(last_inode_key)); - -#ifdef CONFIG_QUOTA - /* Needed for iput() to work correctly and not trash data */ - if (s->s_flags & MS_ACTIVE) { - ms_active_set = 0; - } else { - ms_active_set = 1; - s->s_flags |= MS_ACTIVE; - } - /* Turn on quotas so that they are updated correctly */ - for (i = 0; i < 
MAXQUOTAS; i++) { - quota_enabled[i] = 1; - if (REISERFS_SB(s)->s_qf_names[i]) { - int ret; - - if (sb_has_quota_active(s, i)) { - quota_enabled[i] = 0; - continue; - } - ret = reiserfs_quota_on_mount(s, i); - if (ret < 0) - reiserfs_warning(s, "reiserfs-2500", - "cannot turn on journaled " - "quota: error %d", ret); - } - } -#endif - - done = 0; - REISERFS_SB(s)->s_is_unlinked_ok = 1; - while (!retval) { - retval = search_item(s, &max_cpu_key, &path); - if (retval != ITEM_NOT_FOUND) { - reiserfs_error(s, "vs-2140", - "search_by_key returned %d", retval); - break; - } - - bh = get_last_bh(&path); - item_pos = get_item_pos(&path); - if (item_pos != B_NR_ITEMS(bh)) { - reiserfs_warning(s, "vs-2060", - "wrong position found"); - break; - } - item_pos--; - ih = B_N_PITEM_HEAD(bh, item_pos); - - if (le32_to_cpu(ih->ih_key.k_dir_id) != MAX_KEY_OBJECTID) - /* there are no "save" links anymore */ - break; - - save_link_key = ih->ih_key; - if (is_indirect_le_ih(ih)) - truncate = 1; - else - truncate = 0; - - /* reiserfs_iget needs k_dirid and k_objectid only */ - item = B_I_PITEM(bh, ih); - obj_key.on_disk_key.k_dir_id = le32_to_cpu(*(__le32 *) item); - obj_key.on_disk_key.k_objectid = - le32_to_cpu(ih->ih_key.k_objectid); - obj_key.on_disk_key.k_offset = 0; - obj_key.on_disk_key.k_type = 0; - - pathrelse(&path); - - inode = reiserfs_iget(s, &obj_key); - if (!inode) { - /* the unlink almost completed, it just did not manage to remove - "save" link and release objectid */ - reiserfs_warning(s, "vs-2180", "iget failed for %K", - &obj_key); - retval = remove_save_link_only(s, &save_link_key, 1); - continue; - } - - if (!truncate && inode->i_nlink) { - /* file is not unlinked */ - reiserfs_warning(s, "vs-2185", - "file %K is not unlinked", - &obj_key); - retval = remove_save_link_only(s, &save_link_key, 0); - continue; - } - dquot_initialize(inode); - - if (truncate && S_ISDIR(inode->i_mode)) { - /* We got a truncate request for a dir which is impossible. 
- The only imaginable way is to execute unfinished truncate request - then boot into old kernel, remove the file and create dir with - the same key. */ - reiserfs_warning(s, "green-2101", - "impossible truncate on a " - "directory %k. Please report", - INODE_PKEY(inode)); - retval = remove_save_link_only(s, &save_link_key, 0); - truncate = 0; - iput(inode); - continue; - } - - if (truncate) { - REISERFS_I(inode)->i_flags |= - i_link_saved_truncate_mask; - /* not completed truncate found. New size was committed together - with "save" link */ - reiserfs_info(s, "Truncating %k to %Ld ..", - INODE_PKEY(inode), inode->i_size); - reiserfs_truncate_file(inode, - 0 - /*don't update modification time */ - ); - retval = remove_save_link(inode, truncate); - } else { - REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask; - /* not completed unlink (rmdir) found */ - reiserfs_info(s, "Removing %k..", INODE_PKEY(inode)); - if (memcmp(&last_inode_key, INODE_PKEY(inode), - sizeof(last_inode_key))){ - last_inode_key = *INODE_PKEY(inode); - /* removal gets completed in iput */ - retval = 0; - } else { - reiserfs_warning(s, "super-2189", "Dead loop " - "in finish_unfinished " - "detected, just remove " - "save link\n"); - retval = remove_save_link_only(s, - &save_link_key, 0); - } - } - - iput(inode); - printk("done\n"); - done++; - } - REISERFS_SB(s)->s_is_unlinked_ok = 0; - -#ifdef CONFIG_QUOTA - /* Turn quotas off */ - for (i = 0; i < MAXQUOTAS; i++) { - if (sb_dqopt(s)->files[i] && quota_enabled[i]) - dquot_quota_off(s, i); - } - if (ms_active_set) - /* Restore the flag back */ - s->s_flags &= ~MS_ACTIVE; -#endif - pathrelse(&path); - if (done) - reiserfs_info(s, "There were %d uncompleted unlinks/truncates. " - "Completed\n", done); - return retval; -} - -/* to protect file being unlinked from getting lost we "safe" link files - being unlinked. This link will be deleted in the same transaction with last - item of file. 
mounting the filesystem we scan all these links and remove - files which almost got lost */ -void add_save_link(struct reiserfs_transaction_handle *th, - struct inode *inode, int truncate) -{ - INITIALIZE_PATH(path); - int retval; - struct cpu_key key; - struct item_head ih; - __le32 link; - - BUG_ON(!th->t_trans_id); - - /* file can only get one "save link" of each kind */ - RFALSE(truncate && - (REISERFS_I(inode)->i_flags & i_link_saved_truncate_mask), - "saved link already exists for truncated inode %lx", - (long)inode->i_ino); - RFALSE(!truncate && - (REISERFS_I(inode)->i_flags & i_link_saved_unlink_mask), - "saved link already exists for unlinked inode %lx", - (long)inode->i_ino); - - /* setup key of "save" link */ - key.version = KEY_FORMAT_3_5; - key.on_disk_key.k_dir_id = MAX_KEY_OBJECTID; - key.on_disk_key.k_objectid = inode->i_ino; - if (!truncate) { - /* unlink, rmdir, rename */ - set_cpu_key_k_offset(&key, 1 + inode->i_sb->s_blocksize); - set_cpu_key_k_type(&key, TYPE_DIRECT); - - /* item head of "safe" link */ - make_le_item_head(&ih, &key, key.version, - 1 + inode->i_sb->s_blocksize, TYPE_DIRECT, - 4 /*length */ , 0xffff /*free space */ ); - } else { - /* truncate */ - if (S_ISDIR(inode->i_mode)) - reiserfs_warning(inode->i_sb, "green-2102", - "Adding a truncate savelink for " - "a directory %k! 
Please report", - INODE_PKEY(inode)); - set_cpu_key_k_offset(&key, 1); - set_cpu_key_k_type(&key, TYPE_INDIRECT); - - /* item head of "safe" link */ - make_le_item_head(&ih, &key, key.version, 1, TYPE_INDIRECT, - 4 /*length */ , 0 /*free space */ ); - } - key.key_length = 3; - - /* look for its place in the tree */ - retval = search_item(inode->i_sb, &key, &path); - if (retval != ITEM_NOT_FOUND) { - if (retval != -ENOSPC) - reiserfs_error(inode->i_sb, "vs-2100", - "search_by_key (%K) returned %d", &key, - retval); - pathrelse(&path); - return; - } - - /* body of "save" link */ - link = INODE_PKEY(inode)->k_dir_id; - - /* put "save" link into tree, don't charge quota to anyone */ - retval = - reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link); - if (retval) { - if (retval != -ENOSPC) - reiserfs_error(inode->i_sb, "vs-2120", - "insert_item returned %d", retval); - } else { - if (truncate) - REISERFS_I(inode)->i_flags |= - i_link_saved_truncate_mask; - else - REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask; - } -} - -/* this opens transaction unlike add_save_link */ -int remove_save_link(struct inode *inode, int truncate) -{ - struct reiserfs_transaction_handle th; - struct reiserfs_key key; - int err; - - /* we are going to do one balancing only */ - err = journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); - if (err) - return err; - - /* setup key of "save" link */ - key.k_dir_id = cpu_to_le32(MAX_KEY_OBJECTID); - key.k_objectid = INODE_PKEY(inode)->k_objectid; - if (!truncate) { - /* unlink, rmdir, rename */ - set_le_key_k_offset(KEY_FORMAT_3_5, &key, - 1 + inode->i_sb->s_blocksize); - set_le_key_k_type(KEY_FORMAT_3_5, &key, TYPE_DIRECT); - } else { - /* truncate */ - set_le_key_k_offset(KEY_FORMAT_3_5, &key, 1); - set_le_key_k_type(KEY_FORMAT_3_5, &key, TYPE_INDIRECT); - } - - if ((truncate && - (REISERFS_I(inode)->i_flags & i_link_saved_truncate_mask)) || - (!truncate && - (REISERFS_I(inode)->i_flags & i_link_saved_unlink_mask))) - /* 
don't take quota bytes from anywhere */ - reiserfs_delete_solid_item(&th, NULL, &key); - if (!truncate) { - reiserfs_release_objectid(&th, inode->i_ino); - REISERFS_I(inode)->i_flags &= ~i_link_saved_unlink_mask; - } else - REISERFS_I(inode)->i_flags &= ~i_link_saved_truncate_mask; - - return journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); -} - -static void reiserfs_kill_sb(struct super_block *s) -{ - if (REISERFS_SB(s)) { - /* - * Force any pending inode evictions to occur now. Any - * inodes to be removed that have extended attributes - * associated with them need to clean them up before - * we can release the extended attribute root dentries. - * shrink_dcache_for_umount will BUG if we don't release - * those before it's called so ->put_super is too late. - */ - shrink_dcache_sb(s); - - dput(REISERFS_SB(s)->xattr_root); - REISERFS_SB(s)->xattr_root = NULL; - dput(REISERFS_SB(s)->priv_root); - REISERFS_SB(s)->priv_root = NULL; - } - - kill_block_super(s); -} - -static void reiserfs_put_super(struct super_block *s) -{ - struct reiserfs_transaction_handle th; - th.t_trans_id = 0; - - dquot_disable(s, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); - - reiserfs_write_lock(s); - - if (s->s_dirt) - reiserfs_write_super(s); - - /* change file system state to current state if it was mounted with read-write permissions */ - if (!(s->s_flags & MS_RDONLY)) { - if (!journal_begin(&th, s, 10)) { - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), - 1); - set_sb_umount_state(SB_DISK_SUPER_BLOCK(s), - REISERFS_SB(s)->s_mount_state); - journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); - } - } - - /* note, journal_release checks for readonly mount, and can decide not - ** to do a journal_end - */ - journal_release(&th, s); - - reiserfs_free_bitmap_cache(s); - - brelse(SB_BUFFER_WITH_SB(s)); - - print_statistics(s); - - if (REISERFS_SB(s)->reserved_blocks != 0) { - reiserfs_warning(s, "green-2005", "reserved blocks left %d", - REISERFS_SB(s)->reserved_blocks); - } - 
- reiserfs_proc_info_done(s); - - reiserfs_write_unlock(s); - mutex_destroy(&REISERFS_SB(s)->lock); - kfree(s->s_fs_info); - s->s_fs_info = NULL; -} - -static struct kmem_cache *reiserfs_inode_cachep; - -static struct inode *reiserfs_alloc_inode(struct super_block *sb) -{ - struct reiserfs_inode_info *ei; - ei = (struct reiserfs_inode_info *) - kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL); - if (!ei) - return NULL; - atomic_set(&ei->openers, 0); - mutex_init(&ei->tailpack); - return &ei->vfs_inode; -} - -static void reiserfs_i_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); -} - -static void reiserfs_destroy_inode(struct inode *inode) -{ - call_rcu(&inode->i_rcu, reiserfs_i_callback); -} - -static void init_once(void *foo) -{ - struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; - - INIT_LIST_HEAD(&ei->i_prealloc_list); - inode_init_once(&ei->vfs_inode); -} - -static int init_inodecache(void) -{ - reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache", - sizeof(struct - reiserfs_inode_info), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - init_once); - if (reiserfs_inode_cachep == NULL) - return -ENOMEM; - return 0; -} - -static void destroy_inodecache(void) -{ - kmem_cache_destroy(reiserfs_inode_cachep); -} - -/* we don't mark inodes dirty, we just log them */ -static void reiserfs_dirty_inode(struct inode *inode, int flags) -{ - struct reiserfs_transaction_handle th; - - int err = 0; - int lock_depth; - - if (inode->i_sb->s_flags & MS_RDONLY) { - reiserfs_warning(inode->i_sb, "clm-6006", - "writing inode %lu on readonly FS", - inode->i_ino); - return; - } - lock_depth = reiserfs_write_lock_once(inode->i_sb); - - /* this is really only used for atime updates, so they don't have - ** to be included in O_SYNC or fsync - */ - err = journal_begin(&th, inode->i_sb, 1); - if (err) - goto out; - - reiserfs_update_sd(&th, 
inode); - journal_end(&th, inode->i_sb, 1); - -out: - reiserfs_write_unlock_once(inode->i_sb, lock_depth); -} - -static int reiserfs_show_options(struct seq_file *seq, struct dentry *root) -{ - struct super_block *s = root->d_sb; - struct reiserfs_journal *journal = SB_JOURNAL(s); - long opts = REISERFS_SB(s)->s_mount_opt; - - if (opts & (1 << REISERFS_LARGETAIL)) - seq_puts(seq, ",tails=on"); - else if (!(opts & (1 << REISERFS_SMALLTAIL))) - seq_puts(seq, ",notail"); - /* tails=small is default so we don't show it */ - - if (!(opts & (1 << REISERFS_BARRIER_FLUSH))) - seq_puts(seq, ",barrier=none"); - /* barrier=flush is default so we don't show it */ - - if (opts & (1 << REISERFS_ERROR_CONTINUE)) - seq_puts(seq, ",errors=continue"); - else if (opts & (1 << REISERFS_ERROR_PANIC)) - seq_puts(seq, ",errors=panic"); - /* errors=ro is default so we don't show it */ - - if (opts & (1 << REISERFS_DATA_LOG)) - seq_puts(seq, ",data=journal"); - else if (opts & (1 << REISERFS_DATA_WRITEBACK)) - seq_puts(seq, ",data=writeback"); - /* data=ordered is default so we don't show it */ - - if (opts & (1 << REISERFS_ATTRS)) - seq_puts(seq, ",attrs"); - - if (opts & (1 << REISERFS_XATTRS_USER)) - seq_puts(seq, ",user_xattr"); - - if (opts & (1 << REISERFS_EXPOSE_PRIVROOT)) - seq_puts(seq, ",expose_privroot"); - - if (opts & (1 << REISERFS_POSIXACL)) - seq_puts(seq, ",acl"); - - if (REISERFS_SB(s)->s_jdev) - seq_printf(seq, ",jdev=%s", REISERFS_SB(s)->s_jdev); - - if (journal->j_max_commit_age != journal->j_default_max_commit_age) - seq_printf(seq, ",commit=%d", journal->j_max_commit_age); - -#ifdef CONFIG_QUOTA - if (REISERFS_SB(s)->s_qf_names[USRQUOTA]) - seq_printf(seq, ",usrjquota=%s", REISERFS_SB(s)->s_qf_names[USRQUOTA]); - else if (opts & (1 << REISERFS_USRQUOTA)) - seq_puts(seq, ",usrquota"); - if (REISERFS_SB(s)->s_qf_names[GRPQUOTA]) - seq_printf(seq, ",grpjquota=%s", REISERFS_SB(s)->s_qf_names[GRPQUOTA]); - else if (opts & (1 << REISERFS_GRPQUOTA)) - seq_puts(seq, 
",grpquota"); - if (REISERFS_SB(s)->s_jquota_fmt) { - if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_OLD) - seq_puts(seq, ",jqfmt=vfsold"); - else if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_V0) - seq_puts(seq, ",jqfmt=vfsv0"); - } -#endif - - /* Block allocator options */ - if (opts & (1 << REISERFS_NO_BORDER)) - seq_puts(seq, ",block-allocator=noborder"); - if (opts & (1 << REISERFS_NO_UNHASHED_RELOCATION)) - seq_puts(seq, ",block-allocator=no_unhashed_relocation"); - if (opts & (1 << REISERFS_HASHED_RELOCATION)) - seq_puts(seq, ",block-allocator=hashed_relocation"); - if (opts & (1 << REISERFS_TEST4)) - seq_puts(seq, ",block-allocator=test4"); - show_alloc_options(seq, s); - return 0; -} - -#ifdef CONFIG_QUOTA -static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, - size_t, loff_t); -static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t, - loff_t); -#endif - -static const struct super_operations reiserfs_sops = { - .alloc_inode = reiserfs_alloc_inode, - .destroy_inode = reiserfs_destroy_inode, - .write_inode = reiserfs_write_inode, - .dirty_inode = reiserfs_dirty_inode, - .evict_inode = reiserfs_evict_inode, - .put_super = reiserfs_put_super, - .write_super = reiserfs_write_super, - .sync_fs = reiserfs_sync_fs, - .freeze_fs = reiserfs_freeze, - .unfreeze_fs = reiserfs_unfreeze, - .statfs = reiserfs_statfs, - .remount_fs = reiserfs_remount, - .show_options = reiserfs_show_options, -#ifdef CONFIG_QUOTA - .quota_read = reiserfs_quota_read, - .quota_write = reiserfs_quota_write, -#endif -}; - -#ifdef CONFIG_QUOTA -#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") - -static int reiserfs_write_dquot(struct dquot *); -static int reiserfs_acquire_dquot(struct dquot *); -static int reiserfs_release_dquot(struct dquot *); -static int reiserfs_mark_dquot_dirty(struct dquot *); -static int reiserfs_write_info(struct super_block *, int); -static int reiserfs_quota_on(struct super_block *, int, int, struct path *); - -static 
const struct dquot_operations reiserfs_quota_operations = { - .write_dquot = reiserfs_write_dquot, - .acquire_dquot = reiserfs_acquire_dquot, - .release_dquot = reiserfs_release_dquot, - .mark_dirty = reiserfs_mark_dquot_dirty, - .write_info = reiserfs_write_info, - .alloc_dquot = dquot_alloc, - .destroy_dquot = dquot_destroy, -}; - -static const struct quotactl_ops reiserfs_qctl_operations = { - .quota_on = reiserfs_quota_on, - .quota_off = dquot_quota_off, - .quota_sync = dquot_quota_sync, - .get_info = dquot_get_dqinfo, - .set_info = dquot_set_dqinfo, - .get_dqblk = dquot_get_dqblk, - .set_dqblk = dquot_set_dqblk, -}; -#endif - -static const struct export_operations reiserfs_export_ops = { - .encode_fh = reiserfs_encode_fh, - .fh_to_dentry = reiserfs_fh_to_dentry, - .fh_to_parent = reiserfs_fh_to_parent, - .get_parent = reiserfs_get_parent, -}; - -/* this struct is used in reiserfs_getopt () for containing the value for those - mount options that have values rather than being toggles. */ -typedef struct { - char *value; - int setmask; /* bitmask which is to set on mount_options bitmask when this - value is found, 0 is no bits are to be changed. */ - int clrmask; /* bitmask which is to clear on mount_options bitmask when this - value is found, 0 is no bits are to be changed. This is - applied BEFORE setmask */ -} arg_desc_t; - -/* Set this bit in arg_required to allow empty arguments */ -#define REISERFS_OPT_ALLOWEMPTY 31 - -/* this struct is used in reiserfs_getopt() for describing the set of reiserfs - mount options */ -typedef struct { - char *option_name; - int arg_required; /* 0 if argument is not required, not 0 otherwise */ - const arg_desc_t *values; /* list of values accepted by an option */ - int setmask; /* bitmask which is to set on mount_options bitmask when this - value is found, 0 is no bits are to be changed. */ - int clrmask; /* bitmask which is to clear on mount_options bitmask when this - value is found, 0 is no bits are to be changed. 
This is - applied BEFORE setmask */ -} opt_desc_t; - -/* possible values for -o data= */ -static const arg_desc_t logging_mode[] = { - {"ordered", 1 << REISERFS_DATA_ORDERED, - (1 << REISERFS_DATA_LOG | 1 << REISERFS_DATA_WRITEBACK)}, - {"journal", 1 << REISERFS_DATA_LOG, - (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_WRITEBACK)}, - {"writeback", 1 << REISERFS_DATA_WRITEBACK, - (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_LOG)}, - {.value = NULL} -}; - -/* possible values for -o barrier= */ -static const arg_desc_t barrier_mode[] = { - {"none", 1 << REISERFS_BARRIER_NONE, 1 << REISERFS_BARRIER_FLUSH}, - {"flush", 1 << REISERFS_BARRIER_FLUSH, 1 << REISERFS_BARRIER_NONE}, - {.value = NULL} -}; - -/* possible values for "-o block-allocator=" and bits which are to be set in - s_mount_opt of reiserfs specific part of in-core super block */ -static const arg_desc_t balloc[] = { - {"noborder", 1 << REISERFS_NO_BORDER, 0}, - {"border", 0, 1 << REISERFS_NO_BORDER}, - {"no_unhashed_relocation", 1 << REISERFS_NO_UNHASHED_RELOCATION, 0}, - {"hashed_relocation", 1 << REISERFS_HASHED_RELOCATION, 0}, - {"test4", 1 << REISERFS_TEST4, 0}, - {"notest4", 0, 1 << REISERFS_TEST4}, - {NULL, 0, 0} -}; - -static const arg_desc_t tails[] = { - {"on", 1 << REISERFS_LARGETAIL, 1 << REISERFS_SMALLTAIL}, - {"off", 0, (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)}, - {"small", 1 << REISERFS_SMALLTAIL, 1 << REISERFS_LARGETAIL}, - {NULL, 0, 0} -}; - -static const arg_desc_t error_actions[] = { - {"panic", 1 << REISERFS_ERROR_PANIC, - (1 << REISERFS_ERROR_RO | 1 << REISERFS_ERROR_CONTINUE)}, - {"ro-remount", 1 << REISERFS_ERROR_RO, - (1 << REISERFS_ERROR_PANIC | 1 << REISERFS_ERROR_CONTINUE)}, -#ifdef REISERFS_JOURNAL_ERROR_ALLOWS_NO_LOG - {"continue", 1 << REISERFS_ERROR_CONTINUE, - (1 << REISERFS_ERROR_PANIC | 1 << REISERFS_ERROR_RO)}, -#endif - {NULL, 0, 0}, -}; - -/* proceed only one option from a list *cur - string containing of mount options - opts - array of options which 
are accepted - opt_arg - if option is found and requires an argument and if it is specifed - in the input - pointer to the argument is stored here - bit_flags - if option requires to set a certain bit - it is set here - return -1 if unknown option is found, opt->arg_required otherwise */ -static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts, - char **opt_arg, unsigned long *bit_flags) -{ - char *p; - /* foo=bar, - ^ ^ ^ - | | +-- option_end - | +-- arg_start - +-- option_start - */ - const opt_desc_t *opt; - const arg_desc_t *arg; - - p = *cur; - - /* assume argument cannot contain commas */ - *cur = strchr(p, ','); - if (*cur) { - *(*cur) = '\0'; - (*cur)++; - } - - if (!strncmp(p, "alloc=", 6)) { - /* Ugly special case, probably we should redo options parser so that - it can understand several arguments for some options, also so that - it can fill several bitfields with option values. */ - if (reiserfs_parse_alloc_options(s, p + 6)) { - return -1; - } else { - return 0; - } - } - - /* for every option in the list */ - for (opt = opts; opt->option_name; opt++) { - if (!strncmp(p, opt->option_name, strlen(opt->option_name))) { - if (bit_flags) { - if (opt->clrmask == - (1 << REISERFS_UNSUPPORTED_OPT)) - reiserfs_warning(s, "super-6500", - "%s not supported.\n", - p); - else - *bit_flags &= ~opt->clrmask; - if (opt->setmask == - (1 << REISERFS_UNSUPPORTED_OPT)) - reiserfs_warning(s, "super-6501", - "%s not supported.\n", - p); - else - *bit_flags |= opt->setmask; - } - break; - } - } - if (!opt->option_name) { - reiserfs_warning(s, "super-6502", - "unknown mount option \"%s\"", p); - return -1; - } - - p += strlen(opt->option_name); - switch (*p) { - case '=': - if (!opt->arg_required) { - reiserfs_warning(s, "super-6503", - "the option \"%s\" does not " - "require an argument\n", - opt->option_name); - return -1; - } - break; - - case 0: - if (opt->arg_required) { - reiserfs_warning(s, "super-6504", - "the option \"%s\" requires an " - 
"argument\n", opt->option_name); - return -1; - } - break; - default: - reiserfs_warning(s, "super-6505", - "head of option \"%s\" is only correct\n", - opt->option_name); - return -1; - } - - /* move to the argument, or to next option if argument is not required */ - p++; - - if (opt->arg_required - && !(opt->arg_required & (1 << REISERFS_OPT_ALLOWEMPTY)) - && !strlen(p)) { - /* this catches "option=," if not allowed */ - reiserfs_warning(s, "super-6506", - "empty argument for \"%s\"\n", - opt->option_name); - return -1; - } - - if (!opt->values) { - /* *=NULLopt_arg contains pointer to argument */ - *opt_arg = p; - return opt->arg_required & ~(1 << REISERFS_OPT_ALLOWEMPTY); - } - - /* values possible for this option are listed in opt->values */ - for (arg = opt->values; arg->value; arg++) { - if (!strcmp(p, arg->value)) { - if (bit_flags) { - *bit_flags &= ~arg->clrmask; - *bit_flags |= arg->setmask; - } - return opt->arg_required; - } - } - - reiserfs_warning(s, "super-6506", - "bad value \"%s\" for option \"%s\"\n", p, - opt->option_name); - return -1; -} - -/* returns 0 if something is wrong in option string, 1 - otherwise */ -static int reiserfs_parse_options(struct super_block *s, char *options, /* string given via mount's -o */ - unsigned long *mount_options, - /* after the parsing phase, contains the - collection of bitflags defining what - mount options were selected. 
*/ - unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */ - char **jdev_name, - unsigned int *commit_max_age, - char **qf_names, - unsigned int *qfmt) -{ - int c; - char *arg = NULL; - char *pos; - opt_desc_t opts[] = { - /* Compatibility stuff, so that -o notail for old setups still work */ - {"tails",.arg_required = 't',.values = tails}, - {"notail",.clrmask = - (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)}, - {"conv",.setmask = 1 << REISERFS_CONVERT}, - {"attrs",.setmask = 1 << REISERFS_ATTRS}, - {"noattrs",.clrmask = 1 << REISERFS_ATTRS}, - {"expose_privroot", .setmask = 1 << REISERFS_EXPOSE_PRIVROOT}, -#ifdef CONFIG_REISERFS_FS_XATTR - {"user_xattr",.setmask = 1 << REISERFS_XATTRS_USER}, - {"nouser_xattr",.clrmask = 1 << REISERFS_XATTRS_USER}, -#else - {"user_xattr",.setmask = 1 << REISERFS_UNSUPPORTED_OPT}, - {"nouser_xattr",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT}, -#endif -#ifdef CONFIG_REISERFS_FS_POSIX_ACL - {"acl",.setmask = 1 << REISERFS_POSIXACL}, - {"noacl",.clrmask = 1 << REISERFS_POSIXACL}, -#else - {"acl",.setmask = 1 << REISERFS_UNSUPPORTED_OPT}, - {"noacl",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT}, -#endif - {.option_name = "nolog"}, - {"replayonly",.setmask = 1 << REPLAYONLY}, - {"block-allocator",.arg_required = 'a',.values = balloc}, - {"data",.arg_required = 'd',.values = logging_mode}, - {"barrier",.arg_required = 'b',.values = barrier_mode}, - {"resize",.arg_required = 'r',.values = NULL}, - {"jdev",.arg_required = 'j',.values = NULL}, - {"nolargeio",.arg_required = 'w',.values = NULL}, - {"commit",.arg_required = 'c',.values = NULL}, - {"usrquota",.setmask = 1 << REISERFS_USRQUOTA}, - {"grpquota",.setmask = 1 << REISERFS_GRPQUOTA}, - {"noquota",.clrmask = 1 << REISERFS_USRQUOTA | 1 << REISERFS_GRPQUOTA}, - {"errors",.arg_required = 'e',.values = error_actions}, - {"usrjquota",.arg_required = - 'u' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL}, - {"grpjquota",.arg_required = - 'g' | (1 << 
REISERFS_OPT_ALLOWEMPTY),.values = NULL}, - {"jqfmt",.arg_required = 'f',.values = NULL}, - {.option_name = NULL} - }; - - *blocks = 0; - if (!options || !*options) - /* use default configuration: create tails, journaling on, no - conversion to newest format */ - return 1; - - for (pos = options; pos;) { - c = reiserfs_getopt(s, &pos, opts, &arg, mount_options); - if (c == -1) - /* wrong option is given */ - return 0; - - if (c == 'r') { - char *p; - - p = NULL; - /* "resize=NNN" or "resize=auto" */ - - if (!strcmp(arg, "auto")) { - /* From JFS code, to auto-get the size. */ - *blocks = - s->s_bdev->bd_inode->i_size >> s-> - s_blocksize_bits; - } else { - *blocks = simple_strtoul(arg, &p, 0); - if (*p != '\0') { - /* NNN does not look like a number */ - reiserfs_warning(s, "super-6507", - "bad value %s for " - "-oresize\n", arg); - return 0; - } - } - } - - if (c == 'c') { - char *p = NULL; - unsigned long val = simple_strtoul(arg, &p, 0); - /* commit=NNN (time in seconds) */ - if (*p != '\0' || val >= (unsigned int)-1) { - reiserfs_warning(s, "super-6508", - "bad value %s for -ocommit\n", - arg); - return 0; - } - *commit_max_age = (unsigned int)val; - } - - if (c == 'w') { - reiserfs_warning(s, "super-6509", "nolargeio option " - "is no longer supported"); - return 0; - } - - if (c == 'j') { - if (arg && *arg && jdev_name) { - if (*jdev_name) { //Hm, already assigned? - reiserfs_warning(s, "super-6510", - "journal device was " - "already specified to " - "be %s", *jdev_name); - return 0; - } - *jdev_name = arg; - } - } -#ifdef CONFIG_QUOTA - if (c == 'u' || c == 'g') { - int qtype = c == 'u' ? USRQUOTA : GRPQUOTA; - - if (sb_any_quota_loaded(s) && - (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) { - reiserfs_warning(s, "super-6511", - "cannot change journaled " - "quota options when quota " - "turned on."); - return 0; - } - if (*arg) { /* Some filename specified? 
*/ - if (REISERFS_SB(s)->s_qf_names[qtype] - && strcmp(REISERFS_SB(s)->s_qf_names[qtype], - arg)) { - reiserfs_warning(s, "super-6512", - "%s quota file " - "already specified.", - QTYPE2NAME(qtype)); - return 0; - } - if (strchr(arg, '/')) { - reiserfs_warning(s, "super-6513", - "quotafile must be " - "on filesystem root."); - return 0; - } - qf_names[qtype] = - kmalloc(strlen(arg) + 1, GFP_KERNEL); - if (!qf_names[qtype]) { - reiserfs_warning(s, "reiserfs-2502", - "not enough memory " - "for storing " - "quotafile name."); - return 0; - } - strcpy(qf_names[qtype], arg); - if (qtype == USRQUOTA) - *mount_options |= 1 << REISERFS_USRQUOTA; - else - *mount_options |= 1 << REISERFS_GRPQUOTA; - } else { - if (qf_names[qtype] != - REISERFS_SB(s)->s_qf_names[qtype]) - kfree(qf_names[qtype]); - qf_names[qtype] = NULL; - if (qtype == USRQUOTA) - *mount_options &= ~(1 << REISERFS_USRQUOTA); - else - *mount_options &= ~(1 << REISERFS_GRPQUOTA); - } - } - if (c == 'f') { - if (!strcmp(arg, "vfsold")) - *qfmt = QFMT_VFS_OLD; - else if (!strcmp(arg, "vfsv0")) - *qfmt = QFMT_VFS_V0; - else { - reiserfs_warning(s, "super-6514", - "unknown quota format " - "specified."); - return 0; - } - if (sb_any_quota_loaded(s) && - *qfmt != REISERFS_SB(s)->s_jquota_fmt) { - reiserfs_warning(s, "super-6515", - "cannot change journaled " - "quota options when quota " - "turned on."); - return 0; - } - } -#else - if (c == 'u' || c == 'g' || c == 'f') { - reiserfs_warning(s, "reiserfs-2503", "journaled " - "quota options not supported."); - return 0; - } -#endif - } - -#ifdef CONFIG_QUOTA - if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt - && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) { - reiserfs_warning(s, "super-6515", - "journaled quota format not specified."); - return 0; - } - if ((!(*mount_options & (1 << REISERFS_USRQUOTA)) && - sb_has_quota_loaded(s, USRQUOTA)) || - (!(*mount_options & (1 << REISERFS_GRPQUOTA)) && - sb_has_quota_loaded(s, GRPQUOTA))) { - reiserfs_warning(s, "super-6516", 
"quota options must " - "be present when quota is turned on."); - return 0; - } -#endif - - return 1; -} - -static void switch_data_mode(struct super_block *s, unsigned long mode) -{ - REISERFS_SB(s)->s_mount_opt &= ~((1 << REISERFS_DATA_LOG) | - (1 << REISERFS_DATA_ORDERED) | - (1 << REISERFS_DATA_WRITEBACK)); - REISERFS_SB(s)->s_mount_opt |= (1 << mode); -} - -static void handle_data_mode(struct super_block *s, unsigned long mount_options) -{ - if (mount_options & (1 << REISERFS_DATA_LOG)) { - if (!reiserfs_data_log(s)) { - switch_data_mode(s, REISERFS_DATA_LOG); - reiserfs_info(s, "switching to journaled data mode\n"); - } - } else if (mount_options & (1 << REISERFS_DATA_ORDERED)) { - if (!reiserfs_data_ordered(s)) { - switch_data_mode(s, REISERFS_DATA_ORDERED); - reiserfs_info(s, "switching to ordered data mode\n"); - } - } else if (mount_options & (1 << REISERFS_DATA_WRITEBACK)) { - if (!reiserfs_data_writeback(s)) { - switch_data_mode(s, REISERFS_DATA_WRITEBACK); - reiserfs_info(s, "switching to writeback data mode\n"); - } - } -} - -static void handle_barrier_mode(struct super_block *s, unsigned long bits) -{ - int flush = (1 << REISERFS_BARRIER_FLUSH); - int none = (1 << REISERFS_BARRIER_NONE); - int all_barrier = flush | none; - - if (bits & all_barrier) { - REISERFS_SB(s)->s_mount_opt &= ~all_barrier; - if (bits & flush) { - REISERFS_SB(s)->s_mount_opt |= flush; - printk("reiserfs: enabling write barrier flush mode\n"); - } else if (bits & none) { - REISERFS_SB(s)->s_mount_opt |= none; - printk("reiserfs: write barriers turned off\n"); - } - } -} - -static void handle_attrs(struct super_block *s) -{ - struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s); - - if (reiserfs_attrs(s)) { - if (old_format_only(s)) { - reiserfs_warning(s, "super-6517", "cannot support " - "attributes on 3.5.x disk format"); - REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); - return; - } - if (!(le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared)) { - 
reiserfs_warning(s, "super-6518", "cannot support " - "attributes until flag is set in " - "super-block"); - REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); - } - } -} - -#ifdef CONFIG_QUOTA -static void handle_quota_files(struct super_block *s, char **qf_names, - unsigned int *qfmt) -{ - int i; - - for (i = 0; i < MAXQUOTAS; i++) { - if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i]) - kfree(REISERFS_SB(s)->s_qf_names[i]); - REISERFS_SB(s)->s_qf_names[i] = qf_names[i]; - } - if (*qfmt) - REISERFS_SB(s)->s_jquota_fmt = *qfmt; -} -#endif - -static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) -{ - struct reiserfs_super_block *rs; - struct reiserfs_transaction_handle th; - unsigned long blocks; - unsigned long mount_options = REISERFS_SB(s)->s_mount_opt; - unsigned long safe_mask = 0; - unsigned int commit_max_age = (unsigned int)-1; - struct reiserfs_journal *journal = SB_JOURNAL(s); - char *new_opts = kstrdup(arg, GFP_KERNEL); - int err; - char *qf_names[MAXQUOTAS]; - unsigned int qfmt = 0; -#ifdef CONFIG_QUOTA - int i; -#endif - - reiserfs_write_lock(s); - -#ifdef CONFIG_QUOTA - memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names)); -#endif - - rs = SB_DISK_SUPER_BLOCK(s); - - if (!reiserfs_parse_options - (s, arg, &mount_options, &blocks, NULL, &commit_max_age, - qf_names, &qfmt)) { -#ifdef CONFIG_QUOTA - for (i = 0; i < MAXQUOTAS; i++) - if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i]) - kfree(qf_names[i]); -#endif - err = -EINVAL; - goto out_err; - } -#ifdef CONFIG_QUOTA - handle_quota_files(s, qf_names, &qfmt); -#endif - - handle_attrs(s); - - /* Add options that are safe here */ - safe_mask |= 1 << REISERFS_SMALLTAIL; - safe_mask |= 1 << REISERFS_LARGETAIL; - safe_mask |= 1 << REISERFS_NO_BORDER; - safe_mask |= 1 << REISERFS_NO_UNHASHED_RELOCATION; - safe_mask |= 1 << REISERFS_HASHED_RELOCATION; - safe_mask |= 1 << REISERFS_TEST4; - safe_mask |= 1 << REISERFS_ATTRS; - safe_mask |= 1 << REISERFS_XATTRS_USER; - 
safe_mask |= 1 << REISERFS_POSIXACL; - safe_mask |= 1 << REISERFS_BARRIER_FLUSH; - safe_mask |= 1 << REISERFS_BARRIER_NONE; - safe_mask |= 1 << REISERFS_ERROR_RO; - safe_mask |= 1 << REISERFS_ERROR_CONTINUE; - safe_mask |= 1 << REISERFS_ERROR_PANIC; - safe_mask |= 1 << REISERFS_USRQUOTA; - safe_mask |= 1 << REISERFS_GRPQUOTA; - - /* Update the bitmask, taking care to keep - * the bits we're not allowed to change here */ - REISERFS_SB(s)->s_mount_opt = - (REISERFS_SB(s)-> - s_mount_opt & ~safe_mask) | (mount_options & safe_mask); - - if (commit_max_age != 0 && commit_max_age != (unsigned int)-1) { - journal->j_max_commit_age = commit_max_age; - journal->j_max_trans_age = commit_max_age; - } else if (commit_max_age == 0) { - /* 0 means restore defaults. */ - journal->j_max_commit_age = journal->j_default_max_commit_age; - journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; - } - - if (blocks) { - err = reiserfs_resize(s, blocks); - if (err != 0) - goto out_err; - } - - if (*mount_flags & MS_RDONLY) { - reiserfs_xattr_init(s, *mount_flags); - /* remount read-only */ - if (s->s_flags & MS_RDONLY) - /* it is read-only already */ - goto out_ok; - - err = dquot_suspend(s, -1); - if (err < 0) - goto out_err; - - /* try to remount file system with read-only permissions */ - if (sb_umount_state(rs) == REISERFS_VALID_FS - || REISERFS_SB(s)->s_mount_state != REISERFS_VALID_FS) { - goto out_ok; - } - - err = journal_begin(&th, s, 10); - if (err) - goto out_err; - - /* Mounting a rw partition read-only. 
*/ - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); - set_sb_umount_state(rs, REISERFS_SB(s)->s_mount_state); - journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); - } else { - /* remount read-write */ - if (!(s->s_flags & MS_RDONLY)) { - reiserfs_xattr_init(s, *mount_flags); - goto out_ok; /* We are read-write already */ - } - - if (reiserfs_is_journal_aborted(journal)) { - err = journal->j_errno; - goto out_err; - } - - handle_data_mode(s, mount_options); - handle_barrier_mode(s, mount_options); - REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); - s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */ - err = journal_begin(&th, s, 10); - if (err) - goto out_err; - - /* Mount a partition which is read-only, read-write */ - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); - REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); - s->s_flags &= ~MS_RDONLY; - set_sb_umount_state(rs, REISERFS_ERROR_FS); - if (!old_format_only(s)) - set_sb_mnt_count(rs, sb_mnt_count(rs) + 1); - /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */ - journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); - REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS; - } - /* this will force a full flush of all journal lists */ - SB_JOURNAL(s)->j_must_wait = 1; - err = journal_end(&th, s, 10); - if (err) - goto out_err; - s->s_dirt = 0; - - if (!(*mount_flags & MS_RDONLY)) { - dquot_resume(s, -1); - finish_unfinished(s); - reiserfs_xattr_init(s, *mount_flags); - } - -out_ok: - replace_mount_options(s, new_opts); - reiserfs_write_unlock(s); - return 0; - -out_err: - kfree(new_opts); - reiserfs_write_unlock(s); - return err; -} - -static int read_super_block(struct super_block *s, int offset) -{ - struct buffer_head *bh; - struct reiserfs_super_block *rs; - int fs_blocksize; - - bh = sb_bread(s, offset / s->s_blocksize); - if (!bh) { - reiserfs_warning(s, "sh-2006", - "bread failed (dev %s, block %lu, size %lu)", - reiserfs_bdevname(s), offset / s->s_blocksize, - 
s->s_blocksize); - return 1; - } - - rs = (struct reiserfs_super_block *)bh->b_data; - if (!is_any_reiserfs_magic_string(rs)) { - brelse(bh); - return 1; - } - // - // ok, reiserfs signature (old or new) found in at the given offset - // - fs_blocksize = sb_blocksize(rs); - brelse(bh); - sb_set_blocksize(s, fs_blocksize); - - bh = sb_bread(s, offset / s->s_blocksize); - if (!bh) { - reiserfs_warning(s, "sh-2007", - "bread failed (dev %s, block %lu, size %lu)", - reiserfs_bdevname(s), offset / s->s_blocksize, - s->s_blocksize); - return 1; - } - - rs = (struct reiserfs_super_block *)bh->b_data; - if (sb_blocksize(rs) != s->s_blocksize) { - reiserfs_warning(s, "sh-2011", "can't find a reiserfs " - "filesystem on (dev %s, block %Lu, size %lu)", - reiserfs_bdevname(s), - (unsigned long long)bh->b_blocknr, - s->s_blocksize); - brelse(bh); - return 1; - } - - if (rs->s_v1.s_root_block == cpu_to_le32(-1)) { - brelse(bh); - reiserfs_warning(s, "super-6519", "Unfinished reiserfsck " - "--rebuild-tree run detected. Please run\n" - "reiserfsck --rebuild-tree and wait for a " - "completion. If that fails\n" - "get newer reiserfsprogs package"); - return 1; - } - - SB_BUFFER_WITH_SB(s) = bh; - SB_DISK_SUPER_BLOCK(s) = rs; - - if (is_reiserfs_jr(rs)) { - /* magic is of non-standard journal filesystem, look at s_version to - find which format is in use */ - if (sb_version(rs) == REISERFS_VERSION_2) - reiserfs_info(s, "found reiserfs format \"3.6\"" - " with non-standard journal\n"); - else if (sb_version(rs) == REISERFS_VERSION_1) - reiserfs_info(s, "found reiserfs format \"3.5\"" - " with non-standard journal\n"); - else { - reiserfs_warning(s, "sh-2012", "found unknown " - "format \"%u\" of reiserfs with " - "non-standard magic", sb_version(rs)); - return 1; - } - } else - /* s_version of standard format may contain incorrect information, - so we just look at the magic string */ - reiserfs_info(s, - "found reiserfs format \"%s\" with standard journal\n", - is_reiserfs_3_5(rs) ? 
"3.5" : "3.6"); - - s->s_op = &reiserfs_sops; - s->s_export_op = &reiserfs_export_ops; -#ifdef CONFIG_QUOTA - s->s_qcop = &reiserfs_qctl_operations; - s->dq_op = &reiserfs_quota_operations; -#endif - - /* new format is limited by the 32 bit wide i_blocks field, want to - ** be one full block below that. - */ - s->s_maxbytes = (512LL << 32) - s->s_blocksize; - return 0; -} - -/* after journal replay, reread all bitmap and super blocks */ -static int reread_meta_blocks(struct super_block *s) -{ - ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); - wait_on_buffer(SB_BUFFER_WITH_SB(s)); - if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { - reiserfs_warning(s, "reiserfs-2504", "error reading the super"); - return 1; - } - - return 0; -} - -///////////////////////////////////////////////////// -// hash detection stuff - -// if root directory is empty - we set default - Yura's - hash and -// warn about it -// FIXME: we look for only one name in a directory. If tea and yura -// bith have the same value - we ask user to send report to the -// mailing list -static __u32 find_hash_out(struct super_block *s) -{ - int retval; - struct inode *inode; - struct cpu_key key; - INITIALIZE_PATH(path); - struct reiserfs_dir_entry de; - __u32 hash = DEFAULT_HASH; - - inode = s->s_root->d_inode; - - do { // Some serious "goto"-hater was there ;) - u32 teahash, r5hash, yurahash; - - make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3); - retval = search_by_entry_key(s, &key, &path, &de); - if (retval == IO_ERROR) { - pathrelse(&path); - return UNSET_HASH; - } - if (retval == NAME_NOT_FOUND) - de.de_entry_num--; - set_de_name_and_namelen(&de); - if (deh_offset(&(de.de_deh[de.de_entry_num])) == DOT_DOT_OFFSET) { - /* allow override in this case */ - if (reiserfs_rupasov_hash(s)) { - hash = YURA_HASH; - } - reiserfs_info(s, "FS seems to be empty, autodetect " - "is using the default hash\n"); - break; - } - r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen)); - teahash = 
GET_HASH_VALUE(keyed_hash(de.de_name, de.de_namelen)); - yurahash = GET_HASH_VALUE(yura_hash(de.de_name, de.de_namelen)); - if (((teahash == r5hash) - && - (GET_HASH_VALUE(deh_offset(&(de.de_deh[de.de_entry_num]))) - == r5hash)) || ((teahash == yurahash) - && (yurahash == - GET_HASH_VALUE(deh_offset - (& - (de. - de_deh[de. - de_entry_num]))))) - || ((r5hash == yurahash) - && (yurahash == - GET_HASH_VALUE(deh_offset - (&(de.de_deh[de.de_entry_num])))))) { - reiserfs_warning(s, "reiserfs-2506", "Unable to " - "automatically detect hash function. " - "Please mount with -o " - "hash={tea,rupasov,r5}"); - hash = UNSET_HASH; - break; - } - if (GET_HASH_VALUE(deh_offset(&(de.de_deh[de.de_entry_num]))) == - yurahash) - hash = YURA_HASH; - else if (GET_HASH_VALUE - (deh_offset(&(de.de_deh[de.de_entry_num]))) == teahash) - hash = TEA_HASH; - else if (GET_HASH_VALUE - (deh_offset(&(de.de_deh[de.de_entry_num]))) == r5hash) - hash = R5_HASH; - else { - reiserfs_warning(s, "reiserfs-2506", - "Unrecognised hash function"); - hash = UNSET_HASH; - } - } while (0); - - pathrelse(&path); - return hash; -} - -// finds out which hash names are sorted with -static int what_hash(struct super_block *s) -{ - __u32 code; - - code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s)); - - /* reiserfs_hash_detect() == true if any of the hash mount options - ** were used. 
We must check them to make sure the user isn't - ** using a bad hash value - */ - if (code == UNSET_HASH || reiserfs_hash_detect(s)) - code = find_hash_out(s); - - if (code != UNSET_HASH && reiserfs_hash_detect(s)) { - /* detection has found the hash, and we must check against the - ** mount options - */ - if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { - reiserfs_warning(s, "reiserfs-2507", - "Error, %s hash detected, " - "unable to force rupasov hash", - reiserfs_hashname(code)); - code = UNSET_HASH; - } else if (reiserfs_tea_hash(s) && code != TEA_HASH) { - reiserfs_warning(s, "reiserfs-2508", - "Error, %s hash detected, " - "unable to force tea hash", - reiserfs_hashname(code)); - code = UNSET_HASH; - } else if (reiserfs_r5_hash(s) && code != R5_HASH) { - reiserfs_warning(s, "reiserfs-2509", - "Error, %s hash detected, " - "unable to force r5 hash", - reiserfs_hashname(code)); - code = UNSET_HASH; - } - } else { - /* find_hash_out was not called or could not determine the hash */ - if (reiserfs_rupasov_hash(s)) { - code = YURA_HASH; - } else if (reiserfs_tea_hash(s)) { - code = TEA_HASH; - } else if (reiserfs_r5_hash(s)) { - code = R5_HASH; - } - } - - /* if we are mounted RW, and we have a new valid hash code, update - ** the super - */ - if (code != UNSET_HASH && - !(s->s_flags & MS_RDONLY) && - code != sb_hash_function_code(SB_DISK_SUPER_BLOCK(s))) { - set_sb_hash_function_code(SB_DISK_SUPER_BLOCK(s), code); - } - return code; -} - -// return pointer to appropriate function -static hashf_t hash_function(struct super_block *s) -{ - switch (what_hash(s)) { - case TEA_HASH: - reiserfs_info(s, "Using tea hash to sort names\n"); - return keyed_hash; - case YURA_HASH: - reiserfs_info(s, "Using rupasov hash to sort names\n"); - return yura_hash; - case R5_HASH: - reiserfs_info(s, "Using r5 hash to sort names\n"); - return r5_hash; - } - return NULL; -} - -// this is used to set up correct value for old partitions -static int function2code(hashf_t func) -{ - if 
(func == keyed_hash) - return TEA_HASH; - if (func == yura_hash) - return YURA_HASH; - if (func == r5_hash) - return R5_HASH; - - BUG(); // should never happen - - return 0; -} - -#define SWARN(silent, s, id, ...) \ - if (!(silent)) \ - reiserfs_warning(s, id, __VA_ARGS__) - -static int reiserfs_fill_super(struct super_block *s, void *data, int silent) -{ - struct inode *root_inode; - struct reiserfs_transaction_handle th; - int old_format = 0; - unsigned long blocks; - unsigned int commit_max_age = 0; - int jinit_done = 0; - struct reiserfs_iget_args args; - struct reiserfs_super_block *rs; - char *jdev_name; - struct reiserfs_sb_info *sbi; - int errval = -EINVAL; - char *qf_names[MAXQUOTAS] = {}; - unsigned int qfmt = 0; - - save_mount_options(s, data); - - sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); - if (!sbi) - return -ENOMEM; - s->s_fs_info = sbi; - /* Set default values for options: non-aggressive tails, RO on errors */ - REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL); - REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO); - REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); - /* no preallocation minimum, be smart in - reiserfs_file_write instead */ - REISERFS_SB(s)->s_alloc_options.preallocmin = 0; - /* Preallocate by 16 blocks (17-1) at once */ - REISERFS_SB(s)->s_alloc_options.preallocsize = 17; - /* setup default block allocator options */ - reiserfs_init_alloc_options(s); - - mutex_init(&REISERFS_SB(s)->lock); - REISERFS_SB(s)->lock_depth = -1; - - jdev_name = NULL; - if (reiserfs_parse_options - (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, - &commit_max_age, qf_names, &qfmt) == 0) { - goto error_unlocked; - } - if (jdev_name && jdev_name[0]) { - REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL); - if (!REISERFS_SB(s)->s_jdev) { - SWARN(silent, s, "", "Cannot allocate memory for " - "journal device name"); - goto error; - } - } -#ifdef CONFIG_QUOTA - handle_quota_files(s, qf_names, &qfmt); 
-#endif - - if (blocks) { - SWARN(silent, s, "jmacd-7", "resize option for remount only"); - goto error_unlocked; - } - - /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */ - if (!read_super_block(s, REISERFS_OLD_DISK_OFFSET_IN_BYTES)) - old_format = 1; - /* try new format (64-th 1k block), which can contain reiserfs super block */ - else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { - SWARN(silent, s, "sh-2021", "can not find reiserfs on %s", - reiserfs_bdevname(s)); - goto error_unlocked; - } - - rs = SB_DISK_SUPER_BLOCK(s); - /* Let's do basic sanity check to verify that underlying device is not - smaller than the filesystem. If the check fails then abort and scream, - because bad stuff will happen otherwise. */ - if (s->s_bdev && s->s_bdev->bd_inode - && i_size_read(s->s_bdev->bd_inode) < - sb_block_count(rs) * sb_blocksize(rs)) { - SWARN(silent, s, "", "Filesystem cannot be " - "mounted because it is bigger than the device"); - SWARN(silent, s, "", "You may need to run fsck " - "or increase size of your LVM partition"); - SWARN(silent, s, "", "Or may be you forgot to " - "reboot after fdisk when it told you to"); - goto error_unlocked; - } - - sbi->s_mount_state = SB_REISERFS_STATE(s); - sbi->s_mount_state = REISERFS_VALID_FS; - - if ((errval = reiserfs_init_bitmap_cache(s))) { - SWARN(silent, s, "jmacd-8", "unable to read bitmap"); - goto error_unlocked; - } - - errval = -EINVAL; -#ifdef CONFIG_REISERFS_CHECK - SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON"); - SWARN(silent, s, "", "- it is slow mode for debugging."); -#endif - - /* make data=ordered the default */ - if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) && - !reiserfs_data_writeback(s)) { - REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_DATA_ORDERED); - } - - if (reiserfs_data_log(s)) { - reiserfs_info(s, "using journaled data mode\n"); - } else if (reiserfs_data_ordered(s)) { - reiserfs_info(s, "using ordered data mode\n"); - } else { - 
reiserfs_info(s, "using writeback data mode\n"); - } - if (reiserfs_barrier_flush(s)) { - printk("reiserfs: using flush barriers\n"); - } - - // set_device_ro(s->s_dev, 1) ; - if (journal_init(s, jdev_name, old_format, commit_max_age)) { - SWARN(silent, s, "sh-2022", - "unable to initialize journal space"); - goto error_unlocked; - } else { - jinit_done = 1; /* once this is set, journal_release must be called - ** if we error out of the mount - */ - } - - if (reread_meta_blocks(s)) { - SWARN(silent, s, "jmacd-9", - "unable to reread meta blocks after journal init"); - goto error_unlocked; - } - - if (replay_only(s)) - goto error_unlocked; - - if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) { - SWARN(silent, s, "clm-7000", - "Detected readonly device, marking FS readonly"); - s->s_flags |= MS_RDONLY; - } - args.objectid = REISERFS_ROOT_OBJECTID; - args.dirid = REISERFS_ROOT_PARENT_OBJECTID; - root_inode = - iget5_locked(s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor, - reiserfs_init_locked_inode, (void *)(&args)); - if (!root_inode) { - SWARN(silent, s, "jmacd-10", "get root inode failed"); - goto error_unlocked; - } - - /* - * This path assumed to be called with the BKL in the old times. - * Now we have inherited the big reiserfs lock from it and many - * reiserfs helpers called in the mount path and elsewhere require - * this lock to be held even if it's not always necessary. Let's be - * conservative and hold it early. The window can be reduced after - * careful review of the code. 
- */ - reiserfs_write_lock(s); - - if (root_inode->i_state & I_NEW) { - reiserfs_read_locked_inode(root_inode, &args); - unlock_new_inode(root_inode); - } - - s->s_root = d_make_root(root_inode); - if (!s->s_root) - goto error; - // define and initialize hash function - sbi->s_hash_function = hash_function(s); - if (sbi->s_hash_function == NULL) { - dput(s->s_root); - s->s_root = NULL; - goto error; - } - - if (is_reiserfs_3_5(rs) - || (is_reiserfs_jr(rs) && SB_VERSION(s) == REISERFS_VERSION_1)) - set_bit(REISERFS_3_5, &(sbi->s_properties)); - else if (old_format) - set_bit(REISERFS_OLD_FORMAT, &(sbi->s_properties)); - else - set_bit(REISERFS_3_6, &(sbi->s_properties)); - - if (!(s->s_flags & MS_RDONLY)) { - - errval = journal_begin(&th, s, 1); - if (errval) { - dput(s->s_root); - s->s_root = NULL; - goto error; - } - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); - - set_sb_umount_state(rs, REISERFS_ERROR_FS); - set_sb_fs_state(rs, 0); - - /* Clear out s_bmap_nr if it would wrap. We can handle this - * case, but older revisions can't. This will cause the - * file system to fail mount on those older implementations, - * avoiding corruption. -jeffm */ - if (bmap_would_wrap(reiserfs_bmap_count(s)) && - sb_bmap_nr(rs) != 0) { - reiserfs_warning(s, "super-2030", "This file system " - "claims to use %u bitmap blocks in " - "its super block, but requires %u. " - "Clearing to zero.", sb_bmap_nr(rs), - reiserfs_bmap_count(s)); - - set_sb_bmap_nr(rs, 0); - } - - if (old_format_only(s)) { - /* filesystem of format 3.5 either with standard or non-standard - journal */ - if (convert_reiserfs(s)) { - /* and -o conv is given */ - if (!silent) - reiserfs_info(s, - "converting 3.5 filesystem to the 3.6 format"); - - if (is_reiserfs_3_5(rs)) - /* put magic string of 3.6 format. 
2.2 will not be able to - mount this filesystem anymore */ - memcpy(rs->s_v1.s_magic, - reiserfs_3_6_magic_string, - sizeof - (reiserfs_3_6_magic_string)); - - set_sb_version(rs, REISERFS_VERSION_2); - reiserfs_convert_objectid_map_v1(s); - set_bit(REISERFS_3_6, &(sbi->s_properties)); - clear_bit(REISERFS_3_5, &(sbi->s_properties)); - } else if (!silent) { - reiserfs_info(s, "using 3.5.x disk format\n"); - } - } else - set_sb_mnt_count(rs, sb_mnt_count(rs) + 1); - - - journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); - errval = journal_end(&th, s, 1); - if (errval) { - dput(s->s_root); - s->s_root = NULL; - goto error; - } - - if ((errval = reiserfs_lookup_privroot(s)) || - (errval = reiserfs_xattr_init(s, s->s_flags))) { - dput(s->s_root); - s->s_root = NULL; - goto error; - } - - /* look for files which were to be removed in previous session */ - finish_unfinished(s); - } else { - if (old_format_only(s) && !silent) { - reiserfs_info(s, "using 3.5.x disk format\n"); - } - - if ((errval = reiserfs_lookup_privroot(s)) || - (errval = reiserfs_xattr_init(s, s->s_flags))) { - dput(s->s_root); - s->s_root = NULL; - goto error; - } - } - // mark hash in super block: it could be unset. 
overwrite should be ok - set_sb_hash_function_code(rs, function2code(sbi->s_hash_function)); - - handle_attrs(s); - - reiserfs_proc_info_init(s); - - init_waitqueue_head(&(sbi->s_wait)); - spin_lock_init(&sbi->bitmap_lock); - - reiserfs_write_unlock(s); - - return (0); - -error: - reiserfs_write_unlock(s); - -error_unlocked: - /* kill the commit thread, free journal ram */ - if (jinit_done) { - reiserfs_write_lock(s); - journal_release_error(NULL, s); - reiserfs_write_unlock(s); - } - - reiserfs_free_bitmap_cache(s); - if (SB_BUFFER_WITH_SB(s)) - brelse(SB_BUFFER_WITH_SB(s)); -#ifdef CONFIG_QUOTA - { - int j; - for (j = 0; j < MAXQUOTAS; j++) - kfree(qf_names[j]); - } -#endif - kfree(sbi); - - s->s_fs_info = NULL; - return errval; -} - -static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf) -{ - struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(dentry->d_sb); - - buf->f_namelen = (REISERFS_MAX_NAME(s->s_blocksize)); - buf->f_bfree = sb_free_blocks(rs); - buf->f_bavail = buf->f_bfree; - buf->f_blocks = sb_block_count(rs) - sb_bmap_nr(rs) - 1; - buf->f_bsize = dentry->d_sb->s_blocksize; - /* changed to accommodate gcc folks. 
*/ - buf->f_type = REISERFS_SUPER_MAGIC; - buf->f_fsid.val[0] = (u32)crc32_le(0, rs->s_uuid, sizeof(rs->s_uuid)/2); - buf->f_fsid.val[1] = (u32)crc32_le(0, rs->s_uuid + sizeof(rs->s_uuid)/2, - sizeof(rs->s_uuid)/2); - - return 0; -} - -#ifdef CONFIG_QUOTA -static int reiserfs_write_dquot(struct dquot *dquot) -{ - struct reiserfs_transaction_handle th; - int ret, err; - - reiserfs_write_lock(dquot->dq_sb); - ret = - journal_begin(&th, dquot->dq_sb, - REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); - if (ret) - goto out; - ret = dquot_commit(dquot); - err = - journal_end(&th, dquot->dq_sb, - REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); - if (!ret && err) - ret = err; - out: - reiserfs_write_unlock(dquot->dq_sb); - return ret; -} - -static int reiserfs_acquire_dquot(struct dquot *dquot) -{ - struct reiserfs_transaction_handle th; - int ret, err; - - reiserfs_write_lock(dquot->dq_sb); - ret = - journal_begin(&th, dquot->dq_sb, - REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); - if (ret) - goto out; - ret = dquot_acquire(dquot); - err = - journal_end(&th, dquot->dq_sb, - REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); - if (!ret && err) - ret = err; - out: - reiserfs_write_unlock(dquot->dq_sb); - return ret; -} - -static int reiserfs_release_dquot(struct dquot *dquot) -{ - struct reiserfs_transaction_handle th; - int ret, err; - - reiserfs_write_lock(dquot->dq_sb); - ret = - journal_begin(&th, dquot->dq_sb, - REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); - if (ret) { - /* Release dquot anyway to avoid endless cycle in dqput() */ - dquot_release(dquot); - goto out; - } - ret = dquot_release(dquot); - err = - journal_end(&th, dquot->dq_sb, - REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); - if (!ret && err) - ret = err; - out: - reiserfs_write_unlock(dquot->dq_sb); - return ret; -} - -static int reiserfs_mark_dquot_dirty(struct dquot *dquot) -{ - /* Are we journaling quotas? 
*/ - if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || - REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { - dquot_mark_dquot_dirty(dquot); - return reiserfs_write_dquot(dquot); - } else - return dquot_mark_dquot_dirty(dquot); -} - -static int reiserfs_write_info(struct super_block *sb, int type) -{ - struct reiserfs_transaction_handle th; - int ret, err; - - /* Data block + inode block */ - reiserfs_write_lock(sb); - ret = journal_begin(&th, sb, 2); - if (ret) - goto out; - ret = dquot_commit_info(sb, type); - err = journal_end(&th, sb, 2); - if (!ret && err) - ret = err; - out: - reiserfs_write_unlock(sb); - return ret; -} - -/* - * Turn on quotas during mount time - we need to find the quota file and such... - */ -static int reiserfs_quota_on_mount(struct super_block *sb, int type) -{ - return dquot_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type], - REISERFS_SB(sb)->s_jquota_fmt, type); -} - -/* - * Standard function to be called on quota_on - */ -static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, - struct path *path) -{ - int err; - struct inode *inode; - struct reiserfs_transaction_handle th; - int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA; - - if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) - return -EINVAL; - - /* Quotafile not on the same filesystem? */ - if (path->dentry->d_sb != sb) { - err = -EXDEV; - goto out; - } - inode = path->dentry->d_inode; - /* We must not pack tails for quota files on reiserfs for quota IO to work */ - if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { - err = reiserfs_unpack(inode, NULL); - if (err) { - reiserfs_warning(sb, "super-6520", - "Unpacking tail of quota file failed" - " (%d). Cannot turn on quotas.", err); - err = -EINVAL; - goto out; - } - mark_inode_dirty(inode); - } - /* Journaling quota? */ - if (REISERFS_SB(sb)->s_qf_names[type]) { - /* Quotafile not of fs root? 
*/ - if (path->dentry->d_parent != sb->s_root) - reiserfs_warning(sb, "super-6521", - "Quota file not on filesystem root. " - "Journalled quota will not work."); - } - - /* - * When we journal data on quota file, we have to flush journal to see - * all updates to the file when we bypass pagecache... - */ - if (reiserfs_file_data_log(inode)) { - /* Just start temporary transaction and finish it */ - err = journal_begin(&th, sb, 1); - if (err) - goto out; - err = journal_end_sync(&th, sb, 1); - if (err) - goto out; - } - err = dquot_quota_on(sb, type, format_id, path); -out: - return err; -} - -/* Read data from quotafile - avoid pagecache and such because we cannot afford - * acquiring the locks... As quota files are never truncated and quota code - * itself serializes the operations (and no one else should touch the files) - * we don't have to be afraid of races */ -static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, - size_t len, loff_t off) -{ - struct inode *inode = sb_dqopt(sb)->files[type]; - unsigned long blk = off >> sb->s_blocksize_bits; - int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; - size_t toread; - struct buffer_head tmp_bh, *bh; - loff_t i_size = i_size_read(inode); - - if (off > i_size) - return 0; - if (off + len > i_size) - len = i_size - off; - toread = len; - while (toread > 0) { - tocopy = - sb->s_blocksize - offset < - toread ? sb->s_blocksize - offset : toread; - tmp_bh.b_state = 0; - /* Quota files are without tails so we can safely use this function */ - reiserfs_write_lock(sb); - err = reiserfs_get_block(inode, blk, &tmp_bh, 0); - reiserfs_write_unlock(sb); - if (err) - return err; - if (!buffer_mapped(&tmp_bh)) /* A hole? 
*/ - memset(data, 0, tocopy); - else { - bh = sb_bread(sb, tmp_bh.b_blocknr); - if (!bh) - return -EIO; - memcpy(data, bh->b_data + offset, tocopy); - brelse(bh); - } - offset = 0; - toread -= tocopy; - data += tocopy; - blk++; - } - return len; -} - -/* Write to quotafile (we know the transaction is already started and has - * enough credits) */ -static ssize_t reiserfs_quota_write(struct super_block *sb, int type, - const char *data, size_t len, loff_t off) -{ - struct inode *inode = sb_dqopt(sb)->files[type]; - unsigned long blk = off >> sb->s_blocksize_bits; - int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; - int journal_quota = REISERFS_SB(sb)->s_qf_names[type] != NULL; - size_t towrite = len; - struct buffer_head tmp_bh, *bh; - - if (!current->journal_info) { - printk(KERN_WARNING "reiserfs: Quota write (off=%Lu, len=%Lu)" - " cancelled because transaction is not started.\n", - (unsigned long long)off, (unsigned long long)len); - return -EIO; - } - mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); - while (towrite > 0) { - tocopy = sb->s_blocksize - offset < towrite ? 
- sb->s_blocksize - offset : towrite; - tmp_bh.b_state = 0; - err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE); - if (err) - goto out; - if (offset || tocopy != sb->s_blocksize) - bh = sb_bread(sb, tmp_bh.b_blocknr); - else - bh = sb_getblk(sb, tmp_bh.b_blocknr); - if (!bh) { - err = -EIO; - goto out; - } - lock_buffer(bh); - memcpy(bh->b_data + offset, data, tocopy); - flush_dcache_page(bh->b_page); - set_buffer_uptodate(bh); - unlock_buffer(bh); - reiserfs_prepare_for_journal(sb, bh, 1); - journal_mark_dirty(current->journal_info, sb, bh); - if (!journal_quota) - reiserfs_add_ordered_list(inode, bh); - brelse(bh); - offset = 0; - towrite -= tocopy; - data += tocopy; - blk++; - } -out: - if (len == towrite) { - mutex_unlock(&inode->i_mutex); - return err; - } - if (inode->i_size < off + len - towrite) - i_size_write(inode, off + len - towrite); - inode->i_version++; - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); - mutex_unlock(&inode->i_mutex); - return len - towrite; -} - -#endif - -static struct dentry *get_super_block(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) -{ - return mount_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super); -} - -static int __init init_reiserfs_fs(void) -{ - int ret; - - if ((ret = init_inodecache())) { - return ret; - } - - reiserfs_proc_info_global_init(); - - ret = register_filesystem(&reiserfs_fs_type); - - if (ret == 0) { - return 0; - } - - reiserfs_proc_info_global_done(); - destroy_inodecache(); - - return ret; -} - -static void __exit exit_reiserfs_fs(void) -{ - reiserfs_proc_info_global_done(); - unregister_filesystem(&reiserfs_fs_type); - destroy_inodecache(); -} - -struct file_system_type reiserfs_fs_type = { - .owner = THIS_MODULE, - .name = "reiserfs", - .mount = get_super_block, - .kill_sb = reiserfs_kill_sb, - .fs_flags = FS_REQUIRES_DEV, -}; - -MODULE_DESCRIPTION("ReiserFS journaled filesystem"); -MODULE_AUTHOR("Hans Reiser 
<reiser@namesys.com>"); -MODULE_LICENSE("GPL"); - -module_init(init_reiserfs_fs); -module_exit(exit_reiserfs_fs); diff --git a/ANDROID_3.4.5/fs/reiserfs/tail_conversion.c b/ANDROID_3.4.5/fs/reiserfs/tail_conversion.c deleted file mode 100644 index 5e2624d1..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/tail_conversion.c +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright details - */ - -#include <linux/time.h> -#include <linux/pagemap.h> -#include <linux/buffer_head.h> -#include "reiserfs.h" - -/* access to tail : when one is going to read tail it must make sure, that is not running. - direct2indirect and indirect2direct can not run concurrently */ - -/* Converts direct items to an unformatted node. Panics if file has no - tail. -ENOSPC if no disk space for conversion */ -/* path points to first direct item of the file regarless of how many of - them are there */ -int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, - struct treepath *path, struct buffer_head *unbh, - loff_t tail_offset) -{ - struct super_block *sb = inode->i_sb; - struct buffer_head *up_to_date_bh; - struct item_head *p_le_ih = PATH_PITEM_HEAD(path); - unsigned long total_tail = 0; - struct cpu_key end_key; /* Key to search for the last byte of the - converted item. */ - struct item_head ind_ih; /* new indirect item to be inserted or - key of unfm pointer to be pasted */ - int blk_size, retval; /* returned value for reiserfs_insert_item and clones */ - unp_t unfm_ptr; /* Handle on an unformatted node - that will be inserted in the - tree. */ - - BUG_ON(!th->t_trans_id); - - REISERFS_SB(sb)->s_direct2indirect++; - - blk_size = sb->s_blocksize; - - /* and key to search for append or insert pointer to the new - unformatted node. 
*/ - copy_item_head(&ind_ih, p_le_ih); - set_le_ih_k_offset(&ind_ih, tail_offset); - set_le_ih_k_type(&ind_ih, TYPE_INDIRECT); - - /* Set the key to search for the place for new unfm pointer */ - make_cpu_key(&end_key, inode, tail_offset, TYPE_INDIRECT, 4); - - /* FIXME: we could avoid this */ - if (search_for_position_by_key(sb, &end_key, path) == POSITION_FOUND) { - reiserfs_error(sb, "PAP-14030", - "pasted or inserted byte exists in " - "the tree %K. Use fsck to repair.", &end_key); - pathrelse(path); - return -EIO; - } - - p_le_ih = PATH_PITEM_HEAD(path); - - unfm_ptr = cpu_to_le32(unbh->b_blocknr); - - if (is_statdata_le_ih(p_le_ih)) { - /* Insert new indirect item. */ - set_ih_free_space(&ind_ih, 0); /* delete at nearest future */ - put_ih_item_len(&ind_ih, UNFM_P_SIZE); - PATH_LAST_POSITION(path)++; - retval = - reiserfs_insert_item(th, path, &end_key, &ind_ih, inode, - (char *)&unfm_ptr); - } else { - /* Paste into last indirect item of an object. */ - retval = reiserfs_paste_into_item(th, path, &end_key, inode, - (char *)&unfm_ptr, - UNFM_P_SIZE); - } - if (retval) { - return retval; - } - // note: from here there are two keys which have matching first - // three key components. They only differ by the fourth one. - - /* Set the key to search for the direct items of the file */ - make_cpu_key(&end_key, inode, max_reiserfs_offset(inode), TYPE_DIRECT, - 4); - - /* Move bytes from the direct items to the new unformatted node - and delete them. 
*/ - while (1) { - int tail_size; - - /* end_key.k_offset is set so, that we will always have found - last item of the file */ - if (search_for_position_by_key(sb, &end_key, path) == - POSITION_FOUND) - reiserfs_panic(sb, "PAP-14050", - "direct item (%K) not found", &end_key); - p_le_ih = PATH_PITEM_HEAD(path); - RFALSE(!is_direct_le_ih(p_le_ih), - "vs-14055: direct item expected(%K), found %h", - &end_key, p_le_ih); - tail_size = (le_ih_k_offset(p_le_ih) & (blk_size - 1)) - + ih_item_len(p_le_ih) - 1; - - /* we only send the unbh pointer if the buffer is not up to date. - ** this avoids overwriting good data from writepage() with old data - ** from the disk or buffer cache - ** Special case: unbh->b_page will be NULL if we are coming through - ** DIRECT_IO handler here. - */ - if (!unbh->b_page || buffer_uptodate(unbh) - || PageUptodate(unbh->b_page)) { - up_to_date_bh = NULL; - } else { - up_to_date_bh = unbh; - } - retval = reiserfs_delete_item(th, path, &end_key, inode, - up_to_date_bh); - - total_tail += retval; - if (tail_size == retval) - // done: file does not have direct items anymore - break; - - } - /* if we've copied bytes from disk into the page, we need to zero - ** out the unused part of the block (it was not up to date before) - */ - if (up_to_date_bh) { - unsigned pgoff = - (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1); - char *kaddr = kmap_atomic(up_to_date_bh->b_page); - memset(kaddr + pgoff, 0, blk_size - total_tail); - kunmap_atomic(kaddr); - } - - REISERFS_I(inode)->i_first_direct_byte = U32_MAX; - - return 0; -} - -/* stolen from fs/buffer.c */ -void reiserfs_unmap_buffer(struct buffer_head *bh) -{ - lock_buffer(bh); - if (buffer_journaled(bh) || buffer_journal_dirty(bh)) { - BUG(); - } - clear_buffer_dirty(bh); - /* Remove the buffer from whatever list it belongs to. 
We are mostly - interested in removing it from per-sb j_dirty_buffers list, to avoid - BUG() on attempt to write not mapped buffer */ - if ((!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) { - struct inode *inode = bh->b_page->mapping->host; - struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); - spin_lock(&j->j_dirty_buffers_lock); - list_del_init(&bh->b_assoc_buffers); - reiserfs_free_jh(bh); - spin_unlock(&j->j_dirty_buffers_lock); - } - clear_buffer_mapped(bh); - clear_buffer_req(bh); - clear_buffer_new(bh); - bh->b_bdev = NULL; - unlock_buffer(bh); -} - -/* this first locks inode (neither reads nor sync are permitted), - reads tail through page cache, insert direct item. When direct item - inserted successfully inode is left locked. Return value is always - what we expect from it (number of cut bytes). But when tail remains - in the unformatted node, we set mode to SKIP_BALANCING and unlock - inode */ -int indirect2direct(struct reiserfs_transaction_handle *th, - struct inode *inode, struct page *page, - struct treepath *path, /* path to the indirect item. */ - const struct cpu_key *item_key, /* Key to look for - * unformatted node - * pointer to be cut. */ - loff_t n_new_file_size, /* New file size. */ - char *mode) -{ - struct super_block *sb = inode->i_sb; - struct item_head s_ih; - unsigned long block_size = sb->s_blocksize; - char *tail; - int tail_len, round_tail_len; - loff_t pos, pos1; /* position of first byte of the tail */ - struct cpu_key key; - - BUG_ON(!th->t_trans_id); - - REISERFS_SB(sb)->s_indirect2direct++; - - *mode = M_SKIP_BALANCING; - - /* store item head path points to. 
*/ - copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); - - tail_len = (n_new_file_size & (block_size - 1)); - if (get_inode_sd_version(inode) == STAT_DATA_V2) - round_tail_len = ROUND_UP(tail_len); - else - round_tail_len = tail_len; - - pos = - le_ih_k_offset(&s_ih) - 1 + (ih_item_len(&s_ih) / UNFM_P_SIZE - - 1) * sb->s_blocksize; - pos1 = pos; - - // we are protected by i_mutex. The tail can not disapper, not - // append can be done either - // we are in truncate or packing tail in file_release - - tail = (char *)kmap(page); /* this can schedule */ - - if (path_changed(&s_ih, path)) { - /* re-search indirect item */ - if (search_for_position_by_key(sb, item_key, path) - == POSITION_NOT_FOUND) - reiserfs_panic(sb, "PAP-5520", - "item to be converted %K does not exist", - item_key); - copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); -#ifdef CONFIG_REISERFS_CHECK - pos = le_ih_k_offset(&s_ih) - 1 + - (ih_item_len(&s_ih) / UNFM_P_SIZE - - 1) * sb->s_blocksize; - if (pos != pos1) - reiserfs_panic(sb, "vs-5530", "tail position " - "changed while we were reading it"); -#endif - } - - /* Set direct item header to insert. */ - make_le_item_head(&s_ih, NULL, get_inode_item_key_version(inode), - pos1 + 1, TYPE_DIRECT, round_tail_len, - 0xffff /*ih_free_space */ ); - - /* we want a pointer to the first byte of the tail in the page. - ** the page was locked and this part of the page was up to date when - ** indirect2direct was called, so we know the bytes are still valid - */ - tail = tail + (pos & (PAGE_CACHE_SIZE - 1)); - - PATH_LAST_POSITION(path)++; - - key = *item_key; - set_cpu_key_k_type(&key, TYPE_DIRECT); - key.key_length = 4; - /* Insert tail as new direct item in the tree */ - if (reiserfs_insert_item(th, path, &key, &s_ih, inode, - tail ? tail : NULL) < 0) { - /* No disk memory. So we can not convert last unformatted node - to the direct item. In this case we used to adjust - indirect items's ih_free_space. 
Now ih_free_space is not - used, it would be ideal to write zeros to corresponding - unformatted node. For now i_size is considered as guard for - going out of file size */ - kunmap(page); - return block_size - round_tail_len; - } - kunmap(page); - - /* make sure to get the i_blocks changes from reiserfs_insert_item */ - reiserfs_update_sd(th, inode); - - // note: we have now the same as in above direct2indirect - // conversion: there are two keys which have matching first three - // key components. They only differ by the fouhth one. - - /* We have inserted new direct item and must remove last - unformatted node. */ - *mode = M_CUT; - - /* we store position of first direct item in the in-core inode */ - /* mark_file_with_tail (inode, pos1 + 1); */ - REISERFS_I(inode)->i_first_direct_byte = pos1 + 1; - - return block_size - round_tail_len; -} diff --git a/ANDROID_3.4.5/fs/reiserfs/xattr.c b/ANDROID_3.4.5/fs/reiserfs/xattr.c deleted file mode 100644 index 46fc1c20..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/xattr.c +++ /dev/null @@ -1,1021 +0,0 @@ -/* - * linux/fs/reiserfs/xattr.c - * - * Copyright (c) 2002 by Jeff Mahoney, <jeffm@suse.com> - * - */ - -/* - * In order to implement EA/ACLs in a clean, backwards compatible manner, - * they are implemented as files in a "private" directory. - * Each EA is in it's own file, with the directory layout like so (/ is assumed - * to be relative to fs root). Inside the /.reiserfs_priv/xattrs directory, - * directories named using the capital-hex form of the objectid and - * generation number are used. Inside each directory are individual files - * named with the name of the extended attribute. - * - * So, for objectid 12648430, we could have: - * /.reiserfs_priv/xattrs/C0FFEE.0/system.posix_acl_access - * /.reiserfs_priv/xattrs/C0FFEE.0/system.posix_acl_default - * /.reiserfs_priv/xattrs/C0FFEE.0/user.Content-Type - * .. or similar. - * - * The file contents are the text of the EA. 
The size is known based on the - * stat data describing the file. - * - * In the case of system.posix_acl_access and system.posix_acl_default, since - * these are special cases for filesystem ACLs, they are interpreted by the - * kernel, in addition, they are negatively and positively cached and attached - * to the inode so that unnecessary lookups are avoided. - * - * Locking works like so: - * Directory components (xattr root, xattr dir) are protectd by their i_mutex. - * The xattrs themselves are protected by the xattr_sem. - */ - -#include "reiserfs.h" -#include <linux/capability.h> -#include <linux/dcache.h> -#include <linux/namei.h> -#include <linux/errno.h> -#include <linux/gfp.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/pagemap.h> -#include <linux/xattr.h> -#include "xattr.h" -#include "acl.h" -#include <asm/uaccess.h> -#include <net/checksum.h> -#include <linux/stat.h> -#include <linux/quotaops.h> -#include <linux/security.h> - -#define PRIVROOT_NAME ".reiserfs_priv" -#define XAROOT_NAME "xattrs" - - -/* Helpers for inode ops. We do this so that we don't have all the VFS - * overhead and also for proper i_mutex annotation. - * dir->i_mutex must be held for all of them. */ -#ifdef CONFIG_REISERFS_FS_XATTR -static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) -{ - BUG_ON(!mutex_is_locked(&dir->i_mutex)); - return dir->i_op->create(dir, dentry, mode, NULL); -} -#endif - -static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) -{ - BUG_ON(!mutex_is_locked(&dir->i_mutex)); - return dir->i_op->mkdir(dir, dentry, mode); -} - -/* We use I_MUTEX_CHILD here to silence lockdep. It's safe because xattr - * mutation ops aren't called during rename or splace, which are the - * only other users of I_MUTEX_CHILD. It violates the ordering, but that's - * better than allocating another subclass just for this code. 
*/ -static int xattr_unlink(struct inode *dir, struct dentry *dentry) -{ - int error; - BUG_ON(!mutex_is_locked(&dir->i_mutex)); - - reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex, - I_MUTEX_CHILD, dir->i_sb); - error = dir->i_op->unlink(dir, dentry); - mutex_unlock(&dentry->d_inode->i_mutex); - - if (!error) - d_delete(dentry); - return error; -} - -static int xattr_rmdir(struct inode *dir, struct dentry *dentry) -{ - int error; - BUG_ON(!mutex_is_locked(&dir->i_mutex)); - - reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex, - I_MUTEX_CHILD, dir->i_sb); - error = dir->i_op->rmdir(dir, dentry); - if (!error) - dentry->d_inode->i_flags |= S_DEAD; - mutex_unlock(&dentry->d_inode->i_mutex); - if (!error) - d_delete(dentry); - - return error; -} - -#define xattr_may_create(flags) (!flags || flags & XATTR_CREATE) - -static struct dentry *open_xa_root(struct super_block *sb, int flags) -{ - struct dentry *privroot = REISERFS_SB(sb)->priv_root; - struct dentry *xaroot; - if (!privroot->d_inode) - return ERR_PTR(-ENODATA); - - mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR); - - xaroot = dget(REISERFS_SB(sb)->xattr_root); - if (!xaroot) - xaroot = ERR_PTR(-ENODATA); - else if (!xaroot->d_inode) { - int err = -ENODATA; - if (xattr_may_create(flags)) - err = xattr_mkdir(privroot->d_inode, xaroot, 0700); - if (err) { - dput(xaroot); - xaroot = ERR_PTR(err); - } - } - - mutex_unlock(&privroot->d_inode->i_mutex); - return xaroot; -} - -static struct dentry *open_xa_dir(const struct inode *inode, int flags) -{ - struct dentry *xaroot, *xadir; - char namebuf[17]; - - xaroot = open_xa_root(inode->i_sb, flags); - if (IS_ERR(xaroot)) - return xaroot; - - snprintf(namebuf, sizeof(namebuf), "%X.%X", - le32_to_cpu(INODE_PKEY(inode)->k_objectid), - inode->i_generation); - - mutex_lock_nested(&xaroot->d_inode->i_mutex, I_MUTEX_XATTR); - - xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf)); - if (!IS_ERR(xadir) && !xadir->d_inode) { - int err = 
-ENODATA; - if (xattr_may_create(flags)) - err = xattr_mkdir(xaroot->d_inode, xadir, 0700); - if (err) { - dput(xadir); - xadir = ERR_PTR(err); - } - } - - mutex_unlock(&xaroot->d_inode->i_mutex); - dput(xaroot); - return xadir; -} - -/* The following are side effects of other operations that aren't explicitly - * modifying extended attributes. This includes operations such as permissions - * or ownership changes, object deletions, etc. */ -struct reiserfs_dentry_buf { - struct dentry *xadir; - int count; - struct dentry *dentries[8]; -}; - -static int -fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset, - u64 ino, unsigned int d_type) -{ - struct reiserfs_dentry_buf *dbuf = buf; - struct dentry *dentry; - WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex)); - - if (dbuf->count == ARRAY_SIZE(dbuf->dentries)) - return -ENOSPC; - - if (name[0] == '.' && (name[1] == '\0' || - (name[1] == '.' && name[2] == '\0'))) - return 0; - - dentry = lookup_one_len(name, dbuf->xadir, namelen); - if (IS_ERR(dentry)) { - return PTR_ERR(dentry); - } else if (!dentry->d_inode) { - /* A directory entry exists, but no file? 
*/ - reiserfs_error(dentry->d_sb, "xattr-20003", - "Corrupted directory: xattr %s listed but " - "not found for file %s.\n", - dentry->d_name.name, dbuf->xadir->d_name.name); - dput(dentry); - return -EIO; - } - - dbuf->dentries[dbuf->count++] = dentry; - return 0; -} - -static void -cleanup_dentry_buf(struct reiserfs_dentry_buf *buf) -{ - int i; - for (i = 0; i < buf->count; i++) - if (buf->dentries[i]) - dput(buf->dentries[i]); -} - -static int reiserfs_for_each_xattr(struct inode *inode, - int (*action)(struct dentry *, void *), - void *data) -{ - struct dentry *dir; - int i, err = 0; - loff_t pos = 0; - struct reiserfs_dentry_buf buf = { - .count = 0, - }; - - /* Skip out, an xattr has no xattrs associated with it */ - if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1) - return 0; - - reiserfs_write_unlock(inode->i_sb); - dir = open_xa_dir(inode, XATTR_REPLACE); - if (IS_ERR(dir)) { - err = PTR_ERR(dir); - reiserfs_write_lock(inode->i_sb); - goto out; - } else if (!dir->d_inode) { - err = 0; - reiserfs_write_lock(inode->i_sb); - goto out_dir; - } - - mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); - - reiserfs_write_lock(inode->i_sb); - - buf.xadir = dir; - err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos); - while ((err == 0 || err == -ENOSPC) && buf.count) { - err = 0; - - for (i = 0; i < buf.count && buf.dentries[i]; i++) { - int lerr = 0; - struct dentry *dentry = buf.dentries[i]; - - if (err == 0 && !S_ISDIR(dentry->d_inode->i_mode)) - lerr = action(dentry, data); - - dput(dentry); - buf.dentries[i] = NULL; - err = lerr ?: err; - } - buf.count = 0; - if (!err) - err = reiserfs_readdir_dentry(dir, &buf, - fill_with_dentries, &pos); - } - mutex_unlock(&dir->d_inode->i_mutex); - - /* Clean up after a failed readdir */ - cleanup_dentry_buf(&buf); - - if (!err) { - /* We start a transaction here to avoid a ABBA situation - * between the xattr root's i_mutex and the journal lock. 
- * This doesn't incur much additional overhead since the - * new transaction will just nest inside the - * outer transaction. */ - int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 + - 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); - struct reiserfs_transaction_handle th; - err = journal_begin(&th, inode->i_sb, blocks); - if (!err) { - int jerror; - reiserfs_mutex_lock_nested_safe( - &dir->d_parent->d_inode->i_mutex, - I_MUTEX_XATTR, inode->i_sb); - err = action(dir, data); - jerror = journal_end(&th, inode->i_sb, blocks); - mutex_unlock(&dir->d_parent->d_inode->i_mutex); - err = jerror ?: err; - } - } -out_dir: - dput(dir); -out: - /* -ENODATA isn't an error */ - if (err == -ENODATA) - err = 0; - return err; -} - -static int delete_one_xattr(struct dentry *dentry, void *data) -{ - struct inode *dir = dentry->d_parent->d_inode; - - /* This is the xattr dir, handle specially. */ - if (S_ISDIR(dentry->d_inode->i_mode)) - return xattr_rmdir(dir, dentry); - - return xattr_unlink(dir, dentry); -} - -static int chown_one_xattr(struct dentry *dentry, void *data) -{ - struct iattr *attrs = data; - return reiserfs_setattr(dentry, attrs); -} - -/* No i_mutex, but the inode is unconnected. */ -int reiserfs_delete_xattrs(struct inode *inode) -{ - int err = reiserfs_for_each_xattr(inode, delete_one_xattr, NULL); - if (err) - reiserfs_warning(inode->i_sb, "jdm-20004", - "Couldn't delete all xattrs (%d)\n", err); - return err; -} - -/* inode->i_mutex: down */ -int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs) -{ - int err = reiserfs_for_each_xattr(inode, chown_one_xattr, attrs); - if (err) - reiserfs_warning(inode->i_sb, "jdm-20007", - "Couldn't chown all xattrs (%d)\n", err); - return err; -} - -#ifdef CONFIG_REISERFS_FS_XATTR -/* Returns a dentry corresponding to a specific extended attribute file - * for the inode. If flags allow, the file is created. Otherwise, a - * valid or negative dentry, or an error is returned. 
*/ -static struct dentry *xattr_lookup(struct inode *inode, const char *name, - int flags) -{ - struct dentry *xadir, *xafile; - int err = 0; - - xadir = open_xa_dir(inode, flags); - if (IS_ERR(xadir)) - return ERR_CAST(xadir); - - mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR); - xafile = lookup_one_len(name, xadir, strlen(name)); - if (IS_ERR(xafile)) { - err = PTR_ERR(xafile); - goto out; - } - - if (xafile->d_inode && (flags & XATTR_CREATE)) - err = -EEXIST; - - if (!xafile->d_inode) { - err = -ENODATA; - if (xattr_may_create(flags)) - err = xattr_create(xadir->d_inode, xafile, - 0700|S_IFREG); - } - - if (err) - dput(xafile); -out: - mutex_unlock(&xadir->d_inode->i_mutex); - dput(xadir); - if (err) - return ERR_PTR(err); - return xafile; -} - -/* Internal operations on file data */ -static inline void reiserfs_put_page(struct page *page) -{ - kunmap(page); - page_cache_release(page); -} - -static struct page *reiserfs_get_page(struct inode *dir, size_t n) -{ - struct address_space *mapping = dir->i_mapping; - struct page *page; - /* We can deadlock if we try to free dentries, - and an unlink/rmdir has just occurred - GFP_NOFS avoids this */ - mapping_set_gfp_mask(mapping, GFP_NOFS); - page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL); - if (!IS_ERR(page)) { - kmap(page); - if (PageError(page)) - goto fail; - } - return page; - - fail: - reiserfs_put_page(page); - return ERR_PTR(-EIO); -} - -static inline __u32 xattr_hash(const char *msg, int len) -{ - return csum_partial(msg, len, 0); -} - -int reiserfs_commit_write(struct file *f, struct page *page, - unsigned from, unsigned to); - -static void update_ctime(struct inode *inode) -{ - struct timespec now = current_fs_time(inode->i_sb); - if (inode_unhashed(inode) || !inode->i_nlink || - timespec_equal(&inode->i_ctime, &now)) - return; - - inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); -} - -static int lookup_and_delete_xattr(struct inode *inode, const char *name) -{ - int 
err = 0; - struct dentry *dentry, *xadir; - - xadir = open_xa_dir(inode, XATTR_REPLACE); - if (IS_ERR(xadir)) - return PTR_ERR(xadir); - - mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR); - dentry = lookup_one_len(name, xadir, strlen(name)); - if (IS_ERR(dentry)) { - err = PTR_ERR(dentry); - goto out_dput; - } - - if (dentry->d_inode) { - reiserfs_write_lock(inode->i_sb); - err = xattr_unlink(xadir->d_inode, dentry); - reiserfs_write_unlock(inode->i_sb); - update_ctime(inode); - } - - dput(dentry); -out_dput: - mutex_unlock(&xadir->d_inode->i_mutex); - dput(xadir); - return err; -} - - -/* Generic extended attribute operations that can be used by xa plugins */ - -/* - * inode->i_mutex: down - */ -int -reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, - struct inode *inode, const char *name, - const void *buffer, size_t buffer_size, int flags) -{ - int err = 0; - struct dentry *dentry; - struct page *page; - char *data; - size_t file_pos = 0; - size_t buffer_pos = 0; - size_t new_size; - __u32 xahash = 0; - - if (get_inode_sd_version(inode) == STAT_DATA_V1) - return -EOPNOTSUPP; - - reiserfs_write_unlock(inode->i_sb); - - if (!buffer) { - err = lookup_and_delete_xattr(inode, name); - reiserfs_write_lock(inode->i_sb); - return err; - } - - dentry = xattr_lookup(inode, name, flags); - if (IS_ERR(dentry)) { - reiserfs_write_lock(inode->i_sb); - return PTR_ERR(dentry); - } - - down_write(&REISERFS_I(inode)->i_xattr_sem); - - reiserfs_write_lock(inode->i_sb); - - xahash = xattr_hash(buffer, buffer_size); - while (buffer_pos < buffer_size || buffer_pos == 0) { - size_t chunk; - size_t skip = 0; - size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1)); - if (buffer_size - buffer_pos > PAGE_CACHE_SIZE) - chunk = PAGE_CACHE_SIZE; - else - chunk = buffer_size - buffer_pos; - - page = reiserfs_get_page(dentry->d_inode, file_pos); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto out_unlock; - } - - lock_page(page); - data = page_address(page); - - 
if (file_pos == 0) { - struct reiserfs_xattr_header *rxh; - skip = file_pos = sizeof(struct reiserfs_xattr_header); - if (chunk + skip > PAGE_CACHE_SIZE) - chunk = PAGE_CACHE_SIZE - skip; - rxh = (struct reiserfs_xattr_header *)data; - rxh->h_magic = cpu_to_le32(REISERFS_XATTR_MAGIC); - rxh->h_hash = cpu_to_le32(xahash); - } - - err = __reiserfs_write_begin(page, page_offset, chunk + skip); - if (!err) { - if (buffer) - memcpy(data + skip, buffer + buffer_pos, chunk); - err = reiserfs_commit_write(NULL, page, page_offset, - page_offset + chunk + - skip); - } - unlock_page(page); - reiserfs_put_page(page); - buffer_pos += chunk; - file_pos += chunk; - skip = 0; - if (err || buffer_size == 0 || !buffer) - break; - } - - new_size = buffer_size + sizeof(struct reiserfs_xattr_header); - if (!err && new_size < i_size_read(dentry->d_inode)) { - struct iattr newattrs = { - .ia_ctime = current_fs_time(inode->i_sb), - .ia_size = new_size, - .ia_valid = ATTR_SIZE | ATTR_CTIME, - }; - - reiserfs_write_unlock(inode->i_sb); - mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR); - inode_dio_wait(dentry->d_inode); - reiserfs_write_lock(inode->i_sb); - - err = reiserfs_setattr(dentry, &newattrs); - mutex_unlock(&dentry->d_inode->i_mutex); - } else - update_ctime(inode); -out_unlock: - up_write(&REISERFS_I(inode)->i_xattr_sem); - dput(dentry); - return err; -} - -/* We need to start a transaction to maintain lock ordering */ -int reiserfs_xattr_set(struct inode *inode, const char *name, - const void *buffer, size_t buffer_size, int flags) -{ - - struct reiserfs_transaction_handle th; - int error, error2; - size_t jbegin_count = reiserfs_xattr_nblocks(inode, buffer_size); - - if (!(flags & XATTR_REPLACE)) - jbegin_count += reiserfs_xattr_jcreate_nblocks(inode); - - reiserfs_write_lock(inode->i_sb); - error = journal_begin(&th, inode->i_sb, jbegin_count); - if (error) { - reiserfs_write_unlock(inode->i_sb); - return error; - } - - error = reiserfs_xattr_set_handle(&th, inode, 
name, - buffer, buffer_size, flags); - - error2 = journal_end(&th, inode->i_sb, jbegin_count); - if (error == 0) - error = error2; - reiserfs_write_unlock(inode->i_sb); - - return error; -} - -/* - * inode->i_mutex: down - */ -int -reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer, - size_t buffer_size) -{ - ssize_t err = 0; - struct dentry *dentry; - size_t isize; - size_t file_pos = 0; - size_t buffer_pos = 0; - struct page *page; - __u32 hash = 0; - - if (name == NULL) - return -EINVAL; - - /* We can't have xattrs attached to v1 items since they don't have - * generation numbers */ - if (get_inode_sd_version(inode) == STAT_DATA_V1) - return -EOPNOTSUPP; - - dentry = xattr_lookup(inode, name, XATTR_REPLACE); - if (IS_ERR(dentry)) { - err = PTR_ERR(dentry); - goto out; - } - - down_read(&REISERFS_I(inode)->i_xattr_sem); - - isize = i_size_read(dentry->d_inode); - - /* Just return the size needed */ - if (buffer == NULL) { - err = isize - sizeof(struct reiserfs_xattr_header); - goto out_unlock; - } - - if (buffer_size < isize - sizeof(struct reiserfs_xattr_header)) { - err = -ERANGE; - goto out_unlock; - } - - while (file_pos < isize) { - size_t chunk; - char *data; - size_t skip = 0; - if (isize - file_pos > PAGE_CACHE_SIZE) - chunk = PAGE_CACHE_SIZE; - else - chunk = isize - file_pos; - - page = reiserfs_get_page(dentry->d_inode, file_pos); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto out_unlock; - } - - lock_page(page); - data = page_address(page); - if (file_pos == 0) { - struct reiserfs_xattr_header *rxh = - (struct reiserfs_xattr_header *)data; - skip = file_pos = sizeof(struct reiserfs_xattr_header); - chunk -= skip; - /* Magic doesn't match up.. 
*/ - if (rxh->h_magic != cpu_to_le32(REISERFS_XATTR_MAGIC)) { - unlock_page(page); - reiserfs_put_page(page); - reiserfs_warning(inode->i_sb, "jdm-20001", - "Invalid magic for xattr (%s) " - "associated with %k", name, - INODE_PKEY(inode)); - err = -EIO; - goto out_unlock; - } - hash = le32_to_cpu(rxh->h_hash); - } - memcpy(buffer + buffer_pos, data + skip, chunk); - unlock_page(page); - reiserfs_put_page(page); - file_pos += chunk; - buffer_pos += chunk; - skip = 0; - } - err = isize - sizeof(struct reiserfs_xattr_header); - - if (xattr_hash(buffer, isize - sizeof(struct reiserfs_xattr_header)) != - hash) { - reiserfs_warning(inode->i_sb, "jdm-20002", - "Invalid hash for xattr (%s) associated " - "with %k", name, INODE_PKEY(inode)); - err = -EIO; - } - -out_unlock: - up_read(&REISERFS_I(inode)->i_xattr_sem); - dput(dentry); - -out: - return err; -} - -/* - * In order to implement different sets of xattr operations for each xattr - * prefix with the generic xattr API, a filesystem should create a - * null-terminated array of struct xattr_handler (one for each prefix) and - * hang a pointer to it off of the s_xattr field of the superblock. - * - * The generic_fooxattr() functions will use this list to dispatch xattr - * operations to the correct xattr_handler. 
- */ -#define for_each_xattr_handler(handlers, handler) \ - for ((handler) = *(handlers)++; \ - (handler) != NULL; \ - (handler) = *(handlers)++) - -/* This is the implementation for the xattr plugin infrastructure */ -static inline const struct xattr_handler * -find_xattr_handler_prefix(const struct xattr_handler **handlers, - const char *name) -{ - const struct xattr_handler *xah; - - if (!handlers) - return NULL; - - for_each_xattr_handler(handlers, xah) { - if (strncmp(xah->prefix, name, strlen(xah->prefix)) == 0) - break; - } - - return xah; -} - - -/* - * Inode operation getxattr() - */ -ssize_t -reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, - size_t size) -{ - const struct xattr_handler *handler; - - handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); - - if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) - return -EOPNOTSUPP; - - return handler->get(dentry, name, buffer, size, handler->flags); -} - -/* - * Inode operation setxattr() - * - * dentry->d_inode->i_mutex down - */ -int -reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags) -{ - const struct xattr_handler *handler; - - handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); - - if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) - return -EOPNOTSUPP; - - return handler->set(dentry, name, value, size, flags, handler->flags); -} - -/* - * Inode operation removexattr() - * - * dentry->d_inode->i_mutex down - */ -int reiserfs_removexattr(struct dentry *dentry, const char *name) -{ - const struct xattr_handler *handler; - handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); - - if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) - return -EOPNOTSUPP; - - return handler->set(dentry, name, NULL, 0, XATTR_REPLACE, handler->flags); -} - -struct listxattr_buf { - size_t size; - size_t pos; - char *buf; - struct dentry *dentry; -}; - 
-static int listxattr_filler(void *buf, const char *name, int namelen, - loff_t offset, u64 ino, unsigned int d_type) -{ - struct listxattr_buf *b = (struct listxattr_buf *)buf; - size_t size; - if (name[0] != '.' || - (namelen != 1 && (name[1] != '.' || namelen != 2))) { - const struct xattr_handler *handler; - handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr, - name); - if (!handler) /* Unsupported xattr name */ - return 0; - if (b->buf) { - size = handler->list(b->dentry, b->buf + b->pos, - b->size, name, namelen, - handler->flags); - if (size > b->size) - return -ERANGE; - } else { - size = handler->list(b->dentry, NULL, 0, name, - namelen, handler->flags); - } - - b->pos += size; - } - return 0; -} - -/* - * Inode operation listxattr() - * - * We totally ignore the generic listxattr here because it would be stupid - * not to. Since the xattrs are organized in a directory, we can just - * readdir to find them. - */ -ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) -{ - struct dentry *dir; - int err = 0; - loff_t pos = 0; - struct listxattr_buf buf = { - .dentry = dentry, - .buf = buffer, - .size = buffer ? 
size : 0, - }; - - if (!dentry->d_inode) - return -EINVAL; - - if (!dentry->d_sb->s_xattr || - get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) - return -EOPNOTSUPP; - - dir = open_xa_dir(dentry->d_inode, XATTR_REPLACE); - if (IS_ERR(dir)) { - err = PTR_ERR(dir); - if (err == -ENODATA) - err = 0; /* Not an error if there aren't any xattrs */ - goto out; - } - - mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); - err = reiserfs_readdir_dentry(dir, &buf, listxattr_filler, &pos); - mutex_unlock(&dir->d_inode->i_mutex); - - if (!err) - err = buf.pos; - - dput(dir); -out: - return err; -} - -static int create_privroot(struct dentry *dentry) -{ - int err; - struct inode *inode = dentry->d_parent->d_inode; - WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex)); - - err = xattr_mkdir(inode, dentry, 0700); - if (err || !dentry->d_inode) { - reiserfs_warning(dentry->d_sb, "jdm-20006", - "xattrs/ACLs enabled and couldn't " - "find/create .reiserfs_priv. " - "Failing mount."); - return -EOPNOTSUPP; - } - - dentry->d_inode->i_flags |= S_PRIVATE; - reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr " - "storage.\n", PRIVROOT_NAME); - - return 0; -} - -#else -int __init reiserfs_xattr_register_handlers(void) { return 0; } -void reiserfs_xattr_unregister_handlers(void) {} -static int create_privroot(struct dentry *dentry) { return 0; } -#endif - -/* Actual operations that are exported to VFS-land */ -const struct xattr_handler *reiserfs_xattr_handlers[] = { -#ifdef CONFIG_REISERFS_FS_XATTR - &reiserfs_xattr_user_handler, - &reiserfs_xattr_trusted_handler, -#endif -#ifdef CONFIG_REISERFS_FS_SECURITY - &reiserfs_xattr_security_handler, -#endif -#ifdef CONFIG_REISERFS_FS_POSIX_ACL - &reiserfs_posix_acl_access_handler, - &reiserfs_posix_acl_default_handler, -#endif - NULL -}; - -static int xattr_mount_check(struct super_block *s) -{ - /* We need generation numbers to ensure that the oid mapping is correct - * v3.5 filesystems don't have them. 
*/ - if (old_format_only(s)) { - if (reiserfs_xattrs_optional(s)) { - /* Old format filesystem, but optional xattrs have - * been enabled. Error out. */ - reiserfs_warning(s, "jdm-2005", - "xattrs/ACLs not supported " - "on pre-v3.6 format filesystems. " - "Failing mount."); - return -EOPNOTSUPP; - } - } - - return 0; -} - -int reiserfs_permission(struct inode *inode, int mask) -{ - /* - * We don't do permission checks on the internal objects. - * Permissions are determined by the "owning" object. - */ - if (IS_PRIVATE(inode)) - return 0; - - return generic_permission(inode, mask); -} - -static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) -{ - return -EPERM; -} - -static const struct dentry_operations xattr_lookup_poison_ops = { - .d_revalidate = xattr_hide_revalidate, -}; - -int reiserfs_lookup_privroot(struct super_block *s) -{ - struct dentry *dentry; - int err = 0; - - /* If we don't have the privroot located yet - go find it */ - reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s); - dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, - strlen(PRIVROOT_NAME)); - if (!IS_ERR(dentry)) { - REISERFS_SB(s)->priv_root = dentry; - d_set_d_op(dentry, &xattr_lookup_poison_ops); - if (dentry->d_inode) - dentry->d_inode->i_flags |= S_PRIVATE; - } else - err = PTR_ERR(dentry); - mutex_unlock(&s->s_root->d_inode->i_mutex); - - return err; -} - -/* We need to take a copy of the mount flags since things like - * MS_RDONLY don't get set until *after* we're called. 
- * mount_flags != mount_options */ -int reiserfs_xattr_init(struct super_block *s, int mount_flags) -{ - int err = 0; - struct dentry *privroot = REISERFS_SB(s)->priv_root; - - err = xattr_mount_check(s); - if (err) - goto error; - - if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) { - reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s); - err = create_privroot(REISERFS_SB(s)->priv_root); - mutex_unlock(&s->s_root->d_inode->i_mutex); - } - - if (privroot->d_inode) { - s->s_xattr = reiserfs_xattr_handlers; - reiserfs_mutex_lock_safe(&privroot->d_inode->i_mutex, s); - if (!REISERFS_SB(s)->xattr_root) { - struct dentry *dentry; - dentry = lookup_one_len(XAROOT_NAME, privroot, - strlen(XAROOT_NAME)); - if (!IS_ERR(dentry)) - REISERFS_SB(s)->xattr_root = dentry; - else - err = PTR_ERR(dentry); - } - mutex_unlock(&privroot->d_inode->i_mutex); - } - -error: - if (err) { - clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt)); - clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt)); - } - - /* The super_block MS_POSIXACL must mirror the (no)acl mount option. 
*/ - if (reiserfs_posixacl(s)) - s->s_flags |= MS_POSIXACL; - else - s->s_flags &= ~MS_POSIXACL; - - return err; -} diff --git a/ANDROID_3.4.5/fs/reiserfs/xattr.h b/ANDROID_3.4.5/fs/reiserfs/xattr.h deleted file mode 100644 index f59626c5..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/xattr.h +++ /dev/null @@ -1,122 +0,0 @@ -#include <linux/reiserfs_xattr.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/rwsem.h> - -struct inode; -struct dentry; -struct iattr; -struct super_block; -struct nameidata; - -int reiserfs_xattr_register_handlers(void) __init; -void reiserfs_xattr_unregister_handlers(void); -int reiserfs_xattr_init(struct super_block *sb, int mount_flags); -int reiserfs_lookup_privroot(struct super_block *sb); -int reiserfs_delete_xattrs(struct inode *inode); -int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); -int reiserfs_permission(struct inode *inode, int mask); - -#ifdef CONFIG_REISERFS_FS_XATTR -#define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir) -ssize_t reiserfs_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size); -int reiserfs_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); -ssize_t reiserfs_listxattr(struct dentry *dentry, char *buffer, size_t size); -int reiserfs_removexattr(struct dentry *dentry, const char *name); - -int reiserfs_xattr_get(struct inode *, const char *, void *, size_t); -int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int); -int reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *, - struct inode *, const char *, const void *, - size_t, int); - -extern const struct xattr_handler reiserfs_xattr_user_handler; -extern const struct xattr_handler reiserfs_xattr_trusted_handler; -extern const struct xattr_handler reiserfs_xattr_security_handler; -#ifdef CONFIG_REISERFS_FS_SECURITY -int reiserfs_security_init(struct inode *dir, struct inode *inode, - const struct qstr 
*qstr, - struct reiserfs_security_handle *sec); -int reiserfs_security_write(struct reiserfs_transaction_handle *th, - struct inode *inode, - struct reiserfs_security_handle *sec); -void reiserfs_security_free(struct reiserfs_security_handle *sec); -#endif - -static inline int reiserfs_xattrs_initialized(struct super_block *sb) -{ - return REISERFS_SB(sb)->priv_root != NULL; -} - -#define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header)) -static inline loff_t reiserfs_xattr_nblocks(struct inode *inode, loff_t size) -{ - loff_t ret = 0; - if (reiserfs_file_data_log(inode)) { - ret = _ROUND_UP(xattr_size(size), inode->i_sb->s_blocksize); - ret >>= inode->i_sb->s_blocksize_bits; - } - return ret; -} - -/* We may have to create up to 3 objects: xattr root, xattr dir, xattr file. - * Let's try to be smart about it. - * xattr root: We cache it. If it's not cached, we may need to create it. - * xattr dir: If anything has been loaded for this inode, we can set a flag - * saying so. - * xattr file: Since we don't cache xattrs, we can't tell. We always include - * blocks for it. - * - * However, since root and dir can be created between calls - YOU MUST SAVE - * THIS VALUE. 
- */ -static inline size_t reiserfs_xattr_jcreate_nblocks(struct inode *inode) -{ - size_t nblocks = JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); - - if ((REISERFS_I(inode)->i_flags & i_has_xattr_dir) == 0) { - nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); - if (!REISERFS_SB(inode->i_sb)->xattr_root->d_inode) - nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); - } - - return nblocks; -} - -static inline void reiserfs_init_xattr_rwsem(struct inode *inode) -{ - init_rwsem(&REISERFS_I(inode)->i_xattr_sem); -} - -#else - -#define reiserfs_getxattr NULL -#define reiserfs_setxattr NULL -#define reiserfs_listxattr NULL -#define reiserfs_removexattr NULL - -static inline void reiserfs_init_xattr_rwsem(struct inode *inode) -{ -} -#endif /* CONFIG_REISERFS_FS_XATTR */ - -#ifndef CONFIG_REISERFS_FS_SECURITY -static inline int reiserfs_security_init(struct inode *dir, - struct inode *inode, - const struct qstr *qstr, - struct reiserfs_security_handle *sec) -{ - return 0; -} -static inline int -reiserfs_security_write(struct reiserfs_transaction_handle *th, - struct inode *inode, - struct reiserfs_security_handle *sec) -{ - return 0; -} -static inline void reiserfs_security_free(struct reiserfs_security_handle *sec) -{} -#endif diff --git a/ANDROID_3.4.5/fs/reiserfs/xattr_acl.c b/ANDROID_3.4.5/fs/reiserfs/xattr_acl.c deleted file mode 100644 index 44474f9b..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/xattr_acl.c +++ /dev/null @@ -1,504 +0,0 @@ -#include <linux/capability.h> -#include <linux/fs.h> -#include <linux/posix_acl.h> -#include "reiserfs.h" -#include <linux/errno.h> -#include <linux/pagemap.h> -#include <linux/xattr.h> -#include <linux/slab.h> -#include <linux/posix_acl_xattr.h> -#include "xattr.h" -#include "acl.h" -#include <asm/uaccess.h> - -static int reiserfs_set_acl(struct reiserfs_transaction_handle *th, - struct inode *inode, int type, - struct posix_acl *acl); - -static int -posix_acl_set(struct dentry *dentry, const char *name, const void *value, - size_t size, 
int flags, int type) -{ - struct inode *inode = dentry->d_inode; - struct posix_acl *acl; - int error, error2; - struct reiserfs_transaction_handle th; - size_t jcreate_blocks; - if (!reiserfs_posixacl(inode->i_sb)) - return -EOPNOTSUPP; - if (!inode_owner_or_capable(inode)) - return -EPERM; - - if (value) { - acl = posix_acl_from_xattr(value, size); - if (IS_ERR(acl)) { - return PTR_ERR(acl); - } else if (acl) { - error = posix_acl_valid(acl); - if (error) - goto release_and_out; - } - } else - acl = NULL; - - /* Pessimism: We can't assume that anything from the xattr root up - * has been created. */ - - jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) + - reiserfs_xattr_nblocks(inode, size) * 2; - - reiserfs_write_lock(inode->i_sb); - error = journal_begin(&th, inode->i_sb, jcreate_blocks); - if (error == 0) { - error = reiserfs_set_acl(&th, inode, type, acl); - error2 = journal_end(&th, inode->i_sb, jcreate_blocks); - if (error2) - error = error2; - } - reiserfs_write_unlock(inode->i_sb); - - release_and_out: - posix_acl_release(acl); - return error; -} - -static int -posix_acl_get(struct dentry *dentry, const char *name, void *buffer, - size_t size, int type) -{ - struct posix_acl *acl; - int error; - - if (!reiserfs_posixacl(dentry->d_sb)) - return -EOPNOTSUPP; - - acl = reiserfs_get_acl(dentry->d_inode, type); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl == NULL) - return -ENODATA; - error = posix_acl_to_xattr(acl, buffer, size); - posix_acl_release(acl); - - return error; -} - -/* - * Convert from filesystem to in-memory representation. 
- */ -static struct posix_acl *posix_acl_from_disk(const void *value, size_t size) -{ - const char *end = (char *)value + size; - int n, count; - struct posix_acl *acl; - - if (!value) - return NULL; - if (size < sizeof(reiserfs_acl_header)) - return ERR_PTR(-EINVAL); - if (((reiserfs_acl_header *) value)->a_version != - cpu_to_le32(REISERFS_ACL_VERSION)) - return ERR_PTR(-EINVAL); - value = (char *)value + sizeof(reiserfs_acl_header); - count = reiserfs_acl_count(size); - if (count < 0) - return ERR_PTR(-EINVAL); - if (count == 0) - return NULL; - acl = posix_acl_alloc(count, GFP_NOFS); - if (!acl) - return ERR_PTR(-ENOMEM); - for (n = 0; n < count; n++) { - reiserfs_acl_entry *entry = (reiserfs_acl_entry *) value; - if ((char *)value + sizeof(reiserfs_acl_entry_short) > end) - goto fail; - acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); - acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); - switch (acl->a_entries[n].e_tag) { - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - value = (char *)value + - sizeof(reiserfs_acl_entry_short); - acl->a_entries[n].e_id = ACL_UNDEFINED_ID; - break; - - case ACL_USER: - case ACL_GROUP: - value = (char *)value + sizeof(reiserfs_acl_entry); - if ((char *)value > end) - goto fail; - acl->a_entries[n].e_id = le32_to_cpu(entry->e_id); - break; - - default: - goto fail; - } - } - if (value != end) - goto fail; - return acl; - - fail: - posix_acl_release(acl); - return ERR_PTR(-EINVAL); -} - -/* - * Convert from in-memory to filesystem representation. 
- */ -static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size) -{ - reiserfs_acl_header *ext_acl; - char *e; - int n; - - *size = reiserfs_acl_size(acl->a_count); - ext_acl = kmalloc(sizeof(reiserfs_acl_header) + - acl->a_count * - sizeof(reiserfs_acl_entry), - GFP_NOFS); - if (!ext_acl) - return ERR_PTR(-ENOMEM); - ext_acl->a_version = cpu_to_le32(REISERFS_ACL_VERSION); - e = (char *)ext_acl + sizeof(reiserfs_acl_header); - for (n = 0; n < acl->a_count; n++) { - reiserfs_acl_entry *entry = (reiserfs_acl_entry *) e; - entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); - entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); - switch (acl->a_entries[n].e_tag) { - case ACL_USER: - case ACL_GROUP: - entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); - e += sizeof(reiserfs_acl_entry); - break; - - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - e += sizeof(reiserfs_acl_entry_short); - break; - - default: - goto fail; - } - } - return (char *)ext_acl; - - fail: - kfree(ext_acl); - return ERR_PTR(-EINVAL); -} - -/* - * Inode operation get_posix_acl(). 
- * - * inode->i_mutex: down - * BKL held [before 2.5.x] - */ -struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) -{ - char *name, *value; - struct posix_acl *acl; - int size; - int retval; - - acl = get_cached_acl(inode, type); - if (acl != ACL_NOT_CACHED) - return acl; - - switch (type) { - case ACL_TYPE_ACCESS: - name = POSIX_ACL_XATTR_ACCESS; - break; - case ACL_TYPE_DEFAULT: - name = POSIX_ACL_XATTR_DEFAULT; - break; - default: - BUG(); - } - - size = reiserfs_xattr_get(inode, name, NULL, 0); - if (size < 0) { - if (size == -ENODATA || size == -ENOSYS) { - set_cached_acl(inode, type, NULL); - return NULL; - } - return ERR_PTR(size); - } - - value = kmalloc(size, GFP_NOFS); - if (!value) - return ERR_PTR(-ENOMEM); - - retval = reiserfs_xattr_get(inode, name, value, size); - if (retval == -ENODATA || retval == -ENOSYS) { - /* This shouldn't actually happen as it should have - been caught above.. but just in case */ - acl = NULL; - } else if (retval < 0) { - acl = ERR_PTR(retval); - } else { - acl = posix_acl_from_disk(value, retval); - } - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); - - kfree(value); - return acl; -} - -/* - * Inode operation set_posix_acl(). - * - * inode->i_mutex: down - * BKL held [before 2.5.x] - */ -static int -reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, - int type, struct posix_acl *acl) -{ - char *name; - void *value = NULL; - size_t size = 0; - int error; - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - switch (type) { - case ACL_TYPE_ACCESS: - name = POSIX_ACL_XATTR_ACCESS; - if (acl) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); - if (error < 0) - return error; - else { - if (error == 0) - acl = NULL; - } - } - break; - case ACL_TYPE_DEFAULT: - name = POSIX_ACL_XATTR_DEFAULT; - if (!S_ISDIR(inode->i_mode)) - return acl ? 
-EACCES : 0; - break; - default: - return -EINVAL; - } - - if (acl) { - value = posix_acl_to_disk(acl, &size); - if (IS_ERR(value)) - return (int)PTR_ERR(value); - } - - error = reiserfs_xattr_set_handle(th, inode, name, value, size, 0); - - /* - * Ensure that the inode gets dirtied if we're only using - * the mode bits and an old ACL didn't exist. We don't need - * to check if the inode is hashed here since we won't get - * called by reiserfs_inherit_default_acl(). - */ - if (error == -ENODATA) { - error = 0; - if (type == ACL_TYPE_ACCESS) { - inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); - } - } - - kfree(value); - - if (!error) - set_cached_acl(inode, type, acl); - - return error; -} - -/* dir->i_mutex: locked, - * inode is new and not released into the wild yet */ -int -reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, - struct inode *dir, struct dentry *dentry, - struct inode *inode) -{ - struct posix_acl *acl; - int err = 0; - - /* ACLs only get applied to files and directories */ - if (S_ISLNK(inode->i_mode)) - return 0; - - /* ACLs can only be used on "new" objects, so if it's an old object - * there is nothing to inherit from */ - if (get_inode_sd_version(dir) == STAT_DATA_V1) - goto apply_umask; - - /* Don't apply ACLs to objects in the .reiserfs_priv tree.. 
This - * would be useless since permissions are ignored, and a pain because - * it introduces locking cycles */ - if (IS_PRIVATE(dir)) { - inode->i_flags |= S_PRIVATE; - goto apply_umask; - } - - acl = reiserfs_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(acl)) - return PTR_ERR(acl); - - if (acl) { - /* Copy the default ACL to the default ACL of a new directory */ - if (S_ISDIR(inode->i_mode)) { - err = reiserfs_set_acl(th, inode, ACL_TYPE_DEFAULT, - acl); - if (err) - goto cleanup; - } - - /* Now we reconcile the new ACL and the mode, - potentially modifying both */ - err = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); - if (err < 0) - return err; - - /* If we need an ACL.. */ - if (err > 0) - err = reiserfs_set_acl(th, inode, ACL_TYPE_ACCESS, acl); - cleanup: - posix_acl_release(acl); - } else { - apply_umask: - /* no ACL, apply umask */ - inode->i_mode &= ~current_umask(); - } - - return err; -} - -/* This is used to cache the default acl before a new object is created. - * The biggest reason for this is to get an idea of how many blocks will - * actually be required for the create operation if we must inherit an ACL. - * An ACL write can add up to 3 object creations and an additional file write - * so we'd prefer not to reserve that many blocks in the journal if we can. - * It also has the advantage of not loading the ACL with a transaction open, - * this may seem silly, but if the owner of the directory is doing the - * creation, the ACL may not be loaded since the permissions wouldn't require - * it. - * We return the number of blocks required for the transaction. - */ -int reiserfs_cache_default_acl(struct inode *inode) -{ - struct posix_acl *acl; - int nblocks = 0; - - if (IS_PRIVATE(inode)) - return 0; - - acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT); - - if (acl && !IS_ERR(acl)) { - int size = reiserfs_acl_size(acl->a_count); - - /* Other xattrs can be created during inode creation. 
We don't - * want to claim too many blocks, so we check to see if we - * we need to create the tree to the xattrs, and then we - * just want two files. */ - nblocks = reiserfs_xattr_jcreate_nblocks(inode); - nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); - - REISERFS_I(inode)->i_flags |= i_has_xattr_dir; - - /* We need to account for writes + bitmaps for two files */ - nblocks += reiserfs_xattr_nblocks(inode, size) * 4; - posix_acl_release(acl); - } - - return nblocks; -} - -int reiserfs_acl_chmod(struct inode *inode) -{ - struct reiserfs_transaction_handle th; - struct posix_acl *acl; - size_t size; - int depth; - int error; - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - if (get_inode_sd_version(inode) == STAT_DATA_V1 || - !reiserfs_posixacl(inode->i_sb)) { - return 0; - } - - reiserfs_write_unlock(inode->i_sb); - acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); - reiserfs_write_lock(inode->i_sb); - if (!acl) - return 0; - if (IS_ERR(acl)) - return PTR_ERR(acl); - error = posix_acl_chmod(&acl, GFP_NOFS, inode->i_mode); - if (error) - return error; - - size = reiserfs_xattr_nblocks(inode, reiserfs_acl_size(acl->a_count)); - depth = reiserfs_write_lock_once(inode->i_sb); - error = journal_begin(&th, inode->i_sb, size * 2); - if (!error) { - int error2; - error = reiserfs_set_acl(&th, inode, ACL_TYPE_ACCESS, acl); - error2 = journal_end(&th, inode->i_sb, size * 2); - if (error2) - error = error2; - } - reiserfs_write_unlock_once(inode->i_sb, depth); - posix_acl_release(acl); - return error; -} - -static size_t posix_acl_access_list(struct dentry *dentry, char *list, - size_t list_size, const char *name, - size_t name_len, int type) -{ - const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); - if (!reiserfs_posixacl(dentry->d_sb)) - return 0; - if (list && size <= list_size) - memcpy(list, POSIX_ACL_XATTR_ACCESS, size); - return size; -} - -const struct xattr_handler reiserfs_posix_acl_access_handler = { - .prefix = POSIX_ACL_XATTR_ACCESS, - .flags = 
ACL_TYPE_ACCESS, - .get = posix_acl_get, - .set = posix_acl_set, - .list = posix_acl_access_list, -}; - -static size_t posix_acl_default_list(struct dentry *dentry, char *list, - size_t list_size, const char *name, - size_t name_len, int type) -{ - const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); - if (!reiserfs_posixacl(dentry->d_sb)) - return 0; - if (list && size <= list_size) - memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); - return size; -} - -const struct xattr_handler reiserfs_posix_acl_default_handler = { - .prefix = POSIX_ACL_XATTR_DEFAULT, - .flags = ACL_TYPE_DEFAULT, - .get = posix_acl_get, - .set = posix_acl_set, - .list = posix_acl_default_list, -}; diff --git a/ANDROID_3.4.5/fs/reiserfs/xattr_security.c b/ANDROID_3.4.5/fs/reiserfs/xattr_security.c deleted file mode 100644 index 800a3cef..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/xattr_security.c +++ /dev/null @@ -1,120 +0,0 @@ -#include "reiserfs.h" -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/pagemap.h> -#include <linux/xattr.h> -#include <linux/slab.h> -#include "xattr.h" -#include <linux/security.h> -#include <asm/uaccess.h> - -static int -security_get(struct dentry *dentry, const char *name, void *buffer, size_t size, - int handler_flags) -{ - if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) - return -EINVAL; - - if (IS_PRIVATE(dentry->d_inode)) - return -EPERM; - - return reiserfs_xattr_get(dentry->d_inode, name, buffer, size); -} - -static int -security_set(struct dentry *dentry, const char *name, const void *buffer, - size_t size, int flags, int handler_flags) -{ - if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) - return -EINVAL; - - if (IS_PRIVATE(dentry->d_inode)) - return -EPERM; - - return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags); -} - -static size_t security_list(struct dentry *dentry, char *list, size_t list_len, - const char *name, size_t namelen, int handler_flags) -{ - const size_t len = namelen + 1; - - if (IS_PRIVATE(dentry->d_inode)) - 
return 0; - - if (list && len <= list_len) { - memcpy(list, name, namelen); - list[namelen] = '\0'; - } - - return len; -} - -/* Initializes the security context for a new inode and returns the number - * of blocks needed for the transaction. If successful, reiserfs_security - * must be released using reiserfs_security_free when the caller is done. */ -int reiserfs_security_init(struct inode *dir, struct inode *inode, - const struct qstr *qstr, - struct reiserfs_security_handle *sec) -{ - int blocks = 0; - int error; - - sec->name = NULL; - - /* Don't add selinux attributes on xattrs - they'll never get used */ - if (IS_PRIVATE(dir)) - return 0; - - error = security_old_inode_init_security(inode, dir, qstr, &sec->name, - &sec->value, &sec->length); - if (error) { - if (error == -EOPNOTSUPP) - error = 0; - - sec->name = NULL; - sec->value = NULL; - sec->length = 0; - return error; - } - - if (sec->length && reiserfs_xattrs_initialized(inode->i_sb)) { - blocks = reiserfs_xattr_jcreate_nblocks(inode) + - reiserfs_xattr_nblocks(inode, sec->length); - /* We don't want to count the directories twice if we have - * a default ACL. 
*/ - REISERFS_I(inode)->i_flags |= i_has_xattr_dir; - } - return blocks; -} - -int reiserfs_security_write(struct reiserfs_transaction_handle *th, - struct inode *inode, - struct reiserfs_security_handle *sec) -{ - int error; - if (strlen(sec->name) < sizeof(XATTR_SECURITY_PREFIX)) - return -EINVAL; - - error = reiserfs_xattr_set_handle(th, inode, sec->name, sec->value, - sec->length, XATTR_CREATE); - if (error == -ENODATA || error == -EOPNOTSUPP) - error = 0; - - return error; -} - -void reiserfs_security_free(struct reiserfs_security_handle *sec) -{ - kfree(sec->name); - kfree(sec->value); - sec->name = NULL; - sec->value = NULL; -} - -const struct xattr_handler reiserfs_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .get = security_get, - .set = security_set, - .list = security_list, -}; diff --git a/ANDROID_3.4.5/fs/reiserfs/xattr_trusted.c b/ANDROID_3.4.5/fs/reiserfs/xattr_trusted.c deleted file mode 100644 index a0035719..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/xattr_trusted.c +++ /dev/null @@ -1,56 +0,0 @@ -#include "reiserfs.h" -#include <linux/capability.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/pagemap.h> -#include <linux/xattr.h> -#include "xattr.h" -#include <asm/uaccess.h> - -static int -trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size, - int handler_flags) -{ - if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) - return -EINVAL; - - if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode)) - return -EPERM; - - return reiserfs_xattr_get(dentry->d_inode, name, buffer, size); -} - -static int -trusted_set(struct dentry *dentry, const char *name, const void *buffer, - size_t size, int flags, int handler_flags) -{ - if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) - return -EINVAL; - - if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode)) - return -EPERM; - - return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags); -} - -static size_t trusted_list(struct dentry 
*dentry, char *list, size_t list_size, - const char *name, size_t name_len, int handler_flags) -{ - const size_t len = name_len + 1; - - if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode)) - return 0; - - if (list && len <= list_size) { - memcpy(list, name, name_len); - list[name_len] = '\0'; - } - return len; -} - -const struct xattr_handler reiserfs_xattr_trusted_handler = { - .prefix = XATTR_TRUSTED_PREFIX, - .get = trusted_get, - .set = trusted_set, - .list = trusted_list, -}; diff --git a/ANDROID_3.4.5/fs/reiserfs/xattr_user.c b/ANDROID_3.4.5/fs/reiserfs/xattr_user.c deleted file mode 100644 index 8667491a..00000000 --- a/ANDROID_3.4.5/fs/reiserfs/xattr_user.c +++ /dev/null @@ -1,52 +0,0 @@ -#include "reiserfs.h" -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/pagemap.h> -#include <linux/xattr.h> -#include "xattr.h" -#include <asm/uaccess.h> - -static int -user_get(struct dentry *dentry, const char *name, void *buffer, size_t size, - int handler_flags) -{ - - if (strlen(name) < sizeof(XATTR_USER_PREFIX)) - return -EINVAL; - if (!reiserfs_xattrs_user(dentry->d_sb)) - return -EOPNOTSUPP; - return reiserfs_xattr_get(dentry->d_inode, name, buffer, size); -} - -static int -user_set(struct dentry *dentry, const char *name, const void *buffer, - size_t size, int flags, int handler_flags) -{ - if (strlen(name) < sizeof(XATTR_USER_PREFIX)) - return -EINVAL; - - if (!reiserfs_xattrs_user(dentry->d_sb)) - return -EOPNOTSUPP; - return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags); -} - -static size_t user_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int handler_flags) -{ - const size_t len = name_len + 1; - - if (!reiserfs_xattrs_user(dentry->d_sb)) - return 0; - if (list && len <= list_size) { - memcpy(list, name, name_len); - list[name_len] = '\0'; - } - return len; -} - -const struct xattr_handler reiserfs_xattr_user_handler = { - .prefix = XATTR_USER_PREFIX, - .get = user_get, 
- .set = user_set, - .list = user_list, -}; |