diff options
author | Kevin | 2014-11-15 11:48:36 +0800 |
---|---|---|
committer | Kevin | 2014-11-15 11:48:36 +0800 |
commit | d04075478d378d9e15f3e1abfd14b0bd124077d4 (patch) | |
tree | 733dd964582f388b9e3e367c249946cd32a2851f /fs | |
download | FOSSEE-netbook-uboot-source-d04075478d378d9e15f3e1abfd14b0bd124077d4.tar.gz FOSSEE-netbook-uboot-source-d04075478d378d9e15f3e1abfd14b0bd124077d4.tar.bz2 FOSSEE-netbook-uboot-source-d04075478d378d9e15f3e1abfd14b0bd124077d4.zip |
init commit via android 4.4 uboot
Diffstat (limited to 'fs')
165 files changed, 73655 insertions, 0 deletions
diff --git a/fs/Makefile b/fs/Makefile new file mode 100755 index 0000000..e672bc6 --- /dev/null +++ b/fs/Makefile @@ -0,0 +1,29 @@ +# +# (C) Copyright 2000, 2001 +# Wolfgang Denk, DENX Software Engineering, wd@denx.de. +# +# See file CREDITS for list of people who contributed to this +# project. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# +# + +SUBDIRS := cramfs fdos fat reiserfs + +.depend all: + @for dir in $(SUBDIRS) ; do \ + $(MAKE) -C $$dir $@ ; done diff --git a/fs/cramfs/Makefile b/fs/cramfs/Makefile new file mode 100755 index 0000000..54a475e --- /dev/null +++ b/fs/cramfs/Makefile @@ -0,0 +1,47 @@ +# +# (C) Copyright 2000, 2001 +# Wolfgang Denk, DENX Software Engineering, wd@denx.de. +# +# See file CREDITS for list of people who contributed to this +# project. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# + +include $(TOPDIR)/config.mk + +LIB = libcramfs.a + +AOBJS = +COBJS = cramfs.o uncompress.o +OBJS = $(AOBJS) $(COBJS) + +#CPPFLAGS += + +all: $(LIB) $(AOBJS) + +$(LIB): .depend $(OBJS) + $(AR) crv $@ $(OBJS) + + +######################################################################### + +.depend: Makefile $(AOBJS:.o=.S) $(COBJS:.o=.c) + $(CC) -M $(CFLAGS) $(AOBJS:.o=.S) $(COBJS:.o=.c) > $@ + +sinclude .depend + +######################################################################### diff --git a/fs/cramfs/cramfs.c b/fs/cramfs/cramfs.c new file mode 100755 index 0000000..48e7f63 --- /dev/null +++ b/fs/cramfs/cramfs.c @@ -0,0 +1,347 @@ +/* + * cramfs.c + * + * Copyright (C) 1999 Linus Torvalds + * + * Copyright (C) 2000-2002 Transmeta Corporation + * + * Copyright (C) 2003 Kai-Uwe Bloem, + * Auerswald GmbH & Co KG, <linux-development@auerswald.de> + * - adapted from the www.tuxbox.org u-boot tree, added "ls" command + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * Compressed ROM filesystem for Linux. + * + * TODO: + * add support for resolving symbolic links + */ + +/* + * These are the VFS interfaces to the compressed ROM filesystem. + * The actual compression is based on zlib, see the other files. + */ + +#include <common.h> +#include <malloc.h> + +#if (CONFIG_COMMANDS & CFG_CMD_JFFS2) + +#include <asm/byteorder.h> +#include <linux/stat.h> +#include <jffs2/jffs2.h> +#include <jffs2/load_kernel.h> +#include <cramfs/cramfs_fs.h> + +/* These two macros may change in future, to provide better st_ino + semantics. */ +#define CRAMINO(x) (CRAMFS_GET_OFFSET(x) ? 
CRAMFS_GET_OFFSET(x)<<2 : 1) +#define OFFSET(x) ((x)->i_ino) + +struct cramfs_super super; + +/* CPU address space offset calculation macro, struct part_info offset is + * device address space offset, so we need to shift it by a device start address. */ +extern flash_info_t flash_info[]; +#define PART_OFFSET(x) (x->offset + flash_info[x->dev->id->num].start[0]) + +static int cramfs_read_super (struct part_info *info) +{ + unsigned long root_offset; + + /* Read the first block and get the superblock from it */ + memcpy (&super, (void *) PART_OFFSET(info), sizeof (super)); + + /* Do sanity checks on the superblock */ + if (super.magic != CRAMFS_32 (CRAMFS_MAGIC)) { + /* check at 512 byte offset */ + memcpy (&super, (void *) PART_OFFSET(info) + 512, sizeof (super)); + if (super.magic != CRAMFS_32 (CRAMFS_MAGIC)) { + printf ("cramfs: wrong magic\n"); + return -1; + } + } + + /* flags is reused several times, so swab it once */ + super.flags = CRAMFS_32 (super.flags); + super.size = CRAMFS_32 (super.size); + + /* get feature flags first */ + if (super.flags & ~CRAMFS_SUPPORTED_FLAGS) { + printf ("cramfs: unsupported filesystem features\n"); + return -1; + } + + /* Check that the root inode is in a sane state */ + if (!S_ISDIR (CRAMFS_16 (super.root.mode))) { + printf ("cramfs: root is not a directory\n"); + return -1; + } + root_offset = CRAMFS_GET_OFFSET (&(super.root)) << 2; + if (root_offset == 0) { + printf ("cramfs: empty filesystem"); + } else if (!(super.flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) && + ((root_offset != sizeof (struct cramfs_super)) && + (root_offset != 512 + sizeof (struct cramfs_super)))) { + printf ("cramfs: bad root offset %lu\n", root_offset); + return -1; + } + + return 0; +} + +static unsigned long cramfs_resolve (unsigned long begin, unsigned long offset, + unsigned long size, int raw, + char *filename) +{ + unsigned long inodeoffset = 0, nextoffset; + + while (inodeoffset < size) { + struct cramfs_inode *inode; + char *name; + int namelen; + 
+ inode = (struct cramfs_inode *) (begin + offset + + inodeoffset); + + /* + * Namelengths on disk are shifted by two + * and the name padded out to 4-byte boundaries + * with zeroes. + */ + namelen = CRAMFS_GET_NAMELEN (inode) << 2; + name = (char *) inode + sizeof (struct cramfs_inode); + + nextoffset = + inodeoffset + sizeof (struct cramfs_inode) + namelen; + + for (;;) { + if (!namelen) + return -1; + if (name[namelen - 1]) + break; + namelen--; + } + + if (!strncmp (filename, name, namelen)) { + char *p = strtok (NULL, "/"); + + if (raw && (p == NULL || *p == '\0')) + return offset + inodeoffset; + + if (S_ISDIR (CRAMFS_16 (inode->mode))) { + return cramfs_resolve (begin, + CRAMFS_GET_OFFSET + (inode) << 2, + CRAMFS_24 (inode-> + size), raw, + p); + } else if (S_ISREG (CRAMFS_16 (inode->mode))) { + return offset + inodeoffset; + } else { + printf ("%*.*s: unsupported file type (%x)\n", + namelen, namelen, name, + CRAMFS_16 (inode->mode)); + return 0; + } + } + + inodeoffset = nextoffset; + } + + printf ("can't find corresponding entry\n"); + return 0; +} + +static int cramfs_uncompress (unsigned long begin, unsigned long offset, + unsigned long loadoffset) +{ + struct cramfs_inode *inode = (struct cramfs_inode *) (begin + offset); + unsigned long *block_ptrs = (unsigned long *) + (begin + (CRAMFS_GET_OFFSET (inode) << 2)); + unsigned long curr_block = (CRAMFS_GET_OFFSET (inode) + + (((CRAMFS_24 (inode->size)) + + 4095) >> 12)) << 2; + int size, total_size = 0; + int i; + + cramfs_uncompress_init (); + + for (i = 0; i < ((CRAMFS_24 (inode->size) + 4095) >> 12); i++) { + size = cramfs_uncompress_block ((void *) loadoffset, + (void *) (begin + curr_block), + (CRAMFS_32 (block_ptrs[i]) - + curr_block)); + if (size < 0) + return size; + loadoffset += size; + total_size += size; + curr_block = CRAMFS_32 (block_ptrs[i]); + } + + cramfs_uncompress_exit (); + return total_size; +} + +int cramfs_load (char *loadoffset, struct part_info *info, char *filename) +{ + 
unsigned long offset; + + if (cramfs_read_super (info)) + return -1; + + offset = cramfs_resolve (PART_OFFSET(info), + CRAMFS_GET_OFFSET (&(super.root)) << 2, + CRAMFS_24 (super.root.size), 0, + strtok (filename, "/")); + + if (offset <= 0) + return offset; + + return cramfs_uncompress (PART_OFFSET(info), offset, + (unsigned long) loadoffset); +} + +static int cramfs_list_inode (struct part_info *info, unsigned long offset) +{ + struct cramfs_inode *inode = (struct cramfs_inode *) + (PART_OFFSET(info) + offset); + char *name, str[20]; + int namelen, nextoff; + + /* + * Namelengths on disk are shifted by two + * and the name padded out to 4-byte boundaries + * with zeroes. + */ + namelen = CRAMFS_GET_NAMELEN (inode) << 2; + name = (char *) inode + sizeof (struct cramfs_inode); + nextoff = namelen; + + for (;;) { + if (!namelen) + return namelen; + if (name[namelen - 1]) + break; + namelen--; + } + + printf (" %s %8d %*.*s", mkmodestr (CRAMFS_16 (inode->mode), str), + CRAMFS_24 (inode->size), namelen, namelen, name); + + if ((CRAMFS_16 (inode->mode) & S_IFMT) == S_IFLNK) { + /* symbolic link. + * Unpack the link target, trusting in the inode's size field. + */ + unsigned long size = CRAMFS_24 (inode->size); + char *link = malloc (size); + + if (link != NULL && cramfs_uncompress (PART_OFFSET(info), offset, + (unsigned long) link) + == size) + printf (" -> %*.*s\n", (int) size, (int) size, link); + else + printf (" [Error reading link]\n"); + if (link) + free (link); + } else + printf ("\n"); + + return nextoff; +} + +int cramfs_ls (struct part_info *info, char *filename) +{ + struct cramfs_inode *inode; + unsigned long inodeoffset = 0, nextoffset; + unsigned long offset, size; + + if (cramfs_read_super (info)) + return -1; + + if (strlen (filename) == 0 || !strcmp (filename, "/")) { + /* Root directory. 
Use root inode in super block */ + offset = CRAMFS_GET_OFFSET (&(super.root)) << 2; + size = CRAMFS_24 (super.root.size); + } else { + /* Resolve the path */ + offset = cramfs_resolve (PART_OFFSET(info), + CRAMFS_GET_OFFSET (&(super.root)) << + 2, CRAMFS_24 (super.root.size), 1, + strtok (filename, "/")); + + if (offset <= 0) + return offset; + + /* Resolving was successful. Examine the inode */ + inode = (struct cramfs_inode *) (PART_OFFSET(info) + offset); + if (!S_ISDIR (CRAMFS_16 (inode->mode))) { + /* It's not a directory - list it, and that's that */ + return (cramfs_list_inode (info, offset) > 0); + } + + /* It's a directory. List files within */ + offset = CRAMFS_GET_OFFSET (inode) << 2; + size = CRAMFS_24 (inode->size); + } + + /* List the given directory */ + while (inodeoffset < size) { + inode = (struct cramfs_inode *) (PART_OFFSET(info) + offset + + inodeoffset); + + nextoffset = cramfs_list_inode (info, offset + inodeoffset); + if (nextoffset == 0) + break; + inodeoffset += sizeof (struct cramfs_inode) + nextoffset; + } + + return 1; +} + +int cramfs_info (struct part_info *info) +{ + if (cramfs_read_super (info)) + return 0; + + printf ("size: 0x%x (%u)\n", super.size, super.size); + + if (super.flags != 0) { + printf ("flags:\n"); + if (super.flags & CRAMFS_FLAG_FSID_VERSION_2) + printf ("\tFSID version 2\n"); + if (super.flags & CRAMFS_FLAG_SORTED_DIRS) + printf ("\tsorted dirs\n"); + if (super.flags & CRAMFS_FLAG_HOLES) + printf ("\tholes\n"); + if (super.flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) + printf ("\tshifted root offset\n"); + } + + printf ("fsid:\n\tcrc: 0x%x\n\tedition: 0x%x\n", + super.fsid.crc, super.fsid.edition); + printf ("name: %16s\n", super.name); + + return 1; +} + +int cramfs_check (struct part_info *info) +{ + struct cramfs_super *sb; + + if (info->dev->id->type != MTD_DEV_TYPE_NOR) + return 0; + + sb = (struct cramfs_super *) PART_OFFSET(info); + if (sb->magic != CRAMFS_32 (CRAMFS_MAGIC)) { + /* check at 512 byte offset */ + 
sb = (struct cramfs_super *) (PART_OFFSET(info) + 512); + if (sb->magic != CRAMFS_32 (CRAMFS_MAGIC)) + return 0; + } + return 1; +} + +#endif /* CFG_FS_CRAMFS */ diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c new file mode 100755 index 0000000..170832a --- /dev/null +++ b/fs/cramfs/uncompress.c @@ -0,0 +1,106 @@ +/* + * uncompress.c + * + * Copyright (C) 1999 Linus Torvalds + * Copyright (C) 2000-2002 Transmeta Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * cramfs interfaces to the uncompression library. There's really just + * three entrypoints: + * + * - cramfs_uncompress_init() - called to initialize the thing. + * - cramfs_uncompress_exit() - tell me when you're done + * - cramfs_uncompress_block() - uncompress a block. + * + * NOTE NOTE NOTE! The uncompression is entirely single-threaded. We + * only have one stream, and we'll initialize it only once even if it + * then is used by multiple filesystems. + */ + +#include <common.h> +#include <malloc.h> +#include <watchdog.h> +#include <zlib.h> + +#if (CONFIG_COMMANDS & CFG_CMD_JFFS2) + +static z_stream stream; + +#define ZALLOC_ALIGNMENT 16 + +static void *zalloc (void *x, unsigned items, unsigned size) +{ + void *p; + + size *= items; + size = (size + ZALLOC_ALIGNMENT - 1) & ~(ZALLOC_ALIGNMENT - 1); + + p = malloc (size); + + return (p); +} + +static void zfree (void *x, void *addr, unsigned nb) +{ + free (addr); +} + +/* Returns length of decompressed data. 
*/ +int cramfs_uncompress_block (void *dst, void *src, int srclen) +{ + int err; + + inflateReset (&stream); + + stream.next_in = src; + stream.avail_in = srclen; + + stream.next_out = dst; + stream.avail_out = 4096 * 2; + + err = inflate (&stream, Z_FINISH); + + if (err != Z_STREAM_END) + goto err; + return stream.total_out; + + err: + /*printf ("Error %d while decompressing!\n", err); */ + /*printf ("%p(%d)->%p\n", src, srclen, dst); */ + return -1; +} + +int cramfs_uncompress_init (void) +{ + int err; + + stream.zalloc = zalloc; + stream.zfree = zfree; + stream.next_in = 0; + stream.avail_in = 0; + +#if defined(CONFIG_HW_WATCHDOG) || defined(CONFIG_WATCHDOG) + stream.outcb = (cb_func) WATCHDOG_RESET; +#else + stream.outcb = Z_NULL; +#endif /* CONFIG_HW_WATCHDOG */ + + err = inflateInit (&stream); + if (err != Z_OK) { + printf ("Error: inflateInit2() returned %d\n", err); + return -1; + } + + return 0; +} + +int cramfs_uncompress_exit (void) +{ + inflateEnd (&stream); + return 0; +} + +#endif /* CFG_FS_CRAMFS */ diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile new file mode 100755 index 0000000..3b19368 --- /dev/null +++ b/fs/ext2/Makefile @@ -0,0 +1,48 @@ +# +# (C) Copyright 2003 +# Pavel Bartusek, Sysgo Real-Time Solutions AG, pba@sysgo.de +# +# +# See file CREDITS for list of people who contributed to this +# project. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# + +include $(TOPDIR)/config.mk + +LIB = libext2fs.a + +AOBJS = +COBJS = ext2fs.o dev.o +OBJS = $(AOBJS) $(COBJS) + +#CPPFLAGS += + +all: $(LIB) $(AOBJS) + +$(LIB): .depend $(OBJS) + $(AR) crv $@ $(OBJS) + + +######################################################################### + +.depend: Makefile $(AOBJS:.o=.S) $(COBJS:.o=.c) + $(CC) -M $(CFLAGS) $(AOBJS:.o=.S) $(COBJS:.o=.c) > $@ + +sinclude .depend + +######################################################################### diff --git a/fs/ext2/dev.c b/fs/ext2/dev.c new file mode 100755 index 0000000..1469e98 --- /dev/null +++ b/fs/ext2/dev.c @@ -0,0 +1,126 @@ +/* + * (C) Copyright 2004 + * esd gmbh <www.esd-electronics.com> + * Reinhard Arlt <reinhard.arlt@esd-electronics.com> + * + * based on code of fs/reiserfs/dev.c by + * + * (C) Copyright 2003 - 2004 + * Sysgo AG, <www.elinos.com>, Pavel Bartusek <pba@sysgo.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + + +#include <common.h> +#if (CONFIG_COMMANDS & CFG_CMD_EXT2) + +#include <config.h> +#include <ext2fs.h> + +static block_dev_desc_t *ext2fs_block_dev_desc; +static disk_partition_t part_info; + +int ext2fs_set_blk_dev (block_dev_desc_t * rbdd, int part) +{ + ext2fs_block_dev_desc = rbdd; + + if (part == 0) { + /* disk doesn't use partition table */ + part_info.start = 0; + part_info.size = rbdd->lba; + part_info.blksz = rbdd->blksz; + } else { + if (get_partition_info + (ext2fs_block_dev_desc, part, &part_info)) { + return 0; + } + } + return (part_info.size); +} + + +int ext2fs_devread (int sector, int byte_offset, int byte_len, char *buf) { + char sec_buf[SECTOR_SIZE]; + unsigned block_len; + +/* + * Check partition boundaries + */ + if ((sector < 0) + || ((sector + ((byte_offset + byte_len - 1) >> SECTOR_BITS)) >= + part_info.size)) { + /* errnum = ERR_OUTSIDE_PART; */ + printf (" ** ext2fs_devread() read outside partition sector %d\n", sector); + return (0); + } + +/* + * Get the read to the beginning of a partition. 
+ */ + sector += byte_offset >> SECTOR_BITS; + byte_offset &= SECTOR_SIZE - 1; + + debug (" <%d, %d, %d>\n", sector, byte_offset, byte_len); + + if (ext2fs_block_dev_desc == NULL) { + printf ("** Invalid Block Device Descriptor (NULL)\n"); + return (0); + } + + if (byte_offset != 0) { + /* read first part which isn't aligned with start of sector */ + if (ext2fs_block_dev_desc-> + block_read (ext2fs_block_dev_desc->dev, + part_info.start + sector, 1, + (unsigned long *) sec_buf) != 1) { + printf (" ** ext2fs_devread() read error **\n"); + return (0); + } + memcpy (buf, sec_buf + byte_offset, + min (SECTOR_SIZE - byte_offset, byte_len)); + buf += min (SECTOR_SIZE - byte_offset, byte_len); + byte_len -= min (SECTOR_SIZE - byte_offset, byte_len); + sector++; + } + + /* read sector aligned part */ + block_len = byte_len & ~(SECTOR_SIZE - 1); + if (ext2fs_block_dev_desc->block_read (ext2fs_block_dev_desc->dev, + part_info.start + sector, + block_len / SECTOR_SIZE, + (unsigned long *) buf) != + block_len / SECTOR_SIZE) { + printf (" ** ext2fs_devread() read error - block\n"); + return (0); + } + buf += block_len; + byte_len -= block_len; + sector += block_len / SECTOR_SIZE; + + if (byte_len != 0) { + /* read rest of data which are not in whole sector */ + if (ext2fs_block_dev_desc-> + block_read (ext2fs_block_dev_desc->dev, + part_info.start + sector, 1, + (unsigned long *) sec_buf) != 1) { + printf (" ** ext2fs_devread() read error - last part\n"); + return (0); + } + memcpy (buf, sec_buf, byte_len); + } + return (1); +} +#endif /* CFG_CMD_EXT2FS */ diff --git a/fs/ext2/ext2fs.c b/fs/ext2/ext2fs.c new file mode 100755 index 0000000..c21d2d6 --- /dev/null +++ b/fs/ext2/ext2fs.c @@ -0,0 +1,878 @@ +/* + * (C) Copyright 2004 + * esd gmbh <www.esd-electronics.com> + * Reinhard Arlt <reinhard.arlt@esd-electronics.com> + * + * based on code from grub2 fs/ext2.c and fs/fshelp.c by + * + * GRUB -- GRand Unified Bootloader + * Copyright (C) 2003, 2004 Free Software Foundation, 
Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <common.h> + +#if (CONFIG_COMMANDS & CFG_CMD_EXT2) +#include <ext2fs.h> +#include <malloc.h> +#include <asm/byteorder.h> + +extern int ext2fs_devread (int sector, int byte_offset, int byte_len, + char *buf); + +/* Magic value used to identify an ext2 filesystem. */ +#define EXT2_MAGIC 0xEF53 +/* Amount of indirect blocks in an inode. */ +#define INDIRECT_BLOCKS 12 +/* Maximum lenght of a pathname. */ +#define EXT2_PATH_MAX 4096 +/* Maximum nesting of symlinks, used to prevent a loop. */ +#define EXT2_MAX_SYMLINKCNT 8 + +/* Filetype used in directory entry. */ +#define FILETYPE_UNKNOWN 0 +#define FILETYPE_REG 1 +#define FILETYPE_DIRECTORY 2 +#define FILETYPE_SYMLINK 7 + +/* Filetype information as used in inodes. */ +#define FILETYPE_INO_MASK 0170000 +#define FILETYPE_INO_REG 0100000 +#define FILETYPE_INO_DIRECTORY 0040000 +#define FILETYPE_INO_SYMLINK 0120000 + +/* Bits used as offset in sector */ +#define DISK_SECTOR_BITS 9 + +/* Log2 size of ext2 block in 512 blocks. */ +#define LOG2_EXT2_BLOCK_SIZE(data) (__le32_to_cpu (data->sblock.log2_block_size) + 1) + +/* Log2 size of ext2 block in bytes. */ +#define LOG2_BLOCK_SIZE(data) (__le32_to_cpu (data->sblock.log2_block_size) + 10) + +/* The size of an ext2 block in bytes. 
*/ +#define EXT2_BLOCK_SIZE(data) (1 << LOG2_BLOCK_SIZE(data)) + +/* The ext2 superblock. */ +struct ext2_sblock { + uint32_t total_inodes; + uint32_t total_blocks; + uint32_t reserved_blocks; + uint32_t free_blocks; + uint32_t free_inodes; + uint32_t first_data_block; + uint32_t log2_block_size; + uint32_t log2_fragment_size; + uint32_t blocks_per_group; + uint32_t fragments_per_group; + uint32_t inodes_per_group; + uint32_t mtime; + uint32_t utime; + uint16_t mnt_count; + uint16_t max_mnt_count; + uint16_t magic; + uint16_t fs_state; + uint16_t error_handling; + uint16_t minor_revision_level; + uint32_t lastcheck; + uint32_t checkinterval; + uint32_t creator_os; + uint32_t revision_level; + uint16_t uid_reserved; + uint16_t gid_reserved; + uint32_t first_inode; + uint16_t inode_size; + uint16_t block_group_number; + uint32_t feature_compatibility; + uint32_t feature_incompat; + uint32_t feature_ro_compat; + uint32_t unique_id[4]; + char volume_name[16]; + char last_mounted_on[64]; + uint32_t compression_info; +}; + +/* The ext2 blockgroup. */ +struct ext2_block_group { + uint32_t block_id; + uint32_t inode_id; + uint32_t inode_table_id; + uint16_t free_blocks; + uint16_t free_inodes; + uint16_t pad; + uint32_t reserved[3]; +}; + +/* The ext2 inode. */ +struct ext2_inode { + uint16_t mode; + uint16_t uid; + uint32_t size; + uint32_t atime; + uint32_t ctime; + uint32_t mtime; + uint32_t dtime; + uint16_t gid; + uint16_t nlinks; + uint32_t blockcnt; /* Blocks of 512 bytes!! */ + uint32_t flags; + uint32_t osd1; + union { + struct datablocks { + uint32_t dir_blocks[INDIRECT_BLOCKS]; + uint32_t indir_block; + uint32_t double_indir_block; + uint32_t tripple_indir_block; + } blocks; + char symlink[60]; + } b; + uint32_t version; + uint32_t acl; + uint32_t dir_acl; + uint32_t fragment_addr; + uint32_t osd2[3]; +}; + +/* The header of an ext2 directory entry. 
*/ +struct ext2_dirent { + uint32_t inode; + uint16_t direntlen; + uint8_t namelen; + uint8_t filetype; +}; + +struct ext2fs_node { + struct ext2_data *data; + struct ext2_inode inode; + int ino; + int inode_read; +}; + +/* Information about a "mounted" ext2 filesystem. */ +struct ext2_data { + struct ext2_sblock sblock; + struct ext2_inode *inode; + struct ext2fs_node diropen; +}; + + +typedef struct ext2fs_node *ext2fs_node_t; + +struct ext2_data *ext2fs_root = NULL; +ext2fs_node_t ext2fs_file = NULL; +int symlinknest = 0; +uint32_t *indir1_block = NULL; +int indir1_size = 0; +int indir1_blkno = -1; +uint32_t *indir2_block = NULL; +int indir2_size = 0; +int indir2_blkno = -1; + + +static int ext2fs_blockgroup + (struct ext2_data *data, int group, struct ext2_block_group *blkgrp) { +#ifdef DEBUG + printf ("ext2fs read blockgroup\n"); +#endif + return (ext2fs_devread + (((__le32_to_cpu (data->sblock.first_data_block) + + 1) << LOG2_EXT2_BLOCK_SIZE (data)), + group * sizeof (struct ext2_block_group), + sizeof (struct ext2_block_group), (char *) blkgrp)); +} + + +static int ext2fs_read_inode + (struct ext2_data *data, int ino, struct ext2_inode *inode) { + struct ext2_block_group blkgrp; + struct ext2_sblock *sblock = &data->sblock; + int inodes_per_block; + int status; + + unsigned int blkno; + unsigned int blkoff; + + /* It is easier to calculate if the first inode is 0. */ + ino--; +#ifdef DEBUG + printf ("ext2fs read inode %d\n", ino); +#endif + status = ext2fs_blockgroup (data, + ino / + __le32_to_cpu (sblock->inodes_per_group), + &blkgrp); + if (status == 0) { + return (0); + } + inodes_per_block = EXT2_BLOCK_SIZE (data) / 128; + blkno = (ino % __le32_to_cpu (sblock->inodes_per_group)) / + inodes_per_block; + blkoff = (ino % __le32_to_cpu (sblock->inodes_per_group)) % + inodes_per_block; +#ifdef DEBUG + printf ("ext2fs read inode blkno %d blkoff %d\n", blkno, blkoff); +#endif + /* Read the inode. 
*/ + status = ext2fs_devread (((__le32_to_cpu (blkgrp.inode_table_id) + + blkno) << LOG2_EXT2_BLOCK_SIZE (data)), + sizeof (struct ext2_inode) * blkoff, + sizeof (struct ext2_inode), (char *) inode); + if (status == 0) { + return (0); + } + return (1); +} + + +void ext2fs_free_node (ext2fs_node_t node, ext2fs_node_t currroot) { + if ((node != &ext2fs_root->diropen) && (node != currroot)) { + free (node); + } +} + + +static int ext2fs_read_block (ext2fs_node_t node, int fileblock) { + struct ext2_data *data = node->data; + struct ext2_inode *inode = &node->inode; + int blknr; + int blksz = EXT2_BLOCK_SIZE (data); + int log2_blksz = LOG2_EXT2_BLOCK_SIZE (data); + int status; + + /* Direct blocks. */ + if (fileblock < INDIRECT_BLOCKS) { + blknr = __le32_to_cpu (inode->b.blocks.dir_blocks[fileblock]); + } + /* Indirect. */ + else if (fileblock < (INDIRECT_BLOCKS + (blksz / 4))) { + if (indir1_block == NULL) { + indir1_block = (uint32_t *) malloc (blksz); + if (indir1_block == NULL) { + printf ("** ext2fs read block (indir 1) malloc failed. **\n"); + return (-1); + } + indir1_size = blksz; + indir1_blkno = -1; + } + if (blksz != indir1_size) { + free (indir1_block); + indir1_block = NULL; + indir1_size = 0; + indir1_blkno = -1; + indir1_block = (uint32_t *) malloc (blksz); + if (indir1_block == NULL) { + printf ("** ext2fs read block (indir 1) malloc failed. **\n"); + return (-1); + } + indir1_size = blksz; + } + if ((__le32_to_cpu (inode->b.blocks.indir_block) << + log2_blksz) != indir1_blkno) { + status = ext2fs_devread (__le32_to_cpu(inode->b.blocks.indir_block) << log2_blksz, + 0, blksz, + (char *) indir1_block); + if (status == 0) { + printf ("** ext2fs read block (indir 1) failed. **\n"); + return (0); + } + indir1_blkno = + __le32_to_cpu (inode->b.blocks. + indir_block) << log2_blksz; + } + blknr = __le32_to_cpu (indir1_block + [fileblock - INDIRECT_BLOCKS]); + } + /* Double indirect. 
*/ + else if (fileblock < + (INDIRECT_BLOCKS + (blksz / 4 * (blksz / 4 + 1)))) { + unsigned int perblock = blksz / 4; + unsigned int rblock = fileblock - (INDIRECT_BLOCKS + + blksz / 4); + + if (indir1_block == NULL) { + indir1_block = (uint32_t *) malloc (blksz); + if (indir1_block == NULL) { + printf ("** ext2fs read block (indir 2 1) malloc failed. **\n"); + return (-1); + } + indir1_size = blksz; + indir1_blkno = -1; + } + if (blksz != indir1_size) { + free (indir1_block); + indir1_block = NULL; + indir1_size = 0; + indir1_blkno = -1; + indir1_block = (uint32_t *) malloc (blksz); + if (indir1_block == NULL) { + printf ("** ext2fs read block (indir 2 1) malloc failed. **\n"); + return (-1); + } + indir1_size = blksz; + } + if ((__le32_to_cpu (inode->b.blocks.double_indir_block) << + log2_blksz) != indir1_blkno) { + status = ext2fs_devread (__le32_to_cpu(inode->b.blocks.double_indir_block) << log2_blksz, + 0, blksz, + (char *) indir1_block); + if (status == 0) { + printf ("** ext2fs read block (indir 2 1) failed. **\n"); + return (-1); + } + indir1_blkno = + __le32_to_cpu (inode->b.blocks.double_indir_block) << log2_blksz; + } + + if (indir2_block == NULL) { + indir2_block = (uint32_t *) malloc (blksz); + if (indir2_block == NULL) { + printf ("** ext2fs read block (indir 2 2) malloc failed. **\n"); + return (-1); + } + indir2_size = blksz; + indir2_blkno = -1; + } + if (blksz != indir2_size) { + free (indir2_block); + indir2_block = NULL; + indir2_size = 0; + indir2_blkno = -1; + indir2_block = (uint32_t *) malloc (blksz); + if (indir2_block == NULL) { + printf ("** ext2fs read block (indir 2 2) malloc failed. **\n"); + return (-1); + } + indir2_size = blksz; + } + if ((__le32_to_cpu (indir1_block[rblock / perblock]) << + log2_blksz) != indir1_blkno) { + status = ext2fs_devread (__le32_to_cpu(indir1_block[rblock / perblock]) << log2_blksz, + 0, blksz, + (char *) indir2_block); + if (status == 0) { + printf ("** ext2fs read block (indir 2 2) failed. 
**\n"); + return (-1); + } + indir2_blkno = + __le32_to_cpu (indir1_block[rblock / perblock]) << log2_blksz; + } + blknr = __le32_to_cpu (indir2_block[rblock % perblock]); + } + /* Tripple indirect. */ + else { + printf ("** ext2fs doesn't support tripple indirect blocks. **\n"); + return (-1); + } +#ifdef DEBUG + printf ("ext2fs_read_block %08x\n", blknr); +#endif + return (blknr); +} + + +int ext2fs_read_file + (ext2fs_node_t node, int pos, unsigned int len, char *buf) { + int i; + int blockcnt; + int log2blocksize = LOG2_EXT2_BLOCK_SIZE (node->data); + int blocksize = 1 << (log2blocksize + DISK_SECTOR_BITS); + unsigned int filesize = node->inode.size; + + /* Adjust len so it we can't read past the end of the file. */ + if (len > filesize) { + len = filesize; + } + blockcnt = ((len + pos) + blocksize - 1) / blocksize; + + for (i = pos / blocksize; i < blockcnt; i++) { + int blknr; + int blockoff = pos % blocksize; + int blockend = blocksize; + + int skipfirst = 0; + + blknr = ext2fs_read_block (node, i); + if (blknr < 0) { + return (-1); + } + blknr = blknr << log2blocksize; + + /* Last block. */ + if (i == blockcnt - 1) { + blockend = (len + pos) % blocksize; + + /* The last portion is exactly blocksize. */ + if (!blockend) { + blockend = blocksize; + } + } + + /* First block. */ + if (i == pos / blocksize) { + skipfirst = blockoff; + blockend -= skipfirst; + } + + /* If the block number is 0 this block is not stored on disk but + is zero filled instead. 
*/ + if (blknr) { + int status; + + status = ext2fs_devread (blknr, skipfirst, blockend, buf); + if (status == 0) { + return (-1); + } + } else { + memset (buf, blocksize - skipfirst, 0); + } + buf += blocksize - skipfirst; + } + return (len); +} + + +static int ext2fs_iterate_dir (ext2fs_node_t dir, char *name, ext2fs_node_t * fnode, int *ftype) +{ + unsigned int fpos = 0; + int status; + struct ext2fs_node *diro = (struct ext2fs_node *) dir; + +#ifdef DEBUG + if (name != NULL) + printf ("Iterate dir %s\n", name); +#endif /* of DEBUG */ + if (!diro->inode_read) { + status = ext2fs_read_inode (diro->data, diro->ino, + &diro->inode); + if (status == 0) { + return (0); + } + } + /* Search the file. */ + while (fpos < __le32_to_cpu (diro->inode.size)) { + struct ext2_dirent dirent; + + status = ext2fs_read_file (diro, fpos, + sizeof (struct ext2_dirent), + (char *) &dirent); + if (status < 1) { + return (0); + } + if (dirent.namelen != 0) { + char filename[dirent.namelen + 1]; + ext2fs_node_t fdiro; + int type = FILETYPE_UNKNOWN; + + status = ext2fs_read_file (diro, + fpos + sizeof (struct ext2_dirent), + dirent.namelen, filename); + if (status < 1) { + return (0); + } + fdiro = malloc (sizeof (struct ext2fs_node)); + if (!fdiro) { + return (0); + } + + fdiro->data = diro->data; + fdiro->ino = __le32_to_cpu (dirent.inode); + + filename[dirent.namelen] = '\0'; + + if (dirent.filetype != FILETYPE_UNKNOWN) { + fdiro->inode_read = 0; + + if (dirent.filetype == FILETYPE_DIRECTORY) { + type = FILETYPE_DIRECTORY; + } else if (dirent.filetype == + FILETYPE_SYMLINK) { + type = FILETYPE_SYMLINK; + } else if (dirent.filetype == FILETYPE_REG) { + type = FILETYPE_REG; + } + } else { + /* The filetype can not be read from the dirent, get it from inode */ + + status = ext2fs_read_inode (diro->data, + __le32_to_cpu(dirent.inode), + &fdiro->inode); + if (status == 0) { + free (fdiro); + return (0); + } + fdiro->inode_read = 1; + + if ((__le16_to_cpu (fdiro->inode.mode) & + 
FILETYPE_INO_MASK) == + FILETYPE_INO_DIRECTORY) { + type = FILETYPE_DIRECTORY; + } else if ((__le16_to_cpu (fdiro->inode.mode) + & FILETYPE_INO_MASK) == + FILETYPE_INO_SYMLINK) { + type = FILETYPE_SYMLINK; + } else if ((__le16_to_cpu (fdiro->inode.mode) + & FILETYPE_INO_MASK) == + FILETYPE_INO_REG) { + type = FILETYPE_REG; + } + } +#ifdef DEBUG + printf ("iterate >%s<\n", filename); +#endif /* of DEBUG */ + if ((name != NULL) && (fnode != NULL) + && (ftype != NULL)) { + if (strcmp (filename, name) == 0) { + *ftype = type; + *fnode = fdiro; + return (1); + } + } else { + if (fdiro->inode_read == 0) { + status = ext2fs_read_inode (diro->data, + __le32_to_cpu (dirent.inode), + &fdiro->inode); + if (status == 0) { + free (fdiro); + return (0); + } + fdiro->inode_read = 1; + } + switch (type) { + case FILETYPE_DIRECTORY: + printf ("<DIR> "); + break; + case FILETYPE_SYMLINK: + printf ("<SYM> "); + break; + case FILETYPE_REG: + printf (" "); + break; + default: + printf ("< ? > "); + break; + } + printf ("%10d %s\n", + __le32_to_cpu (fdiro->inode.size), + filename); + } + free (fdiro); + } + fpos += __le16_to_cpu (dirent.direntlen); + } + return (0); +} + + +static char *ext2fs_read_symlink (ext2fs_node_t node) { + char *symlink; + struct ext2fs_node *diro = node; + int status; + + if (!diro->inode_read) { + status = ext2fs_read_inode (diro->data, diro->ino, + &diro->inode); + if (status == 0) { + return (0); + } + } + symlink = malloc (__le32_to_cpu (diro->inode.size) + 1); + if (!symlink) { + return (0); + } + /* If the filesize of the symlink is bigger than + 60 the symlink is stored in a separate block, + otherwise it is stored in the inode. 
*/ + if (__le32_to_cpu (diro->inode.size) <= 60) { + strncpy (symlink, diro->inode.b.symlink, + __le32_to_cpu (diro->inode.size)); + } else { + status = ext2fs_read_file (diro, 0, + __le32_to_cpu (diro->inode.size), + symlink); + if (status == 0) { + free (symlink); + return (0); + } + } + symlink[__le32_to_cpu (diro->inode.size)] = '\0'; + return (symlink); +} + + +int ext2fs_find_file1 + (const char *currpath, + ext2fs_node_t currroot, ext2fs_node_t * currfound, int *foundtype) { + char fpath[strlen (currpath) + 1]; + char *name = fpath; + char *next; + int status; + int type = FILETYPE_DIRECTORY; + ext2fs_node_t currnode = currroot; + ext2fs_node_t oldnode = currroot; + + strncpy (fpath, currpath, strlen (currpath) + 1); + + /* Remove all leading slashes. */ + while (*name == '/') { + name++; + } + if (!*name) { + *currfound = currnode; + return (1); + } + + for (;;) { + int found; + + /* Extract the actual part from the pathname. */ + next = strchr (name, '/'); + if (next) { + /* Remove all leading slashes. */ + while (*next == '/') { + *(next++) = '\0'; + } + } + + /* At this point it is expected that the current node is a directory, check if this is true. */ + if (type != FILETYPE_DIRECTORY) { + ext2fs_free_node (currnode, currroot); + return (0); + } + + oldnode = currnode; + + /* Iterate over the directory. */ + found = ext2fs_iterate_dir (currnode, name, &currnode, &type); + if (found == 0) { + return (0); + } + if (found == -1) { + break; + } + + /* Read in the symlink and follow it. */ + if (type == FILETYPE_SYMLINK) { + char *symlink; + + /* Test if the symlink does not loop. 
*/ + if (++symlinknest == 8) { + ext2fs_free_node (currnode, currroot); + ext2fs_free_node (oldnode, currroot); + return (0); + } + + symlink = ext2fs_read_symlink (currnode); + ext2fs_free_node (currnode, currroot); + + if (!symlink) { + ext2fs_free_node (oldnode, currroot); + return (0); + } +#ifdef DEBUG + printf ("Got symlink >%s<\n", symlink); +#endif /* of DEBUG */ + /* The symlink is an absolute path, go back to the root inode. */ + if (symlink[0] == '/') { + ext2fs_free_node (oldnode, currroot); + oldnode = &ext2fs_root->diropen; + } + + /* Lookup the node the symlink points to. */ + status = ext2fs_find_file1 (symlink, oldnode, + &currnode, &type); + + free (symlink); + + if (status == 0) { + ext2fs_free_node (oldnode, currroot); + return (0); + } + } + + ext2fs_free_node (oldnode, currroot); + + /* Found the node! */ + if (!next || *next == '\0') { + *currfound = currnode; + *foundtype = type; + return (1); + } + name = next; + } + return (-1); +} + + +int ext2fs_find_file + (const char *path, + ext2fs_node_t rootnode, ext2fs_node_t * foundnode, int expecttype) { + int status; + int foundtype = FILETYPE_DIRECTORY; + + + symlinknest = 0; + if (!path) { + return (0); + } + + status = ext2fs_find_file1 (path, rootnode, foundnode, &foundtype); + if (status == 0) { + return (0); + } + /* Check if the node that was found was of the expected type. */ + if ((expecttype == FILETYPE_REG) && (foundtype != expecttype)) { + return (0); + } else if ((expecttype == FILETYPE_DIRECTORY) + && (foundtype != expecttype)) { + return (0); + } + return (1); +} + + +int ext2fs_ls (char *dirname) { + ext2fs_node_t dirnode; + int status; + + if (ext2fs_root == NULL) { + return (0); + } + + status = ext2fs_find_file (dirname, &ext2fs_root->diropen, &dirnode, + FILETYPE_DIRECTORY); + if (status != 1) { + printf ("** Can not find directory. 
**\n"); + return (1); + } + ext2fs_iterate_dir (dirnode, NULL, NULL, NULL); + ext2fs_free_node (dirnode, &ext2fs_root->diropen); + return (0); +} + + +int ext2fs_open (char *filename) { + ext2fs_node_t fdiro = NULL; + int status; + int len; + + if (ext2fs_root == NULL) { + return (-1); + } + ext2fs_file = NULL; + status = ext2fs_find_file (filename, &ext2fs_root->diropen, &fdiro, + FILETYPE_REG); + if (status == 0) { + goto fail; + } + if (!fdiro->inode_read) { + status = ext2fs_read_inode (fdiro->data, fdiro->ino, + &fdiro->inode); + if (status == 0) { + goto fail; + } + } + len = __le32_to_cpu (fdiro->inode.size); + ext2fs_file = fdiro; + return (len); + +fail: + ext2fs_free_node (fdiro, &ext2fs_root->diropen); + return (-1); +} + + +int ext2fs_close (void + ) { + if ((ext2fs_file != NULL) && (ext2fs_root != NULL)) { + ext2fs_free_node (ext2fs_file, &ext2fs_root->diropen); + ext2fs_file = NULL; + } + if (ext2fs_root != NULL) { + free (ext2fs_root); + ext2fs_root = NULL; + } + if (indir1_block != NULL) { + free (indir1_block); + indir1_block = NULL; + indir1_size = 0; + indir1_blkno = -1; + } + if (indir2_block != NULL) { + free (indir2_block); + indir2_block = NULL; + indir2_size = 0; + indir2_blkno = -1; + } + return (0); +} + + +int ext2fs_read (char *buf, unsigned len) { + int status; + + if (ext2fs_root == NULL) { + return (0); + } + + if (ext2fs_file == NULL) { + return (0); + } + + status = ext2fs_read_file (ext2fs_file, 0, len, buf); + return (status); +} + + +int ext2fs_mount (unsigned part_length) { + struct ext2_data *data; + int status; + + data = malloc (sizeof (struct ext2_data)); + if (!data) { + return (0); + } + /* Read the superblock. */ + status = ext2fs_devread (1 * 2, 0, sizeof (struct ext2_sblock), + (char *) &data->sblock); + if (status == 0) { + goto fail; + } + /* Make sure this is an ext2 filesystem. 
*/ + if (__le16_to_cpu (data->sblock.magic) != EXT2_MAGIC) { + goto fail; + } + data->diropen.data = data; + data->diropen.ino = 2; + data->diropen.inode_read = 1; + data->inode = &data->diropen.inode; + + status = ext2fs_read_inode (data, 2, data->inode); + if (status == 0) { + goto fail; + } + + ext2fs_root = data; + + return (1); + +fail: + printf ("Failed to mount ext2 filesystem...\n"); + free (data); + ext2fs_root = NULL; + return (0); +} + +#endif /* CFG_CMD_EXT2FS */ diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile new file mode 100755 index 0000000..d792c11 --- /dev/null +++ b/fs/ext4/Makefile @@ -0,0 +1,52 @@ +# +# (C) Copyright 2006 +# Wolfgang Denk, DENX Software Engineering, wd@denx.de. +# +# (C) Copyright 2003 +# Pavel Bartusek, Sysgo Real-Time Solutions AG, pba@sysgo.de +# +# +# See file CREDITS for list of people who contributed to this +# project. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# + + +include $(TOPDIR)/config.mk + +LIB = libext4fs.a + +AOBJS = +COBJS = ext4fs.o ext4_common.o dev.o ext4_journal.o crc16.o +OBJS = $(AOBJS) $(COBJS) + +#CPPFLAGS += + +all: $(LIB) $(AOBJS) + +$(LIB): .depend $(OBJS) + $(AR) crv $@ $(OBJS) + + +######################################################################### + +.depend: Makefile $(AOBJS:.o=.S) $(COBJS:.o=.c) + $(CC) -M $(CFLAGS) $(AOBJS:.o=.S) $(COBJS:.o=.c) > $@ + +sinclude .depend + +######################################################################### diff --git a/fs/ext4/crc16.c b/fs/ext4/crc16.c new file mode 100755 index 0000000..3afb34d --- /dev/null +++ b/fs/ext4/crc16.c @@ -0,0 +1,62 @@ +/* + * crc16.c + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <common.h> +#include <asm/byteorder.h> +#include <linux/stat.h> +#include "crc16.h" + +/** CRC table for the CRC-16. 
The poly is 0x8005 (x16 + x15 + x2 + 1) */ +static __u16 const crc16_table[256] = { + 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, + 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, + 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, + 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, + 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, + 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, + 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, + 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, + 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, + 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, + 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, + 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, + 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, + 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, + 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, + 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, + 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, + 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, + 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, + 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, + 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, + 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, + 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, + 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, + 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, + 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, + 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, + 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, + 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, + 
0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, + 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, + 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 +}; + +/** + * Compute the CRC-16 for the data buffer +*/ + +unsigned int ext2fs_crc16(unsigned int crc, + const void *buffer, unsigned int len) +{ + const unsigned char *cp = buffer; + + while (len--) + crc = (((crc >> 8) & 0xffU) ^ + crc16_table[(crc ^ *cp++) & 0xffU]) & 0x0000ffffU; + return crc; +} diff --git a/fs/ext4/crc16.h b/fs/ext4/crc16.h new file mode 100755 index 0000000..5fd113a --- /dev/null +++ b/fs/ext4/crc16.h @@ -0,0 +1,16 @@ +/* + * crc16.h - CRC-16 routine + * Implements the standard CRC-16: + * Width 16 + * Poly 0x8005 (x16 + x15 + x2 + 1) + * Init 0 + * + * Copyright (c) 2005 Ben Gardner <bgardner@wabtec.com> + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ +#ifndef __CRC16_H +#define __CRC16_H +extern unsigned int ext2fs_crc16(unsigned int crc, + const void *buffer, unsigned int len); +#endif diff --git a/fs/ext4/dev.c b/fs/ext4/dev.c new file mode 100755 index 0000000..471d046 --- /dev/null +++ b/fs/ext4/dev.c @@ -0,0 +1,139 @@ +/* + * (C) Copyright 2011 - 2012 Samsung Electronics + * EXT4 filesystem implementation in Uboot by + * Uma Shankar <uma.shankar@samsung.com> + * Manjunatha C Achar <a.manjunatha@samsung.com> + * + * made from existing ext2/dev.c file of Uboot + * (C) Copyright 2004 + * esd gmbh <www.esd-electronics.com> + * Reinhard Arlt <reinhard.arlt@esd-electronics.com> + * + * based on code of fs/reiserfs/dev.c by + * + * (C) Copyright 2003 - 2004 + * Sysgo AG, <www.elinos.com>, Pavel Bartusek <pba@sysgo.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later 
version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +/* + * Changelog: + * 0.1 - Newly created file for ext4fs support. Taken from + * fs/ext2/dev.c file in uboot. + */ + +#include <common.h> +#include <config.h> +#include <ext4fs.h> +#include <ext_common.h> + +unsigned long part_offset; + +static block_dev_desc_t *ext4fs_block_dev_desc; +static disk_partition_t *part_info; + +void ext4fs_set_blk_dev(block_dev_desc_t *rbdd, disk_partition_t *info) +{ + ext4fs_block_dev_desc = rbdd; + part_info = info; + part_offset = info->start; + get_fs()->total_sect = info->size;//(info->size * info->blksz) / SECTOR_SIZE; +} + +int ext4fs_devread(int sector, int byte_offset, int byte_len, char *buf) +{ + ALLOC_CACHE_ALIGN_BUFFER(char, sec_buf, SECTOR_SIZE); + unsigned block_len; + + /* Check partition boundaries */ + if ((sector < 0) + || ((sector + ((byte_offset + byte_len - 1) >> SECTOR_BITS)) >= + part_info->size)) { + printf("%s read outside partition %d\n", __func__, sector); + return 0; + } + + /* Get the read to the beginning of a partition */ + sector += byte_offset >> SECTOR_BITS; + byte_offset &= SECTOR_SIZE - 1; + + //printf(" <%d, %d, %d>\n", sector, byte_offset, byte_len); + + if (ext4fs_block_dev_desc == NULL) { + printf("** Invalid Block Device Descriptor (NULL)\n"); + return 0; + } + + if (byte_offset != 0) { + /* read first part which isn't aligned with start of sector */ + if (ext4fs_block_dev_desc-> + block_read(ext4fs_block_dev_desc->dev, + part_info->start + sector, 1, + (unsigned long *) sec_buf) != 1) { + printf(" ** ext2fs_devread() read 
error **\n"); + return 0; + } + memcpy(buf, sec_buf + byte_offset, + min(SECTOR_SIZE - byte_offset, byte_len)); + buf += min(SECTOR_SIZE - byte_offset, byte_len); + byte_len -= min(SECTOR_SIZE - byte_offset, byte_len); + sector++; + } + + if (byte_len == 0) + return 1; + + /* read sector aligned part */ + block_len = byte_len & ~(SECTOR_SIZE - 1); + + if (block_len == 0) { + ALLOC_CACHE_ALIGN_BUFFER(u8, p, SECTOR_SIZE); + + block_len = SECTOR_SIZE; + ext4fs_block_dev_desc->block_read(ext4fs_block_dev_desc->dev, + part_info->start + sector, + 1, (unsigned long *)p); + memcpy(buf, p, byte_len); + return 1; + } + + if (ext4fs_block_dev_desc->block_read(ext4fs_block_dev_desc->dev, + part_info->start + sector, + block_len / SECTOR_SIZE, + (unsigned long *) buf) != + block_len / SECTOR_SIZE) { + printf(" ** %s read error - block\n", __func__); + return 0; + } + block_len = byte_len & ~(SECTOR_SIZE - 1); + buf += block_len; + byte_len -= block_len; + sector += block_len / SECTOR_SIZE; + + if (byte_len != 0) { + /* read rest of data which are not in whole sector */ + if (ext4fs_block_dev_desc-> + block_read(ext4fs_block_dev_desc->dev, + part_info->start + sector, 1, + (unsigned long *) sec_buf) != 1) { + printf("* %s read error - last part\n", __func__); + return 0; + } + memcpy(buf, sec_buf, byte_len); + } + return 1; +} diff --git a/fs/ext4/ext4_common.c b/fs/ext4/ext4_common.c new file mode 100755 index 0000000..fee02dd --- /dev/null +++ b/fs/ext4/ext4_common.c @@ -0,0 +1,2230 @@ +/* + * (C) Copyright 2011 - 2012 Samsung Electronics + * EXT4 filesystem implementation in Uboot by + * Uma Shankar <uma.shankar@samsung.com> + * Manjunatha C Achar <a.manjunatha@samsung.com> + * + * ext4ls and ext4load : Based on ext2 ls load support in Uboot. 
+ * + * (C) Copyright 2004 + * esd gmbh <www.esd-electronics.com> + * Reinhard Arlt <reinhard.arlt@esd-electronics.com> + * + * based on code from grub2 fs/ext2.c and fs/fshelp.c by + * GRUB -- GRand Unified Bootloader + * Copyright (C) 2003, 2004 Free Software Foundation, Inc. + * + * ext4write : Based on generic ext4 protocol. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <common.h> +#include <ext_common.h> +#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> +#include <linux/stat.h> +#include <linux/time.h> +#include <asm/byteorder.h> +#include "ext4_common.h" + +struct ext2_data *ext4fs_root; +struct ext2fs_node *ext4fs_file; +uint32_t *ext4fs_indir1_block; +int ext4fs_indir1_size; +int ext4fs_indir1_blkno = -1; +uint32_t *ext4fs_indir2_block; +int ext4fs_indir2_size; +int ext4fs_indir2_blkno = -1; + +uint32_t *ext4fs_indir3_block; +int ext4fs_indir3_size; +int ext4fs_indir3_blkno = -1; +struct ext2_inode *g_parent_inode; +static int symlinknest; + +uint32_t ext4fs_div_roundup(uint32_t size, uint32_t n) +{ + uint32_t res = size / n; + if (res * n != size) + res++; + + return res; +} + +void put_ext4(uint64_t off, void *buf, uint32_t size) +{ + uint64_t startblock; + uint64_t remainder; + unsigned char *temp_ptr = NULL; + ALLOC_CACHE_ALIGN_BUFFER(unsigned char, sec_buf, SECTOR_SIZE); + struct ext_filesystem *fs = get_fs(); + + startblock = off / (uint64_t)SECTOR_SIZE; + startblock += part_offset; + remainder = off % (uint64_t)SECTOR_SIZE; + remainder &= SECTOR_SIZE - 1; + + if (fs->dev_desc == NULL) { + printf("error: dev_desc is null\n"); + return; + } + + if ((startblock + (size / SECTOR_SIZE)) > + (part_offset + fs->total_sect)) { + printf("part_offset is %lu\n", part_offset); + printf("total_sector is %llu\n", fs->total_sect); + printf("error: overflow occurs\n"); + return; + } + if (remainder) { + if (fs->dev_desc->block_read) { + fs->dev_desc->block_read(fs->dev_desc->dev, + startblock, 1, sec_buf); + temp_ptr = sec_buf; + memcpy((temp_ptr + remainder), + (unsigned char *)buf, size); + fs->dev_desc->block_write(fs->dev_desc->dev, + startblock, 1, sec_buf); + } + } else { + if (size / SECTOR_SIZE != 0) { + fs->dev_desc->block_write(fs->dev_desc->dev, + startblock, + size / SECTOR_SIZE, + (unsigned long *)buf); + } else { + fs->dev_desc->block_read(fs->dev_desc->dev, + startblock, 1, sec_buf); + 
temp_ptr = sec_buf; + memcpy(temp_ptr, buf, size); + fs->dev_desc->block_write(fs->dev_desc->dev, + startblock, 1, + (unsigned long *)sec_buf); + } + } +} + +static int _get_new_inode_no(unsigned char *buffer) +{ + struct ext_filesystem *fs = get_fs(); + unsigned char input; + int operand, status; + int count = 1; + int j = 0; + + /* get the blocksize of the filesystem */ + unsigned char *ptr = buffer; + while (*ptr == 255) { + ptr++; + count += 8; + if (count > ext4fs_root->sblock.inodes_per_group) + return -1; + } + + for (j = 0; j < fs->blksz; j++) { + input = *ptr; + int i = 0; + while (i <= 7) { + operand = 1 << i; + status = input & operand; + if (status) { + i++; + count++; + } else { + *ptr |= operand; + return count; + } + } + ptr = ptr + 1; + } + + return -1; +} + +static int _get_new_blk_no(unsigned char *buffer) +{ + unsigned char input; + int operand, status; + int count = 0; + int j = 0; + unsigned char *ptr = buffer; + struct ext_filesystem *fs = get_fs(); + + if (fs->blksz != 1024) + count = 0; + else + count = 1; + + while (*ptr == 255) { + ptr++; + count += 8; + if (count == (fs->blksz * 8)) + return -1; + } + + for (j = 0; j < fs->blksz; j++) { + input = *ptr; + int i = 0; + while (i <= 7) { + operand = 1 << i; + status = input & operand; + if (status) { + i++; + count++; + } else { + *ptr |= operand; + return count; + } + } + ptr = ptr + 1; + } + + return -1; +} + +int ext4fs_set_block_bmap(long int blockno, unsigned char *buffer, int index) +{ + int i, remainder, status; + unsigned char *ptr = buffer; + unsigned char operand; + i = blockno / 8; + remainder = blockno % 8; + int blocksize = EXT2_BLOCK_SIZE(ext4fs_root); + + i = i - (index * blocksize); + if (blocksize != 1024) { + ptr = ptr + i; + operand = 1 << remainder; + status = *ptr & operand; + if (status) + return -1; + + *ptr = *ptr | operand; + return 0; + } else { + if (remainder == 0) { + ptr = ptr + i - 1; + operand = (1 << 7); + } else { + ptr = ptr + i; + operand = (1 << (remainder 
- 1)); + } + status = *ptr & operand; + if (status) + return -1; + + *ptr = *ptr | operand; + return 0; + } +} + +void ext4fs_reset_block_bmap(long int blockno, unsigned char *buffer, int index) +{ + int i, remainder, status; + unsigned char *ptr = buffer; + unsigned char operand; + i = blockno / 8; + remainder = blockno % 8; + int blocksize = EXT2_BLOCK_SIZE(ext4fs_root); + + i = i - (index * blocksize); + if (blocksize != 1024) { + ptr = ptr + i; + operand = (1 << remainder); + status = *ptr & operand; + if (status) + *ptr = *ptr & ~(operand); + } else { + if (remainder == 0) { + ptr = ptr + i - 1; + operand = (1 << 7); + } else { + ptr = ptr + i; + operand = (1 << (remainder - 1)); + } + status = *ptr & operand; + if (status) + *ptr = *ptr & ~(operand); + } +} + +int ext4fs_set_inode_bmap(int inode_no, unsigned char *buffer, int index) +{ + int i, remainder, status; + unsigned char *ptr = buffer; + unsigned char operand; + + inode_no -= (index * ext4fs_root->sblock.inodes_per_group); + i = inode_no / 8; + remainder = inode_no % 8; + if (remainder == 0) { + ptr = ptr + i - 1; + operand = (1 << 7); + } else { + ptr = ptr + i; + operand = (1 << (remainder - 1)); + } + status = *ptr & operand; + if (status) + return -1; + + *ptr = *ptr | operand; + + return 0; +} + +void ext4fs_reset_inode_bmap(int inode_no, unsigned char *buffer, int index) +{ + int i, remainder, status; + unsigned char *ptr = buffer; + unsigned char operand; + + inode_no -= (index * ext4fs_root->sblock.inodes_per_group); + i = inode_no / 8; + remainder = inode_no % 8; + if (remainder == 0) { + ptr = ptr + i - 1; + operand = (1 << 7); + } else { + ptr = ptr + i; + operand = (1 << (remainder - 1)); + } + status = *ptr & operand; + if (status) + *ptr = *ptr & ~(operand); +} + +int ext4fs_checksum_update(unsigned int i) +{ + struct ext2_block_group *desc; + struct ext_filesystem *fs = get_fs(); + __u16 crc = 0; + + desc = (struct ext2_block_group *)&fs->bgd[i]; + if (fs->sb->feature_ro_compat & 
EXT4_FEATURE_RO_COMPAT_GDT_CSUM) { + int offset = offsetof(struct ext2_block_group, bg_checksum); + + crc = ext2fs_crc16(~0, fs->sb->unique_id, + sizeof(fs->sb->unique_id)); + crc = ext2fs_crc16(crc, &i, sizeof(i)); + crc = ext2fs_crc16(crc, desc, offset); + offset += sizeof(desc->bg_checksum); /* skip checksum */ + assert(offset == sizeof(*desc)); + } + + return crc; +} + +static int check_void_in_dentry(struct ext2_dirent *dir, char *filename) +{ + int dentry_length; + int sizeof_void_space; + int new_entry_byte_reqd; + short padding_factor = 0; + + if (dir->namelen % 4 != 0) + padding_factor = 4 - (dir->namelen % 4); + + dentry_length = sizeof(struct ext2_dirent) + + dir->namelen + padding_factor; + sizeof_void_space = dir->direntlen - dentry_length; + if (sizeof_void_space == 0) + return 0; + + padding_factor = 0; + if (strlen(filename) % 4 != 0) + padding_factor = 4 - (strlen(filename) % 4); + + new_entry_byte_reqd = strlen(filename) + + sizeof(struct ext2_dirent) + padding_factor; + if (sizeof_void_space >= new_entry_byte_reqd) { + dir->direntlen = dentry_length; + return sizeof_void_space; + } + + return 0; +} + +void ext4fs_update_parent_dentry(char *filename, int *p_ino, int file_type) +{ + unsigned int *zero_buffer = NULL; + char *root_first_block_buffer = NULL; + int direct_blk_idx; + long int root_blknr; + long int first_block_no_of_root = 0; + long int previous_blknr = -1; + int totalbytes = 0; + short int padding_factor = 0; + unsigned int new_entry_byte_reqd; + unsigned int last_entry_dirlen; + int sizeof_void_space = 0; + int templength = 0; + int inodeno; + int status; + struct ext_filesystem *fs = get_fs(); + /* directory entry */ + struct ext2_dirent *dir; + char *ptr = NULL; + char *temp_dir = NULL; + + zero_buffer = zalloc(fs->blksz); + if (!zero_buffer) { + printf("No Memory\n"); + return; + } + root_first_block_buffer = zalloc(fs->blksz); + if (!root_first_block_buffer) { + free(zero_buffer); + printf("No Memory\n"); + return; + } +restart: + 
+ /* read the block no allocated to a file */ + for (direct_blk_idx = 0; direct_blk_idx < INDIRECT_BLOCKS; + direct_blk_idx++) { + root_blknr = read_allocated_block(g_parent_inode, + direct_blk_idx); + if (root_blknr == 0) { + first_block_no_of_root = previous_blknr; + break; + } + previous_blknr = root_blknr; + } + + status = ext4fs_devread(first_block_no_of_root + * fs->sect_perblk, + 0, fs->blksz, root_first_block_buffer); + if (status == 0) + goto fail; + + if (ext4fs_log_journal(root_first_block_buffer, first_block_no_of_root)) + goto fail; + dir = (struct ext2_dirent *)root_first_block_buffer; + ptr = (char *)dir; + totalbytes = 0; + while (dir->direntlen > 0) { + /* + * blocksize-totalbytes because last directory length + * i.e. dir->direntlen is free availble space in the + * block that means it is a last entry of directory + * entry + */ + + /* traversing the each directory entry */ + if (fs->blksz - totalbytes == dir->direntlen) { + if (strlen(filename) % 4 != 0) + padding_factor = 4 - (strlen(filename) % 4); + + new_entry_byte_reqd = strlen(filename) + + sizeof(struct ext2_dirent) + padding_factor; + padding_factor = 0; + /* + * update last directory entry length to its + * length because we are creating new directory + * entry + */ + if (dir->namelen % 4 != 0) + padding_factor = 4 - (dir->namelen % 4); + + last_entry_dirlen = dir->namelen + + sizeof(struct ext2_dirent) + padding_factor; + if ((fs->blksz - totalbytes - last_entry_dirlen) < + new_entry_byte_reqd) { + printf("1st Block Full:Allocate new block\n"); + + if (direct_blk_idx == INDIRECT_BLOCKS - 1) { + printf("Directory exceeds limit\n"); + goto fail; + } + g_parent_inode->b.blocks.dir_blocks + [direct_blk_idx] = ext4fs_get_new_blk_no(); + if (g_parent_inode->b.blocks.dir_blocks + [direct_blk_idx] == -1) { + printf("no block left to assign\n"); + goto fail; + } + put_ext4(((uint64_t) + (g_parent_inode->b. 
+ blocks.dir_blocks[direct_blk_idx] * + fs->blksz)), zero_buffer, fs->blksz); + g_parent_inode->size = + g_parent_inode->size + fs->blksz; + g_parent_inode->blockcnt = + g_parent_inode->blockcnt + fs->sect_perblk; + if (ext4fs_put_metadata + (root_first_block_buffer, + first_block_no_of_root)) + goto fail; + goto restart; + } + dir->direntlen = last_entry_dirlen; + break; + } + + templength = dir->direntlen; + totalbytes = totalbytes + templength; + sizeof_void_space = check_void_in_dentry(dir, filename); + if (sizeof_void_space) + break; + + dir = (struct ext2_dirent *)((char *)dir + templength); + ptr = (char *)dir; + } + + /* make a pointer ready for creating next directory entry */ + templength = dir->direntlen; + totalbytes = totalbytes + templength; + dir = (struct ext2_dirent *)((char *)dir + templength); + ptr = (char *)dir; + + /* get the next available inode number */ + inodeno = ext4fs_get_new_inode_no(); + if (inodeno == -1) { + printf("no inode left to assign\n"); + goto fail; + } + dir->inode = inodeno; + if (sizeof_void_space) + dir->direntlen = sizeof_void_space; + else + dir->direntlen = fs->blksz - totalbytes; + + dir->namelen = strlen(filename); + dir->filetype = FILETYPE_REG; /* regular file */ + temp_dir = (char *)dir; + temp_dir = temp_dir + sizeof(struct ext2_dirent); + memcpy(temp_dir, filename, strlen(filename)); + + *p_ino = inodeno; + + /* update or write the 1st block of root inode */ + if (ext4fs_put_metadata(root_first_block_buffer, + first_block_no_of_root)) + goto fail; + +fail: + free(zero_buffer); + free(root_first_block_buffer); +} + +static int search_dir(struct ext2_inode *parent_inode, char *dirname) +{ + int status; + int inodeno; + int totalbytes; + int templength; + int direct_blk_idx; + long int blknr; + int found = 0; + char *ptr = NULL; + unsigned char *block_buffer = NULL; + struct ext2_dirent *dir = NULL; + struct ext2_dirent *previous_dir = NULL; + struct ext_filesystem *fs = get_fs(); + + /* read the block no 
allocated to a file */ + for (direct_blk_idx = 0; direct_blk_idx < INDIRECT_BLOCKS; + direct_blk_idx++) { + blknr = read_allocated_block(parent_inode, direct_blk_idx); + if (blknr == 0) + goto fail; + + /* read the blocks of parenet inode */ + block_buffer = zalloc(fs->blksz); + if (!block_buffer) + goto fail; + + status = ext4fs_devread(blknr * fs->sect_perblk, + 0, fs->blksz, (char *)block_buffer); + if (status == 0) + goto fail; + + dir = (struct ext2_dirent *)block_buffer; + ptr = (char *)dir; + totalbytes = 0; + while (dir->direntlen >= 0) { + /* + * blocksize-totalbytes because last directory + * length i.e.,*dir->direntlen is free availble + * space in the block that means + * it is a last entry of directory entry + */ + if (strlen(dirname) == dir->namelen) { + if (strncmp(dirname, ptr + + sizeof(struct ext2_dirent), + dir->namelen) == 0) { + previous_dir->direntlen += + dir->direntlen; + inodeno = dir->inode; + dir->inode = 0; + found = 1; + break; + } + } + + if (fs->blksz - totalbytes == dir->direntlen) + break; + + /* traversing the each directory entry */ + templength = dir->direntlen; + totalbytes = totalbytes + templength; + previous_dir = dir; + dir = (struct ext2_dirent *)((char *)dir + templength); + ptr = (char *)dir; + } + + if (found == 1) { + free(block_buffer); + block_buffer = NULL; + return inodeno; + } + + free(block_buffer); + block_buffer = NULL; + } + +fail: + free(block_buffer); + + return -1; +} + +static int find_dir_depth(char *dirname) +{ + char *token = strtok(dirname, "/"); + int count = 0; + while (token != NULL) { + token = strtok(NULL, "/"); + count++; + } + return count + 1 + 1; + /* + * for example for string /home/temp + * depth=home(1)+temp(1)+1 extra for NULL; + * so count is 4; + */ +} + +static int parse_path(char **arr, char *dirname) +{ + char *token = strtok(dirname, "/"); + int i = 0; + + /* add root */ + arr[i] = zalloc(strlen("/") + 1); + if (!arr[i]) + return -ENOMEM; + + arr[i++] = "/"; + + /* add each path entry 
after root */ + while (token != NULL) { + arr[i] = zalloc(strlen(token) + 1); + if (!arr[i]) + return -ENOMEM; + memcpy(arr[i++], token, strlen(token)); + token = strtok(NULL, "/"); + } + arr[i] = NULL; + + return 0; +} + +int ext4fs_iget(int inode_no, struct ext2_inode *inode) +{ + if (ext4fs_read_inode(ext4fs_root, inode_no, inode) == 0) + return -1; + + return 0; +} + +/* + * Function: ext4fs_get_parent_inode_num + * Return Value: inode Number of the parent directory of file/Directory to be + * created + * dirname : Input parmater, input path name of the file/directory to be created + * dname : Output parameter, to be filled with the name of the directory + * extracted from dirname + */ +int ext4fs_get_parent_inode_num(const char *dirname, char *dname, int flags) +{ + int i; + int depth = 0; + int matched_inode_no; + int result_inode_no = -1; + char **ptr = NULL; + char *depth_dirname = NULL; + char *parse_dirname = NULL; + struct ext2_inode *parent_inode = NULL; + struct ext2_inode *first_inode = NULL; + struct ext2_inode temp_inode; + + if (*dirname != '/') { + printf("Please supply Absolute path\n"); + return -1; + } + + /* TODO: input validation make equivalent to linux */ + depth_dirname = zalloc(strlen(dirname) + 1); + if (!depth_dirname) + return -ENOMEM; + + memcpy(depth_dirname, dirname, strlen(dirname)); + depth = find_dir_depth(depth_dirname); + parse_dirname = zalloc(strlen(dirname) + 1); + if (!parse_dirname) + goto fail; + memcpy(parse_dirname, dirname, strlen(dirname)); + + /* allocate memory for each directory level */ + ptr = zalloc((depth) * sizeof(char *)); + if (!ptr) + goto fail; + if (parse_path(ptr, parse_dirname)) + goto fail; + parent_inode = zalloc(sizeof(struct ext2_inode)); + if (!parent_inode) + goto fail; + first_inode = zalloc(sizeof(struct ext2_inode)); + if (!first_inode) + goto fail; + memcpy(parent_inode, ext4fs_root->inode, sizeof(struct ext2_inode)); + memcpy(first_inode, parent_inode, sizeof(struct ext2_inode)); + if (flags 
& F_FILE) + result_inode_no = EXT2_ROOT_INO; + for (i = 1; i < depth; i++) { + matched_inode_no = search_dir(parent_inode, ptr[i]); + if (matched_inode_no == -1) { + if (ptr[i + 1] == NULL && i == 1) { + result_inode_no = EXT2_ROOT_INO; + goto end; + } else { + if (ptr[i + 1] == NULL) + break; + printf("Invalid path\n"); + result_inode_no = -1; + goto fail; + } + } else { + if (ptr[i + 1] != NULL) { + memset(parent_inode, '\0', + sizeof(struct ext2_inode)); + if (ext4fs_iget(matched_inode_no, + parent_inode)) { + result_inode_no = -1; + goto fail; + } + result_inode_no = matched_inode_no; + } else { + break; + } + } + } + +end: + if (i == 1) + matched_inode_no = search_dir(first_inode, ptr[i]); + else + matched_inode_no = search_dir(parent_inode, ptr[i]); + + if (matched_inode_no != -1) { + ext4fs_iget(matched_inode_no, &temp_inode); + if (temp_inode.mode & S_IFDIR) { + printf("It is a Directory\n"); + result_inode_no = -1; + goto fail; + } + } + + if (strlen(ptr[i]) > 256) { + result_inode_no = -1; + goto fail; + } + memcpy(dname, ptr[i], strlen(ptr[i])); + +fail: + free(depth_dirname); + free(parse_dirname); + free(ptr); + free(parent_inode); + free(first_inode); + + return result_inode_no; +} + +static int check_filename(char *filename, unsigned int blknr) +{ + unsigned int first_block_no_of_root; + int totalbytes = 0; + int templength = 0; + int status, inodeno; + int found = 0; + char *root_first_block_buffer = NULL; + char *root_first_block_addr = NULL; + struct ext2_dirent *dir = NULL; + struct ext2_dirent *previous_dir = NULL; + char *ptr = NULL; + struct ext_filesystem *fs = get_fs(); + + /* get the first block of root */ + first_block_no_of_root = blknr; + root_first_block_buffer = zalloc(fs->blksz); + if (!root_first_block_buffer) + return -ENOMEM; + root_first_block_addr = root_first_block_buffer; + status = ext4fs_devread(first_block_no_of_root * + fs->sect_perblk, 0, + fs->blksz, root_first_block_buffer); + if (status == 0) + goto fail; + + if 
(ext4fs_log_journal(root_first_block_buffer, first_block_no_of_root)) + goto fail; + dir = (struct ext2_dirent *)root_first_block_buffer; + ptr = (char *)dir; + totalbytes = 0; + while (dir->direntlen >= 0) { + /* + * blocksize-totalbytes because last + * directory length i.e., *dir->direntlen + * is free availble space in the block that + * means it is a last entry of directory entry + */ + if (strlen(filename) == dir->namelen) { + if (strncmp(filename, ptr + sizeof(struct ext2_dirent), + dir->namelen) == 0) { + printf("file found deleting\n"); + previous_dir->direntlen += dir->direntlen; + inodeno = dir->inode; + dir->inode = 0; + found = 1; + break; + } + } + + if (fs->blksz - totalbytes == dir->direntlen) + break; + + /* traversing the each directory entry */ + templength = dir->direntlen; + totalbytes = totalbytes + templength; + previous_dir = dir; + dir = (struct ext2_dirent *)((char *)dir + templength); + ptr = (char *)dir; + } + + + if (found == 1) { + if (ext4fs_put_metadata(root_first_block_addr, + first_block_no_of_root)) + goto fail; + return inodeno; + } +fail: + free(root_first_block_buffer); + + return -1; +} + +int ext4fs_filename_check(char *filename) +{ + short direct_blk_idx = 0; + long int blknr = -1; + int inodeno = -1; + + /* read the block no allocated to a file */ + for (direct_blk_idx = 0; direct_blk_idx < INDIRECT_BLOCKS; + direct_blk_idx++) { + blknr = read_allocated_block(g_parent_inode, direct_blk_idx); + if (blknr == 0) + break; + inodeno = check_filename(filename, blknr); + if (inodeno != -1) + return inodeno; + } + + return -1; +} + +long int ext4fs_get_new_blk_no(void) +{ + short i; + short status; + int remainder; + unsigned int bg_idx; + static int prev_bg_bitmap_index = -1; + unsigned int blk_per_grp = ext4fs_root->sblock.blocks_per_group; + struct ext_filesystem *fs = get_fs(); + char *journal_buffer = zalloc(fs->blksz); + char *zero_buffer = zalloc(fs->blksz); + if (!journal_buffer || !zero_buffer) + goto fail; + struct 
ext2_block_group *bgd = (struct ext2_block_group *)fs->gdtable; + + if (fs->first_pass_bbmap == 0) { + for (i = 0; i < fs->no_blkgrp; i++) { + if (bgd[i].free_blocks) { + if (bgd[i].bg_flags & EXT4_BG_BLOCK_UNINIT) { + put_ext4(((uint64_t) (bgd[i].block_id * + fs->blksz)), + zero_buffer, fs->blksz); + bgd[i].bg_flags = + bgd[i]. + bg_flags & ~EXT4_BG_BLOCK_UNINIT; + memcpy(fs->blk_bmaps[i], zero_buffer, + fs->blksz); + } + fs->curr_blkno = + _get_new_blk_no(fs->blk_bmaps[i]); + if (fs->curr_blkno == -1) + /* if block bitmap is completely fill */ + continue; + fs->curr_blkno = fs->curr_blkno + + (i * fs->blksz * 8); + fs->first_pass_bbmap++; + bgd[i].free_blocks--; + fs->sb->free_blocks--; + status = ext4fs_devread(bgd[i].block_id * + fs->sect_perblk, 0, + fs->blksz, + journal_buffer); + if (status == 0) + goto fail; + if (ext4fs_log_journal(journal_buffer, + bgd[i].block_id)) + goto fail; + goto success; + } else { + debug("no space left on block group %d\n", i); + } + } + + goto fail; + } else { +restart: + fs->curr_blkno++; + /* get the blockbitmap index respective to blockno */ + if (fs->blksz != 1024) { + bg_idx = fs->curr_blkno / blk_per_grp; + } else { + bg_idx = fs->curr_blkno / blk_per_grp; + remainder = fs->curr_blkno % blk_per_grp; + if (!remainder) + bg_idx--; + } + + /* + * To skip completely filled block group bitmaps + * Optimize the block allocation + */ + if (bg_idx >= fs->no_blkgrp) + goto fail; + + if (bgd[bg_idx].free_blocks == 0) { + debug("block group %u is full. 
Skipping\n", bg_idx); + fs->curr_blkno = fs->curr_blkno + blk_per_grp; + fs->curr_blkno--; + goto restart; + } + + if (bgd[bg_idx].bg_flags & EXT4_BG_BLOCK_UNINIT) { + memset(zero_buffer, '\0', fs->blksz); + put_ext4(((uint64_t) (bgd[bg_idx].block_id * + fs->blksz)), zero_buffer, fs->blksz); + memcpy(fs->blk_bmaps[bg_idx], zero_buffer, fs->blksz); + bgd[bg_idx].bg_flags = bgd[bg_idx].bg_flags & + ~EXT4_BG_BLOCK_UNINIT; + } + + if (ext4fs_set_block_bmap(fs->curr_blkno, fs->blk_bmaps[bg_idx], + bg_idx) != 0) { + debug("going for restart for the block no %ld %u\n", + fs->curr_blkno, bg_idx); + goto restart; + } + + /* journal backup */ + if (prev_bg_bitmap_index != bg_idx) { + memset(journal_buffer, '\0', fs->blksz); + status = ext4fs_devread(bgd[bg_idx].block_id + * fs->sect_perblk, + 0, fs->blksz, journal_buffer); + if (status == 0) + goto fail; + if (ext4fs_log_journal(journal_buffer, + bgd[bg_idx].block_id)) + goto fail; + + prev_bg_bitmap_index = bg_idx; + } + bgd[bg_idx].free_blocks--; + fs->sb->free_blocks--; + goto success; + } +success: + free(journal_buffer); + free(zero_buffer); + + return fs->curr_blkno; +fail: + free(journal_buffer); + free(zero_buffer); + + return -1; +} + +int ext4fs_get_new_inode_no(void) +{ + short i; + short status; + unsigned int ibmap_idx; + static int prev_inode_bitmap_index = -1; + unsigned int inodes_per_grp = ext4fs_root->sblock.inodes_per_group; + struct ext_filesystem *fs = get_fs(); + char *journal_buffer = zalloc(fs->blksz); + char *zero_buffer = zalloc(fs->blksz); + if (!journal_buffer || !zero_buffer) + goto fail; + struct ext2_block_group *bgd = (struct ext2_block_group *)fs->gdtable; + + if (fs->first_pass_ibmap == 0) { + for (i = 0; i < fs->no_blkgrp; i++) { + if (bgd[i].free_inodes) { + if (bgd[i].bg_itable_unused != + bgd[i].free_inodes) + bgd[i].bg_itable_unused = + bgd[i].free_inodes; + if (bgd[i].bg_flags & EXT4_BG_INODE_UNINIT) { + put_ext4(((uint64_t) + (bgd[i].inode_id * + fs->blksz)), + zero_buffer, 
fs->blksz); + bgd[i].bg_flags = bgd[i].bg_flags & + ~EXT4_BG_INODE_UNINIT; + memcpy(fs->inode_bmaps[i], + zero_buffer, fs->blksz); + } + fs->curr_inode_no = + _get_new_inode_no(fs->inode_bmaps[i]); + if (fs->curr_inode_no == -1) + /* if block bitmap is completely fill */ + continue; + fs->curr_inode_no = fs->curr_inode_no + + (i * inodes_per_grp); + fs->first_pass_ibmap++; + bgd[i].free_inodes--; + bgd[i].bg_itable_unused--; + fs->sb->free_inodes--; + status = ext4fs_devread(bgd[i].inode_id * + fs->sect_perblk, 0, + fs->blksz, + journal_buffer); + if (status == 0) + goto fail; + if (ext4fs_log_journal(journal_buffer, + bgd[i].inode_id)) + goto fail; + goto success; + } else + debug("no inode left on block group %d\n", i); + } + goto fail; + } else { +restart: + fs->curr_inode_no++; + /* get the blockbitmap index respective to blockno */ + ibmap_idx = fs->curr_inode_no / inodes_per_grp; + if (bgd[ibmap_idx].bg_flags & EXT4_BG_INODE_UNINIT) { + memset(zero_buffer, '\0', fs->blksz); + put_ext4(((uint64_t) (bgd[ibmap_idx].inode_id * + fs->blksz)), zero_buffer, + fs->blksz); + bgd[ibmap_idx].bg_flags = + bgd[ibmap_idx].bg_flags & ~EXT4_BG_INODE_UNINIT; + memcpy(fs->inode_bmaps[ibmap_idx], zero_buffer, + fs->blksz); + } + + if (ext4fs_set_inode_bmap(fs->curr_inode_no, + fs->inode_bmaps[ibmap_idx], + ibmap_idx) != 0) { + debug("going for restart for the block no %d %u\n", + fs->curr_inode_no, ibmap_idx); + goto restart; + } + + /* journal backup */ + if (prev_inode_bitmap_index != ibmap_idx) { + memset(journal_buffer, '\0', fs->blksz); + status = ext4fs_devread(bgd[ibmap_idx].inode_id + * fs->sect_perblk, + 0, fs->blksz, journal_buffer); + if (status == 0) + goto fail; + if (ext4fs_log_journal(journal_buffer, + bgd[ibmap_idx].inode_id)) + goto fail; + prev_inode_bitmap_index = ibmap_idx; + } + if (bgd[ibmap_idx].bg_itable_unused != + bgd[ibmap_idx].free_inodes) + bgd[ibmap_idx].bg_itable_unused = + bgd[ibmap_idx].free_inodes; + bgd[ibmap_idx].free_inodes--; + 
bgd[ibmap_idx].bg_itable_unused--; + fs->sb->free_inodes--; + goto success; + } + +success: + free(journal_buffer); + free(zero_buffer); + + return fs->curr_inode_no; +fail: + free(journal_buffer); + free(zero_buffer); + + return -1; + +} + + +static void alloc_single_indirect_block(struct ext2_inode *file_inode, + unsigned int *total_remaining_blocks, + unsigned int *no_blks_reqd) +{ + short i; + short status; + long int actual_block_no; + long int si_blockno; + /* si :single indirect */ + unsigned int *si_buffer = NULL; + unsigned int *si_start_addr = NULL; + struct ext_filesystem *fs = get_fs(); + + if (*total_remaining_blocks != 0) { + si_buffer = zalloc(fs->blksz); + if (!si_buffer) { + printf("No Memory\n"); + return; + } + si_start_addr = si_buffer; + si_blockno = ext4fs_get_new_blk_no(); + if (si_blockno == -1) { + printf("no block left to assign\n"); + goto fail; + } + (*no_blks_reqd)++; + debug("SIPB %ld: %u\n", si_blockno, *total_remaining_blocks); + + status = ext4fs_devread(si_blockno * fs->sect_perblk, + 0, fs->blksz, (char *)si_buffer); + memset(si_buffer, '\0', fs->blksz); + if (status == 0) + goto fail; + + for (i = 0; i < (fs->blksz / sizeof(int)); i++) { + actual_block_no = ext4fs_get_new_blk_no(); + if (actual_block_no == -1) { + printf("no block left to assign\n"); + goto fail; + } + *si_buffer = actual_block_no; + debug("SIAB %u: %u\n", *si_buffer, + *total_remaining_blocks); + + si_buffer++; + (*total_remaining_blocks)--; + if (*total_remaining_blocks == 0) + break; + } + + /* write the block to disk */ + put_ext4(((uint64_t) (si_blockno * fs->blksz)), + si_start_addr, fs->blksz); + file_inode->b.blocks.indir_block = si_blockno; + } +fail: + free(si_start_addr); +} + +static void alloc_double_indirect_block(struct ext2_inode *file_inode, + unsigned int *total_remaining_blocks, + unsigned int *no_blks_reqd) +{ + short i; + short j; + short status; + long int actual_block_no; + /* di:double indirect */ + long int di_blockno_parent; + long int 
di_blockno_child; + unsigned int *di_parent_buffer = NULL; + unsigned int *di_child_buff = NULL; + unsigned int *di_block_start_addr = NULL; + unsigned int *di_child_buff_start = NULL; + struct ext_filesystem *fs = get_fs(); + + if (*total_remaining_blocks != 0) { + /* double indirect parent block connecting to inode */ + di_blockno_parent = ext4fs_get_new_blk_no(); + if (di_blockno_parent == -1) { + printf("no block left to assign\n"); + goto fail; + } + di_parent_buffer = zalloc(fs->blksz); + if (!di_parent_buffer) + goto fail; + + di_block_start_addr = di_parent_buffer; + (*no_blks_reqd)++; + debug("DIPB %ld: %u\n", di_blockno_parent, + *total_remaining_blocks); + + status = ext4fs_devread(di_blockno_parent * + fs->sect_perblk, 0, + fs->blksz, (char *)di_parent_buffer); + memset(di_parent_buffer, '\0', fs->blksz); + + /* + * start:for each double indirect parent + * block create one more block + */ + for (i = 0; i < (fs->blksz / sizeof(int)); i++) { + di_blockno_child = ext4fs_get_new_blk_no(); + if (di_blockno_child == -1) { + printf("no block left to assign\n"); + goto fail; + } + di_child_buff = zalloc(fs->blksz); + if (!di_child_buff) + goto fail; + + di_child_buff_start = di_child_buff; + *di_parent_buffer = di_blockno_child; + di_parent_buffer++; + (*no_blks_reqd)++; + debug("DICB %ld: %u\n", di_blockno_child, + *total_remaining_blocks); + + status = ext4fs_devread(di_blockno_child * + fs->sect_perblk, 0, + fs->blksz, + (char *)di_child_buff); + memset(di_child_buff, '\0', fs->blksz); + /* filling of actual datablocks for each child */ + for (j = 0; j < (fs->blksz / sizeof(int)); j++) { + actual_block_no = ext4fs_get_new_blk_no(); + if (actual_block_no == -1) { + printf("no block left to assign\n"); + goto fail; + } + *di_child_buff = actual_block_no; + debug("DIAB %ld: %u\n", actual_block_no, + *total_remaining_blocks); + + di_child_buff++; + (*total_remaining_blocks)--; + if (*total_remaining_blocks == 0) + break; + } + /* write the block table */ + 
put_ext4(((uint64_t) (di_blockno_child * fs->blksz)), + di_child_buff_start, fs->blksz); + free(di_child_buff_start); + di_child_buff_start = NULL; + + if (*total_remaining_blocks == 0) + break; + } + put_ext4(((uint64_t) (di_blockno_parent * fs->blksz)), + di_block_start_addr, fs->blksz); + file_inode->b.blocks.double_indir_block = di_blockno_parent; + } +fail: + free(di_block_start_addr); +} + +static void alloc_triple_indirect_block(struct ext2_inode *file_inode, + unsigned int *total_remaining_blocks, + unsigned int *no_blks_reqd) +{ + short i; + short j; + short k; + long int actual_block_no; + /* ti: Triple Indirect */ + long int ti_gp_blockno; + long int ti_parent_blockno; + long int ti_child_blockno; + unsigned int *ti_gp_buff = NULL; + unsigned int *ti_parent_buff = NULL; + unsigned int *ti_child_buff = NULL; + unsigned int *ti_gp_buff_start_addr = NULL; + unsigned int *ti_pbuff_start_addr = NULL; + unsigned int *ti_cbuff_start_addr = NULL; + struct ext_filesystem *fs = get_fs(); + if (*total_remaining_blocks != 0) { + /* triple indirect grand parent block connecting to inode */ + ti_gp_blockno = ext4fs_get_new_blk_no(); + if (ti_gp_blockno == -1) { + printf("no block left to assign\n"); + goto fail; + } + ti_gp_buff = zalloc(fs->blksz); + if (!ti_gp_buff) + goto fail; + + ti_gp_buff_start_addr = ti_gp_buff; + (*no_blks_reqd)++; + debug("TIGPB %ld: %u\n", ti_gp_blockno, + *total_remaining_blocks); + + /* for each 4 byte grand parent entry create one more block */ + for (i = 0; i < (fs->blksz / sizeof(int)); i++) { + ti_parent_blockno = ext4fs_get_new_blk_no(); + if (ti_parent_blockno == -1) { + printf("no block left to assign\n"); + goto fail; + } + ti_parent_buff = zalloc(fs->blksz); + if (!ti_parent_buff) + goto fail; + + ti_pbuff_start_addr = ti_parent_buff; + *ti_gp_buff = ti_parent_blockno; + ti_gp_buff++; + (*no_blks_reqd)++; + debug("TIPB %ld: %u\n", ti_parent_blockno, + *total_remaining_blocks); + + /* for each 4 byte entry parent create one more 
block */ + for (j = 0; j < (fs->blksz / sizeof(int)); j++) { + ti_child_blockno = ext4fs_get_new_blk_no(); + if (ti_child_blockno == -1) { + printf("no block left assign\n"); + goto fail; + } + ti_child_buff = zalloc(fs->blksz); + if (!ti_child_buff) + goto fail; + + ti_cbuff_start_addr = ti_child_buff; + *ti_parent_buff = ti_child_blockno; + ti_parent_buff++; + (*no_blks_reqd)++; + debug("TICB %ld: %u\n", ti_parent_blockno, + *total_remaining_blocks); + + /* fill actual datablocks for each child */ + for (k = 0; k < (fs->blksz / sizeof(int)); + k++) { + actual_block_no = + ext4fs_get_new_blk_no(); + if (actual_block_no == -1) { + printf("no block left\n"); + goto fail; + } + *ti_child_buff = actual_block_no; + debug("TIAB %ld: %u\n", actual_block_no, + *total_remaining_blocks); + + ti_child_buff++; + (*total_remaining_blocks)--; + if (*total_remaining_blocks == 0) + break; + } + /* write the child block */ + put_ext4(((uint64_t) (ti_child_blockno * + fs->blksz)), + ti_cbuff_start_addr, fs->blksz); + free(ti_cbuff_start_addr); + + if (*total_remaining_blocks == 0) + break; + } + /* write the parent block */ + put_ext4(((uint64_t) (ti_parent_blockno * fs->blksz)), + ti_pbuff_start_addr, fs->blksz); + free(ti_pbuff_start_addr); + + if (*total_remaining_blocks == 0) + break; + } + /* write the grand parent block */ + put_ext4(((uint64_t) (ti_gp_blockno * fs->blksz)), + ti_gp_buff_start_addr, fs->blksz); + file_inode->b.blocks.triple_indir_block = ti_gp_blockno; + } +fail: + free(ti_gp_buff_start_addr); +} + +void ext4fs_allocate_blocks(struct ext2_inode *file_inode, + unsigned int total_remaining_blocks, + unsigned int *total_no_of_block) +{ + short i; + long int direct_blockno; + unsigned int no_blks_reqd = 0; + + /* allocation of direct blocks */ + for (i = 0; i < INDIRECT_BLOCKS; i++) { + direct_blockno = ext4fs_get_new_blk_no(); + if (direct_blockno == -1) { + printf("no block left to assign\n"); + return; + } + file_inode->b.blocks.dir_blocks[i] = direct_blockno; 
+ debug("DB %ld: %u\n", direct_blockno, total_remaining_blocks); + + total_remaining_blocks--; + if (total_remaining_blocks == 0) + break; + } + + alloc_single_indirect_block(file_inode, &total_remaining_blocks, + &no_blks_reqd); + alloc_double_indirect_block(file_inode, &total_remaining_blocks, + &no_blks_reqd); + alloc_triple_indirect_block(file_inode, &total_remaining_blocks, + &no_blks_reqd); + *total_no_of_block += no_blks_reqd; +} + + +static struct ext4_extent_header *ext4fs_get_extent_block + (struct ext2_data *data, char *buf, + struct ext4_extent_header *ext_block, + uint32_t fileblock, int log2_blksz) +{ + struct ext4_extent_idx *index; + unsigned long long block; + struct ext_filesystem *fs = get_fs(); + int i; + + while (1) { + index = (struct ext4_extent_idx *)(ext_block + 1); + + if (le32_to_cpu(ext_block->eh_magic) != EXT4_EXT_MAGIC) + return 0; + + if (ext_block->eh_depth == 0) + return ext_block; + i = -1; + do { + i++; + if (i >= le32_to_cpu(ext_block->eh_entries)) + break; + } while (fileblock > le32_to_cpu(index[i].ei_block)); + + if (--i < 0) + return 0; + + block = le32_to_cpu(index[i].ei_leaf_hi); + block = (block << 32) + le32_to_cpu(index[i].ei_leaf_lo); + + if (ext4fs_devread(block << log2_blksz, 0, fs->blksz, buf)) + ext_block = (struct ext4_extent_header *)buf; + else + return 0; + } +} + +static int ext4fs_blockgroup + (struct ext2_data *data, int group, struct ext2_block_group *blkgrp) +{ + long int blkno; + unsigned int blkoff, desc_per_blk; + + desc_per_blk = EXT2_BLOCK_SIZE(data) / sizeof(struct ext2_block_group); + + blkno = __le32_to_cpu(data->sblock.first_data_block) + 1 + + group / desc_per_blk; + blkoff = (group % desc_per_blk) * sizeof(struct ext2_block_group); + + + return ext4fs_devread(blkno << LOG2_EXT2_BLOCK_SIZE(data), + blkoff, sizeof(struct ext2_block_group), + (char *)blkgrp); +} + +int ext4fs_read_inode(struct ext2_data *data, int ino, struct ext2_inode *inode) +{ + struct ext2_block_group blkgrp; + struct 
ext2_sblock *sblock = &data->sblock; + struct ext_filesystem *fs = get_fs(); + int inodes_per_block, status; + long int blkno; + unsigned int blkoff; + + /* It is easier to calculate if the first inode is 0. */ + ino--; + status = ext4fs_blockgroup(data, ino / __le32_to_cpu + (sblock->inodes_per_group), &blkgrp); + if (status == 0) + return 0; + + inodes_per_block = EXT2_BLOCK_SIZE(data) / fs->inodesz; + blkno = __le32_to_cpu(blkgrp.inode_table_id) + + (ino % __le32_to_cpu(sblock->inodes_per_group)) / inodes_per_block; + blkoff = (ino % inodes_per_block) * fs->inodesz; + /* Read the inode. */ + status = ext4fs_devread(blkno << LOG2_EXT2_BLOCK_SIZE(data), blkoff, + sizeof(struct ext2_inode), (char *)inode); + if (status == 0) + return 0; + + return 1; +} + +long int read_allocated_block(struct ext2_inode *inode, int fileblock) +{ + long int blknr; + int blksz; + int log2_blksz; + int status; + long int rblock; + long int perblock_parent; + long int perblock_child; + unsigned long long start; + /* get the blocksize of the filesystem */ + blksz = EXT2_BLOCK_SIZE(ext4fs_root); + log2_blksz = LOG2_EXT2_BLOCK_SIZE(ext4fs_root); + if (le32_to_cpu(inode->flags) & EXT4_EXTENTS_FL) { + char *buf = zalloc(blksz); + if (!buf) + return -ENOMEM; + struct ext4_extent_header *ext_block; + struct ext4_extent *extent; + int i = -1; + ext_block = ext4fs_get_extent_block(ext4fs_root, buf, + (struct ext4_extent_header + *)inode->b. 
+ blocks.dir_blocks, + fileblock, log2_blksz); + if (!ext_block) { + printf("invalid extent block\n"); + free(buf); + return -EINVAL; + } + + extent = (struct ext4_extent *)(ext_block + 1); + + do { + i++; + if (i >= le32_to_cpu(ext_block->eh_entries)) + break; + } while (fileblock >= le32_to_cpu(extent[i].ee_block)); + if (--i >= 0) { + fileblock -= le32_to_cpu(extent[i].ee_block); + if (fileblock >= le32_to_cpu(extent[i].ee_len)) { + free(buf); + return 0; + } + + start = le32_to_cpu(extent[i].ee_start_hi); + start = (start << 32) + + le32_to_cpu(extent[i].ee_start_lo); + free(buf); + return fileblock + start; + } + + printf("Extent Error\n"); + free(buf); + return -1; + } + + /* Direct blocks. */ + if (fileblock < INDIRECT_BLOCKS) + blknr = __le32_to_cpu(inode->b.blocks.dir_blocks[fileblock]); + + /* Indirect. */ + else if (fileblock < (INDIRECT_BLOCKS + (blksz / 4))) { + if (ext4fs_indir1_block == NULL) { + ext4fs_indir1_block = zalloc(blksz); + if (ext4fs_indir1_block == NULL) { + printf("** SI ext2fs read block (indir 1)" + "malloc failed. **\n"); + return -1; + } + ext4fs_indir1_size = blksz; + ext4fs_indir1_blkno = -1; + } + if (blksz != ext4fs_indir1_size) { + free(ext4fs_indir1_block); + ext4fs_indir1_block = NULL; + ext4fs_indir1_size = 0; + ext4fs_indir1_blkno = -1; + ext4fs_indir1_block = zalloc(blksz); + if (ext4fs_indir1_block == NULL) { + printf("** SI ext2fs read block (indir 1):" + "malloc failed. **\n"); + return -1; + } + ext4fs_indir1_size = blksz; + } + if ((__le32_to_cpu(inode->b.blocks.indir_block) << + log2_blksz) != ext4fs_indir1_blkno) { + status = + ext4fs_devread(__le32_to_cpu + (inode->b.blocks. + indir_block) << log2_blksz, 0, + blksz, (char *)ext4fs_indir1_block); + if (status == 0) { + printf("** SI ext2fs read block (indir 1)" + "failed. **\n"); + return 0; + } + ext4fs_indir1_blkno = + __le32_to_cpu(inode->b.blocks. 
+ indir_block) << log2_blksz; + } + blknr = __le32_to_cpu(ext4fs_indir1_block + [fileblock - INDIRECT_BLOCKS]); + } + /* Double indirect. */ + else if (fileblock < (INDIRECT_BLOCKS + (blksz / 4 * + (blksz / 4 + 1)))) { + + long int perblock = blksz / 4; + long int rblock = fileblock - (INDIRECT_BLOCKS + blksz / 4); + + if (ext4fs_indir1_block == NULL) { + ext4fs_indir1_block = zalloc(blksz); + if (ext4fs_indir1_block == NULL) { + printf("** DI ext2fs read block (indir 2 1)" + "malloc failed. **\n"); + return -1; + } + ext4fs_indir1_size = blksz; + ext4fs_indir1_blkno = -1; + } + if (blksz != ext4fs_indir1_size) { + free(ext4fs_indir1_block); + ext4fs_indir1_block = NULL; + ext4fs_indir1_size = 0; + ext4fs_indir1_blkno = -1; + ext4fs_indir1_block = zalloc(blksz); + if (ext4fs_indir1_block == NULL) { + printf("** DI ext2fs read block (indir 2 1)" + "malloc failed. **\n"); + return -1; + } + ext4fs_indir1_size = blksz; + } + if ((__le32_to_cpu(inode->b.blocks.double_indir_block) << + log2_blksz) != ext4fs_indir1_blkno) { + status = + ext4fs_devread(__le32_to_cpu + (inode->b.blocks. + double_indir_block) << log2_blksz, + 0, blksz, + (char *)ext4fs_indir1_block); + if (status == 0) { + printf("** DI ext2fs read block (indir 2 1)" + "failed. **\n"); + return -1; + } + ext4fs_indir1_blkno = + __le32_to_cpu(inode->b.blocks.double_indir_block) << + log2_blksz; + } + + if (ext4fs_indir2_block == NULL) { + ext4fs_indir2_block = zalloc(blksz); + if (ext4fs_indir2_block == NULL) { + printf("** DI ext2fs read block (indir 2 2)" + "malloc failed. **\n"); + return -1; + } + ext4fs_indir2_size = blksz; + ext4fs_indir2_blkno = -1; + } + if (blksz != ext4fs_indir2_size) { + free(ext4fs_indir2_block); + ext4fs_indir2_block = NULL; + ext4fs_indir2_size = 0; + ext4fs_indir2_blkno = -1; + ext4fs_indir2_block = zalloc(blksz); + if (ext4fs_indir2_block == NULL) { + printf("** DI ext2fs read block (indir 2 2)" + "malloc failed. 
**\n"); + return -1; + } + ext4fs_indir2_size = blksz; + } + if ((__le32_to_cpu(ext4fs_indir1_block[rblock / perblock]) << + log2_blksz) != ext4fs_indir2_blkno) { + status = ext4fs_devread(__le32_to_cpu + (ext4fs_indir1_block + [rblock / + perblock]) << log2_blksz, 0, + blksz, + (char *)ext4fs_indir2_block); + if (status == 0) { + printf("** DI ext2fs read block (indir 2 2)" + "failed. **\n"); + return -1; + } + ext4fs_indir2_blkno = + __le32_to_cpu(ext4fs_indir1_block[rblock + / + perblock]) << + log2_blksz; + } + blknr = __le32_to_cpu(ext4fs_indir2_block[rblock % perblock]); + } + /* Tripple indirect. */ + else { + rblock = fileblock - (INDIRECT_BLOCKS + blksz / 4 + + (blksz / 4 * blksz / 4)); + perblock_child = blksz / 4; + perblock_parent = ((blksz / 4) * (blksz / 4)); + + if (ext4fs_indir1_block == NULL) { + ext4fs_indir1_block = zalloc(blksz); + if (ext4fs_indir1_block == NULL) { + printf("** TI ext2fs read block (indir 2 1)" + "malloc failed. **\n"); + return -1; + } + ext4fs_indir1_size = blksz; + ext4fs_indir1_blkno = -1; + } + if (blksz != ext4fs_indir1_size) { + free(ext4fs_indir1_block); + ext4fs_indir1_block = NULL; + ext4fs_indir1_size = 0; + ext4fs_indir1_blkno = -1; + ext4fs_indir1_block = zalloc(blksz); + if (ext4fs_indir1_block == NULL) { + printf("** TI ext2fs read block (indir 2 1)" + "malloc failed. **\n"); + return -1; + } + ext4fs_indir1_size = blksz; + } + if ((__le32_to_cpu(inode->b.blocks.triple_indir_block) << + log2_blksz) != ext4fs_indir1_blkno) { + status = ext4fs_devread + (__le32_to_cpu(inode->b.blocks.triple_indir_block) + << log2_blksz, 0, blksz, + (char *)ext4fs_indir1_block); + if (status == 0) { + printf("** TI ext2fs read block (indir 2 1)" + "failed. 
**\n"); + return -1; + } + ext4fs_indir1_blkno = + __le32_to_cpu(inode->b.blocks.triple_indir_block) << + log2_blksz; + } + + if (ext4fs_indir2_block == NULL) { + ext4fs_indir2_block = zalloc(blksz); + if (ext4fs_indir2_block == NULL) { + printf("** TI ext2fs read block (indir 2 2)" + "malloc failed. **\n"); + return -1; + } + ext4fs_indir2_size = blksz; + ext4fs_indir2_blkno = -1; + } + if (blksz != ext4fs_indir2_size) { + free(ext4fs_indir2_block); + ext4fs_indir2_block = NULL; + ext4fs_indir2_size = 0; + ext4fs_indir2_blkno = -1; + ext4fs_indir2_block = zalloc(blksz); + if (ext4fs_indir2_block == NULL) { + printf("** TI ext2fs read block (indir 2 2)" + "malloc failed. **\n"); + return -1; + } + ext4fs_indir2_size = blksz; + } + if ((__le32_to_cpu(ext4fs_indir1_block[rblock / + perblock_parent]) << + log2_blksz) + != ext4fs_indir2_blkno) { + status = ext4fs_devread(__le32_to_cpu + (ext4fs_indir1_block + [rblock / + perblock_parent]) << + log2_blksz, 0, blksz, + (char *)ext4fs_indir2_block); + if (status == 0) { + printf("** TI ext2fs read block (indir 2 2)" + "failed. **\n"); + return -1; + } + ext4fs_indir2_blkno = + __le32_to_cpu(ext4fs_indir1_block[rblock / + perblock_parent]) + << log2_blksz; + } + + if (ext4fs_indir3_block == NULL) { + ext4fs_indir3_block = zalloc(blksz); + if (ext4fs_indir3_block == NULL) { + printf("** TI ext2fs read block (indir 2 2)" + "malloc failed. **\n"); + return -1; + } + ext4fs_indir3_size = blksz; + ext4fs_indir3_blkno = -1; + } + if (blksz != ext4fs_indir3_size) { + free(ext4fs_indir3_block); + ext4fs_indir3_block = NULL; + ext4fs_indir3_size = 0; + ext4fs_indir3_blkno = -1; + ext4fs_indir3_block = zalloc(blksz); + if (ext4fs_indir3_block == NULL) { + printf("** TI ext2fs read block (indir 2 2)" + "malloc failed. 
**\n"); + return -1; + } + ext4fs_indir3_size = blksz; + } + if ((__le32_to_cpu(ext4fs_indir2_block[rblock + / + perblock_child]) << + log2_blksz) != ext4fs_indir3_blkno) { + status = + ext4fs_devread(__le32_to_cpu + (ext4fs_indir2_block + [(rblock / perblock_child) + % (blksz / 4)]) << log2_blksz, 0, + blksz, (char *)ext4fs_indir3_block); + if (status == 0) { + printf("** TI ext2fs read block (indir 2 2)" + "failed. **\n"); + return -1; + } + ext4fs_indir3_blkno = + __le32_to_cpu(ext4fs_indir2_block[(rblock / + perblock_child) % + (blksz / + 4)]) << + log2_blksz; + } + + blknr = __le32_to_cpu(ext4fs_indir3_block + [rblock % perblock_child]); + } + debug("ext4fs_read_block %ld\n", blknr); + + return blknr; +} + +void ext4fs_close(void) +{ + if ((ext4fs_file != NULL) && (ext4fs_root != NULL)) { + ext4fs_free_node(ext4fs_file, &ext4fs_root->diropen); + ext4fs_file = NULL; + } + if (ext4fs_root != NULL) { + free(ext4fs_root); + ext4fs_root = NULL; + } + if (ext4fs_indir1_block != NULL) { + free(ext4fs_indir1_block); + ext4fs_indir1_block = NULL; + ext4fs_indir1_size = 0; + ext4fs_indir1_blkno = -1; + } + if (ext4fs_indir2_block != NULL) { + free(ext4fs_indir2_block); + ext4fs_indir2_block = NULL; + ext4fs_indir2_size = 0; + ext4fs_indir2_blkno = -1; + } + if (ext4fs_indir3_block != NULL) { + free(ext4fs_indir3_block); + ext4fs_indir3_block = NULL; + ext4fs_indir3_size = 0; + ext4fs_indir3_blkno = -1; + } +} + +int ext4fs_iterate_dir(struct ext2fs_node *dir, char *name, + struct ext2fs_node **fnode, int *ftype) +{ + unsigned int fpos = 0; + int status; + struct ext2fs_node *diro = (struct ext2fs_node *) dir; + +//#ifdef DEBUG + if (name != NULL) + printf("Iterate dir %s\n", name); +//#endif /* of DEBUG */ + if (!diro->inode_read) { + status = ext4fs_read_inode(diro->data, diro->ino, &diro->inode); + if (status == 0) + return 0; + } + /* Search the file. 
*/ + while (fpos < __le32_to_cpu(diro->inode.size)) { + struct ext2_dirent dirent; + + status = ext4fs_read_file(diro, fpos, + sizeof(struct ext2_dirent), + (char *) &dirent); + if (status < 1) + return 0; + + if (dirent.namelen != 0) { + //Note: please pay attention to the name length + //Since our compiler cannot work right with the variable length, + //we fix the length to 100, but it may be not enough to long name. + //char filename[dirent.namelen + 1]; + char filename[100]; + struct ext2fs_node *fdiro; + int type = FILETYPE_UNKNOWN; + status = ext4fs_read_file(diro, + fpos + + sizeof(struct ext2_dirent), + dirent.namelen, filename); + + if (status < 1) + return 0; + + fdiro = zalloc(sizeof(struct ext2fs_node)); + if (!fdiro) + return 0; + + fdiro->data = diro->data; + fdiro->ino = __le32_to_cpu(dirent.inode); + + filename[dirent.namelen] = '\0'; + + if (dirent.filetype != FILETYPE_UNKNOWN) { + fdiro->inode_read = 0; + + if (dirent.filetype == FILETYPE_DIRECTORY) + type = FILETYPE_DIRECTORY; + else if (dirent.filetype == FILETYPE_SYMLINK) + type = FILETYPE_SYMLINK; + else if (dirent.filetype == FILETYPE_REG) + type = FILETYPE_REG; + } else { + status = ext4fs_read_inode(diro->data, + __le32_to_cpu + (dirent.inode), + &fdiro->inode); + if (status == 0) { + free(fdiro); + return 0; + } + fdiro->inode_read = 1; + + if ((__le16_to_cpu(fdiro->inode.mode) & + FILETYPE_INO_MASK) == + FILETYPE_INO_DIRECTORY) { + type = FILETYPE_DIRECTORY; + } else if ((__le16_to_cpu(fdiro->inode.mode) + & FILETYPE_INO_MASK) == + FILETYPE_INO_SYMLINK) { + type = FILETYPE_SYMLINK; + } else if ((__le16_to_cpu(fdiro->inode.mode) + & FILETYPE_INO_MASK) == + FILETYPE_INO_REG) { + type = FILETYPE_REG; + } + } +#ifdef DEBUG + printf("iterate >%s<\n", filename); +#endif /* of DEBUG */ + if ((name != NULL) && (fnode != NULL) + && (ftype != NULL)) { + if (strcmp(filename, name) == 0) { + *ftype = type; + *fnode = fdiro; + return 1; + } + } else { + if (fdiro->inode_read == 0) { + status = 
ext4fs_read_inode(diro->data, + __le32_to_cpu( + dirent.inode), + &fdiro->inode); + if (status == 0) { + free(fdiro); + return 0; + } + fdiro->inode_read = 1; + } + switch (type) { + case FILETYPE_DIRECTORY: + printf("<DIR> "); + break; + case FILETYPE_SYMLINK: + printf("<SYM> "); + break; + case FILETYPE_REG: + printf(" "); + break; + default: + printf("< ? > "); + break; + } + printf("%10d %s\n", + __le32_to_cpu(fdiro->inode.size), + filename); + } + free(fdiro); + } + fpos += __le16_to_cpu(dirent.direntlen); + } + return 0; +} + +static char *ext4fs_read_symlink(struct ext2fs_node *node) +{ + char *symlink; + struct ext2fs_node *diro = node; + int status; + + if (!diro->inode_read) { + status = ext4fs_read_inode(diro->data, diro->ino, &diro->inode); + if (status == 0) + return 0; + } + symlink = zalloc(__le32_to_cpu(diro->inode.size) + 1); + if (!symlink) + return 0; + + if (__le32_to_cpu(diro->inode.size) <= 60) { + strncpy(symlink, diro->inode.b.symlink, + __le32_to_cpu(diro->inode.size)); + } else { + status = ext4fs_read_file(diro, 0, + __le32_to_cpu(diro->inode.size), + symlink); + if (status == 0) { + free(symlink); + return 0; + } + } + symlink[__le32_to_cpu(diro->inode.size)] = '\0'; + return symlink; +} + +static int ext4fs_find_file1(const char *currpath, + struct ext2fs_node *currroot, + struct ext2fs_node **currfound, int *foundtype) +{ + char fpath[strlen(currpath) + 1]; + char *name = fpath; + char *next; + int status; + int type = FILETYPE_DIRECTORY; + struct ext2fs_node *currnode = currroot; + struct ext2fs_node *oldnode = currroot; + + strncpy(fpath, currpath, strlen(currpath) + 1); + + /* Remove all leading slashes. */ + while (*name == '/') + name++; + + if (!*name) { + *currfound = currnode; + return 1; + } + + for (;;) { + int found; + + /* Extract the actual part from the pathname. */ + next = strchr(name, '/'); + if (next) { + /* Remove all leading slashes. 
*/ + while (*next == '/') + *(next++) = '\0'; + } + + if (type != FILETYPE_DIRECTORY) { + ext4fs_free_node(currnode, currroot); + return 0; + } + + oldnode = currnode; + + /* Iterate over the directory. */ + found = ext4fs_iterate_dir(currnode, name, &currnode, &type); + if (found == 0) + return 0; + + if (found == -1) + break; + + /* Read in the symlink and follow it. */ + if (type == FILETYPE_SYMLINK) { + char *symlink; + + /* Test if the symlink does not loop. */ + if (++symlinknest == 8) { + ext4fs_free_node(currnode, currroot); + ext4fs_free_node(oldnode, currroot); + return 0; + } + + symlink = ext4fs_read_symlink(currnode); + ext4fs_free_node(currnode, currroot); + + if (!symlink) { + ext4fs_free_node(oldnode, currroot); + return 0; + } + + debug("Got symlink >%s<\n", symlink); + + if (symlink[0] == '/') { + ext4fs_free_node(oldnode, currroot); + oldnode = &ext4fs_root->diropen; + } + + /* Lookup the node the symlink points to. */ + status = ext4fs_find_file1(symlink, oldnode, + &currnode, &type); + + free(symlink); + + if (status == 0) { + ext4fs_free_node(oldnode, currroot); + return 0; + } + } + + ext4fs_free_node(oldnode, currroot); + + /* Found the node! */ + if (!next || *next == '\0') { + *currfound = currnode; + *foundtype = type; + return 1; + } + name = next; + } + return -1; +} + +int ext4fs_find_file(const char *path, struct ext2fs_node *rootnode, + struct ext2fs_node **foundnode, int expecttype) +{ + int status; + int foundtype = FILETYPE_DIRECTORY; + + symlinknest = 0; + if (!path) + return 0; + + status = ext4fs_find_file1(path, rootnode, foundnode, &foundtype); + if (status == 0) + return 0; + + /* Check if the node that was found was of the expected type. 
*/ + if ((expecttype == FILETYPE_REG) && (foundtype != expecttype)) + return 0; + else if ((expecttype == FILETYPE_DIRECTORY) + && (foundtype != expecttype)) + return 0; + + return 1; +} + +int ext4fs_open(const char *filename) +{ + struct ext2fs_node *fdiro = NULL; + int status; + int len; + + if (ext4fs_root == NULL) + return -1; + + ext4fs_file = NULL; + status = ext4fs_find_file(filename, &ext4fs_root->diropen, &fdiro, + FILETYPE_REG); + if (status == 0) + goto fail; + + if (!fdiro->inode_read) { + status = ext4fs_read_inode(fdiro->data, fdiro->ino, + &fdiro->inode); + if (status == 0) + goto fail; + } + len = __le32_to_cpu(fdiro->inode.size); + ext4fs_file = fdiro; + + return len; +fail: + ext4fs_free_node(fdiro, &ext4fs_root->diropen); + + return -1; +} + +int ext4fs_mount(unsigned part_length) +{ + struct ext2_data *data; + int status; + struct ext_filesystem *fs = get_fs(); + data = zalloc(sizeof(struct ext2_data)); + if (!data) + return 0; + + /* Read the superblock. */ + status = ext4fs_devread(1 * 2, 0, sizeof(struct ext2_sblock), + (char *)&data->sblock); + + if (status == 0) + goto fail; + /* Make sure this is an ext2 filesystem. 
*/ + if (__le16_to_cpu(data->sblock.magic) != EXT2_MAGIC) + goto fail; + + if (__le32_to_cpu(data->sblock.revision_level == 0)) + fs->inodesz = 128; + else + fs->inodesz = __le16_to_cpu(data->sblock.inode_size); + + printf("EXT4 rev %d, inode_size %d\n", + __le32_to_cpu(data->sblock.revision_level), fs->inodesz); + + data->diropen.data = data; + data->diropen.ino = 2; + data->diropen.inode_read = 1; + data->inode = &data->diropen.inode; + status = ext4fs_read_inode(data, 2, data->inode); + if (status == 0) + goto fail; + + ext4fs_root = data; + + return 1; +fail: + printf("Failed to mount ext2 filesystem...\n"); + free(data); + ext4fs_root = NULL; + + return 0; +} diff --git a/fs/ext4/ext4_common.h b/fs/ext4/ext4_common.h new file mode 100755 index 0000000..922a940 --- /dev/null +++ b/fs/ext4/ext4_common.h @@ -0,0 +1,88 @@ +/* + * (C) Copyright 2011 - 2012 Samsung Electronics + * EXT4 filesystem implementation in Uboot by + * Uma Shankar <uma.shankar@samsung.com> + * Manjunatha C Achar <a.manjunatha@samsung.com> + * + * ext4ls and ext4load : based on ext2 ls load support in Uboot. + * + * (C) Copyright 2004 + * esd gmbh <www.esd-electronics.com> + * Reinhard Arlt <reinhard.arlt@esd-electronics.com> + * + * based on code from grub2 fs/ext2.c and fs/fshelp.c by + * GRUB -- GRand Unified Bootloader + * Copyright (C) 2003, 2004 Free Software Foundation, Inc. + * + * ext4write : Based on generic ext4 protocol. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __EXT4_COMMON__ +#define __EXT4_COMMON__ +#include <ext_common.h> +#include <ext4fs.h> +#include <malloc.h> +#include <asm/errno.h> +#include "ext4_journal.h" +#include "crc16.h" + +#define YES 1 +#define NO 0 +#define TRUE 1 +#define FALSE 0 +#define RECOVER 1 +#define SCAN 0 + +#define S_IFLNK 0120000 /* symbolic link */ +#define BLOCK_NO_ONE 1 +#define SUPERBLOCK_SECTOR 2 +#define SUPERBLOCK_SIZE 1024 +#define F_FILE 1 + +static inline void *zalloc(size_t size) +{ + void *p = memalign(ARCH_DMA_MINALIGN, size); + memset(p, 0, size); + return p; +} + +int ext4fs_read_inode(struct ext2_data *data, int ino, + struct ext2_inode *inode); +int ext4fs_read_file(struct ext2fs_node *node, int pos, + unsigned int len, char *buf); +int ext4fs_find_file(const char *path, struct ext2fs_node *rootnode, + struct ext2fs_node **foundnode, int expecttype); +int ext4fs_iterate_dir(struct ext2fs_node *dir, char *name, + struct ext2fs_node **fnode, int *ftype); + +uint32_t ext4fs_div_roundup(uint32_t size, uint32_t n); +int ext4fs_checksum_update(unsigned int i); +int ext4fs_get_parent_inode_num(const char *dirname, char *dname, int flags); +void ext4fs_update_parent_dentry(char *filename, int *p_ino, int file_type); +long int ext4fs_get_new_blk_no(void); +int ext4fs_get_new_inode_no(void); +void ext4fs_reset_block_bmap(long int blockno, unsigned char *buffer, + int index); +int ext4fs_set_block_bmap(long int blockno, unsigned char *buffer, int index); +int ext4fs_set_inode_bmap(int inode_no, unsigned char *buffer, int index); +void ext4fs_reset_inode_bmap(int inode_no, unsigned char *buffer, int index); +int ext4fs_iget(int inode_no, struct ext2_inode *inode); +void ext4fs_allocate_blocks(struct ext2_inode *file_inode, + unsigned int total_remaining_blocks, + unsigned 
int *total_no_of_block); +void put_ext4(uint64_t off, void *buf, uint32_t size); +#endif diff --git a/fs/ext4/ext4_journal.c b/fs/ext4/ext4_journal.c new file mode 100755 index 0000000..bfc8860 --- /dev/null +++ b/fs/ext4/ext4_journal.c @@ -0,0 +1,667 @@ +/* + * (C) Copyright 2011 - 2012 Samsung Electronics + * EXT4 filesystem implementation in Uboot by + * Uma Shankar <uma.shankar@samsung.com> + * Manjunatha C Achar <a.manjunatha@samsung.com> + * + * Journal data structures and headers for Journaling feature of ext4 + * have been referred from JBD2 (Journaling Block device 2) + * implementation in Linux Kernel. + * Written by Stephen C. Tweedie <sct@redhat.com> + * + * Copyright 1998-2000 Red Hat, Inc --- All Rights Reserved + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ +#include <common.h> +#include <ext4fs.h> +#include <malloc.h> +#include <ext_common.h> +#include <compiler.h> +#include "ext4_common.h" + +static struct revoke_blk_list *revk_blk_list; +static struct revoke_blk_list *prev_node; +static int first_node = TRUE; + +int gindex; +int gd_index; +int jrnl_blk_idx; +struct journal_log *journal_ptr[MAX_JOURNAL_ENTRIES]; +struct dirty_blocks *dirty_block_ptr[MAX_JOURNAL_ENTRIES]; + +int ext4fs_init_journal(void) +{ + int i; + char *temp = NULL; + struct ext_filesystem *fs = get_fs(); + + /* init globals */ + revk_blk_list = NULL; + prev_node = NULL; + gindex = 0; + gd_index = 0; + jrnl_blk_idx = 1; + + for (i = 0; i < MAX_JOURNAL_ENTRIES; i++) { + journal_ptr[i] = zalloc(sizeof(struct journal_log)); + if (!journal_ptr[i]) + goto fail; + dirty_block_ptr[i] = zalloc(sizeof(struct dirty_blocks)); + if (!dirty_block_ptr[i]) + goto fail; + journal_ptr[i]->buf = NULL; + journal_ptr[i]->blknr = -1; + + dirty_block_ptr[i]->buf = NULL; + dirty_block_ptr[i]->blknr = -1; + } + + if (fs->blksz == 4096) { + temp = zalloc(fs->blksz); + if (!temp) + goto fail; + journal_ptr[gindex]->buf = zalloc(fs->blksz); + if (!journal_ptr[gindex]->buf) + goto fail; + ext4fs_devread(0, 0, fs->blksz, temp); + memcpy(temp + SUPERBLOCK_SIZE, fs->sb, SUPERBLOCK_SIZE); + memcpy(journal_ptr[gindex]->buf, temp, fs->blksz); + journal_ptr[gindex++]->blknr = 0; + free(temp); + } else { + journal_ptr[gindex]->buf = zalloc(fs->blksz); + if (!journal_ptr[gindex]->buf) + goto fail; + memcpy(journal_ptr[gindex]->buf, fs->sb, SUPERBLOCK_SIZE); + journal_ptr[gindex++]->blknr = 1; + } + + /* Check the file system state using journal super block */ + if (ext4fs_check_journal_state(SCAN)) + goto fail; + /* Check the file system state using journal super block */ + if (ext4fs_check_journal_state(RECOVER)) + goto fail; + + return 0; +fail: + return -1; +} + +void ext4fs_dump_metadata(void) +{ + struct ext_filesystem *fs = get_fs(); + int i; + for (i = 0; i < 
MAX_JOURNAL_ENTRIES; i++) { + if (dirty_block_ptr[i]->blknr == -1) + break; + put_ext4((uint64_t) ((uint64_t)dirty_block_ptr[i]->blknr * + (uint64_t)fs->blksz), dirty_block_ptr[i]->buf, + fs->blksz); + } +} + +void ext4fs_free_journal(void) +{ + int i; + for (i = 0; i < MAX_JOURNAL_ENTRIES; i++) { + if (dirty_block_ptr[i]->blknr == -1) + break; + if (dirty_block_ptr[i]->buf) + free(dirty_block_ptr[i]->buf); + } + + for (i = 0; i < MAX_JOURNAL_ENTRIES; i++) { + if (journal_ptr[i]->blknr == -1) + break; + if (journal_ptr[i]->buf) + free(journal_ptr[i]->buf); + } + + for (i = 0; i < MAX_JOURNAL_ENTRIES; i++) { + if (journal_ptr[i]) + free(journal_ptr[i]); + if (dirty_block_ptr[i]) + free(dirty_block_ptr[i]); + } + gindex = 0; + gd_index = 0; + jrnl_blk_idx = 1; +} + +int ext4fs_log_gdt(char *gd_table) +{ + struct ext_filesystem *fs = get_fs(); + short i; + long int var = fs->gdtable_blkno; + for (i = 0; i < fs->no_blk_pergdt; i++) { + journal_ptr[gindex]->buf = zalloc(fs->blksz); + if (!journal_ptr[gindex]->buf) + return -ENOMEM; + memcpy(journal_ptr[gindex]->buf, gd_table, fs->blksz); + gd_table += fs->blksz; + journal_ptr[gindex++]->blknr = var++; + } + + return 0; +} + +/* + * This function stores the backup copy of meta data in RAM + * journal_buffer -- Buffer containing meta data + * blknr -- Block number on disk of the meta data buffer + */ +int ext4fs_log_journal(char *journal_buffer, long int blknr) +{ + struct ext_filesystem *fs = get_fs(); + short i; + + if (!journal_buffer) { + printf("Invalid input arguments %s\n", __func__); + return -EINVAL; + } + + for (i = 0; i < MAX_JOURNAL_ENTRIES; i++) { + if (journal_ptr[i]->blknr == -1) + break; + if (journal_ptr[i]->blknr == blknr) + return 0; + } + + journal_ptr[gindex]->buf = zalloc(fs->blksz); + if (!journal_ptr[gindex]->buf) + return -ENOMEM; + + memcpy(journal_ptr[gindex]->buf, journal_buffer, fs->blksz); + journal_ptr[gindex++]->blknr = blknr; + + return 0; +} + +/* + * This function stores the modified 
meta data in RAM + * metadata_buffer -- Buffer containing meta data + * blknr -- Block number on disk of the meta data buffer + */ +int ext4fs_put_metadata(char *metadata_buffer, long int blknr) +{ + struct ext_filesystem *fs = get_fs(); + if (!metadata_buffer) { + printf("Invalid input arguments %s\n", __func__); + return -EINVAL; + } + dirty_block_ptr[gd_index]->buf = zalloc(fs->blksz); + if (!dirty_block_ptr[gd_index]->buf) + return -ENOMEM; + memcpy(dirty_block_ptr[gd_index]->buf, metadata_buffer, fs->blksz); + dirty_block_ptr[gd_index++]->blknr = blknr; + + return 0; +} + +void print_revoke_blks(char *revk_blk) +{ + int offset; + int max; + long int blocknr; + struct journal_revoke_header_t *header; + + if (revk_blk == NULL) + return; + + header = (struct journal_revoke_header_t *) revk_blk; + offset = sizeof(struct journal_revoke_header_t); + max = be32_to_cpu(header->r_count); + printf("total bytes %d\n", max); + + while (offset < max) { + blocknr = be32_to_cpu(*((long int *)(revk_blk + offset))); + printf("revoke blknr is %ld\n", blocknr); + offset += 4; + } +} + +static struct revoke_blk_list *_get_node(void) +{ + struct revoke_blk_list *tmp_node; + tmp_node = zalloc(sizeof(struct revoke_blk_list)); + if (tmp_node == NULL) + return NULL; + tmp_node->content = NULL; + tmp_node->next = NULL; + + return tmp_node; +} + +void ext4fs_push_revoke_blk(char *buffer) +{ + struct revoke_blk_list *node = NULL; + struct ext_filesystem *fs = get_fs(); + if (buffer == NULL) { + printf("buffer ptr is NULL\n"); + return; + } + node = _get_node(); + if (!node) { + printf("_get_node: malloc failed\n"); + return; + } + + node->content = zalloc(fs->blksz); + if (node->content == NULL) + return; + memcpy(node->content, buffer, fs->blksz); + + if (first_node == TRUE) { + revk_blk_list = node; + prev_node = node; + first_node = FALSE; + } else { + prev_node->next = node; + prev_node = node; + } +} + +void ext4fs_free_revoke_blks(void) +{ + struct revoke_blk_list *tmp_node = 
revk_blk_list; + struct revoke_blk_list *next_node = NULL; + + while (tmp_node != NULL) { + if (tmp_node->content) + free(tmp_node->content); + tmp_node = tmp_node->next; + } + + tmp_node = revk_blk_list; + while (tmp_node != NULL) { + next_node = tmp_node->next; + free(tmp_node); + tmp_node = next_node; + } + + revk_blk_list = NULL; + prev_node = NULL; + first_node = TRUE; +} + +int check_blknr_for_revoke(long int blknr, int sequence_no) +{ + struct journal_revoke_header_t *header; + int offset; + int max; + long int blocknr; + char *revk_blk; + struct revoke_blk_list *tmp_revk_node = revk_blk_list; + while (tmp_revk_node != NULL) { + revk_blk = tmp_revk_node->content; + + header = (struct journal_revoke_header_t *) revk_blk; + if (sequence_no < be32_to_cpu(header->r_header.h_sequence)) { + offset = sizeof(struct journal_revoke_header_t); + max = be32_to_cpu(header->r_count); + + while (offset < max) { + blocknr = be32_to_cpu(*((long int *) + (revk_blk + offset))); + if (blocknr == blknr) + goto found; + offset += 4; + } + } + tmp_revk_node = tmp_revk_node->next; + } + + return -1; + +found: + return 0; +} + +/* + * This function parses the journal blocks and replays the + * suceessful transactions. 
A transaction is successfull + * if commit block is found for a descriptor block + * The tags in descriptor block contain the disk block + * numbers of the metadata to be replayed + */ +void recover_transaction(int prev_desc_logical_no) +{ + struct ext2_inode inode_journal; + struct ext_filesystem *fs = get_fs(); + struct journal_header_t *jdb; + long int blknr; + char *p_jdb; + int ofs, flags; + int i; + struct ext3_journal_block_tag *tag; + char *temp_buff = zalloc(fs->blksz); + char *metadata_buff = zalloc(fs->blksz); + if (!temp_buff || !metadata_buff) + goto fail; + i = prev_desc_logical_no; + ext4fs_read_inode(ext4fs_root, EXT2_JOURNAL_INO, + (struct ext2_inode *)&inode_journal); + blknr = read_allocated_block((struct ext2_inode *) + &inode_journal, i); + ext4fs_devread(blknr * fs->sect_perblk, 0, fs->blksz, temp_buff); + p_jdb = (char *)temp_buff; + jdb = (struct journal_header_t *) temp_buff; + ofs = sizeof(struct journal_header_t); + + do { + tag = (struct ext3_journal_block_tag *)&p_jdb[ofs]; + ofs += sizeof(struct ext3_journal_block_tag); + + if (ofs > fs->blksz) + break; + + flags = be32_to_cpu(tag->flags); + if (!(flags & EXT3_JOURNAL_FLAG_SAME_UUID)) + ofs += 16; + + i++; + debug("\t\ttag %u\n", be32_to_cpu(tag->block)); + if (revk_blk_list != NULL) { + if (check_blknr_for_revoke(be32_to_cpu(tag->block), + be32_to_cpu(jdb->h_sequence)) == 0) + continue; + } + blknr = read_allocated_block(&inode_journal, i); + ext4fs_devread(blknr * fs->sect_perblk, 0, + fs->blksz, metadata_buff); + put_ext4((uint64_t)(be32_to_cpu(tag->block) * fs->blksz), + metadata_buff, (uint32_t) fs->blksz); + } while (!(flags & EXT3_JOURNAL_FLAG_LAST_TAG)); +fail: + free(temp_buff); + free(metadata_buff); +} + +void print_jrnl_status(int recovery_flag) +{ + if (recovery_flag == RECOVER) + printf("Journal Recovery Completed\n"); + else + printf("Journal Scan Completed\n"); +} + +int ext4fs_check_journal_state(int recovery_flag) +{ + int i; + int DB_FOUND = NO; + long int blknr; + 
int transaction_state = TRANSACTION_COMPLETE; + int prev_desc_logical_no = 0; + int curr_desc_logical_no = 0; + int ofs, flags, block; + struct ext2_inode inode_journal; + struct journal_superblock_t *jsb = NULL; + struct journal_header_t *jdb = NULL; + char *p_jdb = NULL; + struct ext3_journal_block_tag *tag = NULL; + char *temp_buff = NULL; + char *temp_buff1 = NULL; + struct ext_filesystem *fs = get_fs(); + + temp_buff = zalloc(fs->blksz); + if (!temp_buff) + return -ENOMEM; + temp_buff1 = zalloc(fs->blksz); + if (!temp_buff1) { + free(temp_buff); + return -ENOMEM; + } + + ext4fs_read_inode(ext4fs_root, EXT2_JOURNAL_INO, &inode_journal); + blknr = read_allocated_block(&inode_journal, EXT2_JOURNAL_SUPERBLOCK); + ext4fs_devread(blknr * fs->sect_perblk, 0, fs->blksz, temp_buff); + jsb = (struct journal_superblock_t *) temp_buff; + + if (fs->sb->feature_incompat & EXT3_FEATURE_INCOMPAT_RECOVER) { + if (recovery_flag == RECOVER) + printf("Recovery required\n"); + } else { + if (recovery_flag == RECOVER) + printf("File System is consistent\n"); + goto end; + } + + if (be32_to_cpu(jsb->s_start) == 0) + goto end; + + if (!(jsb->s_feature_compat & + cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM))) + jsb->s_feature_compat |= + cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); + + i = be32_to_cpu(jsb->s_first); + while (1) { + block = be32_to_cpu(jsb->s_first); + blknr = read_allocated_block(&inode_journal, i); + memset(temp_buff1, '\0', fs->blksz); + ext4fs_devread(blknr * fs->sect_perblk, + 0, fs->blksz, temp_buff1); + jdb = (struct journal_header_t *) temp_buff1; + + if (be32_to_cpu(jdb->h_blocktype) == + EXT3_JOURNAL_DESCRIPTOR_BLOCK) { + if (be32_to_cpu(jdb->h_sequence) != + be32_to_cpu(jsb->s_sequence)) { + print_jrnl_status(recovery_flag); + break; + } + + curr_desc_logical_no = i; + if (transaction_state == TRANSACTION_COMPLETE) + transaction_state = TRANSACTION_RUNNING; + else + return -1; + p_jdb = (char *)temp_buff1; + ofs = sizeof(struct journal_header_t); + do { + tag = 
(struct ext3_journal_block_tag *) + &p_jdb[ofs]; + ofs += sizeof(struct ext3_journal_block_tag); + if (ofs > fs->blksz) + break; + flags = be32_to_cpu(tag->flags); + if (!(flags & EXT3_JOURNAL_FLAG_SAME_UUID)) + ofs += 16; + i++; + debug("\t\ttag %u\n", be32_to_cpu(tag->block)); + } while (!(flags & EXT3_JOURNAL_FLAG_LAST_TAG)); + i++; + DB_FOUND = YES; + } else if (be32_to_cpu(jdb->h_blocktype) == + EXT3_JOURNAL_COMMIT_BLOCK) { + if (be32_to_cpu(jdb->h_sequence) != + be32_to_cpu(jsb->s_sequence)) { + print_jrnl_status(recovery_flag); + break; + } + + if (transaction_state == TRANSACTION_RUNNING || + (DB_FOUND == NO)) { + transaction_state = TRANSACTION_COMPLETE; + i++; + jsb->s_sequence = + cpu_to_be32(be32_to_cpu( + jsb->s_sequence) + 1); + } + prev_desc_logical_no = curr_desc_logical_no; + if ((recovery_flag == RECOVER) && (DB_FOUND == YES)) + recover_transaction(prev_desc_logical_no); + + DB_FOUND = NO; + } else if (be32_to_cpu(jdb->h_blocktype) == + EXT3_JOURNAL_REVOKE_BLOCK) { + if (be32_to_cpu(jdb->h_sequence) != + be32_to_cpu(jsb->s_sequence)) { + print_jrnl_status(recovery_flag); + break; + } + if (recovery_flag == SCAN) + ext4fs_push_revoke_blk((char *)jdb); + i++; + } else { + debug("Else Case\n"); + if (be32_to_cpu(jdb->h_sequence) != + be32_to_cpu(jsb->s_sequence)) { + print_jrnl_status(recovery_flag); + break; + } + } + } + +end: + if (recovery_flag == RECOVER) { + jsb->s_start = cpu_to_be32(1); + jsb->s_sequence = cpu_to_be32(be32_to_cpu(jsb->s_sequence) + 1); + /* get the superblock */ + ext4fs_devread(SUPERBLOCK_SECTOR, 0, SUPERBLOCK_SIZE, + (char *)fs->sb); + fs->sb->feature_incompat |= EXT3_FEATURE_INCOMPAT_RECOVER; + + /* Update the super block */ + put_ext4((uint64_t) (SUPERBLOCK_SIZE), + (struct ext2_sblock *)fs->sb, + (uint32_t) SUPERBLOCK_SIZE); + ext4fs_devread(SUPERBLOCK_SECTOR, 0, SUPERBLOCK_SIZE, + (char *)fs->sb); + + blknr = read_allocated_block(&inode_journal, + EXT2_JOURNAL_SUPERBLOCK); + put_ext4((uint64_t) (blknr * fs->blksz), + 
(struct journal_superblock_t *)temp_buff, + (uint32_t) fs->blksz); + ext4fs_free_revoke_blks(); + } + free(temp_buff); + free(temp_buff1); + + return 0; +} + +static void update_descriptor_block(long int blknr) +{ + int i; + long int jsb_blknr; + struct journal_header_t jdb; + struct ext3_journal_block_tag tag; + struct ext2_inode inode_journal; + struct journal_superblock_t *jsb = NULL; + char *buf = NULL; + char *temp = NULL; + struct ext_filesystem *fs = get_fs(); + char *temp_buff = zalloc(fs->blksz); + if (!temp_buff) + return; + + ext4fs_read_inode(ext4fs_root, EXT2_JOURNAL_INO, &inode_journal); + jsb_blknr = read_allocated_block(&inode_journal, + EXT2_JOURNAL_SUPERBLOCK); + ext4fs_devread(jsb_blknr * fs->sect_perblk, 0, fs->blksz, temp_buff); + jsb = (struct journal_superblock_t *) temp_buff; + + jdb.h_blocktype = cpu_to_be32(EXT3_JOURNAL_DESCRIPTOR_BLOCK); + jdb.h_magic = cpu_to_be32(EXT3_JOURNAL_MAGIC_NUMBER); + jdb.h_sequence = jsb->s_sequence; + buf = zalloc(fs->blksz); + if (!buf) { + free(temp_buff); + return; + } + temp = buf; + memcpy(buf, &jdb, sizeof(struct journal_header_t)); + temp += sizeof(struct journal_header_t); + + for (i = 0; i < MAX_JOURNAL_ENTRIES; i++) { + if (journal_ptr[i]->blknr == -1) + break; + + tag.block = cpu_to_be32(journal_ptr[i]->blknr); + tag.flags = cpu_to_be32(EXT3_JOURNAL_FLAG_SAME_UUID); + memcpy(temp, &tag, sizeof(struct ext3_journal_block_tag)); + temp = temp + sizeof(struct ext3_journal_block_tag); + } + + tag.block = cpu_to_be32(journal_ptr[--i]->blknr); + tag.flags = cpu_to_be32(EXT3_JOURNAL_FLAG_LAST_TAG); + memcpy(temp - sizeof(struct ext3_journal_block_tag), &tag, + sizeof(struct ext3_journal_block_tag)); + put_ext4((uint64_t) (blknr * fs->blksz), buf, (uint32_t) fs->blksz); + + free(temp_buff); + free(buf); +} + +static void update_commit_block(long int blknr) +{ + struct journal_header_t jdb; + struct ext_filesystem *fs = get_fs(); + char *buf = NULL; + struct ext2_inode inode_journal; + struct 
journal_superblock_t *jsb; + long int jsb_blknr; + char *temp_buff = zalloc(fs->blksz); + if (!temp_buff) + return; + + ext4fs_read_inode(ext4fs_root, EXT2_JOURNAL_INO, &inode_journal); + jsb_blknr = read_allocated_block(&inode_journal, + EXT2_JOURNAL_SUPERBLOCK); + ext4fs_devread(jsb_blknr * fs->sect_perblk, 0, fs->blksz, temp_buff); + jsb = (struct journal_superblock_t *) temp_buff; + + jdb.h_blocktype = cpu_to_be32(EXT3_JOURNAL_COMMIT_BLOCK); + jdb.h_magic = cpu_to_be32(EXT3_JOURNAL_MAGIC_NUMBER); + jdb.h_sequence = jsb->s_sequence; + buf = zalloc(fs->blksz); + if (!buf) { + free(temp_buff); + return; + } + memcpy(buf, &jdb, sizeof(struct journal_header_t)); + put_ext4((uint64_t) (blknr * fs->blksz), buf, (uint32_t) fs->blksz); + + free(temp_buff); + free(buf); +} + +void ext4fs_update_journal(void) +{ + struct ext2_inode inode_journal; + struct ext_filesystem *fs = get_fs(); + long int blknr; + int i; + ext4fs_read_inode(ext4fs_root, EXT2_JOURNAL_INO, &inode_journal); + blknr = read_allocated_block(&inode_journal, jrnl_blk_idx++); + update_descriptor_block(blknr); + for (i = 0; i < MAX_JOURNAL_ENTRIES; i++) { + if (journal_ptr[i]->blknr == -1) + break; + blknr = read_allocated_block(&inode_journal, jrnl_blk_idx++); + put_ext4((uint64_t) ((uint64_t)blknr * (uint64_t)fs->blksz), + journal_ptr[i]->buf, fs->blksz); + } + blknr = read_allocated_block(&inode_journal, jrnl_blk_idx++); + update_commit_block(blknr); + printf("update journal finished\n"); +} diff --git a/fs/ext4/ext4_journal.h b/fs/ext4/ext4_journal.h new file mode 100755 index 0000000..acc1c51 --- /dev/null +++ b/fs/ext4/ext4_journal.h @@ -0,0 +1,141 @@ +/* + * (C) Copyright 2011 - 2012 Samsung Electronics + * EXT4 filesystem implementation in Uboot by + * Uma Shankar <uma.shankar@samsung.com> + * Manjunatha C Achar <a.manjunatha@samsung.com> + * + * Journal data structures and headers for Journaling feature of ext4 + * have been referred from JBD2 (Journaling Block device 2) + * implementation in Linux 
Kernel. + * + * Written by Stephen C. Tweedie <sct@redhat.com> + * + * Copyright 1998-2000 Red Hat, Inc --- All Rights Reserved + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef __EXT4_JRNL__ +#define __EXT4_JRNL__ + +#define EXT2_JOURNAL_INO 8 /* Journal inode */ +#define EXT2_JOURNAL_SUPERBLOCK 0 /* Journal Superblock number */ + +#define JBD2_FEATURE_COMPAT_CHECKSUM 0x00000001 +#define EXT3_JOURNAL_MAGIC_NUMBER 0xc03b3998U +#define TRANSACTION_RUNNING 1 +#define TRANSACTION_COMPLETE 0 +#define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ +#define EXT3_JOURNAL_DESCRIPTOR_BLOCK 1 +#define EXT3_JOURNAL_COMMIT_BLOCK 2 +#define EXT3_JOURNAL_SUPERBLOCK_V1 3 +#define EXT3_JOURNAL_SUPERBLOCK_V2 4 +#define EXT3_JOURNAL_REVOKE_BLOCK 5 +#define EXT3_JOURNAL_FLAG_ESCAPE 1 +#define EXT3_JOURNAL_FLAG_SAME_UUID 2 +#define EXT3_JOURNAL_FLAG_DELETED 4 +#define EXT3_JOURNAL_FLAG_LAST_TAG 8 + +/* Maximum entries in 1 journal transaction */ +#define MAX_JOURNAL_ENTRIES 100 +struct journal_log { + char *buf; + int blknr; +}; + +struct dirty_blocks { + char *buf; + int blknr; +}; + +/* Standard header for all descriptor blocks: */ +struct journal_header_t { + __u32 h_magic; + __u32 h_blocktype; + __u32 h_sequence; +}; + +/* The journal superblock. All fields are in big-endian byte order. */ +struct journal_superblock_t { + /* 0x0000 */ + struct journal_header_t s_header; + + /* Static information describing the journal */ + __u32 s_blocksize; /* journal device blocksize */ + __u32 s_maxlen; /* total blocks in journal file */ + __u32 s_first; /* first block of log information */ + + /* Dynamic information describing the current state of the log */ + __u32 s_sequence; /* first commit ID expected in log */ + __u32 s_start; /* blocknr of start of log */ + + /* Error value, as set by journal_abort(). 
*/ + __s32 s_errno; + + /* Remaining fields are only valid in a version-2 superblock */ + __u32 s_feature_compat; /* compatible feature set */ + __u32 s_feature_incompat; /* incompatible feature set */ + __u32 s_feature_ro_compat; /* readonly-compatible feature set */ + /* 0x0030 */ + __u8 s_uuid[16]; /* 128-bit uuid for journal */ + + /* 0x0040 */ + __u32 s_nr_users; /* Nr of filesystems sharing log */ + + __u32 s_dynsuper; /* Blocknr of dynamic superblock copy */ + + /* 0x0048 */ + __u32 s_max_transaction; /* Limit of journal blocks per trans. */ + __u32 s_max_trans_data; /* Limit of data blocks per trans. */ + + /* 0x0050 */ + __u32 s_padding[44]; + + /* 0x0100 */ + __u8 s_users[16 * 48]; /* ids of all fs'es sharing the log */ + /* 0x0400 */ +} ; + +struct ext3_journal_block_tag { + uint32_t block; + uint32_t flags; +}; + +struct journal_revoke_header_t { + struct journal_header_t r_header; + int r_count; /* Count of bytes used in the block */ +}; + +struct revoke_blk_list { + char *content; /* revoke block itself */ + struct revoke_blk_list *next; +}; + +extern struct ext2_data *ext4fs_root; + +int ext4fs_init_journal(void); +int ext4fs_log_gdt(char *gd_table); +int ext4fs_check_journal_state(int recovery_flag); +int ext4fs_log_journal(char *journal_buffer, long int blknr); +int ext4fs_put_metadata(char *metadata_buffer, long int blknr); +void ext4fs_update_journal(void); +void ext4fs_dump_metadata(void); +void ext4fs_push_revoke_blk(char *buffer); +void ext4fs_free_journal(void); +void ext4fs_free_revoke_blks(void); +#endif diff --git a/fs/ext4/ext4fs.c b/fs/ext4/ext4fs.c new file mode 100755 index 0000000..cde2911 --- /dev/null +++ b/fs/ext4/ext4fs.c @@ -0,0 +1,1159 @@ +/* + * (C) Copyright 2011 - 2012 Samsung Electronics + * EXT4 filesystem implementation in Uboot by + * Uma Shankar <uma.shankar@samsung.com> + * Manjunatha C Achar <a.manjunatha@samsung.com> + * + * ext4ls and ext4load : Based on ext2 ls and load support in Uboot. 
+ * Ext4 read optimization taken from Open-Moko + * Qi bootloader + * + * (C) Copyright 2004 + * esd gmbh <www.esd-electronics.com> + * Reinhard Arlt <reinhard.arlt@esd-electronics.com> + * + * based on code from grub2 fs/ext2.c and fs/fshelp.c by + * GRUB -- GRand Unified Bootloader + * Copyright (C) 2003, 2004 Free Software Foundation, Inc. + * + * ext4write : Based on generic ext4 protocol. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <common.h> +#include <malloc.h> +#include <ext_common.h> +#include <ext4fs.h> +#include <linux/stat.h> +#include <linux/time.h> +#include <asm/byteorder.h> +#include "ext4_common.h" + +int ext4fs_symlinknest; +struct ext_filesystem ext_fs; + +struct ext_filesystem *get_fs(void) +{ + return &ext_fs; +} + +void ext4fs_free_node(struct ext2fs_node *node, struct ext2fs_node *currroot) +{ + if ((node != &ext4fs_root->diropen) && (node != currroot)) + free(node); +} + +/* + * Taken from openmoko-kernel mailing list: By Andy green + * Optimized read file API : collects and defers contiguous sector + * reads into one potentially more efficient larger sequential read action + */ +int ext4fs_read_file(struct ext2fs_node *node, int pos, + unsigned int len, char *buf) +{ + int i; + int blockcnt; + int log2blocksize = LOG2_EXT2_BLOCK_SIZE(node->data); + int blocksize = 1 << (log2blocksize + DISK_SECTOR_BITS); + unsigned int filesize = __le32_to_cpu(node->inode.size); + int previous_block_number = -1; + int delayed_start = 0; + int delayed_extent = 0; + int delayed_skipfirst = 0; + int delayed_next = 0; + char *delayed_buf = NULL; + short status; + + /* Adjust len so it we can't read past the end of the file. */ + if (len > filesize) + len = filesize; + + blockcnt = ((len + pos) + blocksize - 1) / blocksize; + + for (i = pos / blocksize; i < blockcnt; i++) { + int blknr; + int blockoff = pos % blocksize; + int blockend = blocksize; + int skipfirst = 0; + blknr = read_allocated_block(&(node->inode), i); + + if (blknr < 0) + return -1; + + blknr = blknr << log2blocksize; + + /* Last block. */ + if (i == blockcnt - 1) { + blockend = (len + pos) % blocksize; + + /* The last portion is exactly blocksize. */ + if (!blockend) + blockend = blocksize; + } + + /* First block. 
*/ + if (i == pos / blocksize) { + skipfirst = blockoff; + blockend -= skipfirst; + } + + if (blknr) { + int status; + + if (previous_block_number != -1) { + if (delayed_next == blknr) { + delayed_extent += blockend; + delayed_next += blockend >> SECTOR_BITS; + } else { /* spill */ + status = ext4fs_devread(delayed_start, + delayed_skipfirst, + delayed_extent, + delayed_buf); + if (status == 0) + return -1; + previous_block_number = blknr; + delayed_start = blknr; + delayed_extent = blockend; + delayed_skipfirst = skipfirst; + delayed_buf = buf; + delayed_next = blknr + + (blockend >> SECTOR_BITS); + } + } else { + previous_block_number = blknr; + delayed_start = blknr; + delayed_extent = blockend; + delayed_skipfirst = skipfirst; + delayed_buf = buf; + delayed_next = blknr + + (blockend >> SECTOR_BITS); + } + } else { + if (previous_block_number != -1) { + /* spill */ + status = ext4fs_devread(delayed_start, + delayed_skipfirst, + delayed_extent, + delayed_buf); + if (status == 0) + return -1; + previous_block_number = -1; + } + memset(buf, 0, blocksize - skipfirst); + } + + buf += blocksize - skipfirst; + } + if (previous_block_number != -1) { + /* spill */ + status = ext4fs_devread(delayed_start, + delayed_skipfirst, delayed_extent, + delayed_buf); + if (status == 0) + return -1; + previous_block_number = -1; + } + return len; + +} + +int ext4fs_ls(const char *dirname) +{ + struct ext2fs_node *dirnode; + int status; + + if (dirname == NULL) + return 0; + + status = ext4fs_find_file(dirname, &ext4fs_root->diropen, &dirnode, + FILETYPE_DIRECTORY); + if (status != 1) { + printf("** Can not find directory. 
**\n"); + return 1; + } + + ext4fs_iterate_dir(dirnode, NULL, NULL, NULL); + ext4fs_free_node(dirnode, &ext4fs_root->diropen); + + return 0; +} + +int ext4fs_read(char *buf, unsigned len) +{ + if (ext4fs_root == NULL || ext4fs_file == NULL) + return 0; + + return ext4fs_read_file(ext4fs_file, 0, len, buf); +} + +static void ext4fs_update(void) +{ + short i; + ext4fs_update_journal(); + struct ext_filesystem *fs = get_fs(); + + /* update super block */ + put_ext4((uint64_t)(SUPERBLOCK_SIZE), + (struct ext2_sblock *)fs->sb, (uint32_t)SUPERBLOCK_SIZE); + + /* update block groups */ + for (i = 0; i < fs->no_blkgrp; i++) { + fs->bgd[i].bg_checksum = ext4fs_checksum_update(i); + put_ext4((uint64_t)(fs->bgd[i].block_id * fs->blksz), + fs->blk_bmaps[i], fs->blksz); + } + + /* update inode table groups */ + for (i = 0; i < fs->no_blkgrp; i++) { + put_ext4((uint64_t) (fs->bgd[i].inode_id * fs->blksz), + fs->inode_bmaps[i], fs->blksz); + } + + /* update the block group descriptor table */ + put_ext4((uint64_t)(fs->gdtable_blkno * fs->blksz), + (struct ext2_block_group *)fs->gdtable, + (fs->blksz * fs->no_blk_pergdt)); + + ext4fs_dump_metadata(); + + gindex = 0; + gd_index = 0; +} + +int ext4fs_get_bgdtable(void) +{ + int status; + int grp_desc_size; + struct ext_filesystem *fs = get_fs(); + grp_desc_size = sizeof(struct ext2_block_group); + fs->no_blk_pergdt = (fs->no_blkgrp * grp_desc_size) / fs->blksz; + if ((fs->no_blkgrp * grp_desc_size) % fs->blksz) + fs->no_blk_pergdt++; + + /* allocate memory for gdtable */ + fs->gdtable = zalloc(fs->blksz * fs->no_blk_pergdt); + if (!fs->gdtable) + return -ENOMEM; + /* read the group descriptor table */ + status = ext4fs_devread(fs->gdtable_blkno * fs->sect_perblk, 0, + fs->blksz * fs->no_blk_pergdt, fs->gdtable); + if (status == 0) + goto fail; + + if (ext4fs_log_gdt(fs->gdtable)) { + printf("Error in ext4fs_log_gdt\n"); + return -1; + } + + return 0; +fail: + free(fs->gdtable); + fs->gdtable = NULL; + + return -1; +} + +static void 
delete_single_indirect_block(struct ext2_inode *inode) +{ + struct ext2_block_group *bgd = NULL; + static int prev_bg_bmap_idx = -1; + long int blknr; + int remainder; + int bg_idx; + int status; + unsigned int blk_per_grp = ext4fs_root->sblock.blocks_per_group; + struct ext_filesystem *fs = get_fs(); + char *journal_buffer = zalloc(fs->blksz); + if (!journal_buffer) { + printf("No memory\n"); + return; + } + /* get block group descriptor table */ + bgd = (struct ext2_block_group *)fs->gdtable; + + /* deleting the single indirect block associated with inode */ + if (inode->b.blocks.indir_block != 0) { + debug("SIPB releasing %u\n", inode->b.blocks.indir_block); + blknr = inode->b.blocks.indir_block; + if (fs->blksz != 1024) { + bg_idx = blknr / blk_per_grp; + } else { + bg_idx = blknr / blk_per_grp; + remainder = blknr % blk_per_grp; + if (!remainder) + bg_idx--; + } + ext4fs_reset_block_bmap(blknr, fs->blk_bmaps[bg_idx], bg_idx); + bgd[bg_idx].free_blocks++; + fs->sb->free_blocks++; + /* journal backup */ + if (prev_bg_bmap_idx != bg_idx) { + status = + ext4fs_devread(bgd[bg_idx].block_id * + fs->sect_perblk, 0, fs->blksz, + journal_buffer); + if (status == 0) + goto fail; + if (ext4fs_log_journal + (journal_buffer, bgd[bg_idx].block_id)) + goto fail; + prev_bg_bmap_idx = bg_idx; + } + } +fail: + free(journal_buffer); +} + +static void delete_double_indirect_block(struct ext2_inode *inode) +{ + int i; + short status; + static int prev_bg_bmap_idx = -1; + long int blknr; + int remainder; + int bg_idx; + unsigned int blk_per_grp = ext4fs_root->sblock.blocks_per_group; + unsigned int *di_buffer = NULL; + unsigned int *DIB_start_addr = NULL; + struct ext2_block_group *bgd = NULL; + struct ext_filesystem *fs = get_fs(); + char *journal_buffer = zalloc(fs->blksz); + if (!journal_buffer) { + printf("No memory\n"); + return; + } + /* get the block group descriptor table */ + bgd = (struct ext2_block_group *)fs->gdtable; + + if (inode->b.blocks.double_indir_block != 0) { + 
di_buffer = zalloc(fs->blksz); + if (!di_buffer) { + printf("No memory\n"); + return; + } + DIB_start_addr = (unsigned int *)di_buffer; + blknr = inode->b.blocks.double_indir_block; + status = ext4fs_devread(blknr * fs->sect_perblk, 0, fs->blksz, + (char *)di_buffer); + for (i = 0; i < fs->blksz / sizeof(int); i++) { + if (*di_buffer == 0) + break; + + debug("DICB releasing %u\n", *di_buffer); + if (fs->blksz != 1024) { + bg_idx = (*di_buffer) / blk_per_grp; + } else { + bg_idx = (*di_buffer) / blk_per_grp; + remainder = (*di_buffer) % blk_per_grp; + if (!remainder) + bg_idx--; + } + ext4fs_reset_block_bmap(*di_buffer, + fs->blk_bmaps[bg_idx], bg_idx); + di_buffer++; + bgd[bg_idx].free_blocks++; + fs->sb->free_blocks++; + /* journal backup */ + if (prev_bg_bmap_idx != bg_idx) { + status = ext4fs_devread(bgd[bg_idx].block_id + * fs->sect_perblk, 0, + fs->blksz, + journal_buffer); + if (status == 0) + goto fail; + + if (ext4fs_log_journal(journal_buffer, + bgd[bg_idx].block_id)) + goto fail; + prev_bg_bmap_idx = bg_idx; + } + } + + /* removing the parent double indirect block */ + blknr = inode->b.blocks.double_indir_block; + if (fs->blksz != 1024) { + bg_idx = blknr / blk_per_grp; + } else { + bg_idx = blknr / blk_per_grp; + remainder = blknr % blk_per_grp; + if (!remainder) + bg_idx--; + } + ext4fs_reset_block_bmap(blknr, fs->blk_bmaps[bg_idx], bg_idx); + bgd[bg_idx].free_blocks++; + fs->sb->free_blocks++; + /* journal backup */ + if (prev_bg_bmap_idx != bg_idx) { + memset(journal_buffer, '\0', fs->blksz); + status = ext4fs_devread(bgd[bg_idx].block_id * + fs->sect_perblk, 0, fs->blksz, + journal_buffer); + if (status == 0) + goto fail; + + if (ext4fs_log_journal(journal_buffer, + bgd[bg_idx].block_id)) + goto fail; + prev_bg_bmap_idx = bg_idx; + } + debug("DIPB releasing %ld\n", blknr); + } +fail: + free(DIB_start_addr); + free(journal_buffer); +} + +static void delete_triple_indirect_block(struct ext2_inode *inode) +{ + int i, j; + short status; + static int 
prev_bg_bmap_idx = -1; + long int blknr; + int remainder; + int bg_idx; + unsigned int blk_per_grp = ext4fs_root->sblock.blocks_per_group; + unsigned int *tigp_buffer = NULL; + unsigned int *tib_start_addr = NULL; + unsigned int *tip_buffer = NULL; + unsigned int *tipb_start_addr = NULL; + struct ext2_block_group *bgd = NULL; + struct ext_filesystem *fs = get_fs(); + char *journal_buffer = zalloc(fs->blksz); + if (!journal_buffer) { + printf("No memory\n"); + return; + } + /* get block group descriptor table */ + bgd = (struct ext2_block_group *)fs->gdtable; + + if (inode->b.blocks.triple_indir_block != 0) { + tigp_buffer = zalloc(fs->blksz); + if (!tigp_buffer) { + printf("No memory\n"); + return; + } + tib_start_addr = (unsigned int *)tigp_buffer; + blknr = inode->b.blocks.triple_indir_block; + status = ext4fs_devread(blknr * fs->sect_perblk, 0, fs->blksz, + (char *)tigp_buffer); + for (i = 0; i < fs->blksz / sizeof(int); i++) { + if (*tigp_buffer == 0) + break; + debug("tigp buffer releasing %u\n", *tigp_buffer); + + tip_buffer = zalloc(fs->blksz); + if (!tip_buffer) + goto fail; + tipb_start_addr = (unsigned int *)tip_buffer; + status = ext4fs_devread((*tigp_buffer) * + fs->sect_perblk, 0, fs->blksz, + (char *)tip_buffer); + for (j = 0; j < fs->blksz / sizeof(int); j++) { + if (*tip_buffer == 0) + break; + if (fs->blksz != 1024) { + bg_idx = (*tip_buffer) / blk_per_grp; + } else { + bg_idx = (*tip_buffer) / blk_per_grp; + + remainder = (*tip_buffer) % blk_per_grp; + if (!remainder) + bg_idx--; + } + + ext4fs_reset_block_bmap(*tip_buffer, + fs->blk_bmaps[bg_idx], + bg_idx); + + tip_buffer++; + bgd[bg_idx].free_blocks++; + fs->sb->free_blocks++; + /* journal backup */ + if (prev_bg_bmap_idx != bg_idx) { + status = + ext4fs_devread( + bgd[bg_idx].block_id * + fs->sect_perblk, 0, + fs->blksz, + journal_buffer); + if (status == 0) + goto fail; + + if (ext4fs_log_journal(journal_buffer, + bgd[bg_idx]. 
+ block_id)) + goto fail; + prev_bg_bmap_idx = bg_idx; + } + } + free(tipb_start_addr); + tipb_start_addr = NULL; + + /* + * removing the grand parent blocks + * which is connected to inode + */ + if (fs->blksz != 1024) { + bg_idx = (*tigp_buffer) / blk_per_grp; + } else { + bg_idx = (*tigp_buffer) / blk_per_grp; + + remainder = (*tigp_buffer) % blk_per_grp; + if (!remainder) + bg_idx--; + } + ext4fs_reset_block_bmap(*tigp_buffer, + fs->blk_bmaps[bg_idx], bg_idx); + + tigp_buffer++; + bgd[bg_idx].free_blocks++; + fs->sb->free_blocks++; + /* journal backup */ + if (prev_bg_bmap_idx != bg_idx) { + memset(journal_buffer, '\0', fs->blksz); + status = + ext4fs_devread(bgd[bg_idx].block_id * + fs->sect_perblk, 0, + fs->blksz, journal_buffer); + if (status == 0) + goto fail; + + if (ext4fs_log_journal(journal_buffer, + bgd[bg_idx].block_id)) + goto fail; + prev_bg_bmap_idx = bg_idx; + } + } + + /* removing the grand parent triple indirect block */ + blknr = inode->b.blocks.triple_indir_block; + if (fs->blksz != 1024) { + bg_idx = blknr / blk_per_grp; + } else { + bg_idx = blknr / blk_per_grp; + remainder = blknr % blk_per_grp; + if (!remainder) + bg_idx--; + } + ext4fs_reset_block_bmap(blknr, fs->blk_bmaps[bg_idx], bg_idx); + bgd[bg_idx].free_blocks++; + fs->sb->free_blocks++; + /* journal backup */ + if (prev_bg_bmap_idx != bg_idx) { + memset(journal_buffer, '\0', fs->blksz); + status = ext4fs_devread(bgd[bg_idx].block_id * + fs->sect_perblk, 0, fs->blksz, + journal_buffer); + if (status == 0) + goto fail; + + if (ext4fs_log_journal(journal_buffer, + bgd[bg_idx].block_id)) + goto fail; + prev_bg_bmap_idx = bg_idx; + } + debug("tigp buffer itself releasing %ld\n", blknr); + } +fail: + free(tib_start_addr); + free(tipb_start_addr); + free(journal_buffer); +} + +static int ext4fs_delete_file(int inodeno) +{ + struct ext2_inode inode; + short status; + int i; + int remainder; + long int blknr; + int bg_idx; + int ibmap_idx; + char *read_buffer = NULL; + char 
*start_block_address = NULL; + unsigned int no_blocks; + + static int prev_bg_bmap_idx = -1; + unsigned int inodes_per_block; + long int blkno; + unsigned int blkoff; + unsigned int blk_per_grp = ext4fs_root->sblock.blocks_per_group; + unsigned int inode_per_grp = ext4fs_root->sblock.inodes_per_group; + struct ext2_inode *inode_buffer = NULL; + struct ext2_block_group *bgd = NULL; + struct ext_filesystem *fs = get_fs(); + char *journal_buffer = zalloc(fs->blksz); + if (!journal_buffer) + return -ENOMEM; + /* get the block group descriptor table */ + bgd = (struct ext2_block_group *)fs->gdtable; + status = ext4fs_read_inode(ext4fs_root, inodeno, &inode); + if (status == 0) + goto fail; + + /* read the block no allocated to a file */ + no_blocks = inode.size / fs->blksz; + if (inode.size % fs->blksz) + no_blocks++; + + if (le32_to_cpu(inode.flags) & EXT4_EXTENTS_FL) { + struct ext2fs_node *node_inode = + zalloc(sizeof(struct ext2fs_node)); + if (!node_inode) + goto fail; + node_inode->data = ext4fs_root; + node_inode->ino = inodeno; + node_inode->inode_read = 0; + memcpy(&(node_inode->inode), &inode, sizeof(struct ext2_inode)); + + for (i = 0; i < no_blocks; i++) { + blknr = read_allocated_block(&(node_inode->inode), i); + if (fs->blksz != 1024) { + bg_idx = blknr / blk_per_grp; + } else { + bg_idx = blknr / blk_per_grp; + remainder = blknr % blk_per_grp; + if (!remainder) + bg_idx--; + } + ext4fs_reset_block_bmap(blknr, fs->blk_bmaps[bg_idx], + bg_idx); + debug("EXT4_EXTENTS Block releasing %ld: %d\n", + blknr, bg_idx); + + bgd[bg_idx].free_blocks++; + fs->sb->free_blocks++; + + /* journal backup */ + if (prev_bg_bmap_idx != bg_idx) { + status = + ext4fs_devread(bgd[bg_idx].block_id * + fs->sect_perblk, 0, + fs->blksz, journal_buffer); + if (status == 0) + goto fail; + if (ext4fs_log_journal(journal_buffer, + bgd[bg_idx].block_id)) + goto fail; + prev_bg_bmap_idx = bg_idx; + } + } + if (node_inode) { + free(node_inode); + node_inode = NULL; + } + } else { + + 
delete_single_indirect_block(&inode); + delete_double_indirect_block(&inode); + delete_triple_indirect_block(&inode); + + /* read the block no allocated to a file */ + no_blocks = inode.size / fs->blksz; + if (inode.size % fs->blksz) + no_blocks++; + for (i = 0; i < no_blocks; i++) { + blknr = read_allocated_block(&inode, i); + if (fs->blksz != 1024) { + bg_idx = blknr / blk_per_grp; + } else { + bg_idx = blknr / blk_per_grp; + remainder = blknr % blk_per_grp; + if (!remainder) + bg_idx--; + } + ext4fs_reset_block_bmap(blknr, fs->blk_bmaps[bg_idx], + bg_idx); + debug("ActualB releasing %ld: %d\n", blknr, bg_idx); + + bgd[bg_idx].free_blocks++; + fs->sb->free_blocks++; + /* journal backup */ + if (prev_bg_bmap_idx != bg_idx) { + memset(journal_buffer, '\0', fs->blksz); + status = ext4fs_devread(bgd[bg_idx].block_id + * fs->sect_perblk, + 0, fs->blksz, + journal_buffer); + if (status == 0) + goto fail; + if (ext4fs_log_journal(journal_buffer, + bgd[bg_idx].block_id)) + goto fail; + prev_bg_bmap_idx = bg_idx; + } + } + } + + /* from the inode no to blockno */ + inodes_per_block = fs->blksz / fs->inodesz; + ibmap_idx = inodeno / inode_per_grp; + + /* get the block no */ + inodeno--; + blkno = __le32_to_cpu(bgd[ibmap_idx].inode_table_id) + + (inodeno % __le32_to_cpu(inode_per_grp)) / inodes_per_block; + + /* get the offset of the inode */ + blkoff = ((inodeno) % inodes_per_block) * fs->inodesz; + + /* read the block no containing the inode */ + read_buffer = zalloc(fs->blksz); + if (!read_buffer) + goto fail; + start_block_address = read_buffer; + status = ext4fs_devread(blkno * fs->sect_perblk, + 0, fs->blksz, read_buffer); + if (status == 0) + goto fail; + + if (ext4fs_log_journal(read_buffer, blkno)) + goto fail; + + read_buffer = read_buffer + blkoff; + inode_buffer = (struct ext2_inode *)read_buffer; + memset(inode_buffer, '\0', sizeof(struct ext2_inode)); + + /* write the inode to original position in inode table */ + if (ext4fs_put_metadata(start_block_address, 
blkno)) + goto fail; + + /* update the respective inode bitmaps */ + inodeno++; + ext4fs_reset_inode_bmap(inodeno, fs->inode_bmaps[ibmap_idx], ibmap_idx); + bgd[ibmap_idx].free_inodes++; + fs->sb->free_inodes++; + /* journal backup */ + memset(journal_buffer, '\0', fs->blksz); + status = ext4fs_devread(bgd[ibmap_idx].inode_id * + fs->sect_perblk, 0, fs->blksz, journal_buffer); + if (status == 0) + goto fail; + if (ext4fs_log_journal(journal_buffer, bgd[ibmap_idx].inode_id)) + goto fail; + + ext4fs_update(); + ext4fs_deinit(); + + if (ext4fs_init() != 0) { + printf("error in File System init\n"); + goto fail; + } + + free(start_block_address); + free(journal_buffer); + + return 0; +fail: + free(start_block_address); + free(journal_buffer); + + return -1; +} + +int ext4fs_init(void) +{ + short status; + int i; + unsigned int real_free_blocks = 0; + struct ext_filesystem *fs = get_fs(); + + /* populate fs */ + fs->blksz = EXT2_BLOCK_SIZE(ext4fs_root); + fs->inodesz = INODE_SIZE_FILESYSTEM(ext4fs_root); + fs->sect_perblk = fs->blksz / SECTOR_SIZE; + + /* get the superblock */ + fs->sb = zalloc(SUPERBLOCK_SIZE); + if (!fs->sb) + return -ENOMEM; + if (!ext4fs_devread(SUPERBLOCK_SECTOR, 0, SUPERBLOCK_SIZE, + (char *)fs->sb)) + goto fail; + + /* init journal */ + if (ext4fs_init_journal()) + goto fail; + + /* get total no of blockgroups */ + fs->no_blkgrp = (uint32_t)ext4fs_div_roundup( + (ext4fs_root->sblock.total_blocks - + ext4fs_root->sblock.first_data_block), + ext4fs_root->sblock.blocks_per_group); + + /* get the block group descriptor table */ + fs->gdtable_blkno = ((EXT2_MIN_BLOCK_SIZE == fs->blksz) + 1); + if (ext4fs_get_bgdtable() == -1) { + printf("Error in getting the block group descriptor table\n"); + goto fail; + } + fs->bgd = (struct ext2_block_group *)fs->gdtable; + + /* load all the available bitmap block of the partition */ + fs->blk_bmaps = zalloc(fs->no_blkgrp * sizeof(char *)); + if (!fs->blk_bmaps) + goto fail; + for (i = 0; i < fs->no_blkgrp; i++) { 
+ fs->blk_bmaps[i] = zalloc(fs->blksz); + if (!fs->blk_bmaps[i]) + goto fail; + } + + for (i = 0; i < fs->no_blkgrp; i++) { + status = + ext4fs_devread(fs->bgd[i].block_id * fs->sect_perblk, 0, + fs->blksz, (char *)fs->blk_bmaps[i]); + if (status == 0) + goto fail; + } + + /* load all the available inode bitmap of the partition */ + fs->inode_bmaps = zalloc(fs->no_blkgrp * sizeof(unsigned char *)); + if (!fs->inode_bmaps) + goto fail; + for (i = 0; i < fs->no_blkgrp; i++) { + fs->inode_bmaps[i] = zalloc(fs->blksz); + if (!fs->inode_bmaps[i]) + goto fail; + } + + for (i = 0; i < fs->no_blkgrp; i++) { + status = ext4fs_devread(fs->bgd[i].inode_id * fs->sect_perblk, + 0, fs->blksz, + (char *)fs->inode_bmaps[i]); + if (status == 0) + goto fail; + } + + /* + * check filesystem consistency with free blocks of file system + * some time we observed that superblock freeblocks does not match + * with the blockgroups freeblocks when improper + * reboot of a linux kernel + */ + for (i = 0; i < fs->no_blkgrp; i++) + real_free_blocks = real_free_blocks + fs->bgd[i].free_blocks; + if (real_free_blocks != fs->sb->free_blocks) + fs->sb->free_blocks = real_free_blocks; + + return 0; +fail: + ext4fs_deinit(); + + return -1; +} + +void ext4fs_deinit(void) +{ + int i; + struct ext2_inode inode_journal; + struct journal_superblock_t *jsb; + long int blknr; + struct ext_filesystem *fs = get_fs(); + + /* free journal */ + char *temp_buff = zalloc(fs->blksz); + if (temp_buff) { + ext4fs_read_inode(ext4fs_root, EXT2_JOURNAL_INO, + &inode_journal); + blknr = read_allocated_block(&inode_journal, + EXT2_JOURNAL_SUPERBLOCK); + ext4fs_devread(blknr * fs->sect_perblk, 0, fs->blksz, + temp_buff); + jsb = (struct journal_superblock_t *)temp_buff; + jsb->s_start = cpu_to_be32(0); + put_ext4((uint64_t) (blknr * fs->blksz), + (struct journal_superblock_t *)temp_buff, fs->blksz); + free(temp_buff); + } + ext4fs_free_journal(); + + /* get the superblock */ + ext4fs_devread(SUPERBLOCK_SECTOR, 0, 
SUPERBLOCK_SIZE, (char *)fs->sb); + fs->sb->feature_incompat &= ~EXT3_FEATURE_INCOMPAT_RECOVER; + put_ext4((uint64_t)(SUPERBLOCK_SIZE), + (struct ext2_sblock *)fs->sb, (uint32_t)SUPERBLOCK_SIZE); + free(fs->sb); + fs->sb = NULL; + + if (fs->blk_bmaps) { + for (i = 0; i < fs->no_blkgrp; i++) { + free(fs->blk_bmaps[i]); + fs->blk_bmaps[i] = NULL; + } + free(fs->blk_bmaps); + fs->blk_bmaps = NULL; + } + + if (fs->inode_bmaps) { + for (i = 0; i < fs->no_blkgrp; i++) { + free(fs->inode_bmaps[i]); + fs->inode_bmaps[i] = NULL; + } + free(fs->inode_bmaps); + fs->inode_bmaps = NULL; + } + + + free(fs->gdtable); + fs->gdtable = NULL; + fs->bgd = NULL; + /* + * reinitiliazed the global inode and + * block bitmap first execution check variables + */ + fs->first_pass_ibmap = 0; + fs->first_pass_bbmap = 0; + fs->curr_inode_no = 0; + fs->curr_blkno = 0; +} + +static int ext4fs_write_file(struct ext2_inode *file_inode, + int pos, unsigned int len, char *buf) +{ + int i; + int blockcnt; + int log2blocksize = LOG2_EXT2_BLOCK_SIZE(ext4fs_root); + unsigned int filesize = __le32_to_cpu(file_inode->size); + struct ext_filesystem *fs = get_fs(); + int previous_block_number = -1; + int delayed_start = 0; + int delayed_extent = 0; + int delayed_skipfirst = 0; + int delayed_next = 0; + char *delayed_buf = NULL; + + /* Adjust len so it we can't read past the end of the file. 
*/ + if (len > filesize) + len = filesize; + + blockcnt = ((len + pos) + fs->blksz - 1) / fs->blksz; + + for (i = pos / fs->blksz; i < blockcnt; i++) { + long int blknr; + int blockend = fs->blksz; + int skipfirst = 0; + blknr = read_allocated_block(file_inode, i); + if (blknr < 0) + return -1; + + blknr = blknr << log2blocksize; + + if (blknr) { + if (previous_block_number != -1) { + if (delayed_next == blknr) { + delayed_extent += blockend; + delayed_next += blockend >> SECTOR_BITS; + } else { /* spill */ + put_ext4((uint64_t) (delayed_start * + SECTOR_SIZE), + delayed_buf, + (uint32_t) delayed_extent); + previous_block_number = blknr; + delayed_start = blknr; + delayed_extent = blockend; + delayed_skipfirst = skipfirst; + delayed_buf = buf; + delayed_next = blknr + + (blockend >> SECTOR_BITS); + } + } else { + previous_block_number = blknr; + delayed_start = blknr; + delayed_extent = blockend; + delayed_skipfirst = skipfirst; + delayed_buf = buf; + delayed_next = blknr + + (blockend >> SECTOR_BITS); + } + } else { + if (previous_block_number != -1) { + /* spill */ + put_ext4((uint64_t) (delayed_start * + SECTOR_SIZE), delayed_buf, + (uint32_t) delayed_extent); + previous_block_number = -1; + } + memset(buf, 0, fs->blksz - skipfirst); + } + buf += fs->blksz - skipfirst; + } + if (previous_block_number != -1) { + /* spill */ + put_ext4((uint64_t) (delayed_start * SECTOR_SIZE), + delayed_buf, (uint32_t) delayed_extent); + previous_block_number = -1; + } + + return len; +} + +int ext4fs_write(const char *fname, unsigned char *buffer, + unsigned long sizebytes) +{ + int ret = 0; + struct ext2_inode *file_inode = NULL; + unsigned char *inode_buffer = NULL; + int parent_inodeno; + int inodeno; + time_t timestamp = 0; + + uint64_t bytes_reqd_for_file; + unsigned int blks_reqd_for_file; + unsigned int blocks_remaining; + int existing_file_inodeno; + char filename[256]; + + char *temp_ptr = NULL; + long int itable_blkno; + long int parent_itable_blkno; + long int blkoff; 
+ struct ext2_sblock *sblock = &(ext4fs_root->sblock); + unsigned int inodes_per_block; + unsigned int ibmap_idx; + struct ext_filesystem *fs = get_fs(); + g_parent_inode = zalloc(sizeof(struct ext2_inode)); + if (!g_parent_inode) + goto fail; + + if (ext4fs_init() != 0) { + printf("error in File System init\n"); + return -1; + } + inodes_per_block = fs->blksz / fs->inodesz; + parent_inodeno = ext4fs_get_parent_inode_num(fname, filename, F_FILE); + if (parent_inodeno == -1) + goto fail; + if (ext4fs_iget(parent_inodeno, g_parent_inode)) + goto fail; + /* check if the filename is already present in root */ + existing_file_inodeno = ext4fs_filename_check(filename); + if (existing_file_inodeno != -1) { + ret = ext4fs_delete_file(existing_file_inodeno); + fs->first_pass_bbmap = 0; + fs->curr_blkno = 0; + + fs->first_pass_ibmap = 0; + fs->curr_inode_no = 0; + if (ret) + goto fail; + } + /* calucalate how many blocks required */ + bytes_reqd_for_file = sizebytes; + blks_reqd_for_file = bytes_reqd_for_file / fs->blksz; + if (bytes_reqd_for_file % fs->blksz != 0) { + blks_reqd_for_file++; + printf("total bytes for a file %u\n", blks_reqd_for_file); + } + blocks_remaining = blks_reqd_for_file; + /* test for available space in partition */ + if (fs->sb->free_blocks < blks_reqd_for_file) { + printf("Not enough space on partition !!!\n"); + goto fail; + } + + ext4fs_update_parent_dentry(filename, &inodeno, FILETYPE_REG); + /* prepare file inode */ + inode_buffer = zalloc(fs->inodesz); + if (!inode_buffer) + goto fail; + file_inode = (struct ext2_inode *)inode_buffer; + file_inode->mode = S_IFREG | S_IRWXU | + S_IRGRP | S_IROTH | S_IXGRP | S_IXOTH; + /* ToDo: Update correct time */ + file_inode->mtime = timestamp; + file_inode->atime = timestamp; + file_inode->ctime = timestamp; + file_inode->nlinks = 1; + file_inode->size = sizebytes; + + /* Allocate data blocks */ + ext4fs_allocate_blocks(file_inode, blocks_remaining, + &blks_reqd_for_file); + file_inode->blockcnt = 
(blks_reqd_for_file * fs->blksz) / SECTOR_SIZE; + + temp_ptr = zalloc(fs->blksz); + if (!temp_ptr) + goto fail; + ibmap_idx = inodeno / ext4fs_root->sblock.inodes_per_group; + inodeno--; + itable_blkno = __le32_to_cpu(fs->bgd[ibmap_idx].inode_table_id) + + (inodeno % __le32_to_cpu(sblock->inodes_per_group)) / + inodes_per_block; + blkoff = (inodeno % inodes_per_block) * fs->inodesz; + ext4fs_devread(itable_blkno * fs->sect_perblk, 0, fs->blksz, temp_ptr); + if (ext4fs_log_journal(temp_ptr, itable_blkno)) + goto fail; + + memcpy(temp_ptr + blkoff, inode_buffer, fs->inodesz); + if (ext4fs_put_metadata(temp_ptr, itable_blkno)) + goto fail; + /* copy the file content into data blocks */ + if (ext4fs_write_file(file_inode, 0, sizebytes, (char *)buffer) == -1) { + printf("Error in copying content\n"); + goto fail; + } + + ibmap_idx = parent_inodeno / ext4fs_root->sblock.inodes_per_group; + parent_inodeno--; + parent_itable_blkno = __le32_to_cpu(fs->bgd[ibmap_idx].inode_table_id) + + (parent_inodeno % + __le32_to_cpu(sblock->inodes_per_group)) / inodes_per_block; + blkoff = (parent_inodeno % inodes_per_block) * fs->inodesz; + if (parent_itable_blkno != itable_blkno) { + memset(temp_ptr, '\0', fs->blksz); + ext4fs_devread(parent_itable_blkno * fs->sect_perblk, + 0, fs->blksz, temp_ptr); + if (ext4fs_log_journal(temp_ptr, parent_itable_blkno)) + goto fail; + + memcpy(temp_ptr + blkoff, g_parent_inode, + sizeof(struct ext2_inode)); + if (ext4fs_put_metadata(temp_ptr, parent_itable_blkno)) + goto fail; + free(temp_ptr); + } else { + /* + * If parent and child fall in same inode table block + * both should be kept in 1 buffer + */ + memcpy(temp_ptr + blkoff, g_parent_inode, + sizeof(struct ext2_inode)); + gd_index--; + if (ext4fs_put_metadata(temp_ptr, itable_blkno)) + goto fail; + free(temp_ptr); + } + ext4fs_update(); + ext4fs_deinit(); + + fs->first_pass_bbmap = 0; + fs->curr_blkno = 0; + fs->first_pass_ibmap = 0; + fs->curr_inode_no = 0; + free(inode_buffer); + 
free(g_parent_inode); + g_parent_inode = NULL; + + return 0; +fail: + ext4fs_deinit(); + free(inode_buffer); + free(g_parent_inode); + g_parent_inode = NULL; + + return -1; +} diff --git a/fs/ext4/format/Makefile b/fs/ext4/format/Makefile new file mode 100755 index 0000000..302b7ff --- /dev/null +++ b/fs/ext4/format/Makefile @@ -0,0 +1,56 @@ +# +# (C) Copyright 2006 +# Wolfgang Denk, DENX Software Engineering, wd@denx.de. +# +# (C) Copyright 2003 +# Pavel Bartusek, Sysgo Real-Time Solutions AG, pba@sysgo.de +# +# +# See file CREDITS for list of people who contributed to this +# project. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# + + +include $(TOPDIR)/config.mk + +LIB = libext4fsformat.a + +AOBJS = +COBJS = ext4_format.o freefs.o bitmaps.o gen_bitmap.o bitops.o rw_bitmaps.o \ + csum.o crc16.o alloc_sb.o closefs.o io_manager.o alloc_tables.o alloc.o \ + alloc_stats.o mkjournal.o mkdir.o newdir.o inode.o i_block.o dirblock.o \ + lookup.o link.o dir_iterate.o block.o ind_block.o extent.o expanddir.o \ + res_gdt.o bb_inode.o badblocks.o +OBJS = $(AOBJS) $(COBJS) + +#CPPFLAGS += + +all: $(LIB) $(AOBJS) + +$(LIB): .depend $(OBJS) + $(AR) crv $@ $(OBJS) + + +######################################################################### + +.depend: Makefile $(AOBJS:.o=.S) $(COBJS:.o=.c) + $(CC) -M $(CFLAGS) $(AOBJS:.o=.S) $(COBJS:.o=.c) > $@ + +sinclude .depend + +######################################################################### diff --git a/fs/ext4/format/alloc.c b/fs/ext4/format/alloc.c new file mode 100755 index 0000000..8cfd57d --- /dev/null +++ b/fs/ext4/format/alloc.c @@ -0,0 +1,274 @@ +/* + * alloc.c --- allocate new inodes, blocks for ext2fs + * + * Copyright (C) 1993, 1994, 1995, 1996 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. 
+ * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> +#include <linux/stat.h> +#include <linux/time.h> + + +#include "ext2_fs.h" +#include "ext2fs.h" + +/* + * Check for uninit block bitmaps and deal with them appropriately + */ +static void check_block_uninit(ext2_filsys fs, ext2fs_block_bitmap map, + dgrp_t group) +{ + blk_t i; + blk_t blk, super_blk, old_desc_blk, new_desc_blk; + int old_desc_blocks; + + if (!(EXT2_HAS_RO_COMPAT_FEATURE(fs->super, + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) || + !(fs->group_desc[group].bg_flags & EXT2_BG_BLOCK_UNINIT)) + return; + + blk = (group * fs->super->s_blocks_per_group) + + fs->super->s_first_data_block; + + ext2fs_super_and_bgd_loc(fs, group, &super_blk, + &old_desc_blk, &new_desc_blk, 0); + + if (fs->super->s_feature_incompat & + EXT2_FEATURE_INCOMPAT_META_BG) + old_desc_blocks = fs->super->s_first_meta_bg; + else + old_desc_blocks = fs->desc_blocks + fs->super->s_reserved_gdt_blocks; + + for (i=0; i < fs->super->s_blocks_per_group; i++, blk++) { + if ((blk == super_blk) || + (old_desc_blk && old_desc_blocks && + (blk >= old_desc_blk) && + (blk < old_desc_blk + old_desc_blocks)) || + (new_desc_blk && (blk == new_desc_blk)) || + (blk == fs->group_desc[group].bg_block_bitmap) || + (blk == fs->group_desc[group].bg_inode_bitmap) || + (blk >= fs->group_desc[group].bg_inode_table && + (blk < fs->group_desc[group].bg_inode_table + + fs->inode_blocks_per_group))) + ext2fs_fast_mark_block_bitmap(map, blk); + else + ext2fs_fast_unmark_block_bitmap(map, blk); + } + fs->group_desc[group].bg_flags &= ~EXT2_BG_BLOCK_UNINIT; + ext2fs_group_desc_csum_set(fs, group); +} + +/* + * Check for uninit inode bitmaps and deal with them appropriately + */ +static void check_inode_uninit(ext2_filsys fs, ext2fs_inode_bitmap map, + dgrp_t group) +{ + ext2_ino_t i, ino; + + if (!(EXT2_HAS_RO_COMPAT_FEATURE(fs->super, + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) || + 
!(fs->group_desc[group].bg_flags & EXT2_BG_INODE_UNINIT)) + return; + + ino = (group * fs->super->s_inodes_per_group) + 1; + for (i=0; i < fs->super->s_inodes_per_group; i++, ino++) + ext2fs_fast_unmark_inode_bitmap(map, ino); + + fs->group_desc[group].bg_flags &= ~EXT2_BG_INODE_UNINIT; + check_block_uninit(fs, fs->block_map, group); +} + +/* + * Right now, just search forward from the parent directory's block + * group to find the next free inode. + * + * Should have a special policy for directories. + */ +errcode_t ext2fs_new_inode(ext2_filsys fs, ext2_ino_t dir, + int mode EXT2FS_ATTR((unused)), + ext2fs_inode_bitmap map, ext2_ino_t *ret) +{ + ext2_ino_t dir_group = 0; + ext2_ino_t i; + ext2_ino_t start_inode; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + if (!map) + map = fs->inode_map; + if (!map) + return EXT2_ET_NO_INODE_BITMAP; + + if (dir > 0) + dir_group = (dir - 1) / EXT2_INODES_PER_GROUP(fs->super); + + start_inode = (dir_group * EXT2_INODES_PER_GROUP(fs->super)) + 1; + if (start_inode < EXT2_FIRST_INODE(fs->super)) + start_inode = EXT2_FIRST_INODE(fs->super); + if (start_inode > fs->super->s_inodes_count) + return EXT2_ET_INODE_ALLOC_FAIL; + i = start_inode; + + do { + if (((i - 1) % EXT2_INODES_PER_GROUP(fs->super)) == 0) + check_inode_uninit(fs, map, (i - 1) / + EXT2_INODES_PER_GROUP(fs->super)); + + if (!ext2fs_fast_test_inode_bitmap(map, i)) + break; + i++; + if (i > fs->super->s_inodes_count) + i = EXT2_FIRST_INODE(fs->super); + } while (i != start_inode); + + if (ext2fs_test_inode_bitmap(map, i)) + return EXT2_ET_INODE_ALLOC_FAIL; + *ret = i; + return 0; +} + +/* + * Stupid algorithm --- we now just search forward starting from the + * goal. Should put in a smarter one someday.... 
+ */ +errcode_t ext2fs_new_block(ext2_filsys fs, blk_t goal, + ext2fs_block_bitmap map, blk_t *ret) +{ + blk_t i; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + if (!map) + map = fs->block_map; + if (!map) + return EXT2_ET_NO_BLOCK_BITMAP; + if (!goal || (goal >= fs->super->s_blocks_count)) + goal = fs->super->s_first_data_block; + i = goal; + check_block_uninit(fs, map, + (i - fs->super->s_first_data_block) / + EXT2_BLOCKS_PER_GROUP(fs->super)); + do { + if (((i - fs->super->s_first_data_block) % + EXT2_BLOCKS_PER_GROUP(fs->super)) == 0) + check_block_uninit(fs, map, + (i - fs->super->s_first_data_block) / + EXT2_BLOCKS_PER_GROUP(fs->super)); + + if (!ext2fs_fast_test_block_bitmap(map, i)) { + *ret = i; + return 0; + } + i++; + if (i >= fs->super->s_blocks_count) + i = fs->super->s_first_data_block; + } while (i != goal); + return EXT2_ET_BLOCK_ALLOC_FAIL; +} + +/* + * This function zeros out the allocated block, and updates all of the + * appropriate filesystem records. + */ +errcode_t ext2fs_alloc_block(ext2_filsys fs, blk_t goal, + char *block_buf, blk_t *ret) +{ + errcode_t retval; + blk_t block; + char *buf = 0; + + if (!block_buf) { + retval = ext2fs_get_mem(fs->blocksize, &buf); + if (retval) + return retval; + block_buf = buf; + } + memset(block_buf, 0, fs->blocksize); + + if (fs->get_alloc_block) { + blk64_t new; + + retval = (fs->get_alloc_block)(fs, (blk64_t) goal, &new); + if (retval) + goto fail; + block = (blk_t) new; + } else { + if (!fs->block_map) { + retval = ext2fs_read_block_bitmap(fs); + if (retval) + goto fail; + } + + retval = ext2fs_new_block(fs, goal, 0, &block); + if (retval) + goto fail; + } + + retval = io_channel_write_blk(fs->io, block, 1, block_buf); + if (retval) + goto fail; + + ext2fs_block_alloc_stats(fs, block, +1); + *ret = block; + +fail: + if (buf) + ext2fs_free_mem(&buf); + return retval; +} + +errcode_t ext2fs_get_free_blocks(ext2_filsys fs, blk_t start, blk_t finish, + int num, ext2fs_block_bitmap map, blk_t 
*ret) +{ + blk_t b = start; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + if (!map) + map = fs->block_map; + if (!map) + return EXT2_ET_NO_BLOCK_BITMAP; + if (!b) + b = fs->super->s_first_data_block; + if (!finish) + finish = start; + if (!num) + num = 1; + do { + if (b+num-1 > fs->super->s_blocks_count) + b = fs->super->s_first_data_block; + if (ext2fs_fast_test_block_bitmap_range(map, b, num)) { + *ret = b; + return 0; + } + b++; + } while (b != finish); + return EXT2_ET_BLOCK_ALLOC_FAIL; +} + +void ext2fs_set_alloc_block_callback(ext2_filsys fs, + errcode_t (*func)(ext2_filsys fs, + blk64_t goal, + blk64_t *ret), + errcode_t (**old)(ext2_filsys fs, + blk64_t goal, + blk64_t *ret)) +{ + if (!fs || fs->magic != EXT2_ET_MAGIC_EXT2FS_FILSYS) + return; + + if (old) + *old = fs->get_alloc_block; + + fs->get_alloc_block = func; +} diff --git a/fs/ext4/format/alloc_sb.c b/fs/ext4/format/alloc_sb.c new file mode 100755 index 0000000..3bbe43d --- /dev/null +++ b/fs/ext4/format/alloc_sb.c @@ -0,0 +1,70 @@ +/* + * alloc_sb.c --- Allocate the superblock and block group descriptors for a + * newly initialized filesystem. Used by mke2fs when initializing a filesystem + * + * Copyright (C) 1994, 1995, 1996, 2003 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. + * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> +#include <linux/stat.h> +#include <linux/time.h> + + +#include "ext2_fs.h" +#include "ext2fs.h" + +/* + * This function reserves the superblock and block group descriptors + * for a given block group. It currently returns the number of free + * blocks assuming that inode table and allocation bitmaps will be in + * the group. This is not necessarily the case when the flex_bg + * feature is enabled, so callers should take care! 
It was only + * really intended for use by mke2fs, and even there it's not that + * useful. In the future, when we redo this function for 64-bit block + * numbers, we should probably return the number of blocks used by the + * super block and group descriptors instead. + * + * See also the comment for ext2fs_super_and_bgd_loc() + */ +int ext2fs_reserve_super_and_bgd(ext2_filsys fs, + dgrp_t group, + ext2fs_block_bitmap bmap) +{ + blk_t super_blk, old_desc_blk, new_desc_blk; + int j, old_desc_blocks, num_blocks; + + num_blocks = ext2fs_super_and_bgd_loc(fs, group, &super_blk, + &old_desc_blk, &new_desc_blk, 0);//how many data blocks + + + if (fs->super->s_feature_incompat & EXT2_FEATURE_INCOMPAT_META_BG) + old_desc_blocks = fs->super->s_first_meta_bg; + else + old_desc_blocks = + fs->desc_blocks + fs->super->s_reserved_gdt_blocks;//how many block the group descriptions will take and reserve + + if (super_blk || (group == 0)) + ext2fs_mark_block_bitmap(bmap, super_blk);//mark the superblock in block bitmap + + if (old_desc_blk) { + if (fs->super->s_reserved_gdt_blocks && fs->block_map == bmap) + fs->group_desc[group].bg_flags &= ~EXT2_BG_BLOCK_UNINIT; + for (j=0; j < old_desc_blocks; j++) + if (old_desc_blk + j < fs->super->s_blocks_count) + ext2fs_mark_block_bitmap(bmap, + old_desc_blk + j);//mar the group descriptions in the block bitmap + } + if (new_desc_blk) + ext2fs_mark_block_bitmap(bmap, new_desc_blk); + + return num_blocks;//how many data blocks +} diff --git a/fs/ext4/format/alloc_stats.c b/fs/ext4/format/alloc_stats.c new file mode 100755 index 0000000..cc7e87d --- /dev/null +++ b/fs/ext4/format/alloc_stats.c @@ -0,0 +1,108 @@ +/* + * alloc_stats.c --- Update allocation statistics for ext2fs + * + * Copyright (C) 2001 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. 
+ * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> +#include <linux/stat.h> +#include <linux/time.h> + + +#include "ext2_fs.h" +#include "ext2fs.h" + +void ext2fs_inode_alloc_stats2(ext2_filsys fs, ext2_ino_t ino, + int inuse, int isdir) +{ + int group = ext2fs_group_of_ino(fs, ino); + +#ifndef OMIT_COM_ERR + if (ino > fs->super->s_inodes_count) { + printf("error ext2fs_inode_alloc_stats2\n"); + return; + } +#endif + if (inuse > 0) + ext2fs_mark_inode_bitmap(fs->inode_map, ino); + else + ext2fs_unmark_inode_bitmap(fs->inode_map, ino); + fs->group_desc[group].bg_free_inodes_count -= inuse; + if (isdir) + fs->group_desc[group].bg_used_dirs_count += inuse; + + /* We don't strictly need to be clearing the uninit flag if inuse < 0 + * (i.e. freeing inodes) but it also means something is bad. */ + fs->group_desc[group].bg_flags &= ~EXT2_BG_INODE_UNINIT; + if (EXT2_HAS_RO_COMPAT_FEATURE(fs->super, + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + ext2_ino_t first_unused_inode = fs->super->s_inodes_per_group - + fs->group_desc[group].bg_itable_unused + + group * fs->super->s_inodes_per_group + 1; + + if (ino >= first_unused_inode) + fs->group_desc[group].bg_itable_unused = + group * fs->super->s_inodes_per_group + + fs->super->s_inodes_per_group - ino; + ext2fs_group_desc_csum_set(fs, group); + } + + fs->super->s_free_inodes_count -= inuse; + ext2fs_mark_super_dirty(fs); + ext2fs_mark_ib_dirty(fs); +} + +void ext2fs_inode_alloc_stats(ext2_filsys fs, ext2_ino_t ino, int inuse) +{ + ext2fs_inode_alloc_stats2(fs, ino, inuse, 0); +} + +void ext2fs_block_alloc_stats(ext2_filsys fs, blk_t blk, int inuse) +{ + int group = ext2fs_group_of_blk(fs, blk); + +#ifndef OMIT_COM_ERR + if (blk >= fs->super->s_blocks_count) { + printf("error ext2fs_block_alloc_stats\n"); + return; + } +#endif + if (inuse > 0) + ext2fs_mark_block_bitmap(fs->block_map, blk); + else + ext2fs_unmark_block_bitmap(fs->block_map, blk); + 
fs->group_desc[group].bg_free_blocks_count -= inuse; + fs->group_desc[group].bg_flags &= ~EXT2_BG_BLOCK_UNINIT; + ext2fs_group_desc_csum_set(fs, group); + + fs->super->s_free_blocks_count -= inuse; + ext2fs_mark_super_dirty(fs); + ext2fs_mark_bb_dirty(fs); + if (fs->block_alloc_stats) + (fs->block_alloc_stats)(fs, (blk64_t) blk, inuse); +} + +void ext2fs_set_block_alloc_stats_callback(ext2_filsys fs, + void (*func)(ext2_filsys fs, + blk64_t blk, + int inuse), + void (**old)(ext2_filsys fs, + blk64_t blk, + int inuse)) +{ + if (!fs || fs->magic != EXT2_ET_MAGIC_EXT2FS_FILSYS) + return; + if (old) + *old = fs->block_alloc_stats; + + fs->block_alloc_stats = func; +} diff --git a/fs/ext4/format/alloc_tables.c b/fs/ext4/format/alloc_tables.c new file mode 100755 index 0000000..37b70e7 --- /dev/null +++ b/fs/ext4/format/alloc_tables.c @@ -0,0 +1,225 @@ +/* + * alloc_tables.c --- Allocate tables for a newly initialized + * filesystem. Used by mke2fs when initializing a filesystem + * + * Copyright (C) 1996 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. + * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> +#include <linux/stat.h> +#include <linux/time.h> + + +#include "ext2_fs.h" +#include "ext2fs.h" + +/* + * This routine searches for free blocks that can allocate a full + * group of bitmaps or inode tables for a flexbg group. Returns the + * block number with a correct offset were the bitmaps and inode + * tables can be allocated continously and in order. 
+ */ +static blk_t flexbg_offset(ext2_filsys fs, dgrp_t group, blk_t start_blk, + ext2fs_block_bitmap bmap, int offset, int size, + int elem_size) +{ + int flexbg, flexbg_size; + blk_t last_blk, first_free = 0; + dgrp_t last_grp; + + flexbg_size = 1 << fs->super->s_log_groups_per_flex; + flexbg = group / flexbg_size; + + if (size > (int) (fs->super->s_blocks_per_group / 8)) + size = (int) fs->super->s_blocks_per_group / 8; + + if (offset) + offset -= 1; + + /* + * Don't do a long search if the previous block + * search is still valid. + */ + if (start_blk && group % flexbg_size) { + if (ext2fs_test_block_bitmap_range(bmap, start_blk + elem_size, + size)) + return start_blk + elem_size; + } + + start_blk = ext2fs_group_first_block(fs, flexbg_size * flexbg); + last_grp = group | (flexbg_size - 1); + if (last_grp > fs->group_desc_count) + last_grp = fs->group_desc_count; + last_blk = ext2fs_group_last_block(fs, last_grp); + + /* Find the first available block */ + if (ext2fs_get_free_blocks(fs, start_blk, last_blk, 1, bmap, + &first_free)) + return first_free; + + if (ext2fs_get_free_blocks(fs, first_free + offset, last_blk, size, + bmap, &first_free)) + return first_free; + + return first_free; +} + +errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group, + ext2fs_block_bitmap bmap) +{ + errcode_t retval; + blk_t group_blk, start_blk, last_blk, new_blk, blk; + dgrp_t last_grp = 0; + int j, rem_grps = 0, flexbg_size = 0; + + group_blk = ext2fs_group_first_block(fs, group);//the first block number in the group + last_blk = ext2fs_group_last_block(fs, group);//the last block number in the group + + if (!bmap) + bmap = fs->block_map; + + if (EXT2_HAS_INCOMPAT_FEATURE(fs->super, + EXT4_FEATURE_INCOMPAT_FLEX_BG) && + fs->super->s_log_groups_per_flex) { + flexbg_size = 1 << fs->super->s_log_groups_per_flex; + last_grp = group | (flexbg_size - 1); + rem_grps = last_grp - group; + if (last_grp > fs->group_desc_count) + last_grp = fs->group_desc_count; + } + + /* + 
* Allocate the block and inode bitmaps, if necessary + */ + if (fs->stride) { + retval = ext2fs_get_free_blocks(fs, group_blk, last_blk, + 1, bmap, &start_blk); + if (retval) + return retval; + start_blk += fs->inode_blocks_per_group; + start_blk += ((fs->stride * group) % + (last_blk - start_blk + 1)); + if (start_blk >= last_blk) + start_blk = group_blk; + } else + start_blk = group_blk; + + if (flexbg_size) { + blk_t prev_block = 0; + if (group && fs->group_desc[group-1].bg_block_bitmap) + prev_block = fs->group_desc[group-1].bg_block_bitmap; + start_blk = flexbg_offset(fs, group, prev_block, bmap, + 0, rem_grps, 1); + last_blk = ext2fs_group_last_block(fs, last_grp); + } + + if (!fs->group_desc[group].bg_block_bitmap) { + retval = ext2fs_get_free_blocks(fs, start_blk, last_blk, + 1, bmap, &new_blk); + if (retval == EXT2_ET_BLOCK_ALLOC_FAIL) + retval = ext2fs_get_free_blocks(fs, group_blk, + last_blk, 1, bmap, &new_blk); + if (retval) + return retval; + ext2fs_mark_block_bitmap(bmap, new_blk); + fs->group_desc[group].bg_block_bitmap = new_blk; + if (flexbg_size) { + dgrp_t gr = ext2fs_group_of_blk(fs, new_blk); + fs->group_desc[gr].bg_free_blocks_count--; + fs->super->s_free_blocks_count--; + fs->group_desc[gr].bg_flags &= ~EXT2_BG_BLOCK_UNINIT; + ext2fs_group_desc_csum_set(fs, gr); + } + } + + if (flexbg_size) { + blk_t prev_block = 0; + if (group && fs->group_desc[group-1].bg_inode_bitmap) + prev_block = fs->group_desc[group-1].bg_inode_bitmap; + start_blk = flexbg_offset(fs, group, prev_block, bmap, + flexbg_size, rem_grps, 1); + last_blk = ext2fs_group_last_block(fs, last_grp); + } + + if (!fs->group_desc[group].bg_inode_bitmap) { + retval = ext2fs_get_free_blocks(fs, start_blk, last_blk, + 1, bmap, &new_blk); + if (retval == EXT2_ET_BLOCK_ALLOC_FAIL) + retval = ext2fs_get_free_blocks(fs, group_blk, + last_blk, 1, bmap, &new_blk); + if (retval) + return retval; + ext2fs_mark_block_bitmap(bmap, new_blk); + fs->group_desc[group].bg_inode_bitmap = new_blk; + if 
(flexbg_size) { + dgrp_t gr = ext2fs_group_of_blk(fs, new_blk); + fs->group_desc[gr].bg_free_blocks_count--; + fs->super->s_free_blocks_count--; + fs->group_desc[gr].bg_flags &= ~EXT2_BG_BLOCK_UNINIT; + ext2fs_group_desc_csum_set(fs, gr); + } + } + + /* + * Allocate the inode table + */ + if (flexbg_size) { + blk_t prev_block = 0; + if (group && fs->group_desc[group-1].bg_inode_table) + prev_block = fs->group_desc[group-1].bg_inode_table; + if (last_grp == fs->group_desc_count) + rem_grps = last_grp - group; + group_blk = flexbg_offset(fs, group, prev_block, bmap, + flexbg_size * 2, + fs->inode_blocks_per_group * + rem_grps, + fs->inode_blocks_per_group); + last_blk = ext2fs_group_last_block(fs, last_grp); + } + + if (!fs->group_desc[group].bg_inode_table) { + retval = ext2fs_get_free_blocks(fs, group_blk, last_blk, + fs->inode_blocks_per_group, + bmap, &new_blk); + if (retval) + return retval; + for (j=0, blk = new_blk; + j < fs->inode_blocks_per_group; + j++, blk++) { + ext2fs_mark_block_bitmap(bmap, blk); + if (flexbg_size) { + dgrp_t gr = ext2fs_group_of_blk(fs, blk); + fs->group_desc[gr].bg_free_blocks_count--; + fs->super->s_free_blocks_count--; + fs->group_desc[gr].bg_flags &= ~EXT2_BG_BLOCK_UNINIT; + ext2fs_group_desc_csum_set(fs, gr); + } + } + fs->group_desc[group].bg_inode_table = new_blk; + } + ext2fs_group_desc_csum_set(fs, group); + return 0; +} + +errcode_t ext2fs_allocate_tables(ext2_filsys fs) +{ + errcode_t retval; + dgrp_t i; + + for (i = 0; i < fs->group_desc_count; i++) { + retval = ext2fs_allocate_group_table(fs, i, fs->block_map); + if (retval) + return retval; + } + return 0; +} + diff --git a/fs/ext4/format/badblocks.c b/fs/ext4/format/badblocks.c new file mode 100755 index 0000000..dc0f7e7 --- /dev/null +++ b/fs/ext4/format/badblocks.c @@ -0,0 +1,319 @@ +/* + * badblocks.c --- routines to manipulate the bad block structure + * + * Copyright (C) 1994, 1995, 1996 Theodore Ts'o. 
+ * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. + * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> + +#include "ext2_fs.h" +#include "ext2fsP.h" + +/* + * Helper function for making a badblocks list + */ +static errcode_t make_u32_list(int size, int num, __u32 *list, + ext2_u32_list *ret) +{ + ext2_u32_list bb; + errcode_t retval; + + retval = ext2fs_get_mem(sizeof(struct ext2_struct_u32_list), &bb); + if (retval) + return retval; + memset(bb, 0, sizeof(struct ext2_struct_u32_list)); + bb->magic = EXT2_ET_MAGIC_BADBLOCKS_LIST; + bb->size = size ? size : 10; + bb->num = num; + retval = ext2fs_get_array(bb->size, sizeof(blk_t), &bb->list); + if (retval) { + ext2fs_free_mem(&bb); + return retval; + } + if (list) + memcpy(bb->list, list, bb->size * sizeof(blk_t)); + else + memset(bb->list, 0, bb->size * sizeof(blk_t)); + *ret = bb; + return 0; +} + + +/* + * This procedure creates an empty u32 list. + */ +errcode_t ext2fs_u32_list_create(ext2_u32_list *ret, int size) +{ + return make_u32_list(size, 0, 0, ret); +} + +/* + * This procedure creates an empty badblocks list. + */ +errcode_t ext2fs_badblocks_list_create(ext2_badblocks_list *ret, int size) +{ + return make_u32_list(size, 0, 0, (ext2_badblocks_list *) ret); +} + + +/* + * This procedure copies a badblocks list + */ +errcode_t ext2fs_u32_copy(ext2_u32_list src, ext2_u32_list *dest) +{ + errcode_t retval; + + retval = make_u32_list(src->size, src->num, src->list, dest); + if (retval) + return retval; + (*dest)->badblocks_flags = src->badblocks_flags; + return 0; +} + +errcode_t ext2fs_badblocks_copy(ext2_badblocks_list src, + ext2_badblocks_list *dest) +{ + return ext2fs_u32_copy((ext2_u32_list) src, + (ext2_u32_list *) dest); +} + +/* + * This procedure frees a badblocks list. 
+ * + * (note: moved to closefs.c) + */ + + +/* + * This procedure adds a block to a badblocks list. + */ +errcode_t ext2fs_u32_list_add(ext2_u32_list bb, __u32 blk) +{ + errcode_t retval; + int i, j; + unsigned long old_size; + + EXT2_CHECK_MAGIC(bb, EXT2_ET_MAGIC_BADBLOCKS_LIST); + + if (bb->num >= bb->size) { + old_size = bb->size * sizeof(__u32); + bb->size += 100; + retval = ext2fs_resize_mem(old_size, bb->size * sizeof(__u32), + &bb->list); + if (retval) { + bb->size -= 100; + return retval; + } + } + + /* + * Add special case code for appending to the end of the list + */ + i = bb->num-1; + if ((bb->num != 0) && (bb->list[i] == blk)) + return 0; + if ((bb->num == 0) || (bb->list[i] < blk)) { + bb->list[bb->num++] = blk; + return 0; + } + + j = bb->num; + for (i=0; i < bb->num; i++) { + if (bb->list[i] == blk) + return 0; + if (bb->list[i] > blk) { + j = i; + break; + } + } + for (i=bb->num; i > j; i--) + bb->list[i] = bb->list[i-1]; + bb->list[j] = blk; + bb->num++; + return 0; +} + +errcode_t ext2fs_badblocks_list_add(ext2_badblocks_list bb, blk_t blk) +{ + return ext2fs_u32_list_add((ext2_u32_list) bb, (__u32) blk); +} + +/* + * This procedure finds a particular block is on a badblocks + * list. + */ +int ext2fs_u32_list_find(ext2_u32_list bb, __u32 blk) +{ + int low, high, mid; + + if (bb->magic != EXT2_ET_MAGIC_BADBLOCKS_LIST) + return -1; + + if (bb->num == 0) + return -1; + + low = 0; + high = bb->num-1; + if (blk == bb->list[low]) + return low; + if (blk == bb->list[high]) + return high; + + while (low < high) { + mid = (low+high)/2; + if (mid == low || mid == high) + break; + if (blk == bb->list[mid]) + return mid; + if (blk < bb->list[mid]) + high = mid; + else + low = mid; + } + return -1; +} + +/* + * This procedure tests to see if a particular block is on a badblocks + * list. 
+ */ +int ext2fs_u32_list_test(ext2_u32_list bb, __u32 blk) +{ + if (ext2fs_u32_list_find(bb, blk) < 0) + return 0; + else + return 1; +} + +int ext2fs_badblocks_list_test(ext2_badblocks_list bb, blk_t blk) +{ + return ext2fs_u32_list_test((ext2_u32_list) bb, (__u32) blk); +} + + +/* + * Remove a block from the badblock list + */ +int ext2fs_u32_list_del(ext2_u32_list bb, __u32 blk) +{ + int remloc, i; + + if (bb->num == 0) + return -1; + + remloc = ext2fs_u32_list_find(bb, blk); + if (remloc < 0) + return -1; + + for (i = remloc ; i < bb->num-1; i++) + bb->list[i] = bb->list[i+1]; + bb->num--; + return 0; +} + +void ext2fs_badblocks_list_del(ext2_u32_list bb, __u32 blk) +{ + ext2fs_u32_list_del(bb, blk); +} + +errcode_t ext2fs_u32_list_iterate_begin(ext2_u32_list bb, + ext2_u32_iterate *ret) +{ + ext2_u32_iterate iter; + errcode_t retval; + + EXT2_CHECK_MAGIC(bb, EXT2_ET_MAGIC_BADBLOCKS_LIST); + + retval = ext2fs_get_mem(sizeof(struct ext2_struct_u32_iterate), &iter); + if (retval) + return retval; + + iter->magic = EXT2_ET_MAGIC_BADBLOCKS_ITERATE; + iter->bb = bb; + iter->ptr = 0; + *ret = iter; + return 0; +} + +errcode_t ext2fs_badblocks_list_iterate_begin(ext2_badblocks_list bb, + ext2_badblocks_iterate *ret) +{ + return ext2fs_u32_list_iterate_begin((ext2_u32_list) bb, + (ext2_u32_iterate *) ret); +} + + +int ext2fs_u32_list_iterate(ext2_u32_iterate iter, __u32 *blk) +{ + ext2_u32_list bb; + + if (iter->magic != EXT2_ET_MAGIC_BADBLOCKS_ITERATE) + return 0; + + bb = iter->bb; + + if (bb->magic != EXT2_ET_MAGIC_BADBLOCKS_LIST) + return 0; + + if (iter->ptr < bb->num) { + *blk = bb->list[iter->ptr++]; + return 1; + } + *blk = 0; + return 0; +} + +int ext2fs_badblocks_list_iterate(ext2_badblocks_iterate iter, blk_t *blk) +{ + return ext2fs_u32_list_iterate((ext2_u32_iterate) iter, + (__u32 *) blk); +} + + +void ext2fs_u32_list_iterate_end(ext2_u32_iterate iter) +{ + if (!iter || (iter->magic != EXT2_ET_MAGIC_BADBLOCKS_ITERATE)) + return; + + iter->bb = 0; + 
ext2fs_free_mem(&iter); +} + +void ext2fs_badblocks_list_iterate_end(ext2_badblocks_iterate iter) +{ + ext2fs_u32_list_iterate_end((ext2_u32_iterate) iter); +} + + +int ext2fs_u32_list_equal(ext2_u32_list bb1, ext2_u32_list bb2) +{ + EXT2_CHECK_MAGIC(bb1, EXT2_ET_MAGIC_BADBLOCKS_LIST); + EXT2_CHECK_MAGIC(bb2, EXT2_ET_MAGIC_BADBLOCKS_LIST); + + if (bb1->num != bb2->num) + return 0; + + if (memcmp(bb1->list, bb2->list, bb1->num * sizeof(blk_t)) != 0) + return 0; + return 1; +} + +int ext2fs_badblocks_equal(ext2_badblocks_list bb1, ext2_badblocks_list bb2) +{ + return ext2fs_u32_list_equal((ext2_u32_list) bb1, + (ext2_u32_list) bb2); +} + +int ext2fs_u32_list_count(ext2_u32_list bb) +{ + return bb->num; +} diff --git a/fs/ext4/format/bb_inode.c b/fs/ext4/format/bb_inode.c new file mode 100755 index 0000000..33480a6 --- /dev/null +++ b/fs/ext4/format/bb_inode.c @@ -0,0 +1,261 @@ +/* + * bb_inode.c --- routines to update the bad block inode. + * + * WARNING: This routine modifies a lot of state in the filesystem; if + * this routine returns an error, the bad block inode may be in an + * inconsistent state. + * + * Copyright (C) 1994, 1995 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. 
+ * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> + + +#include "ext2_fs.h" +#include "ext2fs.h" + +struct set_badblock_record { + ext2_badblocks_iterate bb_iter; + int bad_block_count; + blk_t *ind_blocks; + int max_ind_blocks; + int ind_blocks_size; + int ind_blocks_ptr; + char *block_buf; + errcode_t err; +}; + +static int set_bad_block_proc(ext2_filsys fs, blk_t *block_nr, + e2_blkcnt_t blockcnt, + blk_t ref_block, int ref_offset, + void *priv_data); +static int clear_bad_block_proc(ext2_filsys fs, blk_t *block_nr, + e2_blkcnt_t blockcnt, + blk_t ref_block, int ref_offset, + void *priv_data); + +/* + * Given a bad blocks bitmap, update the bad blocks inode to reflect + * the map. + */ +errcode_t ext2fs_update_bb_inode(ext2_filsys fs, ext2_badblocks_list bb_list) +{ + errcode_t retval; + struct set_badblock_record rec; + struct ext2_inode inode; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + if (!fs->block_map) + return EXT2_ET_NO_BLOCK_BITMAP; + + rec.bad_block_count = 0; + rec.ind_blocks_size = rec.ind_blocks_ptr = 0; + rec.max_ind_blocks = 10; + retval = ext2fs_get_array(rec.max_ind_blocks, sizeof(blk_t), + &rec.ind_blocks); + if (retval) + return retval; + memset(rec.ind_blocks, 0, rec.max_ind_blocks * sizeof(blk_t)); + retval = ext2fs_get_mem(fs->blocksize, &rec.block_buf); + if (retval) + goto cleanup; + memset(rec.block_buf, 0, fs->blocksize); + rec.err = 0; + + + /* + * First clear the old bad blocks (while saving the indirect blocks) + */ + retval = ext2fs_block_iterate2(fs, EXT2_BAD_INO, + BLOCK_FLAG_DEPTH_TRAVERSE, 0, + clear_bad_block_proc, &rec); + if (retval) + goto cleanup; + if (rec.err) { + retval = rec.err; + goto cleanup; + } + + /* + * Now set the bad blocks! + * + * First, mark the bad blocks as used. This prevents a bad + * block from being used as an indirecto block for the bad + * block inode (!). 
+ */ + if (bb_list) { + retval = ext2fs_badblocks_list_iterate_begin(bb_list, + &rec.bb_iter); + if (retval) + goto cleanup; + retval = ext2fs_block_iterate2(fs, EXT2_BAD_INO, + BLOCK_FLAG_APPEND, 0, + set_bad_block_proc, &rec); + ext2fs_badblocks_list_iterate_end(rec.bb_iter); + if (retval) + goto cleanup; + if (rec.err) { + retval = rec.err; + goto cleanup; + } + } + + /* + * Update the bad block inode's mod time and block count + * field. + */ + retval = ext2fs_read_inode(fs, EXT2_BAD_INO, &inode); + if (retval) + goto cleanup; + + inode.i_atime = inode.i_mtime = 0x5105cd7b;//fs->now ? fs->now : time(0); + if (!inode.i_ctime) + inode.i_ctime = 0x5105cd7b;//fs->now ? fs->now : time(0); + ext2fs_iblk_set(fs, &inode, rec.bad_block_count); + inode.i_size = rec.bad_block_count * fs->blocksize; + + retval = ext2fs_write_inode(fs, EXT2_BAD_INO, &inode); + if (retval) + goto cleanup; + +cleanup: + ext2fs_free_mem(&rec.ind_blocks); + ext2fs_free_mem(&rec.block_buf); + return retval; +} + +/* + * Helper function for update_bb_inode() + * + * Clear the bad blocks in the bad block inode, while saving the + * indirect blocks. + */ +#ifdef __TURBOC__ + #pragma argsused +#endif +static int clear_bad_block_proc(ext2_filsys fs, blk_t *block_nr, + e2_blkcnt_t blockcnt, + blk_t ref_block EXT2FS_ATTR((unused)), + int ref_offset EXT2FS_ATTR((unused)), + void *priv_data) +{ + struct set_badblock_record *rec = (struct set_badblock_record *) + priv_data; + errcode_t retval; + unsigned long old_size; + + if (!*block_nr) + return 0; + + /* + * If the block number is outrageous, clear it and ignore it. 
+ */ + if (*block_nr >= fs->super->s_blocks_count || + *block_nr < fs->super->s_first_data_block) { + *block_nr = 0; + return BLOCK_CHANGED; + } + + if (blockcnt < 0) { + if (rec->ind_blocks_size >= rec->max_ind_blocks) { + old_size = rec->max_ind_blocks * sizeof(blk_t); + rec->max_ind_blocks += 10; + retval = ext2fs_resize_mem(old_size, + rec->max_ind_blocks * sizeof(blk_t), + &rec->ind_blocks); + if (retval) { + rec->max_ind_blocks -= 10; + rec->err = retval; + return BLOCK_ABORT; + } + } + rec->ind_blocks[rec->ind_blocks_size++] = *block_nr; + } + + /* + * Mark the block as unused, and update accounting information + */ + ext2fs_block_alloc_stats(fs, *block_nr, -1); + + *block_nr = 0; + return BLOCK_CHANGED; +} + + +/* + * Helper function for update_bb_inode() + * + * Set the block list in the bad block inode, using the supplied bitmap. + */ +#ifdef __TURBOC__ + #pragma argsused +#endif +static int set_bad_block_proc(ext2_filsys fs, blk_t *block_nr, + e2_blkcnt_t blockcnt, + blk_t ref_block EXT2FS_ATTR((unused)), + int ref_offset EXT2FS_ATTR((unused)), + void *priv_data) +{ + struct set_badblock_record *rec = (struct set_badblock_record *) + priv_data; + errcode_t retval; + blk_t blk; + + if (blockcnt >= 0) { + /* + * Get the next bad block. + */ + if (!ext2fs_badblocks_list_iterate(rec->bb_iter, &blk)) + return BLOCK_ABORT; + rec->bad_block_count++; + } else { + /* + * An indirect block; fetch a block from the + * previously used indirect block list. The block + * most be not marked as used; if so, get another one. + * If we run out of reserved indirect blocks, allocate + * a new one. 
+ */ + retry: + if (rec->ind_blocks_ptr < rec->ind_blocks_size) { + blk = rec->ind_blocks[rec->ind_blocks_ptr++]; + if (ext2fs_test_block_bitmap(fs->block_map, blk)) + goto retry; + } else { + retval = ext2fs_new_block(fs, 0, 0, &blk); + if (retval) { + rec->err = retval; + return BLOCK_ABORT; + } + } + retval = io_channel_write_blk(fs->io, blk, 1, rec->block_buf); + if (retval) { + rec->err = retval; + return BLOCK_ABORT; + } + } + + /* + * Update block counts + */ + ext2fs_block_alloc_stats(fs, blk, +1); + + *block_nr = blk; + return BLOCK_CHANGED; +} + + + + + + diff --git a/fs/ext4/format/bitmaps.c b/fs/ext4/format/bitmaps.c new file mode 100755 index 0000000..6260a89 --- /dev/null +++ b/fs/ext4/format/bitmaps.c @@ -0,0 +1,178 @@ +/* + * bitmaps.c --- routines to read, write, and manipulate the inode and + * block bitmaps. + * + * Copyright (C) 1993, 1994, 1995, 1996 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. 
+ * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> +#include <linux/stat.h> +#include <linux/time.h> + + +#include "ext2_fs.h" +#include "ext2fs.h" + +void ext2fs_free_inode_bitmap(ext2fs_inode_bitmap bitmap) +{ + ext2fs_free_generic_bitmap(bitmap); +} + +void ext2fs_free_block_bitmap(ext2fs_block_bitmap bitmap) +{ + ext2fs_free_generic_bitmap(bitmap); +} + +errcode_t ext2fs_copy_bitmap(ext2fs_generic_bitmap src, + ext2fs_generic_bitmap *dest) +{ + return (ext2fs_copy_generic_bitmap(src, dest)); +} + +void ext2fs_set_bitmap_padding(ext2fs_generic_bitmap map) +{ + ext2fs_set_generic_bitmap_padding(map); +} + +errcode_t ext2fs_allocate_inode_bitmap(ext2_filsys fs, + const char *descr, + ext2fs_inode_bitmap *ret) +{ + __u32 start, end, real_end; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + fs->write_bitmaps = ext2fs_write_bitmaps; + + start = 1; + end = fs->super->s_inodes_count; + real_end = (EXT2_INODES_PER_GROUP(fs->super) * fs->group_desc_count); + + return (ext2fs_make_generic_bitmap(EXT2_ET_MAGIC_INODE_BITMAP, fs, + start, end, real_end, + descr, 0, ret)); +} + +errcode_t ext2fs_allocate_block_bitmap(ext2_filsys fs, + const char *descr, + ext2fs_block_bitmap *ret) +{ + __u32 start, end, real_end; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + fs->write_bitmaps = ext2fs_write_bitmaps; + + start = fs->super->s_first_data_block; + end = fs->super->s_blocks_count-1; + real_end = (EXT2_BLOCKS_PER_GROUP(fs->super) + * fs->group_desc_count)-1 + start; + + return (ext2fs_make_generic_bitmap(EXT2_ET_MAGIC_BLOCK_BITMAP, fs, + start, end, real_end, + descr, 0, ret)); +} + +errcode_t ext2fs_fudge_inode_bitmap_end(ext2fs_inode_bitmap bitmap, + ext2_ino_t end, ext2_ino_t *oend) +{ + + return (ext2fs_fudge_generic_bitmap_end(bitmap, + EXT2_ET_MAGIC_INODE_BITMAP, + EXT2_ET_FUDGE_INODE_BITMAP_END, + end, oend)); +} + +errcode_t 
ext2fs_fudge_block_bitmap_end(ext2fs_block_bitmap bitmap, + blk_t end, blk_t *oend) +{ + return (ext2fs_fudge_generic_bitmap_end(bitmap, + EXT2_ET_MAGIC_BLOCK_BITMAP, + EXT2_ET_FUDGE_BLOCK_BITMAP_END, + end, oend)); +} + +void ext2fs_clear_inode_bitmap(ext2fs_inode_bitmap bitmap) +{ + ext2fs_clear_generic_bitmap(bitmap); +} + +void ext2fs_clear_block_bitmap(ext2fs_block_bitmap bitmap) +{ + ext2fs_clear_generic_bitmap(bitmap); +} + +errcode_t ext2fs_resize_inode_bitmap(__u32 new_end, __u32 new_real_end, + ext2fs_inode_bitmap bmap) +{ + return (ext2fs_resize_generic_bitmap(EXT2_ET_MAGIC_INODE_BITMAP, + new_end, new_real_end, bmap)); +} + +errcode_t ext2fs_resize_block_bitmap(__u32 new_end, __u32 new_real_end, + ext2fs_block_bitmap bmap) +{ + return (ext2fs_resize_generic_bitmap(EXT2_ET_MAGIC_BLOCK_BITMAP, + new_end, new_real_end, bmap)); +} + +errcode_t ext2fs_compare_block_bitmap(ext2fs_block_bitmap bm1, + ext2fs_block_bitmap bm2) +{ + return (ext2fs_compare_generic_bitmap(EXT2_ET_MAGIC_BLOCK_BITMAP, + EXT2_ET_NEQ_BLOCK_BITMAP, + bm1, bm2)); +} + +errcode_t ext2fs_compare_inode_bitmap(ext2fs_inode_bitmap bm1, + ext2fs_inode_bitmap bm2) +{ + return (ext2fs_compare_generic_bitmap(EXT2_ET_MAGIC_INODE_BITMAP, + EXT2_ET_NEQ_INODE_BITMAP, + bm1, bm2)); +} + +errcode_t ext2fs_set_inode_bitmap_range(ext2fs_inode_bitmap bmap, + ext2_ino_t start, unsigned int num, + void *in) +{ + return (ext2fs_set_generic_bitmap_range(bmap, + EXT2_ET_MAGIC_INODE_BITMAP, + start, num, in)); +} + +errcode_t ext2fs_get_inode_bitmap_range(ext2fs_inode_bitmap bmap, + ext2_ino_t start, unsigned int num, + void *out) +{ + return (ext2fs_get_generic_bitmap_range(bmap, + EXT2_ET_MAGIC_INODE_BITMAP, + start, num, out)); +} + +errcode_t ext2fs_set_block_bitmap_range(ext2fs_block_bitmap bmap, + blk_t start, unsigned int num, + void *in) +{ + return (ext2fs_set_generic_bitmap_range(bmap, + EXT2_ET_MAGIC_BLOCK_BITMAP, + start, num, in)); +} + +errcode_t ext2fs_get_block_bitmap_range(ext2fs_block_bitmap 
bmap, + blk_t start, unsigned int num, + void *out) +{ + return (ext2fs_get_generic_bitmap_range(bmap, + EXT2_ET_MAGIC_BLOCK_BITMAP, + start, num, out)); +} diff --git a/fs/ext4/format/bitops.c b/fs/ext4/format/bitops.c new file mode 100755 index 0000000..d14bad2 --- /dev/null +++ b/fs/ext4/format/bitops.c @@ -0,0 +1,77 @@ +/* + * bitops.c --- Bitmap frobbing code. See bitops.h for the inlined + * routines. + * + * Copyright (C) 1993, 1994, 1995, 1996 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. + * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> +#include <linux/stat.h> +#include <linux/time.h> + + +#include "ext2_fs.h" +#include "ext2fs.h" + +#ifndef _EXT2_HAVE_ASM_BITOPS_ + +/* + * For the benefit of those who are trying to port Linux to another + * architecture, here are some C-language equivalents. You should + * recode these in the native assmebly language, if at all possible. + * + * C language equivalents written by Theodore Ts'o, 9/26/92. + * Modified by Pete A. Zaitcev 7/14/95 to be portable to big endian + * systems, as well as non-32 bit systems. 
+ */ + +int ext2fs_set_bit(unsigned int nr,void * addr) +{ + int mask, retval; + unsigned char *ADDR = (unsigned char *) addr; + + ADDR += nr >> 3; + mask = 1 << (nr & 0x07); + retval = mask & *ADDR; + *ADDR |= mask; + return retval; +} + +int ext2fs_clear_bit(unsigned int nr, void * addr) +{ + int mask, retval; + unsigned char *ADDR = (unsigned char *) addr; + + ADDR += nr >> 3; + mask = 1 << (nr & 0x07); + retval = mask & *ADDR; + *ADDR &= ~mask; + return retval; +} + +int ext2fs_test_bit(unsigned int nr, const void * addr) +{ + int mask; + const unsigned char *ADDR = (const unsigned char *) addr; + + ADDR += nr >> 3; + mask = 1 << (nr & 0x07); + return (mask & *ADDR); +} + +#endif /* !_EXT2_HAVE_ASM_BITOPS_ */ + +void ext2fs_warn_bitmap(errcode_t errcode, unsigned long arg, + const char *description) +{ + printf("Error \n"); +} diff --git a/fs/ext4/format/bitops.h b/fs/ext4/format/bitops.h new file mode 100755 index 0000000..9825686 --- /dev/null +++ b/fs/ext4/format/bitops.h @@ -0,0 +1,422 @@ +/* + * bitops.h --- Bitmap frobbing code. The byte swapping routines are + * also included here. + * + * Copyright (C) 1993, 1994, 1995, 1996 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. 
+ * %End-Header% + */ + +extern int ext2fs_set_bit(unsigned int nr,void * addr); +extern int ext2fs_clear_bit(unsigned int nr, void * addr); +extern int ext2fs_test_bit(unsigned int nr, const void * addr); +extern void ext2fs_fast_set_bit(unsigned int nr,void * addr); +extern void ext2fs_fast_clear_bit(unsigned int nr, void * addr); +extern __u16 ext2fs_swab16(__u16 val); +extern __u32 ext2fs_swab32(__u32 val); +extern __u64 ext2fs_swab64(__u64 val); + +#ifdef WORDS_BIGENDIAN +#define ext2fs_cpu_to_le64(x) ext2fs_swab64((x)) +#define ext2fs_le64_to_cpu(x) ext2fs_swab64((x)) +#define ext2fs_cpu_to_le32(x) ext2fs_swab32((x)) +#define ext2fs_le32_to_cpu(x) ext2fs_swab32((x)) +#define ext2fs_cpu_to_le16(x) ext2fs_swab16((x)) +#define ext2fs_le16_to_cpu(x) ext2fs_swab16((x)) +#define ext2fs_cpu_to_be32(x) ((__u32)(x)) +#define ext2fs_be32_to_cpu(x) ((__u32)(x)) +#define ext2fs_cpu_to_be16(x) ((__u16)(x)) +#define ext2fs_be16_to_cpu(x) ((__u16)(x)) +#else +#define ext2fs_cpu_to_le64(x) ((__u64)(x)) +#define ext2fs_le64_to_cpu(x) ((__u64)(x)) +#define ext2fs_cpu_to_le32(x) ((__u32)(x)) +#define ext2fs_le32_to_cpu(x) ((__u32)(x)) +#define ext2fs_cpu_to_le16(x) ((__u16)(x)) +#define ext2fs_le16_to_cpu(x) ((__u16)(x)) +#define ext2fs_cpu_to_be32(x) ext2fs_swab32((x)) +#define ext2fs_be32_to_cpu(x) ext2fs_swab32((x)) +#define ext2fs_cpu_to_be16(x) ext2fs_swab16((x)) +#define ext2fs_be16_to_cpu(x) ext2fs_swab16((x)) +#endif + +/* + * EXT2FS bitmap manipulation routines. 
+ */ + +/* Support for sending warning messages from the inline subroutines */ +extern const char *ext2fs_block_string; +extern const char *ext2fs_inode_string; +extern const char *ext2fs_mark_string; +extern const char *ext2fs_unmark_string; +extern const char *ext2fs_test_string; +extern void ext2fs_warn_bitmap(errcode_t errcode, unsigned long arg, + const char *description); +extern void ext2fs_warn_bitmap2(ext2fs_generic_bitmap bitmap, + int code, unsigned long arg); + +extern int ext2fs_mark_block_bitmap(ext2fs_block_bitmap bitmap, blk_t block); +extern int ext2fs_unmark_block_bitmap(ext2fs_block_bitmap bitmap, + blk_t block); +extern int ext2fs_test_block_bitmap(ext2fs_block_bitmap bitmap, blk_t block); + +extern int ext2fs_mark_inode_bitmap(ext2fs_inode_bitmap bitmap, ext2_ino_t inode); +extern int ext2fs_unmark_inode_bitmap(ext2fs_inode_bitmap bitmap, + ext2_ino_t inode); +extern int ext2fs_test_inode_bitmap(ext2fs_inode_bitmap bitmap, ext2_ino_t inode); + +extern void ext2fs_fast_mark_block_bitmap(ext2fs_block_bitmap bitmap, + blk_t block); +extern void ext2fs_fast_unmark_block_bitmap(ext2fs_block_bitmap bitmap, + blk_t block); +extern int ext2fs_fast_test_block_bitmap(ext2fs_block_bitmap bitmap, + blk_t block); + +extern void ext2fs_fast_mark_inode_bitmap(ext2fs_inode_bitmap bitmap, + ext2_ino_t inode); +extern void ext2fs_fast_unmark_inode_bitmap(ext2fs_inode_bitmap bitmap, + ext2_ino_t inode); +extern int ext2fs_fast_test_inode_bitmap(ext2fs_inode_bitmap bitmap, + ext2_ino_t inode); +extern blk_t ext2fs_get_block_bitmap_start(ext2fs_block_bitmap bitmap); +extern ext2_ino_t ext2fs_get_inode_bitmap_start(ext2fs_inode_bitmap bitmap); +extern blk_t ext2fs_get_block_bitmap_end(ext2fs_block_bitmap bitmap); +extern ext2_ino_t ext2fs_get_inode_bitmap_end(ext2fs_inode_bitmap bitmap); + +extern void ext2fs_mark_block_bitmap_range(ext2fs_block_bitmap bitmap, + blk_t block, int num); +extern void ext2fs_unmark_block_bitmap_range(ext2fs_block_bitmap bitmap, + blk_t 
block, int num); +extern int ext2fs_test_block_bitmap_range(ext2fs_block_bitmap bitmap, + blk_t block, int num); +extern void ext2fs_fast_mark_block_bitmap_range(ext2fs_block_bitmap bitmap, + blk_t block, int num); +extern void ext2fs_fast_unmark_block_bitmap_range(ext2fs_block_bitmap bitmap, + blk_t block, int num); +extern int ext2fs_fast_test_block_bitmap_range(ext2fs_block_bitmap bitmap, + blk_t block, int num); +extern void ext2fs_set_bitmap_padding(ext2fs_generic_bitmap map); + +/* These routines moved to gen_bitmap.c */ +extern int ext2fs_mark_generic_bitmap(ext2fs_generic_bitmap bitmap, + __u32 bitno); +extern int ext2fs_unmark_generic_bitmap(ext2fs_generic_bitmap bitmap, + blk_t bitno); +extern int ext2fs_test_generic_bitmap(ext2fs_generic_bitmap bitmap, + blk_t bitno); +extern int ext2fs_test_block_bitmap_range(ext2fs_block_bitmap bitmap, + blk_t block, int num); +extern __u32 ext2fs_get_generic_bitmap_start(ext2fs_generic_bitmap bitmap); +extern __u32 ext2fs_get_generic_bitmap_end(ext2fs_generic_bitmap bitmap); + +/* + * The inline routines themselves... + * + * If NO_INLINE_FUNCS is defined, then we won't try to do inline + * functions at all; they will be included as normal functions in + * inline.c + */ +#ifdef NO_INLINE_FUNCS +#if (defined(__GNUC__) && (defined(__i386__) || defined(__i486__) || \ + defined(__i586__) || defined(__mc68000__))) + /* This prevents bitops.c from trying to include the C */ + /* function version of these functions */ +#define _EXT2_HAVE_ASM_BITOPS_ +#endif +#endif /* NO_INLINE_FUNCS */ + +#if (defined(INCLUDE_INLINE_FUNCS) || !defined(NO_INLINE_FUNCS)) +#ifdef INCLUDE_INLINE_FUNCS +#define _INLINE_ extern +#else +#ifdef __GNUC__ +#define _INLINE_ extern __inline__ +#else /* For Watcom C */ +#define _INLINE_ extern inline +#endif +#endif + +/* + * Fast bit set/clear functions that doesn't need to return the + * previous bit value. 
+ */ + +_INLINE_ void ext2fs_fast_set_bit(unsigned int nr,void * addr) +{ + unsigned char *ADDR = (unsigned char *) addr; + + ADDR += nr >> 3; + *ADDR |= (1 << (nr & 0x07)); +} + +_INLINE_ void ext2fs_fast_clear_bit(unsigned int nr, void * addr) +{ + unsigned char *ADDR = (unsigned char *) addr; + + ADDR += nr >> 3; + *ADDR &= ~(1 << (nr & 0x07)); +} + + +#if ((defined __GNUC__) && !defined(_EXT2_USE_C_VERSIONS_) && \ + (defined(__i386__) || defined(__i486__) || defined(__i586__))) + +#define _EXT2_HAVE_ASM_BITOPS_ +#define _EXT2_HAVE_ASM_SWAB_ + +/* + * These are done by inline assembly for speed reasons..... + * + * All bitoperations return 0 if the bit was cleared before the + * operation and != 0 if it was not. Bit 0 is the LSB of addr; bit 32 + * is the LSB of (addr+1). + */ + +/* + * Some hacks to defeat gcc over-optimizations.. + */ +struct __dummy_h { unsigned long a[100]; }; +#define EXT2FS_ADDR (*(struct __dummy_h *) addr) +#define EXT2FS_CONST_ADDR (*(const struct __dummy_h *) addr) + +_INLINE_ int ext2fs_set_bit(unsigned int nr, void * addr) +{ + int oldbit; + + addr = (void *) (((unsigned char *) addr) + (nr >> 3)); + __asm__ __volatile__("btsl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (EXT2FS_ADDR) + :"r" (nr & 7)); + return oldbit; +} + +_INLINE_ int ext2fs_clear_bit(unsigned int nr, void * addr) +{ + int oldbit; + + addr = (void *) (((unsigned char *) addr) + (nr >> 3)); + __asm__ __volatile__("btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (EXT2FS_ADDR) + :"r" (nr & 7)); + return oldbit; +} + +_INLINE_ int ext2fs_test_bit(unsigned int nr, const void * addr) +{ + int oldbit; + + addr = (const void *) (((const unsigned char *) addr) + (nr >> 3)); + __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit) + :"m" (EXT2FS_CONST_ADDR),"r" (nr & 7)); + return oldbit; +} + +_INLINE_ __u32 ext2fs_swab32(__u32 val) +{ +#ifdef EXT2FS_REQUIRE_486 + __asm__("bswap %0" : "=r" (val) : "0" (val)); +#else + __asm__("xchgb %b0,%h0\n\t" /* swap lower bytes 
*/ + "rorl $16,%0\n\t" /* swap words */ + "xchgb %b0,%h0" /* swap higher bytes */ + :"=q" (val) + : "0" (val)); +#endif + return val; +} + +_INLINE_ __u16 ext2fs_swab16(__u16 val) +{ + __asm__("xchgb %b0,%h0" /* swap bytes */ \ + : "=q" (val) \ + : "0" (val)); \ + return val; +} + +#undef EXT2FS_ADDR + +#endif /* i386 */ + +#if ((defined __GNUC__) && !defined(_EXT2_USE_C_VERSIONS_) && \ + (defined(__mc68000__))) + +#define _EXT2_HAVE_ASM_BITOPS_ + +_INLINE_ int ext2fs_set_bit(unsigned int nr,void * addr) +{ + char retval; + + __asm__ __volatile__ ("bfset %2@{%1:#1}; sne %0" + : "=d" (retval) : "d" (nr^7), "a" (addr)); + + return retval; +} + +_INLINE_ int ext2fs_clear_bit(unsigned int nr, void * addr) +{ + char retval; + + __asm__ __volatile__ ("bfclr %2@{%1:#1}; sne %0" + : "=d" (retval) : "d" (nr^7), "a" (addr)); + + return retval; +} + +_INLINE_ int ext2fs_test_bit(unsigned int nr, const void * addr) +{ + char retval; + + __asm__ __volatile__ ("bftst %2@{%1:#1}; sne %0" + : "=d" (retval) : "d" (nr^7), "a" (addr)); + + return retval; +} + +#endif /* __mc68000__ */ + + +#if !defined(_EXT2_HAVE_ASM_SWAB_) + +_INLINE_ __u16 ext2fs_swab16(__u16 val) +{ + return (val >> 8) | (val << 8); +} + +_INLINE_ __u32 ext2fs_swab32(__u32 val) +{ + return ((val>>24) | ((val>>8)&0xFF00) | + ((val<<8)&0xFF0000) | (val<<24)); +} + +#endif /* !_EXT2_HAVE_ASM_SWAB */ + +_INLINE_ __u64 ext2fs_swab64(__u64 val) +{ + return (ext2fs_swab32(val >> 32) | + (((__u64)ext2fs_swab32(val & 0xFFFFFFFFUL)) << 32)); +} + +_INLINE_ int ext2fs_mark_block_bitmap(ext2fs_block_bitmap bitmap, + blk_t block) +{ + return ext2fs_mark_generic_bitmap((ext2fs_generic_bitmap) bitmap, + block); +} + +_INLINE_ int ext2fs_unmark_block_bitmap(ext2fs_block_bitmap bitmap, + blk_t block) +{ + return ext2fs_unmark_generic_bitmap((ext2fs_generic_bitmap) bitmap, + block); +} + +_INLINE_ int ext2fs_test_block_bitmap(ext2fs_block_bitmap bitmap, + blk_t block) +{ + return ext2fs_test_generic_bitmap((ext2fs_generic_bitmap) 
bitmap, + block); +} + +_INLINE_ int ext2fs_mark_inode_bitmap(ext2fs_inode_bitmap bitmap, + ext2_ino_t inode) +{ + return ext2fs_mark_generic_bitmap((ext2fs_generic_bitmap) bitmap, + inode); +} + +_INLINE_ int ext2fs_unmark_inode_bitmap(ext2fs_inode_bitmap bitmap, + ext2_ino_t inode) +{ + return ext2fs_unmark_generic_bitmap((ext2fs_generic_bitmap) bitmap, + inode); +} + +_INLINE_ int ext2fs_test_inode_bitmap(ext2fs_inode_bitmap bitmap, + ext2_ino_t inode) +{ + return ext2fs_test_generic_bitmap((ext2fs_generic_bitmap) bitmap, + inode); +} + +_INLINE_ void ext2fs_fast_mark_block_bitmap(ext2fs_block_bitmap bitmap, + blk_t block) +{ + ext2fs_mark_generic_bitmap((ext2fs_generic_bitmap) bitmap, block); +} + +_INLINE_ void ext2fs_fast_unmark_block_bitmap(ext2fs_block_bitmap bitmap, + blk_t block) +{ + ext2fs_unmark_generic_bitmap((ext2fs_generic_bitmap) bitmap, block); +} + +_INLINE_ int ext2fs_fast_test_block_bitmap(ext2fs_block_bitmap bitmap, + blk_t block) +{ + return ext2fs_test_generic_bitmap((ext2fs_generic_bitmap) bitmap, + block); +} + +_INLINE_ void ext2fs_fast_mark_inode_bitmap(ext2fs_inode_bitmap bitmap, + ext2_ino_t inode) +{ + ext2fs_mark_generic_bitmap((ext2fs_generic_bitmap) bitmap, inode); +} + +_INLINE_ void ext2fs_fast_unmark_inode_bitmap(ext2fs_inode_bitmap bitmap, + ext2_ino_t inode) +{ + ext2fs_unmark_generic_bitmap((ext2fs_generic_bitmap) bitmap, inode); +} + +_INLINE_ int ext2fs_fast_test_inode_bitmap(ext2fs_inode_bitmap bitmap, + ext2_ino_t inode) +{ + return ext2fs_test_generic_bitmap((ext2fs_generic_bitmap) bitmap, + inode); +} + +_INLINE_ blk_t ext2fs_get_block_bitmap_start(ext2fs_block_bitmap bitmap) +{ + return ext2fs_get_generic_bitmap_start((ext2fs_generic_bitmap) bitmap); +} + +_INLINE_ ext2_ino_t ext2fs_get_inode_bitmap_start(ext2fs_inode_bitmap bitmap) +{ + return ext2fs_get_generic_bitmap_start((ext2fs_generic_bitmap) bitmap); +} + +_INLINE_ blk_t ext2fs_get_block_bitmap_end(ext2fs_block_bitmap bitmap) +{ + return 
ext2fs_get_generic_bitmap_end((ext2fs_generic_bitmap) bitmap); +} + +_INLINE_ ext2_ino_t ext2fs_get_inode_bitmap_end(ext2fs_inode_bitmap bitmap) +{ + return ext2fs_get_generic_bitmap_end((ext2fs_generic_bitmap) bitmap); +} + +_INLINE_ int ext2fs_fast_test_block_bitmap_range(ext2fs_block_bitmap bitmap, + blk_t block, int num) +{ + return ext2fs_test_block_bitmap_range(bitmap, block, num); +} + +_INLINE_ void ext2fs_fast_mark_block_bitmap_range(ext2fs_block_bitmap bitmap, + blk_t block, int num) +{ + ext2fs_mark_block_bitmap_range(bitmap, block, num); +} + +_INLINE_ void ext2fs_fast_unmark_block_bitmap_range(ext2fs_block_bitmap bitmap, + blk_t block, int num) +{ + ext2fs_unmark_block_bitmap_range(bitmap, block, num); +} +#undef _INLINE_ +#endif + diff --git a/fs/ext4/format/block.c b/fs/ext4/format/block.c new file mode 100755 index 0000000..0e53fd1 --- /dev/null +++ b/fs/ext4/format/block.c @@ -0,0 +1,578 @@ +/* + * block.c --- iterate over all blocks in an inode + * + * Copyright (C) 1993, 1994, 1995, 1996 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. 
+ * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> + +#include "ext2_fs.h" +#include "ext2fs.h" + +struct block_context { + ext2_filsys fs; + int (*func)(ext2_filsys fs, + blk_t *blocknr, + e2_blkcnt_t bcount, + blk_t ref_blk, + int ref_offset, + void *priv_data); + e2_blkcnt_t bcount; + int bsize; + int flags; + errcode_t errcode; + char *ind_buf; + char *dind_buf; + char *tind_buf; + void *priv_data; +}; + +#define check_for_ro_violation_return(ctx, ret) \ + do { \ + if (((ctx)->flags & BLOCK_FLAG_READ_ONLY) && \ + ((ret) & BLOCK_CHANGED)) { \ + (ctx)->errcode = EXT2_ET_RO_BLOCK_ITERATE; \ + ret |= BLOCK_ABORT | BLOCK_ERROR; \ + return ret; \ + } \ + } while (0) + +#define check_for_ro_violation_goto(ctx, ret, label) \ + do { \ + if (((ctx)->flags & BLOCK_FLAG_READ_ONLY) && \ + ((ret) & BLOCK_CHANGED)) { \ + (ctx)->errcode = EXT2_ET_RO_BLOCK_ITERATE; \ + ret |= BLOCK_ABORT | BLOCK_ERROR; \ + goto label; \ + } \ + } while (0) + +static int block_iterate_ind(blk_t *ind_block, blk_t ref_block, + int ref_offset, struct block_context *ctx) +{ + int ret = 0, changed = 0; + int i, flags, limit, offset; + blk_t *block_nr; + + limit = ctx->fs->blocksize >> 2; + if (!(ctx->flags & BLOCK_FLAG_DEPTH_TRAVERSE) && + !(ctx->flags & BLOCK_FLAG_DATA_ONLY)) + ret = (*ctx->func)(ctx->fs, ind_block, + BLOCK_COUNT_IND, ref_block, + ref_offset, ctx->priv_data); + check_for_ro_violation_return(ctx, ret); + if (!*ind_block || (ret & BLOCK_ABORT)) { + ctx->bcount += limit; + return ret; + } + if (*ind_block >= ctx->fs->super->s_blocks_count || + *ind_block < ctx->fs->super->s_first_data_block) { + ctx->errcode = EXT2_ET_BAD_IND_BLOCK; + ret |= BLOCK_ERROR; + return ret; + } + ctx->errcode = ext2fs_read_ind_block(ctx->fs, *ind_block, + ctx->ind_buf); + if (ctx->errcode) { + ret |= BLOCK_ERROR; + return ret; + } + + block_nr = (blk_t *) ctx->ind_buf; + offset = 0; + if (ctx->flags & BLOCK_FLAG_APPEND) { + 
for (i = 0; i < limit; i++, ctx->bcount++, block_nr++) { + flags = (*ctx->func)(ctx->fs, block_nr, ctx->bcount, + *ind_block, offset, + ctx->priv_data); + changed |= flags; + if (flags & BLOCK_ABORT) { + ret |= BLOCK_ABORT; + break; + } + offset += sizeof(blk_t); + } + } else { + for (i = 0; i < limit; i++, ctx->bcount++, block_nr++) { + if (*block_nr == 0) + goto skip_sparse; + flags = (*ctx->func)(ctx->fs, block_nr, ctx->bcount, + *ind_block, offset, + ctx->priv_data); + changed |= flags; + if (flags & BLOCK_ABORT) { + ret |= BLOCK_ABORT; + break; + } + skip_sparse: + offset += sizeof(blk_t); + } + } + check_for_ro_violation_return(ctx, changed); + if (changed & BLOCK_CHANGED) { + ctx->errcode = ext2fs_write_ind_block(ctx->fs, *ind_block, + ctx->ind_buf); + if (ctx->errcode) + ret |= BLOCK_ERROR | BLOCK_ABORT; + } + if ((ctx->flags & BLOCK_FLAG_DEPTH_TRAVERSE) && + !(ctx->flags & BLOCK_FLAG_DATA_ONLY) && + !(ret & BLOCK_ABORT)) + ret |= (*ctx->func)(ctx->fs, ind_block, + BLOCK_COUNT_IND, ref_block, + ref_offset, ctx->priv_data); + check_for_ro_violation_return(ctx, ret); + return ret; +} + +static int block_iterate_dind(blk_t *dind_block, blk_t ref_block, + int ref_offset, struct block_context *ctx) +{ + int ret = 0, changed = 0; + int i, flags, limit, offset; + blk_t *block_nr; + + limit = ctx->fs->blocksize >> 2; + if (!(ctx->flags & (BLOCK_FLAG_DEPTH_TRAVERSE | + BLOCK_FLAG_DATA_ONLY))) + ret = (*ctx->func)(ctx->fs, dind_block, + BLOCK_COUNT_DIND, ref_block, + ref_offset, ctx->priv_data); + check_for_ro_violation_return(ctx, ret); + if (!*dind_block || (ret & BLOCK_ABORT)) { + ctx->bcount += limit*limit; + return ret; + } + if (*dind_block >= ctx->fs->super->s_blocks_count || + *dind_block < ctx->fs->super->s_first_data_block) { + ctx->errcode = EXT2_ET_BAD_DIND_BLOCK; + ret |= BLOCK_ERROR; + return ret; + } + ctx->errcode = ext2fs_read_ind_block(ctx->fs, *dind_block, + ctx->dind_buf); + if (ctx->errcode) { + ret |= BLOCK_ERROR; + return ret; + } + + block_nr 
= (blk_t *) ctx->dind_buf; + offset = 0; + if (ctx->flags & BLOCK_FLAG_APPEND) { + for (i = 0; i < limit; i++, block_nr++) { + flags = block_iterate_ind(block_nr, + *dind_block, offset, + ctx); + changed |= flags; + if (flags & (BLOCK_ABORT | BLOCK_ERROR)) { + ret |= flags & (BLOCK_ABORT | BLOCK_ERROR); + break; + } + offset += sizeof(blk_t); + } + } else { + for (i = 0; i < limit; i++, block_nr++) { + if (*block_nr == 0) { + ctx->bcount += limit; + continue; + } + flags = block_iterate_ind(block_nr, + *dind_block, offset, + ctx); + changed |= flags; + if (flags & (BLOCK_ABORT | BLOCK_ERROR)) { + ret |= flags & (BLOCK_ABORT | BLOCK_ERROR); + break; + } + offset += sizeof(blk_t); + } + } + check_for_ro_violation_return(ctx, changed); + if (changed & BLOCK_CHANGED) { + ctx->errcode = ext2fs_write_ind_block(ctx->fs, *dind_block, + ctx->dind_buf); + if (ctx->errcode) + ret |= BLOCK_ERROR | BLOCK_ABORT; + } + if ((ctx->flags & BLOCK_FLAG_DEPTH_TRAVERSE) && + !(ctx->flags & BLOCK_FLAG_DATA_ONLY) && + !(ret & BLOCK_ABORT)) + ret |= (*ctx->func)(ctx->fs, dind_block, + BLOCK_COUNT_DIND, ref_block, + ref_offset, ctx->priv_data); + check_for_ro_violation_return(ctx, ret); + return ret; +} + +static int block_iterate_tind(blk_t *tind_block, blk_t ref_block, + int ref_offset, struct block_context *ctx) +{ + int ret = 0, changed = 0; + int i, flags, limit, offset; + blk_t *block_nr; + + limit = ctx->fs->blocksize >> 2; + if (!(ctx->flags & (BLOCK_FLAG_DEPTH_TRAVERSE | + BLOCK_FLAG_DATA_ONLY))) + ret = (*ctx->func)(ctx->fs, tind_block, + BLOCK_COUNT_TIND, ref_block, + ref_offset, ctx->priv_data); + check_for_ro_violation_return(ctx, ret); + if (!*tind_block || (ret & BLOCK_ABORT)) { + ctx->bcount += limit*limit*limit; + return ret; + } + if (*tind_block >= ctx->fs->super->s_blocks_count || + *tind_block < ctx->fs->super->s_first_data_block) { + ctx->errcode = EXT2_ET_BAD_TIND_BLOCK; + ret |= BLOCK_ERROR; + return ret; + } + ctx->errcode = ext2fs_read_ind_block(ctx->fs, 
*tind_block, + ctx->tind_buf); + if (ctx->errcode) { + ret |= BLOCK_ERROR; + return ret; + } + + block_nr = (blk_t *) ctx->tind_buf; + offset = 0; + if (ctx->flags & BLOCK_FLAG_APPEND) { + for (i = 0; i < limit; i++, block_nr++) { + flags = block_iterate_dind(block_nr, + *tind_block, + offset, ctx); + changed |= flags; + if (flags & (BLOCK_ABORT | BLOCK_ERROR)) { + ret |= flags & (BLOCK_ABORT | BLOCK_ERROR); + break; + } + offset += sizeof(blk_t); + } + } else { + for (i = 0; i < limit; i++, block_nr++) { + if (*block_nr == 0) { + ctx->bcount += limit*limit; + continue; + } + flags = block_iterate_dind(block_nr, + *tind_block, + offset, ctx); + changed |= flags; + if (flags & (BLOCK_ABORT | BLOCK_ERROR)) { + ret |= flags & (BLOCK_ABORT | BLOCK_ERROR); + break; + } + offset += sizeof(blk_t); + } + } + check_for_ro_violation_return(ctx, changed); + if (changed & BLOCK_CHANGED) { + ctx->errcode = ext2fs_write_ind_block(ctx->fs, *tind_block, + ctx->tind_buf); + if (ctx->errcode) + ret |= BLOCK_ERROR | BLOCK_ABORT; + } + if ((ctx->flags & BLOCK_FLAG_DEPTH_TRAVERSE) && + !(ctx->flags & BLOCK_FLAG_DATA_ONLY) && + !(ret & BLOCK_ABORT)) + ret |= (*ctx->func)(ctx->fs, tind_block, + BLOCK_COUNT_TIND, ref_block, + ref_offset, ctx->priv_data); + check_for_ro_violation_return(ctx, ret); + return ret; +} + + +/* + * Helper function for creating the journal using direct I/O routines + */ +struct mkjournal_struct { + int num_blocks; + int newblocks; + blk_t goal; + blk_t blk_to_zero; + int zero_count; + char *buf; + errcode_t err; +}; + + +errcode_t ext2fs_block_iterate2(ext2_filsys fs, + ext2_ino_t ino, + int flags, + char *block_buf, + int (*func)(ext2_filsys fs, + blk_t *blocknr, + e2_blkcnt_t blockcnt, + blk_t ref_blk, + int ref_offset, + void *priv_data), + void *priv_data) +{ + int i; + int r, ret = 0; + struct ext2_inode inode; + errcode_t retval; + struct block_context ctx; + int limit; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + ctx.errcode = 
ext2fs_read_inode(fs, ino, &inode); + if (ctx.errcode) + return ctx.errcode; + + /* + * Check to see if we need to limit large files + */ + if (flags & BLOCK_FLAG_NO_LARGE) { + if (!LINUX_S_ISDIR(inode.i_mode) && + (inode.i_size_high != 0)) + return EXT2_ET_FILE_TOO_BIG; + } + + limit = fs->blocksize >> 2; + + ctx.fs = fs; + ctx.func = func; + ctx.priv_data = priv_data; + ctx.flags = flags; + ctx.bcount = 0; + if (block_buf) { + ctx.ind_buf = block_buf; + } else { + retval = ext2fs_get_array(3, fs->blocksize, &ctx.ind_buf); + if (retval) + return retval; + } + ctx.dind_buf = ctx.ind_buf + fs->blocksize; + ctx.tind_buf = ctx.dind_buf + fs->blocksize; + + /* + * Iterate over the HURD translator block (if present) + */ + if ((fs->super->s_creator_os == EXT2_OS_HURD) && + !(flags & BLOCK_FLAG_DATA_ONLY)) { + if (inode.osd1.hurd1.h_i_translator) { + ret |= (*ctx.func)(fs, + &inode.osd1.hurd1.h_i_translator, + BLOCK_COUNT_TRANSLATOR, + 0, 0, priv_data); + if (ret & BLOCK_ABORT) + goto abort_exit; + check_for_ro_violation_goto(&ctx, ret, abort_exit); + } + } + + if (inode.i_flags & EXT4_EXTENTS_FL) { + ext2_extent_handle_t handle; + struct ext2fs_extent extent; + e2_blkcnt_t blockcnt = 0; + blk_t blk, new_blk; + int op = EXT2_EXTENT_ROOT; + int uninit; + unsigned int j; + unsigned int count=0; + unsigned int percentage=0; + unsigned int step=((struct mkjournal_struct *)priv_data)->num_blocks/100+1; + + ctx.errcode = ext2fs_extent_open2(fs, ino, &inode, &handle); + if (ctx.errcode) + goto abort_exit; + while (1) { + ctx.errcode = ext2fs_extent_get(handle, op, &extent); + + if (ctx.errcode) { + if (ctx.errcode != EXT2_ET_EXTENT_NO_NEXT) + break; + ctx.errcode = 0; + if (!(flags & BLOCK_FLAG_APPEND)) + break; + next_block_set: + count++; + blk = 0; + r = (*ctx.func)(fs, &blk, blockcnt, + 0, 0, priv_data); + ret |= r; + check_for_ro_violation_goto(&ctx, ret, + extent_errout); + if (r & BLOCK_CHANGED) { + ctx.errcode = + ext2fs_extent_set_bmap(handle, + (blk64_t) blockcnt++, + 
(blk64_t) blk, 0); + if (ctx.errcode || (ret & BLOCK_ABORT)){ + //printf("count 0x%x\n", count); + printf("\b\b\b"); + printf("100%\n"); + break; + } + if (blk) { + if ((count%step)==0) { + //printf("count 0x%x\n",count); + printf("\b\b\b"); + percentage++; + printf("%2d\%", percentage); + } + goto next_block_set; + } + } + break; + } + op = EXT2_EXTENT_NEXT; + blk = extent.e_pblk; + if (!(extent.e_flags & EXT2_EXTENT_FLAGS_LEAF)) { + if (ctx.flags & BLOCK_FLAG_DATA_ONLY) + continue; + if ((!(extent.e_flags & + EXT2_EXTENT_FLAGS_SECOND_VISIT) && + !(ctx.flags & BLOCK_FLAG_DEPTH_TRAVERSE)) || + ((extent.e_flags & + EXT2_EXTENT_FLAGS_SECOND_VISIT) && + (ctx.flags & BLOCK_FLAG_DEPTH_TRAVERSE))) { + ret |= (*ctx.func)(fs, &blk, + -1, 0, 0, priv_data); + if (ret & BLOCK_CHANGED) { + extent.e_pblk = blk; + ctx.errcode = + ext2fs_extent_replace(handle, 0, &extent); + if (ctx.errcode) + break; + } + } + continue; + } + uninit = 0; + if (extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT) + uninit = EXT2_EXTENT_SET_BMAP_UNINIT; + for (blockcnt = extent.e_lblk, j = 0; + j < extent.e_len; + blk++, blockcnt++, j++) { + new_blk = blk; + r = (*ctx.func)(fs, &new_blk, blockcnt, + 0, 0, priv_data); + ret |= r; + check_for_ro_violation_goto(&ctx, ret, + extent_errout); + if (r & BLOCK_CHANGED) { + ctx.errcode = + ext2fs_extent_set_bmap(handle, + (blk64_t) blockcnt, + (blk64_t) new_blk, + uninit); + if (ctx.errcode) + goto extent_errout; + } + if (ret & BLOCK_ABORT) + break; + } + } + + extent_errout: + ext2fs_extent_free(handle); + ret |= BLOCK_ERROR | BLOCK_ABORT; + goto errout; + } + + /* + * Iterate over normal data blocks + */ + for (i = 0; i < EXT2_NDIR_BLOCKS ; i++, ctx.bcount++) { + if (inode.i_block[i] || (flags & BLOCK_FLAG_APPEND)) { + ret |= (*ctx.func)(fs, &inode.i_block[i], + ctx.bcount, 0, i, priv_data); + if (ret & BLOCK_ABORT) + goto abort_exit; + } + } + check_for_ro_violation_goto(&ctx, ret, abort_exit); + if (inode.i_block[EXT2_IND_BLOCK] || (flags & BLOCK_FLAG_APPEND)) { 
+ ret |= block_iterate_ind(&inode.i_block[EXT2_IND_BLOCK], + 0, EXT2_IND_BLOCK, &ctx); + if (ret & BLOCK_ABORT) + goto abort_exit; + } else + ctx.bcount += limit; + if (inode.i_block[EXT2_DIND_BLOCK] || (flags & BLOCK_FLAG_APPEND)) { + ret |= block_iterate_dind(&inode.i_block[EXT2_DIND_BLOCK], + 0, EXT2_DIND_BLOCK, &ctx); + if (ret & BLOCK_ABORT) + goto abort_exit; + } else + ctx.bcount += limit * limit; + if (inode.i_block[EXT2_TIND_BLOCK] || (flags & BLOCK_FLAG_APPEND)) { + ret |= block_iterate_tind(&inode.i_block[EXT2_TIND_BLOCK], + 0, EXT2_TIND_BLOCK, &ctx); + if (ret & BLOCK_ABORT) + goto abort_exit; + } + +abort_exit: + if (ret & BLOCK_CHANGED) { + retval = ext2fs_write_inode(fs, ino, &inode); + if (retval) { + ret |= BLOCK_ERROR; + ctx.errcode = retval; + } + } +errout: + if (!block_buf) + ext2fs_free_mem(&ctx.ind_buf); + + return (ret & BLOCK_ERROR) ? ctx.errcode : 0; +} + +/* + * Emulate the old ext2fs_block_iterate function! + */ + +struct xlate { + int (*func)(ext2_filsys fs, + blk_t *blocknr, + int bcount, + void *priv_data); + void *real_private; +}; + +#ifdef __TURBOC__ + #pragma argsused +#endif +static int xlate_func(ext2_filsys fs, blk_t *blocknr, e2_blkcnt_t blockcnt, + blk_t ref_block EXT2FS_ATTR((unused)), + int ref_offset EXT2FS_ATTR((unused)), + void *priv_data) +{ + struct xlate *xl = (struct xlate *) priv_data; + + return (*xl->func)(fs, blocknr, (int) blockcnt, xl->real_private); +} + +errcode_t ext2fs_block_iterate(ext2_filsys fs, + ext2_ino_t ino, + int flags, + char *block_buf, + int (*func)(ext2_filsys fs, + blk_t *blocknr, + int blockcnt, + void *priv_data), + void *priv_data) +{ + struct xlate xl; + + xl.real_private = priv_data; + xl.func = func; + + return ext2fs_block_iterate2(fs, ino, BLOCK_FLAG_NO_LARGE | flags, + block_buf, xlate_func, &xl); +} + diff --git a/fs/ext4/format/closefs.c b/fs/ext4/format/closefs.c new file mode 100755 index 0000000..3184711 --- /dev/null +++ b/fs/ext4/format/closefs.c @@ -0,0 +1,429 @@ +/* + * 
/*
 * Return 1 if a is 0, 1, or can be reduced to 1 by repeatedly dividing
 * by b with no remainder (i.e. a is a power of b); return 0 otherwise.
 */
static int test_root(int a, int b)
{
	int rest = a;

	if (rest == 0)
		return 1;

	/* strip one factor of b per pass until only 1 is left */
	for (; rest != 1; rest /= b) {
		if (rest % b != 0)
			return 0;
	}
	return 1;
}
/*
 * Report where the superblock copy and group-descriptor blocks of block
 * group `group` are located.
 *
 * Each ret_* pointer may be NULL; when non-NULL it receives:
 *   ret_super_blk     block of the superblock copy, 0 if the group has none
 *   ret_old_desc_blk  first block of the traditional descriptor table, or 0
 *   ret_new_desc_blk  block of the meta_bg descriptor block, or 0
 *   ret_meta_bg       index of the meta block group containing `group`
 *
 * Returns the blocks left in the group after subtracting the superblock,
 * the descriptor blocks, the inode table and two further blocks.
 */
int ext2fs_super_and_bgd_loc(ext2_filsys fs,
			     dgrp_t group,
			     blk_t *ret_super_blk,
			     blk_t *ret_old_desc_blk,
			     blk_t *ret_new_desc_blk,
			     int *ret_meta_bg)
{
	blk_t	group_block, super_blk = 0, old_desc_blk = 0, new_desc_blk = 0;
	unsigned int meta_bg, meta_bg_size;
	blk_t	numblocks, old_desc_blocks;
	int	has_super;

	group_block = ext2fs_group_first_block(fs, group); /* first block of this group */

	if (fs->super->s_feature_incompat & EXT2_FEATURE_INCOMPAT_META_BG)
		old_desc_blocks = fs->super->s_first_meta_bg;
	else
		old_desc_blocks =
			fs->desc_blocks + fs->super->s_reserved_gdt_blocks; /* descriptor blocks plus the reserved GDT blocks */

	if (group == fs->group_desc_count-1) {
		/* the last group may be shorter than a full group */
		numblocks = (fs->super->s_blocks_count -
			     fs->super->s_first_data_block) %
			fs->super->s_blocks_per_group;
		if (!numblocks)
			numblocks = fs->super->s_blocks_per_group;
	} else
		numblocks = fs->super->s_blocks_per_group;

	has_super = ext2fs_bg_has_super(fs, group);

	if (has_super) {
		super_blk = group_block;
		numblocks--; /* one block for the superblock copy */
	}
	meta_bg_size = EXT2_DESC_PER_BLOCK(fs->super); /* group descriptors that fit in one block */
	meta_bg = group / meta_bg_size;

	if (!(fs->super->s_feature_incompat & EXT2_FEATURE_INCOMPAT_META_BG) ||
	    (meta_bg < fs->super->s_first_meta_bg)) {
		if (has_super) {
			old_desc_blk = group_block + 1;
			numblocks -= old_desc_blocks; /* descriptor table and reserved blocks */
		}
	} else {
		/* only the first, second and last group of a meta_bg get a descriptor block */
		if (((group % meta_bg_size) == 0) ||
		    ((group % meta_bg_size) == 1) ||
		    ((group % meta_bg_size) == (meta_bg_size-1))) {
			if (has_super)
				has_super = 1;
			new_desc_blk = group_block + has_super;
			numblocks--;
		}
	}

	/* presumably the 2 covers the block and inode bitmaps — TODO confirm */
	numblocks -= 2 + fs->inode_blocks_per_group;

	if (ret_super_blk)
		*ret_super_blk = super_blk;
	if (ret_old_desc_blk)
		*ret_old_desc_blk = old_desc_blk;
	if (ret_new_desc_blk)
		*ret_new_desc_blk = new_desc_blk;
	if (ret_meta_bg)
		*ret_meta_bg = meta_bg;
	return (numblocks);
}
/* s_uuid is handled by e2fsck already */ + /* other fields should be left alone */ +} + +static errcode_t write_backup_super(ext2_filsys fs, dgrp_t group, + blk_t group_block, + struct ext2_super_block *super_shadow) +{ + dgrp_t sgrp = group; + + if (sgrp > ((1 << 16) - 1)) + sgrp = (1 << 16) - 1; +#ifdef WORDS_BIGENDIAN + super_shadow->s_block_group_nr = ext2fs_swab16(sgrp); +#else + fs->super->s_block_group_nr = sgrp; +#endif + + return io_channel_write_blk(fs->io, group_block, -SUPERBLOCK_SIZE,//-SUPERBLOCK_SIZE + super_shadow); +} +#if 0 +static void verbose_buffer(void* buf,unsigned int size) +{ + int i; + int offset=0; + for(i=0;i<512;i++) { + printf("offset 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",offset, + *((unsigned int *)(buf+offset)), + *((unsigned int *)(buf+offset+4)), + *((unsigned int *)(buf+offset+8)), + *((unsigned int *)(buf+offset+12))); + offset+=16; + i+=16; + } +} +#endif + +errcode_t ext2fs_flush(ext2_filsys fs) +{ + dgrp_t i; + errcode_t retval; + unsigned long fs_state; + __u32 feature_incompat; + struct ext2_super_block *super_shadow = 0; + struct ext2_group_desc *group_shadow = 0; +#ifdef WORDS_BIGENDIAN + struct ext2_group_desc *s, *t; + dgrp_t j; +#endif + char *group_ptr; + int old_desc_blocks; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + fs_state = fs->super->s_state; + feature_incompat = fs->super->s_feature_incompat; + + fs->super->s_wtime = 0x5105cd7b;//fs->now ? 
fs->now : time(NULL); + fs->super->s_block_group_nr = 0; +#ifdef WORDS_BIGENDIAN + retval = EXT2_ET_NO_MEMORY; + retval = ext2fs_get_mem(SUPERBLOCK_SIZE, &super_shadow); + if (retval) + goto errout; + retval = ext2fs_get_array(fs->desc_blocks, fs->blocksize, + &group_shadow); + if (retval) + goto errout; + memset(group_shadow, 0, (size_t) fs->blocksize * + fs->desc_blocks); + + /* swap the group descriptors */ + for (j=0, s=fs->group_desc, t=group_shadow; + j < fs->group_desc_count; j++, t++, s++) { + *t = *s; + ext2fs_swap_group_desc(t); + } +#else + super_shadow = fs->super; + group_shadow = fs->group_desc; +#endif + + /* + * Set the state of the FS to be non-valid. (The state has + * already been backed up earlier, and will be restored after + * we write out the backup superblocks.) + */ + fs->super->s_state &= ~EXT2_VALID_FS; + fs->super->s_feature_incompat &= ~EXT3_FEATURE_INCOMPAT_RECOVER; +#ifdef WORDS_BIGENDIAN + *super_shadow = *fs->super; + ext2fs_swap_super(super_shadow); +#endif + + /* + * If this is an external journal device, don't write out the + * block group descriptors or any of the backup superblocks + */ + if (fs->super->s_feature_incompat & + EXT3_FEATURE_INCOMPAT_JOURNAL_DEV) + goto write_primary_superblock_only; + + /* + * Write out the master group descriptors, and the backup + * superblocks and group descriptors. 
+ */ + group_ptr = (char *) group_shadow; + if (fs->super->s_feature_incompat & EXT2_FEATURE_INCOMPAT_META_BG) + old_desc_blocks = fs->super->s_first_meta_bg; + else + old_desc_blocks = fs->desc_blocks; + + //verbose_buffer(group_ptr, sizeof(group_ptr));//Tina + + + for (i = 0; i < fs->group_desc_count; i++) { + blk_t super_blk, old_desc_blk, new_desc_blk; + int meta_bg; + + ext2fs_super_and_bgd_loc(fs, i, &super_blk, &old_desc_blk, + &new_desc_blk, &meta_bg); + + if (!(fs->flags & EXT2_FLAG_MASTER_SB_ONLY) &&i && super_blk) { + retval = write_backup_super(fs, i, super_blk, + super_shadow); + if (retval) + goto errout; + } + if (fs->flags & EXT2_FLAG_SUPER_ONLY) + continue; + + + if ((old_desc_blk) && + (!(fs->flags & EXT2_FLAG_MASTER_SB_ONLY) || (i == 0))) { + //printf("Tina old_desc_blk group 0x%x from 0x%x to 0x%x\n", i,old_desc_blk, old_desc_blk+old_desc_blocks-1); + retval = io_channel_write_blk(fs->io, + old_desc_blk, old_desc_blocks, group_ptr); + if (retval) + goto errout; + } + if (new_desc_blk) { + //printf("Tina new_desc_blk from 0x%x to0x%x\n", old_desc_blk, old_desc_blk+old_desc_blocks-1); + retval = io_channel_write_blk(fs->io, new_desc_blk, + 1, group_ptr + (meta_bg*fs->blocksize)); + if (retval) + goto errout; + } + } + + /* + * If the write_bitmaps() function is present, call it to + * flush the bitmaps. This is done this way so that a simple + * program that doesn't mess with the bitmaps doesn't need to + * drag in the bitmaps.c code. + */ + if (fs->write_bitmaps) { + retval = fs->write_bitmaps(fs); + if (retval) + goto errout; + } + +write_primary_superblock_only: + /* + * Write out master superblock. This has to be done + * separately, since it is located at a fixed location + * (SUPERBLOCK_OFFSET). We flush all other pending changes + * out to disk first, just to avoid a race condition with an + * insy-tinsy window.... 
+ */ + + fs->super->s_block_group_nr = 0; + fs->super->s_state = fs_state; + fs->super->s_feature_incompat = feature_incompat; +#ifdef WORDS_BIGENDIAN + *super_shadow = *fs->super; + ext2fs_swap_super(super_shadow); +#endif + + retval = io_channel_flush(fs->io); + retval = write_primary_superblock(fs, super_shadow); + if (retval) + goto errout; + + fs->flags &= ~EXT2_FLAG_DIRTY; + + retval = io_channel_flush(fs->io); +errout: + fs->super->s_state = fs_state; +#ifdef WORDS_BIGENDIAN + if (super_shadow) + ext2fs_free_mem(&super_shadow); + if (group_shadow) + ext2fs_free_mem(&group_shadow); +#endif + return retval; +} + +errcode_t ext2fs_close(ext2_filsys fs) +{ + errcode_t retval; + int meta_blks; + io_stats stats = 0; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + if (fs->write_bitmaps) { + retval = fs->write_bitmaps(fs); + if (retval) + return retval; + } + if (fs->super->s_kbytes_written && + fs->io->manager->get_stats) + fs->io->manager->get_stats(fs->io, &stats); + if (stats && stats->bytes_written && (fs->flags & EXT2_FLAG_RW)) { + fs->super->s_kbytes_written += stats->bytes_written >> 10; + meta_blks = fs->desc_blocks + 1; + if (!(fs->flags & EXT2_FLAG_SUPER_ONLY)) + fs->super->s_kbytes_written += meta_blks / + (fs->blocksize / 1024); + if ((fs->flags & EXT2_FLAG_DIRTY) == 0) + fs->flags |= EXT2_FLAG_SUPER_ONLY | EXT2_FLAG_DIRTY; + } + if (fs->flags & EXT2_FLAG_DIRTY) { + retval = ext2fs_flush(fs); + if (retval) + return retval; + } + ext2fs_free(fs); + return 0; +} + diff --git a/fs/ext4/format/com_err.h b/fs/ext4/format/com_err.h new file mode 100755 index 0000000..d8d092e --- /dev/null +++ b/fs/ext4/format/com_err.h @@ -0,0 +1,66 @@ +/* + * Header file for common error description library. + * + * Copyright 1988, Student Information Processing Board of the + * Massachusetts Institute of Technology. + * + * For copyright and distribution info, see the documentation supplied + * with this package. 
+ */ + +#if !defined(__COM_ERR_H) && !defined(__COM_ERR_H__) + +#ifdef __GNUC__ +#define COM_ERR_ATTR(x) __attribute__(x) +#else +#define COM_ERR_ATTR(x) +#endif + +#include <stddef.h> +#include <stdarg.h> + +typedef long errcode_t; + +struct error_table { + char const * const * msgs; + long base; + int n_msgs; +}; +struct et_list; + +//extern void com_err (const char *, long, const char *, ...) +// COM_ERR_ATTR((format(printf, 3, 4))); + +//extern void com_err_va (const char *whoami, errcode_t code, const char *fmt, +// va_list args) +// COM_ERR_ATTR((format(printf, 3, 0))); + +extern char const *error_message (long); +extern void (*com_err_hook) (const char *, long, const char *, va_list); +extern void (*set_com_err_hook (void (*) (const char *, long, + const char *, va_list))) + (const char *, long, const char *, va_list); +extern void (*reset_com_err_hook (void)) (const char *, long, + const char *, va_list); +extern int init_error_table(const char * const *msgs, long base, int count); + +extern errcode_t add_error_table(const struct error_table * et); +extern errcode_t remove_error_table(const struct error_table * et); +extern void add_to_error_table(struct et_list *new_table); + +/* Provided for Heimdall compatibility */ +extern const char *com_right(struct et_list *list, long code); +extern const char *com_right_r(struct et_list *list, long code, char *str, size_t len); +extern void initialize_error_table_r(struct et_list **list, + const char **messages, + int num_errors, + long base); +extern void free_error_table(struct et_list *et); + +/* Provided for compatibility with other com_err libraries */ +extern int et_list_lock(void); +extern int et_list_unlock(void); + +#define __COM_ERR_H +#define __COM_ERR_H__ +#endif /* !defined(__COM_ERR_H) && !defined(__COM_ERR_H__)*/ diff --git a/fs/ext4/format/crc16.c b/fs/ext4/format/crc16.c new file mode 100755 index 0000000..5996021 --- /dev/null +++ b/fs/ext4/format/crc16.c @@ -0,0 +1,71 @@ +/* + * crc16.c + * + * 
This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +#include "ext2_types.h" + +#include "crc16.h" + +/** CRC table for the CRC-16. The poly is 0x8005 (x16 + x15 + x2 + 1) */ +static __u16 const crc16_table[256] = { + 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, + 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, + 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, + 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, + 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, + 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, + 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, + 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, + 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, + 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, + 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, + 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, + 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, + 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, + 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, + 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, + 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, + 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, + 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, + 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, + 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, + 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, + 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, + 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, + 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, + 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 
0x9481, 0x5440, + 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, + 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, + 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, + 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, + 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, + 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 +}; + +/** + * Compute the CRC-16 for the data buffer + * + * @param crc previous CRC value + * @param buffer data pointer + * @param len number of bytes in the buffer + * @return the updated CRC value + */ +crc16_t ext2fs_crc16(crc16_t crc, const void *buffer, unsigned int len) +{ + const unsigned char *cp = buffer; + + while (len--) + /* + * for an unknown reason, PPC treats __u16 as signed + * and keeps doing sign extension on the value. + * Instead, use only the low 16 bits of an unsigned + * int for holding the CRC value to avoid this. + */ + crc = (((crc >> 8) & 0xffU) ^ + crc16_table[(crc ^ *cp++) & 0xffU]) & 0x0000ffffU; + return crc; +} diff --git a/fs/ext4/format/crc16.h b/fs/ext4/format/crc16.h new file mode 100755 index 0000000..322e68d --- /dev/null +++ b/fs/ext4/format/crc16.h @@ -0,0 +1,26 @@ +/* + * crc16.h - CRC-16 routine + * + * Implements the standard CRC-16: + * Width 16 + * Poly 0x8005 (x16 + x15 + x2 + 1) + * Init 0 + * + * Copyright (c) 2005 Ben Gardner <bgardner@wabtec.com> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#ifndef __CRC16_H +#define __CRC16_H + +/* for an unknown reason, PPC treats __u16 as signed and keeps doing sign + * extension on the value. Instead, use only the low 16 bits of an + * unsigned int for holding the CRC value to avoid this. 
+ */
+typedef unsigned int crc16_t;
+
+extern crc16_t ext2fs_crc16(crc16_t crc, const void *buffer, unsigned int len);
+
+#endif /* __CRC16_H */
diff --git a/fs/ext4/format/csum.c b/fs/ext4/format/csum.c
new file mode 100755
index 0000000..9d8d701
--- /dev/null
+++ b/fs/ext4/format/csum.c
@@ -0,0 +1,149 @@
+/*
+ * csum.c --- checksumming of ext3 structures
+ *
+ * Copyright (C) 2006 Cluster File Systems, Inc.
+ * Copyright (C) 2006, 2007 by Andreas Dilger <adilger@clusterfs.com>
+ *
+ * %Begin-Header%
+ * This file may be redistributed under the terms of the GNU Library
+ * General Public License, version 2.
+ * %End-Header%
+ */
+
+#include <common.h>
+//#include <ext_common.h>
+//#include <ext4fs.h>
+#include <malloc.h>
+#include <stddef.h>
+#include <linux/stat.h>
+#include <linux/time.h>
+
+
+#include "ext2_fs.h"
+#include "ext2fs.h"
+#include "crc16.h"
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+#ifdef DEBUG
+#define STATIC
+#else
+#define STATIC static
+#endif
+
+/*
+ * Compute the crc16 checksum of group descriptor 'group'.
+ *
+ * Returns 0 when the superblock does not have
+ * EXT4_FEATURE_RO_COMPAT_GDT_CSUM set.  Otherwise the crc is seeded with
+ * ~0 and folded over the filesystem UUID, the group number, the
+ * descriptor bytes that precede bg_checksum and, when s_desc_size is
+ * larger than struct ext2_group_desc, the bytes that follow bg_checksum.
+ * The running value is kept in a __u16, so it is truncated to 16 bits
+ * between the individual ext2fs_crc16() calls.
+ */
+STATIC __u16 ext2fs_group_desc_csum(ext2_filsys fs, dgrp_t group)
+{
+	struct ext2_group_desc *gd = &fs->group_desc[group];
+	__u16 sum;
+	int covered;
+#ifdef WORDS_BIGENDIAN
+	struct ext2_group_desc le_copy;
+#endif
+
+	if (!(fs->super->s_feature_ro_compat & EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+		return 0;
+
+#ifdef WORDS_BIGENDIAN
+	/* Have to swab back to little-endian to do the checksum */
+	le_copy = *gd;
+	ext2fs_swap_group_desc(&le_copy);
+	gd = &le_copy;
+
+	group = ext2fs_swab32(group);
+#endif
+	covered = offsetof(struct ext2_group_desc, bg_checksum);
+
+	sum = ext2fs_crc16(~0, fs->super->s_uuid,
+			   sizeof(fs->super->s_uuid));
+	sum = ext2fs_crc16(sum, &group, sizeof(group));
+	sum = ext2fs_crc16(sum, gd, covered);
+
+	covered += sizeof(gd->bg_checksum); /* skip checksum */
+	assert(covered == sizeof(*gd));
+
+	/* for checksum of struct ext4_group_desc do the rest...*/
+	if (covered < fs->super->s_desc_size)
+		sum = ext2fs_crc16(sum, (char *)gd + covered,
+				   fs->super->s_desc_size - covered);
+
+	return sum;
+}
+
+/*
+ * Returns 1 when the stored bg_checksum of 'group' matches the computed
+ * one, or when the GDT_CSUM feature is not enabled; returns 0 on a
+ * mismatch.
+ */
+int ext2fs_group_desc_csum_verify(ext2_filsys fs, dgrp_t group)
+{
+	if (!EXT2_HAS_RO_COMPAT_FEATURE(fs->super,
+					EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+		return 1;
+
+	return fs->group_desc[group].bg_checksum ==
+		ext2fs_group_desc_csum(fs, group);
+}
+
+/*
+ * Store a freshly computed checksum in the descriptor of 'group'.
+ * Does nothing when the GDT_CSUM feature is not enabled.
+ */
+void ext2fs_group_desc_csum_set(ext2_filsys fs, dgrp_t group)
+{
+	if (!EXT2_HAS_RO_COMPAT_FEATURE(fs->super,
+					EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+		return;
+
+	fs->group_desc[group].bg_checksum = ext2fs_group_desc_csum(fs, group);
+}
+
+/*
+ * Scan the inode bitmap of group 'grp_no' from its last inode towards
+ * its first and return the 1-based position (within the group) of the
+ * first inode found set.  When no inode of the group is set,
+ * inodes_per_grp is returned.
+ */
+static __u32 find_last_inode_ingrp(ext2fs_inode_bitmap bitmap,
+				   __u32 inodes_per_grp, dgrp_t grp_no)
+{
+	ext2_ino_t first = grp_no * inodes_per_grp + 1;
+	ext2_ino_t ino = first + inodes_per_grp - 1;
+
+	while (ino >= first) {
+		if (ext2fs_fast_test_inode_bitmap(bitmap, ino))
+			return ino - first + 1;
+		ino--;
+	}
+	return inodes_per_grp;
+}
+
+/* update the bitmap flags, set the itable high watermark, and calculate
+ * checksums for the group descriptors
+ *
+ * Returns EXT2_ET_NO_INODE_BITMAP when fs->inode_map is not loaded and 0
+ * otherwise (including when GDT_CSUM is not enabled, in which case
+ * nothing is touched).  The superblock is marked dirty only when a
+ * descriptor's flags, bg_itable_unused or checksum actually changed. */
+errcode_t ext2fs_set_gdt_csum(ext2_filsys fs)
+{
+	struct ext2_super_block *sb = fs->super;
+	int changed = 0;
+	dgrp_t g;
+
+	if (!fs->inode_map)
+		return EXT2_ET_NO_INODE_BITMAP;
+
+	if (!EXT2_HAS_RO_COMPAT_FEATURE(fs->super,
+					EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+		return 0;
+
+	for (g = 0; g < fs->group_desc_count; g++) {
+		struct ext2_group_desc *gd = &fs->group_desc[g];
+		int prev_csum = gd->bg_checksum;
+		int prev_unused = gd->bg_itable_unused;
+		int prev_flags = gd->bg_flags;
+
+		if (gd->bg_free_inodes_count != sb->s_inodes_per_group) {
+			gd->bg_flags &= ~EXT2_BG_INODE_UNINIT;
+			gd->bg_itable_unused = sb->s_inodes_per_group -
+				find_last_inode_ingrp(fs->inode_map,
+						      sb->s_inodes_per_group, g);
+		} else {
+			/* every inode of the group is free */
+			gd->bg_flags |= EXT2_BG_INODE_UNINIT;
+			gd->bg_itable_unused = sb->s_inodes_per_group;
+		}
+
+		ext2fs_group_desc_csum_set(fs, g);
+
+		if (prev_flags != gd->bg_flags ||
+		    prev_unused != gd->bg_itable_unused ||
+		    prev_csum != gd->bg_checksum)
+			changed = 1;
+	}
+	if (changed)
+		ext2fs_mark_super_dirty(fs);
+	return 0;
+}
+
+
diff --git a/fs/ext4/format/dir_iterate.c b/fs/ext4/format/dir_iterate.c
new file mode 100755
index 0000000..bf4cbfa
--- /dev/null
+++ b/fs/ext4/format/dir_iterate.c
@@ -0,0 +1,266 @@
+/*
+ * dir_iterate.c --- ext2fs directory iteration operations
+ *
+ * Copyright (C) 1993, 1994, 1994, 1995, 1996, 1997 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * This file may be redistributed under the terms of the GNU Library
+ * General Public License, version 2.
+ * %End-Header%
+ */
+
+#include <common.h>
+//#include <ext_common.h>
+//#include <ext4fs.h>
+#include <malloc.h>
+#include <stddef.h>
+
+#include "ext2_fs.h"
+#include "ext2fsP.h"
+
+#define EXT4_MAX_REC_LEN ((1<<16)-1)
+
+/*
+ * Decode the stored rec_len of 'dirent' into *rec_len.
+ *
+ * For block sizes below 65536 the stored value is used as is.  For
+ * larger blocks a stored 0 or EXT4_MAX_REC_LEN means "the whole block";
+ * any other value carries bits 16-17 of the length in its two low bits.
+ * Always returns 0.
+ */
+errcode_t ext2fs_get_rec_len(ext2_filsys fs,
+			     struct ext2_dir_entry *dirent,
+			     unsigned int *rec_len)
+{
+	unsigned int raw = dirent->rec_len;
+	unsigned int decoded;
+
+	if (fs->blocksize < 65536)
+		decoded = raw;
+	else if (raw == 0 || raw == EXT4_MAX_REC_LEN)
+		decoded = fs->blocksize;
+	else
+		decoded = ((raw & 3) << 16) | (raw & 65532);
+
+	*rec_len = decoded;
+	return 0;
+}
+
+/*
+ * Encode 'len' into dirent->rec_len (the inverse of
+ * ext2fs_get_rec_len()).
+ *
+ * Returns EINVAL when len exceeds the block size, when the block size
+ * exceeds 1 << 18, or when len is not a multiple of 4; returns 0
+ * otherwise.
+ */
+errcode_t ext2fs_set_rec_len(ext2_filsys fs,
+			     unsigned int len,
+			     struct ext2_dir_entry *dirent)
+{
+	if (len > fs->blocksize)
+		return EINVAL;
+	if (fs->blocksize > (1 << 18))
+		return EINVAL;
+	if (len & 3)
+		return EINVAL;
+
+	if (len < 65536)
+		dirent->rec_len = len;
+	else if (len != fs->blocksize)
+		dirent->rec_len = (len & 65532) | ((len >> 16) & 3);
+	else
+		/* entry spans the whole block */
+		dirent->rec_len = (fs->blocksize == 65536) ?
+			EXT4_MAX_REC_LEN : 0;
+
+	return 0;
+}
+
+/*
+ * This function checks to see whether or not a potential deleted
+ * directory entry looks valid. What we do is check the deleted entry
+ * and each successive entry to make sure that they all look valid and
+ * that the last deleted entry ends at the beginning of the next
+ * undeleted entry. Returns 1 if the deleted entry looks valid, zero
+ * if not valid.
+ */
+static int ext2fs_validate_entry(ext2_filsys fs, char *buf,
+				 unsigned int offset,
+				 unsigned int final_offset)
+{
+	struct ext2_dir_entry *dirent;
+	unsigned int rec_len;
+#define DIRENT_MIN_LENGTH 12
+
+	while ((offset < final_offset) &&
+	       (offset <= fs->blocksize - DIRENT_MIN_LENGTH)) {
+		dirent = (struct ext2_dir_entry *)(buf + offset);
+		if (ext2fs_get_rec_len(fs, dirent, &rec_len))
+			return 0;
+		offset += rec_len;
+		if ((rec_len < 8) ||
+		    ((rec_len % 4) != 0) ||
+		    ((((unsigned) dirent->name_len & 0xFF)+8) > rec_len))
+			return 0;
+	}
+	return (offset == final_offset);
+}
+
+/*
+ * Call 'func' for the entries of directory 'dir'.
+ *
+ * 'block_buf', when non-NULL, is used as the scratch buffer for one
+ * directory block; otherwise a buffer of fs->blocksize bytes is
+ * allocated here and released before returning.  'flags' and
+ * 'priv_data' are handed on to ext2fs_process_dir_block() through the
+ * dir_context.
+ *
+ * Returns the error from ext2fs_check_directory(), ext2fs_get_mem() or
+ * ext2fs_block_iterate2() if one of them fails, otherwise the errcode
+ * recorded while processing the directory blocks (0 on success).
+ */
+errcode_t ext2fs_dir_iterate2(ext2_filsys fs,
+			      ext2_ino_t dir,
+			      int flags,
+			      char *block_buf,
+			      int (*func)(ext2_ino_t dir,
+					  int entry,
+					  struct ext2_dir_entry *dirent,
+					  int offset,
+					  int blocksize,
+					  char *buf,
+					  void *priv_data),
+			      void *priv_data)
+{
+	struct dir_context ctx;
+	errcode_t retval;
+
+	EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS);
+
+	retval = ext2fs_check_directory(fs, dir);
+	if (retval)
+		return retval;
+
+	ctx.dir = dir;
+	ctx.flags = flags;
+	if (block_buf)
+		ctx.buf = block_buf;
+	else {
+		retval = ext2fs_get_mem(fs->blocksize, &ctx.buf);
+		if (retval)
+			return retval;
+	}
+	ctx.func = func;
+	ctx.priv_data = priv_data;
+	ctx.errcode = 0;
+	retval = ext2fs_block_iterate2(fs, dir, BLOCK_FLAG_READ_ONLY, 0,
+				       ext2fs_process_dir_block, &ctx);
+	/* only free the buffer if it was allocated above */
+	if (!block_buf)
+		ext2fs_free_mem(&ctx.buf);
+	if (retval)
+		return retval;
+	return ctx.errcode;
+}
+
+/*
+ * Adapter state that lets the older, shorter callback signature of
+ * ext2fs_dir_iterate() be driven by ext2fs_dir_iterate2().
+ */
+struct xlate {
+	int (*func)(struct ext2_dir_entry *dirent,
+		    int offset,
+		    int blocksize,
+		    char *buf,
+		    void *priv_data);
+	void *real_private;
+};
+
+/* Drop the 'dir' and 'entry' arguments and forward to the wrapped callback. */
+static int xlate_func(ext2_ino_t dir EXT2FS_ATTR((unused)),
+		      int entry EXT2FS_ATTR((unused)),
+		      struct ext2_dir_entry *dirent, int offset,
+		      int blocksize, char *buf, void *priv_data)
+{
+	struct xlate *xl = (struct xlate *) priv_data;
+
+	return (*xl->func)(dirent, offset, blocksize, buf, xl->real_private);
+}
+
+/*
+ * Same as ext2fs_dir_iterate2(), for callbacks that do not take the
+ * directory inode number and the entry kind.
+ */
+extern errcode_t ext2fs_dir_iterate(ext2_filsys fs,
+			      ext2_ino_t dir,
+			      int flags,
+			      char *block_buf,
+			      int (*func)(struct ext2_dir_entry *dirent,
+					  int offset,
+					  int blocksize,
+					  char *buf,
+					  void *priv_data),
+			      void *priv_data)
+{
+	struct xlate xl;
+
+	xl.real_private = priv_data;
+	xl.func = func;
+
+	return ext2fs_dir_iterate2(fs, dir, flags, block_buf,
+				   xlate_func, &xl);
+}
+
+
+/*
+ * Helper function which is private to this module. Used by
+ * ext2fs_dir_iterate() and ext2fs_dblist_dir_iterate()
+ *
+ * Reads directory block *blocknr into ctx->buf and calls ctx->func for
+ * each entry (entries with inode 0 only when DIRENT_FLAG_INCLUDE_EMPTY
+ * is set).  Calls with a negative blockcnt are ignored.  The block is
+ * written back when a callback returned DIRENT_CHANGED.
+ *
+ * Returns 0 to continue the block iteration and BLOCK_ABORT to stop it;
+ * whenever BLOCK_ABORT is caused by an error, that error is left in
+ * ctx->errcode so that ext2fs_dir_iterate2() can report it.
+ */
+int ext2fs_process_dir_block(ext2_filsys fs,
+			     blk_t *blocknr,
+			     e2_blkcnt_t blockcnt,
+			     blk_t ref_block EXT2FS_ATTR((unused)),
+			     int ref_offset EXT2FS_ATTR((unused)),
+			     void *priv_data)
+{
+	struct dir_context *ctx = (struct dir_context *) priv_data;
+	unsigned int offset = 0;
+	unsigned int next_real_entry = 0;
+	int ret = 0;
+	int changed = 0;
+	int do_abort = 0;
+	unsigned int rec_len, size;
+	int entry;
+	struct ext2_dir_entry *dirent;
+	errcode_t err;
+
+	if (blockcnt < 0)
+		return 0;
+
+	/* only the first block of the directory starts with "." */
+	entry = blockcnt ? DIRENT_OTHER_FILE : DIRENT_DOT_FILE;
+
+	ctx->errcode = ext2fs_read_dir_block(fs, *blocknr, ctx->buf);
+	if (ctx->errcode)
+		return BLOCK_ABORT;
+
+	while (offset < fs->blocksize) {
+		dirent = (struct ext2_dir_entry *) (ctx->buf + offset);
+		err = ext2fs_get_rec_len(fs, dirent, &rec_len);
+		if (err) {
+			/* do not abort silently: hand the error to the caller */
+			ctx->errcode = err;
+			return BLOCK_ABORT;
+		}
+		if (((offset + rec_len) > fs->blocksize) ||
+		    (rec_len < 8) ||
+		    ((rec_len % 4) != 0) ||
+		    ((((unsigned) dirent->name_len & 0xFF)+8) > rec_len)) {
+			ctx->errcode = EXT2_ET_DIR_CORRUPTED;
+			return BLOCK_ABORT;
+		}
+		if (!dirent->inode &&
+		    !(ctx->flags & DIRENT_FLAG_INCLUDE_EMPTY))
+			goto next;
+
+		/*
+		 * An entry that starts before next_real_entry lies inside
+		 * the rec_len of a live entry, i.e. it was found by the
+		 * DIRENT_FLAG_INCLUDE_REMOVED scan below.
+		 */
+		ret = (ctx->func)(ctx->dir,
+				  (next_real_entry > offset) ?
+				  DIRENT_DELETED_FILE : entry,
+				  dirent, offset,
+				  fs->blocksize, ctx->buf,
+				  ctx->priv_data);
+		if (entry < DIRENT_OTHER_FILE)
+			entry++;
+
+		if (ret & DIRENT_CHANGED) {
+			/* the callback may have altered rec_len; re-read it */
+			err = ext2fs_get_rec_len(fs, dirent, &rec_len);
+			if (err) {
+				ctx->errcode = err;
+				return BLOCK_ABORT;
+			}
+			changed++;
+		}
+		if (ret & DIRENT_ABORT) {
+			do_abort++;
+			break;
+		}
+next:
+		if (next_real_entry == offset)
+			next_real_entry += rec_len;
+
+		if (ctx->flags & DIRENT_FLAG_INCLUDE_REMOVED) {
+			/* space the entry needs for its header and name */
+			size = ((dirent->name_len & 0xFF) + 11) & ~3;
+
+			if (rec_len != size) {
+				unsigned int final_offset;
+
+				/*
+				 * Slack after the name: step through it 4
+				 * bytes at a time until something that
+				 * validates as an entry chain is found.
+				 */
+				final_offset = offset + rec_len;
+				offset += size;
+				while (offset < final_offset &&
+				       !ext2fs_validate_entry(fs, ctx->buf,
+							      offset,
+							      final_offset))
+					offset += 4;
+				continue;
+			}
+		}
+		offset += rec_len;
+	}
+
+	if (changed) {
+		ctx->errcode = ext2fs_write_dir_block(fs, *blocknr, ctx->buf);
+		if (ctx->errcode)
+			return BLOCK_ABORT;
+	}
+	if (do_abort)
+		return BLOCK_ABORT;
+	return 0;
+}
+
diff --git a/fs/ext4/format/dirblock.c b/fs/ext4/format/dirblock.c
new file mode 100755
index 0000000..6787990
--- /dev/null
+++ b/fs/ext4/format/dirblock.c
@@ -0,0 +1,116 @@
+/*
+ * dirblock.c --- directory block routines.
+ *
+ * Copyright (C) 1995, 1996 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * This file may be redistributed under the terms of the GNU Library
+ * General Public License, version 2.
+ * %End-Header%
+ */
+
+#include <common.h>
+//#include <ext_common.h>
+//#include <ext4fs.h>
+#include <malloc.h>
+#include <stddef.h>
+#include <linux/stat.h>
+#include <linux/time.h>
+
+#include "ext2_fs.h"
+#include "ext2fs.h"
+
+/*
+ * Read directory block 'block' into 'buf' (fs->blocksize bytes) and walk
+ * its entries, byte-swapping them in place on big-endian hosts.
+ *
+ * Returns the I/O or ext2fs_get_rec_len() error if one occurs.
+ * Otherwise the return value is the status of the last entry parsed:
+ * EXT2_ET_DIR_CORRUPTED if its rec_len is < 8 or not a multiple of 4, or
+ * if its name does not fit in rec_len, else 0.
+ *
+ * NOTE(review): retval is reassigned by ext2fs_get_rec_len() at the top
+ * of every iteration, so corruption seen in an earlier entry is
+ * forgotten once a later entry is parsed.  Left as is because callers
+ * may rely on it -- confirm before making the status sticky.
+ */
+errcode_t ext2fs_read_dir_block2(ext2_filsys fs, blk_t block,
+				 void *buf, int flags EXT2FS_ATTR((unused)))
+{
+	errcode_t retval;
+	char *p, *end;
+	struct ext2_dir_entry *dirent;
+	unsigned int name_len, rec_len;
+
+
+	retval = io_channel_read_blk(fs->io, block, 1, buf);
+	if (retval)
+		return retval;
+
+	p = (char *) buf;
+	end = (char *) buf + fs->blocksize;
+	while (p < end-8) {
+		dirent = (struct ext2_dir_entry *) p;
+#ifdef WORDS_BIGENDIAN
+		dirent->inode = ext2fs_swab32(dirent->inode);
+		dirent->rec_len = ext2fs_swab16(dirent->rec_len);
+		dirent->name_len = ext2fs_swab16(dirent->name_len);
+#endif
+		name_len = dirent->name_len;
+#ifdef WORDS_BIGENDIAN
+		if (flags & EXT2_DIRBLOCK_V2_STRUCT)
+			dirent->name_len = ext2fs_swab16(dirent->name_len);
+#endif
+		if ((retval = ext2fs_get_rec_len(fs, dirent, &rec_len)) != 0)
+			return retval;
+		if ((rec_len < 8) || (rec_len % 4)) {
+			/* unusable length: step by the minimum entry size */
+			rec_len = 8;
+			retval = EXT2_ET_DIR_CORRUPTED;
+		} else if (((name_len & 0xFF) + 8) > rec_len)
+			retval = EXT2_ET_DIR_CORRUPTED;
+		p += rec_len;
+	}
+	return retval;
+}
+
+/* ext2fs_read_dir_block2() with flags == 0. */
+errcode_t ext2fs_read_dir_block(ext2_filsys fs, blk_t block,
+				 void *buf)
+{
+	return ext2fs_read_dir_block2(fs, block, buf, 0);
+}
+
+
+/*
+ * Write the directory block in 'inbuf' to block 'block'.
+ *
+ * On little-endian hosts the buffer is written as is.  On big-endian
+ * hosts the entries are byte-swapped in a temporary copy, so 'inbuf'
+ * itself is never modified; the copy is released on every return path.
+ *
+ * Returns 0 on success, EXT2_ET_DIR_CORRUPTED (big-endian only) when an
+ * entry has a rec_len < 8 or not a multiple of 4, or the error from the
+ * allocation, ext2fs_get_rec_len() or the block write.
+ */
+errcode_t ext2fs_write_dir_block2(ext2_filsys fs, blk_t block,
+				  void *inbuf, int flags EXT2FS_ATTR((unused)))
+{
+#ifdef WORDS_BIGENDIAN
+	errcode_t retval;
+	char *p, *end;
+	char *buf = 0;
+	unsigned int rec_len;
+	struct ext2_dir_entry *dirent;
+
+	retval = ext2fs_get_mem(fs->blocksize, &buf);
+	if (retval)
+		return retval;
+	memcpy(buf, inbuf, fs->blocksize);
+	p = buf;
+	end = buf + fs->blocksize;
+	while (p < end) {
+		dirent = (struct ext2_dir_entry *) p;
+		if ((retval = ext2fs_get_rec_len(fs, dirent, &rec_len)) != 0) {
+			/* do not leak the temporary copy on this error path */
+			ext2fs_free_mem(&buf);
+			return retval;
+		}
+		if ((rec_len < 8) ||
+		    (rec_len % 4)) {
+			ext2fs_free_mem(&buf);
+			return (EXT2_ET_DIR_CORRUPTED);
+		}
+		/* advance first: rec_len was decoded before the swab below */
+		p += rec_len;
+		dirent->inode = ext2fs_swab32(dirent->inode);
+		dirent->rec_len = ext2fs_swab16(dirent->rec_len);
+		dirent->name_len = ext2fs_swab16(dirent->name_len);
+
+		if (flags & EXT2_DIRBLOCK_V2_STRUCT)
+			dirent->name_len = ext2fs_swab16(dirent->name_len);
+	}
+	retval = io_channel_write_blk(fs->io, block, 1, buf);
+	ext2fs_free_mem(&buf);
+	return retval;
+#else
+	return io_channel_write_blk(fs->io, block, 1, (char *) inbuf);
+#endif
+}
+
+
+/* ext2fs_write_dir_block2() with flags == 0. */
+errcode_t ext2fs_write_dir_block(ext2_filsys fs, blk_t block,
+				 void *inbuf)
+{
+	return ext2fs_write_dir_block2(fs, block, inbuf, 0);
+}
+
diff --git a/fs/ext4/format/e2image.h b/fs/ext4/format/e2image.h
new file mode 100755
index 0000000..4de2c8d
--- /dev/null
+++ b/fs/ext4/format/e2image.h
@@ -0,0 +1,51 @@
+/*
+ * e2image.h --- header file describing the ext2 image format
+ *
+ * Copyright (C) 2000 Theodore Ts'o.
+ *
+ * Note: this uses the POSIX IO interfaces, unlike most of the other
+ * functions in this library. So sue me.
+ *
+ * %Begin-Header%
+ * This file may be redistributed under the terms of the GNU Library
+ * General Public License, version 2.
+ * %End-Header%
+ */
+
+
+struct ext2_image_hdr {
+ __u32 magic_number; /* This must be EXT2_ET_MAGIC_E2IMAGE */
+ char magic_descriptor[16]; /* "Ext2 Image 1.0", w/ null padding */
+ char fs_hostname[64];/* Hostname of machine of image */
+ char fs_netaddr[32]; /* Network address */
+ __u32 fs_netaddr_type;/* 0 = IPV4, 1 = IPV6, etc. */
+ __u32 fs_device; /* Device number of image */
+ char fs_device_name[64]; /* Device name */
+ char fs_uuid[16]; /* UUID of filesystem */
+ __u32 fs_blocksize; /* Block size of the filesystem */
+ __u32 fs_reserved[8];
+
+ __u32 image_device; /* Device number of image file */
+ __u32 image_inode; /* Inode number of image file */
+ __u32 image_time; /* Time of image creation */
+ __u32 image_reserved[8];
+
+ __u32 offset_super; /* Byte offset of the sb and descriptors */
+ __u32 offset_inode; /* Byte offset of the inode table */
+ __u32 offset_inodemap; /* Byte offset of the inode bitmaps */
+ __u32 offset_blockmap; /* Byte offset of the block bitmaps */
+ __u32 offset_reserved[8];
+};
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fs/ext4/format/expanddir.c b/fs/ext4/format/expanddir.c
new file mode 100755
index 0000000..1b4fc47
--- /dev/null
+++ b/fs/ext4/format/expanddir.c
@@ -0,0 +1,126 @@
+/*
+ * expand.c --- expand an ext2fs directory
+ *
+ * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * This file may be redistributed under the terms of the GNU Library
+ * General Public License, version 2.
+ * %End-Header%
+ */
+
+#include <common.h>
+//#include <ext_common.h>
+//#include <ext4fs.h>
+#include <malloc.h>
+#include <stddef.h>
+
+#include "ext2_fs.h"
+#include "ext2fs.h"
+
+/* State shared between ext2fs_expand_dir() and its block callback. */
+struct expand_dir_struct {
+	int done;		/* set once the new directory block exists */
+	int newblocks;		/* number of blocks allocated by the callback */
+	errcode_t err;		/* first error hit inside the callback */
+};
+
+/*
+ * Block-iterator callback for ext2fs_expand_dir().
+ *
+ * Mapped blocks are skipped (their number is remembered as the goal for
+ * the next allocation).  For an unmapped slot a new block is allocated:
+ * when blockcnt > 0 it is initialised as an empty directory block and
+ * the iteration is finished, otherwise it is written out zero-filled.
+ *
+ * Returns 0 for mapped blocks, BLOCK_CHANGED (plus BLOCK_ABORT once the
+ * directory block has been added) on success, and BLOCK_ABORT with
+ * es->err set on failure.  The scratch buffer is released on the failure
+ * path as well.
+ *
+ * NOTE(review): last_blk is function-static, so the allocation goal
+ * carries over between calls, including calls for other directories.
+ */
+static int expand_dir_proc(ext2_filsys fs,
+			   blk_t *blocknr,
+			   e2_blkcnt_t blockcnt,
+			   blk_t ref_block EXT2FS_ATTR((unused)),
+			   int ref_offset EXT2FS_ATTR((unused)),
+			   void *priv_data)
+{
+	struct expand_dir_struct *es = (struct expand_dir_struct *) priv_data;
+	blk_t new_blk;
+	static blk_t last_blk = 0;
+	char *block;
+	errcode_t retval;
+
+	if (*blocknr) {
+		last_blk = *blocknr;
+		return 0;
+	}
+	retval = ext2fs_new_block(fs, last_blk, 0, &new_blk);
+	if (retval) {
+		es->err = retval;
+		return BLOCK_ABORT;
+	}
+	if (blockcnt > 0) {
+		retval = ext2fs_new_dir_block(fs, 0, 0, &block);
+		if (retval) {
+			es->err = retval;
+			return BLOCK_ABORT;
+		}
+		es->done = 1;
+		retval = ext2fs_write_dir_block(fs, new_blk, block);
+	} else {
+		retval = ext2fs_get_mem(fs->blocksize, &block);
+		if (retval) {
+			es->err = retval;
+			return BLOCK_ABORT;
+		}
+		memset(block, 0, fs->blocksize);
+		retval = io_channel_write_blk(fs->io, new_blk, 1, block);
+	}
+	/* the buffer is no longer needed whether or not the write worked */
+	ext2fs_free_mem(&block);
+	if (retval) {
+		es->err = retval;
+		return BLOCK_ABORT;
+	}
+	*blocknr = new_blk;
+	ext2fs_block_alloc_stats(fs, new_blk, +1);
+	es->newblocks++;
+
+	if (es->done)
+		return (BLOCK_CHANGED | BLOCK_ABORT);
+	else
+		return BLOCK_CHANGED;
+}
+
+/*
+ * Grow directory 'dir' by one block and update its inode.
+ *
+ * Returns EXT2_ET_RO_FILSYS if the filesystem is not open read/write,
+ * EXT2_ET_NO_BLOCK_BITMAP if the block bitmap is not loaded, the error
+ * of any failing helper (including the block iteration itself),
+ * EXT2_ET_EXPAND_DIR_ERR if the iteration ended without adding a
+ * directory block, and 0 on success.
+ */
+errcode_t ext2fs_expand_dir(ext2_filsys fs, ext2_ino_t dir)
+{
+	errcode_t retval;
+	struct expand_dir_struct es;
+	struct ext2_inode inode;
+
+	EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS);
+
+	if (!(fs->flags & EXT2_FLAG_RW))
+		return EXT2_ET_RO_FILSYS;
+
+	if (!fs->block_map)
+		return EXT2_ET_NO_BLOCK_BITMAP;
+
+	retval = ext2fs_check_directory(fs, dir);
+	if (retval)
+		return retval;
+
+	es.done = 0;
+	es.err = 0;
+	es.newblocks = 0;
+
+	retval = ext2fs_block_iterate2(fs, dir, BLOCK_FLAG_APPEND,
+				       0, expand_dir_proc, &es);
+
+	if (es.err)
+		return es.err;
+	/* a failure of the iteration itself must not be ignored either */
+	if (retval)
+		return retval;
+	if (!es.done)
+		return EXT2_ET_EXPAND_DIR_ERR;
+
+	/*
+	 * Update the size and block count fields in the inode.
+	 */
+	retval = ext2fs_read_inode(fs, dir, &inode);
+	if (retval)
+		return retval;
+
+	inode.i_size += fs->blocksize;
+	ext2fs_iblk_add_blocks(fs, &inode, es.newblocks);
+
+	retval = ext2fs_write_inode(fs, dir, &inode);
+	if (retval)
+		return retval;
+
+	return 0;
+}
diff --git a/fs/ext4/format/ext2_err.h b/fs/ext4/format/ext2_err.h
new file mode 100755
index 0000000..ab0169b
--- /dev/null
+++ b/fs/ext4/format/ext2_err.h
@@ -0,0 +1,155 @@
+/*
+ * ext2_err.h:
+ * This file is automatically generated; please do not edit it.
+ */
+
+#include "com_err.h"
+
+#define EXT2_ET_BASE (2133571328L)
+#define EXT2_ET_MAGIC_EXT2FS_FILSYS (2133571329L)
+#define EXT2_ET_MAGIC_BADBLOCKS_LIST (2133571330L)
+#define EXT2_ET_MAGIC_BADBLOCKS_ITERATE (2133571331L)
+#define EXT2_ET_MAGIC_INODE_SCAN (2133571332L)
+#define EXT2_ET_MAGIC_IO_CHANNEL (2133571333L)
+#define EXT2_ET_MAGIC_UNIX_IO_CHANNEL (2133571334L)
+#define EXT2_ET_MAGIC_IO_MANAGER (2133571335L)
+#define EXT2_ET_MAGIC_BLOCK_BITMAP (2133571336L)
+#define EXT2_ET_MAGIC_INODE_BITMAP (2133571337L)
+#define EXT2_ET_MAGIC_GENERIC_BITMAP (2133571338L)
+#define EXT2_ET_MAGIC_TEST_IO_CHANNEL (2133571339L)
+#define EXT2_ET_MAGIC_DBLIST (2133571340L)
+#define EXT2_ET_MAGIC_ICOUNT (2133571341L)
+#define EXT2_ET_MAGIC_PQ_IO_CHANNEL (2133571342L)
+#define EXT2_ET_MAGIC_EXT2_FILE (2133571343L)
+#define EXT2_ET_MAGIC_E2IMAGE (2133571344L)
+#define EXT2_ET_MAGIC_INODE_IO_CHANNEL (2133571345L)
+#define EXT2_ET_MAGIC_EXTENT_HANDLE (2133571346L)
+#define EXT2_ET_BAD_MAGIC (2133571347L)
+#define EXT2_ET_REV_TOO_HIGH (2133571348L)
+#define EXT2_ET_RO_FILSYS (2133571349L)
+#define EXT2_ET_GDESC_READ (2133571350L)
+#define EXT2_ET_GDESC_WRITE (2133571351L)
+#define EXT2_ET_GDESC_BAD_BLOCK_MAP (2133571352L)
+#define EXT2_ET_GDESC_BAD_INODE_MAP (2133571353L)
+#define 
EXT2_ET_GDESC_BAD_INODE_TABLE (2133571354L) +#define EXT2_ET_INODE_BITMAP_WRITE (2133571355L) +#define EXT2_ET_INODE_BITMAP_READ (2133571356L) +#define EXT2_ET_BLOCK_BITMAP_WRITE (2133571357L) +#define EXT2_ET_BLOCK_BITMAP_READ (2133571358L) +#define EXT2_ET_INODE_TABLE_WRITE (2133571359L) +#define EXT2_ET_INODE_TABLE_READ (2133571360L) +#define EXT2_ET_NEXT_INODE_READ (2133571361L) +#define EXT2_ET_UNEXPECTED_BLOCK_SIZE (2133571362L) +#define EXT2_ET_DIR_CORRUPTED (2133571363L) +#define EXT2_ET_SHORT_READ (2133571364L) +#define EXT2_ET_SHORT_WRITE (2133571365L) +#define EXT2_ET_DIR_NO_SPACE (2133571366L) +#define EXT2_ET_NO_INODE_BITMAP (2133571367L) +#define EXT2_ET_NO_BLOCK_BITMAP (2133571368L) +#define EXT2_ET_BAD_INODE_NUM (2133571369L) +#define EXT2_ET_BAD_BLOCK_NUM (2133571370L) +#define EXT2_ET_EXPAND_DIR_ERR (2133571371L) +#define EXT2_ET_TOOSMALL (2133571372L) +#define EXT2_ET_BAD_BLOCK_MARK (2133571373L) +#define EXT2_ET_BAD_BLOCK_UNMARK (2133571374L) +#define EXT2_ET_BAD_BLOCK_TEST (2133571375L) +#define EXT2_ET_BAD_INODE_MARK (2133571376L) +#define EXT2_ET_BAD_INODE_UNMARK (2133571377L) +#define EXT2_ET_BAD_INODE_TEST (2133571378L) +#define EXT2_ET_FUDGE_BLOCK_BITMAP_END (2133571379L) +#define EXT2_ET_FUDGE_INODE_BITMAP_END (2133571380L) +#define EXT2_ET_BAD_IND_BLOCK (2133571381L) +#define EXT2_ET_BAD_DIND_BLOCK (2133571382L) +#define EXT2_ET_BAD_TIND_BLOCK (2133571383L) +#define EXT2_ET_NEQ_BLOCK_BITMAP (2133571384L) +#define EXT2_ET_NEQ_INODE_BITMAP (2133571385L) +#define EXT2_ET_BAD_DEVICE_NAME (2133571386L) +#define EXT2_ET_MISSING_INODE_TABLE (2133571387L) +#define EXT2_ET_CORRUPT_SUPERBLOCK (2133571388L) +#define EXT2_ET_BAD_GENERIC_MARK (2133571389L) +#define EXT2_ET_BAD_GENERIC_UNMARK (2133571390L) +#define EXT2_ET_BAD_GENERIC_TEST (2133571391L) +#define EXT2_ET_SYMLINK_LOOP (2133571392L) +#define EXT2_ET_CALLBACK_NOTHANDLED (2133571393L) +#define EXT2_ET_BAD_BLOCK_IN_INODE_TABLE (2133571394L) +#define EXT2_ET_UNSUPP_FEATURE (2133571395L) 
+#define EXT2_ET_RO_UNSUPP_FEATURE (2133571396L) +#define EXT2_ET_LLSEEK_FAILED (2133571397L) +#define EXT2_ET_NO_MEMORY (2133571398L) +#define EXT2_ET_INVALID_ARGUMENT (2133571399L) +#define EXT2_ET_BLOCK_ALLOC_FAIL (2133571400L) +#define EXT2_ET_INODE_ALLOC_FAIL (2133571401L) +#define EXT2_ET_NO_DIRECTORY (2133571402L) +#define EXT2_ET_TOO_MANY_REFS (2133571403L) +#define EXT2_ET_FILE_NOT_FOUND (2133571404L) +#define EXT2_ET_FILE_RO (2133571405L) +#define EXT2_ET_DB_NOT_FOUND (2133571406L) +#define EXT2_ET_DIR_EXISTS (2133571407L) +#define EXT2_ET_UNIMPLEMENTED (2133571408L) +#define EXT2_ET_CANCEL_REQUESTED (2133571409L) +#define EXT2_ET_FILE_TOO_BIG (2133571410L) +#define EXT2_ET_JOURNAL_NOT_BLOCK (2133571411L) +#define EXT2_ET_NO_JOURNAL_SB (2133571412L) +#define EXT2_ET_JOURNAL_TOO_SMALL (2133571413L) +#define EXT2_ET_JOURNAL_UNSUPP_VERSION (2133571414L) +#define EXT2_ET_LOAD_EXT_JOURNAL (2133571415L) +#define EXT2_ET_NO_JOURNAL (2133571416L) +#define EXT2_ET_DIRHASH_UNSUPP (2133571417L) +#define EXT2_ET_BAD_EA_BLOCK_NUM (2133571418L) +#define EXT2_ET_TOO_MANY_INODES (2133571419L) +#define EXT2_ET_NOT_IMAGE_FILE (2133571420L) +#define EXT2_ET_RES_GDT_BLOCKS (2133571421L) +#define EXT2_ET_RESIZE_INODE_CORRUPT (2133571422L) +#define EXT2_ET_SET_BMAP_NO_IND (2133571423L) +#define EXT2_ET_TDB_SUCCESS (2133571424L) +#define EXT2_ET_TDB_ERR_CORRUPT (2133571425L) +#define EXT2_ET_TDB_ERR_IO (2133571426L) +#define EXT2_ET_TDB_ERR_LOCK (2133571427L) +#define EXT2_ET_TDB_ERR_OOM (2133571428L) +#define EXT2_ET_TDB_ERR_EXISTS (2133571429L) +#define EXT2_ET_TDB_ERR_NOLOCK (2133571430L) +#define EXT2_ET_TDB_ERR_EINVAL (2133571431L) +#define EXT2_ET_TDB_ERR_NOEXIST (2133571432L) +#define EXT2_ET_TDB_ERR_RDONLY (2133571433L) +#define EXT2_ET_DBLIST_EMPTY (2133571434L) +#define EXT2_ET_RO_BLOCK_ITERATE (2133571435L) +#define EXT2_ET_MAGIC_EXTENT_PATH (2133571436L) +#define EXT2_ET_MAGIC_RESERVED_10 (2133571437L) +#define EXT2_ET_MAGIC_RESERVED_11 (2133571438L) +#define 
EXT2_ET_MAGIC_RESERVED_12 (2133571439L) +#define EXT2_ET_MAGIC_RESERVED_13 (2133571440L) +#define EXT2_ET_MAGIC_RESERVED_14 (2133571441L) +#define EXT2_ET_MAGIC_RESERVED_15 (2133571442L) +#define EXT2_ET_MAGIC_RESERVED_16 (2133571443L) +#define EXT2_ET_MAGIC_RESERVED_17 (2133571444L) +#define EXT2_ET_MAGIC_RESERVED_18 (2133571445L) +#define EXT2_ET_MAGIC_RESERVED_19 (2133571446L) +#define EXT2_ET_EXTENT_HEADER_BAD (2133571447L) +#define EXT2_ET_EXTENT_INDEX_BAD (2133571448L) +#define EXT2_ET_EXTENT_LEAF_BAD (2133571449L) +#define EXT2_ET_EXTENT_NO_SPACE (2133571450L) +#define EXT2_ET_INODE_NOT_EXTENT (2133571451L) +#define EXT2_ET_EXTENT_NO_NEXT (2133571452L) +#define EXT2_ET_EXTENT_NO_PREV (2133571453L) +#define EXT2_ET_EXTENT_NO_UP (2133571454L) +#define EXT2_ET_EXTENT_NO_DOWN (2133571455L) +#define EXT2_ET_NO_CURRENT_NODE (2133571456L) +#define EXT2_ET_OP_NOT_SUPPORTED (2133571457L) +#define EXT2_ET_CANT_INSERT_EXTENT (2133571458L) +#define EXT2_ET_CANT_SPLIT_EXTENT (2133571459L) +#define EXT2_ET_EXTENT_NOT_FOUND (2133571460L) +#define EXT2_ET_EXTENT_NOT_SUPPORTED (2133571461L) +#define EXT2_ET_EXTENT_INVALID_LENGTH (2133571462L) +#define EXT2_ET_IO_CHANNEL_NO_SUPPORT_64 (2133571463L) +#define EXT2_NO_MTAB_FILE (2133571464L) +extern const struct error_table et_ext2_error_table; +extern void initialize_ext2_error_table(void); + +/* For compatibility with Heimdal */ +extern void initialize_ext2_error_table_r(struct et_list **list); + +#define ERROR_TABLE_BASE_ext2 (2133571328L) + +/* for compatibility with older versions... */ +#define init_ext2_err_tbl initialize_ext2_error_table +#define ext2_err_base ERROR_TABLE_BASE_ext2 diff --git a/fs/ext4/format/ext2_ext_attr.h b/fs/ext4/format/ext2_ext_attr.h new file mode 100755 index 0000000..ed548d1 --- /dev/null +++ b/fs/ext4/format/ext2_ext_attr.h @@ -0,0 +1,71 @@ +/* + File: linux/ext2_ext_attr.h + + On-disk format of extended attributes for the ext2 filesystem. 
+ + (C) 2000 Andreas Gruenbacher, <a.gruenbacher@computer.org> +*/ + +#ifndef _EXT2_EXT_ATTR_H +#define _EXT2_EXT_ATTR_H +/* Magic value in attribute blocks */ +#define EXT2_EXT_ATTR_MAGIC_v1 0xEA010000 +#define EXT2_EXT_ATTR_MAGIC 0xEA020000 + +/* Maximum number of references to one attribute block */ +#define EXT2_EXT_ATTR_REFCOUNT_MAX 1024 + +struct ext2_ext_attr_header { + __u32 h_magic; /* magic number for identification */ + __u32 h_refcount; /* reference count */ + __u32 h_blocks; /* number of disk blocks used */ + __u32 h_hash; /* hash value of all attributes */ + __u32 h_reserved[4]; /* zero right now */ +}; + +struct ext2_ext_attr_entry { + __u8 e_name_len; /* length of name */ + __u8 e_name_index; /* attribute name index */ + __u16 e_value_offs; /* offset in disk block of value */ + __u32 e_value_block; /* disk block attribute is stored on (n/i) */ + __u32 e_value_size; /* size of attribute value */ + __u32 e_hash; /* hash value of name and value */ +#if 0 + char e_name[0]; /* attribute name */ +#endif +}; + +#define EXT2_EXT_ATTR_PAD_BITS 2 +#define EXT2_EXT_ATTR_PAD ((unsigned) 1<<EXT2_EXT_ATTR_PAD_BITS) +#define EXT2_EXT_ATTR_ROUND (EXT2_EXT_ATTR_PAD-1) +#define EXT2_EXT_ATTR_LEN(name_len) \ + (((name_len) + EXT2_EXT_ATTR_ROUND + \ + sizeof(struct ext2_ext_attr_entry)) & ~EXT2_EXT_ATTR_ROUND) +#define EXT2_EXT_ATTR_NEXT(entry) \ + ( (struct ext2_ext_attr_entry *)( \ + (char *)(entry) + EXT2_EXT_ATTR_LEN((entry)->e_name_len)) ) +#define EXT2_EXT_ATTR_SIZE(size) \ + (((size) + EXT2_EXT_ATTR_ROUND) & ~EXT2_EXT_ATTR_ROUND) +#define EXT2_EXT_IS_LAST_ENTRY(entry) (*((__u32 *)(entry)) == 0UL) +#define EXT2_EXT_ATTR_NAME(entry) \ + (((char *) (entry)) + sizeof(struct ext2_ext_attr_entry)) +#define EXT2_XATTR_LEN(name_len) \ + (((name_len) + EXT2_EXT_ATTR_ROUND + \ + sizeof(struct ext2_xattr_entry)) & ~EXT2_EXT_ATTR_ROUND) +#define EXT2_XATTR_SIZE(size) \ + (((size) + EXT2_EXT_ATTR_ROUND) & ~EXT2_EXT_ATTR_ROUND) + +#ifdef __KERNEL__ +# ifdef 
CONFIG_EXT2_FS_EXT_ATTR +extern int ext2_get_ext_attr(struct inode *, const char *, char *, size_t, int); +extern int ext2_set_ext_attr(struct inode *, const char *, char *, size_t, int); +extern void ext2_ext_attr_free_inode(struct inode *inode); +extern void ext2_ext_attr_put_super(struct super_block *sb); +extern int ext2_ext_attr_init(void); +extern void ext2_ext_attr_done(void); +# else +# define ext2_get_ext_attr NULL +# define ext2_set_ext_attr NULL +# endif +#endif /* __KERNEL__ */ +#endif /* _EXT2_EXT_ATTR_H */ diff --git a/fs/ext4/format/ext2_fs.h b/fs/ext4/format/ext2_fs.h new file mode 100755 index 0000000..3373911 --- /dev/null +++ b/fs/ext4/format/ext2_fs.h @@ -0,0 +1,794 @@ +/* + * linux/include/linux/ext2_fs.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _LINUX_EXT2_FS_H +#define _LINUX_EXT2_FS_H + +#include "ext2_types.h" /* Changed from linux/types.h */ + +/* + * The second extended filesystem constants/structures + */ + +/* + * Define EXT2FS_DEBUG to produce debug messages + */ +#undef EXT2FS_DEBUG + +/* + * Define EXT2_PREALLOCATE to preallocate data blocks for expanding files + */ +#define EXT2_PREALLOCATE +#define EXT2_DEFAULT_PREALLOC_BLOCKS 8 + +/* + * The second extended file system version + */ +#define EXT2FS_DATE "95/08/09" +#define EXT2FS_VERSION "0.5b" + +/* + * Special inode numbers + */ +#define EXT2_BAD_INO 1 /* Bad blocks inode */ +#define EXT2_ROOT_INO 2 /* Root inode */ +#define EXT2_ACL_IDX_INO 3 /* ACL inode */ +#define EXT2_ACL_DATA_INO 4 /* ACL inode */ +#define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */ +#define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */ +#define EXT2_RESIZE_INO 7 /* Reserved group descriptors inode */ +#define EXT2_JOURNAL_INO 8 /* Journal inode */ 
+#define EXT2_EXCLUDE_INO 9 /* The "exclude" inode, for snapshots */ + +/* First non-reserved inode for old ext2 filesystems */ +#define EXT2_GOOD_OLD_FIRST_INO 11 + +/* + * The second extended file system magic number + */ +#define EXT2_SUPER_MAGIC 0xEF53 + +//#ifdef __KERNEL__ +//#define EXT2_SB(sb) (&((sb)->u.ext2_sb)) +//#else +/* Assume that user mode programs are passing in an ext2fs superblock, not + * a kernel struct super_block. This will allow us to call the feature-test + * macros from user land. */ +#define EXT2_SB(sb) (sb) +//#endif + +/* + * Maximal count of links to a file + */ +#define EXT2_LINK_MAX 65000 + +/* + * Macro-instructions used to manage several block sizes + */ +#define EXT2_MIN_BLOCK_LOG_SIZE 10 /* 1024 */ +#define EXT2_MAX_BLOCK_LOG_SIZE 16 /* 65536 */ +#define EXT2_MIN_BLOCK_SIZE (1 << EXT2_MIN_BLOCK_LOG_SIZE) +#define EXT2_MAX_BLOCK_SIZE (1 << EXT2_MAX_BLOCK_LOG_SIZE) +//#ifdef __KERNEL__ +//#define EXT2_BLOCK_SIZE(s) ((s)->s_blocksize) +//#define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) +//#define EXT2_ADDR_PER_BLOCK_BITS(s) (EXT2_SB(s)->addr_per_block_bits) +//#define EXT2_INODE_SIZE(s) (EXT2_SB(s)->s_inode_size) +//#define EXT2_FIRST_INO(s) (EXT2_SB(s)->s_first_ino) +//#else +#define EXT2_BLOCK_SIZE(s) (EXT2_MIN_BLOCK_SIZE << (s)->s_log_block_size) +#define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) +#define EXT2_INODE_SIZE(s) (((s)->s_rev_level == EXT2_GOOD_OLD_REV) ? \ + EXT2_GOOD_OLD_INODE_SIZE : (s)->s_inode_size) +#define EXT2_FIRST_INO(s) (((s)->s_rev_level == EXT2_GOOD_OLD_REV) ? 
\ + EXT2_GOOD_OLD_FIRST_INO : (s)->s_first_ino) +//#endif +#define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof(__u32)) + +/* + * Macro-instructions used to manage fragments + */ +#define EXT2_MIN_FRAG_SIZE EXT2_MIN_BLOCK_SIZE +#define EXT2_MAX_FRAG_SIZE EXT2_MAX_BLOCK_SIZE +#define EXT2_MIN_FRAG_LOG_SIZE EXT2_MIN_BLOCK_LOG_SIZE +//#ifdef __KERNEL__ +//# define EXT2_FRAG_SIZE(s) (EXT2_SB(s)->s_frag_size) +//# define EXT2_FRAGS_PER_BLOCK(s) (EXT2_SB(s)->s_frags_per_block) +//#else +# define EXT2_FRAG_SIZE(s) (EXT2_MIN_FRAG_SIZE << (s)->s_log_frag_size) +# define EXT2_FRAGS_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / EXT2_FRAG_SIZE(s)) +//#endif + +/* + * ACL structures + */ +struct ext2_acl_header /* Header of Access Control Lists */ +{ + __u32 aclh_size; + __u32 aclh_file_count; + __u32 aclh_acle_count; + __u32 aclh_first_acle; +}; + +struct ext2_acl_entry /* Access Control List Entry */ +{ + __u32 acle_size; + __u16 acle_perms; /* Access permissions */ + __u16 acle_type; /* Type of entry */ + __u16 acle_tag; /* User or group identity */ + __u16 acle_pad1; + __u32 acle_next; /* Pointer on next entry for the */ + /* same inode or on next free entry */ +}; + +/* + * Structure of a blocks group descriptor + */ +struct ext2_group_desc +{ + __u32 bg_block_bitmap; /* Blocks bitmap block */ + __u32 bg_inode_bitmap; /* Inodes bitmap block */ + __u32 bg_inode_table; /* Inodes table block */ + __u16 bg_free_blocks_count; /* Free blocks count */ + __u16 bg_free_inodes_count; /* Free inodes count */ + __u16 bg_used_dirs_count; /* Directories count */ + __u16 bg_flags; + __u32 bg_reserved[2]; + __u16 bg_itable_unused; /* Unused inodes count */ + __u16 bg_checksum; /* crc16(s_uuid+grouo_num+group_desc)*/ +}; + +struct ext4_group_desc +{ + __u32 bg_block_bitmap; /* Blocks bitmap block */ + __u32 bg_inode_bitmap; /* Inodes bitmap block */ + __u32 bg_inode_table; /* Inodes table block */ + __u16 bg_free_blocks_count; /* Free blocks count */ + __u16 bg_free_inodes_count; /* Free 
inodes count */ + __u16 bg_used_dirs_count; /* Directories count */ + __u16 bg_flags; + __u32 bg_reserved[2]; + __u16 bg_itable_unused; /* Unused inodes count */ + __u16 bg_checksum; /* crc16(s_uuid+grouo_num+group_desc)*/ + __u32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ + __u32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ + __u32 bg_inode_table_hi; /* Inodes table block MSB */ + __u16 bg_free_blocks_count_hi;/* Free blocks count MSB */ + __u16 bg_free_inodes_count_hi;/* Free inodes count MSB */ + __u16 bg_used_dirs_count_hi; /* Directories count MSB */ + __u16 bg_pad; + __u32 bg_reserved2[3]; +}; + +#define EXT2_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not initialized */ +#define EXT2_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not initialized */ +#define EXT2_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ + +/* + * Data structures used by the directory indexing feature + * + * Note: all of the multibyte integer fields are little endian. + */ + +/* + * Note: dx_root_info is laid out so that if it should somehow get + * overlaid by a dirent the two low bits of the hash version will be + * zero. Therefore, the hash version mod 4 should never be 0. + * Sincerely, the paranoia department. 
+ */ +struct ext2_dx_root_info { + __u32 reserved_zero; + __u8 hash_version; /* 0 now, 1 at release */ + __u8 info_length; /* 8 */ + __u8 indirect_levels; + __u8 unused_flags; +}; + +#define EXT2_HASH_LEGACY 0 +#define EXT2_HASH_HALF_MD4 1 +#define EXT2_HASH_TEA 2 +#define EXT2_HASH_LEGACY_UNSIGNED 3 /* reserved for userspace lib */ +#define EXT2_HASH_HALF_MD4_UNSIGNED 4 /* reserved for userspace lib */ +#define EXT2_HASH_TEA_UNSIGNED 5 /* reserved for userspace lib */ + +#define EXT2_HASH_FLAG_INCOMPAT 0x1 + +struct ext2_dx_entry { + __u32 hash; + __u32 block; +}; + +struct ext2_dx_countlimit { + __u16 limit; + __u16 count; +}; + + +/* + * Macro-instructions used to manage group descriptors + */ +#define EXT2_MIN_DESC_SIZE 32 +#define EXT2_MIN_DESC_SIZE_64BIT 64 +#define EXT2_MAX_DESC_SIZE EXT2_MIN_BLOCK_SIZE +#define EXT2_DESC_SIZE(s) \ + ((EXT2_SB(s)->s_feature_incompat & EXT4_FEATURE_INCOMPAT_64BIT) ? \ + (s)->s_desc_size : EXT2_MIN_DESC_SIZE) + +#define EXT2_BLOCKS_PER_GROUP(s) (EXT2_SB(s)->s_blocks_per_group) +#define EXT2_INODES_PER_GROUP(s) (EXT2_SB(s)->s_inodes_per_group) +#define EXT2_INODES_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s)/EXT2_INODE_SIZE(s)) +/* limits imposed by 16-bit value gd_free_{blocks,inode}_count */ +#define EXT2_MAX_BLOCKS_PER_GROUP(s) ((1 << 16) - 8) +#define EXT2_MAX_INODES_PER_GROUP(s) ((1 << 16) - EXT2_INODES_PER_BLOCK(s)) +//#ifdef __KERNEL__ +//#define EXT2_DESC_PER_BLOCK(s) (EXT2_SB(s)->s_desc_per_block) +//#define EXT2_DESC_PER_BLOCK_BITS(s) (EXT2_SB(s)->s_desc_per_block_bits) +//#else +#define EXT2_DESC_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / EXT2_DESC_SIZE(s)) +//#endif + +/* + * Constants relative to the data blocks + */ +#define EXT2_NDIR_BLOCKS 12 +#define EXT2_IND_BLOCK EXT2_NDIR_BLOCKS +#define EXT2_DIND_BLOCK (EXT2_IND_BLOCK + 1) +#define EXT2_TIND_BLOCK (EXT2_DIND_BLOCK + 1) +#define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1) + +/* + * Inode flags + */ +#define EXT2_SECRM_FL 0x00000001 /* Secure deletion */ +#define EXT2_UNRM_FL 0x00000002 
/* Undelete */ +#define EXT2_COMPR_FL 0x00000004 /* Compress file */ +#define EXT2_SYNC_FL 0x00000008 /* Synchronous updates */ +#define EXT2_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define EXT2_APPEND_FL 0x00000020 /* writes to file may only append */ +#define EXT2_NODUMP_FL 0x00000040 /* do not dump file */ +#define EXT2_NOATIME_FL 0x00000080 /* do not update atime */ +/* Reserved for compression usage... */ +#define EXT2_DIRTY_FL 0x00000100 +#define EXT2_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ +#define EXT2_NOCOMPR_FL 0x00000400 /* Access raw compressed data */ +#define EXT2_ECOMPR_FL 0x00000800 /* Compression error */ +/* End compression flags --- maybe not all used */ +#define EXT2_BTREE_FL 0x00001000 /* btree format dir (same bit as EXT2_INDEX_FL) */ +#define EXT2_INDEX_FL 0x00001000 /* hash-indexed directory (same bit as EXT2_BTREE_FL) */ +#define EXT2_IMAGIC_FL 0x00002000 +#define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ +#define EXT2_NOTAIL_FL 0x00008000 /* file tail should not be merged */ +#define EXT2_DIRSYNC_FL 0x00010000 /* Synchronous directory modifications */ +#define EXT2_TOPDIR_FL 0x00020000 /* Top of directory hierarchies */ +#define EXT4_HUGE_FILE_FL 0x00040000 /* Set on each huge file */ +#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ +#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ +#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ +#define EXT4_SNAPFILE_FL 0x01000000 /* Inode is a snapshot */ +#define EXT4_SNAPFILE_DELETED_FL 0x04000000 /* Snapshot is being deleted */ +#define EXT4_SNAPFILE_SHRUNK_FL 0x08000000 /* Snapshot shrink has completed */ +#define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ + +#define EXT2_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */ +#define EXT2_FL_USER_MODIFIABLE 0x004B80FF /* User modifiable flags */ + +/* + * ioctl commands + */ + +/* Used for online resize; every block number in this variant is a __u32 */ +struct ext2_new_group_input { + __u32 group; /* Group number for this data */ + __u32 
block_bitmap; /* Absolute block number of block bitmap */ + __u32 inode_bitmap; /* Absolute block number of inode bitmap */ + __u32 inode_table; /* Absolute block number of inode table start */ + __u32 blocks_count; /* Total number of blocks in this group */ + __u16 reserved_blocks; /* Number of reserved blocks in this group */ + __u16 unused; /* Number of reserved GDT blocks in group -- NOTE(review): field is named "unused"; confirm which description is current */ +}; + +/* Same layout as ext2_new_group_input except that the three absolute block numbers are __u64 */ +struct ext4_new_group_input { + __u32 group; /* Group number for this data */ + __u64 block_bitmap; /* Absolute block number of block bitmap */ + __u64 inode_bitmap; /* Absolute block number of inode bitmap */ + __u64 inode_table; /* Absolute block number of inode table start */ + __u32 blocks_count; /* Total number of blocks in this group */ + __u16 reserved_blocks; /* Number of reserved blocks in this group */ + __u16 unused; +}; + +#ifdef __GNU__ /* Needed for the Hurd */ +#define _IOT_ext2_new_group_input _IOT (_IOTS(__u32), 5, _IOTS(__u16), 2, 0, 0) +#endif + +#define EXT2_IOC_GETFLAGS _IOR('f', 1, long) +#define EXT2_IOC_SETFLAGS _IOW('f', 2, long) +#define EXT2_IOC_GETVERSION _IOR('v', 1, long) +#define EXT2_IOC_SETVERSION _IOW('v', 2, long) +#define EXT2_IOC_GETVERSION_NEW _IOR('f', 3, long) +#define EXT2_IOC_SETVERSION_NEW _IOW('f', 4, long) +#define EXT2_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) +#define EXT2_IOC_GROUP_ADD _IOW('f', 8,struct ext2_new_group_input) +#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) /* same ('f', 8) pair as EXT2_IOC_GROUP_ADD; only the argument struct differs */ + +/* + * Structure of an inode on the disk + */ +struct ext2_inode { + __u16 i_mode; /* File mode */ + __u16 i_uid; /* Low 16 bits of Owner Uid */ + __u32 i_size; /* Size in bytes */ + __u32 i_atime; /* Access time */ + __u32 i_ctime; /* Inode change time */ + __u32 i_mtime; /* Modification time */ + __u32 i_dtime; /* Deletion Time */ + __u16 i_gid; /* Low 16 bits of Group Id */ + __u16 i_links_count; /* Links count */ + __u32 i_blocks; /* Blocks count */ + __u32 i_flags; /* File flags */ + union { + struct { + __u32 l_i_version; /* 
was l_i_reserved1 */ + } linux1; + struct { + __u32 h_i_translator; + } hurd1; + } osd1; /* OS dependent 1 */ + __u32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */ + __u32 i_generation; /* File version (for NFS) */ + __u32 i_file_acl; /* File ACL */ + __u32 i_dir_acl; /* Directory ACL */ + __u32 i_faddr; /* Fragment address */ + union { + struct { + __u16 l_i_blocks_hi; + __u16 l_i_file_acl_high; + __u16 l_i_uid_high; /* these 2 fields */ + __u16 l_i_gid_high; /* were reserved2[0] */ + __u32 l_i_reserved2; + } linux2; + struct { + __u8 h_i_frag; /* Fragment number */ + __u8 h_i_fsize; /* Fragment size */ + __u16 h_i_mode_high; + __u16 h_i_uid_high; + __u16 h_i_gid_high; + __u32 h_i_author; + } hurd2; + } osd2; /* OS dependent 2 */ +}; + +/* + * Permanent part of a large inode on the disk. + * The fields up to and including osd2 repeat struct ext2_inode + * member for member; the extra fields follow osd2. + */ +struct ext2_inode_large { + __u16 i_mode; /* File mode */ + __u16 i_uid; /* Low 16 bits of Owner Uid */ + __u32 i_size; /* Size in bytes */ + __u32 i_atime; /* Access time */ + __u32 i_ctime; /* Inode Change time */ + __u32 i_mtime; /* Modification time */ + __u32 i_dtime; /* Deletion Time */ + __u16 i_gid; /* Low 16 bits of Group Id */ + __u16 i_links_count; /* Links count */ + __u32 i_blocks; /* Blocks count */ + __u32 i_flags; /* File flags */ + union { + struct { + __u32 l_i_version; /* was l_i_reserved1 */ + } linux1; + struct { + __u32 h_i_translator; + } hurd1; + } osd1; /* OS dependent 1 */ + __u32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */ + __u32 i_generation; /* File version (for NFS) */ + __u32 i_file_acl; /* File ACL */ + __u32 i_dir_acl; /* Directory ACL */ + __u32 i_faddr; /* Fragment address */ + union { + struct { + __u16 l_i_blocks_hi; + __u16 l_i_file_acl_high; + __u16 l_i_uid_high; /* these 2 fields */ + __u16 l_i_gid_high; /* were reserved2[0] */ + __u32 l_i_reserved2; + } linux2; + struct { + __u8 h_i_frag; /* Fragment number */ + __u8 h_i_fsize; /* Fragment size */ + __u16 h_i_mode_high; + __u16 h_i_uid_high; + __u16 h_i_gid_high; + __u32 
h_i_author; + } hurd2; + } osd2; /* OS dependent 2 */ + __u16 i_extra_isize; + __u16 i_pad1; + __u32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ + __u32 i_mtime_extra; /* extra Modification time (nsec << 2 | epoch) */ + __u32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ + __u32 i_crtime; /* File creation time */ + __u32 i_crtime_extra; /* extra File creation time (nsec << 2 | epoch)*/ + __u32 i_version_hi; /* high 32 bits for 64-bit version */ +}; + +#define i_size_high i_dir_acl + +/* + * Short member aliases for the OS-dependent unions. + * NOTE(review): in the Linux branch, i_reserved1, i_frag and i_fsize + * name members (l_i_reserved1, l_i_frag, l_i_fsize) that the linux1 and + * linux2 structs declared in this header do not have; they only compile + * as long as nobody expands them. + */ +#if defined(__KERNEL__) || defined(__linux__) +#define i_reserved1 osd1.linux1.l_i_reserved1 +#define i_frag osd2.linux2.l_i_frag +#define i_fsize osd2.linux2.l_i_fsize +#define i_uid_low i_uid +#define i_gid_low i_gid +#define i_uid_high osd2.linux2.l_i_uid_high +#define i_gid_high osd2.linux2.l_i_gid_high +#define i_reserved2 osd2.linux2.l_i_reserved2 +#else +#if defined(__GNU__) + +/* No trailing ';' here: these expand inside member-access expressions */ +#define i_translator osd1.hurd1.h_i_translator +#define i_frag osd2.hurd2.h_i_frag +#define i_fsize osd2.hurd2.h_i_fsize +#define i_uid_high osd2.hurd2.h_i_uid_high +#define i_gid_high osd2.hurd2.h_i_gid_high +#define i_author osd2.hurd2.h_i_author + +#endif /* __GNU__ */ +#endif /* defined(__KERNEL__) || defined(__linux__) */ + +/* + * Combine the low and high 16-bit halves of the owner ids. The high + * half is widened to __u32 before the shift: a __u16 would otherwise be + * promoted to signed int, and shifting a value >= 0x8000 left by 16 + * overflows it. + */ +#define inode_uid(inode) ((inode).i_uid | ((__u32)(inode).osd2.linux2.l_i_uid_high << 16)) +#define inode_gid(inode) ((inode).i_gid | ((__u32)(inode).osd2.linux2.l_i_gid_high << 16)) +#define ext2fs_set_i_uid_high(inode,x) ((inode).osd2.linux2.l_i_uid_high = (x)) +#define ext2fs_set_i_gid_high(inode,x) ((inode).osd2.linux2.l_i_gid_high = (x)) + +/* + * File system states + */ +#define EXT2_VALID_FS 0x0001 /* Unmounted cleanly */ +#define EXT2_ERROR_FS 0x0002 /* Errors detected */ +#define EXT3_ORPHAN_FS 0x0004 /* Orphans being recovered */ + +/* + * Misc. 
filesystem flags + */ +#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */ +#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */ +#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* OK for use on development code */ +#define EXT2_FLAGS_IS_SNAPSHOT 0x0010 /* This is a snapshot image */ +#define EXT2_FLAGS_FIX_SNAPSHOT 0x0020 /* Snapshot inodes corrupted */ +#define EXT2_FLAGS_FIX_EXCLUDE 0x0040 /* Exclude bitmaps corrupted */ + +/* + * Mount flags + */ +#define EXT2_MOUNT_CHECK 0x0001 /* Do mount-time checks */ +#define EXT2_MOUNT_GRPID 0x0004 /* Create files with directory's group */ +#define EXT2_MOUNT_DEBUG 0x0008 /* Some debugging messages */ +#define EXT2_MOUNT_ERRORS_CONT 0x0010 /* Continue on errors */ +#define EXT2_MOUNT_ERRORS_RO 0x0020 /* Remount fs ro on errors */ +#define EXT2_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ +#define EXT2_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */ +#define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */ + +/* + * Clear, set or test one EXT2_MOUNT_<opt> bit; `opt` is the suffix only. + * clear_opt/set_opt modify the lvalue `o` in place. The argument and + * the whole expansion are parenthesized so the macros stay intact when + * used inside a larger expression. + */ +#define clear_opt(o, opt) ((o) &= ~EXT2_MOUNT_##opt) +#define set_opt(o, opt) ((o) |= EXT2_MOUNT_##opt) +#define test_opt(sb, opt) (EXT2_SB(sb)->s_mount_opt & \ + EXT2_MOUNT_##opt) +/* + * Maximal mount counts between two filesystem checks + */ +#define EXT2_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ + +/* + * Behaviour when detecting errors + */ +#define EXT2_ERRORS_CONTINUE 1 /* Continue execution */ +#define EXT2_ERRORS_RO 2 /* Remount fs read-only */ +#define EXT2_ERRORS_PANIC 3 /* Panic */ +#define EXT2_ERRORS_DEFAULT EXT2_ERRORS_CONTINUE + +/* Byte offset of MEMBER within TYPE; uses the compiler builtin from GCC 4 on */ +#if (__GNUC__ >= 4) +#define ext4_offsetof(TYPE,MEMBER) __builtin_offsetof(TYPE,MEMBER) +#else +#define ext4_offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +#endif + +/* + * Structure of the super block + */ +struct ext2_super_block { + __u32 s_inodes_count; /* Inodes count */ + __u32 s_blocks_count; /* Blocks count */ + __u32 s_r_blocks_count; /* Reserved blocks count */ + __u32 s_free_blocks_count; /* Free blocks count */ + __u32 
s_free_inodes_count; /* Free inodes count */ + __u32 s_first_data_block; /* First Data Block */ + __u32 s_log_block_size; /* Block size -- NOTE(review): the name suggests a log2 encoding; confirm against EXT2_BLOCK_SIZE() */ + __s32 s_log_frag_size; /* Fragment size */ + __u32 s_blocks_per_group; /* # Blocks per group */ + __u32 s_frags_per_group; /* # Fragments per group */ + __u32 s_inodes_per_group; /* # Inodes per group */ + __u32 s_mtime; /* Mount time */ + __u32 s_wtime; /* Write time */ + __u16 s_mnt_count; /* Mount count */ + __s16 s_max_mnt_count; /* Maximal mount count */ + __u16 s_magic; /* Magic signature */ + __u16 s_state; /* File system state (see "File system states" defines) */ + __u16 s_errors; /* Behaviour when detecting errors (see EXT2_ERRORS_* defines) */ + __u16 s_minor_rev_level; /* minor revision level */ + __u32 s_lastcheck; /* time of last check */ + __u32 s_checkinterval; /* max. time between checks */ + __u32 s_creator_os; /* OS (see "Codes for operating systems" defines) */ + __u32 s_rev_level; /* Revision level (see "Revision levels" defines) */ + __u16 s_def_resuid; /* Default uid for reserved blocks */ + __u16 s_def_resgid; /* Default gid for reserved blocks */ + /* + * These fields are for EXT2_DYNAMIC_REV superblocks only. + * + * Note: the difference between the compatible feature set and + * the incompatible feature set is that if there is a bit set + * in the incompatible feature set that the kernel doesn't + * know about, it should refuse to mount the filesystem. + * + * e2fsck's requirements are more strict; if it doesn't know + * about a feature in either the compatible or incompatible + * feature set, it must abort and not try to meddle with + * things it doesn't understand... 
+ */ + __u32 s_first_ino; /* First non-reserved inode */ + __u16 s_inode_size; /* size of inode structure */ + __u16 s_block_group_nr; /* block group # of this superblock */ + __u32 s_feature_compat; /* compatible feature set (EXT*_FEATURE_COMPAT_* bits) */ + __u32 s_feature_incompat; /* incompatible feature set (EXT*_FEATURE_INCOMPAT_* bits) */ + __u32 s_feature_ro_compat; /* readonly-compatible feature set (EXT*_FEATURE_RO_COMPAT_* bits) */ + __u8 s_uuid[16]; /* 128-bit uuid for volume */ + char s_volume_name[16]; /* volume name -- NOTE(review): fixed 16-byte field; confirm whether it is always NUL-terminated */ + char s_last_mounted[64]; /* directory where last mounted */ + __u32 s_algorithm_usage_bitmap; /* For compression */ + /* + * Performance hints. Directory preallocation should only + * happen if the EXT2_FEATURE_COMPAT_DIR_PREALLOC flag is on. + */ + __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ + __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ + __u16 s_reserved_gdt_blocks; /* Per group table for online growth */ + /* + * Journaling support valid if EXT2_FEATURE_COMPAT_HAS_JOURNAL set. + * NOTE(review): the only journal feature define in this header is spelled EXT3_FEATURE_COMPAT_HAS_JOURNAL. + */ + __u8 s_journal_uuid[16]; /* uuid of journal superblock */ + __u32 s_journal_inum; /* inode number of journal file */ + __u32 s_journal_dev; /* device number of journal file */ + __u32 s_last_orphan; /* start of list of inodes to delete */ + __u32 s_hash_seed[4]; /* HTREE hash seed */ + __u8 s_def_hash_version; /* Default hash version to use */ + __u8 s_jnl_backup_type; /* Default type of journal backup */ + __u16 s_desc_size; /* Group desc. 
size: INCOMPAT_64BIT */ + __u32 s_default_mount_opts; + __u32 s_first_meta_bg; /* First metablock group */ + __u32 s_mkfs_time; /* When the filesystem was created */ + __u32 s_jnl_blocks[17]; /* Backup of the journal inode */ + __u32 s_blocks_count_hi; /* Blocks count high 32bits */ + __u32 s_r_blocks_count_hi; /* Reserved blocks count high 32 bits*/ + __u32 s_free_blocks_hi; /* Free blocks count (high 32 bits, by analogy with s_blocks_count_hi) */ + __u16 s_min_extra_isize; /* All inodes have at least # bytes */ + __u16 s_want_extra_isize; /* New inodes should reserve # bytes */ + __u32 s_flags; /* Miscellaneous flags (see EXT2_FLAGS_* defines) */ + __u16 s_raid_stride; /* RAID stride */ + __u16 s_mmp_interval; /* # seconds to wait in MMP checking */ + __u64 s_mmp_block; /* Block for multi-mount protection */ + __u32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ + __u8 s_log_groups_per_flex; /* FLEX_BG group size */ + __u8 s_reserved_char_pad; + __u16 s_reserved_pad; /* Padding to next 32bits */ + __u64 s_kbytes_written; /* nr of lifetime kilobytes written */ + __u32 s_snapshot_inum; /* Inode number of active snapshot */ + __u32 s_snapshot_id; /* sequential ID of active snapshot */ + __u64 s_snapshot_r_blocks_count; /* reserved blocks for active + snapshot's future use */ + __u32 s_snapshot_list; /* inode number of the head of the on-disk snapshot list */ +/* EXT4_S_ERR_START / EXT4_S_ERR_END bracket the error-tracking fields: from s_error_count up to, but not including, s_mount_opts */ +#define EXT4_S_ERR_START ext4_offsetof(struct ext2_super_block, s_error_count) + __u32 s_error_count; /* number of fs errors */ + __u32 s_first_error_time; /* first time an error happened */ + __u32 s_first_error_ino; /* inode involved in first error */ + __u64 s_first_error_block; /* block involved of first error */ + __u8 s_first_error_func[32]; /* function where the error happened */ + __u32 s_first_error_line; /* line number where error happened */ + __u32 s_last_error_time; /* most recent time of an error */ + __u32 s_last_error_ino; /* inode involved in last error */ + __u32 s_last_error_line; /* line number where error happened */ + __u64 s_last_error_block; /* 
block involved in last error */ + __u8 s_last_error_func[32]; /* function where the error happened */ +#define EXT4_S_ERR_END ext4_offsetof(struct ext2_super_block, s_mount_opts) + __u8 s_mount_opts[64]; + __u32 s_reserved[112]; /* Padding to the end of the block */ +}; + +/* Size in bytes of the span between the two offsets above */ +#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START) + +/* + * Codes for operating systems + */ +#define EXT2_OS_LINUX 0 +#define EXT2_OS_HURD 1 +#define EXT2_OBSO_OS_MASIX 2 +#define EXT2_OS_FREEBSD 3 +#define EXT2_OS_LITES 4 + +/* + * Revision levels + */ +#define EXT2_GOOD_OLD_REV 0 /* The good old (original) format */ +#define EXT2_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ + +#define EXT2_CURRENT_REV EXT2_GOOD_OLD_REV +#define EXT2_MAX_SUPP_REV EXT2_DYNAMIC_REV + +#define EXT2_GOOD_OLD_INODE_SIZE 128 + +/* + * Journal inode backup types + */ +#define EXT3_JNL_BACKUP_BLOCKS 1 + +/* + * Feature set definitions + * + * The EXT2_HAS_*_FEATURE() macros return the masked bits (non-zero when + * any requested bit is set), not a normalized 0/1. + */ + +#define EXT2_HAS_COMPAT_FEATURE(sb,mask) \ + ( EXT2_SB(sb)->s_feature_compat & (mask) ) +#define EXT2_HAS_RO_COMPAT_FEATURE(sb,mask) \ + ( EXT2_SB(sb)->s_feature_ro_compat & (mask) ) +#define EXT2_HAS_INCOMPAT_FEATURE(sb,mask) \ + ( EXT2_SB(sb)->s_feature_incompat & (mask) ) + +#define EXT2_FEATURE_COMPAT_DIR_PREALLOC 0x0001 +#define EXT2_FEATURE_COMPAT_IMAGIC_INODES 0x0002 +#define EXT3_FEATURE_COMPAT_HAS_JOURNAL 0x0004 +#define EXT2_FEATURE_COMPAT_EXT_ATTR 0x0008 +#define EXT2_FEATURE_COMPAT_RESIZE_INODE 0x0010 +#define EXT2_FEATURE_COMPAT_DIR_INDEX 0x0020 +#define EXT2_FEATURE_COMPAT_LAZY_BG 0x0040 +#define EXT2_FEATURE_COMPAT_EXCLUDE_INODE 0x0080 + +#define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 +#define EXT2_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 +/* #define EXT2_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 not used */ +/* NOTE(review): EXT2_FEATURE_RO_COMPAT_SUPP further down still names EXT2_FEATURE_RO_COMPAT_BTREE_DIR */ +#define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008 +#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 +#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 +#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 +#define EXT4_FEATURE_RO_COMPAT_HAS_SNAPSHOT 0x0080 + 
+#define EXT2_FEATURE_INCOMPAT_COMPRESSION 0x0001 +#define EXT2_FEATURE_INCOMPAT_FILETYPE 0x0002 +#define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ +#define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ +#define EXT2_FEATURE_INCOMPAT_META_BG 0x0010 +#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 +#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 +#define EXT4_FEATURE_INCOMPAT_MMP 0x0100 +#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 +#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 +#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 + +/* + * EXT2_FEATURE_RO_COMPAT_BTREE_DIR exists in this header only as a + * commented-out define, yet the RO_COMPAT_SUPP mask below names it. + * Supply the same 0x0004 value here, unless another header already + * did, so the mask expands to defined names only. + */ +#ifndef EXT2_FEATURE_RO_COMPAT_BTREE_DIR +#define EXT2_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 +#endif + +/* Feature bits this code declares itself able to handle, per feature class */ +#define EXT2_FEATURE_COMPAT_SUPP 0 +#define EXT2_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE) +#define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ + EXT2_FEATURE_RO_COMPAT_BTREE_DIR) + +/* + * Default values for user and/or group using reserved blocks + */ +#define EXT2_DEF_RESUID 0 +#define EXT2_DEF_RESGID 0 + +/* + * Default mount options + */ +#define EXT2_DEFM_DEBUG 0x0001 +#define EXT2_DEFM_BSDGROUPS 0x0002 +#define EXT2_DEFM_XATTR_USER 0x0004 +#define EXT2_DEFM_ACL 0x0008 +#define EXT2_DEFM_UID16 0x0010 +#define EXT3_DEFM_JMODE 0x0060 +#define EXT3_DEFM_JMODE_DATA 0x0020 +#define EXT3_DEFM_JMODE_ORDERED 0x0040 +#define EXT3_DEFM_JMODE_WBACK 0x0060 +#define EXT4_DEFM_NOBARRIER 0x0100 +#define EXT4_DEFM_BLOCK_VALIDITY 0x0200 +#define EXT4_DEFM_DISCARD 0x0400 +#define EXT4_DEFM_NODELALLOC 0x0800 + +/* + * Structure of a directory entry + */ +#define EXT2_NAME_LEN 255 + +struct ext2_dir_entry { + __u32 inode; /* Inode number */ + __u16 rec_len; /* Directory entry length */ + __u16 name_len; /* Name length */ + char name[EXT2_NAME_LEN]; /* File name */ +}; + +/* + * The new version of the directory entry. Since EXT2 structures are + * stored in intel byte order, and the name_len field could never be + * bigger than 255 chars, it's safe to reclaim the extra byte for the + * file_type field. 
+ */ +struct ext2_dir_entry_2 { + __u32 inode; /* Inode number */ + __u16 rec_len; /* Directory entry length */ + __u8 name_len; /* Name length */ + __u8 file_type; /* one of the EXT2_FT_* values below */ + char name[EXT2_NAME_LEN]; /* File name */ +}; + +/* + * Ext2 directory file types. Only the low 3 bits are used. The + * other bits are reserved for now. + */ +#define EXT2_FT_UNKNOWN 0 +#define EXT2_FT_REG_FILE 1 +#define EXT2_FT_DIR 2 +#define EXT2_FT_CHRDEV 3 +#define EXT2_FT_BLKDEV 4 +#define EXT2_FT_FIFO 5 +#define EXT2_FT_SOCK 6 +#define EXT2_FT_SYMLINK 7 + +#define EXT2_FT_MAX 8 + +/* + * EXT2_DIR_PAD defines the directory entries boundaries + * + * NOTE: It must be a multiple of 4 + * + * EXT2_DIR_REC_LEN(name_len) rounds name_len + 8 up to the next + * multiple of EXT2_DIR_PAD; the 8 matches the bytes that precede + * name[] in the entry structs (4 + 2 + 1 + 1, or 4 + 2 + 2). + */ +#define EXT2_DIR_PAD 4 +#define EXT2_DIR_ROUND (EXT2_DIR_PAD - 1) +#define EXT2_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT2_DIR_ROUND) & \ + ~EXT2_DIR_ROUND) + +/* + * This structure will be used for multiple mount protection. It will be + * written into the block number saved in the s_mmp_block field in the + * superblock. + */ +#define EXT2_MMP_MAGIC 0x004D4D50 /* ASCII for MMP */ +#define EXT2_MMP_CLEAN 0xFF4D4D50 /* Value of mmp_seq for clean unmount */ +#define EXT2_MMP_FSCK_ON 0xE24D4D50 /* Value of mmp_seq when being fscked */ + +struct mmp_struct { + __u32 mmp_magic; + __u32 mmp_seq; + __u64 mmp_time; + char mmp_nodename[64]; + char mmp_bdevname[32]; + __u16 mmp_interval; + __u16 mmp_pad1; + __u32 mmp_pad2; +}; + +/* + * Interval in number of seconds to update the MMP sequence number. + */ +#define EXT2_MMP_DEF_INTERVAL 5 + +#endif /* _LINUX_EXT2_FS_H */ diff --git a/fs/ext4/format/ext2_io.h b/fs/ext4/format/ext2_io.h new file mode 100755 index 0000000..b917559 --- /dev/null +++ b/fs/ext4/format/ext2_io.h @@ -0,0 +1,134 @@ +/* + * io.h --- the I/O manager abstraction + * + * Copyright (C) 1993, 1994, 1995, 1996 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. 
+ * %End-Header% + */ + +#ifndef _EXT2FS_EXT2_IO_H +#define _EXT2FS_EXT2_IO_H + +/* + * ext2_loff_t is defined here since unix_io.c needs it. + */ +#if defined(__GNUC__) || defined(HAS_LONG_LONG) +typedef long long ext2_loff_t; +#else +typedef long ext2_loff_t; +#endif + +/* llseek.c */ +ext2_loff_t ext2fs_llseek (int, ext2_loff_t, int); + +/* Handle types: each is a pointer to the struct of the same name defined below */ +typedef struct struct_io_manager *io_manager; +typedef struct struct_io_channel *io_channel; +typedef struct struct_io_stats *io_stats; + +#define CHANNEL_FLAGS_WRITETHROUGH 0x01 + +/* One open channel; `manager` points at the function table used to operate on it */ +struct struct_io_channel { + errcode_t magic; + io_manager manager; + char *name; + int block_size; + errcode_t (*read_error)(io_channel channel, + unsigned long block, + int count, + void *data, + size_t size, + int actual_bytes_read, + errcode_t error); + errcode_t (*write_error)(io_channel channel, + unsigned long block, + int count, + const void *data, + size_t size, + int actual_bytes_written, + errcode_t error); + int refcount; + int flags; + long reserved[14]; + void *private_data; + void *app_data; +}; + +struct struct_io_stats { + int num_fields; + int reserved; + unsigned long long bytes_read; + unsigned long long bytes_written; +}; + +/* Function table of an I/O backend; the *_blk64 members take an unsigned long long block number */ +struct struct_io_manager { + errcode_t magic; + const char *name; + errcode_t (*open)(const char *name, int flags, io_channel *channel); + errcode_t (*close)(io_channel channel); + errcode_t (*set_blksize)(io_channel channel, int blksize); + errcode_t (*read_blk)(io_channel channel, unsigned long block, + int count, void *data); + errcode_t (*write_blk)(io_channel channel, unsigned long block, + int count, const void *data); + errcode_t (*flush)(io_channel channel); + errcode_t (*write_byte)(io_channel channel, unsigned long offset, + int count, const void *data); + errcode_t (*set_option)(io_channel channel, const char *option, + const char *arg); + errcode_t (*get_stats)(io_channel channel, io_stats *io_stats); + errcode_t (*read_blk64)(io_channel channel, unsigned long long block, + int count, void *data); 
+ errcode_t (*write_blk64)(io_channel channel, unsigned long long block, + int count, const void *data); + long reserved[16]; +}; + +#define IO_FLAG_RW 0x0001 +#define IO_FLAG_EXCLUSIVE 0x0002 +#define IO_FLAG_DIRECT_IO 0x0004 + +/* + * Convenience functions.... + * + * Each macro dispatches through the channel's manager table. The + * channel argument `c` is expanded twice, so it must not have side + * effects. + */ +#define io_channel_close(c) ((c)->manager->close((c))) +#define io_channel_set_blksize(c,s) ((c)->manager->set_blksize((c),s)) +#define io_channel_read_blk(c,b,n,d) ((c)->manager->read_blk((c),b,n,d)) +#define io_channel_write_blk(c,b,n,d) ((c)->manager->write_blk((c),b,n,d)) +#define io_channel_flush(c) ((c)->manager->flush((c))) +#define io_channel_bumpcount(c) ((c)->refcount++) + +/* io_manager.c */ +extern errcode_t io_channel_set_options(io_channel channel, + const char *options); +extern errcode_t io_channel_write_byte(io_channel channel, + unsigned long offset, + int count, const void *data); +extern errcode_t io_channel_read_blk64(io_channel channel, + unsigned long long block, + int count, void *data); +extern errcode_t io_channel_write_blk64(io_channel channel, + unsigned long long block, + int count, const void *data); + +/* unix_io.c */ +extern io_manager unix_io_manager; + +/* undo_io.c */ +extern io_manager undo_io_manager; +extern errcode_t set_undo_io_backing_manager(io_manager manager); +extern errcode_t set_undo_io_backup_file(char *file_name); + +/* test_io.c */ +extern io_manager test_io_manager, test_io_backing_manager; +extern void (*test_io_cb_read_blk) + (unsigned long block, int count, errcode_t err); +extern void (*test_io_cb_write_blk) + (unsigned long block, int count, errcode_t err); +extern void (*test_io_cb_set_blksize) + (int blksize, errcode_t err); + +#endif /* _EXT2FS_EXT2_IO_H */ + diff --git a/fs/ext4/format/ext2_types.h b/fs/ext4/format/ext2_types.h new file mode 100755 index 0000000..36f0eed --- /dev/null +++ b/fs/ext4/format/ext2_types.h @@ -0,0 +1,145 @@ +/* + * If linux/types.h has already been included, assume it has defined + * everything we need. 
(cross fingers) Other header files may have + * also defined the types that we need. + */ +#if (!defined(_LINUX_TYPES_H) && !defined(_BLKID_TYPES_H) && \ + !defined(_EXT2_TYPES_H)) +#define _EXT2_TYPES_H + +#define __S8_TYPEDEF __signed__ char +#define __U8_TYPEDEF unsigned char +#define __S16_TYPEDEF __signed__ short +#define __U16_TYPEDEF unsigned short +#define __S32_TYPEDEF __signed__ int +#define __U32_TYPEDEF unsigned int +#define __S64_TYPEDEF __signed__ long long +#define __U64_TYPEDEF unsigned long long + +/* + * All eight __*_TYPEDEF helpers are defined unconditionally just above, + * so every "#ifdef __*_TYPEDEF" below takes its first branch. The + * #else ladders compare literal sizes (e.g. "4 == 2") and are not + * selected in this configuration. + */ +#ifdef __U8_TYPEDEF +typedef __U8_TYPEDEF __u8; +#else +typedef unsigned char __u8; +#endif + +#ifdef __S8_TYPEDEF +typedef __S8_TYPEDEF __s8; +#else +typedef signed char __s8; +#endif + +#ifdef __U16_TYPEDEF +typedef __U16_TYPEDEF __u16; +#else +#if (4 == 2) +typedef unsigned int __u16; +#else +#if (2 == 2) +typedef unsigned short __u16; +#else + ?==error: undefined 16 bit type +#endif /* SIZEOF_SHORT == 2 */ +#endif /* SIZEOF_INT == 2 */ +#endif /* __U16_TYPEDEF */ + +#ifdef __S16_TYPEDEF +typedef __S16_TYPEDEF __s16; +#else +#if (4 == 2) +typedef int __s16; +#else +#if (2 == 2) +typedef short __s16; +#else + ?==error: undefined 16 bit type +#endif /* SIZEOF_SHORT == 2 */ +#endif /* SIZEOF_INT == 2 */ +#endif /* __S16_TYPEDEF */ + + +#ifdef __U32_TYPEDEF +typedef __U32_TYPEDEF __u32; +#else +#if (4 == 4) +typedef unsigned int __u32; +#else +#if (8 == 4) +typedef unsigned long __u32; +#else +#if (2 == 4) +typedef unsigned short __u32; +#else + ?== error: undefined 32 bit type +#endif /* SIZEOF_SHORT == 4 */ +#endif /* SIZEOF_LONG == 4 */ +#endif /* SIZEOF_INT == 4 */ +#endif /* __U32_TYPEDEF */ + +#ifdef __S32_TYPEDEF +typedef __S32_TYPEDEF __s32; +#else +#if (4 == 4) +typedef int __s32; +#else +#if (8 == 4) +typedef long __s32; +#else +#if (2 == 4) +typedef short __s32; +#else + ?== error: undefined 32 bit type +#endif /* SIZEOF_SHORT == 4 */ +#endif /* SIZEOF_LONG == 4 */ +#endif /* SIZEOF_INT == 4 */ +#endif /* __S32_TYPEDEF */ + +#ifdef 
__U64_TYPEDEF +typedef __U64_TYPEDEF __u64; +#else +#if (4 == 8) +typedef unsigned int __u64; +#else +#if (8 == 8) +typedef unsigned long __u64; +#else +#if (8 == 8) +typedef unsigned long long __u64; +#endif /* SIZEOF_LONG_LONG == 8 */ +#endif /* SIZEOF_LONG == 8 */ +#endif /* SIZEOF_INT == 8 */ +#endif /* __U64_TYPEDEF */ + +#ifdef __S64_TYPEDEF +typedef __S64_TYPEDEF __s64; +#else +#if (4 == 8) +typedef int __s64; +#else +#if (8 == 8) +typedef long __s64; +#else +#if (8 == 8) +#if defined(__GNUC__) +typedef __signed__ long long __s64; +#else +typedef signed long long __s64; +#endif /* __GNUC__ */ +#endif /* SIZEOF_LONG_LONG == 8 */ +#endif /* SIZEOF_LONG == 8 */ +#endif /* SIZEOF_INT == 8 */ +#endif /* __S64_TYPEDEF */ + +/* The __*_TYPEDEF helpers are only needed for the typedefs above; remove them again */ +#undef __S8_TYPEDEF +#undef __U8_TYPEDEF +#undef __S16_TYPEDEF +#undef __U16_TYPEDEF +#undef __S32_TYPEDEF +#undef __U32_TYPEDEF +#undef __S64_TYPEDEF +#undef __U64_TYPEDEF + +#endif /* _*_TYPES_H */ + +/* These defines are needed for the public ext2fs.h header file */ +#define HAVE_SYS_TYPES_H 1 +#undef WORDS_BIGENDIAN diff --git a/fs/ext4/format/ext2fs.h b/fs/ext4/format/ext2fs.h new file mode 100755 index 0000000..2dcb053 --- /dev/null +++ b/fs/ext4/format/ext2fs.h @@ -0,0 +1,1377 @@ +/* + * ext2fs.h --- ext2fs + * + * Copyright (C) 1993, 1994, 1995, 1996 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. + * %End-Header% + */ + +#ifndef _EXT2FS_EXT2FS_H +#define _EXT2FS_EXT2FS_H + +#ifdef __GNUC__ +#define EXT2FS_ATTR(x) __attribute__(x) +#else +#define EXT2FS_ATTR(x) +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Non-GNU C compilers won't necessarily understand inline + */ +#if (!defined(__GNUC__) && !defined(__WATCOMC__)) +#define NO_INLINE_FUNCS +#endif + +/* + * Where the master copy of the superblock is located, and how big + * superblocks are supposed to be. 
We define SUPERBLOCK_SIZE because + * the size of the superblock structure is not necessarily trustworthy + * (some versions have the padding set up so that the superblock is + * 1032 bytes long). + */ +#define SUPERBLOCK_OFFSET 1024 +#define SUPERBLOCK_SIZE 1024 + +/* + * The last ext2fs revision level that this version of the library is + * able to support. + */ +#define EXT2_LIB_CURRENT_REV EXT2_DYNAMIC_REV + +//#ifdef HAVE_SYS_TYPES_H +//#include <sys/types.h> +//#endif + +//#include <stdio.h> +//#include <stdlib.h> +//#include <string.h> +#include <errno.h> + +/* Forced to 1 here, so the quoted (same-directory) includes below are the ones used */ +#define EXT2_FLAT_INCLUDES 1 + +#if EXT2_FLAT_INCLUDES +#include "ext2_types.h" +#include "ext2_fs.h" +#include "ext3_extents.h" +#else +#include <ext2fs/ext2_types.h> +#include <ext2fs/ext2_fs.h> +#include <ext2fs/ext3_extents.h> +#endif /* EXT2_FLAT_INCLUDES */ + +/* Scalar types used throughout the library; note blk_t is 32-bit while blk64_t is 64-bit */ +typedef __u32 ext2_ino_t; +typedef __u32 blk_t; +typedef __u64 blk64_t; +typedef __u32 dgrp_t; +typedef __u32 ext2_off_t; +typedef __s64 e2_blkcnt_t; +typedef __u32 ext2_dirhash_t; + +#if EXT2_FLAT_INCLUDES +#include "com_err.h" +#include "ext2_io.h" +#include "ext2_err.h" +#include "ext2_ext_attr.h" +#else +#include <et/com_err.h> +#include <ext2fs/ext2_io.h> +#include <ext2fs/ext2_err.h> +#include <ext2fs/ext2_ext_attr.h> +#endif + +/* + * Portability help for Microsoft Visual C++ + */ +#ifdef _MSC_VER +#define EXT2_QSORT_TYPE int __cdecl +#else +#define EXT2_QSORT_TYPE int +#endif + +typedef struct struct_ext2_filsys *ext2_filsys; + +#define EXT2FS_MARK_ERROR 0 +#define EXT2FS_UNMARK_ERROR 1 +#define EXT2FS_TEST_ERROR 2 + +/* All three bitmap handle types are pointers to the same generic bitmap struct */ +typedef struct ext2fs_struct_generic_bitmap *ext2fs_generic_bitmap; +typedef struct ext2fs_struct_generic_bitmap *ext2fs_inode_bitmap; +typedef struct ext2fs_struct_generic_bitmap *ext2fs_block_bitmap; + +#define EXT2_FIRST_INODE(s) EXT2_FIRST_INO(s) + + +/* + * Badblocks list definitions + */ + +typedef struct ext2_struct_u32_list *ext2_badblocks_list; +typedef struct ext2_struct_u32_iterate *ext2_badblocks_iterate; + 
+typedef struct ext2_struct_u32_list *ext2_u32_list; +typedef struct ext2_struct_u32_iterate *ext2_u32_iterate; + +/* old names; they alias the same two u32 list/iterate struct pointers */ +typedef struct ext2_struct_u32_list *badblocks_list; +typedef struct ext2_struct_u32_iterate *badblocks_iterate; + +#define BADBLOCKS_FLAG_DIRTY 1 + +/* + * ext2_dblist structure and abstractions (see dblist.c) + */ +struct ext2_db_entry { + ext2_ino_t ino; + blk_t blk; + int blockcnt; +}; + +typedef struct ext2_struct_dblist *ext2_dblist; + +#define DBLIST_ABORT 1 + +/* + * ext2_fileio definitions + */ + +#define EXT2_FILE_WRITE 0x0001 +#define EXT2_FILE_CREATE 0x0002 + +#define EXT2_FILE_MASK 0x00FF + +#define EXT2_FILE_BUF_DIRTY 0x4000 +#define EXT2_FILE_BUF_VALID 0x2000 + +typedef struct ext2_file *ext2_file_t; + +#define EXT2_SEEK_SET 0 +#define EXT2_SEEK_CUR 1 +#define EXT2_SEEK_END 2 + +/* + * Flags for the ext2_filsys structure and for ext2fs_open() + * (bits 0x20000 and 0x40000 have no define in this list) + */ +#define EXT2_FLAG_RW 0x01 +#define EXT2_FLAG_CHANGED 0x02 +#define EXT2_FLAG_DIRTY 0x04 +#define EXT2_FLAG_VALID 0x08 +#define EXT2_FLAG_IB_DIRTY 0x10 +#define EXT2_FLAG_BB_DIRTY 0x20 +#define EXT2_FLAG_SWAP_BYTES 0x40 +#define EXT2_FLAG_SWAP_BYTES_READ 0x80 +#define EXT2_FLAG_SWAP_BYTES_WRITE 0x100 +#define EXT2_FLAG_MASTER_SB_ONLY 0x200 +#define EXT2_FLAG_FORCE 0x400 +#define EXT2_FLAG_SUPER_ONLY 0x800 +#define EXT2_FLAG_JOURNAL_DEV_OK 0x1000 +#define EXT2_FLAG_IMAGE_FILE 0x2000 +#define EXT2_FLAG_EXCLUSIVE 0x4000 +#define EXT2_FLAG_SOFTSUPP_FEATURES 0x8000 +#define EXT2_FLAG_NOFREE_ON_ERROR 0x10000 +#define EXT2_FLAG_DIRECT_IO 0x80000 + +/* + * Special flag in the ext2 inode i_flag field that means that this is + * a new inode. (So that ext2_write_inode() can clear extra fields.) 
+ */ +#define EXT2_NEW_INODE_FL 0x80000000 + +/* + * Flags for mkjournal + * + * EXT2_MKJOURNAL_V1_SUPER Make a (deprecated) V1 journal superblock + */ +#define EXT2_MKJOURNAL_V1_SUPER 0x0000001 + +/* + * Filesystem handle structure; the ext2_filsys typedef is a pointer to it. + * The flags member holds EXT2_FLAG_* bits. + */ +struct struct_ext2_filsys { + errcode_t magic; + io_channel io; + int flags; + struct ext2_super_block * super; + unsigned int blocksize; + int fragsize; + dgrp_t group_desc_count; + unsigned long desc_blocks; + struct ext2_group_desc * group_desc; + int inode_blocks_per_group; + ext2fs_inode_bitmap inode_map; + ext2fs_block_bitmap block_map; + errcode_t (*get_blocks)(ext2_filsys fs, ext2_ino_t ino, blk_t *blocks); + errcode_t (*check_directory)(ext2_filsys fs, ext2_ino_t ino); + errcode_t (*write_bitmaps)(ext2_filsys fs); + errcode_t (*read_inode)(ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode *inode); + errcode_t (*write_inode)(ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode *inode); + ext2_badblocks_list badblocks; + ext2_dblist dblist; + __u32 stride; /* for mke2fs */ + struct ext2_super_block * orig_super; + struct ext2_image_hdr * image_header; + __u32 umask; + time_t now; + /* + * Reserved for future expansion + */ + __u32 reserved[7]; + + /* + * Reserved for the use of the calling application. + */ + void * priv_data; + + /* + * Inode cache + */ + struct ext2_inode_cache *icache; + io_channel image_io; + + /* + * More callback functions + */ + errcode_t (*get_alloc_block)(ext2_filsys fs, blk64_t goal, + blk64_t *ret); + void (*block_alloc_stats)(ext2_filsys fs, blk64_t blk, int inuse); +}; + +#if EXT2_FLAT_INCLUDES +#include "bitops.h" +#else +#include <ext2fs/bitops.h> +#endif + +/* + * Return flags for the block iterator functions + */ +#define BLOCK_CHANGED 1 +#define BLOCK_ABORT 2 +#define BLOCK_ERROR 4 + +/* + * Block iterate flags + * + * BLOCK_FLAG_APPEND, or BLOCK_FLAG_HOLE, indicates that the iterator + * function should be called on blocks where the block number is zero. 
+ * This is used by ext2fs_expand_dir() to be able to add a new block + * to an inode. It can also be used for programs that want to be able + * to deal with files that contain "holes". + * + * BLOCK_FLAG_DEPTH_TRAVERSE indicates that the iterator function for + * the indirect, doubly indirect, etc. blocks should be called after + * all of the blocks contained in the indirect blocks are processed. + * This is useful if you are going to be deallocating blocks from an + * inode. + * + * BLOCK_FLAG_DATA_ONLY indicates that the iterator function should be + * called for data blocks only. + * + * BLOCK_FLAG_READ_ONLY is a promise by the caller that it will not + * modify the returned block number. + * + * BLOCK_FLAG_NO_LARGE is for internal use only. It informs + * ext2fs_block_iterate2 that large files won't be accepted. + */ +#define BLOCK_FLAG_APPEND 1 +#define BLOCK_FLAG_HOLE 1 +#define BLOCK_FLAG_DEPTH_TRAVERSE 2 +#define BLOCK_FLAG_DATA_ONLY 4 +#define BLOCK_FLAG_READ_ONLY 8 + +#define BLOCK_FLAG_NO_LARGE 0x1000 + +/* + * Magic "block count" return values for the block iterator function. 
+ */ +#define BLOCK_COUNT_IND (-1) +#define BLOCK_COUNT_DIND (-2) +#define BLOCK_COUNT_TIND (-3) +#define BLOCK_COUNT_TRANSLATOR (-4) + +#if 0 +/* + * Flags for ext2fs_move_blocks + */ +#define EXT2_BMOVE_GET_DBLIST 0x0001 +#define EXT2_BMOVE_DEBUG 0x0002 +#endif + +/* + * Generic (non-filesystem layout specific) extents structure + */ + +#define EXT2_EXTENT_FLAGS_LEAF 0x0001 +#define EXT2_EXTENT_FLAGS_UNINIT 0x0002 +#define EXT2_EXTENT_FLAGS_SECOND_VISIT 0x0004 + +struct ext2fs_extent { + blk64_t e_pblk; /* first physical block */ + blk64_t e_lblk; /* first logical block extent covers */ + __u32 e_len; /* number of blocks covered by extent */ + __u32 e_flags; /* extent flags */ +}; + +typedef struct ext2_extent_handle *ext2_extent_handle_t; +typedef struct ext2_extent_path *ext2_extent_path_t; + +/* + * Flags used by ext2fs_extent_get() + */ +#define EXT2_EXTENT_CURRENT 0x0000 +#define EXT2_EXTENT_MOVE_MASK 0x000F +#define EXT2_EXTENT_ROOT 0x0001 +#define EXT2_EXTENT_LAST_LEAF 0x0002 +#define EXT2_EXTENT_FIRST_SIB 0x0003 +#define EXT2_EXTENT_LAST_SIB 0x0004 +#define EXT2_EXTENT_NEXT_SIB 0x0005 +#define EXT2_EXTENT_PREV_SIB 0x0006 +#define EXT2_EXTENT_NEXT_LEAF 0x0007 +#define EXT2_EXTENT_PREV_LEAF 0x0008 +#define EXT2_EXTENT_NEXT 0x0009 +#define EXT2_EXTENT_PREV 0x000A +#define EXT2_EXTENT_UP 0x000B +#define EXT2_EXTENT_DOWN 0x000C +#define EXT2_EXTENT_DOWN_AND_LAST 0x000D + +/* + * Flags used by ext2fs_extent_insert() + */ +#define EXT2_EXTENT_INSERT_AFTER 0x0001 /* insert after handle loc'n */ +#define EXT2_EXTENT_INSERT_NOSPLIT 0x0002 /* insert may not cause split */ + +/* + * Flags used by ext2fs_extent_delete() + */ +#define EXT2_EXTENT_DELETE_KEEP_EMPTY 0x001 /* keep node if last extnt gone */ + +/* + * Flags used by ext2fs_extent_set_bmap() + */ +#define EXT2_EXTENT_SET_BMAP_UNINIT 0x0001 + +/* + * Data structure returned by ext2fs_extent_get_info() + */ +struct ext2_extent_info { + int curr_entry; + int curr_level; + int num_entries; + int max_entries; + 
int max_depth; + int bytes_avail; + blk64_t max_lblk; + blk64_t max_pblk; + __u32 max_len; + __u32 max_uninit_len; +}; + +/* + * Flags for directory block reading and writing functions + */ +#define EXT2_DIRBLOCK_V2_STRUCT 0x0001 + +/* + * Return flags for the directory iterator functions + */ +#define DIRENT_CHANGED 1 +#define DIRENT_ABORT 2 +#define DIRENT_ERROR 3 + +/* + * Directory iterator flags + */ + +#define DIRENT_FLAG_INCLUDE_EMPTY 1 +#define DIRENT_FLAG_INCLUDE_REMOVED 2 + +#define DIRENT_DOT_FILE 1 +#define DIRENT_DOT_DOT_FILE 2 +#define DIRENT_OTHER_FILE 3 +#define DIRENT_DELETED_FILE 4 + +/* + * Inode scan definitions + */ +typedef struct ext2_struct_inode_scan *ext2_inode_scan; + +/* + * ext2fs_scan flags + */ +#define EXT2_SF_CHK_BADBLOCKS 0x0001 +#define EXT2_SF_BAD_INODE_BLK 0x0002 +#define EXT2_SF_BAD_EXTRA_BYTES 0x0004 +#define EXT2_SF_SKIP_MISSING_ITABLE 0x0008 +#define EXT2_SF_DO_LAZY 0x0010 + +/* + * ext2fs_check_if_mounted flags + */ +#define EXT2_MF_MOUNTED 1 +#define EXT2_MF_ISROOT 2 +#define EXT2_MF_READONLY 4 +#define EXT2_MF_SWAP 8 +#define EXT2_MF_BUSY 16 + +/* + * Ext2/linux mode flags. We define them here so that we don't need + * to depend on the OS's sys/stat.h, since we may be compiling on a + * non-Linux system. 
+ */ +#define LINUX_S_IFMT 00170000 +#define LINUX_S_IFSOCK 0140000 +#define LINUX_S_IFLNK 0120000 +#define LINUX_S_IFREG 0100000 +#define LINUX_S_IFBLK 0060000 +#define LINUX_S_IFDIR 0040000 +#define LINUX_S_IFCHR 0020000 +#define LINUX_S_IFIFO 0010000 +#define LINUX_S_ISUID 0004000 +#define LINUX_S_ISGID 0002000 +#define LINUX_S_ISVTX 0001000 + +#define LINUX_S_IRWXU 00700 +#define LINUX_S_IRUSR 00400 +#define LINUX_S_IWUSR 00200 +#define LINUX_S_IXUSR 00100 + +#define LINUX_S_IRWXG 00070 +#define LINUX_S_IRGRP 00040 +#define LINUX_S_IWGRP 00020 +#define LINUX_S_IXGRP 00010 + +#define LINUX_S_IRWXO 00007 +#define LINUX_S_IROTH 00004 +#define LINUX_S_IWOTH 00002 +#define LINUX_S_IXOTH 00001 + +#define LINUX_S_ISLNK(m) (((m) & LINUX_S_IFMT) == LINUX_S_IFLNK) +#define LINUX_S_ISREG(m) (((m) & LINUX_S_IFMT) == LINUX_S_IFREG) +#define LINUX_S_ISDIR(m) (((m) & LINUX_S_IFMT) == LINUX_S_IFDIR) +#define LINUX_S_ISCHR(m) (((m) & LINUX_S_IFMT) == LINUX_S_IFCHR) +#define LINUX_S_ISBLK(m) (((m) & LINUX_S_IFMT) == LINUX_S_IFBLK) +#define LINUX_S_ISFIFO(m) (((m) & LINUX_S_IFMT) == LINUX_S_IFIFO) +#define LINUX_S_ISSOCK(m) (((m) & LINUX_S_IFMT) == LINUX_S_IFSOCK) + +/* + * ext2 size of an inode + */ +#define EXT2_I_SIZE(i) ((i)->i_size | ((__u64) (i)->i_size_high << 32)) + +/* + * ext2_icount_t abstraction + */ +#define EXT2_ICOUNT_OPT_INCREMENT 0x01 + +typedef struct ext2_icount *ext2_icount_t; + +/* + * Flags for ext2fs_bmap + */ +#define BMAP_ALLOC 0x0001 +#define BMAP_SET 0x0002 + +/* + * Returned flags from ext2fs_bmap + */ +#define BMAP_RET_UNINIT 0x0001 + +/* + * Flags for imager.c functions + */ +#define IMAGER_FLAG_INODEMAP 1 +#define IMAGER_FLAG_SPARSEWRITE 2 + +/* + * For checking structure magic numbers... 
+ */ + +#define EXT2_CHECK_MAGIC(struct, code) \ + if ((struct)->magic != (code)) return (code) + + +/* + * For ext2 compression support + */ +#define EXT2FS_COMPRESSED_BLKADDR ((blk_t) -1) +#define HOLE_BLKADDR(_b) ((_b) == 0 || (_b) == EXT2FS_COMPRESSED_BLKADDR) + +/* + * Features supported by this version of the library + */ +#define EXT2_LIB_FEATURE_COMPAT_SUPP (EXT2_FEATURE_COMPAT_DIR_PREALLOC|\ + EXT2_FEATURE_COMPAT_IMAGIC_INODES|\ + EXT3_FEATURE_COMPAT_HAS_JOURNAL|\ + EXT2_FEATURE_COMPAT_RESIZE_INODE|\ + EXT2_FEATURE_COMPAT_DIR_INDEX|\ + EXT2_FEATURE_COMPAT_EXT_ATTR) + +/* This #ifdef is temporary until compression is fully supported */ +#ifdef ENABLE_COMPRESSION +#ifndef I_KNOW_THAT_COMPRESSION_IS_EXPERIMENTAL +/* If the below warning bugs you, then have + `CPPFLAGS=-DI_KNOW_THAT_COMPRESSION_IS_EXPERIMENTAL' in your + environment at configure time. */ + #warning "Compression support is experimental" +#endif +#define EXT2_LIB_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE|\ + EXT2_FEATURE_INCOMPAT_COMPRESSION|\ + EXT3_FEATURE_INCOMPAT_JOURNAL_DEV|\ + EXT2_FEATURE_INCOMPAT_META_BG|\ + EXT3_FEATURE_INCOMPAT_RECOVER|\ + EXT3_FEATURE_INCOMPAT_EXTENTS|\ + EXT4_FEATURE_INCOMPAT_FLEX_BG) +#else +#define EXT2_LIB_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE|\ + EXT3_FEATURE_INCOMPAT_JOURNAL_DEV|\ + EXT2_FEATURE_INCOMPAT_META_BG|\ + EXT3_FEATURE_INCOMPAT_RECOVER|\ + EXT3_FEATURE_INCOMPAT_EXTENTS|\ + EXT4_FEATURE_INCOMPAT_FLEX_BG) +#endif +#define EXT2_LIB_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER|\ + EXT4_FEATURE_RO_COMPAT_HUGE_FILE|\ + EXT2_FEATURE_RO_COMPAT_LARGE_FILE|\ + EXT4_FEATURE_RO_COMPAT_DIR_NLINK|\ + EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE|\ + EXT4_FEATURE_RO_COMPAT_GDT_CSUM) + +/* + * These features are only allowed if EXT2_FLAG_SOFTSUPP_FEATURES is passed + * to ext2fs_openfs() + */ +#define EXT2_LIB_SOFTSUPP_INCOMPAT (0) +#define EXT2_LIB_SOFTSUPP_RO_COMPAT (0) + +/* + * function prototypes + */ + +/* alloc.c */ +extern 
errcode_t ext2fs_new_inode(ext2_filsys fs, ext2_ino_t dir, int mode, + ext2fs_inode_bitmap map, ext2_ino_t *ret); +extern errcode_t ext2fs_new_block(ext2_filsys fs, blk_t goal, + ext2fs_block_bitmap map, blk_t *ret); +extern errcode_t ext2fs_get_free_blocks(ext2_filsys fs, blk_t start, + blk_t finish, int num, + ext2fs_block_bitmap map, + blk_t *ret); +extern errcode_t ext2fs_alloc_block(ext2_filsys fs, blk_t goal, + char *block_buf, blk_t *ret); +extern void ext2fs_set_alloc_block_callback(ext2_filsys fs, + errcode_t (*func)(ext2_filsys fs, + blk64_t goal, + blk64_t *ret), + errcode_t (**old)(ext2_filsys fs, + blk64_t goal, + blk64_t *ret)); + +/* alloc_sb.c */ +extern int ext2fs_reserve_super_and_bgd(ext2_filsys fs, + dgrp_t group, + ext2fs_block_bitmap bmap); +extern void ext2fs_set_block_alloc_stats_callback(ext2_filsys fs, + void (*func)(ext2_filsys fs, + blk64_t blk, + int inuse), + void (**old)(ext2_filsys fs, + blk64_t blk, + int inuse)); + +/* alloc_stats.c */ +void ext2fs_inode_alloc_stats(ext2_filsys fs, ext2_ino_t ino, int inuse); +void ext2fs_inode_alloc_stats2(ext2_filsys fs, ext2_ino_t ino, + int inuse, int isdir); +void ext2fs_block_alloc_stats(ext2_filsys fs, blk_t blk, int inuse); + +/* alloc_tables.c */ +extern errcode_t ext2fs_allocate_tables(ext2_filsys fs); +extern errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group, + ext2fs_block_bitmap bmap); + +/* badblocks.c */ +extern errcode_t ext2fs_u32_list_create(ext2_u32_list *ret, int size); +extern errcode_t ext2fs_u32_list_add(ext2_u32_list bb, __u32 blk); +extern int ext2fs_u32_list_find(ext2_u32_list bb, __u32 blk); +extern int ext2fs_u32_list_test(ext2_u32_list bb, blk_t blk); +extern errcode_t ext2fs_u32_list_iterate_begin(ext2_u32_list bb, + ext2_u32_iterate *ret); +extern int ext2fs_u32_list_iterate(ext2_u32_iterate iter, blk_t *blk); +extern void ext2fs_u32_list_iterate_end(ext2_u32_iterate iter); +extern errcode_t ext2fs_u32_copy(ext2_u32_list src, ext2_u32_list *dest); 
+extern int ext2fs_u32_list_equal(ext2_u32_list bb1, ext2_u32_list bb2); + +extern errcode_t ext2fs_badblocks_list_create(ext2_badblocks_list *ret, + int size); +extern errcode_t ext2fs_badblocks_list_add(ext2_badblocks_list bb, + blk_t blk); +extern int ext2fs_badblocks_list_test(ext2_badblocks_list bb, + blk_t blk); +extern int ext2fs_u32_list_del(ext2_u32_list bb, __u32 blk); +extern void ext2fs_badblocks_list_del(ext2_u32_list bb, __u32 blk); +extern errcode_t + ext2fs_badblocks_list_iterate_begin(ext2_badblocks_list bb, + ext2_badblocks_iterate *ret); +extern int ext2fs_badblocks_list_iterate(ext2_badblocks_iterate iter, + blk_t *blk); +extern void ext2fs_badblocks_list_iterate_end(ext2_badblocks_iterate iter); +extern errcode_t ext2fs_badblocks_copy(ext2_badblocks_list src, + ext2_badblocks_list *dest); +extern int ext2fs_badblocks_equal(ext2_badblocks_list bb1, + ext2_badblocks_list bb2); +extern int ext2fs_u32_list_count(ext2_u32_list bb); + +/* bb_compat */ +extern errcode_t badblocks_list_create(badblocks_list *ret, int size); +extern errcode_t badblocks_list_add(badblocks_list bb, blk_t blk); +extern int badblocks_list_test(badblocks_list bb, blk_t blk); +extern errcode_t badblocks_list_iterate_begin(badblocks_list bb, + badblocks_iterate *ret); +extern int badblocks_list_iterate(badblocks_iterate iter, blk_t *blk); +extern void badblocks_list_iterate_end(badblocks_iterate iter); +extern void badblocks_list_free(badblocks_list bb); + +/* bb_inode.c */ +extern errcode_t ext2fs_update_bb_inode(ext2_filsys fs, + ext2_badblocks_list bb_list); + +/* bitmaps.c */ +extern void ext2fs_free_block_bitmap(ext2fs_block_bitmap bitmap); +extern void ext2fs_free_inode_bitmap(ext2fs_inode_bitmap bitmap); +extern errcode_t ext2fs_copy_bitmap(ext2fs_generic_bitmap src, + ext2fs_generic_bitmap *dest); +extern errcode_t ext2fs_write_inode_bitmap(ext2_filsys fs); +extern errcode_t ext2fs_write_block_bitmap (ext2_filsys fs); +extern errcode_t ext2fs_read_inode_bitmap 
(ext2_filsys fs); +extern errcode_t ext2fs_read_block_bitmap(ext2_filsys fs); +extern errcode_t ext2fs_allocate_block_bitmap(ext2_filsys fs, + const char *descr, + ext2fs_block_bitmap *ret); +extern errcode_t ext2fs_allocate_inode_bitmap(ext2_filsys fs, + const char *descr, + ext2fs_inode_bitmap *ret); +extern errcode_t ext2fs_fudge_inode_bitmap_end(ext2fs_inode_bitmap bitmap, + ext2_ino_t end, ext2_ino_t *oend); +extern errcode_t ext2fs_fudge_block_bitmap_end(ext2fs_block_bitmap bitmap, + blk_t end, blk_t *oend); +extern void ext2fs_clear_inode_bitmap(ext2fs_inode_bitmap bitmap); +extern void ext2fs_clear_block_bitmap(ext2fs_block_bitmap bitmap); +extern errcode_t ext2fs_read_bitmaps(ext2_filsys fs); +extern errcode_t ext2fs_write_bitmaps(ext2_filsys fs); +extern errcode_t ext2fs_resize_inode_bitmap(__u32 new_end, __u32 new_real_end, + ext2fs_inode_bitmap bmap); +extern errcode_t ext2fs_resize_block_bitmap(__u32 new_end, __u32 new_real_end, + ext2fs_block_bitmap bmap); +extern errcode_t ext2fs_compare_block_bitmap(ext2fs_block_bitmap bm1, + ext2fs_block_bitmap bm2); +extern errcode_t ext2fs_compare_inode_bitmap(ext2fs_inode_bitmap bm1, + ext2fs_inode_bitmap bm2); +extern errcode_t ext2fs_set_inode_bitmap_range(ext2fs_inode_bitmap bmap, + ext2_ino_t start, unsigned int num, + void *in); +extern errcode_t ext2fs_get_inode_bitmap_range(ext2fs_inode_bitmap bmap, + ext2_ino_t start, unsigned int num, + void *out); +extern errcode_t ext2fs_set_block_bitmap_range(ext2fs_block_bitmap bmap, + blk_t start, unsigned int num, + void *in); +extern errcode_t ext2fs_get_block_bitmap_range(ext2fs_block_bitmap bmap, + blk_t start, unsigned int num, + void *out); + + +/* block.c */ +extern errcode_t ext2fs_block_iterate(ext2_filsys fs, + ext2_ino_t ino, + int flags, + char *block_buf, + int (*func)(ext2_filsys fs, + blk_t *blocknr, + int blockcnt, + void *priv_data), + void *priv_data); +errcode_t ext2fs_block_iterate2(ext2_filsys fs, + ext2_ino_t ino, + int flags, + char 
*block_buf, + int (*func)(ext2_filsys fs, + blk_t *blocknr, + e2_blkcnt_t blockcnt, + blk_t ref_blk, + int ref_offset, + void *priv_data), + void *priv_data); + +/* bmap.c */ +extern errcode_t ext2fs_bmap(ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode *inode, + char *block_buf, int bmap_flags, + blk_t block, blk_t *phys_blk); +extern errcode_t ext2fs_bmap2(ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode *inode, + char *block_buf, int bmap_flags, blk64_t block, + int *ret_flags, blk64_t *phys_blk); + +#if 0 +/* bmove.c */ +extern errcode_t ext2fs_move_blocks(ext2_filsys fs, + ext2fs_block_bitmap reserve, + ext2fs_block_bitmap alloc_map, + int flags); +#endif + +/* check_desc.c */ +extern errcode_t ext2fs_check_desc(ext2_filsys fs); + +/* closefs.c */ +extern errcode_t ext2fs_close(ext2_filsys fs); +extern errcode_t ext2fs_flush(ext2_filsys fs); +extern int ext2fs_super_and_bgd_loc(ext2_filsys fs, + dgrp_t group, + blk_t *ret_super_blk, + blk_t *ret_old_desc_blk, + blk_t *ret_new_desc_blk, + int *ret_meta_bg); +extern void ext2fs_update_dynamic_rev(ext2_filsys fs); + +/* csum.c */ +extern void ext2fs_group_desc_csum_set(ext2_filsys fs, dgrp_t group); +extern int ext2fs_group_desc_csum_verify(ext2_filsys fs, dgrp_t group); +extern errcode_t ext2fs_set_gdt_csum(ext2_filsys fs); + +/* dblist.c */ + +extern errcode_t ext2fs_get_num_dirs(ext2_filsys fs, ext2_ino_t *ret_num_dirs); +extern errcode_t ext2fs_init_dblist(ext2_filsys fs, ext2_dblist *ret_dblist); +extern errcode_t ext2fs_add_dir_block(ext2_dblist dblist, ext2_ino_t ino, + blk_t blk, int blockcnt); +extern void ext2fs_dblist_sort(ext2_dblist dblist, + EXT2_QSORT_TYPE (*sortfunc)(const void *, + const void *)); +extern errcode_t ext2fs_dblist_iterate(ext2_dblist dblist, + int (*func)(ext2_filsys fs, struct ext2_db_entry *db_info, + void *priv_data), + void *priv_data); +extern errcode_t ext2fs_set_dir_block(ext2_dblist dblist, ext2_ino_t ino, + blk_t blk, int blockcnt); +extern errcode_t 
ext2fs_copy_dblist(ext2_dblist src, + ext2_dblist *dest); +extern int ext2fs_dblist_count(ext2_dblist dblist); +extern errcode_t ext2fs_dblist_get_last(ext2_dblist dblist, + struct ext2_db_entry **entry); +extern errcode_t ext2fs_dblist_drop_last(ext2_dblist dblist); + +/* dblist_dir.c */ +extern errcode_t + ext2fs_dblist_dir_iterate(ext2_dblist dblist, + int flags, + char *block_buf, + int (*func)(ext2_ino_t dir, + int entry, + struct ext2_dir_entry *dirent, + int offset, + int blocksize, + char *buf, + void *priv_data), + void *priv_data); + +/* dirblock.c */ +extern errcode_t ext2fs_read_dir_block(ext2_filsys fs, blk_t block, + void *buf); +extern errcode_t ext2fs_read_dir_block2(ext2_filsys fs, blk_t block, + void *buf, int flags); +extern errcode_t ext2fs_write_dir_block(ext2_filsys fs, blk_t block, + void *buf); +extern errcode_t ext2fs_write_dir_block2(ext2_filsys fs, blk_t block, + void *buf, int flags); + +/* dirhash.c */ +extern errcode_t ext2fs_dirhash(int version, const char *name, int len, + const __u32 *seed, + ext2_dirhash_t *ret_hash, + ext2_dirhash_t *ret_minor_hash); + + +/* dir_iterate.c */ +extern errcode_t ext2fs_get_rec_len(ext2_filsys fs, + struct ext2_dir_entry *dirent, + unsigned int *rec_len); +extern errcode_t ext2fs_set_rec_len(ext2_filsys fs, + unsigned int len, + struct ext2_dir_entry *dirent); +extern errcode_t ext2fs_dir_iterate(ext2_filsys fs, + ext2_ino_t dir, + int flags, + char *block_buf, + int (*func)(struct ext2_dir_entry *dirent, + int offset, + int blocksize, + char *buf, + void *priv_data), + void *priv_data); +extern errcode_t ext2fs_dir_iterate2(ext2_filsys fs, + ext2_ino_t dir, + int flags, + char *block_buf, + int (*func)(ext2_ino_t dir, + int entry, + struct ext2_dir_entry *dirent, + int offset, + int blocksize, + char *buf, + void *priv_data), + void *priv_data); + +/* dupfs.c */ +extern errcode_t ext2fs_dup_handle(ext2_filsys src, ext2_filsys *dest); + +/* expanddir.c */ +extern errcode_t 
ext2fs_expand_dir(ext2_filsys fs, ext2_ino_t dir); + +/* ext_attr.c */ +extern __u32 ext2fs_ext_attr_hash_entry(struct ext2_ext_attr_entry *entry, + void *data); +extern errcode_t ext2fs_read_ext_attr(ext2_filsys fs, blk_t block, void *buf); +extern errcode_t ext2fs_write_ext_attr(ext2_filsys fs, blk_t block, + void *buf); +extern errcode_t ext2fs_adjust_ea_refcount(ext2_filsys fs, blk_t blk, + char *block_buf, + int adjust, __u32 *newcount); + +/* extent.c */ +extern errcode_t ext2fs_extent_header_verify(void *ptr, int size); +extern errcode_t ext2fs_extent_open(ext2_filsys fs, ext2_ino_t ino, + ext2_extent_handle_t *handle); +extern errcode_t ext2fs_extent_open2(ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode *inode, + ext2_extent_handle_t *ret_handle); +extern void ext2fs_extent_free(ext2_extent_handle_t handle); +extern errcode_t ext2fs_extent_get(ext2_extent_handle_t handle, + int flags, struct ext2fs_extent *extent); +extern errcode_t ext2fs_extent_replace(ext2_extent_handle_t handle, int flags, + struct ext2fs_extent *extent); +extern errcode_t ext2fs_extent_insert(ext2_extent_handle_t handle, int flags, + struct ext2fs_extent *extent); +extern errcode_t ext2fs_extent_set_bmap(ext2_extent_handle_t handle, + blk64_t logical, blk64_t physical, + int flags); +extern errcode_t ext2fs_extent_delete(ext2_extent_handle_t handle, int flags); +extern errcode_t ext2fs_extent_get_info(ext2_extent_handle_t handle, + struct ext2_extent_info *info); +extern errcode_t ext2fs_extent_goto(ext2_extent_handle_t handle, + blk64_t blk); + +/* fileio.c */ +extern errcode_t ext2fs_file_open2(ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode *inode, + int flags, ext2_file_t *ret); +extern errcode_t ext2fs_file_open(ext2_filsys fs, ext2_ino_t ino, + int flags, ext2_file_t *ret); +extern ext2_filsys ext2fs_file_get_fs(ext2_file_t file); +extern errcode_t ext2fs_file_close(ext2_file_t file); +extern errcode_t ext2fs_file_flush(ext2_file_t file); +extern errcode_t 
ext2fs_file_read(ext2_file_t file, void *buf, + unsigned int wanted, unsigned int *got); +extern errcode_t ext2fs_file_write(ext2_file_t file, const void *buf, + unsigned int nbytes, unsigned int *written); +extern errcode_t ext2fs_file_llseek(ext2_file_t file, __u64 offset, + int whence, __u64 *ret_pos); +extern errcode_t ext2fs_file_lseek(ext2_file_t file, ext2_off_t offset, + int whence, ext2_off_t *ret_pos); +errcode_t ext2fs_file_get_lsize(ext2_file_t file, __u64 *ret_size); +extern ext2_off_t ext2fs_file_get_size(ext2_file_t file); +extern errcode_t ext2fs_file_set_size(ext2_file_t file, ext2_off_t size); + +/* finddev.c */ +extern char *ext2fs_find_block_device(dev_t device); + +/* flushb.c */ +extern errcode_t ext2fs_sync_device(int fd, int flushb); + +/* freefs.c */ +extern void ext2fs_free(ext2_filsys fs); +extern void ext2fs_free_dblist(ext2_dblist dblist); +extern void ext2fs_badblocks_list_free(ext2_badblocks_list bb); +extern void ext2fs_u32_list_free(ext2_u32_list bb); + +/* gen_bitmap.c */ +extern void ext2fs_free_generic_bitmap(ext2fs_inode_bitmap bitmap); +extern errcode_t ext2fs_make_generic_bitmap(errcode_t magic, ext2_filsys fs, + __u32 start, __u32 end, + __u32 real_end, + const char *descr, char *init_map, + ext2fs_generic_bitmap *ret); +extern errcode_t ext2fs_allocate_generic_bitmap(__u32 start, + __u32 end, + __u32 real_end, + const char *descr, + ext2fs_generic_bitmap *ret); +extern errcode_t ext2fs_copy_generic_bitmap(ext2fs_generic_bitmap src, + ext2fs_generic_bitmap *dest); +extern void ext2fs_clear_generic_bitmap(ext2fs_generic_bitmap bitmap); +extern errcode_t ext2fs_fudge_generic_bitmap_end(ext2fs_inode_bitmap bitmap, + errcode_t magic, + errcode_t neq, + ext2_ino_t end, + ext2_ino_t *oend); +extern void ext2fs_set_generic_bitmap_padding(ext2fs_generic_bitmap map); +extern errcode_t ext2fs_resize_generic_bitmap(errcode_t magic, + __u32 new_end, + __u32 new_real_end, + ext2fs_generic_bitmap bmap); +extern errcode_t 
ext2fs_compare_generic_bitmap(errcode_t magic, errcode_t neq, + ext2fs_generic_bitmap bm1, + ext2fs_generic_bitmap bm2); +extern errcode_t ext2fs_get_generic_bitmap_range(ext2fs_generic_bitmap bmap, + errcode_t magic, + __u32 start, __u32 num, + void *out); +extern errcode_t ext2fs_set_generic_bitmap_range(ext2fs_generic_bitmap bmap, + errcode_t magic, + __u32 start, __u32 num, + void *in); + +/* getsize.c */ +extern errcode_t ext2fs_get_device_size(const char *file, int blocksize, + blk_t *retblocks); +extern errcode_t ext2fs_get_device_size2(const char *file, int blocksize, + blk64_t *retblocks); + +/* getsectsize.c */ +errcode_t ext2fs_get_device_sectsize(const char *file, int *sectsize); +errcode_t ext2fs_get_device_phys_sectsize(const char *file, int *sectsize); + +/* i_block.c */ +errcode_t ext2fs_iblk_add_blocks(ext2_filsys fs, struct ext2_inode *inode, + blk64_t num_blocks); +errcode_t ext2fs_iblk_sub_blocks(ext2_filsys fs, struct ext2_inode *inode, + blk64_t num_blocks); +errcode_t ext2fs_iblk_set(ext2_filsys fs, struct ext2_inode *inode, blk64_t b); + +/* imager.c */ +extern errcode_t ext2fs_image_inode_write(ext2_filsys fs, int fd, int flags); +extern errcode_t ext2fs_image_inode_read(ext2_filsys fs, int fd, int flags); +extern errcode_t ext2fs_image_super_write(ext2_filsys fs, int fd, int flags); +extern errcode_t ext2fs_image_super_read(ext2_filsys fs, int fd, int flags); +extern errcode_t ext2fs_image_bitmap_write(ext2_filsys fs, int fd, int flags); +extern errcode_t ext2fs_image_bitmap_read(ext2_filsys fs, int fd, int flags); + +/* ind_block.c */ +errcode_t ext2fs_read_ind_block(ext2_filsys fs, blk_t blk, void *buf); +errcode_t ext2fs_write_ind_block(ext2_filsys fs, blk_t blk, void *buf); + + +/* icount.c */ +extern void ext2fs_free_icount(ext2_icount_t icount); +extern errcode_t ext2fs_create_icount_tdb(ext2_filsys fs, char *tdb_dir, + int flags, ext2_icount_t *ret); +extern errcode_t ext2fs_create_icount2(ext2_filsys fs, int flags, + unsigned int 
size, + ext2_icount_t hint, ext2_icount_t *ret); +extern errcode_t ext2fs_create_icount(ext2_filsys fs, int flags, + unsigned int size, + ext2_icount_t *ret); +extern errcode_t ext2fs_icount_fetch(ext2_icount_t icount, ext2_ino_t ino, + __u16 *ret); +extern errcode_t ext2fs_icount_increment(ext2_icount_t icount, ext2_ino_t ino, + __u16 *ret); +extern errcode_t ext2fs_icount_store(ext2_icount_t icount, ext2_ino_t ino, + __u16 count); +extern ext2_ino_t ext2fs_get_icount_size(ext2_icount_t icount); + +/* inode.c */ +extern errcode_t ext2fs_flush_icache(ext2_filsys fs); +extern errcode_t ext2fs_get_next_inode_full(ext2_inode_scan scan, + ext2_ino_t *ino, + struct ext2_inode *inode, + int bufsize); +extern errcode_t ext2fs_open_inode_scan(ext2_filsys fs, int buffer_blocks, + ext2_inode_scan *ret_scan); +extern void ext2fs_close_inode_scan(ext2_inode_scan scan); +extern errcode_t ext2fs_get_next_inode(ext2_inode_scan scan, ext2_ino_t *ino, + struct ext2_inode *inode); +extern errcode_t ext2fs_inode_scan_goto_blockgroup(ext2_inode_scan scan, + int group); +extern void ext2fs_set_inode_callback + (ext2_inode_scan scan, + errcode_t (*done_group)(ext2_filsys fs, + ext2_inode_scan scan, + dgrp_t group, + void * priv_data), + void *done_group_data); +extern int ext2fs_inode_scan_flags(ext2_inode_scan scan, int set_flags, + int clear_flags); +extern errcode_t ext2fs_read_inode_full(ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode * inode, + int bufsize); +extern errcode_t ext2fs_read_inode (ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode * inode); +extern errcode_t ext2fs_write_inode_full(ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode * inode, + int bufsize); +extern errcode_t ext2fs_write_inode(ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode * inode); +extern errcode_t ext2fs_write_new_inode(ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode * inode); +extern errcode_t ext2fs_get_blocks(ext2_filsys fs, ext2_ino_t ino, blk_t *blocks); +extern errcode_t 
ext2fs_check_directory(ext2_filsys fs, ext2_ino_t ino); + +/* inode_io.c */ +extern io_manager inode_io_manager; +extern errcode_t ext2fs_inode_io_intern(ext2_filsys fs, ext2_ino_t ino, + char **name); +extern errcode_t ext2fs_inode_io_intern2(ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode *inode, + char **name); + +/* ismounted.c */ +extern errcode_t ext2fs_check_if_mounted(const char *file, int *mount_flags); +extern errcode_t ext2fs_check_mount_point(const char *device, int *mount_flags, + char *mtpt, int mtlen); + +/* namei.c */ +extern errcode_t ext2fs_lookup(ext2_filsys fs, ext2_ino_t dir, const char *name, + int namelen, char *buf, ext2_ino_t *inode); +extern errcode_t ext2fs_namei(ext2_filsys fs, ext2_ino_t root, ext2_ino_t cwd, + const char *name, ext2_ino_t *inode); +errcode_t ext2fs_namei_follow(ext2_filsys fs, ext2_ino_t root, ext2_ino_t cwd, + const char *name, ext2_ino_t *inode); +extern errcode_t ext2fs_follow_link(ext2_filsys fs, ext2_ino_t root, ext2_ino_t cwd, + ext2_ino_t inode, ext2_ino_t *res_inode); + +/* native.c */ +int ext2fs_native_flag(void); + +/* newdir.c */ +extern errcode_t ext2fs_new_dir_block(ext2_filsys fs, ext2_ino_t dir_ino, + ext2_ino_t parent_ino, char **block); + +/* mkdir.c */ +extern errcode_t ext2fs_mkdir(ext2_filsys fs, ext2_ino_t parent, ext2_ino_t inum, + const char *name); + +/* mkjournal.c */ +extern errcode_t ext2fs_zero_blocks(ext2_filsys fs, blk_t blk, int num, + blk_t *ret_blk, int *ret_count); +extern errcode_t ext2fs_create_journal_superblock(ext2_filsys fs, + __u32 size, int flags, + char **ret_jsb); +extern errcode_t ext2fs_add_journal_device(ext2_filsys fs, + ext2_filsys journal_dev); +extern errcode_t ext2fs_add_journal_inode(ext2_filsys fs, blk_t size, + int flags); +extern int ext2fs_default_journal_size(__u64 blocks); + +/* openfs.c */ +extern errcode_t ext2fs_open(const char *name, int flags, int superblock, + unsigned int block_size, io_manager manager, + ext2_filsys *ret_fs); +extern errcode_t 
ext2fs_open2(const char *name, const char *io_options, + int flags, int superblock, + unsigned int block_size, io_manager manager, + ext2_filsys *ret_fs); +extern blk_t ext2fs_descriptor_block_loc(ext2_filsys fs, blk_t group_block, + dgrp_t i); +errcode_t ext2fs_get_data_io(ext2_filsys fs, io_channel *old_io); +errcode_t ext2fs_set_data_io(ext2_filsys fs, io_channel new_io); +errcode_t ext2fs_rewrite_to_io(ext2_filsys fs, io_channel new_io); + +/* get_pathname.c */ +extern errcode_t ext2fs_get_pathname(ext2_filsys fs, ext2_ino_t dir, ext2_ino_t ino, + char **name); + +/* link.c */ +errcode_t ext2fs_link(ext2_filsys fs, ext2_ino_t dir, const char *name, + ext2_ino_t ino, int flags); +errcode_t ext2fs_unlink(ext2_filsys fs, ext2_ino_t dir, const char *name, + ext2_ino_t ino, int flags); + +/* read_bb.c */ +extern errcode_t ext2fs_read_bb_inode(ext2_filsys fs, + ext2_badblocks_list *bb_list); + +/* res_gdt.c */ +extern errcode_t ext2fs_create_resize_inode(ext2_filsys fs); + +/* swapfs.c */ +extern void ext2fs_swap_ext_attr(char *to, char *from, int bufsize, + int has_header); +extern void ext2fs_swap_ext_attr_header(struct ext2_ext_attr_header *to_header, + struct ext2_ext_attr_header *from_hdr); +extern void ext2fs_swap_ext_attr_entry(struct ext2_ext_attr_entry *to_entry, + struct ext2_ext_attr_entry *from_entry); +extern void ext2fs_swap_super(struct ext2_super_block * super); +extern void ext2fs_swap_group_desc(struct ext2_group_desc *gdp); +extern void ext2fs_swap_inode_full(ext2_filsys fs, struct ext2_inode_large *t, + struct ext2_inode_large *f, int hostorder, + int bufsize); +extern void ext2fs_swap_inode(ext2_filsys fs,struct ext2_inode *t, + struct ext2_inode *f, int hostorder); + +/* valid_blk.c */ +extern int ext2fs_inode_has_valid_blocks(struct ext2_inode *inode); + +/* version.c */ +extern int ext2fs_parse_version_string(const char *ver_string); +extern int ext2fs_get_library_version(const char **ver_string, + const char **date_string); + + + +/* inline 
functions */ +extern errcode_t ext2fs_get_mem(unsigned long size, void *ptr); +extern errcode_t ext2fs_get_memalign(unsigned long size, + unsigned long align, void *ptr); +extern errcode_t ext2fs_free_mem(void *ptr); +extern errcode_t ext2fs_resize_mem(unsigned long old_size, + unsigned long size, void *ptr); +extern void ext2fs_mark_super_dirty(ext2_filsys fs); +extern void ext2fs_mark_changed(ext2_filsys fs); +extern int ext2fs_test_changed(ext2_filsys fs); +extern void ext2fs_mark_valid(ext2_filsys fs); +extern void ext2fs_unmark_valid(ext2_filsys fs); +extern int ext2fs_test_valid(ext2_filsys fs); +extern void ext2fs_mark_ib_dirty(ext2_filsys fs); +extern void ext2fs_mark_bb_dirty(ext2_filsys fs); +extern int ext2fs_test_ib_dirty(ext2_filsys fs); +extern int ext2fs_test_bb_dirty(ext2_filsys fs); +extern int ext2fs_group_of_blk(ext2_filsys fs, blk_t blk); +extern int ext2fs_group_of_ino(ext2_filsys fs, ext2_ino_t ino); +extern blk_t ext2fs_group_first_block(ext2_filsys fs, dgrp_t group); +extern blk_t ext2fs_group_last_block(ext2_filsys fs, dgrp_t group); +extern blk_t ext2fs_inode_data_blocks(ext2_filsys fs, + struct ext2_inode *inode); +extern unsigned int ext2fs_div_ceil(unsigned int a, unsigned int b); + +/* + * The actual inlined functions definitions themselves... + * + * If NO_INLINE_FUNCS is defined, then we won't try to do inline + * functions at all! 
+ */ +#if (defined(INCLUDE_INLINE_FUNCS) || !defined(NO_INLINE_FUNCS)) +#ifdef INCLUDE_INLINE_FUNCS +#define _INLINE_ extern +#else +#ifdef __GNUC__ +#define _INLINE_ extern __inline__ +#else /* For Watcom C */ +#define _INLINE_ extern inline +#endif +#endif + +#ifndef EXT2_CUSTOM_MEMORY_ROUTINES +//#include <string.h> +/* + * Allocate memory + */ +_INLINE_ errcode_t ext2fs_get_mem(unsigned long size, void *ptr) +{ + void *pp; + + pp = malloc(size); + if (!pp){ + printf("ext2fs_get_mem cannot get mem\n"); + return EXT2_ET_NO_MEMORY; + } + memcpy(ptr, &pp, sizeof (pp)); + return 0; +} + +_INLINE_ errcode_t ext2fs_get_memalign(unsigned long size, + unsigned long align, void *ptr) +{ + void *pp; + pp=malloc(size); + if (!ptr) + return EXT2_ET_NO_MEMORY; + memcpy(ptr, &pp, sizeof (pp)); + return 0; +} + +_INLINE_ errcode_t ext2fs_get_array(unsigned long count, unsigned long size, void *ptr) +{ + if (count && (-1UL)/count<size) + return EXT2_ET_NO_MEMORY; //maybe define EXT2_ET_OVERFLOW ? + return ext2fs_get_mem(count*size, ptr); +} + +/* + * Free memory + */ +_INLINE_ errcode_t ext2fs_free_mem(void *ptr) +{ + void *p; + + memcpy(&p, ptr, sizeof(p)); + free(p); + p = 0; + memcpy(ptr, &p, sizeof(p)); + return 0; +} + +/* + * Resize memory + */ +_INLINE_ errcode_t ext2fs_resize_mem(unsigned long EXT2FS_ATTR((unused)) old_size, + unsigned long size, void *ptr) +{ + void *p; + + /* Use "memcpy" for pointer assignments here to avoid problems + * with C99 strict type aliasing rules. 
*/ + memcpy(&p, ptr, sizeof(p)); + p = realloc(p, size); + if (!p) + return EXT2_ET_NO_MEMORY; + memcpy(ptr, &p, sizeof(p)); + return 0; +} +#endif /* Custom memory routines */ + +/* + * Mark a filesystem superblock as dirty + */ +_INLINE_ void ext2fs_mark_super_dirty(ext2_filsys fs) +{ + fs->flags |= EXT2_FLAG_DIRTY | EXT2_FLAG_CHANGED; +} + +/* + * Mark a filesystem as changed + */ +_INLINE_ void ext2fs_mark_changed(ext2_filsys fs) +{ + fs->flags |= EXT2_FLAG_CHANGED; +} + +/* + * Check to see if a filesystem has changed + */ +_INLINE_ int ext2fs_test_changed(ext2_filsys fs) +{ + return (fs->flags & EXT2_FLAG_CHANGED); +} + +/* + * Mark a filesystem as valid + */ +_INLINE_ void ext2fs_mark_valid(ext2_filsys fs) +{ + fs->flags |= EXT2_FLAG_VALID; +} + +/* + * Mark a filesystem as NOT valid + */ +_INLINE_ void ext2fs_unmark_valid(ext2_filsys fs) +{ + fs->flags &= ~EXT2_FLAG_VALID; +} + +/* + * Check to see if a filesystem is valid + */ +_INLINE_ int ext2fs_test_valid(ext2_filsys fs) +{ + return (fs->flags & EXT2_FLAG_VALID); +} + +/* + * Mark the inode bitmap as dirty + */ +_INLINE_ void ext2fs_mark_ib_dirty(ext2_filsys fs) +{ + fs->flags |= EXT2_FLAG_IB_DIRTY | EXT2_FLAG_CHANGED; +} + +/* + * Mark the block bitmap as dirty + */ +_INLINE_ void ext2fs_mark_bb_dirty(ext2_filsys fs) +{ + fs->flags |= EXT2_FLAG_BB_DIRTY | EXT2_FLAG_CHANGED; +} + +/* + * Check to see if a filesystem's inode bitmap is dirty + */ +_INLINE_ int ext2fs_test_ib_dirty(ext2_filsys fs) +{ + return (fs->flags & EXT2_FLAG_IB_DIRTY); +} + +/* + * Check to see if a filesystem's block bitmap is dirty + */ +_INLINE_ int ext2fs_test_bb_dirty(ext2_filsys fs) +{ + return (fs->flags & EXT2_FLAG_BB_DIRTY); +} + +/* + * Return the group # of a block + */ +_INLINE_ int ext2fs_group_of_blk(ext2_filsys fs, blk_t blk) +{ + return (blk - fs->super->s_first_data_block) / + fs->super->s_blocks_per_group; +} + +/* + * Return the group # of an inode number + */ +_INLINE_ int ext2fs_group_of_ino(ext2_filsys fs, 
ext2_ino_t ino) +{ + return (ino - 1) / fs->super->s_inodes_per_group; +} + +/* + * Return the first block (inclusive) in a group + */ +_INLINE_ blk_t ext2fs_group_first_block(ext2_filsys fs, dgrp_t group) +{ + return fs->super->s_first_data_block + + (group * fs->super->s_blocks_per_group); +} + +/* + * Return the last block (inclusive) in a group + */ +_INLINE_ blk_t ext2fs_group_last_block(ext2_filsys fs, dgrp_t group) +{ + return (group == fs->group_desc_count - 1 ? + fs->super->s_blocks_count - 1 : + ext2fs_group_first_block(fs, group) + + (fs->super->s_blocks_per_group - 1)); +} + +_INLINE_ blk_t ext2fs_inode_data_blocks(ext2_filsys fs, + struct ext2_inode *inode) +{ + return inode->i_blocks - + (inode->i_file_acl ? fs->blocksize >> 9 : 0); +} + +/* + * This is an efficient, overflow safe way of calculating ceil((1.0 * a) / b) + */ +_INLINE_ unsigned int ext2fs_div_ceil(unsigned int a, unsigned int b) +{ + if (!a) + return 0; + return ((a - 1) / b) + 1; +} +#undef _INLINE_ +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _EXT2FS_EXT2FS_H */ diff --git a/fs/ext4/format/ext2fsP.h b/fs/ext4/format/ext2fsP.h new file mode 100755 index 0000000..8772a4f --- /dev/null +++ b/fs/ext4/format/ext2fsP.h @@ -0,0 +1,88 @@ +/* + * ext2fsP.h --- private header file for ext2 library + * + * Copyright (C) 1997 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. 
+ * %End-Header% + */ + +#include "ext2fs.h" + +/* + * Badblocks list + */ +struct ext2_struct_u32_list { + int magic; + int num; + int size; + __u32 *list; + int badblocks_flags; +}; + +struct ext2_struct_u32_iterate { + int magic; + ext2_u32_list bb; + int ptr; +}; + + +/* + * Directory block iterator definition + */ +struct ext2_struct_dblist { + int magic; + ext2_filsys fs; + ext2_ino_t size; + ext2_ino_t count; + int sorted; + struct ext2_db_entry * list; +}; + +/* + * For directory iterators + */ +struct dir_context { + ext2_ino_t dir; + int flags; + char *buf; + int (*func)(ext2_ino_t dir, + int entry, + struct ext2_dir_entry *dirent, + int offset, + int blocksize, + char *buf, + void *priv_data); + void *priv_data; + errcode_t errcode; +}; + +/* + * Inode cache structure + */ +struct ext2_inode_cache { + void * buffer; + blk_t buffer_blk; + int cache_last; + int cache_size; + int refcount; + struct ext2_inode_cache_ent *cache; +}; + +struct ext2_inode_cache_ent { + ext2_ino_t ino; + struct ext2_inode inode; +}; + +/* Function prototypes */ + +extern int ext2fs_process_dir_block(ext2_filsys fs, + blk_t *blocknr, + e2_blkcnt_t blockcnt, + blk_t ref_block, + int ref_offset, + void *priv_data); + + diff --git a/fs/ext4/format/ext3_extents.h b/fs/ext4/format/ext3_extents.h new file mode 100755 index 0000000..88fabc9 --- /dev/null +++ b/fs/ext4/format/ext3_extents.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2003,2004 Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas <alex@clusterfs.com> + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. + * %End-Header% + */ + +#ifndef _LINUX_EXT3_EXTENTS +#define _LINUX_EXT3_EXTENTS + +/* + * ext3_inode has i_block array (total 60 bytes) + * first 4 bytes are used to store: + * - tree depth (0 mean there is no tree yet. 
all extents in the inode) + * - number of alive extents in the inode + */ + +/* + * this is extent on-disk structure + * it's used at the bottom of the tree + */ +struct ext3_extent { + __u32 ee_block; /* first logical block extent covers */ + __u16 ee_len; /* number of blocks covered by extent */ + __u16 ee_start_hi; /* high 16 bits of physical block */ + __u32 ee_start; /* low 32 bigs of physical block */ +}; + +/* + * this is index on-disk structure + * it's used at all the levels, but the bottom + */ +struct ext3_extent_idx { + __u32 ei_block; /* index covers logical blocks from 'block' */ + __u32 ei_leaf; /* pointer to the physical block of the next * + * level. leaf or next index could bet here */ + __u16 ei_leaf_hi; /* high 16 bits of physical block */ + __u16 ei_unused; +}; + +/* + * each block (leaves and indexes), even inode-stored has header + */ +struct ext3_extent_header { + __u16 eh_magic; /* probably will support different formats */ + __u16 eh_entries; /* number of valid entries */ + __u16 eh_max; /* capacity of store in entries */ + __u16 eh_depth; /* has tree real underlaying blocks? */ + __u32 eh_generation; /* generation of the tree */ +}; + +#define EXT3_EXT_MAGIC 0xf30a + +/* + * array of ext3_ext_path contains path to some extent + * creation/lookup routines use it for traversal/splitting/etc + * truncate uses it to simulate recursive walking + */ +struct ext3_ext_path { + __u32 p_block; + __u16 p_depth; + struct ext3_extent *p_ext; + struct ext3_extent_idx *p_idx; + struct ext3_extent_header *p_hdr; + struct buffer_head *p_bh; +}; + +/* + * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an + * initialized extent. This is 2^15 and not (2^16 - 1), since we use the + * MSB of ee_len field in the extent datastructure to signify if this + * particular extent is an initialized extent or an uninitialized (i.e. + * preallocated). + * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an + * uninitialized extent. 
+ * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an + * uninitialized one. In other words, if MSB of ee_len is set, it is an + * uninitialized extent with only one special scenario when ee_len = 0x8000. + * In this case we can not have an uninitialized extent of zero length and + * thus we make it as a special case of initialized extent with 0x8000 length. + * This way we get better extent-to-group alignment for initialized extents. + * Hence, the maximum number of blocks we can have in an *initialized* + * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767). + */ +#define EXT_INIT_MAX_LEN (1UL << 15) +#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1) + +#define EXT_FIRST_EXTENT(__hdr__) \ + ((struct ext3_extent *) (((char *) (__hdr__)) + \ + sizeof(struct ext3_extent_header))) +#define EXT_FIRST_INDEX(__hdr__) \ + ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \ + sizeof(struct ext3_extent_header))) +#define EXT_HAS_FREE_INDEX(__path__) \ + ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max) +#define EXT_LAST_EXTENT(__hdr__) \ + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1) +#define EXT_LAST_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1) +#define EXT_MAX_EXTENT(__hdr__) \ + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) + +#endif /* _LINUX_EXT3_EXTENTS */ + diff --git a/fs/ext4/format/ext4_format.c b/fs/ext4/format/ext4_format.c new file mode 100755 index 0000000..1e9dceb --- /dev/null +++ b/fs/ext4/format/ext4_format.c @@ -0,0 +1,1106 @@ +#include <common.h>
+//#include <ext_common.h>
+//#include <ext4fs.h>
+#include <malloc.h>
+#include <stddef.h>
+#include <linux/stat.h>
+#include <linux/time.h>
+
+//#include <ext4fs.h>
+#include "ext2_fs.h"
+#include "ext2fs.h"
+
+/*
+ * Pick the creator-OS code for the platform this file is built on.
+ * A platform is only selected when its EXT2_OS_* constant is defined;
+ * anything else falls back to EXT2_OS_LINUX.
+ */
+#if defined(__linux__) && defined(EXT2_OS_LINUX)
+#define CREATOR_OS EXT2_OS_LINUX
+#elif defined(__GNU__) && defined(EXT2_OS_HURD)
+#define CREATOR_OS EXT2_OS_HURD
+#elif defined(__FreeBSD__) && defined(EXT2_OS_FREEBSD)
+#define CREATOR_OS EXT2_OS_FREEBSD
+#elif defined(LITES) && defined(EXT2_OS_LITES)
+#define CREATOR_OS EXT2_OS_LITES
+#else
+#define CREATOR_OS EXT2_OS_LINUX /* by default */
+#endif
+
+/* Filesystem handle used by the formatter; private to this file. */
+static struct struct_ext2_filsys *fs;
+
+/*
+ * Description of the target partition, obtained through get_fs().
+ * This file reads total_sect and dev_desc from it.
+ */
+struct ext_filesystem {
+	uint32_t total_sect;		/* total sectors in the partition */
+	uint32_t blksz;			/* block size of the partition */
+	uint32_t inodesz;		/* inode size of the partition */
+	uint32_t sect_perblk;		/* sectors per block */
+	uint32_t gdtable_blkno;		/* group descriptor block number */
+	uint32_t no_blkgrp;		/* total block groups in the partition */
+	uint32_t no_blk_pergdt;		/* blocks required for the bgd table */
+	struct ext2_sblock *sb;		/* superblock */
+	struct ext2_block_group *bgd;	/* block group descriptor table */
+	char *gdtable;
+
+	/* Block bitmap related */
+	unsigned char **blk_bmaps;
+	long int curr_blkno;
+	uint16_t first_pass_bbmap;
+
+	/* Inode bitmap related */
+	unsigned char **inode_bmaps;
+	int curr_inode_no;
+	uint16_t first_pass_ibmap;
+
+	/* Block device descriptor */
+	block_dev_desc_t *dev_desc;
+};
+
+struct ext_filesystem *get_fs(void);
+
+/* 512-byte sectors per filesystem block; updated by set_blksize(). */
+static int times=1;
+/* Running total of bytes submitted through write_blk(). */
+static unsigned long writebyte=0;
+/* Added to every sector number passed to block_read/block_write. */
+extern unsigned long part_offset;
+
+/*
+ * io_manager open hook. No device is opened here: the call only resets
+ * the sectors-per-block factor and the written-bytes counter. All three
+ * arguments are ignored. Always returns 0.
+ */
+static errcode_t open(const char *name, int flags, io_channel *channel)
+{
+	writebyte = 0;
+	times = 1;
+
+	return 0;
+}
+/* io_manager close hook: nothing to release, always returns 0. */
+static errcode_t close(io_channel channel)
+{
+	(void)channel;
+
+	return 0;
+}
+/*
+ * io_manager set-block-size hook. Stores how many 512-byte sectors make
+ * up one block of blksize bytes; read_blk()/write_blk() scale block
+ * numbers by this factor. The channel argument is ignored.
+ * Always returns 0.
+ */
+static errcode_t set_blksize(io_channel channel, int blksize)
+{
+	int sectors_per_block = blksize/512;
+
+	times = sectors_per_block;
+	return 0;
+}
+/*
+ * io_manager read hook.
+ *
+ * block is a filesystem block number; the device start sector is
+ * part_offset + block * times. A non-negative count is a number of
+ * blocks (count * times sectors). A negative count is a byte length,
+ * -count, which is converted to whole 512-byte sectors.
+ *
+ * Returns 0 on success. Prints "read_blk error" and returns 1 when the
+ * device reports a different number of sectors than requested.
+ */
+static errcode_t read_blk(io_channel channel, unsigned long block,
+	int count, const void *buf)
+{
+	struct ext_filesystem *tfs = get_fs();
+	int nsect;
+
+	if (count < 0)
+		nsect = count*(-1)/512;	/* byte length -> sectors */
+	else
+		nsect = count*times;	/* blocks -> sectors */
+
+	if (tfs->dev_desc->block_read(tfs->dev_desc->dev,
+			part_offset+block*times, nsect,
+			(unsigned long *)buf) != nsect) {
+		printf("read_blk error\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+
+/*
+ * io_manager write hook.
+ *
+ * block is a filesystem block number; the device start sector is
+ * part_offset + block * times. A non-negative count is a number of
+ * blocks (count * times sectors). A negative count is a byte length,
+ * -count, which is converted to whole 512-byte sectors.
+ *
+ * The requested byte count is added to writebyte before the device is
+ * called, so the counter also includes writes that then fail.
+ *
+ * Returns 0 on success. Prints "write_blk error" and returns 1 when the
+ * device reports a different number of sectors than requested.
+ */
+static errcode_t write_blk(io_channel channel, unsigned long block,
+	int count, const void *buf)
+{
+	struct ext_filesystem *tfs = get_fs();
+	int nbytes;
+	int nsect;
+
+	if (count < 0) {
+		nbytes = count*(-1);
+		nsect = count*(-1)/512;
+	} else {
+		nbytes = count*times*512;
+		nsect = count*times;
+	}
+
+	writebyte += nbytes;
+	if (tfs->dev_desc->block_write(tfs->dev_desc->dev,
+			part_offset+block*times, nsect,
+			(unsigned long *)buf) != nsect) {
+		printf("write_blk error\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+/* io_manager flush hook: no buffering is done here, always returns 0. */
+static errcode_t flush(io_channel channel)
+{
+	(void)channel;
+
+	return 0;
+}
+
+/*
+ * io_manager byte-granular write hook. Stub: nothing is written and
+ * all arguments are ignored. Always returns 0.
+ */
+static errcode_t write_byte(io_channel channel, unsigned long offset,
+	int size, const void *buf)
+{
+	(void)channel;
+	(void)offset;
+	(void)size;
+	(void)buf;
+
+	return 0;
+}
+
+/*
+ * io_manager option hook. Stub: every option is accepted and ignored.
+ * Always returns 0.
+ */
+static errcode_t set_option(io_channel channel, const char *option,
+	const char *arg)
+{
+	(void)channel;
+	(void)option;
+	(void)arg;
+
+	return 0;
+}
+
+/*
+ * io_manager statistics hook. Copies the running writebyte total into
+ * the bytes_written field of the structure *stats points to.
+ * Always returns 0.
+ *
+ * NOTE(review): stats and *stats are dereferenced without a NULL check,
+ * so the caller must pass a pointer to an already valid io_stats -
+ * confirm against the callers.
+ */
+static errcode_t get_stats(io_channel channel, io_stats *stats)
+{
+	io_stats out = *stats;
+
+	out->bytes_written = writebyte;
+	return 0;
+}
+
+/*
+ * io_manager 64-bit read hook. Stub: buf is left untouched and all
+ * arguments are ignored. Always returns 0.
+ */
+static errcode_t read_blk64(io_channel channel, unsigned long long block,
+	int count, void *buf)
+{
+	(void)channel;
+	(void)block;
+	(void)count;
+	(void)buf;
+
+	return 0;
+}
+
+/*
+ * io_manager 64-bit write hook. Stub: nothing is written and all
+ * arguments are ignored. Always returns 0.
+ */
+static errcode_t write_blk64(io_channel channel, unsigned long long block,
+	int count, const void *buf)
+{
+	(void)channel;
+	(void)block;
+	(void)count;
+	(void)buf;
+
+	return 0;
+}
+
+/*
+ * I/O manager table wiring the hooks defined above into the library.
+ * The initializer is positional, so the entry order must match the
+ * member order of struct struct_io_manager (declared elsewhere).
+ * NOTE(review): the name string says "Unix I/O Manager" although the
+ * hooks go through the block device descriptor - presumably inherited
+ * from the code this was ported from; confirm nothing matches on it.
+ */
+static struct struct_io_manager struct_devio_manager = {
+ EXT2_ET_MAGIC_IO_MANAGER,
+ "Unix I/O Manager",
+ open,
+ close,
+ set_blksize,
+ read_blk,
+ write_blk,
+ flush,
+ write_byte,
+ set_option,
+ get_stats,
+ read_blk64,
+ write_blk64,
+};
+#if 0
+/*
+ * Debug helper: hex-dump the first 512 bytes of buf, one output row of
+ * four 32-bit words for every 16 bytes.
+ *
+ * buf must point to at least 512 readable bytes.
+ */
+static void verbose_buffer(void* buf)
+{
+	const unsigned char *bytes = buf;
+	unsigned int words[4];
+	int offset;
+
+	for (offset = 0; offset < 512; offset += 16) {
+		/* memcpy avoids unaligned, type-punned loads from the raw buffer */
+		memcpy(words, bytes + offset, sizeof(words));
+		printf("offset 0x%x: 0x%x 0x%x 0x%x 0x%x\n",offset,
+			words[0], words[1], words[2], words[3]);
+	}
+}
+#endif
+#if 0
+/*
+ * Debug helper (compiled out): print the fields of the superblock
+ * pointed to by param, one field per line. Array fields are printed
+ * element by element.
+ */
+static void verbose_superblock(struct ext2_super_block *param)
+{
+ int i;
+
+ printf("s_inodes_count 0x%x\n", param->s_inodes_count);
+ printf("s_blocks_count 0x%x\n", param->s_blocks_count);
+ printf("s_r_blocks_count 0x%x\n", param->s_r_blocks_count);
+ printf("s_free_blocks_count 0x%x\n", param->s_free_blocks_count);
+ printf("s_free_inodes_count 0x%x\n", param->s_free_inodes_count);
+ printf("s_first_data_block 0x%x\n", param->s_first_data_block);
+ printf("s_log_block_size 0x%x\n", param->s_log_block_size);
+ printf("s_log_frag_size 0x%x\n", param->s_log_frag_size);
+ printf("s_blocks_per_group 0x%x\n", param->s_blocks_per_group);
+ printf("s_frags_per_group 0x%x\n", param->s_frags_per_group);
+ printf("s_inodes_per_group 0x%x\n", param->s_inodes_per_group);
+ printf("s_mtime 0x%x\n", param->s_mtime);
+ printf("s_wtime 0x%x\n", param->s_wtime);
+ printf("s_mnt_count 0x%x\n", param->s_mnt_count);
+ printf("s_max_mnt_count 0x%x\n", param->s_max_mnt_count);
+
+ /* NOTE(review): s_magic is the only field printed in decimal (%d). */
+ printf("s_magic %d\n", param->s_magic);
+ printf("s_state 0x%x\n", param->s_state);
+ printf("s_errors 0x%x\n", param->s_errors);
+ printf("s_minor_rev_level 0x%x\n", param->s_minor_rev_level);
+ printf("s_lastcheck 0x%x\n", param->s_lastcheck);
+ printf("s_checkinterval 0x%x\n", param->s_checkinterval);
+ printf("s_creator_os 0x%x\n", param->s_creator_os);
+ printf("s_rev_level 0x%x\n", param->s_rev_level);
+ printf("s_def_resuid 0x%x\n", param->s_def_resuid);
+ printf("s_def_resgid 0x%x\n", param->s_def_resgid);
+
+ /*
+ * These fields are for EXT2_DYNAMIC_REV superblocks only.
+ *
+ * Note: the difference between the compatible feature set and
+ * the incompatible feature set is that if there is a bit set
+ * in the incompatible feature set that the kernel doesn't
+ * know about, it should refuse to mount the filesystem.
+ *
+ * e2fsck's requirements are more strict; if it doesn't know
+ * about a feature in either the compatible or incompatible
+ * feature set, it must abort and not try to meddle with
+ * things it doesn't understand...
+ */
+ printf("s_first_ino 0x%x\n", param->s_first_ino);
+ printf("s_inode_size 0x%x\n", param->s_inode_size);
+ printf("s_block_group_nr 0x%x\n", param->s_block_group_nr);
+ printf("s_feature_compat 0x%x\n", param->s_feature_compat);
+ printf("s_feature_incompat 0x%x\n", param->s_feature_incompat);
+ printf("s_feature_ro_compat 0x%x\n", param->s_feature_ro_compat);
+ for(i=0;i<16;i++)
+ printf("s_uuid 0x%x\n", param->s_uuid[i]);
+ printf("\n");
+ printf("s_volume_name %s\n", param->s_volume_name);
+ printf("s_last_mounted %s\n", param->s_last_mounted);
+ printf("s_algorithm_usage_bitmap 0x%x\n", param->s_algorithm_usage_bitmap);
+ printf("s_prealloc_blocks 0x%x\n", param->s_prealloc_blocks);
+ printf("s_prealloc_dir_blocks 0x%x\n", param->s_prealloc_dir_blocks);
+ printf("s_reserved_gdt_blocks 0x%x\n", param->s_reserved_gdt_blocks);
+ /*
+ * Journaling support valid if EXT2_FEATURE_COMPAT_HAS_JOURNAL set.
+ */
+
+ for(i=0;i<16;i++)
+ printf("s_journal_uuid 0x%x\n", param->s_journal_uuid[i]);
+ printf("s_journal_inum 0x%x\n", param->s_journal_inum);
+ printf("s_journal_dev 0x%x\n", param->s_journal_dev);
+ printf("s_last_orphan 0x%x\n", param->s_last_orphan);
+ for(i=0;i<4;i++)
+ printf("s_hash_seed 0x%x\n", param->s_hash_seed[i]);
+ printf("s_def_hash_version 0x%x\n", param->s_def_hash_version);
+ printf("s_jnl_backup_type 0x%x\n", param->s_jnl_backup_type);
+ printf("s_desc_size 0x%x\n", param->s_desc_size);
+ printf("s_default_mount_opts 0x%x\n", param->s_default_mount_opts);
+ printf("s_first_meta_bg 0x%x\n", param->s_first_meta_bg);
+ printf("s_mkfs_time 0x%x\n", param->s_mkfs_time);
+ for(i=0;i<17;i++)
+ printf("s_jnl_blocks 0x%x\n", param->s_jnl_blocks[i]);
+
+
+
+ printf("s_blocks_count_hi 0x%x\n", param->s_blocks_count_hi);
+ printf("s_r_blocks_count_hi 0x%x\n", param->s_r_blocks_count_hi);
+ printf("s_free_blocks_hi 0x%x\n", param->s_free_blocks_hi);
+ printf("s_min_extra_isize 0x%x\n", param->s_min_extra_isize);
+ printf("s_want_extra_isize 0x%x\n", param->s_want_extra_isize);
+ printf("s_flags 0x%x\n", param->s_flags);
+
+
+ printf("s_raid_stride 0x%x\n", param->s_raid_stride);
+ printf("s_mmp_interval 0x%x\n", param->s_mmp_interval);
+ /*
+ * NOTE(review): here and in the other "[0]"/"[1]" pairs below, the
+ * "[1]" line dereferences a char *, so it prints the single byte at
+ * offset 4 of the field rather than a second 32-bit word. Presumably
+ * the upper half of a 64-bit field was intended - confirm before
+ * relying on this output.
+ */
+ printf("s_mmp_block[0] 0x%x\n", *((unsigned int *)&(param->s_mmp_block)));
+ printf("s_mmp_block[1] 0x%x\n", *(((char *)(&(param->s_mmp_block))+4)));
+ printf("s_raid_stripe_width 0x%x\n", param->s_raid_stripe_width);
+ printf("s_log_groups_per_flex 0x%x\n", param->s_log_groups_per_flex);
+ printf("s_reserved_char_pad 0x%x\n", param->s_reserved_char_pad);
+ printf("s_reserved_pad 0x%x\n", param->s_reserved_pad);
+
+ printf("s_kbytes_written[0] 0x%x\n", *((unsigned int *)&(param->s_kbytes_written)));
+ printf("s_kbytes_written[1] 0x%x\n", *(((char *)(&(param->s_kbytes_written))+4)));
+
+
+
+ printf("s_snapshot_inum 0x%x\n", param->s_snapshot_inum);
+ printf("s_snapshot_id 0x%x\n", param->s_snapshot_id);
+ printf("s_snapshot_r_blocks_count[0] 0x%x\n", *((unsigned int *)&(param->s_snapshot_r_blocks_count)));
+ printf("s_snapshot_r_blocks_count[1] 0x%x\n", *(((char *)(&(param->s_snapshot_r_blocks_count))+4)));
+ printf("s_snapshot_list 0x%x\n", param->s_snapshot_list);
+ printf("s_error_count 0x%x\n", param->s_error_count);
+ printf("s_first_error_time 0x%x\n", param->s_first_error_time);
+ printf("s_first_error_ino 0x%x\n", param->s_first_error_ino);
+ printf("s_first_error_block[0] 0x%x\n", *((unsigned int *)&(param->s_first_error_block)));
+ printf("s_first_error_block[1] 0x%x\n", *(((char *)(&(param->s_first_error_block))+4)));
+ for(i=0;i<32;i++)
+ printf("s_first_error_func 0x%x\n", param->s_first_error_func[i]);
+ printf("\n");
+ printf("s_first_error_line 0x%x\n", param->s_first_error_line);
+ printf("s_last_error_time 0x%x\n", param->s_last_error_time);
+
+
+ printf("s_last_error_ino 0x%x\n", param->s_last_error_ino);
+ printf("s_last_error_line 0x%x\n", param->s_last_error_line);
+
+ printf("s_last_error_block[0] 0x%x\n", *((unsigned int *)&(param->s_last_error_block)));
+ printf("s_last_error_block[1] 0x%x\n", *(((char *)(&(param->s_last_error_block))+4)));
+
+ for(i=0;i<32;i++)
+ printf("s_last_error_func 0x%x\n", param->s_last_error_func[i]);
+ printf("\n");
+ for(i=0;i<64;i++)
+ printf("s_mount_opts 0x%x\n", param->s_mount_opts[i]);
+ printf("\n");
+
+}
+#endif
+
+
+
+/*
+ * Seed the superblock with fixed format parameters before
+ * ext2fs_initialize() fills in its defaults. The values could be made
+ * user-settable; for now they are constants chosen by partition size,
+ * following the three mkfs.ext4 size classes (sectors are 512 bytes):
+ *
+ *   floppy  (total_sect < 3*1024*2):   1 KiB blocks, 128-byte inodes,
+ *                                      one inode per 8192 bytes
+ *   small   (total_sect < 512*1024*2): 1 KiB blocks, 128-byte inodes,
+ *                                      one inode per 4096 bytes
+ *   default (everything else):         4 KiB blocks, 256-byte inodes,
+ *                                      one inode per 16384 bytes
+ *
+ * Reads the partition size from get_fs() and writes fs->super, which
+ * must already be allocated.
+ */
+static void ext4fs_preinitialize(void)
+{
+	struct ext_filesystem *fse = get_fs();
+	struct ext2_super_block *sb = fs->super;
+	/* share of blocks set aside in s_r_blocks_count, in percent */
+	const unsigned int reserved_percent = 5;
+	int blocksize;
+	int inode_ratio;
+
+	if (fse->total_sect < 512*1024*2) {
+		/* floppy and small differ only in the inode ratio */
+		inode_ratio = (fse->total_sect < 3*1024*2) ? 8192 : 4096;
+		blocksize = 1024;
+
+		/* two sectors per block, rounded down to a multiple of 4 blocks */
+		sb->s_blocks_count = (fse->total_sect/2) & 0xfffffffc;
+		sb->s_inode_size = 128;
+		sb->s_log_block_size = 0;
+		sb->s_log_frag_size = 0;
+	} else {
+		inode_ratio = 16384;
+		blocksize = 4096;
+
+		/* eight sectors per block */
+		sb->s_blocks_count = fse->total_sect/8;
+		sb->s_inode_size = 256;
+		sb->s_log_block_size = 0x2;
+		sb->s_log_frag_size = 0x2;
+	}
+
+	/*
+	 * Calculate number of blocks to reserve. Done in 64-bit integer
+	 * arithmetic so no floating point is needed; the result is the
+	 * same truncated percentage.
+	 */
+	sb->s_r_blocks_count = (unsigned int) (((__u64) sb->s_blocks_count *
+					reserved_percent) / 100);
+
+	sb->s_rev_level = 1;
+	/*
+	 * Feature masks are raw on-disk bit values.
+	 * NOTE(review): per the ext4 disk-layout documentation these
+	 * presumably decode to has_journal|ext_attr|resize_inode|dir_index,
+	 * filetype|extents|flex_bg and
+	 * sparse_super|huge_file|gdt_csum|dir_nlink|extra_isize - confirm
+	 * against the EXT2/EXT4 feature macros in ext2_fs.h.
+	 */
+	sb->s_feature_compat = 0x3c;
+	sb->s_feature_incompat = 0x242;
+	sb->s_feature_ro_compat = 0x79;
+	sb->s_log_groups_per_flex = 0x04;
+
+	/* one inode for every inode_ratio bytes of filesystem space */
+	sb->s_inodes_count = ((__u64) sb->s_blocks_count*blocksize) / inode_ratio;
+}
+
+
+/*
+ * Work out how many group descriptor table blocks to reserve so the
+ * filesystem can later be grown online. The growth target is 1024 times
+ * the current block count, capped at 0xffffffff blocks. The result is
+ * further capped at the number of block pointers that fit in one block.
+ *
+ * Note: the fs parameter shadows the file-scope fs pointer.
+ */
+static unsigned int calc_reserved_gdt_blocks(ext2_filsys fs)
+{
+	struct ext2_super_block *super = fs->super;
+	unsigned long blocks_per_group = super->s_blocks_per_group;
+	unsigned int desc_per_block = EXT2_DESC_PER_BLOCK(super);
+	unsigned long growth_limit = 0xffffffff;
+	unsigned long groups;
+	unsigned int reserved;
+
+	/* the comparison also keeps the 1024x multiplication from overflowing */
+	if (super->s_blocks_count < growth_limit / 1024)
+		growth_limit = super->s_blocks_count * 1024;
+
+	groups = ext2fs_div_ceil(growth_limit - super->s_first_data_block,
+				 blocks_per_group);
+	/* descriptor blocks needed at the limit, minus those already in use */
+	reserved = ext2fs_div_ceil(groups, desc_per_block) - fs->desc_blocks;
+	if (reserved > EXT2_ADDR_PER_BLOCK(super))
+		reserved = EXT2_ADDR_PER_BLOCK(super);
+#ifdef RES_GDT_DEBUG
+	printf("max_blocks %lu, rsv_groups = %lu, rsv_gdb = %u\n",
+		growth_limit, groups, reserved);
+#endif
+
+	return reserved;
+}
+
+
+/*
+ * Return 1 when a is 0, 1, or can be reduced to 1 by repeatedly
+ * dividing by b with no remainder (i.e. a is a power of b); return 0
+ * otherwise. Callers in this file pass b = 3, 5 or 7.
+ */
+static int test_root(int a, int b)
+{
+	if (a == 0)
+		return 1;
+
+	while (a != 1) {
+		if (a % b != 0)
+			return 0;
+		a /= b;
+	}
+
+	return 1;
+}
+
+/*
+ * Tell whether block group number group_block carries a superblock
+ * copy. Without the sparse-super feature every group does. With it,
+ * only groups 0 and 1 and powers of 3, 5 and 7 do.
+ * Returns 1 for yes, 0 for no.
+ */
+static int ext2fs_bg_has_super(ext2_filsys fs, int group_block)
+{
+	int sparse = (fs->super->s_feature_ro_compat &
+		      EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) != 0;
+
+	if (!sparse)
+		return 1;
+
+	return test_root(group_block, 3) ||
+	       test_root(group_block, 5) ||
+	       test_root(group_block, 7);
+}
+
+/* Default for s_checkinterval: 180 days expressed in seconds (86400 per day). */
+#define EXT2_DFL_CHECKINTERVAL (86400L * 180L)
+
+int ext2fs_initialize(void)
+{
+ int retval;
+ struct ext2_super_block *super;
+ int frags_per_block;
+ unsigned int rem;
+ unsigned int overhead = 0;
+ unsigned int ipg;
+ dgrp_t i;
+ blk_t numblocks;
+ int rsv_gdt;
+ int csum_flag;
+ char c;
+
+ if (!fs){
+ retval = ext2fs_get_mem(sizeof(struct struct_ext2_filsys), &fs);
+ if (retval)
+ return retval;
+ }
+
+
+ memset(fs, 0, sizeof(struct struct_ext2_filsys));
+ fs->image_io->manager=fs->io->manager= &struct_devio_manager;
+ fs->magic = EXT2_ET_MAGIC_EXT2FS_FILSYS;
+ fs->flags = EXT2_FLAG_RW;
+ fs->umask = 022;
+#ifdef WORDS_BIGENDIAN
+ fs->flags |= EXT2_FLAG_SWAP_BYTES;
+#endif
+
+ retval = ext2fs_get_mem(SUPERBLOCK_SIZE, &super);
+ if (retval)
+ goto cleanup;
+ fs->super = super;
+
+ memset(super, 0, SUPERBLOCK_SIZE);
+ fs->io->manager->open(NULL, 0, NULL);
+
+ ext4fs_preinitialize();
+
+
+
+
+#define set_field(field, default) (super->field= super->field? \
+ super->field : (default))
+
+ super->s_magic = EXT2_SUPER_MAGIC;
+ super->s_state = EXT2_VALID_FS;
+
+ set_field(s_log_block_size, 0); /* default blocksize: 1024 bytes */
+ set_field(s_log_frag_size, 0); /* default fragsize: 1024 bytes */
+ set_field(s_first_data_block, super->s_log_block_size ? 0 : 1);
+ set_field(s_max_mnt_count, EXT2_DFL_MAX_MNT_COUNT);
+ set_field(s_errors, EXT2_ERRORS_DEFAULT);
+ set_field(s_feature_compat, 0);
+ set_field(s_feature_incompat, 0);
+ set_field(s_feature_ro_compat, 0);
+ set_field(s_first_meta_bg, 0);
+ set_field(s_raid_stride, 0); /* default stride size: 0 */
+ set_field(s_raid_stripe_width, 0); /* default stripe width: 0 */
+ set_field(s_log_groups_per_flex, 0);
+ set_field(s_flags, 0);
+ if (super->s_feature_incompat & ~EXT2_LIB_FEATURE_INCOMPAT_SUPP) {
+ retval = EXT2_ET_UNSUPP_FEATURE;
+ goto cleanup;
+ }
+ if (super->s_feature_ro_compat & ~EXT2_LIB_FEATURE_RO_COMPAT_SUPP) {
+ retval = EXT2_ET_RO_UNSUPP_FEATURE;
+ goto cleanup;
+ }
+ set_field(s_rev_level, EXT2_GOOD_OLD_REV);
+ if (super->s_rev_level >= EXT2_DYNAMIC_REV) {
+ set_field(s_first_ino, EXT2_GOOD_OLD_FIRST_INO);
+ set_field(s_inode_size, EXT2_GOOD_OLD_INODE_SIZE);
+ if (super->s_inode_size >= sizeof(struct ext2_inode_large)) {
+ int extra_isize = sizeof(struct ext2_inode_large) -
+ EXT2_GOOD_OLD_INODE_SIZE;
+ set_field(s_min_extra_isize, extra_isize);
+ set_field(s_want_extra_isize, extra_isize);
+ }
+ } else {
+ super->s_first_ino = EXT2_GOOD_OLD_FIRST_INO;
+ super->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE;
+ }
+
+ set_field(s_checkinterval, EXT2_DFL_CHECKINTERVAL);
+ super->s_mkfs_time = super->s_lastcheck = 0x5105cd7b;//fs->now ? fs->now : time(NULL);
+
+ super->s_creator_os = CREATOR_OS;
+
+ fs->blocksize = EXT2_BLOCK_SIZE(super);
+ fs->fragsize = EXT2_FRAG_SIZE(super);
+ frags_per_block = fs->blocksize / fs->fragsize;
+
+ /* default: (fs->blocksize*8) blocks/group, up to 2^16 (GDT limit) */
+ set_field(s_blocks_per_group, fs->blocksize * 8);
+ if (super->s_blocks_per_group > EXT2_MAX_BLOCKS_PER_GROUP(super))
+ super->s_blocks_per_group = EXT2_MAX_BLOCKS_PER_GROUP(super);
+ super->s_frags_per_group = super->s_blocks_per_group * frags_per_block;
+
+
+
+retry:
+ fs->group_desc_count = ext2fs_div_ceil(super->s_blocks_count -
+ super->s_first_data_block,
+ EXT2_BLOCKS_PER_GROUP(super));
+ if (fs->group_desc_count == 0) {
+ retval = EXT2_ET_TOOSMALL;
+ goto cleanup;
+ }
+ fs->desc_blocks = ext2fs_div_ceil(fs->group_desc_count,
+ EXT2_DESC_PER_BLOCK(super));
+
+ i = fs->blocksize >= 4096 ? 1 : 4096 / fs->blocksize;
+ set_field(s_inodes_count, super->s_blocks_count / i);
+
+ /*
+ * Make sure we have at least EXT2_FIRST_INO + 1 inodes, so
+ * that we have enough inodes for the filesystem(!)
+ */
+ if (super->s_inodes_count < EXT2_FIRST_INODE(super)+1)
+ super->s_inodes_count = EXT2_FIRST_INODE(super)+1;
+
+ /*
+ * There should be at least as many inodes as the user
+ * requested. Figure out how many inodes per group that
+ * should be. But make sure that we don't allocate more than
+ * one bitmap's worth of inodes each group.
+ */
+ ipg = ext2fs_div_ceil(super->s_inodes_count, fs->group_desc_count);
+ if (ipg > fs->blocksize * 8) {
+ if (super->s_blocks_per_group >= 256) {
+ /* Try again with slightly different parameters */
+ super->s_blocks_per_group -= 8;
+ super->s_frags_per_group = super->s_blocks_per_group *
+ frags_per_block;
+ goto retry;
+ } else {
+ retval = EXT2_ET_TOO_MANY_INODES;
+ goto cleanup;
+ }
+ }
+
+ if (ipg > (unsigned) EXT2_MAX_INODES_PER_GROUP(super))
+ ipg = EXT2_MAX_INODES_PER_GROUP(super);
+
+ipg_retry:
+ super->s_inodes_per_group = ipg;//Tina:we have to make sure how many inodes per group
+
+ /*
+ * Make sure the number of inodes per group completely fills
+ * the inode table blocks in the descriptor. If not, add some
+ * additional inodes/group. Waste not, want not...
+ */
+ fs->inode_blocks_per_group = (((super->s_inodes_per_group *
+ EXT2_INODE_SIZE(super)) +
+ EXT2_BLOCK_SIZE(super) - 1) /
+ EXT2_BLOCK_SIZE(super));
+ super->s_inodes_per_group = ((fs->inode_blocks_per_group *
+ EXT2_BLOCK_SIZE(super)) /
+ EXT2_INODE_SIZE(super));
+ /*
+ * Finally, make sure the number of inodes per group is a
+ * multiple of 8. This is needed to simplify the bitmap
+ * splicing code.
+ */
+ super->s_inodes_per_group &= ~7;
+ fs->inode_blocks_per_group = (((super->s_inodes_per_group *
+ EXT2_INODE_SIZE(super)) +
+ EXT2_BLOCK_SIZE(super) - 1) /
+ EXT2_BLOCK_SIZE(super));
+
+ /*
+ * adjust inode count to reflect the adjusted inodes_per_group
+ */
+ if ((__u64)super->s_inodes_per_group * fs->group_desc_count > ~0U) {
+ ipg--;
+ goto ipg_retry;
+ }
+ super->s_inodes_count = super->s_inodes_per_group *
+ fs->group_desc_count;
+ super->s_free_inodes_count = super->s_inodes_count;
+
+ /*
+ * check the number of reserved group descriptor table blocks
+ */
+ if (super->s_feature_compat & EXT2_FEATURE_COMPAT_RESIZE_INODE)
+ rsv_gdt = calc_reserved_gdt_blocks(fs);
+ else
+ rsv_gdt = 0;
+ set_field(s_reserved_gdt_blocks, rsv_gdt);
+ if (super->s_reserved_gdt_blocks > EXT2_ADDR_PER_BLOCK(super)) {
+ retval = EXT2_ET_RES_GDT_BLOCKS;
+ goto cleanup;
+ }
+
+ /*
+ * Calculate the maximum number of bookkeeping blocks per
+ * group. It includes the superblock, the block group
+ * descriptors, the block bitmap, the inode bitmap, the inode
+ * table, and the reserved gdt blocks.
+ */
+ overhead = (int) (3 + fs->inode_blocks_per_group +
+ fs->desc_blocks + super->s_reserved_gdt_blocks);
+
+ //printf("overhead 0x%x\n",overhead);
+ //printf("0x%x, 0x%x, 0x%x\n", fs->inode_blocks_per_group,
+ // fs->desc_blocks, super->s_reserved_gdt_blocks);
+ //printf("0x%x\n", super->s_blocks_per_group);
+ /* This can only happen if the user requested too many inodes */
+ if (overhead > super->s_blocks_per_group) {
+ retval = EXT2_ET_TOO_MANY_INODES;
+ goto cleanup;
+ }
+
+ /*
+ * See if the last group is big enough to support the
+ * necessary data structures. If not, we need to get rid of
+ * it. We need to recalculate the overhead for the last block
+ * group, since it might or might not have a superblock
+ * backup.
+ */
+ overhead = (int) (2 + fs->inode_blocks_per_group);
+ if (ext2fs_bg_has_super(fs, fs->group_desc_count - 1))
+ overhead += 1 + fs->desc_blocks + super->s_reserved_gdt_blocks;
+ rem = ((super->s_blocks_count - super->s_first_data_block) %
+ super->s_blocks_per_group);
+ if ((fs->group_desc_count == 1) && rem && (rem < overhead)) {
+ retval = EXT2_ET_TOOSMALL;
+ goto cleanup;
+ }
+ if (rem && (rem < overhead+50)) {
+ super->s_blocks_count -= rem;
+ goto retry;
+ }
+
+ /*
+ * At this point we know how big the filesystem will be. So
+ * we can do any and all allocations that depend on the block
+ * count.
+ */
+
+
+ retval = ext2fs_allocate_block_bitmap(fs, NULL, &fs->block_map);
+ if (retval) {
+ printf("ext2fs_allocate_block_bitmap cannot allocate\n");
+ goto cleanup;
+ }
+ retval = ext2fs_allocate_inode_bitmap(fs, NULL, &fs->inode_map);
+ if (retval) {
+ printf("ext2fs_allocate_inode_bitmap cannot allocate\n");
+ goto cleanup;
+
+ }
+
+
+ retval = ext2fs_get_array(fs->desc_blocks, fs->blocksize,
+ &fs->group_desc);
+ if (retval){
+ printf("ext2fs_get_array cannot get array\n");
+ goto cleanup;
+ }
+ memset(fs->group_desc, 0, (size_t) fs->desc_blocks * fs->blocksize);
+
+
+ /*
+ * Reserve the superblock and group descriptors for each
+ * group, and fill in the correct group statistics for group.
+ * Note that although the block bitmap, inode bitmap, and
+ * inode table have not been allocated (and in fact won't be
+ * by this routine), they are accounted for nevertheless.
+ *
+ * If FLEX_BG meta-data grouping is used, only account for the
+ * superblock and group descriptors (the inode tables and
+ * bitmaps will be accounted for when allocated).
+ */
+ //Tina: set s_free_blocks_count
+ //Tina: set block group descriptors
+ super->s_free_blocks_count = 0;
+ csum_flag = EXT2_HAS_RO_COMPAT_FEATURE(fs->super,
+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM);
+ for (i = 0; i < fs->group_desc_count; i++) {//Tina: for each group
+ /*
+ * Don't set the BLOCK_UNINIT group for the last group
+ * because the block bitmap needs to be padded.
+ */
+ if (csum_flag) {
+ if (i != fs->group_desc_count - 1)
+ fs->group_desc[i].bg_flags |=
+ EXT2_BG_BLOCK_UNINIT;
+ fs->group_desc[i].bg_flags |= EXT2_BG_INODE_UNINIT;
+ numblocks = super->s_inodes_per_group;
+ if (i == 0)
+ numblocks -= super->s_first_ino;
+ fs->group_desc[i].bg_itable_unused = numblocks;//how many inode are free in the group
+ }
+ numblocks = ext2fs_reserve_super_and_bgd(fs, i, fs->block_map);
+ if (fs->super->s_log_groups_per_flex)
+ numblocks += 2 + fs->inode_blocks_per_group;//data blocks+two bitmap blocks+inode table blocks
+
+ super->s_free_blocks_count += numblocks;
+ fs->group_desc[i].bg_free_blocks_count = numblocks;
+ fs->group_desc[i].bg_free_inodes_count =
+ fs->super->s_inodes_per_group;
+ fs->group_desc[i].bg_used_dirs_count = 0;
+ ext2fs_group_desc_csum_set(fs, i);
+ }
+
+ c = (char) 255;
+ if (((int) c) == -1) {
+ super->s_flags |= EXT2_FLAGS_SIGNED_HASH;
+ } else {
+ super->s_flags |= EXT2_FLAGS_UNSIGNED_HASH;
+ }
+
+
+ ext2fs_mark_super_dirty(fs);
+ ext2fs_mark_bb_dirty(fs);
+ ext2fs_mark_ib_dirty(fs);
+ //printf("Tina: hehe\n");
+ //printf("Tina: set_blksize 0x%x\n", fs->io->manager->set_blksize);
+ io_channel_set_blksize(fs->io, fs->blocksize);
+
+ return 0;
+
+cleanup:
+ ext2fs_free(fs);
+
+ return retval;
+}
+
+/*
+ * Zero the on-disk inode table of every block group.
+ *
+ * fs            - filesystem handle; each group descriptor's bg_inode_table
+ *                 gives the first block of that group's inode table.
+ * lazy_flag     - when non-zero, only the blocks needed for
+ *                 (s_inodes_per_group - bg_itable_unused) inodes are zeroed
+ *                 instead of all inode_blocks_per_group blocks.
+ * itable_zeroed - when non-zero (or when lazy_flag is zero) the group is
+ *                 flagged EXT2_BG_INODE_ZEROED and its descriptor checksum
+ *                 is refreshed.
+ *
+ * Nothing is returned: a failing ext2fs_zero_blocks() is reported with
+ * printf() and stops the loop.
+ */
+static void write_inode_tables(ext2_filsys fs, int lazy_flag, int itable_zeroed)
+{
+	errcode_t	retval;
+	blk_t		blk;
+	dgrp_t		i;
+	int		num;
+
+	for (i = 0; i < fs->group_desc_count; i++) {
+		blk = fs->group_desc[i].bg_inode_table;
+		num = fs->inode_blocks_per_group;
+
+		if (lazy_flag) {
+			/* bytes of in-use inodes, rounded up to whole blocks */
+			num = ((((fs->super->s_inodes_per_group -
+				  fs->group_desc[i].bg_itable_unused) *
+				 EXT2_INODE_SIZE(fs->super)) +
+				EXT2_BLOCK_SIZE(fs->super) - 1) /
+			       EXT2_BLOCK_SIZE(fs->super));
+		}
+		if (!lazy_flag || itable_zeroed) {
+			/* The kernel doesn't need to zero the itable blocks */
+			fs->group_desc[i].bg_flags |= EXT2_BG_INODE_ZEROED;
+			ext2fs_group_desc_csum_set(fs, i);
+		}
+		retval = ext2fs_zero_blocks(fs, blk, num, &blk, &num);
+		if (retval) {
+			printf("Could not write %d blocks in inode table starting at %u \n",num, blk);
+			/* leave the loop but still run the final call below */
+			break;
+		}
+	}
+	/*
+	 * All-zero call that has always followed the loop; presumably it
+	 * lets ext2fs_zero_blocks() drop its internal buffer (TODO confirm
+	 * against its implementation).  It is now reached on the error
+	 * path as well, which previously returned without it.
+	 */
+	ext2fs_zero_blocks(0, 0, 0, 0, 0);
+}
+
+/*
+ * Create the root directory by calling ext2fs_mkdir() with EXT2_ROOT_INO
+ * as both parent and inode number.  A failure is only reported on the
+ * console; the caller gets no status.
+ */
+static void create_root_dir(ext2_filsys fs)
+{
+	if (ext2fs_mkdir(fs, EXT2_ROOT_INO, EXT2_ROOT_INO, 0))
+		printf("error: while creating root dir\n");
+}
+
+/*
+ * Create "lost+found" under the root directory and grow it.
+ *
+ * fs->umask is set to 077 before the directory is made.  The directory is
+ * then expanded one block at a time until the running size is at least
+ * 16 KiB and at least two blocks, or until EXT2_NDIR_BLOCKS - 1 rounds
+ * have been tried.  Any failing step prints a message and returns.
+ */
+static void create_lost_and_found(ext2_filsys fs)
+{
+	const char	*lpf_name = "lost+found";
+	unsigned int	grown = 0;
+	ext2_ino_t	lpf_ino;
+	int		round;
+
+	fs->umask = 077;
+	if (ext2fs_mkdir(fs, EXT2_ROOT_INO, 0, lpf_name)) {
+		printf("create_lost_and_found: ext2fs_mkdir error\n");
+		return;
+	}
+
+	if (ext2fs_lookup(fs, EXT2_ROOT_INO, lpf_name, strlen(lpf_name), 0,
+			  &lpf_ino)) {
+		printf("create_lost_and_found: ext2fs_lookup\n");
+		return;
+	}
+
+	round = 1;
+	while (round < EXT2_NDIR_BLOCKS) {
+		/*
+		 * Count the block about to be added; stop once the total is
+		 * both >= 16 KiB and >= 2 blocks, so big-block filesystems
+		 * still get a second block.
+		 */
+		grown += fs->blocksize;
+		if (grown >= 16*1024 && grown >= 2 * fs->blocksize)
+			break;
+
+		if (ext2fs_expand_dir(fs, lpf_ino)) {
+			printf("create_lost_and_found: ext2fs_expand_dir\n");
+			return;
+		}
+		round++;
+	}
+}
+
+/*
+ * Call ext2fs_inode_alloc_stats2(fs, ino, +1, 0) for every inode number
+ * from EXT2_ROOT_INO + 1 up to, but not including, EXT2_FIRST_INODE(),
+ * then mark the inode bitmap dirty.
+ */
+static void reserve_inodes(ext2_filsys fs)
+{
+	ext2_ino_t	ino = EXT2_ROOT_INO + 1;
+
+	while (ino < EXT2_FIRST_INODE(fs->super)) {
+		ext2fs_inode_alloc_stats2(fs, ino, +1, 0);
+		ino++;
+	}
+
+	ext2fs_mark_ib_dirty(fs);
+}
+
+/*
+ * Mark EXT2_BAD_INO in the inode bitmap, account for it with
+ * ext2fs_inode_alloc_stats2(), and hand bb_list to
+ * ext2fs_update_bb_inode().  A failure of the last step is only printed.
+ */
+static void create_bad_block_inode(ext2_filsys fs, badblocks_list bb_list)
+{
+	ext2fs_mark_inode_bitmap(fs->inode_map, EXT2_BAD_INO);
+	ext2fs_inode_alloc_stats2(fs, EXT2_BAD_INO, +1, 0);
+
+	if (ext2fs_update_bb_inode(fs, bb_list))
+		printf("create_bad_block_inode: ext2fs_update_bb_inode error\n");
+}
+
+/*
+ * Determine the number of journal blocks to use, either via
+ * user-specified # of megabytes, or via some intelligently selected
+ * defaults.
+ *
+ * Find a reasonable journal file size (in blocks) given the number of blocks
+ * in the filesystem.  For very small filesystems, it is not reasonable to
+ * have a journal that fills more than half of the filesystem.
+ *
+ * size - requested journal size in megabytes; values <= 0 select the
+ *        default from ext2fs_default_journal_size().
+ * fs   - filesystem handle (blocksize, s_blocks_count and
+ *        s_free_blocks_count are read).
+ *
+ * Returns the journal size in blocks, or 0 when
+ * ext2fs_default_journal_size() reports a negative value (filesystem too
+ * small).  When an explicit size is rejected a message is printed and 1 is
+ * returned.
+ * NOTE(review): the caller only tests for 0, so that 1 is treated as a
+ * one-block journal request -- confirm this is the intended error marker.
+ */
+static unsigned int figure_journal_size(int size, ext2_filsys fs)
+{
+	int j_blocks;
+
+	j_blocks = ext2fs_default_journal_size(fs->super->s_blocks_count);
+	if (j_blocks < 0) {
+		printf("\nFilesystem too small for a journal\n");
+		return 0;
+	}
+
+	if (size > 0) {
+		/* megabytes -> filesystem blocks */
+		j_blocks = size * 1024 / (fs->blocksize / 1024);
+		if (j_blocks < 1024 || j_blocks > 10240000) {
+			/* the %d below previously had no matching argument */
+			printf("\nThe requested journal "
+				"size is %d blocks; it must be\n"
+				"between 1024 and 10240000 blocks. "
+				"Aborting.\n", j_blocks);
+			return 1;
+		}
+		if ((unsigned) j_blocks > fs->super->s_free_blocks_count / 2) {
+			printf("\nJournal size too big for filesystem.\n");
+			return 1;
+		}
+	}
+	return j_blocks;
+}
+
+#if 0
+/*
+ * Overwrite nsect 512-byte sectors, starting at sector sect, with zeros.
+ * The I/O channel block size is switched to 512 for the write and set
+ * back to fs->blocksize afterwards.  Currently compiled out.
+ */
+static void zap_sector(ext2_filsys fs, int sect, int nsect)
+{
+	char	*zeros;
+	int	err;
+
+	zeros = malloc(512*nsect);
+	if (!zeros) {
+		printf("Out of memory erasing sectors %d-%d\n",
+		       sect, sect + nsect - 1);
+		return;
+	}
+	memset(zeros, 0, 512*nsect);
+
+	io_channel_set_blksize(fs->io, 512);
+	err = io_channel_write_blk(fs->io, sect, nsect, zeros);
+	io_channel_set_blksize(fs->io, fs->blocksize);
+
+	free(zeros);
+	if (err)
+		printf("could not erase sector %d\n",sect);
+}
+#endif
+
+/*
+ * Create a new ext4 filesystem through the "fs" handle, which is declared
+ * outside this function and prepared by ext2fs_initialize().
+ *
+ * Steps, in order:
+ *   1. ext2fs_initialize() builds the in-memory superblock and descriptors.
+ *   2. A fixed UUID and fixed directory-hash seed are stored, and the
+ *      maximum mount count gets a UUID-derived jitter.
+ *   3. ext2fs_allocate_tables() places bitmaps and inode tables.
+ *   4. The tail of the device is zeroed, inode tables are written, and the
+ *      root directory, lost+found, reserved inodes, bad-block inode and
+ *      (if the feature is set) the resize inode are created.
+ *   5. A journal inode is added unless figure_journal_size() returns 0.
+ *   6. Everything is flushed and the filesystem is closed.
+ *
+ * Returns 0 on success, -1 when ext2fs_initialize() fails and 1 on any
+ * later failure.  A failing ext2fs_flush() or ext2fs_close() now also
+ * yields 1 (it used to be reported as success), and the early error exits
+ * release fs with ext2fs_free() instead of leaking it.
+ */
+int ext4_format(void)
+{
+	/*
+	 * Since we don't know how to generate a UUID in uboot, a fixed
+	 * value is used for now.  The four words are copied in native byte
+	 * order, exactly as the former unsigned int stores did.
+	 */
+	static const unsigned int fixed_uuid[4] = {
+		0x2b8761f7, 0xf344e580, 0x16b499a6, 0xd29fb741
+	};
+	errcode_t	retval;
+	int		val;
+	unsigned int	i;
+	badblocks_list	bb_list = 0;
+	unsigned int	journal_blocks;
+	int		journal_flags = 0;
+	int		journal_size = 0;	/* 0: let figure_journal_size() pick */
+
+	if (ext2fs_initialize()){
+		printf("error cannot format\n");
+		return -1;
+	}
+
+	/* any of these ext4-era features sets s_kbytes_written to 1 */
+	if ((fs->super->s_feature_incompat &
+	     (EXT3_FEATURE_INCOMPAT_EXTENTS|EXT4_FEATURE_INCOMPAT_FLEX_BG)) ||
+	    (fs->super->s_feature_ro_compat &
+	     (EXT4_FEATURE_RO_COMPAT_HUGE_FILE|EXT4_FEATURE_RO_COMPAT_GDT_CSUM|
+	      EXT4_FEATURE_RO_COMPAT_DIR_NLINK|
+	      EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)))
+		fs->super->s_kbytes_written = 1;
+
+	/* byte copy instead of casting the byte array to unsigned int * */
+	memcpy(fs->super->s_uuid, fixed_uuid, sizeof(fs->super->s_uuid));
+
+	/* initialize the directory index variables */
+	fs->super->s_def_hash_version = EXT2_HASH_HALF_MD4;
+	fs->super->s_hash_seed[0] = 0xae1ba9fb;
+	fs->super->s_hash_seed[1] = 0x2145bde9;
+	fs->super->s_hash_seed[2] = 0x1182c581;
+	fs->super->s_hash_seed[3] = 0xfc00aa81;
+
+	/*
+	 * Add "jitter" to the superblock's check interval so that we
+	 * don't check all the filesystems at the same time.  We use a
+	 * kludgy hack of using the UUID to derive a random jitter value.
+	 */
+	for (i = 0, val = 0 ; i < sizeof(fs->super->s_uuid); i++)
+		val += fs->super->s_uuid[i];
+	fs->super->s_max_mnt_count += val % EXT2_DFL_MAX_MNT_COUNT;
+
+	/*
+	 * For the Hurd, we will turn off filetype since it doesn't
+	 * support it.
+	 */
+	if (fs->super->s_creator_os == EXT2_OS_HURD)
+		fs->super->s_feature_incompat &=
+			~EXT2_FEATURE_INCOMPAT_FILETYPE;
+
+	retval = ext2fs_allocate_tables(fs);
+	if (retval) {
+		printf("Error: while trying to allocate filesystem tables\n");
+		goto fail;
+	}
+
+	{
+		/* rsv must be a power of two (64kB is MD RAID sb alignment) */
+		unsigned int rsv = 65536 / fs->blocksize;
+		unsigned long blocks = fs->super->s_blocks_count;
+		unsigned long start;
+		blk_t ret_blk;
+
+		/*
+		 * Wipe out any old MD RAID (or other) metadata at the end
+		 * of the device.  This will also verify that the device is
+		 * as large as we think.  Be careful with very small devices.
+		 */
+		start = (blocks & ~(rsv - 1));
+		if (start > rsv)
+			start -= rsv;
+		if (start > 0) {
+			retval = ext2fs_zero_blocks(fs, start, blocks - start,
+						    &ret_blk, NULL);
+			/* best effort: report the failure and carry on */
+			if (retval)
+				printf("error while zeroing block %u at end of filesystem\n", ret_blk);
+		}
+
+		printf("write inode tables\n");
+		write_inode_tables(fs, 1, 0);
+		create_root_dir(fs);
+		create_lost_and_found(fs);
+		printf("reserve inodes\n");
+		reserve_inodes(fs);
+		printf("creating bad block inode\n");
+		create_bad_block_inode(fs, bb_list);
+
+		if (fs->super->s_feature_compat &
+		    EXT2_FEATURE_COMPAT_RESIZE_INODE) {
+			printf("creating resize inode\n");
+			retval = ext2fs_create_resize_inode(fs);
+			if (retval) {
+				printf("ext2fs_create_resize_inode error\n");
+				goto fail;
+			}
+		}
+	}
+
+	/* journal: a size of 0 blocks means "no journal" */
+	journal_blocks = figure_journal_size(journal_size, fs);
+	if (!journal_blocks) {
+		fs->super->s_feature_compat &=
+			~EXT3_FEATURE_COMPAT_HAS_JOURNAL;
+	} else {
+		printf("Do the journal and the journal size is 0x%x blocks: \n", journal_blocks);
+
+		retval = ext2fs_add_journal_inode(fs, journal_blocks,
+						  journal_flags);
+		if (retval) {
+			printf("ext2fs_add_journal_inode error\n");
+			goto fail;
+		}
+	}
+
+	printf("\nWriting superblocks and filesystem accounting information: \n");
+
+	retval = ext2fs_flush(fs);
+	if (retval) {
+		printf("\nWarning, had trouble writing out superblocks.\n");
+	}
+
+	val = ext2fs_close(fs);
+
+	/* a failed flush or close means the filesystem may be incomplete */
+	return (retval || val) ? 1 : 0;
+
+fail:
+	/* same release call ext2fs_initialize() uses on its own error path */
+	ext2fs_free(fs);
+	return 1;
+}
+
diff --git a/fs/ext4/format/extent.c b/fs/ext4/format/extent.c new file mode 100755 index 0000000..5dfe1f0 --- /dev/null +++ b/fs/ext4/format/extent.c @@ -0,0 +1,2000 @@ +/* + * extent.c --- routines to implement extents support + * + * Copyright (C) 2007 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. + * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> + + +#include "ext2_fs.h" +#include "ext2fsP.h" +#include "e2image.h" + +/* + * Definitions to be dropped in lib/ext2fs/ext2fs.h + */ + +/* + * Private definitions + */ + +struct extent_path { + char *buf; + int entries; + int max_entries; + int left; + int visit_num; + int flags; + blk64_t end_blk; + void *curr; +}; + + +struct ext2_extent_handle { + errcode_t magic; + ext2_filsys fs; + ext2_ino_t ino; + struct ext2_inode *inode; + int type; + int level; + int max_depth; + struct extent_path *path; +}; + +struct ext2_extent_path { + errcode_t magic; + int leaf_height; + blk64_t lblk; +}; + +/* + * Useful Debugging stuff + */ + +#ifdef DEBUG +static void dbg_show_header(struct ext3_extent_header *eh) +{ + printf("header: magic=%x entries=%u max=%u depth=%u generation=%u\n", + ext2fs_le16_to_cpu(eh->eh_magic), + ext2fs_le16_to_cpu(eh->eh_entries), + ext2fs_le16_to_cpu(eh->eh_max), + ext2fs_le16_to_cpu(eh->eh_depth), + ext2fs_le32_to_cpu(eh->eh_generation)); +} + +static void dbg_show_index(struct ext3_extent_idx *ix) +{ + printf("index: block=%u leaf=%u leaf_hi=%u unused=%u\n", + ext2fs_le32_to_cpu(ix->ei_block), + ext2fs_le32_to_cpu(ix->ei_leaf), + ext2fs_le16_to_cpu(ix->ei_leaf_hi), + ext2fs_le16_to_cpu(ix->ei_unused)); +} + +static void dbg_show_extent(struct ext3_extent *ex) +{ + printf("extent: block=%u-%u len=%u start=%u start_hi=%u\n", + ext2fs_le32_to_cpu(ex->ee_block), + ext2fs_le32_to_cpu(ex->ee_block) + + 
ext2fs_le16_to_cpu(ex->ee_len) - 1, + ext2fs_le16_to_cpu(ex->ee_len), + ext2fs_le32_to_cpu(ex->ee_start), + ext2fs_le16_to_cpu(ex->ee_start_hi)); +} + +static void dbg_print_extent(char *desc, struct ext2fs_extent *extent) +{ + if (desc) + printf("%s: ", desc); + printf("extent: lblk %llu--%llu, len %u, pblk %llu, flags: ", + extent->e_lblk, extent->e_lblk + extent->e_len - 1, + extent->e_len, extent->e_pblk); + if (extent->e_flags & EXT2_EXTENT_FLAGS_LEAF) + fputs("LEAF ", stdout); + if (extent->e_flags & EXT2_EXTENT_FLAGS_UNINIT) + fputs("UNINIT ", stdout); + if (extent->e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT) + fputs("2ND_VISIT ", stdout); + if (!extent->e_flags) + fputs("(none)", stdout); + fputc('\n', stdout); + +} + +#else +#define dbg_show_header(eh) do { } while (0) +#define dbg_show_index(ix) do { } while (0) +#define dbg_show_extent(ex) do { } while (0) +#define dbg_print_extent(desc, ex) do { } while (0) +#endif + +/* + * Verify the extent header as being sane + */ +errcode_t ext2fs_extent_header_verify(void *ptr, int size) +{ + int eh_max, entry_size; + struct ext3_extent_header *eh = ptr; + + dbg_show_header(eh); + if (ext2fs_le16_to_cpu(eh->eh_magic) != EXT3_EXT_MAGIC) + return EXT2_ET_EXTENT_HEADER_BAD; + if (ext2fs_le16_to_cpu(eh->eh_entries) > ext2fs_le16_to_cpu(eh->eh_max)) + return EXT2_ET_EXTENT_HEADER_BAD; + if (eh->eh_depth == 0) + entry_size = sizeof(struct ext3_extent); + else + entry_size = sizeof(struct ext3_extent_idx); + + eh_max = (size - sizeof(*eh)) / entry_size; + /* Allow two extent-sized items at the end of the block, for + * ext4_extent_tail with checksum in the future. 
*/ + if ((ext2fs_le16_to_cpu(eh->eh_max) > eh_max) || + (ext2fs_le16_to_cpu(eh->eh_max) < (eh_max - 2))) + return EXT2_ET_EXTENT_HEADER_BAD; + + return 0; +} + + +/* + * Begin functions to handle an inode's extent information + */ +extern void ext2fs_extent_free(ext2_extent_handle_t handle) +{ + int i; + + if (!handle) + return; + + if (handle->inode) + ext2fs_free_mem(&handle->inode); + if (handle->path) { + for (i=1; i <= handle->max_depth; i++) { + if (handle->path[i].buf) + ext2fs_free_mem(&handle->path[i].buf); + } + ext2fs_free_mem(&handle->path); + } + ext2fs_free_mem(&handle); +} + +extern errcode_t ext2fs_extent_open(ext2_filsys fs, ext2_ino_t ino, + ext2_extent_handle_t *ret_handle) +{ + return ext2fs_extent_open2(fs, ino, NULL, ret_handle); +} + +extern errcode_t ext2fs_extent_open2(ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode *inode, + ext2_extent_handle_t *ret_handle) +{ + struct ext2_extent_handle *handle; + errcode_t retval; + int i; + struct ext3_extent_header *eh; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + if (!inode) + if ((ino == 0) || (ino > fs->super->s_inodes_count)) + return EXT2_ET_BAD_INODE_NUM; + + retval = ext2fs_get_mem(sizeof(struct ext2_extent_handle), &handle); + if (retval) + return retval; + memset(handle, 0, sizeof(struct ext2_extent_handle)); + + retval = ext2fs_get_mem(sizeof(struct ext2_inode), &handle->inode); + if (retval) + goto errout; + + handle->ino = ino; + handle->fs = fs; + + if (inode) { + memcpy(handle->inode, inode, sizeof(struct ext2_inode)); + } + else { + retval = ext2fs_read_inode(fs, ino, handle->inode); + if (retval) + goto errout; + } + + eh = (struct ext3_extent_header *) &handle->inode->i_block[0]; + + for (i=0; i < EXT2_N_BLOCKS; i++) + if (handle->inode->i_block[i]) + break; + if (i >= EXT2_N_BLOCKS) { + eh->eh_magic = ext2fs_cpu_to_le16(EXT3_EXT_MAGIC); + eh->eh_depth = 0; + eh->eh_entries = 0; + i = (sizeof(handle->inode->i_block) - sizeof(*eh)) / + sizeof(struct ext3_extent); + 
eh->eh_max = ext2fs_cpu_to_le16(i); + handle->inode->i_flags |= EXT4_EXTENTS_FL; + } + + if (!(handle->inode->i_flags & EXT4_EXTENTS_FL)) { + retval = EXT2_ET_INODE_NOT_EXTENT; + goto errout; + } + + retval = ext2fs_extent_header_verify(eh, sizeof(handle->inode->i_block)); + if (retval) + goto errout; + + handle->max_depth = ext2fs_le16_to_cpu(eh->eh_depth); + handle->type = ext2fs_le16_to_cpu(eh->eh_magic); + + retval = ext2fs_get_mem(((handle->max_depth+1) * + sizeof(struct extent_path)), + &handle->path); + memset(handle->path, 0, + (handle->max_depth+1) * sizeof(struct extent_path)); + handle->path[0].buf = (char *) handle->inode->i_block; + + handle->path[0].left = handle->path[0].entries = + ext2fs_le16_to_cpu(eh->eh_entries); + handle->path[0].max_entries = ext2fs_le16_to_cpu(eh->eh_max); + handle->path[0].curr = 0; + handle->path[0].end_blk = + ((((__u64) handle->inode->i_size_high << 32) + + handle->inode->i_size + (fs->blocksize - 1)) + >> EXT2_BLOCK_SIZE_BITS(fs->super)); + handle->path[0].visit_num = 1; + handle->level = 0; + handle->magic = EXT2_ET_MAGIC_EXTENT_HANDLE; + + *ret_handle = handle; + return 0; + +errout: + ext2fs_extent_free(handle); + return retval; +} + +/* + * This function is responsible for (optionally) moving through the + * extent tree and then returning the current extent + */ +errcode_t ext2fs_extent_get(ext2_extent_handle_t handle, + int flags, struct ext2fs_extent *extent) +{ + struct extent_path *path, *newpath; + struct ext3_extent_header *eh; + struct ext3_extent_idx *ix = 0; + struct ext3_extent *ex; + errcode_t retval; + blk_t blk; + blk64_t end_blk; + int orig_op, op; + + EXT2_CHECK_MAGIC(handle, EXT2_ET_MAGIC_EXTENT_HANDLE); + + if (!handle->path) + return EXT2_ET_NO_CURRENT_NODE; + + orig_op = op = flags & EXT2_EXTENT_MOVE_MASK; + +retry: + path = handle->path + handle->level; + if ((orig_op == EXT2_EXTENT_NEXT) || + (orig_op == EXT2_EXTENT_NEXT_LEAF)) { + if (handle->level < handle->max_depth) { + /* interior node */ + 
if (path->visit_num == 0) { + path->visit_num++; + op = EXT2_EXTENT_DOWN; + } else if (path->left > 0) + op = EXT2_EXTENT_NEXT_SIB; + else if (handle->level > 0) + op = EXT2_EXTENT_UP; + else + return EXT2_ET_EXTENT_NO_NEXT; + } else { + /* leaf node */ + if (path->left > 0) + op = EXT2_EXTENT_NEXT_SIB; + else if (handle->level > 0) + op = EXT2_EXTENT_UP; + else + return EXT2_ET_EXTENT_NO_NEXT; + } + if (op != EXT2_EXTENT_NEXT_SIB) { +#ifdef DEBUG_GET_EXTENT + printf("<<<< OP = %s\n", + (op == EXT2_EXTENT_DOWN) ? "down" : + ((op == EXT2_EXTENT_UP) ? "up" : "unknown")); +#endif + } + } + + if ((orig_op == EXT2_EXTENT_PREV) || + (orig_op == EXT2_EXTENT_PREV_LEAF)) { + if (handle->level < handle->max_depth) { + /* interior node */ + if (path->visit_num > 0 ) { + /* path->visit_num = 0; */ + op = EXT2_EXTENT_DOWN_AND_LAST; + } else if (path->left < path->entries-1) + op = EXT2_EXTENT_PREV_SIB; + else if (handle->level > 0) + op = EXT2_EXTENT_UP; + else + return EXT2_ET_EXTENT_NO_PREV; + } else { + /* leaf node */ + if (path->left < path->entries-1) + op = EXT2_EXTENT_PREV_SIB; + else if (handle->level > 0) + op = EXT2_EXTENT_UP; + else + return EXT2_ET_EXTENT_NO_PREV; + } + if (op != EXT2_EXTENT_PREV_SIB) { +#ifdef DEBUG_GET_EXTENT + printf("<<<< OP = %s\n", + (op == EXT2_EXTENT_DOWN_AND_LAST) ? "down/last" : + ((op == EXT2_EXTENT_UP) ? "up" : "unknown")); +#endif + } + } + + if (orig_op == EXT2_EXTENT_LAST_LEAF) { + if ((handle->level < handle->max_depth) && + (path->left == 0)) + op = EXT2_EXTENT_DOWN; + else + op = EXT2_EXTENT_LAST_SIB; +#ifdef DEBUG_GET_EXTENT + printf("<<<< OP = %s\n", + (op == EXT2_EXTENT_DOWN) ? 
"down" : "last_sib"); +#endif + } + + switch (op) { + case EXT2_EXTENT_CURRENT: + ix = path->curr; + break; + case EXT2_EXTENT_ROOT: + handle->level = 0; + path = handle->path + handle->level; + case EXT2_EXTENT_FIRST_SIB: + path->left = path->entries; + path->curr = 0; + case EXT2_EXTENT_NEXT_SIB: + if (path->left <= 0) + return EXT2_ET_EXTENT_NO_NEXT; + if (path->curr) { + ix = path->curr; + ix++; + } else { + eh = (struct ext3_extent_header *) path->buf; + ix = EXT_FIRST_INDEX(eh); + } + path->left--; + path->curr = ix; + path->visit_num = 0; + break; + case EXT2_EXTENT_PREV_SIB: + if (!path->curr || + path->left+1 >= path->entries) + return EXT2_ET_EXTENT_NO_PREV; + ix = path->curr; + ix--; + path->curr = ix; + path->left++; + if (handle->level < handle->max_depth) + path->visit_num = 1; + break; + case EXT2_EXTENT_LAST_SIB: + eh = (struct ext3_extent_header *) path->buf; + path->curr = EXT_LAST_EXTENT(eh); + ix = path->curr; + path->left = 0; + path->visit_num = 0; + break; + case EXT2_EXTENT_UP: + if (handle->level <= 0) + return EXT2_ET_EXTENT_NO_UP; + handle->level--; + path--; + ix = path->curr; + if ((orig_op == EXT2_EXTENT_PREV) || + (orig_op == EXT2_EXTENT_PREV_LEAF)) + path->visit_num = 0; + break; + case EXT2_EXTENT_DOWN: + case EXT2_EXTENT_DOWN_AND_LAST: + if (!path->curr ||(handle->level >= handle->max_depth)) + return EXT2_ET_EXTENT_NO_DOWN; + + ix = path->curr; + newpath = path + 1; + if (!newpath->buf) { + retval = ext2fs_get_mem(handle->fs->blocksize, + &newpath->buf); + if (retval) + return retval; + } + blk = ext2fs_le32_to_cpu(ix->ei_leaf) + + ((__u64) ext2fs_le16_to_cpu(ix->ei_leaf_hi) << 32); + if ((handle->fs->flags & EXT2_FLAG_IMAGE_FILE) && + (handle->fs->io != handle->fs->image_io)) + memset(newpath->buf, 0, handle->fs->blocksize); + else { + retval = io_channel_read_blk(handle->fs->io, + blk, 1, newpath->buf); + if (retval) + return retval; + } + handle->level++; + + eh = (struct ext3_extent_header *) newpath->buf; + + retval = 
ext2fs_extent_header_verify(eh, handle->fs->blocksize); + if (retval) { + handle->level--; + return retval; + } + + newpath->left = newpath->entries = + ext2fs_le16_to_cpu(eh->eh_entries); + newpath->max_entries = ext2fs_le16_to_cpu(eh->eh_max); + + if (path->left > 0) { + ix++; + newpath->end_blk = ext2fs_le32_to_cpu(ix->ei_block); + } else + newpath->end_blk = path->end_blk; + + path = newpath; + if (op == EXT2_EXTENT_DOWN) { + ix = EXT_FIRST_INDEX((struct ext3_extent_header *) eh); + path->curr = ix; + path->left = path->entries - 1; + path->visit_num = 0; + } else { + ix = EXT_LAST_INDEX((struct ext3_extent_header *) eh); + path->curr = ix; + path->left = 0; + if (handle->level < handle->max_depth) + path->visit_num = 1; + } +#ifdef DEBUG_GET_EXTENT + printf("Down to level %d/%d, end_blk=%llu\n", + handle->level, handle->max_depth, + path->end_blk); +#endif + break; + default: + return EXT2_ET_OP_NOT_SUPPORTED; + } + + if (!ix) + return EXT2_ET_NO_CURRENT_NODE; + + extent->e_flags = 0; +#ifdef DEBUG_GET_EXTENT + printf("(Left %d)\n", path->left); +#endif + + if (handle->level == handle->max_depth) { + ex = (struct ext3_extent *) ix; + + extent->e_pblk = ext2fs_le32_to_cpu(ex->ee_start) + + ((__u64) ext2fs_le16_to_cpu(ex->ee_start_hi) << 32); + extent->e_lblk = ext2fs_le32_to_cpu(ex->ee_block); + extent->e_len = ext2fs_le16_to_cpu(ex->ee_len); + extent->e_flags |= EXT2_EXTENT_FLAGS_LEAF; + if (extent->e_len > EXT_INIT_MAX_LEN) { + extent->e_len -= EXT_INIT_MAX_LEN; + extent->e_flags |= EXT2_EXTENT_FLAGS_UNINIT; + } + } else { + extent->e_pblk = ext2fs_le32_to_cpu(ix->ei_leaf) + + ((__u64) ext2fs_le16_to_cpu(ix->ei_leaf_hi) << 32); + extent->e_lblk = ext2fs_le32_to_cpu(ix->ei_block); + if (path->left > 0) { + ix++; + end_blk = ext2fs_le32_to_cpu(ix->ei_block); + } else + end_blk = path->end_blk; + + extent->e_len = end_blk - extent->e_lblk; + } + if (path->visit_num) + extent->e_flags |= EXT2_EXTENT_FLAGS_SECOND_VISIT; + + if (((orig_op == EXT2_EXTENT_NEXT_LEAF) 
|| + (orig_op == EXT2_EXTENT_PREV_LEAF)) && + (handle->level != handle->max_depth)) + goto retry; + + if ((orig_op == EXT2_EXTENT_LAST_LEAF) && + ((handle->level != handle->max_depth) || + (path->left != 0))) + goto retry; + + return 0; +} + +static errcode_t update_path(ext2_extent_handle_t handle) +{ + blk64_t blk; + errcode_t retval; + struct ext3_extent_idx *ix; + + if (handle->level == 0) { + retval = ext2fs_write_inode(handle->fs, handle->ino, + handle->inode); + } else { + ix = handle->path[handle->level - 1].curr; + blk = ext2fs_le32_to_cpu(ix->ei_leaf) + + ((__u64) ext2fs_le16_to_cpu(ix->ei_leaf_hi) << 32); + + retval = io_channel_write_blk(handle->fs->io, + blk, 1, handle->path[handle->level].buf); + } + return retval; +} + +#if 0 +errcode_t ext2fs_extent_save_path(ext2_extent_handle_t handle, + ext2_extent_path_t *ret_path) +{ + ext2_extent_path_t save_path; + struct ext2fs_extent extent; + struct ext2_extent_info info; + errcode_t retval; + + retval = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT, &extent); + if (retval) + return retval; + + retval = ext2fs_extent_get_info(handle, &info); + if (retval) + return retval; + + retval = ext2fs_get_mem(sizeof(struct ext2_extent_path), &save_path); + if (retval) + return retval; + memset(save_path, 0, sizeof(struct ext2_extent_path)); + + save_path->magic = EXT2_ET_MAGIC_EXTENT_PATH; + save_path->leaf_height = info.max_depth - info.curr_level - 1; + save_path->lblk = extent.e_lblk; + + *ret_path = save_path; + return 0; +} + +errcode_t ext2fs_extent_free_path(ext2_extent_path_t path) +{ + EXT2_CHECK_MAGIC(path, EXT2_ET_MAGIC_EXTENT_PATH); + + ext2fs_free_mem(&path); + return 0; +} +#endif + +/* + * Go to the node at leaf_level which contains logical block blk. + * + * leaf_level is height from the leaf node level, i.e. + * leaf_level 0 is at leaf node, leaf_level 1 is 1 above etc. + * + * If "blk" has no mapping (hole) then handle is left at last + * extent before blk. 
+ */ +static errcode_t extent_goto(ext2_extent_handle_t handle, + int leaf_level, blk64_t blk) +{ + struct ext2fs_extent extent; + errcode_t retval; + + retval = ext2fs_extent_get(handle, EXT2_EXTENT_ROOT, &extent); + if (retval) { + if (retval == EXT2_ET_EXTENT_NO_NEXT) + retval = EXT2_ET_EXTENT_NOT_FOUND; + return retval; + } + + if (leaf_level > handle->max_depth) { +#ifdef DEBUG + printf("leaf level %d greater than tree depth %d\n", + leaf_level, handle->max_depth); +#endif + return EXT2_ET_OP_NOT_SUPPORTED; + } + +#ifdef DEBUG + printf("goto extent ino %u, level %d, %llu\n", handle->ino, + leaf_level, blk); +#endif + +#ifdef DEBUG_GOTO_EXTENTS + dbg_print_extent("root", &extent); +#endif + while (1) { + if (handle->max_depth - handle->level == leaf_level) { + /* block is in this &extent */ + if ((blk >= extent.e_lblk) && + (blk < extent.e_lblk + extent.e_len)) + return 0; + if (blk < extent.e_lblk) { + retval = ext2fs_extent_get(handle, + EXT2_EXTENT_PREV_SIB, + &extent); + return EXT2_ET_EXTENT_NOT_FOUND; + } + retval = ext2fs_extent_get(handle, + EXT2_EXTENT_NEXT_SIB, + &extent); + if (retval == EXT2_ET_EXTENT_NO_NEXT) + return EXT2_ET_EXTENT_NOT_FOUND; + if (retval) + return retval; + continue; + } + + retval = ext2fs_extent_get(handle, EXT2_EXTENT_NEXT_SIB, + &extent); + if (retval == EXT2_ET_EXTENT_NO_NEXT) + goto go_down; + if (retval) + return retval; + +#ifdef DEBUG_GOTO_EXTENTS + dbg_print_extent("next", &extent); +#endif + if (blk == extent.e_lblk) + goto go_down; + if (blk > extent.e_lblk) + continue; + + retval = ext2fs_extent_get(handle, EXT2_EXTENT_PREV_SIB, + &extent); + if (retval) + return retval; + +#ifdef DEBUG_GOTO_EXTENTS + dbg_print_extent("prev", &extent); +#endif + + go_down: + retval = ext2fs_extent_get(handle, EXT2_EXTENT_DOWN, + &extent); + if (retval) + return retval; + +#ifdef DEBUG_GOTO_EXTENTS + dbg_print_extent("down", &extent); +#endif + } +} + +errcode_t ext2fs_extent_goto(ext2_extent_handle_t handle, + blk64_t blk) +{ + 
return extent_goto(handle, 0, blk); +} + +/* + * Traverse back up to root fixing parents of current node as needed. + * + * If we changed start of first entry in a node, fix parent index start + * and so on. + * + * Safe to call for any position in node; if not at the first entry, + * will simply return. + */ +static errcode_t ext2fs_extent_fix_parents(ext2_extent_handle_t handle) +{ + int retval = 0; + blk64_t start; + struct extent_path *path; + struct ext2fs_extent extent; + + EXT2_CHECK_MAGIC(handle, EXT2_ET_MAGIC_EXTENT_HANDLE); + + if (!(handle->fs->flags & EXT2_FLAG_RW)) + return EXT2_ET_RO_FILSYS; + + if (!handle->path) + return EXT2_ET_NO_CURRENT_NODE; + + path = handle->path + handle->level; + if (!path->curr) + return EXT2_ET_NO_CURRENT_NODE; + + retval = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT, &extent); + if (retval) + goto done; + + /* modified node's start block */ + start = extent.e_lblk; + + /* traverse up until index not first, or startblk matches, or top */ + while (handle->level > 0 && + (path->left == path->entries - 1)) { + retval = ext2fs_extent_get(handle, EXT2_EXTENT_UP, &extent); + if (retval) + goto done; + if (extent.e_lblk == start) + break; + path = handle->path + handle->level; + extent.e_len += (extent.e_lblk - start); + extent.e_lblk = start; + retval = ext2fs_extent_replace(handle, 0, &extent); + if (retval) + goto done; + update_path(handle); + } + + /* put handle back to where we started */ + retval = ext2fs_extent_goto(handle, start); +done: + return retval; +} + +errcode_t ext2fs_extent_replace(ext2_extent_handle_t handle, + int flags EXT2FS_ATTR((unused)), + struct ext2fs_extent *extent) +{ + struct extent_path *path; + struct ext3_extent_idx *ix; + struct ext3_extent *ex; + + EXT2_CHECK_MAGIC(handle, EXT2_ET_MAGIC_EXTENT_HANDLE); + + if (!(handle->fs->flags & EXT2_FLAG_RW)) + return EXT2_ET_RO_FILSYS; + + if (!handle->path) + return EXT2_ET_NO_CURRENT_NODE; + + path = handle->path + handle->level; + if (!path->curr) + 
return EXT2_ET_NO_CURRENT_NODE; + +#ifdef DEBUG + printf("extent replace: %u ", handle->ino); + dbg_print_extent(0, extent); +#endif + + if (handle->level == handle->max_depth) { + ex = path->curr; + + ex->ee_block = ext2fs_cpu_to_le32(extent->e_lblk); + ex->ee_start = ext2fs_cpu_to_le32(extent->e_pblk & 0xFFFFFFFF); + ex->ee_start_hi = ext2fs_cpu_to_le16(extent->e_pblk >> 32); + if (extent->e_flags & EXT2_EXTENT_FLAGS_UNINIT) { + if (extent->e_len > EXT_UNINIT_MAX_LEN) + return EXT2_ET_EXTENT_INVALID_LENGTH; + ex->ee_len = ext2fs_cpu_to_le16(extent->e_len + + EXT_INIT_MAX_LEN); + } else { + if (extent->e_len > EXT_INIT_MAX_LEN) + return EXT2_ET_EXTENT_INVALID_LENGTH; + ex->ee_len = ext2fs_cpu_to_le16(extent->e_len); + } + } else { + ix = path->curr; + + ix->ei_leaf = ext2fs_cpu_to_le32(extent->e_pblk & 0xFFFFFFFF); + ix->ei_leaf_hi = ext2fs_cpu_to_le16(extent->e_pblk >> 32); + ix->ei_block = ext2fs_cpu_to_le32(extent->e_lblk); + ix->ei_unused = 0; + } + update_path(handle); + return 0; +} + +/* + * allocate a new block, move half the current node to it, and update parent + * + * handle will be left pointing at original record. 
+ */ +static errcode_t extent_node_split(ext2_extent_handle_t handle) +{ + errcode_t retval = 0; + blk_t new_node_pblk; + blk64_t new_node_start; + blk64_t orig_lblk; + blk64_t goal_blk = 0; + int orig_height; + char *block_buf = NULL; + struct ext2fs_extent extent; + struct extent_path *path, *newpath = 0; + struct ext3_extent_header *eh, *neweh; + int tocopy; + int new_root = 0; + struct ext2_extent_info info; + + /* basic sanity */ + EXT2_CHECK_MAGIC(handle, EXT2_ET_MAGIC_EXTENT_HANDLE); + + if (!(handle->fs->flags & EXT2_FLAG_RW)) + return EXT2_ET_RO_FILSYS; + + if (!handle->path) + return EXT2_ET_NO_CURRENT_NODE; + +#ifdef DEBUG + printf("splitting node at level %d\n", handle->level); +#endif + retval = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT, &extent); + if (retval) + goto done; + + retval = ext2fs_extent_get_info(handle, &info); + if (retval) + goto done; + + /* save the position we were originally splitting... */ + orig_height = info.max_depth - info.curr_level; + orig_lblk = extent.e_lblk; + + /* Is there room in the parent for a new entry? 
*/ + if (handle->level && + (handle->path[handle->level - 1].entries >= + handle->path[handle->level - 1].max_entries)) { + +#ifdef DEBUG + printf("parent level %d full; splitting it too\n", + handle->level - 1); +#endif + /* split the parent */ + retval = ext2fs_extent_get(handle, EXT2_EXTENT_UP, &extent); + if (retval) + goto done; + goal_blk = extent.e_pblk; + + retval = extent_node_split(handle); + if (retval) + goto done; + + /* get handle back to our original split position */ + retval = extent_goto(handle, orig_height, orig_lblk); + if (retval) + goto done; + } + + /* At this point, parent should have room for this split */ + path = handle->path + handle->level; + if (!path->curr) + return EXT2_ET_NO_CURRENT_NODE; + + /* extent header of the current node we'll split */ + eh = (struct ext3_extent_header *)path->buf; + + /* splitting root level means moving them all out */ + if (handle->level == 0) { + new_root = 1; + tocopy = ext2fs_le16_to_cpu(eh->eh_entries); + retval = ext2fs_get_mem(((handle->max_depth+2) * + sizeof(struct extent_path)), + &newpath); + if (retval) + goto done; + memset(newpath, 0, + ((handle->max_depth+2) * sizeof(struct extent_path))); + } else { + tocopy = ext2fs_le16_to_cpu(eh->eh_entries) / 2; + } + +#ifdef DEBUG + printf("will copy out %d of %d entries at level %d\n", + tocopy, ext2fs_le16_to_cpu(eh->eh_entries), + handle->level); +#endif + + if (!tocopy) { +#ifdef DEBUG + printf("Nothing to copy to new block!\n"); +#endif + retval = EXT2_ET_CANT_SPLIT_EXTENT; + goto done; + } + + /* first we need a new block, or can do nothing. 
*/ + block_buf = malloc(handle->fs->blocksize); + if (!block_buf) { + retval = ENOMEM; + goto done; + } + + if (!goal_blk) { + dgrp_t group = ext2fs_group_of_ino(handle->fs, handle->ino); + __u8 log_flex = handle->fs->super->s_log_groups_per_flex; + + if (log_flex) + group = group & ~((1 << (log_flex)) - 1); + goal_blk = (group * handle->fs->super->s_blocks_per_group) + + handle->fs->super->s_first_data_block; + } + retval = ext2fs_alloc_block(handle->fs, (blk_t) goal_blk, block_buf, + &new_node_pblk); + if (retval) + goto done; + +#ifdef DEBUG + printf("will copy to new node at block %lu\n", + (unsigned long) new_node_pblk); +#endif + + /* Copy data into new block buffer */ + /* First the header for the new block... */ + neweh = (struct ext3_extent_header *) block_buf; + memcpy(neweh, eh, sizeof(struct ext3_extent_header)); + neweh->eh_entries = ext2fs_cpu_to_le16(tocopy); + neweh->eh_max = ext2fs_cpu_to_le16((handle->fs->blocksize - + sizeof(struct ext3_extent_header)) / + sizeof(struct ext3_extent)); + + /* then the entries for the new block... */ + memcpy(EXT_FIRST_INDEX(neweh), + EXT_FIRST_INDEX(eh) + + (ext2fs_le16_to_cpu(eh->eh_entries) - tocopy), + sizeof(struct ext3_extent_idx) * tocopy); + + new_node_start = ext2fs_le32_to_cpu(EXT_FIRST_INDEX(neweh)->ei_block); + + /* ...and write the new node block out to disk. */ + retval = io_channel_write_blk(handle->fs->io, new_node_pblk, 1, block_buf); + + if (retval) + goto done; + + /* OK! 
we've created the new node; now adjust the tree */ + + /* current path now has fewer active entries, we copied some out */ + if (handle->level == 0) { + memcpy(newpath, path, + sizeof(struct extent_path) * (handle->max_depth+1)); + handle->path = newpath; + newpath = path; + path = handle->path; + path->entries = 1; + path->left = path->max_entries - 1; + handle->max_depth++; + eh->eh_depth = ext2fs_cpu_to_le16(handle->max_depth); + } else { + path->entries -= tocopy; + path->left -= tocopy; + } + + eh->eh_entries = ext2fs_cpu_to_le16(path->entries); + /* this writes out the node, incl. the modified header */ + retval = update_path(handle); + if (retval) + goto done; + + /* now go up and insert/replace index for new node we created */ + if (new_root) { + retval = ext2fs_extent_get(handle, EXT2_EXTENT_FIRST_SIB, &extent); + if (retval) + goto done; + + extent.e_lblk = new_node_start; + extent.e_pblk = new_node_pblk; + extent.e_len = handle->path[0].end_blk - extent.e_lblk; + retval = ext2fs_extent_replace(handle, 0, &extent); + if (retval) + goto done; + } else { + __u32 new_node_length; + + retval = ext2fs_extent_get(handle, EXT2_EXTENT_UP, &extent); + /* will insert after this one; it's length is shorter now */ + new_node_length = new_node_start - extent.e_lblk; + extent.e_len -= new_node_length; + retval = ext2fs_extent_replace(handle, 0, &extent); + if (retval) + goto done; + + /* now set up the new extent and insert it */ + extent.e_lblk = new_node_start; + extent.e_pblk = new_node_pblk; + extent.e_len = new_node_length; + retval = ext2fs_extent_insert(handle, EXT2_EXTENT_INSERT_AFTER, &extent); + if (retval) + goto done; + } + + /* get handle back to our original position */ + retval = extent_goto(handle, orig_height, orig_lblk); + if (retval) + goto done; + + /* new node hooked in, so update inode block count (do this here?) 
*/ + handle->inode->i_blocks += handle->fs->blocksize / 512; + retval = ext2fs_write_inode(handle->fs, handle->ino, + handle->inode); + if (retval) + goto done; + +done: + if (newpath) + ext2fs_free_mem(&newpath); + free(block_buf); + + return retval; +} + +errcode_t ext2fs_extent_insert(ext2_extent_handle_t handle, int flags, + struct ext2fs_extent *extent) +{ + struct extent_path *path; + struct ext3_extent_idx *ix; + struct ext3_extent_header *eh; + errcode_t retval; + + EXT2_CHECK_MAGIC(handle, EXT2_ET_MAGIC_EXTENT_HANDLE); + + if (!(handle->fs->flags & EXT2_FLAG_RW)) + return EXT2_ET_RO_FILSYS; + + if (!handle->path) + return EXT2_ET_NO_CURRENT_NODE; + +#ifdef DEBUG + printf("extent insert: %u ", handle->ino); + dbg_print_extent(0, extent); +#endif + + path = handle->path + handle->level; + + if (path->entries >= path->max_entries) { + if (flags & EXT2_EXTENT_INSERT_NOSPLIT) { + return EXT2_ET_CANT_INSERT_EXTENT; + } else { +#ifdef DEBUG + printf("node full (level %d) - splitting\n", + handle->level); +#endif + retval = extent_node_split(handle); + if (retval) + return retval; + path = handle->path + handle->level; + } + } + + eh = (struct ext3_extent_header *) path->buf; + if (path->curr) { + ix = path->curr; + if (flags & EXT2_EXTENT_INSERT_AFTER) { + ix++; + path->left--; + } + } else + ix = EXT_FIRST_INDEX(eh); + + path->curr = ix; + + if (path->left >= 0) + memmove(ix + 1, ix, + (path->left+1) * sizeof(struct ext3_extent_idx)); + path->left++; + path->entries++; + + eh = (struct ext3_extent_header *) path->buf; + eh->eh_entries = ext2fs_cpu_to_le16(path->entries); + + retval = ext2fs_extent_replace(handle, 0, extent); + if (retval) + goto errout; + + retval = update_path(handle); + if (retval) + goto errout; + + return 0; + +errout: + ext2fs_extent_delete(handle, 0); + return retval; +} + +/* + * Sets the physical block for a logical file block in the extent tree. + * + * May: map unmapped, unmap mapped, or remap mapped blocks. 
+ * + * Mapping an unmapped block adds a single-block extent. + * + * Unmapping first or last block modifies extent in-place + * - But may need to fix parent's starts too in first-block case + * + * Mapping any unmapped block requires adding a (single-block) extent + * and inserting into proper point in tree. + * + * Modifying (unmapping or remapping) a block in the middle + * of an extent requires splitting the extent. + * - Remapping case requires new single-block extent. + * + * Remapping first or last block adds an extent. + * + * We really need extent adding to be smart about merging. + */ + +errcode_t ext2fs_extent_set_bmap(ext2_extent_handle_t handle, + blk64_t logical, blk64_t physical, int flags) +{ + errcode_t ec, retval = 0; + int mapped = 1; /* logical is mapped? */ + int orig_height; + int extent_uninit = 0; + int prev_uninit = 0; + int next_uninit = 0; + int new_uninit = 0; + int max_len = EXT_INIT_MAX_LEN; + int has_prev, has_next; + blk64_t orig_lblk; + struct extent_path *path; + struct ext2fs_extent extent, next_extent, prev_extent; + struct ext2fs_extent newextent; + struct ext2_extent_info info; + + EXT2_CHECK_MAGIC(handle, EXT2_ET_MAGIC_EXTENT_HANDLE); + +#ifdef DEBUG + printf("set_bmap ino %u log %lld phys %lld flags %d\n", + handle->ino, logical, physical, flags); +#endif + + if (!(handle->fs->flags & EXT2_FLAG_RW)) + return EXT2_ET_RO_FILSYS; + + if (!handle->path) + return EXT2_ET_NO_CURRENT_NODE; + + path = handle->path + handle->level; + + if (flags & EXT2_EXTENT_SET_BMAP_UNINIT) { + new_uninit = 1; + max_len = EXT_UNINIT_MAX_LEN; + } + + /* if (re)mapping, set up new extent to insert */ + if (physical) { + newextent.e_len = 1; + newextent.e_pblk = physical; + newextent.e_lblk = logical; + newextent.e_flags = EXT2_EXTENT_FLAGS_LEAF; + if (new_uninit) + newextent.e_flags |= EXT2_EXTENT_FLAGS_UNINIT; + } + + /* special case if the extent tree is completely empty */ + if ((handle->max_depth == 0) && (path->entries == 0)) { + retval = 
ext2fs_extent_insert(handle, 0, &newextent); + return retval; + } + + /* save our original location in the extent tree */ + if ((retval = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT, + &extent))) { + if (retval != EXT2_ET_NO_CURRENT_NODE) + return retval; + memset(&extent, 0, sizeof(extent)); + } + if ((retval = ext2fs_extent_get_info(handle, &info))) + return retval; + orig_height = info.max_depth - info.curr_level; + orig_lblk = extent.e_lblk; + + /* go to the logical spot we want to (re/un)map */ + retval = ext2fs_extent_goto(handle, logical); + if (retval) { + if (retval == EXT2_ET_EXTENT_NOT_FOUND) { + retval = 0; + mapped = 0; + if (!physical) { +#ifdef DEBUG + printf("block %llu already unmapped\n", + logical); +#endif + goto done; + } + } else + goto done; + } + + /* + * This may be the extent *before* the requested logical, + * if it's currently unmapped. + * + * Get the previous and next leaf extents, if they are present. + */ + retval = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT, &extent); + if (retval) + goto done; + if (extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT) + extent_uninit = 1; + retval = ext2fs_extent_get(handle, EXT2_EXTENT_NEXT_LEAF, &next_extent); + if (retval) { + has_next = 0; + if (retval != EXT2_ET_EXTENT_NO_NEXT) + goto done; + } else { + dbg_print_extent("set_bmap: next_extent", + &next_extent); + has_next = 1; + if (next_extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT) + next_uninit = 1; + } + retval = ext2fs_extent_goto(handle, logical); + if (retval && retval != EXT2_ET_EXTENT_NOT_FOUND) + goto done; + retval = ext2fs_extent_get(handle, EXT2_EXTENT_PREV_LEAF, &prev_extent); + if (retval) { + has_prev = 0; + if (retval != EXT2_ET_EXTENT_NO_PREV) + goto done; + } else { + has_prev = 1; + dbg_print_extent("set_bmap: prev_extent", + &prev_extent); + if (prev_extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT) + prev_uninit = 1; + } + retval = ext2fs_extent_goto(handle, logical); + if (retval && retval != EXT2_ET_EXTENT_NOT_FOUND) + goto done; + + 
/* check if already pointing to the requested physical */ + if (mapped && (new_uninit == extent_uninit) && + (extent.e_pblk + (logical - extent.e_lblk) == physical)) { +#ifdef DEBUG + printf("physical block (at %llu) unchanged\n", logical); +#endif + goto done; + } + + if (!mapped) { +#ifdef DEBUG + printf("mapping unmapped logical block %llu\n", logical); +#endif + if ((logical == extent.e_lblk + extent.e_len) && + (physical == extent.e_pblk + extent.e_len) && + (new_uninit == extent_uninit) && + ((int) extent.e_len < max_len-1)) { + extent.e_len++; + retval = ext2fs_extent_replace(handle, 0, &extent); + } else if ((logical == extent.e_lblk - 1) && + (physical == extent.e_pblk - 1) && + (new_uninit == extent_uninit) && + ((int) extent.e_len < max_len - 1)) { + extent.e_len++; + extent.e_lblk--; + extent.e_pblk--; + retval = ext2fs_extent_replace(handle, 0, &extent); + } else if (has_next && + (logical == next_extent.e_lblk - 1) && + (physical == next_extent.e_pblk - 1) && + (new_uninit == next_uninit) && + ((int) next_extent.e_len < max_len - 1)) { + retval = ext2fs_extent_get(handle, + EXT2_EXTENT_NEXT_LEAF, + &next_extent); + if (retval) + goto done; + next_extent.e_len++; + next_extent.e_lblk--; + next_extent.e_pblk--; + retval = ext2fs_extent_replace(handle, 0, &next_extent); + } else if (logical < extent.e_lblk) + retval = ext2fs_extent_insert(handle, 0, &newextent); + else + retval = ext2fs_extent_insert(handle, + EXT2_EXTENT_INSERT_AFTER, &newextent); + if (retval) + goto done; + retval = ext2fs_extent_fix_parents(handle); + if (retval) + goto done; + } else if ((logical == extent.e_lblk) && (extent.e_len == 1)) { +#ifdef DEBUG + printf("(re/un)mapping only block in extent\n"); +#endif + if (physical) { + retval = ext2fs_extent_replace(handle, 0, &newextent); + } else { + retval = ext2fs_extent_delete(handle, 0); + if (retval) + goto done; + ec = ext2fs_extent_fix_parents(handle); + if (ec != EXT2_ET_NO_CURRENT_NODE) + retval = ec; + } + + if (retval) + 
goto done; + } else if (logical == extent.e_lblk + extent.e_len - 1) { +#ifdef DEBUG + printf("(re/un)mapping last block in extent\n"); +#endif + if (physical) { + if (has_next && + (logical == (next_extent.e_lblk - 1)) && + (physical == (next_extent.e_pblk - 1)) && + (new_uninit == next_uninit) && + ((int) next_extent.e_len < max_len - 1)) { + retval = ext2fs_extent_get(handle, + EXT2_EXTENT_NEXT_LEAF, &next_extent); + if (retval) + goto done; + next_extent.e_len++; + next_extent.e_lblk--; + next_extent.e_pblk--; + retval = ext2fs_extent_replace(handle, 0, + &next_extent); + if (retval) + goto done; + } else + retval = ext2fs_extent_insert(handle, + EXT2_EXTENT_INSERT_AFTER, &newextent); + if (retval) + goto done; + /* Now pointing at inserted extent; move back to prev */ + retval = ext2fs_extent_get(handle, + EXT2_EXTENT_PREV_LEAF, + &extent); + if (retval) + goto done; + } + extent.e_len--; + retval = ext2fs_extent_replace(handle, 0, &extent); + if (retval) + goto done; + } else if (logical == extent.e_lblk) { +#ifdef DEBUG + printf("(re/un)mapping first block in extent\n"); +#endif + if (physical) { + if (has_prev && + (logical == (prev_extent.e_lblk + + prev_extent.e_len)) && + (physical == (prev_extent.e_pblk + + prev_extent.e_len)) && + (new_uninit == prev_uninit) && + ((int) prev_extent.e_len < max_len-1)) { + retval = ext2fs_extent_get(handle, + EXT2_EXTENT_PREV_LEAF, &prev_extent); + if (retval) + goto done; + prev_extent.e_len++; + retval = ext2fs_extent_replace(handle, 0, + &prev_extent); + } else + retval = ext2fs_extent_insert(handle, + 0, &newextent); + if (retval) + goto done; + retval = ext2fs_extent_get(handle, + EXT2_EXTENT_NEXT_LEAF, + &extent); + if (retval) + goto done; + } + extent.e_pblk++; + extent.e_lblk++; + extent.e_len--; + retval = ext2fs_extent_replace(handle, 0, &extent); + if (retval) + goto done; + } else { + __u32 orig_length; + +#ifdef DEBUG + printf("(re/un)mapping in middle of extent\n"); +#endif + /* need to split this extent; 
later */ + + orig_length = extent.e_len; + + /* shorten pre-split extent */ + extent.e_len = (logical - extent.e_lblk); + retval = ext2fs_extent_replace(handle, 0, &extent); + if (retval) + goto done; + /* insert our new extent, if any */ + if (physical) { + /* insert new extent after current */ + retval = ext2fs_extent_insert(handle, + EXT2_EXTENT_INSERT_AFTER, &newextent); + if (retval) + goto done; + } + /* add post-split extent */ + extent.e_pblk += extent.e_len + 1; + extent.e_lblk += extent.e_len + 1; + extent.e_len = orig_length - extent.e_len - 1; + retval = ext2fs_extent_insert(handle, + EXT2_EXTENT_INSERT_AFTER, &extent); + if (retval) + goto done; + } + +done: + /* get handle back to its position */ + if (orig_height > handle->max_depth) + orig_height = handle->max_depth; /* In case we shortened the tree */ + extent_goto(handle, orig_height, orig_lblk); + return retval; +} + +errcode_t ext2fs_extent_delete(ext2_extent_handle_t handle, int flags) +{ + struct extent_path *path; + char *cp; + struct ext3_extent_header *eh; + errcode_t retval = 0; + + EXT2_CHECK_MAGIC(handle, EXT2_ET_MAGIC_EXTENT_HANDLE); + + if (!(handle->fs->flags & EXT2_FLAG_RW)) + return EXT2_ET_RO_FILSYS; + + if (!handle->path) + return EXT2_ET_NO_CURRENT_NODE; + +#ifdef DEBUG + { + struct ext2fs_extent extent; + + retval = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT, + &extent); + if (retval == 0) { + printf("extent delete %u ", handle->ino); + dbg_print_extent(0, &extent); + } + } +#endif + + path = handle->path + handle->level; + if (!path->curr) + return EXT2_ET_NO_CURRENT_NODE; + + cp = path->curr; + + if (path->left) { + memmove(cp, cp + sizeof(struct ext3_extent_idx), + path->left * sizeof(struct ext3_extent_idx)); + path->left--; + } else { + struct ext3_extent_idx *ix = path->curr; + ix--; + path->curr = ix; + } + if (--path->entries == 0) + path->curr = 0; + + /* if non-root node has no entries left, remove it & parent ptr to it */ + if (path->entries == 0 && handle->level) 
{ + if (!(flags & EXT2_EXTENT_DELETE_KEEP_EMPTY)) { + struct ext2fs_extent extent; + + retval = ext2fs_extent_get(handle, EXT2_EXTENT_UP, + &extent); + if (retval) + return retval; + + retval = ext2fs_extent_delete(handle, flags); + handle->inode->i_blocks -= handle->fs->blocksize / 512; + retval = ext2fs_write_inode(handle->fs, handle->ino, + handle->inode); + ext2fs_block_alloc_stats(handle->fs, extent.e_pblk, -1); + } + } else { + eh = (struct ext3_extent_header *) path->buf; + eh->eh_entries = ext2fs_cpu_to_le16(path->entries); + if ((path->entries == 0) && (handle->level == 0)) + eh->eh_depth = handle->max_depth = 0; + retval = update_path(handle); + } + return retval; +} + +errcode_t ext2fs_extent_get_info(ext2_extent_handle_t handle, + struct ext2_extent_info *info) +{ + struct extent_path *path; + + EXT2_CHECK_MAGIC(handle, EXT2_ET_MAGIC_EXTENT_HANDLE); + + memset(info, 0, sizeof(struct ext2_extent_info)); + + path = handle->path + handle->level; + if (path) { + if (path->curr) + info->curr_entry = ((char *) path->curr - path->buf) / + sizeof(struct ext3_extent_idx); + else + info->curr_entry = 0; + info->num_entries = path->entries; + info->max_entries = path->max_entries; + info->bytes_avail = (path->max_entries - path->entries) * + sizeof(struct ext3_extent); + } + + info->curr_level = handle->level; + info->max_depth = handle->max_depth; + info->max_lblk = ((__u64) 1 << 32) - 1; + info->max_pblk = ((__u64) 1 << 48) - 1; + info->max_len = (1UL << 15); + info->max_uninit_len = (1UL << 15) - 1; + + return 0; +} + +#ifdef DEBUG + +#include "ss/ss.h" + +#include "debugfs.h" + +/* + * Hook in new commands into debugfs + */ +const char *debug_prog_name = "tst_extents"; +extern ss_request_table extent_cmds; +ss_request_table *extra_cmds = &extent_cmds; + +ext2_ino_t current_ino = 0; +ext2_extent_handle_t current_handle; + +int common_extent_args_process(int argc, char *argv[], int min_argc, + int max_argc, const char *cmd, + const char *usage, int flags) +{ + 
if (common_args_process(argc, argv, min_argc, max_argc, cmd, + usage, flags)) + return 1; + + if (!current_handle) { + //com_err(cmd, 0, "Extent handle not open"); + return 1; + } + return 0; +} + +void do_inode(int argc, char *argv[]) +{ + ext2_ino_t inode; + int i; + struct ext3_extent_header *eh; + errcode_t retval; + + if (check_fs_open(argv[0])) + return; + + if (argc == 1) { + if (current_ino) + printf("Current inode is %d\n", current_ino); + else + printf("No current inode\n"); + return; + } + + if (common_inode_args_process(argc, argv, &inode, 0)) { + return; + } + + current_ino = 0; + + retval = ext2fs_extent_open(current_fs, inode, ¤t_handle); + if (retval) { + //com_err(argv[1], retval, "while opening extent handle"); + return; + } + + current_ino = inode; + + printf("Loaded inode %d\n", current_ino); + + return; +} + +void generic_goto_node(char *cmd_name, int op) +{ + struct ext2fs_extent extent; + errcode_t retval; + + if (check_fs_open(cmd_name)) + return; + + if (!current_handle) { + //com_err(cmd_name, 0, "Extent handle not open"); + return; + } + + retval = ext2fs_extent_get(current_handle, op, &extent); + if (retval) { + //com_err(cmd_name, retval, 0); + return; + } + dbg_print_extent(0, &extent); +} + +void do_current_node(int argc, char *argv[]) +{ + generic_goto_node(argv[0], EXT2_EXTENT_CURRENT); +} + +void do_root_node(int argc, char *argv[]) +{ + generic_goto_node(argv[0], EXT2_EXTENT_ROOT); +} + +void do_last_leaf(int argc, char *argv[]) +{ + generic_goto_node(argv[0], EXT2_EXTENT_LAST_LEAF); +} + +void do_first_sib(int argc, char *argv[]) +{ + generic_goto_node(argv[0], EXT2_EXTENT_FIRST_SIB); +} + +void do_last_sib(int argc, char *argv[]) +{ + generic_goto_node(argv[0], EXT2_EXTENT_LAST_SIB); +} + +void do_next_sib(int argc, char *argv[]) +{ + generic_goto_node(argv[0], EXT2_EXTENT_NEXT_SIB); +} + +void do_prev_sib(int argc, char *argv[]) +{ + generic_goto_node(argv[0], EXT2_EXTENT_PREV_SIB); +} + +void do_next_leaf(int argc, char 
*argv[]) +{ + generic_goto_node(argv[0], EXT2_EXTENT_NEXT_LEAF); +} + +void do_prev_leaf(int argc, char *argv[]) +{ + generic_goto_node(argv[0], EXT2_EXTENT_PREV_LEAF); +} + +void do_next(int argc, char *argv[]) +{ + generic_goto_node(argv[0], EXT2_EXTENT_NEXT); +} + +void do_prev(int argc, char *argv[]) +{ + generic_goto_node(argv[0], EXT2_EXTENT_PREV); +} + +void do_up(int argc, char *argv[]) +{ + generic_goto_node(argv[0], EXT2_EXTENT_UP); +} + +void do_down(int argc, char *argv[]) +{ + generic_goto_node(argv[0], EXT2_EXTENT_DOWN); +} + +void do_delete_node(int argc, char *argv[]) +{ + errcode_t retval; + int err; + + if (common_extent_args_process(argc, argv, 1, 1, "delete_node", + "", CHECK_FS_RW | CHECK_FS_BITMAPS)) + return; + + retval = ext2fs_extent_delete(current_handle, 0); + if (retval) { + //com_err(argv[0], retval, 0); + return; + } + if (current_handle->path && current_handle->path[0].curr) + do_current_node(argc, argv); +} + +void do_replace_node(int argc, char *argv[]) +{ + const char *usage = "[--uninit] <lblk> <len> <pblk>"; + errcode_t retval; + struct ext2fs_extent extent; + int err; + + if (common_extent_args_process(argc, argv, 3, 5, "replace_node", + usage, CHECK_FS_RW | CHECK_FS_BITMAPS)) + return; + + extent.e_flags = 0; + + if (!strcmp(argv[1], "--uninit")) { + argc--; + argv++; + extent.e_flags |= EXT2_EXTENT_FLAGS_UNINIT; + } + + if (argc != 4) { + fprintf(stderr, "Usage: %s %s\n", argv[0], usage); + return; + } + + extent.e_lblk = parse_ulong(argv[1], argv[0], "logical block", &err); + if (err) + return; + + extent.e_len = parse_ulong(argv[2], argv[0], "logical block", &err); + if (err) + return; + + extent.e_pblk = parse_ulong(argv[3], argv[0], "logical block", &err); + if (err) + return; + + retval = ext2fs_extent_replace(current_handle, 0, &extent); + if (retval) { + //com_err(argv[0], retval, 0); + return; + } + do_current_node(argc, argv); +} + +void do_split_node(int argc, char *argv[]) +{ + errcode_t retval; + struct 
ext2fs_extent extent; + int err; + + if (common_extent_args_process(argc, argv, 1, 1, "split_node", + "", CHECK_FS_RW | CHECK_FS_BITMAPS)) + return; + + retval = extent_node_split(current_handle); + if (retval) { + //com_err(argv[0], retval, 0); + return; + } + do_current_node(argc, argv); +} + +void do_insert_node(int argc, char *argv[]) +{ + const char *usage = "[--after] [--uninit] <lblk> <len> <pblk>"; + errcode_t retval; + struct ext2fs_extent extent; + char *cmd; + int err; + int flags = 0; + + if (common_extent_args_process(argc, argv, 3, 6, "insert_node", + usage, CHECK_FS_RW | CHECK_FS_BITMAPS)) + return; + + cmd = argv[0]; + + extent.e_flags = 0; + + while (argc > 2) { + if (!strcmp(argv[1], "--after")) { + argc--; + argv++; + flags |= EXT2_EXTENT_INSERT_AFTER; + continue; + } + if (!strcmp(argv[1], "--uninit")) { + argc--; + argv++; + extent.e_flags |= EXT2_EXTENT_FLAGS_UNINIT; + continue; + } + break; + } + + if (argc != 4) { + fprintf(stderr, "usage: %s %s\n", cmd, usage); + return; + } + + extent.e_lblk = parse_ulong(argv[1], cmd, + "logical block", &err); + if (err) + return; + + extent.e_len = parse_ulong(argv[2], cmd, + "length", &err); + if (err) + return; + + extent.e_pblk = parse_ulong(argv[3], cmd, + "pysical block", &err); + if (err) + return; + + retval = ext2fs_extent_insert(current_handle, flags, &extent); + if (retval) { + //com_err(cmd, retval, 0); + return; + } + do_current_node(argc, argv); +} + +void do_set_bmap(int argc, char **argv) +{ + const char *usage = "[--uninit] <lblk> <pblk>"; + errcode_t retval; + blk_t logical; + blk_t physical; + char *cmd = argv[0]; + int flags = 0; + int err; + + if (common_extent_args_process(argc, argv, 3, 5, "set_bmap", + usage, CHECK_FS_RW | CHECK_FS_BITMAPS)) + return; + + if (argc > 2 && !strcmp(argv[1], "--uninit")) { + argc--; + argv++; + flags |= EXT2_EXTENT_SET_BMAP_UNINIT; + } + + if (argc != 3) { + fprintf(stderr, "Usage: %s %s\n", cmd, usage); + return; + } + + logical = parse_ulong(argv[1], 
cmd, + "logical block", &err); + if (err) + return; + + physical = parse_ulong(argv[2], cmd, + "physical block", &err); + if (err) + return; + + retval = ext2fs_extent_set_bmap(current_handle, logical, + (blk64_t) physical, flags); + if (retval) { + //com_err(cmd, retval, 0); + return; + } + if (current_handle->path && current_handle->path[0].curr) + do_current_node(argc, argv); +} + +void do_print_all(int argc, char **argv) +{ + const char *usage = "[--leaf-only|--reverse|--reverse-leaf]"; + struct ext2fs_extent extent; + errcode_t retval; + errcode_t end_err = EXT2_ET_EXTENT_NO_NEXT; + int op = EXT2_EXTENT_NEXT; + int first_op = EXT2_EXTENT_ROOT; + + + if (common_extent_args_process(argc, argv, 1, 2, "print_all", + usage, 0)) + return; + + if (argc == 2) { + if (!strcmp(argv[1], "--leaf-only")) + op = EXT2_EXTENT_NEXT_LEAF; + else if (!strcmp(argv[1], "--reverse")) { + op = EXT2_EXTENT_PREV; + first_op = EXT2_EXTENT_LAST_LEAF; + end_err = EXT2_ET_EXTENT_NO_PREV; + } else if (!strcmp(argv[1], "--reverse-leaf")) { + op = EXT2_EXTENT_PREV_LEAF; + first_op = EXT2_EXTENT_LAST_LEAF; + end_err = EXT2_ET_EXTENT_NO_PREV; + } else { + fprintf(stderr, "Usage: %s %s\n", argv[0], usage); + return; + } + } + + retval = ext2fs_extent_get(current_handle, first_op, &extent); + if (retval) { + //com_err(argv[0], retval, 0); + return; + } + dbg_print_extent(0, &extent); + + while (1) { + retval = ext2fs_extent_get(current_handle, op, &extent); + if (retval == end_err) + break; + + if (retval) { + //com_err(argv[0], retval, 0); + return; + } + dbg_print_extent(0, &extent); + } +} + +void do_info(int argc, char **argv) +{ + struct ext2fs_extent extent; + struct ext2_extent_info info; + errcode_t retval; + + if (common_extent_args_process(argc, argv, 1, 1, "info", "", 0)) + return; + + retval = ext2fs_extent_get_info(current_handle, &info); + if (retval) { + //com_err(argv[0], retval, 0); + return; + } + + retval = ext2fs_extent_get(current_handle, + EXT2_EXTENT_CURRENT, &extent); + 
if (retval) { + //com_err(argv[0], retval, 0); + return; + } + + dbg_print_extent(0, &extent); + + printf("Current handle location: %d/%d (max: %d, bytes %d), level %d/%d\n", + info.curr_entry, info.num_entries, info.max_entries, + info.bytes_avail, info.curr_level, info.max_depth); + printf("\tmax lblk: %llu, max pblk: %llu\n", info.max_lblk, + info.max_pblk); + printf("\tmax_len: %u, max_uninit_len: %u\n", info.max_len, + info.max_uninit_len); +} + +void do_goto_block(int argc, char **argv) +{ + struct ext2fs_extent extent; + errcode_t retval; + int op = EXT2_EXTENT_NEXT_LEAF; + blk_t blk; + int level = 0; + + if (common_extent_args_process(argc, argv, 2, 3, "goto_block", + "block [level]", 0)) + return; + + if (strtoblk(argv[0], argv[1], &blk)) + return; + + if (argc == 3) + if (strtoblk(argv[0], argv[2], &level)) + return; + + retval = extent_goto(current_handle, level, (blk64_t) blk); + + if (retval) { + //com_err(argv[0], retval, + // "while trying to go to block %u, level %d", + // blk, level); + return; + } + + generic_goto_node(argv[0], EXT2_EXTENT_CURRENT); +} +#endif + diff --git a/fs/ext4/format/freefs.c b/fs/ext4/format/freefs.c new file mode 100755 index 0000000..0876dad --- /dev/null +++ b/fs/ext4/format/freefs.c @@ -0,0 +1,115 @@ +/* + * freefs.c --- free an ext2 filesystem + * + * Copyright (C) 1993, 1994, 1995, 1996 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. 
+ * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> +#include <linux/stat.h> +#include <linux/time.h> + +#include "ext2_fs.h" +#include "ext2fsP.h" + +static void ext2fs_free_inode_cache(struct ext2_inode_cache *icache); + +void ext2fs_free(ext2_filsys fs) +{ + if (!fs || (fs->magic != EXT2_ET_MAGIC_EXT2FS_FILSYS)) + return; + if (fs->image_io != fs->io) { + if (fs->image_io) + io_channel_close(fs->image_io); + } + if (fs->io) { + io_channel_close(fs->io); + } + //if (fs->device_name) + // ext2fs_free_mem(&fs->device_name); + if (fs->super) + ext2fs_free_mem(&fs->super); + if (fs->orig_super) + ext2fs_free_mem(&fs->orig_super); + if (fs->group_desc) + ext2fs_free_mem(&fs->group_desc); + if (fs->block_map) + ext2fs_free_block_bitmap(fs->block_map); + if (fs->inode_map) + ext2fs_free_inode_bitmap(fs->inode_map); + + if (fs->badblocks) + ext2fs_badblocks_list_free(fs->badblocks); + fs->badblocks = 0; + + if (fs->dblist) + ext2fs_free_dblist(fs->dblist); + + if (fs->icache) + ext2fs_free_inode_cache(fs->icache); + + fs->magic = 0; + + ext2fs_free_mem(fs); +} + +/* + * Free the inode cache structure + */ +static void ext2fs_free_inode_cache(struct ext2_inode_cache *icache) +{ + if (--icache->refcount) + return; + if (icache->buffer) + ext2fs_free_mem(&icache->buffer); + if (icache->cache) + ext2fs_free_mem(&icache->cache); + icache->buffer_blk = 0; + ext2fs_free_mem(&icache); +} + +/* + * This procedure frees a badblocks list. 
+ */ +void ext2fs_u32_list_free(ext2_u32_list bb) +{ + if (bb->magic != EXT2_ET_MAGIC_BADBLOCKS_LIST) + return; + + if (bb->list) + ext2fs_free_mem(&bb->list); + bb->list = 0; + ext2fs_free_mem(&bb); +} + +void ext2fs_badblocks_list_free(ext2_badblocks_list bb) +{ + ext2fs_u32_list_free((ext2_u32_list) bb); +} + + +/* + * Free a directory block list + */ +void ext2fs_free_dblist(ext2_dblist dblist) +{ + if (!dblist || (dblist->magic != EXT2_ET_MAGIC_DBLIST)) + return; + + if (dblist->list) + ext2fs_free_mem(&dblist->list); + dblist->list = 0; + if (dblist->fs && dblist->fs->dblist == dblist) + dblist->fs->dblist = 0; + dblist->magic = 0; + ext2fs_free_mem(&dblist); +} + diff --git a/fs/ext4/format/gen_bitmap.c b/fs/ext4/format/gen_bitmap.c new file mode 100755 index 0000000..a3e387b --- /dev/null +++ b/fs/ext4/format/gen_bitmap.c @@ -0,0 +1,456 @@ +/* + * gen_bitmap.c --- Generic (32-bit) bitmap routines + * + * Copyright (C) 2001 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. 
+ * %End-Header% + */ + + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> +#include <linux/stat.h> +#include <linux/time.h> + +#include "ext2_fs.h" +#include "ext2fs.h" + +struct ext2fs_struct_generic_bitmap { + errcode_t magic; + ext2_filsys fs; + __u32 start, end; + __u32 real_end; + char * description; + char * bitmap; + errcode_t base_error_code; + __u32 reserved[7]; +}; + +/* + * Used by previously inlined function, so we have to export this and + * not change the function signature + */ +void ext2fs_warn_bitmap2(ext2fs_generic_bitmap bitmap, + int code, unsigned long arg) +{ + printf("ext2fs error code 0x%x, arg %lu\n", code, arg); +} + +static errcode_t check_magic(ext2fs_generic_bitmap bitmap) +{ + if (!bitmap || !((bitmap->magic == EXT2_ET_MAGIC_GENERIC_BITMAP) || + (bitmap->magic == EXT2_ET_MAGIC_INODE_BITMAP) || + (bitmap->magic == EXT2_ET_MAGIC_BLOCK_BITMAP))) + return EXT2_ET_MAGIC_GENERIC_BITMAP; + return 0; +} + +errcode_t ext2fs_make_generic_bitmap(errcode_t magic, ext2_filsys fs, + __u32 start, __u32 end, __u32 real_end, + const char *descr, char *init_map, + ext2fs_generic_bitmap *ret) +{ + ext2fs_generic_bitmap bitmap; + errcode_t retval; + size_t size; + + retval = ext2fs_get_mem(sizeof(struct ext2fs_struct_generic_bitmap), + &bitmap); + if (retval) + return retval; + + bitmap->magic = magic; + bitmap->fs = fs; + bitmap->start = start; + bitmap->end = end; + bitmap->real_end = real_end; + switch (magic) { + case EXT2_ET_MAGIC_INODE_BITMAP: + bitmap->base_error_code = EXT2_ET_BAD_INODE_MARK; + break; + case EXT2_ET_MAGIC_BLOCK_BITMAP: + bitmap->base_error_code = EXT2_ET_BAD_BLOCK_MARK; + break; + default: + bitmap->base_error_code = EXT2_ET_BAD_GENERIC_MARK; + } + if (descr) { + retval = ext2fs_get_mem(strlen(descr)+1, &bitmap->description); + if (retval) { + ext2fs_free_mem(&bitmap); + return retval; + } + strcpy(bitmap->description, descr); + } else + bitmap->description = 0; + + 
size = (size_t) (((bitmap->real_end - bitmap->start) / 8) + 1); + /* Round up to allow for the BT x86 instruction */ + size = (size + 7) & ~3; + retval = ext2fs_get_mem(size, &bitmap->bitmap); + if (retval) { + ext2fs_free_mem(&bitmap->description); + ext2fs_free_mem(&bitmap); + return retval; + } + + if (init_map) + memcpy(bitmap->bitmap, init_map, size); + else + memset(bitmap->bitmap, 0, size); + *ret = bitmap; + return 0; +} + +errcode_t ext2fs_allocate_generic_bitmap(__u32 start, + __u32 end, + __u32 real_end, + const char *descr, + ext2fs_generic_bitmap *ret) +{ + return ext2fs_make_generic_bitmap(EXT2_ET_MAGIC_GENERIC_BITMAP, 0, + start, end, real_end, descr, 0, ret); +} + +errcode_t ext2fs_copy_generic_bitmap(ext2fs_generic_bitmap src, + ext2fs_generic_bitmap *dest) +{ + return (ext2fs_make_generic_bitmap(src->magic, src->fs, + src->start, src->end, + src->real_end, + src->description, src->bitmap, + dest)); +} + +void ext2fs_free_generic_bitmap(ext2fs_inode_bitmap bitmap) +{ + if (check_magic(bitmap)) + return; + + bitmap->magic = 0; + if (bitmap->description) { + ext2fs_free_mem(&bitmap->description); + bitmap->description = 0; + } + if (bitmap->bitmap) { + ext2fs_free_mem(&bitmap->bitmap); + bitmap->bitmap = 0; + } + ext2fs_free_mem(&bitmap); +} + +int ext2fs_test_generic_bitmap(ext2fs_generic_bitmap bitmap, + blk_t bitno) +{ + if ((bitno < bitmap->start) || (bitno > bitmap->end)) { + ext2fs_warn_bitmap2(bitmap, EXT2FS_TEST_ERROR, bitno); + return 0; + } + return ext2fs_test_bit(bitno - bitmap->start, bitmap->bitmap); +} + +int ext2fs_mark_generic_bitmap(ext2fs_generic_bitmap bitmap, + __u32 bitno) +{ + if ((bitno < bitmap->start) || (bitno > bitmap->end)) { + ext2fs_warn_bitmap2(bitmap, EXT2FS_MARK_ERROR, bitno); + return 0; + } + return ext2fs_set_bit(bitno - bitmap->start, bitmap->bitmap); +} + +int ext2fs_unmark_generic_bitmap(ext2fs_generic_bitmap bitmap, + blk_t bitno) +{ + if ((bitno < bitmap->start) || (bitno > bitmap->end)) { + 
ext2fs_warn_bitmap2(bitmap, EXT2FS_UNMARK_ERROR, bitno); + return 0; + } + return ext2fs_clear_bit(bitno - bitmap->start, bitmap->bitmap); +} + +__u32 ext2fs_get_generic_bitmap_start(ext2fs_generic_bitmap bitmap) +{ + return bitmap->start; +} + +__u32 ext2fs_get_generic_bitmap_end(ext2fs_generic_bitmap bitmap) +{ + return bitmap->end; +} + +void ext2fs_clear_generic_bitmap(ext2fs_generic_bitmap bitmap) +{ + if (check_magic(bitmap)) + return; + + memset(bitmap->bitmap, 0, + (size_t) (((bitmap->real_end - bitmap->start) / 8) + 1)); +} + +errcode_t ext2fs_fudge_generic_bitmap_end(ext2fs_inode_bitmap bitmap, + errcode_t magic, errcode_t neq, + ext2_ino_t end, ext2_ino_t *oend) +{ + EXT2_CHECK_MAGIC(bitmap, magic); + + if (end > bitmap->real_end) + return neq; + if (oend) + *oend = bitmap->end; + bitmap->end = end; + return 0; +} + +errcode_t ext2fs_resize_generic_bitmap(errcode_t magic, + __u32 new_end, __u32 new_real_end, + ext2fs_generic_bitmap bmap) +{ + errcode_t retval; + size_t size, new_size; + __u32 bitno; + + if (!bmap || (bmap->magic != magic)) + return magic; + + /* + * If we're expanding the bitmap, make sure all of the new + * parts of the bitmap are zero. 
+ */ + if (new_end > bmap->end) { + bitno = bmap->real_end; + if (bitno > new_end) + bitno = new_end; + for (; bitno > bmap->end; bitno--) + ext2fs_clear_bit(bitno - bmap->start, bmap->bitmap); + } + if (new_real_end == bmap->real_end) { + bmap->end = new_end; + return 0; + } + + size = ((bmap->real_end - bmap->start) / 8) + 1; + new_size = ((new_real_end - bmap->start) / 8) + 1; + + if (size != new_size) { + retval = ext2fs_resize_mem(size, new_size, &bmap->bitmap); + if (retval) + return retval; + } + if (new_size > size) + memset(bmap->bitmap + size, 0, new_size - size); + + bmap->end = new_end; + bmap->real_end = new_real_end; + return 0; +} + +errcode_t ext2fs_compare_generic_bitmap(errcode_t magic, errcode_t neq, + ext2fs_generic_bitmap bm1, + ext2fs_generic_bitmap bm2) +{ + blk_t i; + + if (!bm1 || bm1->magic != magic) + return magic; + if (!bm2 || bm2->magic != magic) + return magic; + + if ((bm1->start != bm2->start) || + (bm1->end != bm2->end) || + (memcmp(bm1->bitmap, bm2->bitmap, + (size_t) (bm1->end - bm1->start)/8))) + return neq; + + for (i = bm1->end - ((bm1->end - bm1->start) % 8); i <= bm1->end; i++) + if (ext2fs_fast_test_block_bitmap(bm1, i) != + ext2fs_fast_test_block_bitmap(bm2, i)) + return neq; + + return 0; +} + +void ext2fs_set_generic_bitmap_padding(ext2fs_generic_bitmap map) +{ + __u32 i, j; + + /* Protect loop from wrap-around if map->real_end is maxed */ + for (i=map->end+1, j = i - map->start; + i <= map->real_end && i > map->end; + i++, j++) + ext2fs_set_bit(j, map->bitmap); +} + +errcode_t ext2fs_get_generic_bitmap_range(ext2fs_generic_bitmap bmap, + errcode_t magic, + __u32 start, __u32 num, + void *out) +{ + if (!bmap || (bmap->magic != magic)) + return magic; + + if ((start < bmap->start) || (start+num-1 > bmap->real_end)) + return EXT2_ET_INVALID_ARGUMENT; + + memcpy(out, bmap->bitmap + (start >> 3), (num+7) >> 3); + return 0; +} + +errcode_t ext2fs_set_generic_bitmap_range(ext2fs_generic_bitmap bmap, + errcode_t magic, + __u32 
start, __u32 num, + void *in) +{ + if (!bmap || (bmap->magic != magic)) + return magic; + + if ((start < bmap->start) || (start+num-1 > bmap->real_end)) + return EXT2_ET_INVALID_ARGUMENT; + + memcpy(bmap->bitmap + (start >> 3), in, (num+7) >> 3); + return 0; +} + +/* + * Compare @mem to zero buffer by 256 bytes. + * Return 1 if @mem is zeroed memory, otherwise return 0. + */ +static int mem_is_zero(const char *mem, size_t len) +{ + static const char zero_buf[256]; + + while (len >= sizeof(zero_buf)) { + if (memcmp(mem, zero_buf, sizeof(zero_buf))) + return 0; + len -= sizeof(zero_buf); + mem += sizeof(zero_buf); + } + /* Deal with leftover bytes. */ + if (len) + return !memcmp(mem, zero_buf, len); + return 1; +} + +/* + * Return true if all of the bits in a specified range are clear + */ +static int ext2fs_test_clear_generic_bitmap_range(ext2fs_generic_bitmap bitmap, + unsigned int start, + unsigned int len) +{ + size_t start_byte, len_byte = len >> 3; + unsigned int start_bit, len_bit = len % 8; + int first_bit = 0; + int last_bit = 0; + int mark_count = 0; + int mark_bit = 0; + int i; + const char *ADDR = bitmap->bitmap; + + start -= bitmap->start; + start_byte = start >> 3; + start_bit = start % 8; + + if (start_bit != 0) { + /* + * The compared start block number or start inode number + * is not the first bit in a byte. + */ + mark_count = 8 - start_bit; + if (len < 8 - start_bit) { + mark_count = (int)len; + mark_bit = len + start_bit - 1; + } else + mark_bit = 7; + + for (i = mark_count; i > 0; i--, mark_bit--) + first_bit |= 1 << mark_bit; + + /* + * Compare blocks or inodes in the first byte. + * If there is any marked bit, this function returns 0. + */ + if (first_bit & ADDR[start_byte]) + return 0; + else if (len <= 8 - start_bit) + return 1; + + start_byte++; + len_bit = (len - mark_count) % 8; + len_byte = (len - mark_count) >> 3; + } + + /* + * The compared start block number or start inode number is + * the first bit in a byte. 
+ */ + if (len_bit != 0) { + /* + * The compared end block number or end inode number is + * not the last bit in a byte. + */ + for (mark_bit = len_bit - 1; mark_bit >= 0; mark_bit--) + last_bit |= 1 << mark_bit; + + /* + * Compare blocks or inodes in the last byte. + * If there is any marked bit, this function returns 0. + */ + if (last_bit & ADDR[start_byte + len_byte]) + return 0; + else if (len_byte == 0) + return 1; + } + + /* Check whether all bytes are 0 */ + return mem_is_zero(ADDR + start_byte, len_byte); +} + +int ext2fs_test_block_bitmap_range(ext2fs_block_bitmap bitmap, + blk_t block, int num) +{ + EXT2_CHECK_MAGIC(bitmap, EXT2_ET_MAGIC_BLOCK_BITMAP); + if ((block < bitmap->start) || (block+num-1 > bitmap->real_end)) { + ext2fs_warn_bitmap(EXT2_ET_BAD_BLOCK_TEST, + block, bitmap->description); + return 0; + } + return ext2fs_test_clear_generic_bitmap_range((ext2fs_generic_bitmap) + bitmap, block, num); +} + + +void ext2fs_mark_block_bitmap_range(ext2fs_block_bitmap bitmap, + blk_t block, int num) +{ + int i; + + if ((block < bitmap->start) || (block+num-1 > bitmap->end)) { + ext2fs_warn_bitmap(EXT2_ET_BAD_BLOCK_MARK, block, + bitmap->description); + return; + } + for (i=0; i < num; i++) + ext2fs_fast_set_bit(block + i - bitmap->start, bitmap->bitmap); +} + +void ext2fs_unmark_block_bitmap_range(ext2fs_block_bitmap bitmap, + blk_t block, int num) +{ + int i; + + if ((block < bitmap->start) || (block+num-1 > bitmap->end)) { + ext2fs_warn_bitmap(EXT2_ET_BAD_BLOCK_UNMARK, block, + bitmap->description); + return; + } + for (i=0; i < num; i++) + ext2fs_fast_clear_bit(block + i - bitmap->start, + bitmap->bitmap); +} diff --git a/fs/ext4/format/i_block.c b/fs/ext4/format/i_block.c new file mode 100755 index 0000000..fe5b693 --- /dev/null +++ b/fs/ext4/format/i_block.c @@ -0,0 +1,82 @@ +/* + * i_block.c --- Manage the i_block field for i_blocks + * + * Copyright (C) 2008 Theodore Ts'o. 
+ * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. + * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> +#include <linux/stat.h> +#include <linux/time.h> + +#include "ext2_fs.h" +#include "ext2fs.h" + +errcode_t ext2fs_iblk_add_blocks(ext2_filsys fs, struct ext2_inode *inode, + blk64_t num_blocks) +{ + unsigned long long b = inode->i_blocks; + + if (!(fs->super->s_feature_ro_compat & + EXT4_FEATURE_RO_COMPAT_HUGE_FILE) || + !(inode->i_flags & EXT4_HUGE_FILE_FL)) + num_blocks *= fs->blocksize / 512; + + b += num_blocks; + + if (fs->super->s_feature_ro_compat & + EXT4_FEATURE_RO_COMPAT_HUGE_FILE) { + b += ((long long) inode->osd2.linux2.l_i_blocks_hi) << 32; + inode->osd2.linux2.l_i_blocks_hi = b >> 32; + } else if (b > 0xFFFFFFFF) + return EOVERFLOW; + inode->i_blocks = b & 0xFFFFFFFF; + return 0; +} + +errcode_t ext2fs_iblk_sub_blocks(ext2_filsys fs, struct ext2_inode *inode, + blk64_t num_blocks) +{ + unsigned long long b = inode->i_blocks; + + if (!(fs->super->s_feature_ro_compat & + EXT4_FEATURE_RO_COMPAT_HUGE_FILE) || + !(inode->i_flags & EXT4_HUGE_FILE_FL)) + num_blocks *= fs->blocksize / 512; + + if (num_blocks > b) + return EOVERFLOW; + + b -= num_blocks; + + if (fs->super->s_feature_ro_compat & + EXT4_FEATURE_RO_COMPAT_HUGE_FILE) { + b += ((long long) inode->osd2.linux2.l_i_blocks_hi) << 32; + inode->osd2.linux2.l_i_blocks_hi = b >> 32; + } + inode->i_blocks = b & 0xFFFFFFFF; + return 0; +} + +errcode_t ext2fs_iblk_set(ext2_filsys fs, struct ext2_inode *inode, blk64_t b) +{ + if (!(fs->super->s_feature_ro_compat & + EXT4_FEATURE_RO_COMPAT_HUGE_FILE) || + !(inode->i_flags & EXT4_HUGE_FILE_FL)) + b *= fs->blocksize / 512; + + inode->i_blocks = b & 0xFFFFFFFF; + if (fs->super->s_feature_ro_compat & EXT4_FEATURE_RO_COMPAT_HUGE_FILE) + inode->osd2.linux2.l_i_blocks_hi = b >> 32; + else if (b >> 32) + 
return EOVERFLOW; + return 0; +} diff --git a/fs/ext4/format/icount.c b/fs/ext4/format/icount.c new file mode 100755 index 0000000..6b81d03 --- /dev/null +++ b/fs/ext4/format/icount.c @@ -0,0 +1,706 @@ +/* + * icount.c --- an efficient inode count abstraction + * + * Copyright (C) 1997 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. + * %End-Header% + */ + +#if HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <string.h> +#include <stdio.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> + +#include "ext2_fs.h" +#include "ext2fs.h" +#include "tdb.h" + +/* + * The data storage strategy used by icount relies on the observation + * that most inode counts are either zero (for non-allocated inodes), + * one (for most files), and only a few that are two or more + * (directories and files that are linked to more than one directory). + * + * Also, e2fsck tends to load the icount data sequentially. + * + * So, we use an inode bitmap to indicate which inodes have a count of + * one, and then use a sorted list to store the counts for inodes + * which are greater than one. + * + * We also use an optional bitmap to indicate which inodes are already + * in the sorted list, to speed up the use of this abstraction by + * e2fsck's pass 2. Pass 2 increments inode counts as it finds them, + * so this extra bitmap avoids searching the sorted list to see if a + * particular inode is on the sorted list already. 
+ */ + +struct ext2_icount_el { + ext2_ino_t ino; + __u32 count; +}; + +struct ext2_icount { + errcode_t magic; + ext2fs_inode_bitmap single; + ext2fs_inode_bitmap multiple; + ext2_ino_t count; + ext2_ino_t size; + ext2_ino_t num_inodes; + ext2_ino_t cursor; + struct ext2_icount_el *list; + struct ext2_icount_el *last_lookup; + char *tdb_fn; + TDB_CONTEXT *tdb; +}; + +/* + * We now use a 32-bit counter field because it doesn't cost us + * anything extra for the in-memory data structure, due to alignment + * padding. But there's no point changing the interface if most of + * the time we only care if the number is bigger than 65,000 or not. + * So use the following translation function to return a 16-bit count. + */ +#define icount_16_xlate(x) (((x) > 65500) ? 65500 : (x)) + +void ext2fs_free_icount(ext2_icount_t icount) +{ + if (!icount) + return; + + icount->magic = 0; + if (icount->list) + ext2fs_free_mem(&icount->list); + if (icount->single) + ext2fs_free_inode_bitmap(icount->single); + if (icount->multiple) + ext2fs_free_inode_bitmap(icount->multiple); + if (icount->tdb) + tdb_close(icount->tdb); + if (icount->tdb_fn) { + unlink(icount->tdb_fn); + free(icount->tdb_fn); + } + + ext2fs_free_mem(&icount); +} + +static errcode_t alloc_icount(ext2_filsys fs, int flags, ext2_icount_t *ret) +{ + ext2_icount_t icount; + errcode_t retval; + + *ret = 0; + + retval = ext2fs_get_mem(sizeof(struct ext2_icount), &icount); + if (retval) + return retval; + memset(icount, 0, sizeof(struct ext2_icount)); + + retval = ext2fs_allocate_inode_bitmap(fs, 0, &icount->single); + if (retval) + goto errout; + + if (flags & EXT2_ICOUNT_OPT_INCREMENT) { + retval = ext2fs_allocate_inode_bitmap(fs, 0, + &icount->multiple); + if (retval) + goto errout; + } else + icount->multiple = 0; + + icount->magic = EXT2_ET_MAGIC_ICOUNT; + icount->num_inodes = fs->super->s_inodes_count; + + *ret = icount; + return 0; + +errout: + ext2fs_free_icount(icount); + return(retval); +} + +struct uuid { + __u32 
time_low; + __u16 time_mid; + __u16 time_hi_and_version; + __u16 clock_seq; + __u8 node[6]; +}; + +static void unpack_uuid(void *in, struct uuid *uu) +{ + __u8 *ptr = in; + __u32 tmp; + + tmp = *ptr++; + tmp = (tmp << 8) | *ptr++; + tmp = (tmp << 8) | *ptr++; + tmp = (tmp << 8) | *ptr++; + uu->time_low = tmp; + + tmp = *ptr++; + tmp = (tmp << 8) | *ptr++; + uu->time_mid = tmp; + + tmp = *ptr++; + tmp = (tmp << 8) | *ptr++; + uu->time_hi_and_version = tmp; + + tmp = *ptr++; + tmp = (tmp << 8) | *ptr++; + uu->clock_seq = tmp; + + memcpy(uu->node, ptr, 6); +} + +static void uuid_unparse(void *uu, char *out) +{ + struct uuid uuid; + + unpack_uuid(uu, &uuid); + sprintf(out, + "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", + uuid.time_low, uuid.time_mid, uuid.time_hi_and_version, + uuid.clock_seq >> 8, uuid.clock_seq & 0xFF, + uuid.node[0], uuid.node[1], uuid.node[2], + uuid.node[3], uuid.node[4], uuid.node[5]); +} + +errcode_t ext2fs_create_icount_tdb(ext2_filsys fs, char *tdb_dir, + int flags, ext2_icount_t *ret) +{ + ext2_icount_t icount; + errcode_t retval; + char *fn, uuid[40]; + int fd; + + retval = alloc_icount(fs, flags, &icount); + if (retval) + return retval; + + retval = ext2fs_get_mem(strlen(tdb_dir) + 64, &fn); + if (retval) + goto errout; + uuid_unparse(fs->super->s_uuid, uuid); + sprintf(fn, "%s/%s-icount-XXXXXX", tdb_dir, uuid); + fd = mkstemp(fn); + + icount->tdb_fn = fn; + icount->tdb = tdb_open(fn, 0, TDB_CLEAR_IF_FIRST, + O_RDWR | O_CREAT | O_TRUNC, 0600); + if (icount->tdb) { + close(fd); + *ret = icount; + return 0; + } + + retval = errno; + close(fd); + +errout: + ext2fs_free_icount(icount); + return(retval); +} + +errcode_t ext2fs_create_icount2(ext2_filsys fs, int flags, unsigned int size, + ext2_icount_t hint, ext2_icount_t *ret) +{ + ext2_icount_t icount; + errcode_t retval; + size_t bytes; + ext2_ino_t i; + + if (hint) { + EXT2_CHECK_MAGIC(hint, EXT2_ET_MAGIC_ICOUNT); + if (hint->size > size) + size = (size_t) hint->size; + } + + retval = 
alloc_icount(fs, flags, &icount); + if (retval) + return retval; + + if (size) { + icount->size = size; + } else { + /* + * Figure out how many special case inode counts we will + * have. We know we will need one for each directory; + * we also need to reserve some extra room for file links + */ + retval = ext2fs_get_num_dirs(fs, &icount->size); + if (retval) + goto errout; + icount->size += fs->super->s_inodes_count / 50; + } + + bytes = (size_t) (icount->size * sizeof(struct ext2_icount_el)); +#if 0 + printf("Icount allocated %u entries, %d bytes.\n", + icount->size, bytes); +#endif + retval = ext2fs_get_array(icount->size, sizeof(struct ext2_icount_el), + &icount->list); + if (retval) + goto errout; + memset(icount->list, 0, bytes); + + icount->count = 0; + icount->cursor = 0; + + /* + * Populate the sorted list with those entries which were + * found in the hint icount (since those are ones which will + * likely need to be in the sorted list this time around). + */ + if (hint) { + for (i=0; i < hint->count; i++) + icount->list[i].ino = hint->list[i].ino; + icount->count = hint->count; + } + + *ret = icount; + return 0; + +errout: + ext2fs_free_icount(icount); + return(retval); +} + +errcode_t ext2fs_create_icount(ext2_filsys fs, int flags, + unsigned int size, + ext2_icount_t *ret) +{ + return ext2fs_create_icount2(fs, flags, size, 0, ret); +} + +/* + * insert_icount_el() --- Insert a new entry into the sorted list at a + * specified position. 
+ */ +static struct ext2_icount_el *insert_icount_el(ext2_icount_t icount, + ext2_ino_t ino, int pos) +{ + struct ext2_icount_el *el; + errcode_t retval; + ext2_ino_t new_size = 0; + int num; + + if (icount->last_lookup && icount->last_lookup->ino == ino) + return icount->last_lookup; + + if (icount->count >= icount->size) { + if (icount->count) { + new_size = icount->list[(unsigned)icount->count-1].ino; + new_size = (ext2_ino_t) (icount->count * + ((float) icount->num_inodes / new_size)); + } + if (new_size < (icount->size + 100)) + new_size = icount->size + 100; +#if 0 + printf("Reallocating icount %u entries...\n", new_size); +#endif + retval = ext2fs_resize_mem((size_t) icount->size * + sizeof(struct ext2_icount_el), + (size_t) new_size * + sizeof(struct ext2_icount_el), + &icount->list); + if (retval) + return 0; + icount->size = new_size; + } + num = (int) icount->count - pos; + if (num < 0) + return 0; /* should never happen */ + if (num) { + memmove(&icount->list[pos+1], &icount->list[pos], + sizeof(struct ext2_icount_el) * num); + } + icount->count++; + el = &icount->list[pos]; + el->count = 0; + el->ino = ino; + icount->last_lookup = el; + return el; +} + +/* + * get_icount_el() --- given an inode number, try to find icount + * information in the sorted list. If the create flag is set, + * and we can't find an entry, create one in the sorted list. 
+ */ +static struct ext2_icount_el *get_icount_el(ext2_icount_t icount, + ext2_ino_t ino, int create) +{ + float range; + int low, high, mid; + ext2_ino_t lowval, highval; + + if (!icount || !icount->list) + return 0; + + if (create && ((icount->count == 0) || + (ino > icount->list[(unsigned)icount->count-1].ino))) { + return insert_icount_el(icount, ino, (unsigned) icount->count); + } + if (icount->count == 0) + return 0; + + if (icount->cursor >= icount->count) + icount->cursor = 0; + if (ino == icount->list[icount->cursor].ino) + return &icount->list[icount->cursor++]; +#if 0 + printf("Non-cursor get_icount_el: %u\n", ino); +#endif + low = 0; + high = (int) icount->count-1; + while (low <= high) { +#if 0 + mid = (low+high)/2; +#else + if (low == high) + mid = low; + else { + /* Interpolate for efficiency */ + lowval = icount->list[low].ino; + highval = icount->list[high].ino; + + if (ino < lowval) + range = 0; + else if (ino > highval) + range = 1; + else { + range = ((float) (ino - lowval)) / + (highval - lowval); + if (range > 0.9) + range = 0.9; + if (range < 0.1) + range = 0.1; + } + mid = low + ((int) (range * (high-low))); + } +#endif + if (ino == icount->list[mid].ino) { + icount->cursor = mid+1; + return &icount->list[mid]; + } + if (ino < icount->list[mid].ino) + high = mid-1; + else + low = mid+1; + } + /* + * If we need to create a new entry, it should be right at + * low (where high will be left at low-1). 
+ */ + if (create) + return insert_icount_el(icount, ino, low); + return 0; +} + +static errcode_t set_inode_count(ext2_icount_t icount, ext2_ino_t ino, + __u32 count) +{ + struct ext2_icount_el *el; + TDB_DATA key, data; + + if (icount->tdb) { + key.dptr = (unsigned char *) &ino; + key.dsize = sizeof(ext2_ino_t); + data.dptr = (unsigned char *) &count; + data.dsize = sizeof(__u32); + if (count) { + if (tdb_store(icount->tdb, key, data, TDB_REPLACE)) + return tdb_error(icount->tdb) + + EXT2_ET_TDB_SUCCESS; + } else { + if (tdb_delete(icount->tdb, key)) + return tdb_error(icount->tdb) + + EXT2_ET_TDB_SUCCESS; + } + return 0; + } + + el = get_icount_el(icount, ino, 1); + if (!el) + return EXT2_ET_NO_MEMORY; + + el->count = count; + return 0; +} + +static errcode_t get_inode_count(ext2_icount_t icount, ext2_ino_t ino, + __u32 *count) +{ + struct ext2_icount_el *el; + TDB_DATA key, data; + + if (icount->tdb) { + key.dptr = (unsigned char *) &ino; + key.dsize = sizeof(ext2_ino_t); + + data = tdb_fetch(icount->tdb, key); + if (data.dptr == NULL) { + *count = 0; + return tdb_error(icount->tdb) + EXT2_ET_TDB_SUCCESS; + } + + *count = *((__u32 *) data.dptr); + free(data.dptr); + return 0; + } + el = get_icount_el(icount, ino, 0); + if (!el) { + *count = 0; + return ENOENT; + } + + *count = el->count; + return 0; +} + + + +errcode_t ext2fs_icount_fetch(ext2_icount_t icount, ext2_ino_t ino, __u16 *ret) +{ + __u32 val; + EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT); + + if (!ino || (ino > icount->num_inodes)) + return EXT2_ET_INVALID_ARGUMENT; + + if (ext2fs_test_inode_bitmap(icount->single, ino)) { + *ret = 1; + return 0; + } + if (icount->multiple && + !ext2fs_test_inode_bitmap(icount->multiple, ino)) { + *ret = 0; + return 0; + } + get_inode_count(icount, ino, &val); + *ret = icount_16_xlate(val); + return 0; +} + +errcode_t ext2fs_icount_increment(ext2_icount_t icount, ext2_ino_t ino, + __u16 *ret) +{ + __u32 curr_value; + + EXT2_CHECK_MAGIC(icount, 
EXT2_ET_MAGIC_ICOUNT); + + if (!ino || (ino > icount->num_inodes)) + return EXT2_ET_INVALID_ARGUMENT; + + if (ext2fs_test_inode_bitmap(icount->single, ino)) { + /* + * If the existing count is 1, then we know there is + * no entry in the list. + */ + if (set_inode_count(icount, ino, 2)) + return EXT2_ET_NO_MEMORY; + curr_value = 2; + ext2fs_unmark_inode_bitmap(icount->single, ino); + } else if (icount->multiple) { + /* + * The count is either zero or greater than 1; if the + * inode is set in icount->multiple, then there should + * be an entry in the list, so we need to fix it. + */ + if (ext2fs_test_inode_bitmap(icount->multiple, ino)) { + get_inode_count(icount, ino, &curr_value); + curr_value++; + if (set_inode_count(icount, ino, curr_value)) + return EXT2_ET_NO_MEMORY; + } else { + /* + * The count was zero; mark the single bitmap + * and return. + */ + ext2fs_mark_inode_bitmap(icount->single, ino); + if (ret) + *ret = 1; + return 0; + } + } else { + /* + * The count is either zero or greater than 1; try to + * find an entry in the list to determine which. 
+ */ + get_inode_count(icount, ino, &curr_value); + curr_value++; + if (set_inode_count(icount, ino, curr_value)) + return EXT2_ET_NO_MEMORY; + } + if (icount->multiple) + ext2fs_mark_inode_bitmap(icount->multiple, ino); + if (ret) + *ret = icount_16_xlate(curr_value); + return 0; +} + + + +errcode_t ext2fs_icount_store(ext2_icount_t icount, ext2_ino_t ino, + __u16 count) +{ + if (!ino || (ino > icount->num_inodes)) + return EXT2_ET_INVALID_ARGUMENT; + + EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT); + + if (count == 1) { + ext2fs_mark_inode_bitmap(icount->single, ino); + if (icount->multiple) + ext2fs_unmark_inode_bitmap(icount->multiple, ino); + return 0; + } + if (count == 0) { + ext2fs_unmark_inode_bitmap(icount->single, ino); + if (icount->multiple) { + /* + * If the icount->multiple bitmap is enabled, + * we can just clear both bitmaps and we're done + */ + ext2fs_unmark_inode_bitmap(icount->multiple, ino); + } else + set_inode_count(icount, ino, 0); + return 0; + } + + if (set_inode_count(icount, ino, count)) + return EXT2_ET_NO_MEMORY; + ext2fs_unmark_inode_bitmap(icount->single, ino); + if (icount->multiple) + ext2fs_mark_inode_bitmap(icount->multiple, ino); + return 0; +} + +ext2_ino_t ext2fs_get_icount_size(ext2_icount_t icount) +{ + if (!icount || icount->magic != EXT2_ET_MAGIC_ICOUNT) + return 0; + + return icount->size; +} + +#ifdef DEBUG + +ext2_filsys test_fs; +ext2_icount_t icount; + +#define EXIT 0x00 +#define FETCH 0x01 +#define STORE 0x02 +#define INCREMENT 0x03 +#define DECREMENT 0x04 + +struct test_program { + int cmd; + ext2_ino_t ino; + __u16 arg; + __u16 expected; +}; + +struct test_program prog[] = { + { STORE, 42, 42, 42 }, + { STORE, 1, 1, 1 }, + { STORE, 2, 2, 2 }, + { STORE, 3, 3, 3 }, + { STORE, 10, 1, 1 }, + { STORE, 42, 0, 0 }, + { INCREMENT, 5, 0, 1 }, + { INCREMENT, 5, 0, 2 }, + { INCREMENT, 5, 0, 3 }, + { INCREMENT, 5, 0, 4 }, + { DECREMENT, 5, 0, 3 }, + { DECREMENT, 5, 0, 2 }, + { DECREMENT, 5, 0, 1 }, + { DECREMENT, 5, 0, 0 }, 
+ { FETCH, 10, 0, 1 }, + { FETCH, 1, 0, 1 }, + { FETCH, 2, 0, 2 }, + { FETCH, 3, 0, 3 }, + { INCREMENT, 1, 0, 2 }, + { DECREMENT, 2, 0, 1 }, + { DECREMENT, 2, 0, 0 }, + { FETCH, 12, 0, 0 }, + { EXIT, 0, 0, 0 } +}; + +struct test_program extended[] = { + { STORE, 1, 1, 1 }, + { STORE, 2, 2, 2 }, + { STORE, 3, 3, 3 }, + { STORE, 4, 4, 4 }, + { STORE, 5, 5, 5 }, + { STORE, 6, 1, 1 }, + { STORE, 7, 2, 2 }, + { STORE, 8, 3, 3 }, + { STORE, 9, 4, 4 }, + { STORE, 10, 5, 5 }, + { STORE, 11, 1, 1 }, + { STORE, 12, 2, 2 }, + { STORE, 13, 3, 3 }, + { STORE, 14, 4, 4 }, + { STORE, 15, 5, 5 }, + { STORE, 16, 1, 1 }, + { STORE, 17, 2, 2 }, + { STORE, 18, 3, 3 }, + { STORE, 19, 4, 4 }, + { STORE, 20, 5, 5 }, + { STORE, 21, 1, 1 }, + { STORE, 22, 2, 2 }, + { STORE, 23, 3, 3 }, + { STORE, 24, 4, 4 }, + { STORE, 25, 5, 5 }, + { STORE, 26, 1, 1 }, + { STORE, 27, 2, 2 }, + { STORE, 28, 3, 3 }, + { STORE, 29, 4, 4 }, + { STORE, 30, 5, 5 }, + { EXIT, 0, 0, 0 } +}; + +/* + * Setup the variables for doing the inode scan test. + */ +static void setup(void) +{ + errcode_t retval; + struct ext2_super_block param; + + initialize_ext2_error_table(); + + memset(¶m, 0, sizeof(param)); + param.s_blocks_count = 12000; + + retval = ext2fs_initialize("test fs", 0, ¶m, + test_io_manager, &test_fs); + if (retval) { + //com_err("setup", retval, + // "while initializing filesystem"); + //exit(1); + return 1; + } + retval = ext2fs_allocate_tables(test_fs); + if (retval) { + //com_err("setup", retval, + // "while allocating tables for test filesystem"); + //exit(1); + return 1; + } +} + + + +#endif diff --git a/fs/ext4/format/ind_block.c b/fs/ext4/format/ind_block.c new file mode 100755 index 0000000..e28d634 --- /dev/null +++ b/fs/ext4/format/ind_block.c @@ -0,0 +1,66 @@ +/* + * ind_block.c --- indirect block I/O routines + * + * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, + * 2001, 2002, 2003, 2004, 2005 by Theodore Ts'o. 
+ * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. + * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> + +#include "ext2_fs.h" +#include "ext2fs.h" + +errcode_t ext2fs_read_ind_block(ext2_filsys fs, blk_t blk, void *buf) +{ + errcode_t retval; +#ifdef WORDS_BIGENDIAN + blk_t *block_nr; + int i; + int limit = fs->blocksize >> 2; +#endif + + if ((fs->flags & EXT2_FLAG_IMAGE_FILE) && + (fs->io != fs->image_io)) + memset(buf, 0, fs->blocksize); + else { + retval = io_channel_read_blk(fs->io, blk, 1, buf); + if (retval) + return retval; + } +#ifdef WORDS_BIGENDIAN + block_nr = (blk_t *) buf; + for (i = 0; i < limit; i++, block_nr++) + *block_nr = ext2fs_swab32(*block_nr); +#endif + return 0; +} + +errcode_t ext2fs_write_ind_block(ext2_filsys fs, blk_t blk, void *buf) +{ +#ifdef WORDS_BIGENDIAN + blk_t *block_nr; + int i; + int limit = fs->blocksize >> 2; +#endif + + if (fs->flags & EXT2_FLAG_IMAGE_FILE) + return 0; + +#ifdef WORDS_BIGENDIAN + block_nr = (blk_t *) buf; + for (i = 0; i < limit; i++, block_nr++) + *block_nr = ext2fs_swab32(*block_nr); +#endif + return io_channel_write_blk(fs->io, blk, 1, buf); +} + + diff --git a/fs/ext4/format/inode.c b/fs/ext4/format/inode.c new file mode 100755 index 0000000..ceff411 --- /dev/null +++ b/fs/ext4/format/inode.c @@ -0,0 +1,831 @@ +/* + * inode.c --- utility routines to read and write inodes + * + * Copyright (C) 1993, 1994, 1995, 1996, 1997 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. 
+ * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> +#include <linux/stat.h> +#include <linux/time.h> + +#include "ext2fs.h" + +#include "ext2_fs.h" +#include "ext2fsP.h" +#include "e2image.h" + +struct ext2_struct_inode_scan { + errcode_t magic; + ext2_filsys fs; + ext2_ino_t current_inode; + blk_t current_block; + dgrp_t current_group; + ext2_ino_t inodes_left; + blk_t blocks_left; + dgrp_t groups_left; + blk_t inode_buffer_blocks; + char * inode_buffer; + int inode_size; + char * ptr; + int bytes_left; + char *temp_buffer; + errcode_t (*done_group)(ext2_filsys fs, + ext2_inode_scan scan, + dgrp_t group, + void * priv_data); + void * done_group_data; + int bad_block_ptr; + int scan_flags; + int reserved[6]; +}; + +/* + * This routine flushes the icache, if it exists. + */ +errcode_t ext2fs_flush_icache(ext2_filsys fs) +{ + int i; + + if (!fs->icache) + return 0; + + for (i=0; i < fs->icache->cache_size; i++) + fs->icache->cache[i].ino = 0; + + fs->icache->buffer_blk = 0; + return 0; +} + +static errcode_t create_icache(ext2_filsys fs) +{ + errcode_t retval; + + if (fs->icache) + return 0; + retval = ext2fs_get_mem(sizeof(struct ext2_inode_cache), &fs->icache); + if (retval) + return retval; + + memset(fs->icache, 0, sizeof(struct ext2_inode_cache)); + retval = ext2fs_get_mem(fs->blocksize, &fs->icache->buffer); + if (retval) { + ext2fs_free_mem(&fs->icache); + return retval; + } + fs->icache->buffer_blk = 0; + fs->icache->cache_last = -1; + fs->icache->cache_size = 4; + fs->icache->refcount = 1; + retval = ext2fs_get_array(fs->icache->cache_size, + sizeof(struct ext2_inode_cache_ent), + &fs->icache->cache); + if (retval) { + ext2fs_free_mem(&fs->icache->buffer); + ext2fs_free_mem(&fs->icache); + return retval; + } + ext2fs_flush_icache(fs); + return 0; +} + + + + +errcode_t ext2fs_open_inode_scan(ext2_filsys fs, int buffer_blocks, + ext2_inode_scan *ret_scan) +{ + ext2_inode_scan 
scan; + errcode_t retval; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); +#if 0 + /* + * If fs->badblocks isn't set, then set it --- since the inode + * scanning functions require it. + */ + if (fs->badblocks == 0) { + /* + * Temporarly save fs->get_blocks and set it to zero, + * for compatibility with old e2fsck's. + */ + save_get_blocks = fs->get_blocks; + fs->get_blocks = 0; + retval = ext2fs_read_bb_inode(fs, &fs->badblocks); + if (retval && fs->badblocks) { + ext2fs_badblocks_list_free(fs->badblocks); + fs->badblocks = 0; + } + fs->get_blocks = save_get_blocks; + } +#endif + retval = ext2fs_get_mem(sizeof(struct ext2_struct_inode_scan), &scan); + if (retval) + return retval; + memset(scan, 0, sizeof(struct ext2_struct_inode_scan)); + + scan->magic = EXT2_ET_MAGIC_INODE_SCAN; + scan->fs = fs; + scan->inode_size = EXT2_INODE_SIZE(fs->super); + scan->bytes_left = 0; + scan->current_group = 0; + scan->groups_left = fs->group_desc_count - 1; + scan->inode_buffer_blocks = buffer_blocks ? 
buffer_blocks : 8; + scan->current_block = scan->fs-> + group_desc[scan->current_group].bg_inode_table; + scan->inodes_left = EXT2_INODES_PER_GROUP(scan->fs->super); + scan->blocks_left = scan->fs->inode_blocks_per_group; + if (EXT2_HAS_RO_COMPAT_FEATURE(fs->super, + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + scan->inodes_left -= + fs->group_desc[scan->current_group].bg_itable_unused; + scan->blocks_left = + (scan->inodes_left + + (fs->blocksize / scan->inode_size - 1)) * + scan->inode_size / fs->blocksize; + } + retval = ext2fs_get_memalign(scan->inode_buffer_blocks * fs->blocksize, + fs->blocksize, &scan->inode_buffer); + scan->done_group = 0; + scan->done_group_data = 0; + scan->bad_block_ptr = 0; + if (retval) { + ext2fs_free_mem(&scan); + return retval; + } + retval = ext2fs_get_mem(scan->inode_size, &scan->temp_buffer); + if (retval) { + ext2fs_free_mem(&scan->inode_buffer); + ext2fs_free_mem(&scan); + return retval; + } + if (scan->fs->badblocks && scan->fs->badblocks->num) + scan->scan_flags |= EXT2_SF_CHK_BADBLOCKS; + if (EXT2_HAS_RO_COMPAT_FEATURE(fs->super, + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) + scan->scan_flags |= EXT2_SF_DO_LAZY; + *ret_scan = scan; + return 0; +} + +void ext2fs_close_inode_scan(ext2_inode_scan scan) +{ + if (!scan || (scan->magic != EXT2_ET_MAGIC_INODE_SCAN)) + return; + + ext2fs_free_mem(&scan->inode_buffer); + scan->inode_buffer = NULL; + ext2fs_free_mem(&scan->temp_buffer); + scan->temp_buffer = NULL; + ext2fs_free_mem(&scan); + return; +} + +void ext2fs_set_inode_callback(ext2_inode_scan scan, + errcode_t (*done_group)(ext2_filsys fs, + ext2_inode_scan scan, + dgrp_t group, + void * priv_data), + void *done_group_data) +{ + if (!scan || (scan->magic != EXT2_ET_MAGIC_INODE_SCAN)) + return; + + scan->done_group = done_group; + scan->done_group_data = done_group_data; +} + +int ext2fs_inode_scan_flags(ext2_inode_scan scan, int set_flags, + int clear_flags) +{ + int old_flags; + + if (!scan || (scan->magic != EXT2_ET_MAGIC_INODE_SCAN)) + 
return 0; + + old_flags = scan->scan_flags; + scan->scan_flags &= ~clear_flags; + scan->scan_flags |= set_flags; + return old_flags; +} + +/* + * This function is called by ext2fs_get_next_inode when it needs to + * get ready to read in a new blockgroup. + */ +static errcode_t get_next_blockgroup(ext2_inode_scan scan) +{ + ext2_filsys fs = scan->fs; + + scan->current_group++; + scan->groups_left--; + + scan->current_block =fs->group_desc[scan->current_group].bg_inode_table; + + scan->current_inode = scan->current_group * + EXT2_INODES_PER_GROUP(fs->super); + + scan->bytes_left = 0; + scan->inodes_left = EXT2_INODES_PER_GROUP(fs->super); + scan->blocks_left = fs->inode_blocks_per_group; + if (EXT2_HAS_RO_COMPAT_FEATURE(fs->super, + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + scan->inodes_left -= + fs->group_desc[scan->current_group].bg_itable_unused; + scan->blocks_left = + (scan->inodes_left + + (fs->blocksize / scan->inode_size - 1)) * + scan->inode_size / fs->blocksize; + } + + return 0; +} + +errcode_t ext2fs_inode_scan_goto_blockgroup(ext2_inode_scan scan, + int group) +{ + scan->current_group = group - 1; + scan->groups_left = scan->fs->group_desc_count - group; + return get_next_blockgroup(scan); +} + +/* + * This function is called by get_next_blocks() to check for bad + * blocks in the inode table. + * + * This function assumes that badblocks_list->list is sorted in + * increasing order. + */ +static errcode_t check_for_inode_bad_blocks(ext2_inode_scan scan, + blk_t *num_blocks) +{ + blk_t blk = scan->current_block; + badblocks_list bb = scan->fs->badblocks; + + /* + * If the inode table is missing, then obviously there are no + * bad blocks. :-) + */ + if (blk == 0) + return 0; + + /* + * If the current block is greater than the bad block listed + * in the bad block list, then advance the pointer until this + * is no longer the case. If we run out of bad blocks, then + * we don't need to do any more checking! 
+ */ + while (blk > bb->list[scan->bad_block_ptr]) { + if (++scan->bad_block_ptr >= bb->num) { + scan->scan_flags &= ~EXT2_SF_CHK_BADBLOCKS; + return 0; + } + } + + /* + * If the current block is equal to the bad block listed in + * the bad block list, then handle that one block specially. + * (We could try to handle runs of bad blocks, but that + * only increases CPU efficiency by a small amount, at the + * expense of a huge expense of code complexity, and for an + * uncommon case at that.) + */ + if (blk == bb->list[scan->bad_block_ptr]) { + scan->scan_flags |= EXT2_SF_BAD_INODE_BLK; + *num_blocks = 1; + if (++scan->bad_block_ptr >= bb->num) + scan->scan_flags &= ~EXT2_SF_CHK_BADBLOCKS; + return 0; + } + + /* + * If there is a bad block in the range that we're about to + * read in, adjust the number of blocks to read so that we we + * don't read in the bad block. (Then the next block to read + * will be the bad block, which is handled in the above case.) + */ + if ((blk + *num_blocks) > bb->list[scan->bad_block_ptr]) + *num_blocks = (int) (bb->list[scan->bad_block_ptr] - blk); + + return 0; +} + +/* + * This function is called by ext2fs_get_next_inode when it needs to + * read in more blocks from the current blockgroup's inode table. + */ +static errcode_t get_next_blocks(ext2_inode_scan scan) +{ + blk_t num_blocks; + errcode_t retval; + + /* + * Figure out how many blocks to read; we read at most + * inode_buffer_blocks, and perhaps less if there aren't that + * many blocks left to read. + */ + num_blocks = scan->inode_buffer_blocks; + if (num_blocks > scan->blocks_left) + num_blocks = scan->blocks_left; + + /* + * If the past block "read" was a bad block, then mark the + * left-over extra bytes as also being bad. + */ + if (scan->scan_flags & EXT2_SF_BAD_INODE_BLK) { + if (scan->bytes_left) + scan->scan_flags |= EXT2_SF_BAD_EXTRA_BYTES; + scan->scan_flags &= ~EXT2_SF_BAD_INODE_BLK; + } + + /* + * Do inode bad block processing, if necessary. 
+ */ + if (scan->scan_flags & EXT2_SF_CHK_BADBLOCKS) { + retval = check_for_inode_bad_blocks(scan, &num_blocks); + if (retval) + return retval; + } + + if ((scan->scan_flags & EXT2_SF_BAD_INODE_BLK) || + (scan->current_block == 0)) { + memset(scan->inode_buffer, 0, + (size_t) num_blocks * scan->fs->blocksize); + } else { + retval = io_channel_read_blk(scan->fs->io, + scan->current_block, + (int) num_blocks, + scan->inode_buffer); + if (retval) + return EXT2_ET_NEXT_INODE_READ; + } + scan->ptr = scan->inode_buffer; + scan->bytes_left = num_blocks * scan->fs->blocksize; + + scan->blocks_left -= num_blocks; + if (scan->current_block) + scan->current_block += num_blocks; + return 0; +} + +#if 0 +/* + * Returns 1 if the entire inode_buffer has a non-zero size and + * contains all zeros. (Not just deleted inodes, since that means + * that part of the inode table was used at one point; we want all + * zeros, which means that the inode table is pristine.) + */ +static inline int is_empty_scan(ext2_inode_scan scan) +{ + int i; + + if (scan->bytes_left == 0) + return 0; + + for (i=0; i < scan->bytes_left; i++) + if (scan->ptr[i]) + return 0; + return 1; +} +#endif + +errcode_t ext2fs_get_next_inode_full(ext2_inode_scan scan, ext2_ino_t *ino, + struct ext2_inode *inode, int bufsize) +{ + errcode_t retval; + int extra_bytes = 0; + + EXT2_CHECK_MAGIC(scan, EXT2_ET_MAGIC_INODE_SCAN); + + /* + * Do we need to start reading a new block group? + */ + if (scan->inodes_left <= 0) { + force_new_group: + if (scan->done_group) { + retval = (scan->done_group) + (scan->fs, scan, scan->current_group, + scan->done_group_data); + if (retval) + return retval; + } + if (scan->groups_left <= 0) { + *ino = 0; + return 0; + } + retval = get_next_blockgroup(scan); + if (retval) + return retval; + } + /* + * These checks are done outside the above if statement so + * they can be done for block group #0. 
+ */ + if ((scan->scan_flags & EXT2_SF_DO_LAZY) && + (scan->fs->group_desc[scan->current_group].bg_flags & + EXT2_BG_INODE_UNINIT)) + goto force_new_group; + if (scan->inodes_left == 0) + goto force_new_group; + if (scan->current_block == 0) { + if (scan->scan_flags & EXT2_SF_SKIP_MISSING_ITABLE) { + goto force_new_group; + } else + return EXT2_ET_MISSING_INODE_TABLE; + } + + + /* + * Have we run out of space in the inode buffer? If so, we + * need to read in more blocks. + */ + if (scan->bytes_left < scan->inode_size) { + memcpy(scan->temp_buffer, scan->ptr, scan->bytes_left); + extra_bytes = scan->bytes_left; + + retval = get_next_blocks(scan); + if (retval) + return retval; +#if 0 + /* + * XXX test Need check for used inode somehow. + * (Note: this is hard.) + */ + if (is_empty_scan(scan)) + goto force_new_group; +#endif + } + + retval = 0; + if (extra_bytes) { + memcpy(scan->temp_buffer+extra_bytes, scan->ptr, + scan->inode_size - extra_bytes); + scan->ptr += scan->inode_size - extra_bytes; + scan->bytes_left -= scan->inode_size - extra_bytes; + +#ifdef WORDS_BIGENDIAN + memset(inode, 0, bufsize); + ext2fs_swap_inode_full(scan->fs, + (struct ext2_inode_large *) inode, + (struct ext2_inode_large *) scan->temp_buffer, + 0, bufsize); +#else + *inode = *((struct ext2_inode *) scan->temp_buffer); +#endif + if (scan->scan_flags & EXT2_SF_BAD_EXTRA_BYTES) + retval = EXT2_ET_BAD_BLOCK_IN_INODE_TABLE; + scan->scan_flags &= ~EXT2_SF_BAD_EXTRA_BYTES; + } else { +#ifdef WORDS_BIGENDIAN + memset(inode, 0, bufsize); + ext2fs_swap_inode_full(scan->fs, + (struct ext2_inode_large *) inode, + (struct ext2_inode_large *) scan->ptr, + 0, bufsize); +#else + memcpy(inode, scan->ptr, bufsize); +#endif + scan->ptr += scan->inode_size; + scan->bytes_left -= scan->inode_size; + if (scan->scan_flags & EXT2_SF_BAD_INODE_BLK) + retval = EXT2_ET_BAD_BLOCK_IN_INODE_TABLE; + } + + scan->inodes_left--; + scan->current_inode++; + *ino = scan->current_inode; + return retval; +} + +errcode_t 
ext2fs_get_next_inode(ext2_inode_scan scan, ext2_ino_t *ino, + struct ext2_inode *inode) +{ + return ext2fs_get_next_inode_full(scan, ino, inode, + sizeof(struct ext2_inode)); +} + +/* + * Functions to read and write a single inode. + */ +errcode_t ext2fs_read_inode_full(ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode * inode, int bufsize) +{ + unsigned long group, block, block_nr, offset; + char *ptr; + errcode_t retval; + int clen, i, inodes_per_block, length; + io_channel io; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + /* Check to see if user has an override function */ + if (fs->read_inode) { + retval = (fs->read_inode)(fs, ino, inode); + if (retval != EXT2_ET_CALLBACK_NOTHANDLED) + return retval; + } + if ((ino == 0) || (ino > fs->super->s_inodes_count)) + return EXT2_ET_BAD_INODE_NUM; + /* Create inode cache if not present */ + if (!fs->icache) { + retval = create_icache(fs); + if (retval) + return retval; + } + + /* Check to see if it's in the inode cache */ + if (bufsize == sizeof(struct ext2_inode)) { + /* only old good inode can be retrieved from the cache */ + for (i=0; i < fs->icache->cache_size; i++) { + if (fs->icache->cache[i].ino == ino) { + *inode = fs->icache->cache[i].inode; + return 0; + } + } + } + + if (fs->flags & EXT2_FLAG_IMAGE_FILE) { + inodes_per_block = fs->blocksize / EXT2_INODE_SIZE(fs->super); + block_nr = fs->image_header->offset_inode / fs->blocksize; + block_nr += (ino - 1) / inodes_per_block; + offset = ((ino - 1) % inodes_per_block) * + EXT2_INODE_SIZE(fs->super); + io = fs->image_io; + } else { + group = (ino - 1) / EXT2_INODES_PER_GROUP(fs->super); + if (group > fs->group_desc_count) + return EXT2_ET_BAD_INODE_NUM; + offset = ((ino - 1) % EXT2_INODES_PER_GROUP(fs->super)) * + EXT2_INODE_SIZE(fs->super); + block = offset >> EXT2_BLOCK_SIZE_BITS(fs->super); + if (!fs->group_desc[(unsigned)group].bg_inode_table) + return EXT2_ET_MISSING_INODE_TABLE; + block_nr = fs->group_desc[(unsigned)group].bg_inode_table + + 
block; + io = fs->io; + } + + offset &= (EXT2_BLOCK_SIZE(fs->super) - 1); + + length = EXT2_INODE_SIZE(fs->super); + if (bufsize < length) + length = bufsize; + + ptr = (char *) inode; + while (length) { + clen = length; + if ((offset + length) > fs->blocksize) + clen = fs->blocksize - offset; + + if (block_nr != fs->icache->buffer_blk) { + retval = io_channel_read_blk(io, block_nr, 1, + fs->icache->buffer); + if (retval) + return retval; + fs->icache->buffer_blk = block_nr; + } + + memcpy(ptr, ((char *) fs->icache->buffer) + (unsigned) offset, + clen); + + offset = 0; + length -= clen; + ptr += clen; + block_nr++; + } + +#ifdef WORDS_BIGENDIAN + ext2fs_swap_inode_full(fs, (struct ext2_inode_large *) inode, + (struct ext2_inode_large *) inode, + 0, bufsize); +#endif + + /* Update the inode cache */ + fs->icache->cache_last = (fs->icache->cache_last + 1) % + fs->icache->cache_size; + fs->icache->cache[fs->icache->cache_last].ino = ino; + fs->icache->cache[fs->icache->cache_last].inode = *inode; + + return 0; +} + +errcode_t ext2fs_read_inode(ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode * inode) +{ + return ext2fs_read_inode_full(fs, ino, inode, + sizeof(struct ext2_inode)); +} + +errcode_t ext2fs_write_inode_full(ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode * inode, int bufsize) +{ + unsigned long group, block, block_nr, offset; + errcode_t retval = 0; + struct ext2_inode_large temp_inode, *w_inode; + char *ptr; + int clen, i, length; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + /* Check to see if user provided an override function */ + if (fs->write_inode) { + retval = (fs->write_inode)(fs, ino, inode); + if (retval != EXT2_ET_CALLBACK_NOTHANDLED) + return retval; + } + + /* Check to see if the inode cache needs to be updated */ + if (fs->icache) { + for (i=0; i < fs->icache->cache_size; i++) { + if (fs->icache->cache[i].ino == ino) { + fs->icache->cache[i].inode = *inode; + break; + } + } + } else { + retval = create_icache(fs); + if 
(retval) + return retval; + } + + if (!(fs->flags & EXT2_FLAG_RW)) + return EXT2_ET_RO_FILSYS; + + if ((ino == 0) || (ino > fs->super->s_inodes_count)) + return EXT2_ET_BAD_INODE_NUM; + + length = bufsize; + if (length < EXT2_INODE_SIZE(fs->super)) + length = EXT2_INODE_SIZE(fs->super); + + if (length > (int) sizeof(struct ext2_inode_large)) { + w_inode = malloc(length); + if (!w_inode) + return ENOMEM; + } else + w_inode = &temp_inode; + memset(w_inode, 0, length); + +#ifdef WORDS_BIGENDIAN + ext2fs_swap_inode_full(fs, w_inode, + (struct ext2_inode_large *) inode, + 1, bufsize); +#else + memcpy(w_inode, inode, bufsize); +#endif + + group = (ino - 1) / EXT2_INODES_PER_GROUP(fs->super); + offset = ((ino - 1) % EXT2_INODES_PER_GROUP(fs->super)) * + EXT2_INODE_SIZE(fs->super); + block = offset >> EXT2_BLOCK_SIZE_BITS(fs->super); + if (!fs->group_desc[(unsigned) group].bg_inode_table) { + retval = EXT2_ET_MISSING_INODE_TABLE; + goto errout; + } + block_nr = fs->group_desc[(unsigned) group].bg_inode_table + block; + + offset &= (EXT2_BLOCK_SIZE(fs->super) - 1); + + length = EXT2_INODE_SIZE(fs->super); + if (length > bufsize) + length = bufsize; + + ptr = (char *) w_inode; + + + while (length) { + clen = length; + if ((offset + length) > fs->blocksize) + clen = fs->blocksize - offset; + + if (fs->icache->buffer_blk != block_nr) { + retval = io_channel_read_blk(fs->io, block_nr, 1, + fs->icache->buffer); + if (retval) + goto errout; + fs->icache->buffer_blk = block_nr; + } + + memcpy((char *) fs->icache->buffer + (unsigned) offset, + ptr, clen); + retval = io_channel_write_blk(fs->io, block_nr, 1, + fs->icache->buffer); + if (retval) + goto errout; + + offset = 0; + ptr += clen; + length -= clen; + block_nr++; + } + + fs->flags |= EXT2_FLAG_CHANGED; +errout: + if (w_inode && w_inode != &temp_inode) + free(w_inode); + return retval; +} + +errcode_t ext2fs_write_inode(ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode *inode) +{ + return ext2fs_write_inode_full(fs, ino, 
inode, + sizeof(struct ext2_inode)); +} + +/* + * This function should be called when writing a new inode. It makes + * sure that extra part of large inodes is initialized properly. + */ +errcode_t ext2fs_write_new_inode(ext2_filsys fs, ext2_ino_t ino, + struct ext2_inode *inode) +{ + struct ext2_inode *buf; + int size = EXT2_INODE_SIZE(fs->super); + struct ext2_inode_large *large_inode; + errcode_t retval; + __u32 t = 0x5105cd7b;//fs->now ? fs->now : time(NULL); + + if (!inode->i_ctime) + inode->i_ctime = t; + if (!inode->i_mtime) + inode->i_mtime = t; + if (!inode->i_atime) + inode->i_atime = t; + + if (size == sizeof(struct ext2_inode)) + return ext2fs_write_inode_full(fs, ino, inode, + sizeof(struct ext2_inode)); + + buf = malloc(size); + if (!buf) + return ENOMEM; + + memset(buf, 0, size); + *buf = *inode; + + large_inode = (struct ext2_inode_large *) buf; + large_inode->i_extra_isize = sizeof(struct ext2_inode_large) - + EXT2_GOOD_OLD_INODE_SIZE; + if (!large_inode->i_crtime) + large_inode->i_crtime = t; + + retval = ext2fs_write_inode_full(fs, ino, buf, size); + free(buf); + return retval; +} + + +errcode_t ext2fs_get_blocks(ext2_filsys fs, ext2_ino_t ino, blk_t *blocks) +{ + struct ext2_inode inode; + int i; + errcode_t retval; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + if (ino > fs->super->s_inodes_count) + return EXT2_ET_BAD_INODE_NUM; + + if (fs->get_blocks) { + if (!(*fs->get_blocks)(fs, ino, blocks)) + return 0; + } + retval = ext2fs_read_inode(fs, ino, &inode); + if (retval) + return retval; + for (i=0; i < EXT2_N_BLOCKS; i++) + blocks[i] = inode.i_block[i]; + return 0; +} + +errcode_t ext2fs_check_directory(ext2_filsys fs, ext2_ino_t ino) +{ + struct ext2_inode inode; + errcode_t retval; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + if (ino > fs->super->s_inodes_count) + return EXT2_ET_BAD_INODE_NUM; + + if (fs->check_directory) { + retval = (fs->check_directory)(fs, ino); + if (retval != EXT2_ET_CALLBACK_NOTHANDLED) + 
return retval; + } + retval = ext2fs_read_inode(fs, ino, &inode); + if (retval) + return retval; + if (!LINUX_S_ISDIR(inode.i_mode)) + return EXT2_ET_NO_DIRECTORY; + return 0; +} + diff --git a/fs/ext4/format/io_manager.c b/fs/ext4/format/io_manager.c new file mode 100755 index 0000000..fea8797 --- /dev/null +++ b/fs/ext4/format/io_manager.c @@ -0,0 +1,96 @@ +/* + * io_manager.c --- the I/O manager abstraction + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> +#include <linux/stat.h> +#include <linux/time.h> + + +#include "ext2_fs.h" +#include "ext2fs.h" + +errcode_t io_channel_set_options(io_channel channel, const char *opts) +{ + errcode_t retval = 0; + char *next, *ptr, *options, *arg; + + EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); + + if (!opts) + return 0; + + if (!channel->manager->set_option) + return EXT2_ET_INVALID_ARGUMENT; + + options = malloc(strlen(opts)+1); + if (!options) + return EXT2_ET_NO_MEMORY; + strcpy(options, opts); + ptr = options; + + while (ptr && *ptr) { + next = strchr(ptr, '&'); + if (next) + *next++ = 0; + + arg = strchr(ptr, '='); + if (arg) + *arg++ = 0; + + retval = (channel->manager->set_option)(channel, ptr, arg); + if (retval) + break; + ptr = next; + } + free(options); + return retval; +} + +errcode_t io_channel_write_byte(io_channel channel, unsigned long offset, + int count, const void *data) +{ + EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); + + if (channel->manager->write_byte) + return channel->manager->write_byte(channel, offset, + count, data); + + return EXT2_ET_UNIMPLEMENTED; +} + +errcode_t io_channel_read_blk64(io_channel channel, unsigned long long block, + int count, void *data) +{ + EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); + + if (channel->manager->read_blk64) + return (channel->manager->read_blk64)(channel, block, + count, data); + + if ((block >> 32) != 0) + return EXT2_ET_IO_CHANNEL_NO_SUPPORT_64; + + return 
(channel->manager->read_blk)(channel, (unsigned long) block, + count, data); +} + +errcode_t io_channel_write_blk64(io_channel channel, unsigned long long block, + int count, const void *data) +{ + EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); + + if (channel->manager->write_blk64) + return (channel->manager->write_blk64)(channel, block, + count, data); + + if ((block >> 32) != 0) + return EXT2_ET_IO_CHANNEL_NO_SUPPORT_64; + + return (channel->manager->write_blk)(channel, (unsigned long) block, + count, data); +} diff --git a/fs/ext4/format/link.c b/fs/ext4/format/link.c new file mode 100755 index 0000000..e40b692 --- /dev/null +++ b/fs/ext4/format/link.c @@ -0,0 +1,153 @@ +/* + * link.c --- create links in a ext2fs directory + * + * Copyright (C) 1993, 1994 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. + * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> + +#include "ext2_fs.h" +#include "ext2fs.h" + +struct link_struct { + ext2_filsys fs; + const char *name; + int namelen; + ext2_ino_t inode; + int flags; + int done; + unsigned int blocksize; + errcode_t err; + struct ext2_super_block *sb; +}; + +static int link_proc(struct ext2_dir_entry *dirent, + int offset, + int blocksize, + char *buf, + void *priv_data) +{ + struct link_struct *ls = (struct link_struct *) priv_data; + struct ext2_dir_entry *next; + unsigned int rec_len, min_rec_len, curr_rec_len; + int ret = 0; + + rec_len = EXT2_DIR_REC_LEN(ls->namelen); + + ls->err = ext2fs_get_rec_len(ls->fs, dirent, &curr_rec_len); + if (ls->err) + return DIRENT_ABORT; + + /* + * See if the following directory entry (if any) is unused; + * if so, absorb it into this one. 
+ */ + next = (struct ext2_dir_entry *) (buf + offset + curr_rec_len); + if ((offset + curr_rec_len < blocksize - 8) && + (next->inode == 0) && + (offset + curr_rec_len + next->rec_len <= blocksize)) { + curr_rec_len += next->rec_len; + ls->err = ext2fs_set_rec_len(ls->fs, curr_rec_len, dirent); + if (ls->err) + return DIRENT_ABORT; + ret = DIRENT_CHANGED; + } + + /* + * If the directory entry is used, see if we can split the + * directory entry to make room for the new name. If so, + * truncate it and return. + */ + if (dirent->inode) { + min_rec_len = EXT2_DIR_REC_LEN(dirent->name_len & 0xFF); + if (curr_rec_len < (min_rec_len + rec_len)) + return ret; + rec_len = curr_rec_len - min_rec_len; + ls->err = ext2fs_set_rec_len(ls->fs, min_rec_len, dirent); + if (ls->err) + return DIRENT_ABORT; + next = (struct ext2_dir_entry *) (buf + offset + + dirent->rec_len); + next->inode = 0; + next->name_len = 0; + ls->err = ext2fs_set_rec_len(ls->fs, rec_len, next); + if (ls->err) + return DIRENT_ABORT; + return DIRENT_CHANGED; + } + + /* + * If we get this far, then the directory entry is not used. + * See if we can fit the request entry in. If so, do it. + */ + if (curr_rec_len < rec_len) + return ret; + dirent->inode = ls->inode; + dirent->name_len = ls->namelen; + strncpy(dirent->name, ls->name, ls->namelen); + if (ls->sb->s_feature_incompat & EXT2_FEATURE_INCOMPAT_FILETYPE) + dirent->name_len |= (ls->flags & 0x7) << 8; + + ls->done++; + return DIRENT_ABORT|DIRENT_CHANGED; +} + +/* + * Note: the low 3 bits of the flags field are used as the directory + * entry filetype. + */ +#ifdef __TURBOC__ + #pragma argsused +#endif +errcode_t ext2fs_link(ext2_filsys fs, ext2_ino_t dir, const char *name, + ext2_ino_t ino, int flags) +{ + errcode_t retval; + struct link_struct ls; + struct ext2_inode inode; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + if (!(fs->flags & EXT2_FLAG_RW)) + return EXT2_ET_RO_FILSYS; + + ls.fs = fs; + ls.name = name; + ls.namelen = name ? 
strlen(name) : 0; + ls.inode = ino; + ls.flags = flags; + ls.done = 0; + ls.sb = fs->super; + ls.blocksize = fs->blocksize; + ls.err = 0; + + retval = ext2fs_dir_iterate(fs, dir, DIRENT_FLAG_INCLUDE_EMPTY, + 0, link_proc, &ls); + if (retval) + return retval; + if (ls.err) + return ls.err; + + if (!ls.done) + return EXT2_ET_DIR_NO_SPACE; + + if ((retval = ext2fs_read_inode(fs, dir, &inode)) != 0) + return retval; + + if (inode.i_flags & EXT2_INDEX_FL) { + inode.i_flags &= ~EXT2_INDEX_FL; + if ((retval = ext2fs_write_inode(fs, dir, &inode)) != 0) + return retval; + } + + return 0; +} diff --git a/fs/ext4/format/lookup.c b/fs/ext4/format/lookup.c new file mode 100755 index 0000000..0fa0290 --- /dev/null +++ b/fs/ext4/format/lookup.c @@ -0,0 +1,69 @@ +/* + * lookup.c --- ext2fs directory lookup operations + * + * Copyright (C) 1993, 1994, 1994, 1995 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. 
+ * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> + +#include "ext2_fs.h" +#include "ext2fs.h" + +struct lookup_struct { + const char *name; + int len; + ext2_ino_t *inode; + int found; +}; + +#ifdef __TURBOC__ + #pragma argsused +#endif +static int lookup_proc(struct ext2_dir_entry *dirent, + int offset EXT2FS_ATTR((unused)), + int blocksize EXT2FS_ATTR((unused)), + char *buf EXT2FS_ATTR((unused)), + void *priv_data) +{ + struct lookup_struct *ls = (struct lookup_struct *) priv_data; + + if (ls->len != (dirent->name_len & 0xFF)) + return 0; + if (strncmp(ls->name, dirent->name, (dirent->name_len & 0xFF))) + return 0; + *ls->inode = dirent->inode; + ls->found++; + return DIRENT_ABORT; +} + + +errcode_t ext2fs_lookup(ext2_filsys fs, ext2_ino_t dir, const char *name, + int namelen, char *buf, ext2_ino_t *inode) +{ + errcode_t retval; + struct lookup_struct ls; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + ls.name = name; + ls.len = namelen; + ls.inode = inode; + ls.found = 0; + + retval = ext2fs_dir_iterate(fs, dir, 0, buf, lookup_proc, &ls); + if (retval) + return retval; + + return (ls.found) ? 0 : EXT2_ET_FILE_NOT_FOUND; +} + + diff --git a/fs/ext4/format/mkdir.c b/fs/ext4/format/mkdir.c new file mode 100755 index 0000000..ac3a679 --- /dev/null +++ b/fs/ext4/format/mkdir.c @@ -0,0 +1,135 @@ +/* + * mkdir.c --- make a directory in the filesystem + * + * Copyright (C) 1994, 1995 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. 
+ * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> +#include <linux/stat.h> +#include <linux/time.h> + +#include "ext2_fs.h" +#include "ext2fs.h" + +#ifndef EXT2_FT_DIR +#define EXT2_FT_DIR 2 +#endif + +errcode_t ext2fs_mkdir(ext2_filsys fs, ext2_ino_t parent, ext2_ino_t inum, + const char *name) +{ + errcode_t retval; + struct ext2_inode parent_inode, inode; + ext2_ino_t ino = inum; + ext2_ino_t scratch_ino; + blk_t blk; + char *block = 0; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + /* + * Allocate an inode, if necessary + */ + if (!ino) { + retval = ext2fs_new_inode(fs, parent, LINUX_S_IFDIR | 0755, + 0, &ino); + if (retval) + goto cleanup; + } + + /* + * Allocate a data block for the directory + */ + retval = ext2fs_new_block(fs, 0, 0, &blk); + if (retval) + goto cleanup; + + /* + * Create a scratch template for the directory + */ + retval = ext2fs_new_dir_block(fs, ino, parent, &block); + if (retval) + goto cleanup; + + /* + * Get the parent's inode, if necessary + */ + if (parent != ino) { + retval = ext2fs_read_inode(fs, parent, &parent_inode); + if (retval) + goto cleanup; + } else + memset(&parent_inode, 0, sizeof(parent_inode)); + + /* + * Create the inode structure.... 
+ */ + memset(&inode, 0, sizeof(struct ext2_inode)); + inode.i_mode = LINUX_S_IFDIR | (0777 & ~fs->umask); + inode.i_uid = inode.i_gid = 0; + ext2fs_iblk_set(fs, &inode, 1); + inode.i_block[0] = blk; + inode.i_links_count = 2; + inode.i_size = fs->blocksize; + + /* + * Write out the inode and inode data block + */ + retval = ext2fs_write_dir_block(fs, blk, block); + if (retval) + goto cleanup; + retval = ext2fs_write_new_inode(fs, ino, &inode); + if (retval) + goto cleanup; + + /* + * Link the directory into the filesystem hierarchy + */ + if (name) { + retval = ext2fs_lookup(fs, parent, name, strlen(name), 0, + &scratch_ino); + if (!retval) { + retval = EXT2_ET_DIR_EXISTS; + name = 0; + goto cleanup; + } + if (retval != EXT2_ET_FILE_NOT_FOUND) + goto cleanup; + retval = ext2fs_link(fs, parent, name, ino, EXT2_FT_DIR); + if (retval) + goto cleanup; + } + + /* + * Update parent inode's counts + */ + if (parent != ino) { + parent_inode.i_links_count++; + retval = ext2fs_write_inode(fs, parent, &parent_inode); + if (retval) + goto cleanup; + } + + /* + * Update accounting.... + */ + ext2fs_block_alloc_stats(fs, blk, +1); + ext2fs_inode_alloc_stats2(fs, ino, +1, 1); + +cleanup: + if (block) + ext2fs_free_mem(&block); + return retval; + +} + + diff --git a/fs/ext4/format/mkjournal.c b/fs/ext4/format/mkjournal.c new file mode 100755 index 0000000..e4c8d70 --- /dev/null +++ b/fs/ext4/format/mkjournal.c @@ -0,0 +1,628 @@ +/* + * mkjournal.c --- make a journal for a filesystem + * + * Copyright (C) 2000 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. 
/*
 * The journal superblock.  All fields are in big-endian byte order.
 *
 * The hexadecimal markers between the fields are byte offsets from
 * the start of the structure; they add up to 0x400 (1024) bytes in
 * total.  Fields from s_feature_compat onwards are only meaningful in
 * a version-2 superblock.
 */
typedef struct journal_superblock_s
{
/* 0x0000 */
	journal_header_t s_header;

/* 0x000C */
	/* Static information describing the journal */
	__u32	s_blocksize;		/* journal device blocksize */
	__u32	s_maxlen;		/* total blocks in journal file */
	__u32	s_first;		/* first block of log information */

/* 0x0018 */
	/* Dynamic information describing the current state of the log */
	__u32	s_sequence;		/* first commit ID expected in log */
	__u32	s_start;		/* blocknr of start of log */

/* 0x0020 */
	/* Error value, as set by journal_abort(). */
	__s32	s_errno;

/* 0x0024 */
	/* Remaining fields are only valid in a version-2 superblock */
	__u32	s_feature_compat; 	/* compatible feature set */
	__u32	s_feature_incompat; 	/* incompatible feature set */
	__u32	s_feature_ro_compat; 	/* readonly-compatible feature set */
/* 0x0030 */
	__u8	s_uuid[16];		/* 128-bit uuid for journal */

/* 0x0040 */
	__u32	s_nr_users;		/* Nr of filesystems sharing log */

	__u32	s_dynsuper;		/* Blocknr of dynamic superblock copy*/

/* 0x0048 */
	__u32	s_max_transaction;	/* Limit of journal blocks per trans.*/
	__u32	s_max_trans_data;	/* Limit of data blocks per trans. */

/* 0x0050 */
	__u32	s_padding[44];		/* 44 * 4 bytes: pads up to 0x0100 */

/* 0x0100 */
	__u8	s_users[16*48];		/* ids of all fs'es sharing the log */
/* 0x0400 */
} journal_superblock_t;
+ */ + if (fs->super->s_feature_incompat & + EXT3_FEATURE_INCOMPAT_JOURNAL_DEV) { + jsb->s_nr_users = 0; + if (fs->blocksize == 1024) + jsb->s_first = htonl(3); + else + jsb->s_first = htonl(2); + } + + *ret_jsb = (char *) jsb; + + return 0; +} + +/* + * This function writes a journal using POSIX routines. It is used + * for creating external journals and creating journals on live + * filesystems. + */ +static errcode_t write_journal_file(ext2_filsys fs, char *filename, + blk_t size, int flags) +{ + errcode_t retval=0; +#if 0 + char *buf = 0; + int fd, ret_size; + blk_t i; + + if ((retval = ext2fs_create_journal_superblock(fs, size, flags, &buf))) + return retval; + + /* Open the device or journal file */ + if ((fd = open(filename, O_WRONLY)) < 0) { + retval = errno; + goto errout; + } + + /* Write the superblock out */ + retval = EXT2_ET_SHORT_WRITE; + ret_size = write(fd, buf, fs->blocksize); + if (ret_size < 0) { + retval = errno; + goto errout; + } + if (ret_size != (int) fs->blocksize) + goto errout; + memset(buf, 0, fs->blocksize); + + for (i = 1; i < size; i++) { + ret_size = write(fd, buf, fs->blocksize); + if (ret_size < 0) { + retval = errno; + goto errout; + } + if (ret_size != (int) fs->blocksize) + goto errout; + } + close(fd); + + retval = 0; +errout: + ext2fs_free_mem(&buf); +#endif + return retval; +} + +/* + * Convenience function which zeros out _num_ blocks starting at + * _blk_. In case of an error, the details of the error is returned + * via _ret_blk_ and _ret_count_ if they are non-NULL pointers. + * Returns 0 on success, and an error code on an error. + * + * As a special case, if the first argument is NULL, then it will + * attempt to free the static zeroizing buffer. (This is to keep + * programs that check for memory leaks happy.) 
+ */ +#define STRIDE_LENGTH 8 +errcode_t ext2fs_zero_blocks(ext2_filsys fs, blk_t blk, int num, + blk_t *ret_blk, int *ret_count) +{ + int j, count; + static char *buf; + errcode_t retval; + + /* If fs is null, clean up the static buffer and return */ + if (!fs) { + if (buf) { + free(buf); + buf = 0; + } + return 0; + } + /* Allocate the zeroizing buffer if necessary */ + if (!buf) { + buf = malloc(fs->blocksize * STRIDE_LENGTH); + if (!buf) + return ENOMEM; + memset(buf, 0, fs->blocksize * STRIDE_LENGTH); + } + /* OK, do the write loop */ + j=0; + while (j < num) { + if (blk % STRIDE_LENGTH) { + count = STRIDE_LENGTH - (blk % STRIDE_LENGTH); + if (count > (num - j)) + count = num - j; + } else { + count = num - j; + if (count > STRIDE_LENGTH) + count = STRIDE_LENGTH; + } + retval = io_channel_write_blk(fs->io, blk, count, buf); + if (retval) { + if (ret_count) + *ret_count = count; + if (ret_blk) + *ret_blk = blk; + return retval; + } + j += count; blk += count; + } + return 0; +} + +/* + * Helper function for creating the journal using direct I/O routines + */ +struct mkjournal_struct { + int num_blocks; + int newblocks; + blk_t goal; + blk_t blk_to_zero; + int zero_count; + char *buf; + errcode_t err; +}; + +static int mkjournal_proc(ext2_filsys fs, + blk_t *blocknr, + e2_blkcnt_t blockcnt, + blk_t ref_block EXT2FS_ATTR((unused)), + int ref_offset EXT2FS_ATTR((unused)), + void *priv_data) +{ + struct mkjournal_struct *es = (struct mkjournal_struct *) priv_data; + blk_t new_blk; + errcode_t retval; + + if (*blocknr) { + es->goal = *blocknr; + return 0; + } + retval = ext2fs_new_block(fs, es->goal, 0, &new_blk); + if (retval) { + es->err = retval; + return BLOCK_ABORT; + } + if (blockcnt >= 0) + es->num_blocks--; + + es->newblocks++; + retval = 0; + if (blockcnt <= 0) + retval = io_channel_write_blk(fs->io, new_blk, 1, es->buf); + else { + if (es->zero_count) { + if ((es->blk_to_zero + es->zero_count == new_blk) && + (es->zero_count < 1024)) + es->zero_count++; + 
else { + retval = ext2fs_zero_blocks(fs, + es->blk_to_zero, + es->zero_count, + 0, 0); + es->zero_count = 0; + } + } + if (es->zero_count == 0) { + es->blk_to_zero = new_blk; + es->zero_count = 1; + } + } + + if (blockcnt == 0) + memset(es->buf, 0, fs->blocksize); + + if (retval) { + es->err = retval; + return BLOCK_ABORT; + } + *blocknr = es->goal = new_blk; + ext2fs_block_alloc_stats(fs, new_blk, +1); + + if (es->num_blocks == 0) + return (BLOCK_CHANGED | BLOCK_ABORT); + else + return BLOCK_CHANGED; + +} + +/* + * This function creates a journal using direct I/O routines. + */ +static errcode_t write_journal_inode(ext2_filsys fs, ext2_ino_t journal_ino, + blk_t size, int flags) +{ + char *buf; + dgrp_t group, start, end, i, log_flex; + errcode_t retval; + struct ext2_inode inode; + struct mkjournal_struct es; + + if ((retval = ext2fs_create_journal_superblock(fs, size, flags, &buf))) + return retval; + + if ((retval = ext2fs_read_bitmaps(fs))) + return retval; + + if ((retval = ext2fs_read_inode(fs, journal_ino, &inode))) + return retval; + + if (inode.i_blocks > 0) + return EEXIST; + + es.num_blocks = size; + es.newblocks = 0; + es.buf = buf; + es.err = 0; + es.zero_count = 0; + if (fs->super->s_feature_incompat & EXT3_FEATURE_INCOMPAT_EXTENTS) { + inode.i_flags |= EXT4_EXTENTS_FL; + if ((retval = ext2fs_write_inode(fs, journal_ino, &inode))) + return retval; + } + + /* + * Set the initial goal block to be roughly at the middle of + * the filesystem. Pick a group that has the largest number + * of free blocks. 
+ */ + //Tina: here we decide which group is used by journal + group = ext2fs_group_of_blk(fs, (fs->super->s_blocks_count - + fs->super->s_first_data_block) / 2); + log_flex = 1 << fs->super->s_log_groups_per_flex; + if (fs->super->s_log_groups_per_flex && (group > log_flex)) { + group = group & ~(log_flex - 1); + while ((group < fs->group_desc_count) && + fs->group_desc[group].bg_free_blocks_count == 0) + group++; + if (group == fs->group_desc_count) + group = 0; + start = group; + } else + start = (group > 0) ? group-1 : group; + end = ((group+1) < fs->group_desc_count) ? group+1 : group; + group = start; + for (i=start+1; i <= end; i++) + if (fs->group_desc[i].bg_free_blocks_count > + fs->group_desc[group].bg_free_blocks_count) + group = i; + printf("the journal group is 0x%x\n",group); + es.goal = (fs->super->s_blocks_per_group * group) + + fs->super->s_first_data_block; + + retval = ext2fs_block_iterate2(fs, journal_ino, BLOCK_FLAG_APPEND, + 0, mkjournal_proc, &es); + if (es.err) { + retval = es.err; + goto errout; + } + if (es.zero_count) { + retval = ext2fs_zero_blocks(fs, es.blk_to_zero, + es.zero_count, 0, 0); + if (retval) + goto errout; + } + + if ((retval = ext2fs_read_inode(fs, journal_ino, &inode))) + goto errout; + + inode.i_size += fs->blocksize * size; + ext2fs_iblk_add_blocks(fs, &inode, es.newblocks); + inode.i_mtime = inode.i_ctime = 0x5105cd7b;//fs->now ? fs->now : time(0); + inode.i_links_count = 1; + inode.i_mode = LINUX_S_IFREG | 0600; + + if ((retval = ext2fs_write_new_inode(fs, journal_ino, &inode))) + goto errout; + retval = 0; + memcpy(fs->super->s_jnl_blocks, inode.i_block, EXT2_N_BLOCKS*4); + fs->super->s_jnl_blocks[16] = inode.i_size; + fs->super->s_jnl_backup_type = EXT3_JNL_BACKUP_BLOCKS; + + + ext2fs_mark_super_dirty(fs); +errout: + ext2fs_free_mem(&buf); + return retval; +} + +/* + * Find a reasonable journal file size (in blocks) given the number of blocks + * in the filesystem. 
For very small filesystems, it is not reasonable to + * have a journal that fills more than half of the filesystem. + */ +int ext2fs_default_journal_size(__u64 blocks) +{ + if (blocks < 2048) + return -1; + if (blocks < 32768) + return (1024); + if (blocks < 256*1024) + return (4096); + if (blocks < 512*1024) + return (8192); + if (blocks < 1024*1024) + return (16384); + return 32768; +} + +/* + * This function adds a journal device to a filesystem + */ +errcode_t ext2fs_add_journal_device(ext2_filsys fs, ext2_filsys journal_dev) +{ +#if 0 + struct stat st; + errcode_t retval; + char buf[1024]; + journal_superblock_t *jsb; + int start; + __u32 i, nr_users; + + /* Make sure the device exists and is a block device */ + if (stat(journal_dev->device_name, &st) < 0) + return errno; + + if (!S_ISBLK(st.st_mode)) + return EXT2_ET_JOURNAL_NOT_BLOCK; /* Must be a block device */ + + /* Get the journal superblock */ + start = 1; + if (journal_dev->blocksize == 1024) + start++; + if ((retval = io_channel_read_blk(journal_dev->io, start, -1024, buf))) + return retval; + + jsb = (journal_superblock_t *) buf; + if ((jsb->s_header.h_magic != (unsigned) ntohl(JFS_MAGIC_NUMBER)) || + (jsb->s_header.h_blocktype != (unsigned) ntohl(JFS_SUPERBLOCK_V2))) + return EXT2_ET_NO_JOURNAL_SB; + + if (ntohl(jsb->s_blocksize) != (unsigned long) fs->blocksize) + return EXT2_ET_UNEXPECTED_BLOCK_SIZE; + + /* Check and see if this filesystem has already been added */ + nr_users = ntohl(jsb->s_nr_users); + for (i=0; i < nr_users; i++) { + if (memcmp(fs->super->s_uuid, + &jsb->s_users[i*16], 16) == 0) + break; + } + if (i >= nr_users) { + memcpy(&jsb->s_users[nr_users*16], + fs->super->s_uuid, 16); + jsb->s_nr_users = htonl(nr_users+1); + } + + /* Writeback the journal superblock */ + if ((retval = io_channel_write_blk(journal_dev->io, start, -1024, buf))) + return retval; + + fs->super->s_journal_inum = 0; + fs->super->s_journal_dev = st.st_rdev; + memcpy(fs->super->s_journal_uuid, jsb->s_uuid, + 
sizeof(fs->super->s_journal_uuid)); + fs->super->s_feature_compat |= EXT3_FEATURE_COMPAT_HAS_JOURNAL; + ext2fs_mark_super_dirty(fs); +#endif + return 0; +} + +/* + * This function adds a journal inode to a filesystem, using either + * POSIX routines if the filesystem is mounted, or using direct I/O + * functions if it is not. + */ +errcode_t ext2fs_add_journal_inode(ext2_filsys fs, blk_t size, int flags) +{ + errcode_t retval=0; + + ext2_ino_t journal_ino; + struct stat st; + char jfile[1024]; + int mount_flags, f; + int fd = -1; + + journal_ino = EXT2_JOURNAL_INO; + if ((retval = write_journal_inode(fs, journal_ino, + size, flags))) + return retval; +#if 0 + + if ((retval = ext2fs_check_mount_point(fs->device_name, &mount_flags, + jfile, sizeof(jfile)-10))) + return retval; + + if (mount_flags & EXT2_MF_MOUNTED) { + strcat(jfile, "/.journal"); + + /* + * If .../.journal already exists, make sure any + * immutable or append-only flags are cleared. + */ +#if defined(HAVE_CHFLAGS) && defined(UF_NODUMP) + (void) chflags (jfile, 0); +#else +#if HAVE_EXT2_IOCTLS + fd = open(jfile, O_RDONLY); + if (fd >= 0) { + f = 0; + ioctl(fd, EXT2_IOC_SETFLAGS, &f); + close(fd); + } +#endif +#endif + + /* Create the journal file */ + if ((fd = open(jfile, O_CREAT|O_WRONLY, 0600)) < 0) + return errno; + + if ((retval = write_journal_file(fs, jfile, size, flags))) + goto errout; + + /* Get inode number of the journal file */ + if (fstat(fd, &st) < 0) { + retval = errno; + goto errout; + } + +#if defined(HAVE_CHFLAGS) && defined(UF_NODUMP) + retval = fchflags (fd, UF_NODUMP|UF_IMMUTABLE); +#else +#if HAVE_EXT2_IOCTLS + if (ioctl(fd, EXT2_IOC_GETFLAGS, &f) < 0) { + retval = errno; + goto errout; + } + f |= EXT2_NODUMP_FL | EXT2_IMMUTABLE_FL; + retval = ioctl(fd, EXT2_IOC_SETFLAGS, &f); +#endif +#endif + if (retval) { + retval = errno; + goto errout; + } + + if (close(fd) < 0) { + retval = errno; + fd = -1; + goto errout; + } + journal_ino = st.st_ino; + } else { + if ((mount_flags & 
EXT2_MF_BUSY) && + !(fs->flags & EXT2_FLAG_EXCLUSIVE)) { + retval = EBUSY; + goto errout; + } + journal_ino = EXT2_JOURNAL_INO; + if ((retval = write_journal_inode(fs, journal_ino, + size, flags))) + return retval; + } +#endif + + fs->super->s_journal_inum = journal_ino; + fs->super->s_journal_dev = 0; + memset(fs->super->s_journal_uuid, 0, + sizeof(fs->super->s_journal_uuid)); + fs->super->s_feature_compat |= EXT3_FEATURE_COMPAT_HAS_JOURNAL; + + ext2fs_mark_super_dirty(fs); + return 0; +errout: + if (fd > 0) + close(fd); + + return retval; +} + diff --git a/fs/ext4/format/newdir.c b/fs/ext4/format/newdir.c new file mode 100755 index 0000000..8fae414 --- /dev/null +++ b/fs/ext4/format/newdir.c @@ -0,0 +1,80 @@ +/* + * newdir.c --- create a new directory block + * + * Copyright (C) 1994, 1995 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. + * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> +#include <linux/stat.h> +#include <linux/time.h> + + +#include "ext2_fs.h" +#include "ext2fs.h" + +#ifndef EXT2_FT_DIR +#define EXT2_FT_DIR 2 +#endif + +/* + * Create new directory block + */ +errcode_t ext2fs_new_dir_block(ext2_filsys fs, ext2_ino_t dir_ino, + ext2_ino_t parent_ino, char **block) +{ + struct ext2_dir_entry *dir = NULL; + errcode_t retval; + char *buf; + int rec_len; + int filetype = 0; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + retval = ext2fs_get_mem(fs->blocksize, &buf); + if (retval) + return retval; + memset(buf, 0, fs->blocksize); + dir = (struct ext2_dir_entry *) buf; + + retval = ext2fs_set_rec_len(fs, fs->blocksize, dir); + if (retval) + return retval; + + if (dir_ino) { + if (fs->super->s_feature_incompat & + EXT2_FEATURE_INCOMPAT_FILETYPE) + filetype = EXT2_FT_DIR << 8; + /* + * Set up entry for '.' 
+ */ + dir->inode = dir_ino; + dir->name_len = 1 | filetype; + dir->name[0] = '.'; + rec_len = fs->blocksize - EXT2_DIR_REC_LEN(1); + dir->rec_len = EXT2_DIR_REC_LEN(1); + + /* + * Set up entry for '..' + */ + dir = (struct ext2_dir_entry *) (buf + dir->rec_len); + retval = ext2fs_set_rec_len(fs, rec_len, dir); + if (retval) + return retval; + dir->inode = parent_ino; + dir->name_len = 2 | filetype; + dir->name[0] = '.'; + dir->name[1] = '.'; + + } + *block = buf; + return 0; +} diff --git a/fs/ext4/format/res_gdt.c b/fs/ext4/format/res_gdt.c new file mode 100755 index 0000000..f505e41 --- /dev/null +++ b/fs/ext4/format/res_gdt.c @@ -0,0 +1,224 @@ +/* + * res_gdt.c --- reserve blocks for growing the group descriptor table + * during online resizing. + * + * Copyright (C) 2002 Andreas Dilger + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. + * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> +#include <linux/stat.h> +#include <linux/time.h> + +#include "ext2_fs.h" +#include "ext2fs.h" + +/* + * Iterate through the groups which hold BACKUP superblock/GDT copies in an + * ext3 filesystem. The counters should be initialized to 1, 5, and 7 before + * calling this for the first time. In a sparse filesystem it will be the + * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... + * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... 
+ */ +static unsigned int list_backups(ext2_filsys fs, unsigned int *three, + unsigned int *five, unsigned int *seven) +{ + unsigned int *min = three; + int mult = 3; + unsigned int ret; + + if (!(fs->super->s_feature_ro_compat & + EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)) { + ret = *min; + *min += 1; + return ret; + } + + if (*five < *min) { + min = five; + mult = 5; + } + if (*seven < *min) { + min = seven; + mult = 7; + } + + ret = *min; + *min *= mult; + + return ret; +} + +/* + * This code assumes that the reserved blocks have already been marked in-use + * during ext2fs_initialize(), so that they are not allocated for other + * uses before we can add them to the resize inode (which has to come + * after the creation of the inode table). + */ +errcode_t ext2fs_create_resize_inode(ext2_filsys fs) +{ + errcode_t retval, retval2; + struct ext2_super_block *sb; + struct ext2_inode inode; + __u32 *dindir_buf, *gdt_buf; + unsigned long long apb, inode_size; + blk_t dindir_blk, rsv_off, gdt_off, gdt_blk; + int dindir_dirty = 0, inode_dirty = 0; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + sb = fs->super; + + retval = ext2fs_get_array(2, fs->blocksize, &dindir_buf); + if (retval) + goto out_free; + gdt_buf = (__u32 *)((char *)dindir_buf + fs->blocksize); + + retval = ext2fs_read_inode(fs, EXT2_RESIZE_INO, &inode); + if (retval) + goto out_free; + + /* Maximum possible file size (we donly use the dindirect blocks) */ + apb = EXT2_ADDR_PER_BLOCK(sb); + if ((dindir_blk = inode.i_block[EXT2_DIND_BLOCK])) { +#ifdef RES_GDT_DEBUG + printf("reading GDT dindir %u\n", dindir_blk); +#endif + retval = ext2fs_read_ind_block(fs, dindir_blk, dindir_buf); + if (retval) + goto out_inode; + } else { + blk_t goal = sb->s_first_data_block + fs->desc_blocks + + sb->s_reserved_gdt_blocks + 2 + + fs->inode_blocks_per_group; + + retval = ext2fs_alloc_block(fs, goal, 0, &dindir_blk); + if (retval) + goto out_free; + inode.i_mode = LINUX_S_IFREG | 0600; + inode.i_links_count = 1; + 
inode.i_block[EXT2_DIND_BLOCK] = dindir_blk; + ext2fs_iblk_set(fs, &inode, 1); + memset(dindir_buf, 0, fs->blocksize); +#ifdef RES_GDT_DEBUG + printf("allocated GDT dindir %u\n", dindir_blk); +#endif + dindir_dirty = inode_dirty = 1; + inode_size = apb*apb + apb + EXT2_NDIR_BLOCKS; + inode_size *= fs->blocksize; + inode.i_size = inode_size & 0xFFFFFFFF; + inode.i_size_high = (inode_size >> 32) & 0xFFFFFFFF; + if(inode.i_size_high) { + sb->s_feature_ro_compat |= + EXT2_FEATURE_RO_COMPAT_LARGE_FILE; + } + inode.i_ctime = 0x5105cd7b;//fs->now ? fs->now : time(0); + } + + for (rsv_off = 0, gdt_off = fs->desc_blocks, + gdt_blk = sb->s_first_data_block + 1 + fs->desc_blocks; + rsv_off < sb->s_reserved_gdt_blocks; + rsv_off++, gdt_off++, gdt_blk++) { + unsigned int three = 1, five = 5, seven = 7; + unsigned int grp, last = 0; + int gdt_dirty = 0; + + gdt_off %= apb; + if (!dindir_buf[gdt_off]) { + /* FIXME XXX XXX + blk_t new_blk; + + retval = ext2fs_new_block(fs, gdt_blk, 0, &new_blk); + if (retval) + goto out_free; + if (new_blk != gdt_blk) { + // XXX free block + retval = -1; // XXX + } + */ + gdt_dirty = dindir_dirty = inode_dirty = 1; + memset(gdt_buf, 0, fs->blocksize); + dindir_buf[gdt_off] = gdt_blk; + ext2fs_iblk_add_blocks(fs, &inode, 1); +#ifdef RES_GDT_DEBUG + printf("added primary GDT block %u at %u[%u]\n", + gdt_blk, dindir_blk, gdt_off); +#endif + } else if (dindir_buf[gdt_off] == gdt_blk) { +#ifdef RES_GDT_DEBUG + printf("reading primary GDT block %u\n", gdt_blk); +#endif + retval = ext2fs_read_ind_block(fs, gdt_blk, gdt_buf); + if (retval) + goto out_dindir; + } else { +#ifdef RES_GDT_DEBUG + printf("bad primary GDT %u != %u at %u[%u]\n", + dindir_buf[gdt_off], gdt_blk,dindir_blk,gdt_off); +#endif + retval = EXT2_ET_RESIZE_INODE_CORRUPT; + goto out_dindir; + } + + while ((grp = list_backups(fs, &three, &five, &seven)) < + fs->group_desc_count) { + blk_t expect = gdt_blk + grp * sb->s_blocks_per_group; + + if (!gdt_buf[last]) { +#ifdef RES_GDT_DEBUG + 
printf("added backup GDT %u grp %u@%u[%u]\n", + expect, grp, gdt_blk, last); +#endif + gdt_buf[last] = expect; + ext2fs_iblk_add_blocks(fs, &inode, 1); + gdt_dirty = inode_dirty = 1; + } else if (gdt_buf[last] != expect) { +#ifdef RES_GDT_DEBUG + printf("bad backup GDT %u != %u at %u[%u]\n", + gdt_buf[last], expect, gdt_blk, last); +#endif + retval = EXT2_ET_RESIZE_INODE_CORRUPT; + goto out_dindir; + } + last++; + } + if (gdt_dirty) { +#ifdef RES_GDT_DEBUG + printf("writing primary GDT block %u\n", gdt_blk); +#endif + retval = ext2fs_write_ind_block(fs, gdt_blk, gdt_buf); + if (retval) + goto out_dindir; + } + } + +out_dindir: + if (dindir_dirty) { + retval2 = ext2fs_write_ind_block(fs, dindir_blk, dindir_buf); + if (!retval) + retval = retval2; + } +out_inode: +#ifdef RES_GDT_DEBUG + printf("inode.i_blocks = %u, i_size = %u\n", inode.i_blocks, + inode.i_size); +#endif + if (inode_dirty) { + inode.i_atime = inode.i_mtime = 0x5105cd7b;//fs->now ? fs->now : time(0); + retval2 = ext2fs_write_new_inode(fs, EXT2_RESIZE_INO, &inode); + if (!retval) + retval = retval2; + } +out_free: + ext2fs_free_mem(&dindir_buf); + return retval; +} + diff --git a/fs/ext4/format/rw_bitmaps.c b/fs/ext4/format/rw_bitmaps.c new file mode 100755 index 0000000..cac55e8 --- /dev/null +++ b/fs/ext4/format/rw_bitmaps.c @@ -0,0 +1,336 @@ +/* + * rw_bitmaps.c --- routines to read and write the inode and block bitmaps. + * + * Copyright (C) 1993, 1994, 1994, 1996 Theodore Ts'o. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. 
+ * %End-Header% + */ + +#include <common.h> +//#include <ext_common.h> +//#include <ext4fs.h> +#include <malloc.h> +#include <stddef.h> +#include <linux/stat.h> +#include <linux/time.h> + +#include "ext2_fs.h" +#include "ext2fs.h" +#include "e2image.h" + +static errcode_t write_bitmaps(ext2_filsys fs, int do_inode, int do_block) +{ + dgrp_t i; + unsigned int j; + int block_nbytes, inode_nbytes; + unsigned int nbits; + errcode_t retval; + char *block_buf, *inode_buf; + int csum_flag = 0; + blk_t blk; + blk_t blk_itr = fs->super->s_first_data_block; + ext2_ino_t ino_itr = 1; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + if (!(fs->flags & EXT2_FLAG_RW)) + return EXT2_ET_RO_FILSYS; + + if (EXT2_HAS_RO_COMPAT_FEATURE(fs->super, + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) + csum_flag = 1; + + inode_nbytes = block_nbytes = 0; + if (do_block) { + block_nbytes = EXT2_BLOCKS_PER_GROUP(fs->super) / 8; + retval = ext2fs_get_memalign(fs->blocksize, fs->blocksize, + &block_buf); + if (retval) + return retval; + memset(block_buf, 0xff, fs->blocksize); + } + if (do_inode) { + inode_nbytes = (size_t) + ((EXT2_INODES_PER_GROUP(fs->super)+7) / 8); + retval = ext2fs_get_memalign(fs->blocksize, fs->blocksize, + &inode_buf); + if (retval) + return retval; + memset(inode_buf, 0xff, fs->blocksize); + } + + for (i = 0; i < fs->group_desc_count; i++) { + if (!do_block) + goto skip_block_bitmap; + + if (csum_flag && fs->group_desc[i].bg_flags & + EXT2_BG_BLOCK_UNINIT) + goto skip_this_block_bitmap; + + retval = ext2fs_get_block_bitmap_range(fs->block_map, + blk_itr, block_nbytes << 3, block_buf); + if (retval) + return retval; + + if (i == fs->group_desc_count - 1) { + /* Force bitmap padding for the last group */ + nbits = ((fs->super->s_blocks_count + - fs->super->s_first_data_block) + % EXT2_BLOCKS_PER_GROUP(fs->super)); + if (nbits) + for (j = nbits; j < fs->blocksize * 8; j++) + ext2fs_set_bit(j, block_buf); + } + blk = fs->group_desc[i].bg_block_bitmap; + if (blk) { + + retval = 
io_channel_write_blk(fs->io, blk, 1, + block_buf); + if (retval) + return EXT2_ET_BLOCK_BITMAP_WRITE; + } + skip_this_block_bitmap: + blk_itr += block_nbytes << 3; + skip_block_bitmap: + + if (!do_inode) + continue; + + if (csum_flag && fs->group_desc[i].bg_flags & + EXT2_BG_INODE_UNINIT) + goto skip_this_inode_bitmap; + + retval = ext2fs_get_inode_bitmap_range(fs->inode_map, + ino_itr, inode_nbytes << 3, inode_buf); + if (retval) + return retval; + + blk = fs->group_desc[i].bg_inode_bitmap; + if (blk) { + retval = io_channel_write_blk(fs->io, blk, 1, + inode_buf); + if (retval) + return EXT2_ET_INODE_BITMAP_WRITE; + } + skip_this_inode_bitmap: + ino_itr += inode_nbytes << 3; + + } + if (do_block) { + fs->flags &= ~EXT2_FLAG_BB_DIRTY; + ext2fs_free_mem(&block_buf); + } + if (do_inode) { + fs->flags &= ~EXT2_FLAG_IB_DIRTY; + ext2fs_free_mem(&inode_buf); + } + return 0; +} + +static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block) +{ + dgrp_t i; + char *block_bitmap = 0, *inode_bitmap = 0; + char *buf; + errcode_t retval; + int block_nbytes = EXT2_BLOCKS_PER_GROUP(fs->super) / 8; + int inode_nbytes = EXT2_INODES_PER_GROUP(fs->super) / 8; + int csum_flag = 0; + int do_image = fs->flags & EXT2_FLAG_IMAGE_FILE; + unsigned int cnt; + blk_t blk; + blk_t blk_itr = fs->super->s_first_data_block; + blk_t blk_cnt; + ext2_ino_t ino_itr = 1; + ext2_ino_t ino_cnt; + + EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + + fs->write_bitmaps = ext2fs_write_bitmaps; + + if (EXT2_HAS_RO_COMPAT_FEATURE(fs->super, + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) + csum_flag = 1; + + if (do_block) { + if (fs->block_map) + ext2fs_free_block_bitmap(fs->block_map); + retval = ext2fs_allocate_block_bitmap(fs, NULL, &fs->block_map); + if (retval) + goto cleanup; + if (do_image) + retval = ext2fs_get_mem(fs->blocksize, &block_bitmap); + else + retval = ext2fs_get_memalign((unsigned) block_nbytes, + fs->blocksize, + &block_bitmap); + + if (retval) + goto cleanup; + } else + block_nbytes 
= 0; + if (do_inode) { + if (fs->inode_map) + ext2fs_free_inode_bitmap(fs->inode_map); + retval = ext2fs_allocate_inode_bitmap(fs, NULL, &fs->inode_map); + if (retval) + goto cleanup; + retval = ext2fs_get_mem(do_image ? fs->blocksize : + (unsigned) inode_nbytes, &inode_bitmap); + if (retval) + goto cleanup; + } else + inode_nbytes = 0; + + if (fs->flags & EXT2_FLAG_IMAGE_FILE) { + blk = (fs->image_header->offset_inodemap / fs->blocksize); + ino_cnt = fs->super->s_inodes_count; + while (inode_nbytes > 0) { + retval = io_channel_read_blk(fs->image_io, blk++, + 1, inode_bitmap); + if (retval) + goto cleanup; + cnt = fs->blocksize << 3; + if (cnt > ino_cnt) + cnt = ino_cnt; + retval = ext2fs_set_inode_bitmap_range(fs->inode_map, + ino_itr, cnt, inode_bitmap); + if (retval) + goto cleanup; + ino_itr += fs->blocksize << 3; + ino_cnt -= fs->blocksize << 3; + inode_nbytes -= fs->blocksize; + } + blk = (fs->image_header->offset_blockmap / + fs->blocksize); + blk_cnt = EXT2_BLOCKS_PER_GROUP(fs->super) * + fs->group_desc_count; + while (block_nbytes > 0) { + retval = io_channel_read_blk(fs->image_io, blk++, + 1, block_bitmap); + if (retval) + goto cleanup; + cnt = fs->blocksize << 3; + if (cnt > blk_cnt) + cnt = blk_cnt; + retval = ext2fs_set_block_bitmap_range(fs->block_map, + blk_itr, cnt, block_bitmap); + if (retval) + goto cleanup; + blk_itr += fs->blocksize << 3; + blk_cnt -= fs->blocksize << 3; + block_nbytes -= fs->blocksize; + } + goto success_cleanup; + } + + for (i = 0; i < fs->group_desc_count; i++) { + if (block_bitmap) { + blk = fs->group_desc[i].bg_block_bitmap; + if (csum_flag && fs->group_desc[i].bg_flags & + EXT2_BG_BLOCK_UNINIT && + ext2fs_group_desc_csum_verify(fs, i)) + blk = 0; + if (blk) { + retval = io_channel_read_blk(fs->io, blk, + -block_nbytes, block_bitmap); + if (retval) { + retval = EXT2_ET_BLOCK_BITMAP_READ; + goto cleanup; + } + } else + memset(block_bitmap, 0, block_nbytes); + cnt = block_nbytes << 3; + retval = 
ext2fs_set_block_bitmap_range(fs->block_map, + blk_itr, cnt, block_bitmap); + if (retval) + goto cleanup; + blk_itr += block_nbytes << 3; + } + if (inode_bitmap) { + blk = fs->group_desc[i].bg_inode_bitmap; + if (csum_flag && fs->group_desc[i].bg_flags & + EXT2_BG_INODE_UNINIT && + ext2fs_group_desc_csum_verify(fs, i)) + blk = 0; + if (blk) { + retval = io_channel_read_blk(fs->io, blk, + -inode_nbytes, inode_bitmap); + if (retval) { + retval = EXT2_ET_INODE_BITMAP_READ; + goto cleanup; + } + } else + memset(inode_bitmap, 0, inode_nbytes); + cnt = inode_nbytes << 3; + retval = ext2fs_set_inode_bitmap_range(fs->inode_map, + ino_itr, cnt, inode_bitmap); + if (retval) + goto cleanup; + ino_itr += inode_nbytes << 3; + } + } +success_cleanup: + if (inode_bitmap) + ext2fs_free_mem(&inode_bitmap); + if (block_bitmap) + ext2fs_free_mem(&block_bitmap); + return 0; + +cleanup: + if (do_block) { + ext2fs_free_mem(&fs->block_map); + fs->block_map = 0; + } + if (do_inode) { + ext2fs_free_mem(&fs->inode_map); + fs->inode_map = 0; + } + if (inode_bitmap) + ext2fs_free_mem(&inode_bitmap); + if (block_bitmap) + ext2fs_free_mem(&block_bitmap); + return retval; +} + +errcode_t ext2fs_read_inode_bitmap(ext2_filsys fs) +{ + return read_bitmaps(fs, 1, 0); +} + +errcode_t ext2fs_read_block_bitmap(ext2_filsys fs) +{ + return read_bitmaps(fs, 0, 1); +} + +errcode_t ext2fs_write_inode_bitmap(ext2_filsys fs) +{ + return write_bitmaps(fs, 1, 0); +} + +errcode_t ext2fs_write_block_bitmap (ext2_filsys fs) +{ + return write_bitmaps(fs, 0, 1); +} + +errcode_t ext2fs_read_bitmaps(ext2_filsys fs) +{ + if (fs->inode_map && fs->block_map) + return 0; + + return read_bitmaps(fs, !fs->inode_map, !fs->block_map); +} + +errcode_t ext2fs_write_bitmaps(ext2_filsys fs) +{ + int do_inode = fs->inode_map && ext2fs_test_ib_dirty(fs); + int do_block = fs->block_map && ext2fs_test_bb_dirty(fs); + + if (!do_inode && !do_block) + return 0; + + return write_bitmaps(fs, do_inode, do_block); +} diff --git 
a/fs/ext4/format/tdb.c b/fs/ext4/format/tdb.c new file mode 100755 index 0000000..0c82a24 --- /dev/null +++ b/fs/ext4/format/tdb.c @@ -0,0 +1,4143 @@ +/* +URL: svn://svnanon.samba.org/samba/branches/SAMBA_4_0/source/lib/tdb/common +Rev: 23590 +Last Changed Date: 2007-06-22 13:36:10 -0400 (Fri, 22 Jun 2007) +*/ + /* + trivial database library - standalone version + + Copyright (C) Andrew Tridgell 1999-2005 + Copyright (C) Jeremy Allison 2000-2006 + Copyright (C) Paul `Rusty' Russell 2000 + + ** NOTE! The following LGPL license applies to the tdb + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef CONFIG_STAND_ALONE +#define HAVE_MMAP +#define HAVE_STRDUP +#define HAVE_SYS_MMAN_H +#define HAVE_UTIME_H +#define HAVE_UTIME +#endif +#define _XOPEN_SOURCE 600 + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <stddef.h> +#include <errno.h> +#include <string.h> +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif +#include <sys/time.h> +#include <sys/types.h> +#include <time.h> +#ifdef HAVE_UTIME_H +#include <utime.h> +#endif +#include <sys/stat.h> +#include <sys/file.h> +#include <fcntl.h> + +#ifdef HAVE_SYS_MMAN_H +#include <sys/mman.h> +#endif + +#ifndef MAP_FILE +#define MAP_FILE 0 +#endif + +#ifndef MAP_FAILED +#define MAP_FAILED ((void *)-1) +#endif + +#ifndef HAVE_STRDUP +#define strdup rep_strdup +static char *rep_strdup(const char *s) +{ + char *ret; + int length; + if (!s) + return NULL; + + if (!length) + length = strlen(s); + + ret = malloc(length + 1); + if (ret) { + strncpy(ret, s, length); + ret[length] = '\0'; + } + return ret; +} +#endif + +#ifndef PRINTF_ATTRIBUTE +#if (__GNUC__ >= 3) && (__GNUC_MINOR__ >= 1 ) +/** Use gcc attribute to check printf fns. a1 is the 1-based index of + * the parameter containing the format, and a2 the index of the first + * argument. 
Note that some gcc 2.x versions don't handle this + * properly **/ +#define PRINTF_ATTRIBUTE(a1, a2) __attribute__ ((format (__printf__, a1, a2))) +#else +#define PRINTF_ATTRIBUTE(a1, a2) +#endif +#endif + +typedef int bool; + +#include "tdb.h" + +#ifndef u32 +#define u32 unsigned +#endif + +#ifndef HAVE_GETPAGESIZE +#define getpagesize() 0x2000 +#endif + +typedef u32 tdb_len_t; +typedef u32 tdb_off_t; + +#ifndef offsetof +#define offsetof(t,f) ((unsigned int)&((t *)0)->f) +#endif + +#define TDB_MAGIC_FOOD "TDB file\n" +#define TDB_VERSION (0x26011967 + 6) +#define TDB_MAGIC (0x26011999U) +#define TDB_FREE_MAGIC (~TDB_MAGIC) +#define TDB_DEAD_MAGIC (0xFEE1DEAD) +#define TDB_RECOVERY_MAGIC (0xf53bc0e7U) +#define TDB_ALIGNMENT 4 +#define MIN_REC_SIZE (2*sizeof(struct list_struct) + TDB_ALIGNMENT) +#define DEFAULT_HASH_SIZE 131 +#define FREELIST_TOP (sizeof(struct tdb_header)) +#define TDB_ALIGN(x,a) (((x) + (a)-1) & ~((a)-1)) +#define TDB_BYTEREV(x) (((((x)&0xff)<<24)|((x)&0xFF00)<<8)|(((x)>>8)&0xFF00)|((x)>>24)) +#define TDB_DEAD(r) ((r)->magic == TDB_DEAD_MAGIC) +#define TDB_BAD_MAGIC(r) ((r)->magic != TDB_MAGIC && !TDB_DEAD(r)) +#define TDB_HASH_TOP(hash) (FREELIST_TOP + (BUCKET(hash)+1)*sizeof(tdb_off_t)) +#define TDB_HASHTABLE_SIZE(tdb) ((tdb->header.hash_size+1)*sizeof(tdb_off_t)) +#define TDB_DATA_START(hash_size) TDB_HASH_TOP(hash_size-1) +#define TDB_RECOVERY_HEAD offsetof(struct tdb_header, recovery_start) +#define TDB_SEQNUM_OFS offsetof(struct tdb_header, sequence_number) +#define TDB_PAD_BYTE 0x42 +#define TDB_PAD_U32 0x42424242 + +/* NB assumes there is a local variable called "tdb" that is the + * current context, also takes doubly-parenthesized print-style + * argument. 
*/ +#define TDB_LOG(x) tdb->log.log_fn x + +/* lock offsets */ +#define GLOBAL_LOCK 0 +#define ACTIVE_LOCK 4 +#define TRANSACTION_LOCK 8 + +/* free memory if the pointer is valid and zero the pointer */ +#ifndef SAFE_FREE +#define SAFE_FREE(x) do { if ((x) != NULL) {free(x); (x)=NULL;} } while(0) +#endif + +#define BUCKET(hash) ((hash) % tdb->header.hash_size) + +#define DOCONV() (tdb->flags & TDB_CONVERT) +#define CONVERT(x) (DOCONV() ? tdb_convert(&x, sizeof(x)) : &x) + + +/* the body of the database is made of one list_struct for the free space + plus a separate data list for each hash value */ +struct list_struct { + tdb_off_t next; /* offset of the next record in the list */ + tdb_len_t rec_len; /* total byte length of record */ + tdb_len_t key_len; /* byte length of key */ + tdb_len_t data_len; /* byte length of data */ + u32 full_hash; /* the full 32 bit hash of the key */ + u32 magic; /* try to catch errors */ + /* the following union is implied: + union { + char record[rec_len]; + struct { + char key[key_len]; + char data[data_len]; + } + u32 totalsize; (tailer) + } + */ +}; + + +/* this is stored at the front of every database */ +struct tdb_header { + char magic_food[32]; /* for /etc/magic */ + u32 version; /* version of the code */ + u32 hash_size; /* number of hash entries */ + tdb_off_t rwlocks; /* obsolete - kept to detect old formats */ + tdb_off_t recovery_start; /* offset of transaction recovery region */ + tdb_off_t sequence_number; /* used when TDB_SEQNUM is set */ + tdb_off_t reserved[29]; +}; + +struct tdb_lock_type { + int list; + u32 count; + u32 ltype; +}; + +struct tdb_traverse_lock { + struct tdb_traverse_lock *next; + u32 off; + u32 hash; + int lock_rw; +}; + + +struct tdb_methods { + int (*tdb_read)(struct tdb_context *, tdb_off_t , void *, tdb_len_t , int ); + int (*tdb_write)(struct tdb_context *, tdb_off_t, const void *, tdb_len_t); + void (*next_hash_chain)(struct tdb_context *, u32 *); + int (*tdb_oob)(struct tdb_context *, 
tdb_off_t , int ); + int (*tdb_expand_file)(struct tdb_context *, tdb_off_t , tdb_off_t ); + int (*tdb_brlock)(struct tdb_context *, tdb_off_t , int, int, int, size_t); +}; + +struct tdb_context { + char *name; /* the name of the database */ + void *map_ptr; /* where it is currently mapped */ + int fd; /* open file descriptor for the database */ + tdb_len_t map_size; /* how much space has been mapped */ + int read_only; /* opened read-only */ + int traverse_read; /* read-only traversal */ + struct tdb_lock_type global_lock; + int num_lockrecs; + struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */ + enum TDB_ERROR ecode; /* error code for last tdb error */ + struct tdb_header header; /* a cached copy of the header */ + u32 flags; /* the flags passed to tdb_open */ + struct tdb_traverse_lock travlocks; /* current traversal locks */ + struct tdb_context *next; /* all tdbs to avoid multiple opens */ + dev_t device; /* uniquely identifies this tdb */ + ino_t inode; /* uniquely identifies this tdb */ + struct tdb_logging_context log; + unsigned int (*hash_fn)(TDB_DATA *key); + int open_flags; /* flags used in the open - needed by reopen */ + unsigned int num_locks; /* number of chain locks held */ + const struct tdb_methods *methods; + struct tdb_transaction *transaction; + int page_size; + int max_dead_records; + bool have_transaction_lock; +}; + + +/* + internal prototypes +*/ +static int tdb_munmap(struct tdb_context *tdb); +static void tdb_mmap(struct tdb_context *tdb); +static int tdb_lock(struct tdb_context *tdb, int list, int ltype); +static int tdb_unlock(struct tdb_context *tdb, int list, int ltype); +static int tdb_brlock(struct tdb_context *tdb, tdb_off_t offset, int rw_type, int lck_type, int probe, size_t len); +static int tdb_transaction_lock(struct tdb_context *tdb, int ltype); +static int tdb_transaction_unlock(struct tdb_context *tdb); +static int tdb_brlock_upgrade(struct tdb_context *tdb, tdb_off_t offset, size_t len); +static int 
tdb_write_lock_record(struct tdb_context *tdb, tdb_off_t off); +static int tdb_write_unlock_record(struct tdb_context *tdb, tdb_off_t off); +static int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d); +static int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d); +static void *tdb_convert(void *buf, u32 size); +static int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec); +static tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct list_struct *rec); +static int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d); +static int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d); +static int tdb_lock_record(struct tdb_context *tdb, tdb_off_t off); +static int tdb_unlock_record(struct tdb_context *tdb, tdb_off_t off); +static int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec); +static int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec); +static int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct *rec); +static unsigned char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len); +static int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key, + tdb_off_t offset, tdb_len_t len, + int (*parser)(TDB_DATA key, TDB_DATA data, + void *private_data), + void *private_data); +static tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, int locktype, + struct list_struct *rec); +static void tdb_io_init(struct tdb_context *tdb); +static int tdb_expand(struct tdb_context *tdb, tdb_off_t size); +static int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off, + struct list_struct *rec); + + +/* file: error.c */ + +enum TDB_ERROR tdb_error(struct tdb_context *tdb) +{ + return tdb->ecode; +} + +static struct tdb_errname { + enum TDB_ERROR ecode; const char *estring; +} emap[] = { {TDB_SUCCESS, "Success"}, + 
{TDB_ERR_CORRUPT, "Corrupt database"}, + {TDB_ERR_IO, "IO Error"}, + {TDB_ERR_LOCK, "Locking error"}, + {TDB_ERR_OOM, "Out of memory"}, + {TDB_ERR_EXISTS, "Record exists"}, + {TDB_ERR_NOLOCK, "Lock exists on other keys"}, + {TDB_ERR_EINVAL, "Invalid parameter"}, + {TDB_ERR_NOEXIST, "Record does not exist"}, + {TDB_ERR_RDONLY, "write not permitted"} }; + +/* Error string for the last tdb error */ +const char *tdb_errorstr(struct tdb_context *tdb) +{ + u32 i; + for (i = 0; i < sizeof(emap) / sizeof(struct tdb_errname); i++) + if (tdb->ecode == emap[i].ecode) + return emap[i].estring; + return "Invalid error code"; +} + +/* file: lock.c */ + +#define TDB_MARK_LOCK 0x80000000 + +/* a byte range locking function - return 0 on success + this functions locks/unlocks 1 byte at the specified offset. + + On error, errno is also set so that errors are passed back properly + through tdb_open(). + + note that a len of zero means lock to end of file +*/ +int tdb_brlock(struct tdb_context *tdb, tdb_off_t offset, + int rw_type, int lck_type, int probe, size_t len) +{ + struct flock fl; + int ret; + + if (tdb->flags & TDB_NOLOCK) { + return 0; + } + + if ((rw_type == F_WRLCK) && (tdb->read_only || tdb->traverse_read)) { + tdb->ecode = TDB_ERR_RDONLY; + return -1; + } + + fl.l_type = rw_type; + fl.l_whence = SEEK_SET; + fl.l_start = offset; + fl.l_len = len; + fl.l_pid = 0; + + do { + ret = fcntl(tdb->fd,lck_type,&fl); + } while (ret == -1 && errno == EINTR); + + if (ret == -1) { + /* Generic lock error. errno set by fcntl. + * EAGAIN is an expected return from non-blocking + * locks. */ + if (!probe && lck_type != F_SETLK) { + /* Ensure error code is set for log fun to examine. */ + tdb->ecode = TDB_ERR_LOCK; + TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d len=%d\n", + tdb->fd, offset, rw_type, lck_type, (int)len)); + } + return TDB_ERRCODE(TDB_ERR_LOCK, -1); + } + return 0; +} + + +/* + upgrade a read lock to a write lock. 
This needs to be handled in a + special way as some OSes (such as solaris) have too conservative + deadlock detection and claim a deadlock when progress can be + made. For those OSes we may loop for a while. +*/ +int tdb_brlock_upgrade(struct tdb_context *tdb, tdb_off_t offset, size_t len) +{ + int count = 1000; + while (count--) { + struct timeval tv; + if (tdb_brlock(tdb, offset, F_WRLCK, F_SETLKW, 1, len) == 0) { + return 0; + } + if (errno != EDEADLK) { + break; + } + /* sleep for as short a time as we can - more portable than usleep() */ + tv.tv_sec = 0; + tv.tv_usec = 1; + select(0, NULL, NULL, NULL, &tv); + } + TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock_upgrade failed at offset %d\n", offset)); + return -1; +} + + +/* lock a list in the database. list -1 is the alloc list */ +static int _tdb_lock(struct tdb_context *tdb, int list, int ltype, int op) +{ + struct tdb_lock_type *new_lck; + int i; + bool mark_lock = ((ltype & TDB_MARK_LOCK) == TDB_MARK_LOCK); + + ltype &= ~TDB_MARK_LOCK; + + /* a global lock allows us to avoid per chain locks */ + if (tdb->global_lock.count && + (ltype == tdb->global_lock.ltype || ltype == F_RDLCK)) { + return 0; + } + + if (tdb->global_lock.count) { + return TDB_ERRCODE(TDB_ERR_LOCK, -1); + } + + if (list < -1 || list >= (int)tdb->header.hash_size) { + TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_lock: invalid list %d for ltype=%d\n", + list, ltype)); + return -1; + } + if (tdb->flags & TDB_NOLOCK) + return 0; + + for (i=0; i<tdb->num_lockrecs; i++) { + if (tdb->lockrecs[i].list == list) { + if (tdb->lockrecs[i].count == 0) { + /* + * Can't happen, see tdb_unlock(). It should + * be an assert. + */ + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_lock: " + "lck->count == 0 for list %d", list)); + } + /* + * Just increment the in-memory struct, posix locks + * don't stack. 
+ */ + tdb->lockrecs[i].count++; + return 0; + } + } + + new_lck = (struct tdb_lock_type *)realloc( + tdb->lockrecs, + sizeof(*tdb->lockrecs) * (tdb->num_lockrecs+1)); + if (new_lck == NULL) { + errno = ENOMEM; + return -1; + } + tdb->lockrecs = new_lck; + + /* Since fcntl locks don't nest, we do a lock for the first one, + and simply bump the count for future ones */ + if (!mark_lock && + tdb->methods->tdb_brlock(tdb,FREELIST_TOP+4*list, ltype, op, + 0, 1)) { + return -1; + } + + tdb->num_locks++; + + tdb->lockrecs[tdb->num_lockrecs].list = list; + tdb->lockrecs[tdb->num_lockrecs].count = 1; + tdb->lockrecs[tdb->num_lockrecs].ltype = ltype; + tdb->num_lockrecs += 1; + + return 0; +} + +/* lock a list in the database. list -1 is the alloc list */ +int tdb_lock(struct tdb_context *tdb, int list, int ltype) +{ + int ret; + ret = _tdb_lock(tdb, list, ltype, F_SETLKW); + if (ret) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_lock failed on list %d " + "ltype=%d (%s)\n", list, ltype, strerror(errno))); + } + return ret; +} + +/* lock a list in the database. list -1 is the alloc list. non-blocking lock */ +int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype) +{ + return _tdb_lock(tdb, list, ltype, F_SETLK); +} + + +/* unlock the database: returns void because it's too late for errors. 
*/ + /* changed to return int it may be interesting to know there + has been an error --simo */ +int tdb_unlock(struct tdb_context *tdb, int list, int ltype) +{ + int ret = -1; + int i; + struct tdb_lock_type *lck = NULL; + bool mark_lock = ((ltype & TDB_MARK_LOCK) == TDB_MARK_LOCK); + + ltype &= ~TDB_MARK_LOCK; + + /* a global lock allows us to avoid per chain locks */ + if (tdb->global_lock.count && + (ltype == tdb->global_lock.ltype || ltype == F_RDLCK)) { + return 0; + } + + if (tdb->global_lock.count) { + return TDB_ERRCODE(TDB_ERR_LOCK, -1); + } + + if (tdb->flags & TDB_NOLOCK) + return 0; + + /* Sanity checks */ + if (list < -1 || list >= (int)tdb->header.hash_size) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: list %d invalid (%d)\n", list, tdb->header.hash_size)); + return ret; + } + + for (i=0; i<tdb->num_lockrecs; i++) { + if (tdb->lockrecs[i].list == list) { + lck = &tdb->lockrecs[i]; + break; + } + } + + if ((lck == NULL) || (lck->count == 0)) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: count is 0\n")); + return -1; + } + + if (lck->count > 1) { + lck->count--; + return 0; + } + + /* + * This lock has count==1 left, so we need to unlock it in the + * kernel. We don't bother with decrementing the in-memory array + * element, we're about to overwrite it with the last array element + * anyway. + */ + + if (mark_lock) { + ret = 0; + } else { + ret = tdb->methods->tdb_brlock(tdb, FREELIST_TOP+4*list, F_UNLCK, + F_SETLKW, 0, 1); + } + tdb->num_locks--; + + /* + * Shrink the array by overwriting the element just unlocked with the + * last array element. + */ + + if (tdb->num_lockrecs > 1) { + *lck = tdb->lockrecs[tdb->num_lockrecs-1]; + } + tdb->num_lockrecs -= 1; + + /* + * We don't bother with realloc when the array shrinks, but if we have + * a completely idle tdb we should get rid of the locked array. 
+ */ + + if (tdb->num_lockrecs == 0) { + SAFE_FREE(tdb->lockrecs); + } + + if (ret) + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: An error occurred unlocking!\n")); + return ret; +} + +/* + get the transaction lock + */ +int tdb_transaction_lock(struct tdb_context *tdb, int ltype) +{ + if (tdb->have_transaction_lock || tdb->global_lock.count) { + return 0; + } + if (tdb->methods->tdb_brlock(tdb, TRANSACTION_LOCK, ltype, + F_SETLKW, 0, 1) == -1) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_lock: failed to get transaction lock\n")); + tdb->ecode = TDB_ERR_LOCK; + return -1; + } + tdb->have_transaction_lock = 1; + return 0; +} + +/* + release the transaction lock + */ +int tdb_transaction_unlock(struct tdb_context *tdb) +{ + int ret; + if (!tdb->have_transaction_lock) { + return 0; + } + ret = tdb->methods->tdb_brlock(tdb, TRANSACTION_LOCK, F_UNLCK, F_SETLKW, 0, 1); + if (ret == 0) { + tdb->have_transaction_lock = 0; + } + return ret; +} + + + + +/* lock/unlock entire database */ +static int _tdb_lockall(struct tdb_context *tdb, int ltype, int op) +{ + bool mark_lock = ((ltype & TDB_MARK_LOCK) == TDB_MARK_LOCK); + + ltype &= ~TDB_MARK_LOCK; + + /* There are no locks on read-only dbs */ + if (tdb->read_only || tdb->traverse_read) + return TDB_ERRCODE(TDB_ERR_LOCK, -1); + + if (tdb->global_lock.count && tdb->global_lock.ltype == ltype) { + tdb->global_lock.count++; + return 0; + } + + if (tdb->global_lock.count) { + /* a global lock of a different type exists */ + return TDB_ERRCODE(TDB_ERR_LOCK, -1); + } + + if (tdb->num_locks != 0) { + /* can't combine global and chain locks */ + return TDB_ERRCODE(TDB_ERR_LOCK, -1); + } + + if (!mark_lock && + tdb->methods->tdb_brlock(tdb, FREELIST_TOP, ltype, op, + 0, 4*tdb->header.hash_size)) { + if (op == F_SETLKW) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_lockall failed (%s)\n", strerror(errno))); + } + return -1; + } + + tdb->global_lock.count = 1; + tdb->global_lock.ltype = ltype; + + return 0; +} + + + +/* unlock entire 
db */ +static int _tdb_unlockall(struct tdb_context *tdb, int ltype) +{ + bool mark_lock = ((ltype & TDB_MARK_LOCK) == TDB_MARK_LOCK); + + ltype &= ~TDB_MARK_LOCK; + + /* There are no locks on read-only dbs */ + if (tdb->read_only || tdb->traverse_read) { + return TDB_ERRCODE(TDB_ERR_LOCK, -1); + } + + if (tdb->global_lock.ltype != ltype || tdb->global_lock.count == 0) { + return TDB_ERRCODE(TDB_ERR_LOCK, -1); + } + + if (tdb->global_lock.count > 1) { + tdb->global_lock.count--; + return 0; + } + + if (!mark_lock && + tdb->methods->tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, + 0, 4*tdb->header.hash_size)) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed (%s)\n", strerror(errno))); + return -1; + } + + tdb->global_lock.count = 0; + tdb->global_lock.ltype = 0; + + return 0; +} + +/* lock entire database with write lock */ +int tdb_lockall(struct tdb_context *tdb) +{ + return _tdb_lockall(tdb, F_WRLCK, F_SETLKW); +} + +/* lock entire database with write lock - mark only */ +int tdb_lockall_mark(struct tdb_context *tdb) +{ + return _tdb_lockall(tdb, F_WRLCK | TDB_MARK_LOCK, F_SETLKW); +} + +/* unlock entire database with write lock - unmark only */ +int tdb_lockall_unmark(struct tdb_context *tdb) +{ + return _tdb_unlockall(tdb, F_WRLCK | TDB_MARK_LOCK); +} + +/* lock entire database with write lock - nonblocking varient */ +int tdb_lockall_nonblock(struct tdb_context *tdb) +{ + return _tdb_lockall(tdb, F_WRLCK, F_SETLK); +} + +/* unlock entire database with write lock */ +int tdb_unlockall(struct tdb_context *tdb) +{ + return _tdb_unlockall(tdb, F_WRLCK); +} + +/* lock entire database with read lock */ +int tdb_lockall_read(struct tdb_context *tdb) +{ + return _tdb_lockall(tdb, F_RDLCK, F_SETLKW); +} + +/* lock entire database with read lock - nonblock varient */ +int tdb_lockall_read_nonblock(struct tdb_context *tdb) +{ + return _tdb_lockall(tdb, F_RDLCK, F_SETLK); +} + +/* unlock entire database with read lock */ +int tdb_unlockall_read(struct 
tdb_context *tdb) +{ + return _tdb_unlockall(tdb, F_RDLCK); +} + +/* lock/unlock one hash chain. This is meant to be used to reduce + contention - it cannot guarantee how many records will be locked */ +int tdb_chainlock(struct tdb_context *tdb, TDB_DATA key) +{ + return tdb_lock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK); +} + +/* lock/unlock one hash chain, non-blocking. This is meant to be used + to reduce contention - it cannot guarantee how many records will be + locked */ +int tdb_chainlock_nonblock(struct tdb_context *tdb, TDB_DATA key) +{ + return tdb_lock_nonblock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK); +} + +/* mark a chain as locked without actually locking it. Warning! use with great caution! */ +int tdb_chainlock_mark(struct tdb_context *tdb, TDB_DATA key) +{ + return tdb_lock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK | TDB_MARK_LOCK); +} + +/* unmark a chain as locked without actually locking it. Warning! use with great caution! */ +int tdb_chainlock_unmark(struct tdb_context *tdb, TDB_DATA key) +{ + return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK | TDB_MARK_LOCK); +} + +int tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key) +{ + return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK); +} + +int tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key) +{ + return tdb_lock(tdb, BUCKET(tdb->hash_fn(&key)), F_RDLCK); +} + +int tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key) +{ + return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_RDLCK); +} + + + +/* record lock stops delete underneath */ +int tdb_lock_record(struct tdb_context *tdb, tdb_off_t off) +{ + return off ? tdb->methods->tdb_brlock(tdb, off, F_RDLCK, F_SETLKW, 0, 1) : 0; +} + +/* + Write locks override our own fcntl readlocks, so check it here. + Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not + an error to fail to get the lock here. 
+*/ +int tdb_write_lock_record(struct tdb_context *tdb, tdb_off_t off) +{ + struct tdb_traverse_lock *i; + for (i = &tdb->travlocks; i; i = i->next) + if (i->off == off) + return -1; + return tdb->methods->tdb_brlock(tdb, off, F_WRLCK, F_SETLK, 1, 1); +} + +/* + Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not + an error to fail to get the lock here. +*/ +int tdb_write_unlock_record(struct tdb_context *tdb, tdb_off_t off) +{ + return tdb->methods->tdb_brlock(tdb, off, F_UNLCK, F_SETLK, 0, 1); +} + +/* fcntl locks don't stack: avoid unlocking someone else's */ +int tdb_unlock_record(struct tdb_context *tdb, tdb_off_t off) +{ + struct tdb_traverse_lock *i; + u32 count = 0; + + if (off == 0) + return 0; + for (i = &tdb->travlocks; i; i = i->next) + if (i->off == off) + count++; + return (count == 1 ? tdb->methods->tdb_brlock(tdb, off, F_UNLCK, F_SETLKW, 0, 1) : 0); +} + +/* file: io.c */ + +/* check for an out of bounds access - if it is out of bounds then + see if the database has been expanded by someone else and expand + if necessary + note that "len" is the minimum length needed for the db +*/ +static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, int probe) +{ + struct stat st; + if (len <= tdb->map_size) + return 0; + if (tdb->flags & TDB_INTERNAL) { + if (!probe) { + /* Ensure ecode is set for log fn. */ + tdb->ecode = TDB_ERR_IO; + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond internal malloc size %d\n", + (int)len, (int)tdb->map_size)); + } + return TDB_ERRCODE(TDB_ERR_IO, -1); + } + + if (fstat(tdb->fd, &st) == -1) { + return TDB_ERRCODE(TDB_ERR_IO, -1); + } + + if (st.st_size < (size_t)len) { + if (!probe) { + /* Ensure ecode is set for log fn. 
*/ + tdb->ecode = TDB_ERR_IO; + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond eof at %d\n", + (int)len, (int)st.st_size)); + } + return TDB_ERRCODE(TDB_ERR_IO, -1); + } + + /* Unmap, update size, remap */ + if (tdb_munmap(tdb) == -1) + return TDB_ERRCODE(TDB_ERR_IO, -1); + tdb->map_size = st.st_size; + tdb_mmap(tdb); + return 0; +} + +/* write a lump of data at a specified offset */ +static int tdb_write(struct tdb_context *tdb, tdb_off_t off, + const void *buf, tdb_len_t len) +{ + if (len == 0) { + return 0; + } + + if (tdb->read_only || tdb->traverse_read) { + tdb->ecode = TDB_ERR_RDONLY; + return -1; + } + + if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0) + return -1; + + if (tdb->map_ptr) { + memcpy(off + (char *)tdb->map_ptr, buf, len); + } else if (pwrite(tdb->fd, buf, len, off) != (ssize_t)len) { + /* Ensure ecode is set for log fn. */ + tdb->ecode = TDB_ERR_IO; + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d len=%d (%s)\n", + off, len, strerror(errno))); + return TDB_ERRCODE(TDB_ERR_IO, -1); + } + return 0; +} + +/* Endian conversion: we only ever deal with 4 byte quantities */ +void *tdb_convert(void *buf, u32 size) +{ + u32 i, *p = (u32 *)buf; + for (i = 0; i < size / 4; i++) + p[i] = TDB_BYTEREV(p[i]); + return buf; +} + + +/* read a lump of data at a specified offset, maybe convert */ +static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, + tdb_len_t len, int cv) +{ + if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0) { + return -1; + } + + if (tdb->map_ptr) { + memcpy(buf, off + (char *)tdb->map_ptr, len); + } else { + ssize_t ret = pread(tdb->fd, buf, len, off); + if (ret != (ssize_t)len) { + /* Ensure ecode is set for log fn. 
*/ + tdb->ecode = TDB_ERR_IO; + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_read failed at %d " + "len=%d ret=%d (%s) map_size=%d\n", + (int)off, (int)len, (int)ret, strerror(errno), + (int)tdb->map_size)); + return TDB_ERRCODE(TDB_ERR_IO, -1); + } + } + if (cv) { + tdb_convert(buf, len); + } + return 0; +} + + + +/* + do an unlocked scan of the hash table heads to find the next non-zero head. The value + will then be confirmed with the lock held +*/ +static void tdb_next_hash_chain(struct tdb_context *tdb, u32 *chain) +{ + u32 h = *chain; + if (tdb->map_ptr) { + for (;h < tdb->header.hash_size;h++) { + if (0 != *(u32 *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) { + break; + } + } + } else { + u32 off=0; + for (;h < tdb->header.hash_size;h++) { + if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) { + break; + } + } + } + (*chain) = h; +} + + +int tdb_munmap(struct tdb_context *tdb) +{ + if (tdb->flags & TDB_INTERNAL) + return 0; + +#ifdef HAVE_MMAP + if (tdb->map_ptr) { + int ret = munmap(tdb->map_ptr, tdb->map_size); + if (ret != 0) + return ret; + } +#endif + tdb->map_ptr = NULL; + return 0; +} + +void tdb_mmap(struct tdb_context *tdb) +{ + if (tdb->flags & TDB_INTERNAL) + return; + +#ifdef HAVE_MMAP + if (!(tdb->flags & TDB_NOMMAP)) { + tdb->map_ptr = mmap(NULL, tdb->map_size, + PROT_READ|(tdb->read_only? 0:PROT_WRITE), + MAP_SHARED|MAP_FILE, tdb->fd, 0); + + /* + * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!! + */ + + if (tdb->map_ptr == MAP_FAILED) { + tdb->map_ptr = NULL; + TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_mmap failed for size %d (%s)\n", + tdb->map_size, strerror(errno))); + } + } else { + tdb->map_ptr = NULL; + } +#else + tdb->map_ptr = NULL; +#endif +} + +/* expand a file. 
we prefer to use ftruncate, as that is what posix + says to use for mmap expansion */ +static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition) +{ + char buf[1024]; + + if (tdb->read_only || tdb->traverse_read) { + tdb->ecode = TDB_ERR_RDONLY; + return -1; + } + + if (ftruncate(tdb->fd, size+addition) == -1) { + char b = 0; + if (pwrite(tdb->fd, &b, 1, (size+addition) - 1) != 1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %d failed (%s)\n", + size+addition, strerror(errno))); + return -1; + } + } + + /* now fill the file with something. This ensures that the + file isn't sparse, which would be very bad if we ran out of + disk. This must be done with write, not via mmap */ + memset(buf, TDB_PAD_BYTE, sizeof(buf)); + while (addition) { + int n = addition>sizeof(buf)?sizeof(buf):addition; + int ret = pwrite(tdb->fd, buf, n, size); + if (ret != n) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of %d failed (%s)\n", + n, strerror(errno))); + return -1; + } + addition -= n; + size += n; + } + return 0; +} + + +/* expand the database at least size bytes by expanding the underlying + file and doing the mmap again if necessary */ +int tdb_expand(struct tdb_context *tdb, tdb_off_t size) +{ + struct list_struct rec; + tdb_off_t offset; + + if (tdb_lock(tdb, -1, F_WRLCK) == -1) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "lock failed in tdb_expand\n")); + return -1; + } + + /* must know about any previous expansions by another process */ + tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1); + + /* always make room for at least 10 more records, and round + the database up to a multiple of the page size */ + size = TDB_ALIGN(tdb->map_size + size*10, tdb->page_size) - tdb->map_size; + + if (!(tdb->flags & TDB_INTERNAL)) + tdb_munmap(tdb); + + /* + * We must ensure the file is unmapped before doing this + * to ensure consistency with systems like OpenBSD where + * writes and mmaps are not consistent. 
+ */ + + /* expand the file itself */ + if (!(tdb->flags & TDB_INTERNAL)) { + if (tdb->methods->tdb_expand_file(tdb, tdb->map_size, size) != 0) + goto fail; + } + + tdb->map_size += size; + + if (tdb->flags & TDB_INTERNAL) { + char *new_map_ptr = (char *)realloc(tdb->map_ptr, + tdb->map_size); + if (!new_map_ptr) { + tdb->map_size -= size; + goto fail; + } + tdb->map_ptr = new_map_ptr; + } else { + /* + * We must ensure the file is remapped before adding the space + * to ensure consistency with systems like OpenBSD where + * writes and mmaps are not consistent. + */ + + /* We're ok if the mmap fails as we'll fallback to read/write */ + tdb_mmap(tdb); + } + + /* form a new freelist record */ + memset(&rec,'\0',sizeof(rec)); + rec.rec_len = size - sizeof(rec); + + /* link it into the free list */ + offset = tdb->map_size - size; + if (tdb_free(tdb, offset, &rec) == -1) + goto fail; + + tdb_unlock(tdb, -1, F_WRLCK); + return 0; + fail: + tdb_unlock(tdb, -1, F_WRLCK); + return -1; +} + +/* read/write a tdb_off_t */ +int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d) +{ + return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV()); +} + +int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d) +{ + tdb_off_t off = *d; + return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d)); +} + + +/* read a lump of data, allocating the space for it */ +unsigned char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len) +{ + unsigned char *buf; + + /* some systems don't like zero length malloc */ + if (len == 0) { + len = 1; + } + + if (!(buf = (unsigned char *)malloc(len))) { + /* Ensure ecode is set for log fn. 
*/ + tdb->ecode = TDB_ERR_OOM; + TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_alloc_read malloc failed len=%d (%s)\n", + len, strerror(errno))); + return TDB_ERRCODE(TDB_ERR_OOM, buf); + } + if (tdb->methods->tdb_read(tdb, offset, buf, len, 0) == -1) { + SAFE_FREE(buf); + return NULL; + } + return buf; +} + +/* Give a piece of tdb data to a parser */ + +int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key, + tdb_off_t offset, tdb_len_t len, + int (*parser)(TDB_DATA key, TDB_DATA data, + void *private_data), + void *private_data) +{ + TDB_DATA data; + int result; + + data.dsize = len; + + if ((tdb->transaction == NULL) && (tdb->map_ptr != NULL)) { + /* + * Optimize by avoiding the malloc/memcpy/free, point the + * parser directly at the mmap area. + */ + if (tdb->methods->tdb_oob(tdb, offset+len, 0) != 0) { + return -1; + } + data.dptr = offset + (unsigned char *)tdb->map_ptr; + return parser(key, data, private_data); + } + + if (!(data.dptr = tdb_alloc_read(tdb, offset, len))) { + return -1; + } + + result = parser(key, data, private_data); + free(data.dptr); + return result; +} + +/* read/write a record */ +int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec) +{ + if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1) + return -1; + if (TDB_BAD_MAGIC(rec)) { + /* Ensure ecode is set for log fn. 
*/ + tdb->ecode = TDB_ERR_CORRUPT; + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset)); + return TDB_ERRCODE(TDB_ERR_CORRUPT, -1); + } + return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0); +} + +int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec) +{ + struct list_struct r = *rec; + return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r)); +} + +static const struct tdb_methods io_methods = { + tdb_read, + tdb_write, + tdb_next_hash_chain, + tdb_oob, + tdb_expand_file, + tdb_brlock +}; + +/* + initialise the default methods table +*/ +void tdb_io_init(struct tdb_context *tdb) +{ + tdb->methods = &io_methods; +} + +/* file: transaction.c */ + +/* + transaction design: + + - only allow a single transaction at a time per database. This makes + using the transaction API simpler, as otherwise the caller would + have to cope with temporary failures in transactions that conflict + with other current transactions + + - keep the transaction recovery information in the same file as the + database, using a special 'transaction recovery' record pointed at + by the header. This removes the need for extra journal files as + used by some other databases + + - dynamically allocated the transaction recover record, re-using it + for subsequent transactions. If a larger record is needed then + tdb_free() the old record to place it on the normal tdb freelist + before allocating the new record + + - during transactions, keep a linked list of writes all that have + been performed by intercepting all tdb_write() calls. The hooked + transaction versions of tdb_read() and tdb_write() check this + linked list and try to use the elements of the list in preference + to the real database. 
+ + - don't allow any locks to be held when a transaction starts, + otherwise we can end up with deadlock (plus lack of lock nesting + in posix locks would mean the lock is lost) + + - if the caller gains a lock during the transaction but doesn't + release it then fail the commit + + - allow for nested calls to tdb_transaction_start(), re-using the + existing transaction record. If the inner transaction is cancelled + then a subsequent commit will fail + + - keep a mirrored copy of the tdb hash chain heads to allow for the + fast hash heads scan on traverse, updating the mirrored copy in + the transaction version of tdb_write + + - allow callers to mix transaction and non-transaction use of tdb, + although once a transaction is started then an exclusive lock is + gained until the transaction is committed or cancelled + + - the commit strategy involves first saving away all modified data + into a linearised buffer in the transaction recovery area, then + marking the transaction recovery area with a magic value to + indicate a valid recovery record. In total 4 fsync/msync calls are + needed per commit to prevent race conditions. It might be possible + to reduce this to 3 or even 2 with some more work. + + - check for a valid recovery record on open of the tdb, while the + global lock is held. Automatically recover from the transaction + recovery area if needed, then continue with the open as + usual. This allows for smooth crash recovery with no administrator + intervention. + + - if TDB_NOSYNC is passed to flags in tdb_open then transactions are + still available, but no transaction recovery area is used and no + fsync/msync calls are made. 
+ +*/ + +struct tdb_transaction_el { + struct tdb_transaction_el *next, *prev; + tdb_off_t offset; + tdb_len_t length; + unsigned char *data; +}; + +/* + hold the context of any current transaction +*/ +struct tdb_transaction { + /* we keep a mirrored copy of the tdb hash heads here so + tdb_next_hash_chain() can operate efficiently */ + u32 *hash_heads; + + /* the original io methods - used to do IOs to the real db */ + const struct tdb_methods *io_methods; + + /* the list of transaction elements. We use a doubly linked + list with a last pointer to allow us to keep the list + ordered, with first element at the front of the list. It + needs to be doubly linked as the read/write traversals need + to be backwards, while the commit needs to be forwards */ + struct tdb_transaction_el *elements, *elements_last; + + /* non-zero when an internal transaction error has + occurred. All write operations will then fail until the + transaction is ended */ + int transaction_error; + + /* when inside a transaction we need to keep track of any + nested tdb_transaction_start() calls, as these are allowed, + but don't create a new transaction */ + int nesting; + + /* old file size before transaction */ + tdb_len_t old_map_size; +}; + + +/* + read while in a transaction. 
We need to check first if the data is in our list + of transaction elements, then if not do a real read +*/ +static int transaction_read(struct tdb_context *tdb, tdb_off_t off, void *buf, + tdb_len_t len, int cv) +{ + struct tdb_transaction_el *el; + + /* we need to walk the list backwards to get the most recent data */ + for (el=tdb->transaction->elements_last;el;el=el->prev) { + tdb_len_t partial; + + if (off+len <= el->offset) { + continue; + } + if (off >= el->offset + el->length) { + continue; + } + + /* an overlapping read - needs to be split into up to + 2 reads and a memcpy */ + if (off < el->offset) { + partial = el->offset - off; + if (transaction_read(tdb, off, buf, partial, cv) != 0) { + goto fail; + } + len -= partial; + off += partial; + buf = (void *)(partial + (char *)buf); + } + if (off + len <= el->offset + el->length) { + partial = len; + } else { + partial = el->offset + el->length - off; + } + memcpy(buf, el->data + (off - el->offset), partial); + if (cv) { + tdb_convert(buf, len); + } + len -= partial; + off += partial; + buf = (void *)(partial + (char *)buf); + + if (len != 0 && transaction_read(tdb, off, buf, len, cv) != 0) { + goto fail; + } + + return 0; + } + + /* its not in the transaction elements - do a real read */ + return tdb->transaction->io_methods->tdb_read(tdb, off, buf, len, cv); + +fail: + TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_read: failed at off=%d len=%d\n", off, len)); + tdb->ecode = TDB_ERR_IO; + tdb->transaction->transaction_error = 1; + return -1; +} + + +/* + write while in a transaction +*/ +static int transaction_write(struct tdb_context *tdb, tdb_off_t off, + const void *buf, tdb_len_t len) +{ + struct tdb_transaction_el *el, *best_el=NULL; + + if (len == 0) { + return 0; + } + + /* if the write is to a hash head, then update the transaction + hash heads */ + if (len == sizeof(tdb_off_t) && off >= FREELIST_TOP && + off < FREELIST_TOP+TDB_HASHTABLE_SIZE(tdb)) { + u32 chain = (off-FREELIST_TOP) / 
sizeof(tdb_off_t); + memcpy(&tdb->transaction->hash_heads[chain], buf, len); + } + + /* first see if we can replace an existing entry */ + for (el=tdb->transaction->elements_last;el;el=el->prev) { + tdb_len_t partial; + + if (best_el == NULL && off == el->offset+el->length) { + best_el = el; + } + + if (off+len <= el->offset) { + continue; + } + if (off >= el->offset + el->length) { + continue; + } + + /* an overlapping write - needs to be split into up to + 2 writes and a memcpy */ + if (off < el->offset) { + partial = el->offset - off; + if (transaction_write(tdb, off, buf, partial) != 0) { + goto fail; + } + len -= partial; + off += partial; + buf = (const void *)(partial + (const char *)buf); + } + if (off + len <= el->offset + el->length) { + partial = len; + } else { + partial = el->offset + el->length - off; + } + memcpy(el->data + (off - el->offset), buf, partial); + len -= partial; + off += partial; + buf = (const void *)(partial + (const char *)buf); + + if (len != 0 && transaction_write(tdb, off, buf, len) != 0) { + goto fail; + } + + return 0; + } + + /* see if we can append the new entry to an existing entry */ + if (best_el && best_el->offset + best_el->length == off && + (off+len < tdb->transaction->old_map_size || + off > tdb->transaction->old_map_size)) { + unsigned char *data = best_el->data; + el = best_el; + el->data = (unsigned char *)realloc(el->data, + el->length + len); + if (el->data == NULL) { + tdb->ecode = TDB_ERR_OOM; + tdb->transaction->transaction_error = 1; + el->data = data; + return -1; + } + if (buf) { + memcpy(el->data + el->length, buf, len); + } else { + memset(el->data + el->length, TDB_PAD_BYTE, len); + } + el->length += len; + return 0; + } + + /* add a new entry at the end of the list */ + el = (struct tdb_transaction_el *)malloc(sizeof(*el)); + if (el == NULL) { + tdb->ecode = TDB_ERR_OOM; + tdb->transaction->transaction_error = 1; + return -1; + } + el->next = NULL; + el->prev = tdb->transaction->elements_last; + 
el->offset = off; + el->length = len; + el->data = (unsigned char *)malloc(len); + if (el->data == NULL) { + free(el); + tdb->ecode = TDB_ERR_OOM; + tdb->transaction->transaction_error = 1; + return -1; + } + if (buf) { + memcpy(el->data, buf, len); + } else { + memset(el->data, TDB_PAD_BYTE, len); + } + if (el->prev) { + el->prev->next = el; + } else { + tdb->transaction->elements = el; + } + tdb->transaction->elements_last = el; + return 0; + +fail: + TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: failed at off=%d len=%d\n", off, len)); + tdb->ecode = TDB_ERR_IO; + tdb->transaction->transaction_error = 1; + return -1; +} + +/* + accelerated hash chain head search, using the cached hash heads +*/ +static void transaction_next_hash_chain(struct tdb_context *tdb, u32 *chain) +{ + u32 h = *chain; + for (;h < tdb->header.hash_size;h++) { + /* the +1 takes account of the freelist */ + if (0 != tdb->transaction->hash_heads[h+1]) { + break; + } + } + (*chain) = h; +} + +/* + out of bounds check during a transaction +*/ +static int transaction_oob(struct tdb_context *tdb, tdb_off_t len, int probe) +{ + if (len <= tdb->map_size) { + return 0; + } + return TDB_ERRCODE(TDB_ERR_IO, -1); +} + +/* + transaction version of tdb_expand(). +*/ +static int transaction_expand_file(struct tdb_context *tdb, tdb_off_t size, + tdb_off_t addition) +{ + /* add a write to the transaction elements, so subsequent + reads see the zero data */ + if (transaction_write(tdb, size, NULL, addition) != 0) { + return -1; + } + + return 0; +} + +/* + brlock during a transaction - ignore them +*/ +static int transaction_brlock(struct tdb_context *tdb, tdb_off_t offset, + int rw_type, int lck_type, int probe, size_t len) +{ + return 0; +} + +static const struct tdb_methods transaction_methods = { + transaction_read, + transaction_write, + transaction_next_hash_chain, + transaction_oob, + transaction_expand_file, + transaction_brlock +}; + + +/* + start a tdb transaction. 
No token is returned, as only a single + transaction is allowed to be pending per tdb_context +*/ +int tdb_transaction_start(struct tdb_context *tdb) +{ + /* some sanity checks */ + if (tdb->read_only || (tdb->flags & TDB_INTERNAL) || tdb->traverse_read) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: cannot start a transaction on a read-only or internal db\n")); + tdb->ecode = TDB_ERR_EINVAL; + return -1; + } + + /* cope with nested tdb_transaction_start() calls */ + if (tdb->transaction != NULL) { + tdb->transaction->nesting++; + TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_start: nesting %d\n", + tdb->transaction->nesting)); + return 0; + } + + if (tdb->num_locks != 0 || tdb->global_lock.count) { + /* the caller must not have any locks when starting a + transaction as otherwise we'll be screwed by lack + of nested locks in posix */ + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: cannot start a transaction with locks held\n")); + tdb->ecode = TDB_ERR_LOCK; + return -1; + } + + if (tdb->travlocks.next != NULL) { + /* you cannot use transactions inside a traverse (although you can use + traverse inside a transaction) as otherwise you can end up with + deadlock */ + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: cannot start a transaction within a traverse\n")); + tdb->ecode = TDB_ERR_LOCK; + return -1; + } + + tdb->transaction = (struct tdb_transaction *) + calloc(sizeof(struct tdb_transaction), 1); + if (tdb->transaction == NULL) { + tdb->ecode = TDB_ERR_OOM; + return -1; + } + + /* get the transaction write lock. This is a blocking lock. As + discussed with Volker, there are a number of ways we could + make this async, which we will probably do in the future */ + if (tdb_transaction_lock(tdb, F_WRLCK) == -1) { + SAFE_FREE(tdb->transaction); + return -1; + } + + /* get a read lock from the freelist to the end of file. 
This + is upgraded to a write lock during the commit */ + if (tdb_brlock(tdb, FREELIST_TOP, F_RDLCK, F_SETLKW, 0, 0) == -1) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: failed to get hash locks\n")); + tdb->ecode = TDB_ERR_LOCK; + goto fail; + } + + /* setup a copy of the hash table heads so the hash scan in + traverse can be fast */ + tdb->transaction->hash_heads = (u32 *) + calloc(tdb->header.hash_size+1, sizeof(u32)); + if (tdb->transaction->hash_heads == NULL) { + tdb->ecode = TDB_ERR_OOM; + goto fail; + } + if (tdb->methods->tdb_read(tdb, FREELIST_TOP, tdb->transaction->hash_heads, + TDB_HASHTABLE_SIZE(tdb), 0) != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_start: failed to read hash heads\n")); + tdb->ecode = TDB_ERR_IO; + goto fail; + } + + /* make sure we know about any file expansions already done by + anyone else */ + tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1); + tdb->transaction->old_map_size = tdb->map_size; + + /* finally hook the io methods, replacing them with + transaction specific methods */ + tdb->transaction->io_methods = tdb->methods; + tdb->methods = &transaction_methods; + + /* by calling this transaction write here, we ensure that we don't grow the + transaction linked list due to hash table updates */ + if (transaction_write(tdb, FREELIST_TOP, tdb->transaction->hash_heads, + TDB_HASHTABLE_SIZE(tdb)) != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_start: failed to prime hash table\n")); + tdb->ecode = TDB_ERR_IO; + tdb->methods = tdb->transaction->io_methods; + goto fail; + } + + return 0; + +fail: + tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0); + tdb_transaction_unlock(tdb); + SAFE_FREE(tdb->transaction->hash_heads); + SAFE_FREE(tdb->transaction); + return -1; +} + + +/* + cancel the current transaction +*/ +int tdb_transaction_cancel(struct tdb_context *tdb) +{ + if (tdb->transaction == NULL) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_cancel: no transaction\n")); + return -1; + } + 
+ if (tdb->transaction->nesting != 0) { + tdb->transaction->transaction_error = 1; + tdb->transaction->nesting--; + return 0; + } + + tdb->map_size = tdb->transaction->old_map_size; + + /* free all the transaction elements */ + while (tdb->transaction->elements) { + struct tdb_transaction_el *el = tdb->transaction->elements; + tdb->transaction->elements = el->next; + free(el->data); + free(el); + } + + /* remove any global lock created during the transaction */ + if (tdb->global_lock.count != 0) { + tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 4*tdb->header.hash_size); + tdb->global_lock.count = 0; + } + + /* remove any locks created during the transaction */ + if (tdb->num_locks != 0) { + int i; + for (i=0;i<tdb->num_lockrecs;i++) { + tdb_brlock(tdb,FREELIST_TOP+4*tdb->lockrecs[i].list, + F_UNLCK,F_SETLKW, 0, 1); + } + tdb->num_locks = 0; + tdb->num_lockrecs = 0; + SAFE_FREE(tdb->lockrecs); + } + + /* restore the normal io methods */ + tdb->methods = tdb->transaction->io_methods; + + tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0); + tdb_transaction_unlock(tdb); + SAFE_FREE(tdb->transaction->hash_heads); + SAFE_FREE(tdb->transaction); + + return 0; +} + +/* + sync to disk +*/ +static int transaction_sync(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t length) +{ + if (fsync(tdb->fd) != 0) { + tdb->ecode = TDB_ERR_IO; + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction: fsync failed\n")); + return -1; + } +#ifdef MS_SYNC + if (tdb->map_ptr) { + tdb_off_t moffset = offset & ~(tdb->page_size-1); + if (msync(moffset + (char *)tdb->map_ptr, + length + (offset - moffset), MS_SYNC) != 0) { + tdb->ecode = TDB_ERR_IO; + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction: msync failed - %s\n", + strerror(errno))); + return -1; + } + } +#endif + return 0; +} + + +/* + work out how much space the linearised recovery data will consume +*/ +static tdb_len_t tdb_recovery_size(struct tdb_context *tdb) +{ + struct tdb_transaction_el *el; + tdb_len_t recovery_size = 
0; + + recovery_size = sizeof(u32); + for (el=tdb->transaction->elements;el;el=el->next) { + if (el->offset >= tdb->transaction->old_map_size) { + continue; + } + recovery_size += 2*sizeof(tdb_off_t) + el->length; + } + + return recovery_size; +} + +/* + allocate the recovery area, or use an existing recovery area if it is + large enough +*/ +static int tdb_recovery_allocate(struct tdb_context *tdb, + tdb_len_t *recovery_size, + tdb_off_t *recovery_offset, + tdb_len_t *recovery_max_size) +{ + struct list_struct rec; + const struct tdb_methods *methods = tdb->transaction->io_methods; + tdb_off_t recovery_head; + + if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to read recovery head\n")); + return -1; + } + + rec.rec_len = 0; + + if (recovery_head != 0 && + methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to read recovery record\n")); + return -1; + } + + *recovery_size = tdb_recovery_size(tdb); + + if (recovery_head != 0 && *recovery_size <= rec.rec_len) { + /* it fits in the existing area */ + *recovery_max_size = rec.rec_len; + *recovery_offset = recovery_head; + return 0; + } + + /* we need to free up the old recovery area, then allocate a + new one at the end of the file. 
Note that we cannot use + tdb_allocate() to allocate the new one as that might return + us an area that is being currently used (as of the start of + the transaction) */ + if (recovery_head != 0) { + if (tdb_free(tdb, recovery_head, &rec) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to free previous recovery area\n")); + return -1; + } + } + + /* the tdb_free() call might have increased the recovery size */ + *recovery_size = tdb_recovery_size(tdb); + + /* round up to a multiple of page size */ + *recovery_max_size = TDB_ALIGN(sizeof(rec) + *recovery_size, tdb->page_size) - sizeof(rec); + *recovery_offset = tdb->map_size; + recovery_head = *recovery_offset; + + if (methods->tdb_expand_file(tdb, tdb->transaction->old_map_size, + (tdb->map_size - tdb->transaction->old_map_size) + + sizeof(rec) + *recovery_max_size) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to create recovery area\n")); + return -1; + } + + /* remap the file (if using mmap) */ + methods->tdb_oob(tdb, tdb->map_size + 1, 1); + + /* we have to reset the old map size so that we don't try to expand the file + again in the transaction commit, which would destroy the recovery area */ + tdb->transaction->old_map_size = tdb->map_size; + + /* write the recovery header offset and sync - we can sync without a race here + as the magic ptr in the recovery record has not been set */ + CONVERT(recovery_head); + if (methods->tdb_write(tdb, TDB_RECOVERY_HEAD, + &recovery_head, sizeof(tdb_off_t)) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to write recovery head\n")); + return -1; + } + + return 0; +} + + +/* + setup the recovery data that will be used on a crash during commit +*/ +static int transaction_setup_recovery(struct tdb_context *tdb, + tdb_off_t *magic_offset) +{ + struct tdb_transaction_el *el; + tdb_len_t recovery_size; + unsigned char *data, *p; + const struct tdb_methods *methods = tdb->transaction->io_methods; + struct 
list_struct *rec; + tdb_off_t recovery_offset, recovery_max_size; + tdb_off_t old_map_size = tdb->transaction->old_map_size; + u32 magic, tailer; + + /* + check that the recovery area has enough space + */ + if (tdb_recovery_allocate(tdb, &recovery_size, + &recovery_offset, &recovery_max_size) == -1) { + return -1; + } + + data = (unsigned char *)malloc(recovery_size + sizeof(*rec)); + if (data == NULL) { + tdb->ecode = TDB_ERR_OOM; + return -1; + } + + rec = (struct list_struct *)data; + memset(rec, 0, sizeof(*rec)); + + rec->magic = 0; + rec->data_len = recovery_size; + rec->rec_len = recovery_max_size; + rec->key_len = old_map_size; + CONVERT(rec); + + /* build the recovery data into a single blob to allow us to do a single + large write, which should be more efficient */ + p = data + sizeof(*rec); + for (el=tdb->transaction->elements;el;el=el->next) { + if (el->offset >= old_map_size) { + continue; + } + if (el->offset + el->length > tdb->transaction->old_map_size) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_setup_recovery: transaction data over new region boundary\n")); + free(data); + tdb->ecode = TDB_ERR_CORRUPT; + return -1; + } + memcpy(p, &el->offset, 4); + memcpy(p+4, &el->length, 4); + if (DOCONV()) { + tdb_convert(p, 8); + } + /* the recovery area contains the old data, not the + new data, so we have to call the original tdb_read + method to get it */ + if (methods->tdb_read(tdb, el->offset, p + 8, el->length, 0) != 0) { + free(data); + tdb->ecode = TDB_ERR_IO; + return -1; + } + p += 8 + el->length; + } + + /* and the tailer */ + tailer = sizeof(*rec) + recovery_max_size; + memcpy(p, &tailer, 4); + CONVERT(p); + + /* write the recovery data to the recovery area */ + if (methods->tdb_write(tdb, recovery_offset, data, sizeof(*rec) + recovery_size) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_setup_recovery: failed to write recovery data\n")); + free(data); + tdb->ecode = TDB_ERR_IO; + return -1; + } + + /* as we don't have ordered 
writes, we have to sync the recovery + data before we update the magic to indicate that the recovery + data is present */ + if (transaction_sync(tdb, recovery_offset, sizeof(*rec) + recovery_size) == -1) { + free(data); + return -1; + } + + free(data); + + magic = TDB_RECOVERY_MAGIC; + CONVERT(magic); + + *magic_offset = recovery_offset + offsetof(struct list_struct, magic); + + if (methods->tdb_write(tdb, *magic_offset, &magic, sizeof(magic)) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_setup_recovery: failed to write recovery magic\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + + /* ensure the recovery magic marker is on disk */ + if (transaction_sync(tdb, *magic_offset, sizeof(magic)) == -1) { + return -1; + } + + return 0; +} + +/* + commit the current transaction +*/ +int tdb_transaction_commit(struct tdb_context *tdb) +{ + const struct tdb_methods *methods; + tdb_off_t magic_offset = 0; + u32 zero = 0; + + if (tdb->transaction == NULL) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: no transaction\n")); + return -1; + } + + if (tdb->transaction->transaction_error) { + tdb->ecode = TDB_ERR_IO; + tdb_transaction_cancel(tdb); + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: transaction error pending\n")); + return -1; + } + + if (tdb->transaction->nesting != 0) { + tdb->transaction->nesting--; + return 0; + } + + /* check for a null transaction */ + if (tdb->transaction->elements == NULL) { + tdb_transaction_cancel(tdb); + return 0; + } + + methods = tdb->transaction->io_methods; + + /* if there are any locks pending then the caller has not + nested their locks properly, so fail the transaction */ + if (tdb->num_locks || tdb->global_lock.count) { + tdb->ecode = TDB_ERR_LOCK; + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: locks pending on commit\n")); + tdb_transaction_cancel(tdb); + return -1; + } + + /* upgrade the main transaction lock region to a write lock */ + if (tdb_brlock_upgrade(tdb, FREELIST_TOP, 0) == 
-1) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: failed to upgrade hash locks\n")); + tdb->ecode = TDB_ERR_LOCK; + tdb_transaction_cancel(tdb); + return -1; + } + + /* get the global lock - this prevents new users attaching to the database + during the commit */ + if (tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: failed to get global lock\n")); + tdb->ecode = TDB_ERR_LOCK; + tdb_transaction_cancel(tdb); + return -1; + } + + if (!(tdb->flags & TDB_NOSYNC)) { + /* write the recovery data to the end of the file */ + if (transaction_setup_recovery(tdb, &magic_offset) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: failed to setup recovery data\n")); + tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); + tdb_transaction_cancel(tdb); + return -1; + } + } + + /* expand the file to the new size if needed */ + if (tdb->map_size != tdb->transaction->old_map_size) { + if (methods->tdb_expand_file(tdb, tdb->transaction->old_map_size, + tdb->map_size - + tdb->transaction->old_map_size) == -1) { + tdb->ecode = TDB_ERR_IO; + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: expansion failed\n")); + tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); + tdb_transaction_cancel(tdb); + return -1; + } + tdb->map_size = tdb->transaction->old_map_size; + methods->tdb_oob(tdb, tdb->map_size + 1, 1); + } + + /* perform all the writes */ + while (tdb->transaction->elements) { + struct tdb_transaction_el *el = tdb->transaction->elements; + + if (methods->tdb_write(tdb, el->offset, el->data, el->length) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed during commit\n")); + + /* we've overwritten part of the data and + possibly expanded the file, so we need to + run the crash recovery code */ + tdb->methods = methods; + tdb_transaction_recover(tdb); + + tdb_transaction_cancel(tdb); + tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); + + 
TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed\n")); + return -1; + } + tdb->transaction->elements = el->next; + free(el->data); + free(el); + } + + if (!(tdb->flags & TDB_NOSYNC)) { + /* ensure the new data is on disk */ + if (transaction_sync(tdb, 0, tdb->map_size) == -1) { + return -1; + } + + /* remove the recovery marker */ + if (methods->tdb_write(tdb, magic_offset, &zero, 4) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: failed to remove recovery magic\n")); + return -1; + } + + /* ensure the recovery marker has been removed on disk */ + if (transaction_sync(tdb, magic_offset, 4) == -1) { + return -1; + } + } + + tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); + + /* + TODO: maybe write to some dummy hdr field, or write to magic + offset without mmap, before the last sync, instead of the + utime() call + */ + + /* on some systems (like Linux 2.6.x) changes via mmap/msync + don't change the mtime of the file, this means the file may + not be backed up (as tdb rounding to block sizes means that + file size changes are quite rare too). The following forces + mtime changes when a transaction completes */ +#ifdef HAVE_UTIME + utime(tdb->name, NULL); +#endif + + /* use a transaction cancel to free memory and remove the + transaction locks */ + tdb_transaction_cancel(tdb); + return 0; +} + + +/* + recover from an aborted transaction. 
Must be called with exclusive + database write access already established (including the global + lock to prevent new processes attaching) +*/ +int tdb_transaction_recover(struct tdb_context *tdb) +{ + tdb_off_t recovery_head, recovery_eof; + unsigned char *data, *p; + u32 zero = 0; + struct list_struct rec; + + /* find the recovery area */ + if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to read recovery head\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + + if (recovery_head == 0) { + /* we have never allocated a recovery record */ + return 0; + } + + /* read the recovery record */ + if (tdb->methods->tdb_read(tdb, recovery_head, &rec, + sizeof(rec), DOCONV()) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to read recovery record\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + + if (rec.magic != TDB_RECOVERY_MAGIC) { + /* there is no valid recovery data */ + return 0; + } + + if (tdb->read_only) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: attempt to recover read only database\n")); + tdb->ecode = TDB_ERR_CORRUPT; + return -1; + } + + recovery_eof = rec.key_len; + + data = (unsigned char *)malloc(rec.data_len); + if (data == NULL) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to allocate recovery data\n")); + tdb->ecode = TDB_ERR_OOM; + return -1; + } + + /* read the full recovery data */ + if (tdb->methods->tdb_read(tdb, recovery_head + sizeof(rec), data, + rec.data_len, 0) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to read recovery data\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + + /* recover the file data */ + p = data; + while (p+8 < data + rec.data_len) { + u32 ofs, len; + if (DOCONV()) { + tdb_convert(p, 8); + } + memcpy(&ofs, p, 4); + memcpy(&len, p+4, 4); + + if (tdb->methods->tdb_write(tdb, ofs, p+8, len) == -1) { + free(data); + TDB_LOG((tdb, 
TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to recover %d bytes at offset %d\n", len, ofs)); + tdb->ecode = TDB_ERR_IO; + return -1; + } + p += 8 + len; + } + + free(data); + + if (transaction_sync(tdb, 0, tdb->map_size) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to sync recovery\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + + /* if the recovery area is after the recovered eof then remove it */ + if (recovery_eof <= recovery_head) { + if (tdb_ofs_write(tdb, TDB_RECOVERY_HEAD, &zero) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to remove recovery head\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + } + + /* remove the recovery magic */ + if (tdb_ofs_write(tdb, recovery_head + offsetof(struct list_struct, magic), + &zero) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to remove recovery magic\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + + /* reduce the file size to the old size */ + tdb_munmap(tdb); + if (ftruncate(tdb->fd, recovery_eof) != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to reduce to recovery size\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + tdb->map_size = recovery_eof; + tdb_mmap(tdb); + + if (transaction_sync(tdb, 0, recovery_eof) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to sync2 recovery\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + + TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_recover: recovered %d byte database\n", + recovery_eof)); + + /* all done */ + return 0; +} + +/* file: freelist.c */ + +/* read a freelist record and check for simple errors */ +static int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off, struct list_struct *rec) +{ + if (tdb->methods->tdb_read(tdb, off, rec, sizeof(*rec),DOCONV()) == -1) + return -1; + + if (rec->magic == TDB_MAGIC) { + /* this happens when a app is showdown while deleting a record - we should + not completely 
fail when this happens */ + TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read non-free magic 0x%x at offset=%d - fixing\n", + rec->magic, off)); + rec->magic = TDB_FREE_MAGIC; + if (tdb->methods->tdb_write(tdb, off, rec, sizeof(*rec)) == -1) + return -1; + } + + if (rec->magic != TDB_FREE_MAGIC) { + /* Ensure ecode is set for log fn. */ + tdb->ecode = TDB_ERR_CORRUPT; + TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read bad magic 0x%x at offset=%d\n", + rec->magic, off)); + return TDB_ERRCODE(TDB_ERR_CORRUPT, -1); + } + if (tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0) != 0) + return -1; + return 0; +} + + + +/* Remove an element from the freelist. Must have alloc lock. */ +static int remove_from_freelist(struct tdb_context *tdb, tdb_off_t off, tdb_off_t next) +{ + tdb_off_t last_ptr, i; + + /* read in the freelist top */ + last_ptr = FREELIST_TOP; + while (tdb_ofs_read(tdb, last_ptr, &i) != -1 && i != 0) { + if (i == off) { + /* We've found it! */ + return tdb_ofs_write(tdb, last_ptr, &next); + } + /* Follow chain (next offset is at start of record) */ + last_ptr = i; + } + TDB_LOG((tdb, TDB_DEBUG_FATAL,"remove_from_freelist: not on list at off=%d\n", off)); + return TDB_ERRCODE(TDB_ERR_CORRUPT, -1); +} + + +/* update a record tailer (must hold allocation lock) */ +static int update_tailer(struct tdb_context *tdb, tdb_off_t offset, + const struct list_struct *rec) +{ + tdb_off_t totalsize; + + /* Offset of tailer from record header */ + totalsize = sizeof(*rec) + rec->rec_len; + return tdb_ofs_write(tdb, offset + totalsize - sizeof(tdb_off_t), + &totalsize); +} + +/* Add an element into the freelist. Merge adjacent records if + neccessary. 
*/ +int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec) +{ + tdb_off_t right, left; + + /* Allocation and tailer lock */ + if (tdb_lock(tdb, -1, F_WRLCK) != 0) + return -1; + + /* set an initial tailer, so if we fail we don't leave a bogus record */ + if (update_tailer(tdb, offset, rec) != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed!\n")); + goto fail; + } + + /* Look right first (I'm an Australian, dammit) */ + right = offset + sizeof(*rec) + rec->rec_len; + if (right + sizeof(*rec) <= tdb->map_size) { + struct list_struct r; + + if (tdb->methods->tdb_read(tdb, right, &r, sizeof(r), DOCONV()) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: right read failed at %u\n", right)); + goto left; + } + + /* If it's free, expand to include it. */ + if (r.magic == TDB_FREE_MAGIC) { + if (remove_from_freelist(tdb, right, r.next) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: right free failed at %u\n", right)); + goto left; + } + rec->rec_len += sizeof(r) + r.rec_len; + } + } + +left: + /* Look left */ + left = offset - sizeof(tdb_off_t); + if (left > TDB_DATA_START(tdb->header.hash_size)) { + struct list_struct l; + tdb_off_t leftsize; + + /* Read in tailer and jump back to header */ + if (tdb_ofs_read(tdb, left, &leftsize) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left offset read failed at %u\n", left)); + goto update; + } + + /* it could be uninitialised data */ + if (leftsize == 0 || leftsize == TDB_PAD_U32) { + goto update; + } + + left = offset - leftsize; + + /* Now read in record */ + if (tdb->methods->tdb_read(tdb, left, &l, sizeof(l), DOCONV()) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left read failed at %u (%u)\n", left, leftsize)); + goto update; + } + + /* If it's free, expand to include it. 
*/ + if (l.magic == TDB_FREE_MAGIC) { + if (remove_from_freelist(tdb, left, l.next) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left free failed at %u\n", left)); + goto update; + } else { + offset = left; + rec->rec_len += leftsize; + } + } + } + +update: + if (update_tailer(tdb, offset, rec) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed at %u\n", offset)); + goto fail; + } + + /* Now, prepend to free list */ + rec->magic = TDB_FREE_MAGIC; + + if (tdb_ofs_read(tdb, FREELIST_TOP, &rec->next) == -1 || + tdb_rec_write(tdb, offset, rec) == -1 || + tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free record write failed at offset=%d\n", offset)); + goto fail; + } + + /* And we're done. */ + tdb_unlock(tdb, -1, F_WRLCK); + return 0; + + fail: + tdb_unlock(tdb, -1, F_WRLCK); + return -1; +} + + +/* + the core of tdb_allocate - called when we have decided which + free list entry to use + */ +static tdb_off_t tdb_allocate_ofs(struct tdb_context *tdb, tdb_len_t length, tdb_off_t rec_ptr, + struct list_struct *rec, tdb_off_t last_ptr) +{ + struct list_struct newrec; + tdb_off_t newrec_ptr; + + memset(&newrec, '\0', sizeof(newrec)); + + /* found it - now possibly split it up */ + if (rec->rec_len > length + MIN_REC_SIZE) { + /* Length of left piece */ + length = TDB_ALIGN(length, TDB_ALIGNMENT); + + /* Right piece to go on free list */ + newrec.rec_len = rec->rec_len - (sizeof(*rec) + length); + newrec_ptr = rec_ptr + sizeof(*rec) + length; + + /* And left record is shortened */ + rec->rec_len = length; + } else { + newrec_ptr = 0; + } + + /* Remove allocated record from the free list */ + if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1) { + return 0; + } + + /* Update header: do this before we drop alloc + lock, otherwise tdb_free() might try to + merge with us, thinking we're free. + (Thanks Jeremy Allison). 
*/ + rec->magic = TDB_MAGIC; + if (tdb_rec_write(tdb, rec_ptr, rec) == -1) { + return 0; + } + + /* Did we create new block? */ + if (newrec_ptr) { + /* Update allocated record tailer (we + shortened it). */ + if (update_tailer(tdb, rec_ptr, rec) == -1) { + return 0; + } + + /* Free new record */ + if (tdb_free(tdb, newrec_ptr, &newrec) == -1) { + return 0; + } + } + + /* all done - return the new record offset */ + return rec_ptr; +} + +/* allocate some space from the free list. The offset returned points + to a unconnected list_struct within the database with room for at + least length bytes of total data + + 0 is returned if the space could not be allocated + */ +tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct list_struct *rec) +{ + tdb_off_t rec_ptr, last_ptr, newrec_ptr; + struct { + tdb_off_t rec_ptr, last_ptr; + tdb_len_t rec_len; + } bestfit; + + if (tdb_lock(tdb, -1, F_WRLCK) == -1) + return 0; + + /* Extra bytes required for tailer */ + length += sizeof(tdb_off_t); + + again: + last_ptr = FREELIST_TOP; + + /* read in the freelist top */ + if (tdb_ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1) + goto fail; + + bestfit.rec_ptr = 0; + bestfit.last_ptr = 0; + bestfit.rec_len = 0; + + /* + this is a best fit allocation strategy. Originally we used + a first fit strategy, but it suffered from massive fragmentation + issues when faced with a slowly increasing record size. 
+ */ + while (rec_ptr) { + if (tdb_rec_free_read(tdb, rec_ptr, rec) == -1) { + goto fail; + } + + if (rec->rec_len >= length) { + if (bestfit.rec_ptr == 0 || + rec->rec_len < bestfit.rec_len) { + bestfit.rec_len = rec->rec_len; + bestfit.rec_ptr = rec_ptr; + bestfit.last_ptr = last_ptr; + /* consider a fit to be good enough if + we aren't wasting more than half + the space */ + if (bestfit.rec_len < 2*length) { + break; + } + } + } + + /* move to the next record */ + last_ptr = rec_ptr; + rec_ptr = rec->next; + } + + if (bestfit.rec_ptr != 0) { + if (tdb_rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) { + goto fail; + } + + newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, rec, bestfit.last_ptr); + tdb_unlock(tdb, -1, F_WRLCK); + return newrec_ptr; + } + + /* we didn't find enough space. See if we can expand the + database and if we can then try again */ + if (tdb_expand(tdb, length + sizeof(*rec)) == 0) + goto again; + fail: + tdb_unlock(tdb, -1, F_WRLCK); + return 0; +} + +/* file: freelistcheck.c */ + +/* Check the freelist is good and contains no loops. + Very memory intensive - only do this as a consistency + checker. Heh heh - uses an in memory tdb as the storage + for the "seen" record list. For some reason this strikes + me as extremely clever as I don't have to write another tree + data structure implementation :-). 
+ */ + +static int seen_insert(struct tdb_context *mem_tdb, tdb_off_t rec_ptr) +{ + TDB_DATA key, data; + + memset(&data, '\0', sizeof(data)); + key.dptr = (unsigned char *)&rec_ptr; + key.dsize = sizeof(rec_ptr); + return tdb_store(mem_tdb, key, data, TDB_INSERT); +} + +int tdb_validate_freelist(struct tdb_context *tdb, int *pnum_entries) +{ + struct tdb_context *mem_tdb = NULL; + struct list_struct rec; + tdb_off_t rec_ptr, last_ptr; + int ret = -1; + + *pnum_entries = 0; + + mem_tdb = tdb_open("flval", tdb->header.hash_size, + TDB_INTERNAL, O_RDWR, 0600); + if (!mem_tdb) { + return -1; + } + + if (tdb_lock(tdb, -1, F_WRLCK) == -1) { + tdb_close(mem_tdb); + return 0; + } + + last_ptr = FREELIST_TOP; + + /* Store the FREELIST_TOP record. */ + if (seen_insert(mem_tdb, last_ptr) == -1) { + ret = TDB_ERRCODE(TDB_ERR_CORRUPT, -1); + goto fail; + } + + /* read in the freelist top */ + if (tdb_ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1) { + goto fail; + } + + while (rec_ptr) { + + /* If we can't store this record (we've seen it + before) then the free list has a loop and must + be corrupt. */ + + if (seen_insert(mem_tdb, rec_ptr)) { + ret = TDB_ERRCODE(TDB_ERR_CORRUPT, -1); + goto fail; + } + + if (tdb_rec_free_read(tdb, rec_ptr, &rec) == -1) { + goto fail; + } + + /* move to the next record */ + last_ptr = rec_ptr; + rec_ptr = rec.next; + *pnum_entries += 1; + } + + ret = 0; + + fail: + + tdb_close(mem_tdb); + tdb_unlock(tdb, -1, F_WRLCK); + return ret; +} + +/* file: traverse.c */ + +/* Uses traverse lock: 0 = finish, -1 = error, other = record offset */ +static int tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tlock, + struct list_struct *rec) +{ + int want_next = (tlock->off != 0); + + /* Lock each chain from the start one. 
*/ + for (; tlock->hash < tdb->header.hash_size; tlock->hash++) { + if (!tlock->off && tlock->hash != 0) { + /* this is an optimisation for the common case where + the hash chain is empty, which is particularly + common for the use of tdb with ldb, where large + hashes are used. In that case we spend most of our + time in tdb_brlock(), locking empty hash chains. + + To avoid this, we do an unlocked pre-check to see + if the hash chain is empty before starting to look + inside it. If it is empty then we can avoid that + hash chain. If it isn't empty then we can't believe + the value we get back, as we read it without a + lock, so instead we get the lock and re-fetch the + value below. + + Notice that not doing this optimisation on the + first hash chain is critical. We must guarantee + that we have done at least one fcntl lock at the + start of a search to guarantee that memory is + coherent on SMP systems. If records are added by + others during the search then thats OK, and we + could possibly miss those with this trick, but we + could miss them anyway without this trick, so the + semantics don't change. + + With a non-indexed ldb search this trick gains us a + factor of around 80 in speed on a linux 2.6.x + system (testing using ldbtest). + */ + tdb->methods->next_hash_chain(tdb, &tlock->hash); + if (tlock->hash == tdb->header.hash_size) { + continue; + } + } + + if (tdb_lock(tdb, tlock->hash, tlock->lock_rw) == -1) + return -1; + + /* No previous record? Start at top of chain. */ + if (!tlock->off) { + if (tdb_ofs_read(tdb, TDB_HASH_TOP(tlock->hash), + &tlock->off) == -1) + goto fail; + } else { + /* Otherwise unlock the previous record. 
*/ + if (tdb_unlock_record(tdb, tlock->off) != 0) + goto fail; + } + + if (want_next) { + /* We have offset of old record: grab next */ + if (tdb_rec_read(tdb, tlock->off, rec) == -1) + goto fail; + tlock->off = rec->next; + } + + /* Iterate through chain */ + while( tlock->off) { + tdb_off_t current; + if (tdb_rec_read(tdb, tlock->off, rec) == -1) + goto fail; + + /* Detect infinite loops. From "Shlomi Yaakobovich" <Shlomi@exanet.com>. */ + if (tlock->off == rec->next) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_next_lock: loop detected.\n")); + goto fail; + } + + if (!TDB_DEAD(rec)) { + /* Woohoo: we found one! */ + if (tdb_lock_record(tdb, tlock->off) != 0) + goto fail; + return tlock->off; + } + + /* Try to clean dead ones from old traverses */ + current = tlock->off; + tlock->off = rec->next; + if (!(tdb->read_only || tdb->traverse_read) && + tdb_do_delete(tdb, current, rec) != 0) + goto fail; + } + tdb_unlock(tdb, tlock->hash, tlock->lock_rw); + want_next = 0; + } + /* We finished iteration without finding anything */ + return TDB_ERRCODE(TDB_SUCCESS, 0); + + fail: + tlock->off = 0; + if (tdb_unlock(tdb, tlock->hash, tlock->lock_rw) != 0) + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_next_lock: On error unlock failed!\n")); + return -1; +} + +/* traverse the entire database - calling fn(tdb, key, data) on each element. + return -1 on error or the record count traversed + if fn is NULL then it is not called + a non-zero return value from fn() indicates that the traversal should stop + */ +static int tdb_traverse_internal(struct tdb_context *tdb, + tdb_traverse_func fn, void *private_data, + struct tdb_traverse_lock *tl) +{ + TDB_DATA key, dbuf; + struct list_struct rec; + int ret, count = 0; + + /* This was in the initializaton, above, but the IRIX compiler + * did not like it. 
crh + */ + tl->next = tdb->travlocks.next; + + /* fcntl locks don't stack: beware traverse inside traverse */ + tdb->travlocks.next = tl; + + /* tdb_next_lock places locks on the record returned, and its chain */ + while ((ret = tdb_next_lock(tdb, tl, &rec)) > 0) { + count++; + /* now read the full record */ + key.dptr = tdb_alloc_read(tdb, tl->off + sizeof(rec), + rec.key_len + rec.data_len); + if (!key.dptr) { + ret = -1; + if (tdb_unlock(tdb, tl->hash, tl->lock_rw) != 0) + goto out; + if (tdb_unlock_record(tdb, tl->off) != 0) + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_traverse: key.dptr == NULL and unlock_record failed!\n")); + goto out; + } + key.dsize = rec.key_len; + dbuf.dptr = key.dptr + rec.key_len; + dbuf.dsize = rec.data_len; + + /* Drop chain lock, call out */ + if (tdb_unlock(tdb, tl->hash, tl->lock_rw) != 0) { + ret = -1; + SAFE_FREE(key.dptr); + goto out; + } + if (fn && fn(tdb, key, dbuf, private_data)) { + /* They want us to terminate traversal */ + ret = count; + if (tdb_unlock_record(tdb, tl->off) != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_traverse: unlock_record failed!\n"));; + ret = -1; + } + SAFE_FREE(key.dptr); + goto out; + } + SAFE_FREE(key.dptr); + } +out: + tdb->travlocks.next = tl->next; + if (ret < 0) + return -1; + else + return count; +} + + +/* + a write style traverse - temporarily marks the db read only +*/ +int tdb_traverse_read(struct tdb_context *tdb, + tdb_traverse_func fn, void *private_data) +{ + struct tdb_traverse_lock tl = { NULL, 0, 0, F_RDLCK }; + int ret; + + /* we need to get a read lock on the transaction lock here to + cope with the lock ordering semantics of solaris10 */ + if (tdb_transaction_lock(tdb, F_RDLCK)) { + return -1; + } + + tdb->traverse_read++; + ret = tdb_traverse_internal(tdb, fn, private_data, &tl); + tdb->traverse_read--; + + tdb_transaction_unlock(tdb); + + return ret; +} + +/* + a write style traverse - needs to get the transaction lock to + prevent deadlocks +*/ +int tdb_traverse(struct tdb_context 
*tdb, + tdb_traverse_func fn, void *private_data) +{ + struct tdb_traverse_lock tl = { NULL, 0, 0, F_WRLCK }; + int ret; + + if (tdb->read_only || tdb->traverse_read) { + return tdb_traverse_read(tdb, fn, private_data); + } + + if (tdb_transaction_lock(tdb, F_WRLCK)) { + return -1; + } + + ret = tdb_traverse_internal(tdb, fn, private_data, &tl); + + tdb_transaction_unlock(tdb); + + return ret; +} + + +/* find the first entry in the database and return its key */ +TDB_DATA tdb_firstkey(struct tdb_context *tdb) +{ + TDB_DATA key; + struct list_struct rec; + + /* release any old lock */ + if (tdb_unlock_record(tdb, tdb->travlocks.off) != 0) + return tdb_null; + tdb->travlocks.off = tdb->travlocks.hash = 0; + tdb->travlocks.lock_rw = F_RDLCK; + + /* Grab first record: locks chain and returned record. */ + if (tdb_next_lock(tdb, &tdb->travlocks, &rec) <= 0) + return tdb_null; + /* now read the key */ + key.dsize = rec.key_len; + key.dptr =tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec),key.dsize); + + /* Unlock the hash chain of the record we just read. */ + if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0) + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_firstkey: error occurred while tdb_unlocking!\n")); + return key; +} + +/* find the next entry in the database, returning its key */ +TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey) +{ + u32 oldhash; + TDB_DATA key = tdb_null; + struct list_struct rec; + unsigned char *k = NULL; + + /* Is locked key the old key? If so, traverse will be reliable. 
*/ + if (tdb->travlocks.off) { + if (tdb_lock(tdb,tdb->travlocks.hash,tdb->travlocks.lock_rw)) + return tdb_null; + if (tdb_rec_read(tdb, tdb->travlocks.off, &rec) == -1 + || !(k = tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec), + rec.key_len)) + || memcmp(k, oldkey.dptr, oldkey.dsize) != 0) { + /* No, it wasn't: unlock it and start from scratch */ + if (tdb_unlock_record(tdb, tdb->travlocks.off) != 0) { + SAFE_FREE(k); + return tdb_null; + } + if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0) { + SAFE_FREE(k); + return tdb_null; + } + tdb->travlocks.off = 0; + } + + SAFE_FREE(k); + } + + if (!tdb->travlocks.off) { + /* No previous element: do normal find, and lock record */ + tdb->travlocks.off = tdb_find_lock_hash(tdb, oldkey, tdb->hash_fn(&oldkey), tdb->travlocks.lock_rw, &rec); + if (!tdb->travlocks.off) + return tdb_null; + tdb->travlocks.hash = BUCKET(rec.full_hash); + if (tdb_lock_record(tdb, tdb->travlocks.off) != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: lock_record failed (%s)!\n", strerror(errno))); + return tdb_null; + } + } + oldhash = tdb->travlocks.hash; + + /* Grab next record: locks chain and returned record, + unlocks old record */ + if (tdb_next_lock(tdb, &tdb->travlocks, &rec) > 0) { + key.dsize = rec.key_len; + key.dptr = tdb_alloc_read(tdb, tdb->travlocks.off+sizeof(rec), + key.dsize); + /* Unlock the chain of this new record */ + if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0) + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n")); + } + /* Unlock the chain of old record */ + if (tdb_unlock(tdb, BUCKET(oldhash), tdb->travlocks.lock_rw) != 0) + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n")); + return key; +} + +/* file: dump.c */ + +static tdb_off_t tdb_dump_record(struct tdb_context *tdb, int hash, + tdb_off_t offset) +{ + struct list_struct rec; + tdb_off_t tailer_ofs, tailer; + + if (tdb->methods->tdb_read(tdb, offset, (char 
*)&rec, + sizeof(rec), DOCONV()) == -1) { + printf("ERROR: failed to read record at %u\n", offset); + return 0; + } + + printf(" rec: hash=%d offset=0x%08x next=0x%08x rec_len=%d " + "key_len=%d data_len=%d full_hash=0x%x magic=0x%x\n", + hash, offset, rec.next, rec.rec_len, rec.key_len, rec.data_len, + rec.full_hash, rec.magic); + + tailer_ofs = offset + sizeof(rec) + rec.rec_len - sizeof(tdb_off_t); + + if (tdb_ofs_read(tdb, tailer_ofs, &tailer) == -1) { + printf("ERROR: failed to read tailer at %u\n", tailer_ofs); + return rec.next; + } + + if (tailer != rec.rec_len + sizeof(rec)) { + printf("ERROR: tailer does not match record! tailer=%u totalsize=%u\n", + (unsigned int)tailer, (unsigned int)(rec.rec_len + sizeof(rec))); + } + return rec.next; +} + +static int tdb_dump_chain(struct tdb_context *tdb, int i) +{ + tdb_off_t rec_ptr, top; + + top = TDB_HASH_TOP(i); + + if (tdb_lock(tdb, i, F_WRLCK) != 0) + return -1; + + if (tdb_ofs_read(tdb, top, &rec_ptr) == -1) + return tdb_unlock(tdb, i, F_WRLCK); + + if (rec_ptr) + printf("hash=%d\n", i); + + while (rec_ptr) { + rec_ptr = tdb_dump_record(tdb, i, rec_ptr); + } + + return tdb_unlock(tdb, i, F_WRLCK); +} + +void tdb_dump_all(struct tdb_context *tdb) +{ + int i; + for (i=0;i<tdb->header.hash_size;i++) { + tdb_dump_chain(tdb, i); + } + printf("freelist:\n"); + tdb_dump_chain(tdb, -1); +} + +int tdb_printfreelist(struct tdb_context *tdb) +{ + int ret; + long total_free = 0; + tdb_off_t offset, rec_ptr; + struct list_struct rec; + + if ((ret = tdb_lock(tdb, -1, F_WRLCK)) != 0) + return ret; + + offset = FREELIST_TOP; + + /* read in the freelist top */ + if (tdb_ofs_read(tdb, offset, &rec_ptr) == -1) { + tdb_unlock(tdb, -1, F_WRLCK); + return 0; + } + + printf("freelist top=[0x%08x]\n", rec_ptr ); + while (rec_ptr) { + if (tdb->methods->tdb_read(tdb, rec_ptr, (char *)&rec, + sizeof(rec), DOCONV()) == -1) { + tdb_unlock(tdb, -1, F_WRLCK); + return -1; + } + + if (rec.magic != TDB_FREE_MAGIC) { + printf("bad magic 
0x%08x in free list\n", rec.magic); + tdb_unlock(tdb, -1, F_WRLCK); + return -1; + } + + printf("entry offset=[0x%08x], rec.rec_len = [0x%08x (%d)] (end = 0x%08x)\n", + rec_ptr, rec.rec_len, rec.rec_len, rec_ptr + rec.rec_len); + total_free += rec.rec_len; + + /* move to the next record */ + rec_ptr = rec.next; + } + printf("total rec_len = [0x%08x (%d)]\n", (int)total_free, + (int)total_free); + + return tdb_unlock(tdb, -1, F_WRLCK); +} + +/* file: tdb.c */ + +TDB_DATA tdb_null; + +/* + non-blocking increment of the tdb sequence number if the tdb has been opened using + the TDB_SEQNUM flag +*/ +void tdb_increment_seqnum_nonblock(struct tdb_context *tdb) +{ + tdb_off_t seqnum=0; + + if (!(tdb->flags & TDB_SEQNUM)) { + return; + } + + /* we ignore errors from this, as we have no sane way of + dealing with them. + */ + tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum); + seqnum++; + tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum); +} + +/* + increment the tdb sequence number if the tdb has been opened using + the TDB_SEQNUM flag +*/ +static void tdb_increment_seqnum(struct tdb_context *tdb) +{ + if (!(tdb->flags & TDB_SEQNUM)) { + return; + } + + if (tdb_brlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, F_SETLKW, 1, 1) != 0) { + return; + } + + tdb_increment_seqnum_nonblock(tdb); + + tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1, 1); +} + +static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data) +{ + return memcmp(data.dptr, key.dptr, data.dsize); +} + +/* Returns 0 on fail. 
On success, return offset of record, and fills + in rec */ +static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, u32 hash, + struct list_struct *r) +{ + tdb_off_t rec_ptr; + + /* read in the hash top */ + if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) + return 0; + + /* keep looking until we find the right record */ + while (rec_ptr) { + if (tdb_rec_read(tdb, rec_ptr, r) == -1) + return 0; + + if (!TDB_DEAD(r) && hash==r->full_hash + && key.dsize==r->key_len + && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r), + r->key_len, tdb_key_compare, + NULL) == 0) { + return rec_ptr; + } + rec_ptr = r->next; + } + return TDB_ERRCODE(TDB_ERR_NOEXIST, 0); +} + +/* As tdb_find, but if you succeed, keep the lock */ +tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, int locktype, + struct list_struct *rec) +{ + u32 rec_ptr; + + if (tdb_lock(tdb, BUCKET(hash), locktype) == -1) + return 0; + if (!(rec_ptr = tdb_find(tdb, key, hash, rec))) + tdb_unlock(tdb, BUCKET(hash), locktype); + return rec_ptr; +} + + +/* update an entry in place - this only works if the new data size + is <= the old data size and the key exists. + on failure return -1. 
+*/ +static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, TDB_DATA dbuf) +{ + struct list_struct rec; + tdb_off_t rec_ptr; + + /* find entry */ + if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) + return -1; + + /* must be long enough key, data and tailer */ + if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) { + tdb->ecode = TDB_SUCCESS; /* Not really an error */ + return -1; + } + + if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len, + dbuf.dptr, dbuf.dsize) == -1) + return -1; + + if (dbuf.dsize != rec.data_len) { + /* update size */ + rec.data_len = dbuf.dsize; + return tdb_rec_write(tdb, rec_ptr, &rec); + } + + return 0; +} + +/* find an entry in the database given a key */ +/* If an entry doesn't exist tdb_err will be set to + * TDB_ERR_NOEXIST. If a key has no data attached + * then the TDB_DATA will have zero length but + * a non-zero pointer + */ +TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key) +{ + tdb_off_t rec_ptr; + struct list_struct rec; + TDB_DATA ret; + u32 hash; + + /* find which hash bucket it is in */ + hash = tdb->hash_fn(&key); + if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) + return tdb_null; + + ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len, + rec.data_len); + ret.dsize = rec.data_len; + tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK); + return ret; +} + +/* + * Find an entry in the database and hand the record's data to a parsing + * function. The parsing function is executed under the chain read lock, so it + * should be fast and should not block on other syscalls. + * + * DONT CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS. + * + * For mmapped tdb's that do not have a transaction open it points the parsing + * function directly at the mmap area, it avoids the malloc/memcpy in this + * case. If a transaction is open or no mmap is available, it has to do + * malloc/read/parse/free. 
+ * + * This is interesting for all readers of potentially large data structures in + * the tdb records, ldb indexes being one example. + */ + +int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key, + int (*parser)(TDB_DATA key, TDB_DATA data, + void *private_data), + void *private_data) +{ + tdb_off_t rec_ptr; + struct list_struct rec; + int ret; + u32 hash; + + /* find which hash bucket it is in */ + hash = tdb->hash_fn(&key); + + if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) { + return TDB_ERRCODE(TDB_ERR_NOEXIST, 0); + } + + ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len, + rec.data_len, parser, private_data); + + tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK); + + return ret; +} + +/* check if an entry in the database exists + + note that 1 is returned if the key is found and 0 is returned if not found + this doesn't match the conventions in the rest of this module, but is + compatible with gdbm +*/ +static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash) +{ + struct list_struct rec; + + if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0) + return 0; + tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK); + return 1; +} + +int tdb_exists(struct tdb_context *tdb, TDB_DATA key) +{ + u32 hash = tdb->hash_fn(&key); + return tdb_exists_hash(tdb, key, hash); +} + +/* actually delete an entry in the database given the offset */ +int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct*rec) +{ + tdb_off_t last_ptr, i; + struct list_struct lastrec; + + if (tdb->read_only || tdb->traverse_read) return -1; + + if (tdb_write_lock_record(tdb, rec_ptr) == -1) { + /* Someone traversing here: mark it as dead */ + rec->magic = TDB_DEAD_MAGIC; + return tdb_rec_write(tdb, rec_ptr, rec); + } + if (tdb_write_unlock_record(tdb, rec_ptr) != 0) + return -1; + + /* find previous record in hash chain */ + if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1) + return -1; + for (last_ptr 
= 0; i != rec_ptr; last_ptr = i, i = lastrec.next) + if (tdb_rec_read(tdb, i, &lastrec) == -1) + return -1; + + /* unlink it: next ptr is at start of record. */ + if (last_ptr == 0) + last_ptr = TDB_HASH_TOP(rec->full_hash); + if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1) + return -1; + + /* recover the space */ + if (tdb_free(tdb, rec_ptr, rec) == -1) + return -1; + return 0; +} + +static int tdb_count_dead(struct tdb_context *tdb, u32 hash) +{ + int res = 0; + tdb_off_t rec_ptr; + struct list_struct rec; + + /* read in the hash top */ + if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) + return 0; + + while (rec_ptr) { + if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) + return 0; + + if (rec.magic == TDB_DEAD_MAGIC) { + res += 1; + } + rec_ptr = rec.next; + } + return res; +} + +/* + * Purge all DEAD records from a hash chain + */ +static int tdb_purge_dead(struct tdb_context *tdb, u32 hash) +{ + int res = -1; + struct list_struct rec; + tdb_off_t rec_ptr; + + if (tdb_lock(tdb, -1, F_WRLCK) == -1) { + return -1; + } + + /* read in the hash top */ + if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) + goto fail; + + while (rec_ptr) { + tdb_off_t next; + + if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) { + goto fail; + } + + next = rec.next; + + if (rec.magic == TDB_DEAD_MAGIC + && tdb_do_delete(tdb, rec_ptr, &rec) == -1) { + goto fail; + } + rec_ptr = next; + } + res = 0; + fail: + tdb_unlock(tdb, -1, F_WRLCK); + return res; +} + +/* delete an entry in the database given a key */ +static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash) +{ + tdb_off_t rec_ptr; + struct list_struct rec; + int ret; + + if (tdb->max_dead_records != 0) { + + /* + * Allow for some dead records per hash chain, mainly for + * tdb's with a very high create/delete rate like locking.tdb. 
+ */ + + if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1) + return -1; + + if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) { + /* + * Don't let the per-chain freelist grow too large, + * delete all existing dead records + */ + tdb_purge_dead(tdb, hash); + } + + if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) { + tdb_unlock(tdb, BUCKET(hash), F_WRLCK); + return -1; + } + + /* + * Just mark the record as dead. + */ + rec.magic = TDB_DEAD_MAGIC; + ret = tdb_rec_write(tdb, rec_ptr, &rec); + } + else { + if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, + &rec))) + return -1; + + ret = tdb_do_delete(tdb, rec_ptr, &rec); + } + + if (ret == 0) { + tdb_increment_seqnum(tdb); + } + + if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0) + TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n")); + return ret; +} + +int tdb_delete(struct tdb_context *tdb, TDB_DATA key) +{ + u32 hash = tdb->hash_fn(&key); + return tdb_delete_hash(tdb, key, hash); +} + +/* + * See if we have a dead record around with enough space + */ +static tdb_off_t tdb_find_dead(struct tdb_context *tdb, u32 hash, + struct list_struct *r, tdb_len_t length) +{ + tdb_off_t rec_ptr; + + /* read in the hash top */ + if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) + return 0; + + /* keep looking until we find the right record */ + while (rec_ptr) { + if (tdb_rec_read(tdb, rec_ptr, r) == -1) + return 0; + + if (TDB_DEAD(r) && r->rec_len >= length) { + /* + * First fit for simple coding, TODO: change to best + * fit + */ + return rec_ptr; + } + rec_ptr = r->next; + } + return 0; +} + +/* store an element in the database, replacing any existing element + with the same key + + return 0 on success, -1 on failure +*/ +int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag) +{ + struct list_struct rec; + u32 hash; + tdb_off_t rec_ptr; + char *p = NULL; + int ret = -1; + + if (tdb->read_only || tdb->traverse_read) { + tdb->ecode = 
TDB_ERR_RDONLY; + return -1; + } + + /* find which hash bucket it is in */ + hash = tdb->hash_fn(&key); + if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1) + return -1; + + /* check for it existing, on insert. */ + if (flag == TDB_INSERT) { + if (tdb_exists_hash(tdb, key, hash)) { + tdb->ecode = TDB_ERR_EXISTS; + goto fail; + } + } else { + /* first try in-place update, on modify or replace. */ + if (tdb_update_hash(tdb, key, hash, dbuf) == 0) { + goto done; + } + if (tdb->ecode == TDB_ERR_NOEXIST && + flag == TDB_MODIFY) { + /* if the record doesn't exist and we are in TDB_MODIFY mode then + we should fail the store */ + goto fail; + } + } + /* reset the error code potentially set by the tdb_update() */ + tdb->ecode = TDB_SUCCESS; + + /* delete any existing record - if it doesn't exist we don't + care. Doing this first reduces fragmentation, and avoids + coalescing with `allocated' block before it's updated. */ + if (flag != TDB_INSERT) + tdb_delete_hash(tdb, key, hash); + + /* Copy key+value *before* allocating free space in case malloc + fails and we are left with a dead spot in the tdb. */ + + if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) { + tdb->ecode = TDB_ERR_OOM; + goto fail; + } + + memcpy(p, key.dptr, key.dsize); + if (dbuf.dsize) + memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize); + + if (tdb->max_dead_records != 0) { + /* + * Allow for some dead records per hash chain, look if we can + * find one that can hold the new record. We need enough space + * for key, data and tailer. If we find one, we don't have to + * consult the central freelist. 
+ */ + rec_ptr = tdb_find_dead( + tdb, hash, &rec, + key.dsize + dbuf.dsize + sizeof(tdb_off_t)); + + if (rec_ptr != 0) { + rec.key_len = key.dsize; + rec.data_len = dbuf.dsize; + rec.full_hash = hash; + rec.magic = TDB_MAGIC; + if (tdb_rec_write(tdb, rec_ptr, &rec) == -1 + || tdb->methods->tdb_write( + tdb, rec_ptr + sizeof(rec), + p, key.dsize + dbuf.dsize) == -1) { + goto fail; + } + goto done; + } + } + + /* + * We have to allocate some space from the freelist, so this means we + * have to lock it. Use the chance to purge all the DEAD records from + * the hash chain under the freelist lock. + */ + + if (tdb_lock(tdb, -1, F_WRLCK) == -1) { + goto fail; + } + + if ((tdb->max_dead_records != 0) + && (tdb_purge_dead(tdb, hash) == -1)) { + tdb_unlock(tdb, -1, F_WRLCK); + goto fail; + } + + /* we have to allocate some space */ + rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec); + + tdb_unlock(tdb, -1, F_WRLCK); + + if (rec_ptr == 0) { + goto fail; + } + + /* Read hash top into next ptr */ + if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1) + goto fail; + + rec.key_len = key.dsize; + rec.data_len = dbuf.dsize; + rec.full_hash = hash; + rec.magic = TDB_MAGIC; + + /* write out and point the top of the hash chain at it */ + if (tdb_rec_write(tdb, rec_ptr, &rec) == -1 + || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1 + || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) { + /* Need to tdb_unallocate() here */ + goto fail; + } + + done: + ret = 0; + fail: + if (ret == 0) { + tdb_increment_seqnum(tdb); + } + + SAFE_FREE(p); + tdb_unlock(tdb, BUCKET(hash), F_WRLCK); + return ret; +} + + +/* Append to an entry. Create if not exist. 
*/ +int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf) +{ + u32 hash; + TDB_DATA dbuf; + int ret = -1; + + /* find which hash bucket it is in */ + hash = tdb->hash_fn(&key); + if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1) + return -1; + + dbuf = tdb_fetch(tdb, key); + + if (dbuf.dptr == NULL) { + dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize); + } else { + unsigned char *new_dptr = (unsigned char *)realloc(dbuf.dptr, + dbuf.dsize + new_dbuf.dsize); + if (new_dptr == NULL) { + free(dbuf.dptr); + } + dbuf.dptr = new_dptr; + } + + if (dbuf.dptr == NULL) { + tdb->ecode = TDB_ERR_OOM; + goto failed; + } + + memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize); + dbuf.dsize += new_dbuf.dsize; + + ret = tdb_store(tdb, key, dbuf, 0); + +failed: + tdb_unlock(tdb, BUCKET(hash), F_WRLCK); + SAFE_FREE(dbuf.dptr); + return ret; +} + + +/* + return the name of the current tdb file + useful for external logging functions +*/ +const char *tdb_name(struct tdb_context *tdb) +{ + return tdb->name; +} + +/* + return the underlying file descriptor being used by tdb, or -1 + useful for external routines that want to check the device/inode + of the fd +*/ +int tdb_fd(struct tdb_context *tdb) +{ + return tdb->fd; +} + +/* + return the current logging function + useful for external tdb routines that wish to log tdb errors +*/ +tdb_log_func tdb_log_fn(struct tdb_context *tdb) +{ + return tdb->log.log_fn; +} + + +/* + get the tdb sequence number. Only makes sense if the writers opened + with TDB_SEQNUM set. Note that this sequence number will wrap quite + quickly, so it should only be used for a 'has something changed' + test, not for code that relies on the count of the number of changes + made. If you want a counter then use a tdb record. + + The aim of this sequence number is to allow for a very lightweight + test of a possible tdb change. 
+*/ +int tdb_get_seqnum(struct tdb_context *tdb) +{ + tdb_off_t seqnum=0; + + tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum); + return seqnum; +} + +int tdb_hash_size(struct tdb_context *tdb) +{ + return tdb->header.hash_size; +} + +size_t tdb_map_size(struct tdb_context *tdb) +{ + return tdb->map_size; +} + +int tdb_get_flags(struct tdb_context *tdb) +{ + return tdb->flags; +} + + +/* + enable sequence number handling on an open tdb +*/ +void tdb_enable_seqnum(struct tdb_context *tdb) +{ + tdb->flags |= TDB_SEQNUM; +} + +/* file: open.c */ + +/* all contexts, to ensure no double-opens (fcntl locks don't nest!) */ +static struct tdb_context *tdbs = NULL; + + +/* This is based on the hash algorithm from gdbm */ +static unsigned int default_tdb_hash(TDB_DATA *key) +{ + u32 value; /* Used to compute the hash value. */ + u32 i; /* Used to cycle through random values. */ + + /* Set the initial value from the key size. */ + for (value = 0x238F13AF * key->dsize, i=0; i < key->dsize; i++) + value = (value + (key->dptr[i] << (i*5 % 24))); + + return (1103515243 * value + 12345); +} + + +/* initialise a new database with a specified hash size */ +static int tdb_new_database(struct tdb_context *tdb, int hash_size) +{ + struct tdb_header *newdb; + int size, ret = -1; + + /* We make it up in memory, then write it out if not internal */ + size = sizeof(struct tdb_header) + (hash_size+1)*sizeof(tdb_off_t); + if (!(newdb = (struct tdb_header *)calloc(size, 1))) + return TDB_ERRCODE(TDB_ERR_OOM, -1); + + /* Fill in the header */ + newdb->version = TDB_VERSION; + newdb->hash_size = hash_size; + if (tdb->flags & TDB_INTERNAL) { + tdb->map_size = size; + tdb->map_ptr = (char *)newdb; + memcpy(&tdb->header, newdb, sizeof(tdb->header)); + /* Convert the `ondisk' version if asked. 
*/ + CONVERT(*newdb); + return 0; + } + if (lseek(tdb->fd, 0, SEEK_SET) == -1) + goto fail; + + if (ftruncate(tdb->fd, 0) == -1) + goto fail; + + /* This creates an endian-converted header, as if read from disk */ + CONVERT(*newdb); + memcpy(&tdb->header, newdb, sizeof(tdb->header)); + /* Don't endian-convert the magic food! */ + memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1); + if (write(tdb->fd, newdb, size) != size) { + ret = -1; + } else { + ret = 0; + } + + fail: + SAFE_FREE(newdb); + return ret; +} + + + +static int tdb_already_open(dev_t device, + ino_t ino) +{ + struct tdb_context *i; + + for (i = tdbs; i; i = i->next) { + if (i->device == device && i->inode == ino) { + return 1; + } + } + + return 0; +} + +/* open the database, creating it if necessary + + The open_flags and mode are passed straight to the open call on the + database file. A flags value of O_WRONLY is invalid. The hash size + is advisory, use zero for a default value. + + Return is NULL on error, in which case errno is also set. Don't + try to call tdb_error or tdb_errname, just do strerror(errno). + + @param name may be NULL for internal databases. */ +struct tdb_context *tdb_open(const char *name, int hash_size, int tdb_flags, + int open_flags, mode_t mode) +{ + return tdb_open_ex(name, hash_size, tdb_flags, open_flags, mode, NULL, NULL); +} + +/* a default logging function */ +static void null_log_fn(struct tdb_context *tdb, enum tdb_debug_level level, const char *fmt, ...) PRINTF_ATTRIBUTE(3, 4); +static void null_log_fn(struct tdb_context *tdb, enum tdb_debug_level level, const char *fmt, ...) 
+{ +} + + +struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, + int open_flags, mode_t mode, + const struct tdb_logging_context *log_ctx, + tdb_hash_func hash_fn) +{ + struct tdb_context *tdb; + struct stat st; + int rev = 0, locked = 0; + unsigned char *vp; + u32 vertest; + + if (!(tdb = (struct tdb_context *)calloc(1, sizeof *tdb))) { + /* Can't log this */ + errno = ENOMEM; + goto fail; + } + tdb_io_init(tdb); + tdb->fd = -1; + tdb->name = NULL; + tdb->map_ptr = NULL; + tdb->flags = tdb_flags; + tdb->open_flags = open_flags; + if (log_ctx) { + tdb->log = *log_ctx; + } else { + tdb->log.log_fn = null_log_fn; + tdb->log.log_private = NULL; + } + tdb->hash_fn = hash_fn ? hash_fn : default_tdb_hash; + + /* cache the page size */ + tdb->page_size = getpagesize(); + if (tdb->page_size <= 0) { + tdb->page_size = 0x2000; + } + + if ((open_flags & O_ACCMODE) == O_WRONLY) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: can't open tdb %s write-only\n", + name)); + errno = EINVAL; + goto fail; + } + + if (hash_size == 0) + hash_size = DEFAULT_HASH_SIZE; + if ((open_flags & O_ACCMODE) == O_RDONLY) { + tdb->read_only = 1; + /* read only databases don't do locking or clear if first */ + tdb->flags |= TDB_NOLOCK; + tdb->flags &= ~TDB_CLEAR_IF_FIRST; + } + + /* internal databases don't mmap or lock, and start off cleared */ + if (tdb->flags & TDB_INTERNAL) { + tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP); + tdb->flags &= ~TDB_CLEAR_IF_FIRST; + if (tdb_new_database(tdb, hash_size) != 0) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: tdb_new_database failed!")); + goto fail; + } + goto internal; + } + + if ((tdb->fd = open(name, open_flags, mode)) == -1) { + TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_open_ex: could not open file %s: %s\n", + name, strerror(errno))); + goto fail; /* errno set by open(2) */ + } + + /* ensure there is only one process initialising at once */ + if (tdb->methods->tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) { + 
TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to get global lock on %s: %s\n", + name, strerror(errno))); + goto fail; /* errno set by tdb_brlock */ + } + + /* we need to zero database if we are the only one with it open */ + if ((tdb_flags & TDB_CLEAR_IF_FIRST) && + (locked = (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_WRLCK, F_SETLK, 0, 1) == 0))) { + open_flags |= O_CREAT; + if (ftruncate(tdb->fd, 0) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " + "failed to truncate %s: %s\n", + name, strerror(errno))); + goto fail; /* errno set by ftruncate */ + } + } + + if (read(tdb->fd, &tdb->header, sizeof(tdb->header)) != sizeof(tdb->header) + || strcmp(tdb->header.magic_food, TDB_MAGIC_FOOD) != 0 + || (tdb->header.version != TDB_VERSION + && !(rev = (tdb->header.version==TDB_BYTEREV(TDB_VERSION))))) { + /* its not a valid database - possibly initialise it */ + if (!(open_flags & O_CREAT) || tdb_new_database(tdb, hash_size) == -1) { + errno = EIO; /* ie bad format or something */ + goto fail; + } + rev = (tdb->flags & TDB_CONVERT); + } + vp = (unsigned char *)&tdb->header.version; + vertest = (((u32)vp[0]) << 24) | (((u32)vp[1]) << 16) | + (((u32)vp[2]) << 8) | (u32)vp[3]; + tdb->flags |= (vertest==TDB_VERSION) ? TDB_BIGENDIAN : 0; + if (!rev) + tdb->flags &= ~TDB_CONVERT; + else { + tdb->flags |= TDB_CONVERT; + tdb_convert(&tdb->header, sizeof(tdb->header)); + } + if (fstat(tdb->fd, &st) == -1) + goto fail; + + if (tdb->header.rwlocks != 0) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: spinlocks no longer supported\n")); + goto fail; + } + + /* Is it already in the open list? If so, fail. 
*/ + if (tdb_already_open(st.st_dev, st.st_ino)) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " + "%s (%d,%d) is already open in this process\n", + name, (int)st.st_dev, (int)st.st_ino)); + errno = EBUSY; + goto fail; + } + + if (!(tdb->name = (char *)strdup(name))) { + errno = ENOMEM; + goto fail; + } + + tdb->map_size = st.st_size; + tdb->device = st.st_dev; + tdb->inode = st.st_ino; + tdb->max_dead_records = 0; + tdb_mmap(tdb); + if (locked) { + if (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_UNLCK, F_SETLK, 0, 1) == -1) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " + "failed to take ACTIVE_LOCK on %s: %s\n", + name, strerror(errno))); + goto fail; + } + + } + + /* We always need to do this if the CLEAR_IF_FIRST flag is set, even if + we didn't get the initial exclusive lock as we need to let all other + users know we're using it. */ + + if (tdb_flags & TDB_CLEAR_IF_FIRST) { + /* leave this lock in place to indicate it's in use */ + if (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0, 1) == -1) + goto fail; + } + + /* if needed, run recovery */ + if (tdb_transaction_recover(tdb) == -1) { + goto fail; + } + + internal: + /* Internal (memory-only) databases skip all the code above to + * do with disk files, and resume here by releasing their + * global lock and hooking into the active list. 
*/ + if (tdb->methods->tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1) == -1) + goto fail; + tdb->next = tdbs; + tdbs = tdb; + return tdb; + + fail: + { int save_errno = errno; + + if (!tdb) + return NULL; + + if (tdb->map_ptr) { + if (tdb->flags & TDB_INTERNAL) + SAFE_FREE(tdb->map_ptr); + else + tdb_munmap(tdb); + } + SAFE_FREE(tdb->name); + if (tdb->fd != -1) + if (close(tdb->fd) != 0) + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to close tdb->fd on error!\n")); + SAFE_FREE(tdb); + errno = save_errno; + return NULL; + } +} + +/* + * Set the maximum number of dead records per hash chain + */ + +void tdb_set_max_dead(struct tdb_context *tdb, int max_dead) +{ + tdb->max_dead_records = max_dead; +} + +/** + * Close a database. + * + * @returns -1 for error; 0 for success. + **/ +int tdb_close(struct tdb_context *tdb) +{ + struct tdb_context **i; + int ret = 0; + + if (tdb->transaction) { + tdb_transaction_cancel(tdb); + } + + if (tdb->map_ptr) { + if (tdb->flags & TDB_INTERNAL) + SAFE_FREE(tdb->map_ptr); + else + tdb_munmap(tdb); + } + SAFE_FREE(tdb->name); + if (tdb->fd != -1) + ret = close(tdb->fd); + SAFE_FREE(tdb->lockrecs); + + /* Remove from contexts list */ + for (i = &tdbs; *i; i = &(*i)->next) { + if (*i == tdb) { + *i = tdb->next; + break; + } + } + + memset(tdb, 0, sizeof(*tdb)); + SAFE_FREE(tdb); + + return ret; +} + +/* register a loging function */ +void tdb_set_logging_function(struct tdb_context *tdb, + const struct tdb_logging_context *log_ctx) +{ + tdb->log = *log_ctx; +} + +void *tdb_get_logging_private(struct tdb_context *tdb) +{ + return tdb->log.log_private; +} + +/* reopen a tdb - this can be used after a fork to ensure that we have an independent + seek pointer from our parent and to re-establish locks */ +int tdb_reopen(struct tdb_context *tdb) +{ + struct stat st; + + if (tdb->flags & TDB_INTERNAL) { + return 0; /* Nothing to do. 
*/ + } + + if (tdb->num_locks != 0 || tdb->global_lock.count) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_reopen: reopen not allowed with locks held\n")); + goto fail; + } + + if (tdb->transaction != 0) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_reopen: reopen not allowed inside a transaction\n")); + goto fail; + } + + if (tdb_munmap(tdb) != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: munmap failed (%s)\n", strerror(errno))); + goto fail; + } + if (close(tdb->fd) != 0) + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: WARNING closing tdb->fd failed!\n")); + tdb->fd = open(tdb->name, tdb->open_flags & ~(O_CREAT|O_TRUNC), 0); + if (tdb->fd == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: open failed (%s)\n", strerror(errno))); + goto fail; + } + if ((tdb->flags & TDB_CLEAR_IF_FIRST) && + (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0, 1) == -1)) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: failed to obtain active lock\n")); + goto fail; + } + if (fstat(tdb->fd, &st) != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: fstat failed (%s)\n", strerror(errno))); + goto fail; + } + if (st.st_ino != tdb->inode || st.st_dev != tdb->device) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: file dev/inode has changed!\n")); + goto fail; + } + tdb_mmap(tdb); + + return 0; + +fail: + tdb_close(tdb); + return -1; +} + +/* reopen all tdb's */ +int tdb_reopen_all(int parent_longlived) +{ + struct tdb_context *tdb; + + for (tdb=tdbs; tdb; tdb = tdb->next) { + /* + * If the parent is longlived (ie. a + * parent daemon architecture), we know + * it will keep it's active lock on a + * tdb opened with CLEAR_IF_FIRST. Thus + * for child processes we don't have to + * add an active lock. This is essential + * to improve performance on systems that + * keep POSIX locks as a non-scalable data + * structure in the kernel. + */ + if (parent_longlived) { + /* Ensure no clear-if-first. 
*/ + tdb->flags &= ~TDB_CLEAR_IF_FIRST; + } + + if (tdb_reopen(tdb) != 0) + return -1; + } + + return 0; +} diff --git a/fs/fat/Makefile b/fs/fat/Makefile new file mode 100755 index 0000000..e462757 --- /dev/null +++ b/fs/fat/Makefile @@ -0,0 +1,46 @@ +# +# +# See file CREDITS for list of people who contributed to this +# project. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# + +TOPDIR=../../ + +include $(TOPDIR)/config.mk + +LIB = libfat.a + +AOBJS = +COBJS = fat.o file.o + +OBJS = $(AOBJS) $(COBJS) + +all: $(LIB) $(AOBJS) + +$(LIB): .depend $(OBJS) + $(AR) crv $@ $(OBJS) + + +######################################################################### + +.depend: Makefile $(AOBJS:.o=.S) $(COBJS:.o=.c) + $(CC) -M $(CFLAGS) $(AOBJS:.o=.S) $(COBJS:.o=.c) > $@ + +sinclude .depend + +######################################################################### diff --git a/fs/fat/fat.c b/fs/fat/fat.c new file mode 100755 index 0000000..26c4668 --- /dev/null +++ b/fs/fat/fat.c @@ -0,0 +1,3023 @@ +/*++ +Copyright (c) 2010 WonderMedia Technologies, Inc. + +This program is free software: you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation, either version 2 of the License, or (at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PARTICULAR PURPOSE. See the GNU General Public License for more details. You +should have received a copy of the GNU General Public License along with this +program. If not, see http://www.gnu.org/licenses/>. + +WonderMedia Technologies, Inc. +10F, 529, Chung-Cheng Road, Hsin-Tien, Taipei 231, R.O.C. +--*/ +/* + * fat.c + * + * R/O (V)FAT 12/16/32 filesystem implementation by Marcus Sundberg + * + * 2002-07-28 - rjones@nexus-tech.net - ported to ppcboot v1.1.6 + * 2003-03-10 - kharris@nexus-tech.net - ported to uboot + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ + +#include <common.h> +#include <config.h> +#include <exports.h> +#include <fat.h> +#include <asm/byteorder.h> +#include <part.h> +#include <malloc.h> +#include <linux/compiler.h> + +/* + * Convert a string to lowercase. 
+ */ +static void downcase(char *str) +{ + while (*str != '\0') { + TOLOWER(*str); + str++; + } +} + +static block_dev_desc_t *cur_dev; +static unsigned int cur_part_nr; +static disk_partition_t cur_part_info; +static int total_sector; + + +#define DOS_BOOT_MAGIC_OFFSET 0x1fe +#define DOS_FS_TYPE_OFFSET 0x36 +#define DOS_FS32_TYPE_OFFSET 0x52 + +static int disk_read(__u32 block, __u32 nr_blocks, void *buf) +{ + if (!cur_dev || !cur_dev->block_read) + return -1; + + return cur_dev->block_read(cur_dev->dev, + cur_part_info.start + block, nr_blocks, buf); +} + + + + +int fatpre_register_device(block_dev_desc_t * dev_desc, int part_no) +{ + /* First close any currently found FAT filesystem */ + cur_dev = NULL; + +#if (defined(CONFIG_CMD_IDE) || \ + defined(CONFIG_CMD_SATA) || \ + defined(CONFIG_CMD_SCSI) || \ + defined(CONFIG_CMD_USB) || \ + defined(CONFIG_MMC) || \ + defined(CONFIG_SYSTEMACE) ) + + /* Read the partition table, if present */ + if (!get_partition_info(dev_desc, part_no, &cur_part_info)) { + cur_dev = dev_desc; + cur_part_nr = part_no; + } +#endif + + /* Otherwise it might be a superfloppy (whole-disk FAT filesystem) */ + if (!cur_dev) { + /*if (part_no != 0) { + printf("** Partition %d not valid on device %d **\n", + part_no, dev_desc->dev); + return -1; + }*/ + + cur_dev = dev_desc; + cur_part_nr = 1; + cur_part_info.start = 0; + cur_part_info.size = dev_desc->lba; + cur_part_info.blksz = dev_desc->blksz; + memset(cur_part_info.name, 0, sizeof(cur_part_info.name)); + memset(cur_part_info.type, 0, sizeof(cur_part_info.type)); + } + total_sector = cur_part_info.size; + return 0; +} + +int fat_register_device(block_dev_desc_t * dev_desc, int part_no) +{ + ALLOC_CACHE_ALIGN_BUFFER(unsigned char, buffer, dev_desc->blksz); +#if 0 + /* First close any currently found FAT filesystem */ + cur_dev = NULL; + +#if (defined(CONFIG_CMD_IDE) || \ + defined(CONFIG_CMD_SATA) || \ + defined(CONFIG_CMD_SCSI) || \ + defined(CONFIG_CMD_USB) || \ + defined(CONFIG_MMC) || \ 
+ defined(CONFIG_SYSTEMACE) ) + + /* Read the partition table, if present */ + if (!get_partition_info(dev_desc, part_no, &cur_part_info)) { + cur_dev = dev_desc; + cur_part_nr = part_no; + } +#endif + + /* Otherwise it might be a superfloppy (whole-disk FAT filesystem) */ + if (!cur_dev) { + if (part_no != 0) { + printf("** Partition %d not valid on device %d **\n", + part_no, dev_desc->dev); + return -1; + } + + cur_dev = dev_desc; + cur_part_nr = 1; + cur_part_info.start = 0; + cur_part_info.size = dev_desc->lba; + cur_part_info.blksz = dev_desc->blksz; + memset(cur_part_info.name, 0, sizeof(cur_part_info.name)); + memset(cur_part_info.type, 0, sizeof(cur_part_info.type)); + } +#endif + if (fatpre_register_device(dev_desc, part_no)) + return -1; + + /* Make sure it has a valid FAT header */ + if (disk_read(0, 1, buffer) != 1) { + cur_dev = NULL; + printf("Cannot read from disk\n"); + return -1; + } + + /* Check if it's actually a DOS volume */ + if (memcmp(buffer + DOS_BOOT_MAGIC_OFFSET, "\x55\xAA", 2)) { + printf("It is not a DOS volume\n"); + cur_dev = NULL; + return -1; + } + + /* Check for FAT12/FAT16/FAT32 filesystem */ + if (!memcmp(buffer + DOS_FS_TYPE_OFFSET, "FAT", 3)) + return 0; + if (!memcmp(buffer + DOS_FS32_TYPE_OFFSET, "FAT32", 5)) + return 0; + + printf("The volume cannot be recognized\n"); + cur_dev = NULL; + return -1; +} + +/* + * Get the first occurence of a directory delimiter ('/' or '\') in a string. + * Return index into string if found, -1 otherwise. + */ +static int dirdelim(char *str) +{ + char *start = str; + + while (*str != '\0') { + if (ISDIRDELIM(*str)) + return str - start; + str++; + } + return -1; +} + +/* + * Extract zero terminated short name from a directory entry. 
+ */ +static void get_name(dir_entry *dirent, char *s_name) +{ + char *ptr; + + memcpy(s_name, dirent->name, 8); + s_name[8] = '\0'; + ptr = s_name; + while (*ptr && *ptr != ' ') + ptr++; + if (dirent->ext[0] && dirent->ext[0] != ' ') { + *ptr = '.'; + ptr++; + memcpy(ptr, dirent->ext, 3); + ptr[3] = '\0'; + while (*ptr && *ptr != ' ') + ptr++; + } + *ptr = '\0'; + if (*s_name == DELETED_FLAG) + *s_name = '\0'; + else if (*s_name == aRING) + *s_name = DELETED_FLAG; + downcase(s_name); +} + +/* + * Get the entry at index 'entry' in a FAT (12/16/32) table. + * On failure 0x00 is returned. + */ +static __u32 get_fatent(fsdata *mydata, __u32 entry) +{ + __u32 bufnum; + __u32 off16, offset; + __u32 ret = 0x00; + __u16 val1, val2; + + switch (mydata->fatsize) { + case 32: + bufnum = entry / FAT32BUFSIZE; + offset = entry - bufnum * FAT32BUFSIZE; + break; + case 16: + bufnum = entry / FAT16BUFSIZE; + offset = entry - bufnum * FAT16BUFSIZE; + break; + case 12: + bufnum = entry / FAT12BUFSIZE; + offset = entry - bufnum * FAT12BUFSIZE; + break; + + default: + /* Unsupported FAT size */ + return ret; + } + + debug("FAT%d: entry: 0x%04x = %d, offset: 0x%04x = %d\n", + mydata->fatsize, entry, entry, offset, offset); + + /* Read a new block of FAT entries into the cache. 
*/ + if (bufnum != mydata->fatbufnum) { + __u32 getsize = FATBUFBLOCKS; + __u8 *bufptr = mydata->fatbuf; + __u32 fatlength = mydata->fatlength; + __u32 startblock = bufnum * FATBUFBLOCKS; + + if (startblock + getsize > fatlength) + getsize = fatlength - startblock; + + startblock += mydata->fat_sect; /* Offset from start of disk */ + + if (disk_read(startblock, getsize, bufptr) < 0) { + debug("Error reading FAT blocks\n"); + return ret; + } + mydata->fatbufnum = bufnum; + } + + /* Get the actual entry from the table */ + switch (mydata->fatsize) { + case 32: + ret = FAT2CPU32(((__u32 *) mydata->fatbuf)[offset]); + break; + case 16: + ret = FAT2CPU16(((__u16 *) mydata->fatbuf)[offset]); + break; + case 12: + off16 = (offset * 3) / 4; + + switch (offset & 0x3) { + case 0: + ret = FAT2CPU16(((__u16 *) mydata->fatbuf)[off16]); + ret &= 0xfff; + break; + case 1: + val1 = FAT2CPU16(((__u16 *)mydata->fatbuf)[off16]); + val1 &= 0xf000; + val2 = FAT2CPU16(((__u16 *)mydata->fatbuf)[off16 + 1]); + val2 &= 0x00ff; + ret = (val2 << 4) | (val1 >> 12); + break; + case 2: + val1 = FAT2CPU16(((__u16 *)mydata->fatbuf)[off16]); + val1 &= 0xff00; + val2 = FAT2CPU16(((__u16 *)mydata->fatbuf)[off16 + 1]); + val2 &= 0x000f; + ret = (val2 << 8) | (val1 >> 8); + break; + case 3: + ret = FAT2CPU16(((__u16 *)mydata->fatbuf)[off16]); + ret = (ret & 0xfff0) >> 4; + break; + default: + break; + } + break; + } + debug("FAT%d: ret: %08x, offset: %04x\n", + mydata->fatsize, ret, offset); + + return ret; +} + +/* + * Read at most 'size' bytes from the specified cluster into 'buffer'. + * Return 0 on success, -1 otherwise. 
+ */ +static int +get_cluster(fsdata *mydata, __u32 clustnum, __u8 *buffer, unsigned long size) +{ + __u32 idx = 0; + __u32 startsect; + int ret; + + if (clustnum > 0) { + startsect = mydata->data_begin + + clustnum * mydata->clust_size; + if (mydata->fatsize!=32) {//this is for windows format + if (startsect < mydata->rootdir_sect){ + debug("get_cluster\n"); + startsect=mydata->rootdir_sect; + } + } + } else { + startsect = mydata->rootdir_sect; + } + + debug("gc - clustnum: %d, startsect: %d\n", clustnum, startsect); + + if ((unsigned long)buffer & (ARCH_DMA_MINALIGN - 1)) { + ALLOC_CACHE_ALIGN_BUFFER(__u8, tmpbuf, mydata->sect_size); + + printf("FAT: Misaligned buffer address (%p)\n", buffer); + + while (size >= mydata->sect_size) { + ret = disk_read(startsect++, 1, tmpbuf); + if (ret != 1) { + debug("Error reading data (got %d)\n", ret); + return -1; + } + + memcpy(buffer, tmpbuf, mydata->sect_size); + buffer += mydata->sect_size; + size -= mydata->sect_size; + } + } else { + idx = size / mydata->sect_size; + ret = disk_read(startsect, idx, buffer); + if (ret != idx) { + debug("Error reading data (got %d)\n", ret); + return -1; + } + startsect += idx; + idx *= mydata->sect_size; + buffer += idx; + size -= idx; + } + if (size) { + ALLOC_CACHE_ALIGN_BUFFER(__u8, tmpbuf, mydata->sect_size); + + ret = disk_read(startsect, 1, tmpbuf); + if (ret != 1) { + debug("Error reading data (got %d)\n", ret); + return -1; + } + + memcpy(buffer, tmpbuf, size); + } + + return 0; +} + +/* + * Read at most 'maxsize' bytes from 'pos' in the file associated with 'dentptr' + * into 'buffer'. + * Return the number of bytes read or -1 on fatal errors. 
+ */ +__u8 get_contents_vfatname_block[MAX_CLUSTSIZE] + __aligned(ARCH_DMA_MINALIGN); + +static long +get_contents(fsdata *mydata, dir_entry *dentptr, unsigned long pos, + __u8 *buffer, unsigned long maxsize) +{ + unsigned long filesize = FAT2CPU32(dentptr->size), gotsize = 0; + unsigned int bytesperclust = mydata->clust_size * mydata->sect_size; + __u32 curclust = START(dentptr); + __u32 endclust, newclust; + unsigned long actsize; + + debug("Filesize: %ld bytes\n", filesize); + + if (pos >= filesize) { + debug("Read position past EOF: %lu\n", pos); + return gotsize; + } + + if (maxsize > 0 && filesize > pos + maxsize) + filesize = pos + maxsize; + + debug("%ld bytes\n", filesize); + + actsize = bytesperclust; + + /* go to cluster at pos */ + while (actsize <= pos) { + curclust = get_fatent(mydata, curclust); + if (CHECK_CLUST(curclust, mydata->fatsize)) { + debug("curclust: 0x%x\n", curclust); + debug("Invalid FAT entry\n"); + return gotsize; + } + actsize += bytesperclust; + } + + /* actsize > pos */ + actsize -= bytesperclust; + filesize -= actsize; + pos -= actsize; + + /* align to beginning of next cluster if any */ + if (pos) { + actsize = min(filesize, bytesperclust); + if (get_cluster(mydata, curclust, get_contents_vfatname_block, + (int)actsize) != 0) { + printf("Error reading cluster\n"); + return -1; + } + filesize -= actsize; + actsize -= pos; + memcpy(buffer, get_contents_vfatname_block + pos, actsize); + gotsize += actsize; + if (!filesize) + return gotsize; + buffer += actsize; + + curclust = get_fatent(mydata, curclust); + if (CHECK_CLUST(curclust, mydata->fatsize)) { + debug("curclust: 0x%x\n", curclust); + debug("Invalid FAT entry\n"); + return gotsize; + } + } + + actsize = bytesperclust; + endclust = curclust; + + do { + /* search for consecutive clusters */ + while (actsize < filesize) { + newclust = get_fatent(mydata, endclust); + if ((newclust - 1) != endclust) + goto getit; + if (CHECK_CLUST(newclust, mydata->fatsize)) { + debug("curclust: 
0x%x\n", newclust); + debug("Invalid FAT entry\n"); + return gotsize; + } + endclust = newclust; + actsize += bytesperclust; + } + + /* get remaining bytes */ + actsize = filesize; + if (get_cluster(mydata, curclust, buffer, (int)actsize) != 0) { + printf("Error reading cluster\n"); + return -1; + } + gotsize += actsize; + return gotsize; +getit: + if (get_cluster(mydata, curclust, buffer, (int)actsize) != 0) { + printf("Error reading cluster\n"); + return -1; + } + gotsize += (int)actsize; + filesize -= actsize; + buffer += actsize; + + curclust = get_fatent(mydata, endclust); + if (CHECK_CLUST(curclust, mydata->fatsize)) { + debug("curclust: 0x%x\n", curclust); + printf("Invalid FAT entry\n"); + return gotsize; + } + actsize = bytesperclust; + endclust = curclust; + } while (1); +} + +#ifdef CONFIG_SUPPORT_VFAT +/* + * Extract the file name information from 'slotptr' into 'l_name', + * starting at l_name[*idx]. + * Return 1 if terminator (zero byte) is found, 0 otherwise. + */ +static int slot2str(dir_slot *slotptr, char *l_name, int *idx) +{ + int j; + + for (j = 0; j <= 8; j += 2) { + l_name[*idx] = slotptr->name0_4[j]; + if (l_name[*idx] == 0x00) + return 1; + (*idx)++; + } + for (j = 0; j <= 10; j += 2) { + l_name[*idx] = slotptr->name5_10[j]; + if (l_name[*idx] == 0x00) + return 1; + (*idx)++; + } + for (j = 0; j <= 2; j += 2) { + l_name[*idx] = slotptr->name11_12[j]; + if (l_name[*idx] == 0x00) + return 1; + (*idx)++; + } + + return 0; +} + +/* + * Extract the full long filename starting at 'retdent' (which is really + * a slot) into 'l_name'. If successful also copy the real directory entry + * into 'retdent' + * Return 0 on success, -1 otherwise. + */ +static int +get_vfatname(fsdata *mydata, int curclust, __u8 *cluster, + dir_entry *retdent, char *l_name) +{ + dir_entry *realdent; + dir_slot *slotptr = (dir_slot *)retdent; + __u8 *buflimit = cluster + mydata->sect_size * ((curclust == 0) ? 
+ PREFETCH_BLOCKS : + mydata->clust_size); + __u8 counter = (slotptr->id & ~LAST_LONG_ENTRY_MASK) & 0xff; + int idx = 0; + + if (counter > VFAT_MAXSEQ) { + debug("Error: VFAT name is too long\n"); + return -1; + } + + while ((__u8 *)slotptr < buflimit) { + if (counter == 0) + break; + if (((slotptr->id & ~LAST_LONG_ENTRY_MASK) & 0xff) != counter) + return -1; + slotptr++; + counter--; + } + + if ((__u8 *)slotptr >= buflimit) { + dir_slot *slotptr2; + + if (curclust == 0) + return -1; + curclust = get_fatent(mydata, curclust); + if (CHECK_CLUST(curclust, mydata->fatsize)) { + debug("curclust: 0x%x\n", curclust); + printf("Invalid FAT entry\n"); + return -1; + } + + if (get_cluster(mydata, curclust, get_contents_vfatname_block, + mydata->clust_size * mydata->sect_size) != 0) { + debug("Error: reading directory block\n"); + return -1; + } + + slotptr2 = (dir_slot *)get_contents_vfatname_block; + while (counter > 0) { + if (((slotptr2->id & ~LAST_LONG_ENTRY_MASK) + & 0xff) != counter) + return -1; + slotptr2++; + counter--; + } + + /* Save the real directory entry */ + realdent = (dir_entry *)slotptr2; + while ((__u8 *)slotptr2 > get_contents_vfatname_block) { + slotptr2--; + slot2str(slotptr2, l_name, &idx); + } + } else { + /* Save the real directory entry */ + realdent = (dir_entry *)slotptr; + } + + do { + slotptr--; + if (slot2str(slotptr, l_name, &idx)) + break; + } while (!(slotptr->id & LAST_LONG_ENTRY_MASK)); + + l_name[idx] = '\0'; + if (*l_name == DELETED_FLAG) + *l_name = '\0'; + else if (*l_name == aRING) + *l_name = DELETED_FLAG; + downcase(l_name); + + /* Return the real directory entry */ + memcpy(retdent, realdent, sizeof(dir_entry)); + + return 0; +} + +/* Calculate short name checksum */ +static __u8 mkcksum(const char *str) +{ + int i; + + __u8 ret = 0; + + for (i = 0; i < 11; i++) { + ret = (((ret & 1) << 7) | ((ret & 0xfe) >> 1)) + str[i]; + } + + return ret; +} +#endif /* CONFIG_SUPPORT_VFAT */ + +/* + * Get the directory entry associated with 
'filename' from the directory + * starting at 'startsect' + */ +__u8 get_dentfromdir_block[MAX_CLUSTSIZE] + __aligned(ARCH_DMA_MINALIGN); +char fat_fwc[260]; + +static dir_entry *get_dentfromdir(fsdata *mydata, int startsect, + char *filename, dir_entry *retdent, + int dols) +{ + __u16 prevcksum = 0xffff; + __u32 curclust = START(retdent); + int files = 0, dirs = 0; + + debug("get_dentfromdir: %s\n", filename); + + while (1) { + dir_entry *dentptr; + + int i; + + if (get_cluster(mydata, curclust, get_dentfromdir_block, + mydata->clust_size * mydata->sect_size) != 0) { + debug("Error: reading directory block\n"); + return NULL; + } + + dentptr = (dir_entry *)get_dentfromdir_block; + + for (i = 0; i < DIRENTSPERCLUST; i++) { + char s_name[14], l_name[VFAT_MAXLEN_BYTES]; + + l_name[0] = '\0'; + if (dentptr->name[0] == DELETED_FLAG) { + dentptr++; + continue; + } + if ((dentptr->attr & ATTR_VOLUME)) { +#ifdef CONFIG_SUPPORT_VFAT + if ((dentptr->attr & ATTR_VFAT) == ATTR_VFAT && + (dentptr->name[0] & LAST_LONG_ENTRY_MASK)) { + prevcksum = ((dir_slot *)dentptr)->alias_checksum; + get_vfatname(mydata, curclust, + get_dentfromdir_block, + dentptr, l_name); + if (dols) { + int isdir; + char dirc; + int doit = 0; + + isdir = (dentptr->attr & ATTR_DIR); + + if (isdir) { + dirs++; + dirc = '/'; + doit = 1; + } else { + dirc = ' '; + if (l_name[0] != 0) { + files++; + doit = 1; + } + } + if (doit) { + if (dirc == ' ') { + printf(" %8ld %s%c\n", + (long)FAT2CPU32(dentptr->size), + l_name, + dirc); + if ((l_name[0] == '+') && (strstr(l_name, ".fwc") > 0)) { + strcpy(fat_fwc, l_name); + } + } else { + printf(" %s%c\n", + l_name, + dirc); + } + } + dentptr++; + continue; + } + debug("vfatname: |%s|\n", l_name); + } else +#endif + { + /* Volume label or VFAT entry */ + dentptr++; + continue; + } + } + if (dentptr->name[0] == 0) { + if (dols) { + printf("\n%d file(s), %d dir(s)\n\n", + files, dirs); + } + debug("Dentname == NULL - %d\n", i); + return NULL; + } +#ifdef 
CONFIG_SUPPORT_VFAT + if (dols && mkcksum(dentptr->name) == prevcksum) { + prevcksum = 0xffff; + dentptr++; + continue; + } +#endif + get_name(dentptr, s_name); + if (dols) { + int isdir = (dentptr->attr & ATTR_DIR); + char dirc; + int doit = 0; + + if (isdir) { + dirs++; + dirc = '/'; + doit = 1; + } else { + dirc = ' '; + if (s_name[0] != 0) { + files++; + doit = 1; + } + } + + if (doit) { + if (dirc == ' ') { + printf(" %8ld %s%c\n", + (long)FAT2CPU32(dentptr->size), + s_name, dirc); + } else { + printf(" %s%c\n", + s_name, dirc); + } + } + + dentptr++; + continue; + } + + if (strcmp(filename, s_name) + && strcmp(filename, l_name)) { + debug("Mismatch: |%s|%s|\n", s_name, l_name); + dentptr++; + continue; + } + + memcpy(retdent, dentptr, sizeof(dir_entry)); + + debug("DentName: %s", s_name); + debug(", start: 0x%x", START(dentptr)); + debug(", size: 0x%x %s\n", + FAT2CPU32(dentptr->size), + (dentptr->attr & ATTR_DIR) ? "(DIR)" : ""); + + return retdent; + } + + curclust = get_fatent(mydata, curclust); + if (CHECK_CLUST(curclust, mydata->fatsize)) { + debug("curclust: 0x%x\n", curclust); + printf("Invalid FAT entry\n"); + return NULL; + } + } + + return NULL; +} + +/* + * Read boot sector and volume info from a FAT filesystem + */ +static int +read_bootsectandvi(boot_sector *bs, volume_info *volinfo, int *fatsize) +{ + __u8 *block; + volume_info *vistart; + int ret = 0; + + if (cur_dev == NULL) { + debug("Error: no device selected\n"); + return -1; + } + + block = memalign(ARCH_DMA_MINALIGN, cur_dev->blksz); + if (block == NULL) { + debug("Error: allocating block\n"); + return -1; + } + + if (disk_read(0, 1, block) < 0) { + debug("Error: reading block\n"); + goto fail; + } + + memcpy(bs, block, sizeof(boot_sector)); + bs->reserved = FAT2CPU16(bs->reserved); + bs->fat_length = FAT2CPU16(bs->fat_length); + bs->secs_track = FAT2CPU16(bs->secs_track); + bs->heads = FAT2CPU16(bs->heads); + bs->total_sect = FAT2CPU32(bs->total_sect); + + /* FAT32 entries */ + if 
(bs->fat_length == 0) { + /* Assume FAT32 */ + bs->fat32_length = FAT2CPU32(bs->fat32_length); + bs->flags = FAT2CPU16(bs->flags); + bs->root_cluster = FAT2CPU32(bs->root_cluster); + bs->info_sector = FAT2CPU16(bs->info_sector); + bs->backup_boot = FAT2CPU16(bs->backup_boot); + vistart = (volume_info *)(block + sizeof(boot_sector)); + *fatsize = 32; + } else { + vistart = (volume_info *)&(bs->fat32_length); + *fatsize = 0; + } + memcpy(volinfo, vistart, sizeof(volume_info)); + + if (*fatsize == 32) { + if (strncmp(FAT32_SIGN, vistart->fs_type, SIGNLEN) == 0) + goto exit; + } else { + if (strncmp(FAT12_SIGN, vistart->fs_type, SIGNLEN) == 0) { + *fatsize = 12; + goto exit; + } + if (strncmp(FAT16_SIGN, vistart->fs_type, SIGNLEN) == 0) { + *fatsize = 16; + goto exit; + } + } + + debug("Error: broken fs_type sign\n"); +fail: + ret = -1; +exit: + free(block); + return ret; +} + +__u8 do_fat_read_at_block[MAX_CLUSTSIZE] + __aligned(ARCH_DMA_MINALIGN); + +long +do_fat_read_at(const char *filename, unsigned long pos, void *buffer, + unsigned long maxsize, int dols) +{ + char fnamecopy[2048]; + boot_sector bs; + volume_info volinfo; + fsdata datablock; + fsdata *mydata = &datablock; + dir_entry *dentptr = NULL; + __u16 prevcksum = 0xffff; + char *subname = ""; + __u32 cursect; + int idx, isdir = 0; + int files = 0, dirs = 0; + long ret = -1; + int firsttime; + __u32 root_cluster = 0; + int rootdir_size = 0; + int j; + + if (read_bootsectandvi(&bs, &volinfo, &mydata->fatsize)) { + debug("Error: reading boot sector\n"); + return -1; + } + + if (mydata->fatsize == 32) { + root_cluster = bs.root_cluster; + mydata->fatlength = bs.fat32_length; + } else { + mydata->fatlength = bs.fat_length; + } + + mydata->fat_sect = bs.reserved; + + cursect = mydata->rootdir_sect + = mydata->fat_sect + mydata->fatlength * bs.fats; + + mydata->sect_size = (bs.sector_size[1] << 8) + bs.sector_size[0]; + mydata->clust_size = bs.cluster_size; + if (mydata->sect_size != cur_part_info.blksz) { + 
printf("Error: FAT sector size mismatch (fs=%hu, dev=%lu)\n", + mydata->sect_size, cur_part_info.blksz); + return -1; + } + + if (mydata->fatsize == 32) { + mydata->data_begin = mydata->rootdir_sect - + (mydata->clust_size * 2); + } else { + rootdir_size = ((bs.dir_entries[1] * (int)256 + + bs.dir_entries[0]) * + sizeof(dir_entry)) / + mydata->sect_size; + mydata->data_begin = mydata->rootdir_sect + + rootdir_size - + (mydata->clust_size * 2); + } + + mydata->fatbufnum = -1; + mydata->fatbuf = memalign(ARCH_DMA_MINALIGN, FATBUFSIZE); + if (mydata->fatbuf == NULL) { + debug("Error: allocating memory\n"); + return -1; + } + +#ifdef CONFIG_SUPPORT_VFAT + debug("VFAT Support enabled\n"); +#endif + debug("FAT%d, fat_sect: %d, fatlength: %d\n", + mydata->fatsize, mydata->fat_sect, mydata->fatlength); + debug("Rootdir begins at cluster: %d, sector: %d, offset: %x\n" + "Data begins at: %d\n", + root_cluster, + mydata->rootdir_sect, + mydata->rootdir_sect * mydata->sect_size, mydata->data_begin); + debug("Sector size: %d, cluster size: %d\n", mydata->sect_size, + mydata->clust_size); + + /* "cwd" is always the root... */ + while (ISDIRDELIM(*filename)) + filename++; + + /* Make a copy of the filename and convert it to lowercase */ + strcpy(fnamecopy, filename); + downcase(fnamecopy); + + if (*fnamecopy == '\0') { + if (!dols) + goto exit; + + dols = LS_ROOT; + } else if ((idx = dirdelim(fnamecopy)) >= 0) { + isdir = 1; + fnamecopy[idx] = '\0'; + subname = fnamecopy + idx + 1; + + /* Handle multiple delimiters */ + while (ISDIRDELIM(*subname)) + subname++; + } else if (dols) { + isdir = 1; + } + + j = 0; + while (1) { + int i; + + if (j == 0) { + debug("FAT read sect=%d, clust_size=%d, DIRENTSPERBLOCK=%zd\n", + cursect, mydata->clust_size, DIRENTSPERBLOCK); + + if (disk_read(cursect, + (mydata->fatsize == 32) ? 
+ (mydata->clust_size) : + PREFETCH_BLOCKS, + do_fat_read_at_block) < 0) { + debug("Error: reading rootdir block\n"); + goto exit; + } + + dentptr = (dir_entry *) do_fat_read_at_block; + } + + for (i = 0; i < DIRENTSPERBLOCK; i++) { + char s_name[14], l_name[VFAT_MAXLEN_BYTES]; + + l_name[0] = '\0'; + if (dentptr->name[0] == DELETED_FLAG) { + dentptr++; + continue; + } + if ((dentptr->attr & ATTR_VOLUME)) { +#ifdef CONFIG_SUPPORT_VFAT + if ((dentptr->attr & ATTR_VFAT) == ATTR_VFAT && + (dentptr->name[0] & LAST_LONG_ENTRY_MASK)) { + prevcksum = + ((dir_slot *)dentptr)->alias_checksum; + + get_vfatname(mydata, + root_cluster, + do_fat_read_at_block, + dentptr, l_name); + + if (dols == LS_ROOT) { + char dirc; + int doit = 0; + int isdir = + (dentptr->attr & ATTR_DIR); + + if (isdir) { + dirs++; + dirc = '/'; + doit = 1; + } else { + dirc = ' '; + if (l_name[0] != 0) { + files++; + doit = 1; + } + } + if (doit) { + if (dirc == ' ') { + printf(" %8ld %s%c\n", + (long)FAT2CPU32(dentptr->size), + l_name, + dirc); + } else { + printf(" %s%c\n", + l_name, + dirc); + } + } + dentptr++; + continue; + } + debug("Rootvfatname: |%s|\n", + l_name); + } else +#endif + { + /* Volume label or VFAT entry */ + dentptr++; + continue; + } + } else if (dentptr->name[0] == 0) { + debug("RootDentname == NULL - %d\n", i); + if (dols == LS_ROOT) { + printf("\n%d file(s), %d dir(s)\n\n", + files, dirs); + ret = 0; + } + goto exit; + } +#ifdef CONFIG_SUPPORT_VFAT + else if (dols == LS_ROOT && + mkcksum(dentptr->name) == prevcksum) { + prevcksum = 0xffff; + dentptr++; + continue; + } +#endif + get_name(dentptr, s_name); + + if (dols == LS_ROOT) { + int isdir = (dentptr->attr & ATTR_DIR); + char dirc; + int doit = 0; + + if (isdir) { + dirc = '/'; + if (s_name[0] != 0) { + dirs++; + doit = 1; + } + } else { + dirc = ' '; + if (s_name[0] != 0) { + files++; + doit = 1; + } + } + if (doit) { + if (dirc == ' ') { + printf(" %8ld %s%c\n", + (long)FAT2CPU32(dentptr->size), + s_name, dirc); + } else { 
+ printf(" %s%c\n", + s_name, dirc); + } + } + dentptr++; + continue; + } + + if (strcmp(fnamecopy, s_name) + && strcmp(fnamecopy, l_name)) { + debug("RootMismatch: |%s|%s|\n", s_name, + l_name); + dentptr++; + continue; + } + + if (isdir && !(dentptr->attr & ATTR_DIR)) + goto exit; + + debug("RootName: %s", s_name); + debug(", start: 0x%x", START(dentptr)); + debug(", size: 0x%x %s\n", + FAT2CPU32(dentptr->size), + isdir ? "(DIR)" : ""); + + goto rootdir_done; /* We got a match */ + } + debug("END LOOP: j=%d clust_size=%d\n", j, + mydata->clust_size); + + /* + * On FAT32 we must fetch the FAT entries for the next + * root directory clusters when a cluster has been + * completely processed. + */ + ++j; + int rootdir_end = 0; + if (mydata->fatsize == 32) { + if (j == mydata->clust_size) { + int nxtsect = 0; + int nxt_clust = 0; + + nxt_clust = get_fatent(mydata, root_cluster); + rootdir_end = CHECK_CLUST(nxt_clust, 32); + + nxtsect = mydata->data_begin + + (nxt_clust * mydata->clust_size); + + root_cluster = nxt_clust; + + cursect = nxtsect; + j = 0; + } + } else { + if (j == PREFETCH_BLOCKS) + j = 0; + + rootdir_end = (++cursect - mydata->rootdir_sect >= + rootdir_size); + } + + /* If end of rootdir reached */ + if (rootdir_end) { + if (dols == LS_ROOT) { + printf("\n%d file(s), %d dir(s)\n\n", + files, dirs); + ret = 0; + } + goto exit; + } + } +rootdir_done: + + firsttime = 1; + + while (isdir) { + int startsect = mydata->data_begin + + START(dentptr) * mydata->clust_size; + dir_entry dent; + char *nextname = NULL; + + dent = *dentptr; + dentptr = &dent; + + idx = dirdelim(subname); + + if (idx >= 0) { + subname[idx] = '\0'; + nextname = subname + idx + 1; + /* Handle multiple delimiters */ + while (ISDIRDELIM(*nextname)) + nextname++; + if (dols && *nextname == '\0') + firsttime = 0; + } else { + if (dols && firsttime) { + firsttime = 0; + } else { + isdir = 0; + } + } + + if (get_dentfromdir(mydata, startsect, subname, dentptr, + isdir ? 
0 : dols) == NULL) { + if (dols && !isdir) + ret = 0; + goto exit; + } + + if (isdir && !(dentptr->attr & ATTR_DIR)) + goto exit; + + if (idx >= 0) + subname = nextname; + } + + ret = get_contents(mydata, dentptr, pos, buffer, maxsize); + debug("Size: %d, got: %ld\n", FAT2CPU32(dentptr->size), ret); + +exit: + free(mydata->fatbuf); + return ret; +} + +long +do_fat_read(const char *filename, void *buffer, unsigned long maxsize, int dols) +{ + return do_fat_read_at(filename, 0, buffer, maxsize, dols); +} + +int file_fat_detectfs(void) +{ + boot_sector bs; + volume_info volinfo; + int fatsize; + char vol_label[12]; + + if (cur_dev == NULL) { + printf("No current device\n"); + return 1; + } + +#if defined(CONFIG_CMD_IDE) || \ + defined(CONFIG_CMD_SATA) || \ + defined(CONFIG_CMD_SCSI) || \ + defined(CONFIG_CMD_USB) || \ + defined(CONFIG_MMC) + printf("Interface: "); + switch (cur_dev->if_type) { + case IF_TYPE_IDE: + printf("IDE"); + break; + case IF_TYPE_SATA: + printf("SATA"); + break; + case IF_TYPE_SCSI: + printf("SCSI"); + break; + case IF_TYPE_ATAPI: + printf("ATAPI"); + break; + case IF_TYPE_USB: + printf("USB"); + break; + case IF_TYPE_DOC: + printf("DOC"); + break; + case IF_TYPE_MMC: + printf("MMC"); + break; + default: + printf("Unknown"); + } + + printf("\n Device %d: ", cur_dev->dev); + dev_print(cur_dev); +#endif + + if (read_bootsectandvi(&bs, &volinfo, &fatsize)) { + printf("\nNo valid FAT fs found\n"); + return 1; + } + + memcpy(vol_label, volinfo.volume_label, 11); + vol_label[11] = '\0'; + volinfo.fs_type[5] = '\0'; + + printf("Partition %d: Filesystem: %s \"%s\"\n", cur_part_nr, + volinfo.fs_type, vol_label); + + return 0; +} + +int file_fat_ls(const char *dir) +{ + return do_fat_read(dir, NULL, 0, LS_YES); +} + +long file_fat_read_at(const char *filename, unsigned long pos, void *buffer, + unsigned long maxsize) +{ + printf("reading %s\n", filename); + return do_fat_read_at(filename, pos, buffer, maxsize, LS_NO); +} + +long file_fat_read(const char 
*filename, void *buffer, unsigned long maxsize) +{ + return file_fat_read_at(filename, 0, buffer, maxsize); +} + +static void uppercase(char *str, int len) +{ + int i; + + for (i = 0; i < len; i++) { + TOUPPER(*str); + str++; + } +} + +static int disk_write(__u32 block, __u32 nr_blocks, void *buf) +{ + if (!cur_dev || !cur_dev->block_write) + return -1; + + if (cur_part_info.start + block + nr_blocks > + cur_part_info.start + total_sector) { + printf("error: overflow occurs\n"); + return -1; + } + + return cur_dev->block_write(cur_dev->dev, + cur_part_info.start + block, nr_blocks, buf); +} + +/* + * Set short name in directory entry + */ +static void set_name(dir_entry *dirent, const char *filename) +{ + char s_name[VFAT_MAXLEN_BYTES]; + char *period; + int period_location, len, i, ext_num; + + if (filename == NULL) + return; + + len = strlen(filename); + if (len == 0) + return; + + memcpy(s_name, filename, len); + uppercase(s_name, len); + + period = strchr(s_name, '.'); + if (period == NULL) { + period_location = len; + ext_num = 0; + } else { + period_location = period - s_name; + ext_num = len - period_location - 1; + } + + /* Pad spaces when the length of file name is shorter than eight */ + if (period_location < 8) { + memcpy(dirent->name, s_name, period_location); + for (i = period_location; i < 8; i++) + dirent->name[i] = ' '; + } else if (period_location == 8) { + memcpy(dirent->name, s_name, period_location); + } else { + memcpy(dirent->name, s_name, 6); + dirent->name[6] = '~'; + dirent->name[7] = '1'; + } + + if (ext_num < 3) { + memcpy(dirent->ext, s_name + period_location + 1, ext_num); + for (i = ext_num; i < 3; i++) + dirent->ext[i] = ' '; + } else + memcpy(dirent->ext, s_name + period_location + 1, 3); + + debug("name : %s\n", dirent->name); + debug("ext : %s\n", dirent->ext); +} + +static __u8 num_of_fats; +/* + * Write fat buffer into block device + */ +static int flush_fat_buffer(fsdata *mydata) +{ + int getsize = FATBUFBLOCKS; + __u32 
fatlength = mydata->fatlength; + __u8 *bufptr = mydata->fatbuf; + __u32 startblock = mydata->fatbufnum * FATBUFBLOCKS; + + fatlength *= mydata->sect_size; + startblock += mydata->fat_sect; + + if (getsize > fatlength) + getsize = fatlength; + + /* Write FAT buf */ + if (disk_write(startblock, getsize, bufptr) < 0) { + debug("error: writing FAT blocks\n"); + return -1; + } + + if (num_of_fats == 2) { + /* Update corresponding second FAT blocks */ + startblock += mydata->fatlength; + if (disk_write(startblock, getsize, bufptr) < 0) { + debug("error: writing second FAT blocks\n"); + return -1; + } + } + + return 0; +} + +/* + * Get the entry at index 'entry' in a FAT (12/16/32) table. + * On failure 0x00 is returned. + * When bufnum is changed, write back the previous fatbuf to the disk. + */ +static __u32 get_fatent_value(fsdata *mydata, __u32 entry) +{ + __u32 bufnum; + __u32 off16, offset; + __u32 ret = 0x00; + __u16 val1, val2; + + switch (mydata->fatsize) { + case 32: + bufnum = entry / FAT32BUFSIZE; + offset = entry - bufnum * FAT32BUFSIZE; + break; + case 16: + bufnum = entry / FAT16BUFSIZE; + offset = entry - bufnum * FAT16BUFSIZE; + break; + case 12: + bufnum = entry / FAT12BUFSIZE; + offset = entry - bufnum * FAT12BUFSIZE; + break; + + default: + /* Unsupported FAT size */ + return ret; + } + + debug("FAT%d: entry: 0x%04x = %d, offset: 0x%04x = %d\n", + mydata->fatsize, entry, entry, offset, offset); + + /* Read a new block of FAT entries into the cache. 
*/ + if (bufnum != mydata->fatbufnum) { + int getsize = FATBUFBLOCKS; + __u8 *bufptr = mydata->fatbuf; + __u32 fatlength = mydata->fatlength; + __u32 startblock = bufnum * FATBUFBLOCKS; + + if (getsize > fatlength) + getsize = fatlength; + + fatlength *= mydata->sect_size; /* We want it in bytes now */ + startblock += mydata->fat_sect; /* Offset from start of disk */ + + /* Write back the fatbuf to the disk */ + if (mydata->fatbufnum != -1) { + if (flush_fat_buffer(mydata) < 0) + return -1; + } + + if (disk_read(startblock, getsize, bufptr) < 0) { + debug("Error reading FAT blocks\n"); + return ret; + } + mydata->fatbufnum = bufnum; + } + + /* Get the actual entry from the table */ + switch (mydata->fatsize) { + case 32: + ret = FAT2CPU32(((__u32 *) mydata->fatbuf)[offset]); + break; + case 16: + ret = FAT2CPU16(((__u16 *) mydata->fatbuf)[offset]); + break; + case 12: + off16 = (offset * 3) / 4; + + switch (offset & 0x3) { + case 0: + ret = FAT2CPU16(((__u16 *) mydata->fatbuf)[off16]); + ret &= 0xfff; + break; + case 1: + val1 = FAT2CPU16(((__u16 *)mydata->fatbuf)[off16]); + val1 &= 0xf000; + val2 = FAT2CPU16(((__u16 *)mydata->fatbuf)[off16 + 1]); + val2 &= 0x00ff; + ret = (val2 << 4) | (val1 >> 12); + break; + case 2: + val1 = FAT2CPU16(((__u16 *)mydata->fatbuf)[off16]); + val1 &= 0xff00; + val2 = FAT2CPU16(((__u16 *)mydata->fatbuf)[off16 + 1]); + val2 &= 0x000f; + ret = (val2 << 8) | (val1 >> 8); + break; + case 3: + ret = FAT2CPU16(((__u16 *)mydata->fatbuf)[off16]); + ret = (ret & 0xfff0) >> 4; + break; + default: + break; + } + break; + } + debug("FAT%d: ret: %08x, entry: %08x, offset: %04x\n", + mydata->fatsize, ret, entry, offset); + + return ret; +} + +#ifdef CONFIG_SUPPORT_VFAT +/* + * Set the file name information from 'name' into 'slotptr', + */ +static int str2slot(dir_slot *slotptr, const char *name, int *idx) +{ + int j, end_idx = 0; + + for (j = 0; j <= 8; j += 2) { + if (name[*idx] == 0x00) { + slotptr->name0_4[j] = 0; + slotptr->name0_4[j + 1] = 0; 
+ end_idx++; + goto name0_4; + } + slotptr->name0_4[j] = name[*idx]; + (*idx)++; + end_idx++; + } + for (j = 0; j <= 10; j += 2) { + if (name[*idx] == 0x00) { + slotptr->name5_10[j] = 0; + slotptr->name5_10[j + 1] = 0; + end_idx++; + goto name5_10; + } + slotptr->name5_10[j] = name[*idx]; + (*idx)++; + end_idx++; + } + for (j = 0; j <= 2; j += 2) { + if (name[*idx] == 0x00) { + slotptr->name11_12[j] = 0; + slotptr->name11_12[j + 1] = 0; + end_idx++; + goto name11_12; + } + slotptr->name11_12[j] = name[*idx]; + (*idx)++; + end_idx++; + } + + if (name[*idx] == 0x00) + return 1; + + return 0; +/* Not used characters are filled with 0xff 0xff */ +name0_4: + for (; end_idx < 5; end_idx++) { + slotptr->name0_4[end_idx * 2] = 0xff; + slotptr->name0_4[end_idx * 2 + 1] = 0xff; + } + end_idx = 5; +name5_10: + end_idx -= 5; + for (; end_idx < 6; end_idx++) { + slotptr->name5_10[end_idx * 2] = 0xff; + slotptr->name5_10[end_idx * 2 + 1] = 0xff; + } + end_idx = 11; +name11_12: + end_idx -= 11; + for (; end_idx < 2; end_idx++) { + slotptr->name11_12[end_idx * 2] = 0xff; + slotptr->name11_12[end_idx * 2 + 1] = 0xff; + } + + return 1; +} + +static int is_next_clust(fsdata *mydata, dir_entry *dentptr); +static void flush_dir_table(fsdata *mydata, dir_entry **dentptr); + +/* + * Fill dir_slot entries with appropriate name, id, and attr + * The real directory entry is returned by 'dentptr' + */ +static void +fill_dir_slot(fsdata *mydata, dir_entry **dentptr, const char *l_name) +{ + dir_slot *slotptr = (dir_slot *)get_contents_vfatname_block; + __u8 counter = 0, checksum; + int idx = 0, ret; + char s_name[16]; + + /* Get short file name and checksum value */ + strncpy(s_name, (*dentptr)->name, 16); + checksum = mkcksum(s_name); + + do { + memset(slotptr, 0x00, sizeof(dir_slot)); + ret = str2slot(slotptr, l_name, &idx); + slotptr->id = ++counter; + slotptr->attr = ATTR_VFAT; + slotptr->alias_checksum = checksum; + slotptr++; + } while (ret == 0); + + slotptr--; + slotptr->id |= 
LAST_LONG_ENTRY_MASK; + + while (counter >= 1) { + if (is_next_clust(mydata, *dentptr)) { + /* A new cluster is allocated for directory table */ + flush_dir_table(mydata, dentptr); + } + memcpy(*dentptr, slotptr, sizeof(dir_slot)); + (*dentptr)++; + slotptr--; + counter--; + } + + if (is_next_clust(mydata, *dentptr)) { + /* A new cluster is allocated for directory table */ + flush_dir_table(mydata, dentptr); + } +} + +static __u32 dir_curclust; + +/* + * Extract the full long filename starting at 'retdent' (which is really + * a slot) into 'l_name'. If successful also copy the real directory entry + * into 'retdent' + * If additional adjacent cluster for directory entries is read into memory, + * then 'get_contents_vfatname_block' is copied into 'get_dentfromdir_block' and + * the location of the real directory entry is returned by 'retdent' + * Return 0 on success, -1 otherwise. + */ +static int +get_long_file_name(fsdata *mydata, int curclust, __u8 *cluster, + dir_entry **retdent, char *l_name) +{ + dir_entry *realdent; + dir_slot *slotptr = (dir_slot *)(*retdent); + dir_slot *slotptr2 = NULL; + __u8 *buflimit = cluster + mydata->sect_size * ((curclust == 0) ? 
+ PREFETCH_BLOCKS : + mydata->clust_size); + __u8 counter = (slotptr->id & ~LAST_LONG_ENTRY_MASK) & 0xff; + int idx = 0, cur_position = 0; + + if (counter > VFAT_MAXSEQ) { + debug("Error: VFAT name is too long\n"); + return -1; + } + + while ((__u8 *)slotptr < buflimit) { + if (counter == 0) + break; + if (((slotptr->id & ~LAST_LONG_ENTRY_MASK) & 0xff) != counter) + return -1; + slotptr++; + counter--; + } + + if ((__u8 *)slotptr >= buflimit) { + if (curclust == 0) + return -1; + curclust = get_fatent_value(mydata, dir_curclust); + if (CHECK_CLUST(curclust, mydata->fatsize)) { + debug("curclust: 0x%x\n", curclust); + printf("Invalid FAT entry\n"); + return -1; + } + + dir_curclust = curclust; + + if (get_cluster(mydata, curclust, get_contents_vfatname_block, + mydata->clust_size * mydata->sect_size) != 0) { + debug("Error: reading directory block\n"); + return -1; + } + + slotptr2 = (dir_slot *)get_contents_vfatname_block; + while (counter > 0) { + if (((slotptr2->id & ~LAST_LONG_ENTRY_MASK) + & 0xff) != counter) + return -1; + slotptr2++; + counter--; + } + + /* Save the real directory entry */ + realdent = (dir_entry *)slotptr2; + while ((__u8 *)slotptr2 > get_contents_vfatname_block) { + slotptr2--; + slot2str(slotptr2, l_name, &idx); + } + } else { + /* Save the real directory entry */ + realdent = (dir_entry *)slotptr; + } + + do { + slotptr--; + if (slot2str(slotptr, l_name, &idx)) + break; + } while (!(slotptr->id & LAST_LONG_ENTRY_MASK)); + + l_name[idx] = '\0'; + if (*l_name == DELETED_FLAG) + *l_name = '\0'; + else if (*l_name == aRING) + *l_name = DELETED_FLAG; + downcase(l_name); + + /* Return the real directory entry */ + *retdent = realdent; + + if (slotptr2) { + memcpy(get_dentfromdir_block, get_contents_vfatname_block, + mydata->clust_size * mydata->sect_size); + cur_position = (__u8 *)realdent - get_contents_vfatname_block; + *retdent = (dir_entry *) &get_dentfromdir_block[cur_position]; + } + + return 0; +} + +#endif + +/* + * Set the entry at 
index 'entry' in a FAT (16/32) table. + */ +static int set_fatent_value(fsdata *mydata, __u32 entry, __u32 entry_value) +{ + __u32 bufnum, offset; + __u32 off16; + __u16 val; + + switch (mydata->fatsize) { + case 32: + bufnum = entry / FAT32BUFSIZE; + offset = entry - bufnum * FAT32BUFSIZE; + break; + case 16: + bufnum = entry / FAT16BUFSIZE; + offset = entry - bufnum * FAT16BUFSIZE; + break; + case 12: + bufnum = entry / FAT12BUFSIZE; + offset = entry - bufnum * FAT12BUFSIZE; + break; + default: + /* Unsupported FAT size */ + return -1; + } + + /* Read a new block of FAT entries into the cache. */ + if (bufnum != mydata->fatbufnum) { + int getsize = FATBUFBLOCKS; + __u8 *bufptr = mydata->fatbuf; + __u32 fatlength = mydata->fatlength; + __u32 startblock = bufnum * FATBUFBLOCKS; + + fatlength *= mydata->sect_size; + startblock += mydata->fat_sect; + + if (getsize > fatlength) + getsize = fatlength; + + if (mydata->fatbufnum != -1) { + if (flush_fat_buffer(mydata) < 0) + return -1; + } + + if (disk_read(startblock, getsize, bufptr) < 0) { + debug("Error reading FAT blocks\n"); + return -1; + } + mydata->fatbufnum = bufnum; + } + + /* Set the actual entry */ + switch (mydata->fatsize) { + case 32: + ((__u32 *) mydata->fatbuf)[offset] = cpu_to_le32(entry_value); + break; + case 16: + ((__u16 *) mydata->fatbuf)[offset] = cpu_to_le16(entry_value); + break; + case 12: + off16 = (offset * 3) / 4; + + switch (offset & 0x3) { + case 0: + val = FAT2CPU16(((__u16 *) mydata->fatbuf)[off16]&0xf000); + ((__u16 *) mydata->fatbuf)[off16]= val|(cpu_to_le16(entry_value)&0x0fff); + break; + case 1: + val = FAT2CPU16(((__u16 *) mydata->fatbuf)[off16]&0x0fff); + ((__u16 *)mydata->fatbuf)[off16] =val|((cpu_to_le16(entry_value)&0x000f)<<12); + val = FAT2CPU16(((__u16 *)mydata->fatbuf)[off16 + 1]&0xff00); + ((__u16 *)mydata->fatbuf)[off16+1] =val|((cpu_to_le16(entry_value)&0x0ff0)>>4); + break; + case 2: + val = FAT2CPU16(((__u16 *) mydata->fatbuf)[off16]&0x00ff); + ((__u16 
*)mydata->fatbuf)[off16] =val|((cpu_to_le16(entry_value)&0x00ff)<<8); + val = FAT2CPU16(((__u16 *)mydata->fatbuf)[off16 + 1]&0xfff0); + ((__u16 *)mydata->fatbuf)[off16+1] =val|((cpu_to_le16(entry_value)&0x0f00)>>8); + break; + case 3: + val = FAT2CPU16(((__u16 *) mydata->fatbuf)[off16]&0x000f); + ((__u16 *) mydata->fatbuf)[off16]= val|((cpu_to_le16(entry_value)&0x0fff)<<4); + break; + default: + break; + } + break; + default: + return -1; + } + + return 0; +} + +/* + * Determine the entry value at index 'entry' in a FAT (16/32) table + */ +static __u32 determine_fatent(fsdata *mydata, __u32 entry) +{ + __u32 next_fat, next_entry = entry + 1; + + while (1) { + next_fat = get_fatent_value(mydata, next_entry); + if (next_fat == 0) { + set_fatent_value(mydata, entry, next_entry); + break; + } + next_entry++; + } + debug("FAT%d: entry: %08x, entry_value: %04x\n", + mydata->fatsize, entry, next_entry); + + return next_entry; +} + +/* + * Write at most 'size' bytes from 'buffer' into the specified cluster. + * Return 0 on success, -1 otherwise. 
+ */ +static int +set_cluster(fsdata *mydata, __u32 clustnum, __u8 *buffer, + unsigned long size) +{ + int idx = 0; + __u32 startsect; + + if (clustnum > 0) { + startsect = mydata->data_begin + + clustnum * mydata->clust_size; + + if (mydata->fatsize!=32) {//this is for windows format + if (startsect < mydata->rootdir_sect){ + debug("set_cluster\n"); + startsect=mydata->rootdir_sect; + } + } + } + else + startsect = mydata->rootdir_sect; + + debug("clustnum: %d, startsect: %d\n", clustnum, startsect); + + if (disk_write(startsect, size / mydata->sect_size, buffer) < 0) { + debug("Error writing data\n"); + return -1; + } + + if (size % mydata->sect_size) { + __u8 tmpbuf[mydata->sect_size]; + + idx = size / mydata->sect_size; + buffer += idx * mydata->sect_size; + memcpy(tmpbuf, buffer, size % mydata->sect_size); + + if (disk_write(startsect + idx, 1, tmpbuf) < 0) { + debug("Error writing data\n"); + return -1; + } + + return 0; + } + + return 0; +} + +/* + * Find the first empty cluster + */ +static int find_empty_cluster(fsdata *mydata) +{ + __u32 fat_val, entry = 3; + + while (1) { + fat_val = get_fatent_value(mydata, entry); + if (fat_val == 0) + break; + entry++; + } + + return entry; +} + +/* + * Write directory entries in 'get_dentfromdir_block' to block device + */ +static void flush_dir_table(fsdata *mydata, dir_entry **dentptr) +{ + int dir_newclust = 0; + + if (set_cluster(mydata, dir_curclust, + get_dentfromdir_block, + mydata->clust_size * mydata->sect_size) != 0) { + printf("error: wrinting directory entry\n"); + return; + } + dir_newclust = find_empty_cluster(mydata); + set_fatent_value(mydata, dir_curclust, dir_newclust); + if (mydata->fatsize == 32) + set_fatent_value(mydata, dir_newclust, 0xffffff8); + else if (mydata->fatsize == 16) + set_fatent_value(mydata, dir_newclust, 0xfff8); + else if (mydata->fatsize == 12) + set_fatent_value(mydata, dir_newclust, 0xff8); + + dir_curclust = dir_newclust; + + if (flush_fat_buffer(mydata) < 0) + return; + + 
memset(get_dentfromdir_block, 0x00, + mydata->clust_size * mydata->sect_size); + + *dentptr = (dir_entry *) get_dentfromdir_block; +} + +/* + * Set empty cluster from 'entry' to the end of a file + */ +static int clear_fatent(fsdata *mydata, __u32 entry) +{ + __u32 fat_val; + + while (1) { + fat_val = get_fatent_value(mydata, entry); + if (fat_val != 0) + set_fatent_value(mydata, entry, 0); + else + break; + + if (fat_val == 0xfffffff || fat_val == 0xffff) + break; + + entry = fat_val; + } + + /* Flush fat buffer */ + if (flush_fat_buffer(mydata) < 0) + return -1; + + return 0; +} + +/* + * Write at most 'maxsize' bytes from 'buffer' into + * the file associated with 'dentptr' + * Return the number of bytes read or -1 on fatal errors. + */ +static int +set_contents(fsdata *mydata, dir_entry *dentptr, __u8 *buffer, + unsigned long maxsize) +{ + unsigned long filesize = FAT2CPU32(dentptr->size), gotsize = 0; + unsigned int bytesperclust = mydata->clust_size * mydata->sect_size; + __u32 curclust = START(dentptr); + __u32 endclust = 0, newclust = 0; + unsigned long actsize; + + debug("Filesize: %ld bytes\n", filesize); + if (maxsize > 0 && filesize > maxsize) + filesize = maxsize; + + debug("%ld bytes\n", filesize); + + actsize = bytesperclust; + endclust = curclust; + do { + /* search for consecutive clusters */ + while (actsize < filesize) { + newclust = determine_fatent(mydata, endclust); + + if ((newclust - 1) != endclust) + goto getit; + + if (CHECK_CLUST(newclust, mydata->fatsize)) { + debug("curclust: 0x%x\n", newclust); + debug("Invalid FAT entry\n"); + return gotsize; + } + endclust = newclust; + actsize += bytesperclust; + } + /* actsize >= file size */ + actsize -= bytesperclust; + /* set remaining clusters */ + if (set_cluster(mydata, curclust, buffer, (int)actsize) != 0) { + debug("error: writing cluster\n"); + return -1; + } + + /* set remaining bytes */ + gotsize += (int)actsize; + filesize -= actsize; + buffer += actsize; + actsize = filesize; + + if 
(set_cluster(mydata, endclust, buffer, (int)actsize) != 0) { + debug("error: writing cluster\n"); + return -1; + } + gotsize += actsize; + + /* Mark end of file in FAT */ + if (mydata->fatsize == 16) + newclust = 0xffff; + else if (mydata->fatsize == 32) + newclust = 0xfffffff; + else if (mydata->fatsize == 12) + newclust = 0x0fff; + debug("fatsize %d\n", mydata->fatsize); + debug("data_begin 0x%x fatlength 0x%x fat_sect 0x%x clust_size 0x%x\n", + mydata->data_begin, mydata->fatlength, mydata->fat_sect, + mydata->clust_size); + + + set_fatent_value(mydata, endclust, newclust); + debug("rootdir_sect 0x%x\n", mydata->rootdir_sect); + + return gotsize; +getit: + if (set_cluster(mydata, curclust, buffer, (int)actsize) != 0) { + debug("error: writing cluster\n"); + return -1; + } + gotsize += (int)actsize; + filesize -= actsize; + buffer += actsize; + + if (CHECK_CLUST(curclust, mydata->fatsize)) { + debug("curclust: 0x%x\n", curclust); + debug("Invalid FAT entry\n"); + return gotsize; + } + actsize = bytesperclust; + curclust = endclust = newclust; + } while (1); +} + +/* + * Fill dir_entry + */ +static void fill_dentry(fsdata *mydata, dir_entry *dentptr, + const char *filename, __u32 start_cluster, __u32 size, __u8 attr) +{ + if (mydata->fatsize == 32) + dentptr->starthi = + cpu_to_le16((start_cluster & 0xffff0000) >> 16); + dentptr->start = cpu_to_le16(start_cluster & 0xffff); + dentptr->size = cpu_to_le32(size); + + dentptr->attr = attr; + + set_name(dentptr, filename); +} + +/* + * Check whether adding a file makes the file system to + * exceed the size of the block device + * Return -1 when overflow occurs, otherwise return 0 + */ +static int check_overflow(fsdata *mydata, __u32 clustnum, unsigned long size) +{ + __u32 startsect, sect_num; + + if (clustnum > 0) { + startsect = mydata->data_begin + + clustnum * mydata->clust_size; + + if (mydata->fatsize!=32) {//this is for windows format + if (startsect < mydata->rootdir_sect) { + debug("check_overflow\n"); + 
startsect=mydata->rootdir_sect; + } + } + } else { + startsect = mydata->rootdir_sect; + } + + sect_num = size / mydata->sect_size; + if (size % mydata->sect_size) + sect_num++; + + if (startsect + sect_num > cur_part_info.start + total_sector) + return -1; + + return 0; +} + +/* + * Check if adding several entries exceed one cluster boundary + */ +static int is_next_clust(fsdata *mydata, dir_entry *dentptr) +{ + int cur_position; + + cur_position = (__u8 *)dentptr - get_dentfromdir_block; + + if (cur_position >= mydata->clust_size * mydata->sect_size) + return 1; + else + return 0; +} + +static dir_entry *empty_dentptr; +/* + * Find a directory entry based on filename or start cluster number + * If the directory entry is not found, + * the new position for writing a directory entry will be returned + */ +static dir_entry *find_directory_entry(fsdata *mydata, int startsect, + char *filename, dir_entry *retdent, __u32 start) +{ + __u32 curclust = (startsect - mydata->data_begin) / mydata->clust_size; + //if (mydata->fatsize != 32) { + // curclust=0; + //} + debug("get_dentfromdir: %s\n", filename); + + while (1) { + dir_entry *dentptr; + + int i; + + if (get_cluster(mydata, curclust, get_dentfromdir_block, + mydata->clust_size * mydata->sect_size) != 0) { + printf("Error: reading directory block\n"); + return NULL; + } + + dentptr = (dir_entry *)get_dentfromdir_block; + + dir_curclust = curclust; + + for (i = 0; i < DIRENTSPERCLUST; i++) { + char s_name[14], l_name[VFAT_MAXLEN_BYTES]; + + l_name[0] = '\0'; + if (dentptr->name[0] == DELETED_FLAG) { + dentptr++; + if (is_next_clust(mydata, dentptr)) + break; + continue; + } + if ((dentptr->attr & ATTR_VOLUME)) { +#ifdef CONFIG_SUPPORT_VFAT + if ((dentptr->attr & ATTR_VFAT) && + (dentptr->name[0] & LAST_LONG_ENTRY_MASK)) { + get_long_file_name(mydata, curclust, + get_dentfromdir_block, + &dentptr, l_name); + debug("vfatname: |%s|\n", l_name); + } else +#endif + { + /* Volume label or VFAT entry */ + dentptr++; + if 
(is_next_clust(mydata, dentptr)) + break; + continue; + } + } + if (dentptr->name[0] == 0) { + debug("Dentname == NULL - %d\n", i); + empty_dentptr = dentptr; + return NULL; + } + + get_name(dentptr, s_name); + + if (strcmp(filename, s_name) + && strcmp(filename, l_name)) { + debug("Mismatch: |%s|%s|\n", + s_name, l_name); + dentptr++; + if (is_next_clust(mydata, dentptr)) + break; + continue; + } + + memcpy(retdent, dentptr, sizeof(dir_entry)); + + debug("DentName: %s", s_name); + debug(", start: 0x%x", START(dentptr)); + debug(", size: 0x%x %s\n", + FAT2CPU32(dentptr->size), + (dentptr->attr & ATTR_DIR) ? + "(DIR)" : ""); + + return dentptr; + } + + curclust = get_fatent_value(mydata, dir_curclust); + if ((curclust >= 0xffffff8) || (curclust >= 0xfff8)) { + empty_dentptr = dentptr; + return NULL; + } + if (CHECK_CLUST(curclust, mydata->fatsize)) { + debug("curclust: 0x%x\n", curclust); + debug("Invalid FAT entry\n"); + return NULL; + } + } + + return NULL; +} + +static int do_fat_write(const char *filename, void *buffer, + unsigned long size) +{ + dir_entry *dentptr, *retdent; + __u32 startsect; + __u32 start_cluster; + boot_sector bs; + volume_info volinfo; + fsdata datablock; + fsdata *mydata = &datablock; + int cursect; + int ret = -1, name_len; + char l_filename[VFAT_MAXLEN_BYTES]; + int write_size = size; + + dir_curclust = 0; + + if (read_bootsectandvi(&bs, &volinfo, &mydata->fatsize)) { + debug("error: reading boot sector\n"); + return -1; + } + + total_sector = bs.total_sect; + if (total_sector == 0) + total_sector = cur_part_info.size; + + if (mydata->fatsize == 32) + mydata->fatlength = bs.fat32_length; + else + mydata->fatlength = bs.fat_length; + + mydata->fat_sect = bs.reserved; + + cursect = mydata->rootdir_sect + = mydata->fat_sect + mydata->fatlength * bs.fats; + debug("fat_sect %d numfat %d\n", mydata->fat_sect, bs.fats); + num_of_fats = bs.fats; + + mydata->sect_size = (bs.sector_size[1] << 8) + bs.sector_size[0]; + mydata->clust_size = 
bs.cluster_size; + + if (mydata->fatsize == 32) { + mydata->data_begin = mydata->rootdir_sect - + (mydata->clust_size * 2); + } else { + int rootdir_size; + + rootdir_size = ((bs.dir_entries[1] * (int)256 + + bs.dir_entries[0]) * + sizeof(dir_entry)) / + mydata->sect_size; + debug("rootdir_size 0x%x\n",rootdir_size); + mydata->data_begin = mydata->rootdir_sect + + rootdir_size - + (mydata->clust_size * 2); + debug("pre data_begin 0x%x\n", mydata->data_begin); + } + + mydata->fatbufnum = -1; + mydata->fatbuf = malloc(FATBUFSIZE); + if (mydata->fatbuf == NULL) { + debug("Error: allocating memory\n"); + return -1; + } + + if (disk_read(cursect, + (mydata->fatsize == 32) ? + (mydata->clust_size) : + PREFETCH_BLOCKS, do_fat_read_at_block) < 0) { + debug("Error: reading rootdir block\n"); + goto exit; + } + dentptr = (dir_entry *) do_fat_read_at_block; + + name_len = strlen(filename); + if (name_len >= VFAT_MAXLEN_BYTES) + name_len = VFAT_MAXLEN_BYTES - 1; + + memcpy(l_filename, filename, name_len); + l_filename[name_len] = 0; /* terminate the string */ + downcase(l_filename); + + startsect = mydata->rootdir_sect; + debug("mydata->roodir_sect %d\n", startsect); + retdent = find_directory_entry(mydata, startsect, + l_filename, dentptr, 0); + if (retdent) { + /* Update file size and start_cluster in a directory entry */ + retdent->size = cpu_to_le32(size); + start_cluster = FAT2CPU16(retdent->start); + if (mydata->fatsize == 32) + start_cluster |= + (FAT2CPU16(retdent->starthi) << 16); + + ret = check_overflow(mydata, start_cluster, size); + if (ret) { + printf("Error: %ld overflow\n", size); + goto exit; + } + + ret = clear_fatent(mydata, start_cluster); + if (ret) { + printf("Error: clearing FAT entries\n"); + goto exit; + } + + ret = set_contents(mydata, retdent, buffer, size); + if (ret < 0) { + printf("Error: writing contents\n"); + goto exit; + } + write_size = ret; + debug("attempt to write 0x%x bytes\n", write_size); + + /* Flush fat buffer */ + ret = 
flush_fat_buffer(mydata); + if (ret) { + printf("Error: flush fat buffer\n"); + goto exit; + } + + /* Write directory table to device */ + ret = set_cluster(mydata, dir_curclust, + get_dentfromdir_block, + mydata->clust_size * mydata->sect_size); + if (ret) { + printf("Error: writing directory entry\n"); + goto exit; + } + } else { + /* Set short name to set alias checksum field in dir_slot */ + set_name(empty_dentptr, filename); + fill_dir_slot(mydata, &empty_dentptr, filename); + + ret = start_cluster = find_empty_cluster(mydata); + if (ret < 0) { + printf("Error: finding empty cluster\n"); + goto exit; + } + + ret = check_overflow(mydata, start_cluster, size); + if (ret) { + printf("Error: %ld overflow\n", size); + goto exit; + } + + /* Set attribute as archieve for regular file */ + fill_dentry(mydata, empty_dentptr, filename, + start_cluster, size, 0x20); + + ret = set_contents(mydata, empty_dentptr, buffer, size); + if (ret < 0) { + printf("Error: writing contents\n"); + goto exit; + } + write_size = ret; + debug("attempt to write 0x%x bytes\n", write_size); + + /* Flush fat buffer */ + ret = flush_fat_buffer(mydata); + if (ret) { + printf("Error: flush fat buffer\n"); + goto exit; + } + + /* Write directory table to device */ + ret = set_cluster(mydata, dir_curclust, + get_dentfromdir_block, + mydata->clust_size * mydata->sect_size); + if (ret) { + printf("Error: writing directory entry\n"); + goto exit; + } + } + +exit: + free(mydata->fatbuf); + return ret < 0 ? 
ret : write_size; +} + +int file_fat_write(const char *filename, void *buffer, unsigned long maxsize) +{ + printf("writing %s\n", filename); + return do_fat_write(filename, buffer, maxsize); +} + + + + + +enum { +// Perhaps this should remain constant + info_sector_number = 1, +// TODO: make these cmdline options +// dont forget sanity check: backup_boot_sector + 3 <= reserved_sect + backup_boot_sector = 3, + reserved_sect = 6, +}; + +#define NUM_FATS 2 +#define MAX_CLUST_32 0x0FFFFFF0 +#define MAX_CLUST_12 0x0FF0 + +#define FAT_FSINFO_SIG1 0x41615252 +#define FAT_FSINFO_SIG2 0x61417272 +#define BOOT_SIGN 0xAA55 +#define MARK_CLUSTER(cluster, value) \ + ((uint32_t *)fat)[cluster] = cpu_to_le32(value) +#define EOF_FAT32 0x0FFFFFF8 +#define reserved_sect16or12 1 + +int +format_fat32(void) +{ + uint32_t sect_per_clust=1; + uint32_t sect_per_fat=1; + uint32_t total_clust; + const char *volume_label = ""; + unsigned bufsize = reserved_sect; + void *buffer; + boot_sector32 *bs; + fat32_fsinfo *fsinfo; + + if ((!strcmp(cur_part_info.type, "5")) || + (!strcmp(cur_part_info.type, "15"))|| + (!strcmp(cur_part_info.type, "133"))){ + printf("Cannot format extended partition!\n"); + return -1; + } + + + + /* For FAT32, try to do the same as M$'s format command + * (see http://www.win.tue.nl/~aeb/linux/fs/fat/fatgen103.pdf p. 
20): + * fs size <= 260M: 0.5k clusters + * fs size <= 8G: 4k clusters + * fs size <= 16G: 8k clusters + * fs size > 16G: 16k clusters + */ + if (cur_part_info.size< 32.5*1024*2) { + printf("The size of the device is too small for FAT32, please use FAT16/FAT12\n"); + return -1; + } else if (cur_part_info.size> 260*1024*2) { + sect_per_clust=8; + if (cur_part_info.size>8*1024*1024*2) { + sect_per_clust=16; + if (cur_part_info.size>16*1024*1024*2) { + sect_per_clust=32; + if (cur_part_info.size>32*1024*1024*2) { + sect_per_clust=64; + if (cur_part_info.size>2*1024*1024*1024*2) { + printf("FAT32 doesn't support larger than 2T\n"); + return -1; + } + } + } + } + + } + sect_per_fat = 1; + while (1) { + while (1) { + int spf_adj; + unsigned int tcl = (cur_part_info.size - reserved_sect - NUM_FATS * sect_per_fat) / sect_per_clust; + // tcl may be > MAX_CLUST_32 here, but it may be + // because sect_per_fat is underestimated, + // and with increased sect_per_fat it still may become + // <= MAX_CLUST_32. Therefore, we do not check + // against MAX_CLUST_32, but against a bigger const: + if (tcl > 0x80ffffff) + goto next; + total_clust = tcl; // fits in uint32_t + // Every cluster needs 4 bytes in FAT. +2 entries since + // FAT has space for non-existent clusters 0 and 1. + // Let's see how many sectors that needs. + //May overflow at "*4": + //spf_adj = ((total_clust+2) * 4 + bytes_per_sect-1) / bytes_per_sect - sect_per_fat; + //Same in the more obscure, non-overflowing form: + spf_adj = ((total_clust+2) + (512/4)-1) / (512/4) - sect_per_fat; +#if 0 + printf("sect_per_clust:%u sect_per_fat:%u total_clust:%u", + sect_per_clust, sect_per_fat, (int)tcl); + printf("adjust to sect_per_fat:%d", spf_adj); +#endif + if (spf_adj <= 0) { + // do not need to adjust sect_per_fat. + // so, was total_clust too big after all? 
+ if (total_clust <= MAX_CLUST_32) + goto found_total_clust; // no + // yes, total_clust is _a bit_ too big + goto next; + } + // adjust sect_per_fat, go back and recalc total_clust + // (note: just "sect_per_fat += spf_adj" isn't ok) + sect_per_fat += ((unsigned)spf_adj / 2) | 1; + } + next: + if (sect_per_clust == 128) { + printf("can't make FAT32 with >128 sectors/cluster"); + return -1; + } + sect_per_clust *= 2; + sect_per_fat = (sect_per_fat / 2) | 1; + } +found_total_clust: + + + bufsize |= 2; // use this instead + bufsize |= sect_per_clust; + //printf("buffer size is 0x%x\n", bufsize); + buffer = memalign(ARCH_DMA_MINALIGN, bufsize*512); + if (buffer == NULL) { + printf("Error: allocating block\n"); + return -1; + } + + + memset(buffer, 0, bufsize*512); + bs=(void*)buffer; + fsinfo=(void*)(buffer + 512); + + strcpy(bs->ignored, "\xeb\x58\x90" "MSWIN4.1"); // system_id[8] included :) + STORE_LE(bs->sector_size[0], 0x00);//(__u16)cur_part_info.blksz); + STORE_LE(bs->sector_size[1], 0x02);//(__u16)cur_part_info.blksz); + STORE_LE(bs->cluster_size, (__u8)sect_per_clust); + // cast in needed on big endian to suppress a warning + STORE_LE(bs->reserved, (uint16_t)reserved_sect); + STORE_LE(bs->fats, 2); + //STORE_LE(bs->dir_entries[0], 0); // for FAT32, stays 0 + //STORE_LE(bs->dir_entries[1], 0); // for FAT32, stays 0 + //if (cur_part_info.size <= 0xffff) { + // STORE_LE(bs->sectors[0], (__u8)(cur_part_info.size&0xff)); + // STORE_LE(bs->sectors[1], (__u8)((cur_part_info.size&0xff00)>>8)); + //} + STORE_LE(bs->media, 0xf8); + //STORE_LE(bs->fat_length, 0); + STORE_LE(bs->secs_track, 63); + STORE_LE(bs->heads, 255); + STORE_LE(bs->hidden, 0); + STORE_LE(bs->total_sect, cur_part_info.size); + STORE_LE(bs->fat32_length, sect_per_fat); + STORE_LE(bs->flags, 0); + //STORE_LE(bs->version[0], 0); + //STORE_LE(bs->version[1], 0); + STORE_LE(bs->root_cluster, 2); + STORE_LE(bs->info_sector, info_sector_number); + STORE_LE(bs->backup_boot, backup_boot_sector); + 
//STORE_LE(bs->reserved2[0], 0); + //STORE_LE(bs->reserved2[1], 0); + //STORE_LE(bs->reserved2[2], 0); + //STORE_LE(bs->reserved2[3], 0); + //STORE_LE(bs->reserved2[4], 0); + //STORE_LE(bs->reserved2[5], 0); + STORE_LE(bs->boot_signature, 0x29); + //STORE_LE(bs->volumeID[0], 0); + //STORE_LE(bs->volumeID[1], 0); + //STORE_LE(bs->volumeID[2], 0); + //STORE_LE(bs->volumeID[3], 0); + strncpy(bs->filesystype, "FAT32 ", sizeof(bs->filesystype)); + strncpy(bs->volumelabel, volume_label, sizeof(bs->volumelabel)); + + STORE_LE(fsinfo->signature1, FAT_FSINFO_SIG1); + STORE_LE(fsinfo->signature2, FAT_FSINFO_SIG2); + // we've allocated cluster 2 for the root dir + STORE_LE(fsinfo->free_clusters, (total_clust - 1)); + STORE_LE(fsinfo->next_cluster, 2); + STORE_LE(fsinfo->boot_sign, BOOT_SIGN); + + //set reserved region + { + *((__u8*)bs+510)=0x55; + *((__u8*)bs+511)=0xAA; + *((__u8*)fsinfo+510)=0x55; + *((__u8*)fsinfo+511)=0xAA; + if (disk_write(0, backup_boot_sector, bs) < 0) { + printf("Error: writing block\n"); + goto fail; + } + if (disk_write(backup_boot_sector, reserved_sect - backup_boot_sector, bs) < 0) { + printf("Error: writing block\n"); + goto fail; + } + } + + //set fat + { + unsigned i=0,j=0; + unsigned char *fat = (void*)buffer; + memset(bs, 0, 512 * 2); + // initial FAT entries + MARK_CLUSTER(0, 0x0fffff00 | 0xf8); + MARK_CLUSTER(1, 0xffffffff); + // mark cluster 2 as EOF (used for root dir) + MARK_CLUSTER(2, EOF_FAT32); + for (i = 0; i < NUM_FATS; i++) { + if (disk_write(reserved_sect+i*sect_per_fat, 1, buffer) < 0) { + printf("Error: writing block\n"); + goto fail; + } + for (j = 1; j < sect_per_fat; j++) + if (disk_write(reserved_sect+i*sect_per_fat+j, 1, buffer+512) < 0) { + printf("Error: writing block\n"); + goto fail; + } + } + memset(buffer, 0, sect_per_clust * 512); + for(j=0;j<sect_per_clust;j++) + if (disk_write(reserved_sect+i*sect_per_fat+j, 1, buffer) < 0) { + printf("Error: writing block\n"); + goto fail; + } + } + + free(buffer); + 
printf("succeed\n"); + return 0; +fail: + printf("failed\n"); + free(buffer); + return -1; +} + +static void mark_FAT_cluster(char size_fat, char* fat, int cluster, unsigned int value) +{ + switch (size_fat) { + case 12: + value &= 0x0fff; + if (((cluster * 3) & 0x1) == 0) { + fat[3 * cluster / 2] = (unsigned char)(value & 0x00ff); + fat[(3 * cluster / 2) + 1] = + (unsigned char)((fat[(3 * cluster / 2) + 1] & 0x00f0) + | ((value & 0x0f00) >> 8)); + } else { + fat[3 * cluster / 2] = + (unsigned char)((fat[3 * cluster / 2] & 0x000f) | + ((value & 0x000f) << 4)); + fat[(3 * cluster / 2) + 1] = (unsigned char)((value & 0x0ff0) >> 4); + } + break; + + case 16: + value &= 0xffff; + fat[2 * cluster] = (unsigned char)(value & 0x00ff); + fat[(2 * cluster) + 1] = (unsigned char)(value >> 8); + break; + + case 32: + value &= 0xfffffff; + fat[4 * cluster] = (unsigned char)(value & 0x000000ff); + fat[(4 * cluster) + 1] = (unsigned char)((value & 0x0000ff00) >> 8); + fat[(4 * cluster) + 2] = (unsigned char)((value & 0x00ff0000) >> 16); + fat[(4 * cluster) + 3] = (unsigned char)((value & 0xff000000) >> 24); + break; + + default: + printf("Bad FAT size (not 12, 16, or 32)\n"); + } +} + + +int +format_fat16(void) +{ + uint32_t sect_per_clust=1; + uint32_t sect_per_fat=1; + uint32_t total_clust; + const char *volume_label = ""; + unsigned bufsize = reserved_sect; + void *buffer; + boot_sector16or12 *bs; + + if ((!strcmp(cur_part_info.type, "5")) || + (!strcmp(cur_part_info.type, "15"))|| + (!strcmp(cur_part_info.type, "133"))){ + printf("Cannot format extended partition!\n"); + return -1; + } + + + /* For FAT32, try to do the same as M$'s format command + * (see http://www.win.tue.nl/~aeb/linux/fs/fat/fatgen103.pdf p. 
20): + * fs size <= 260M: 0.5k clusters + * fs size <= 8G: 4k clusters + * fs size <= 16G: 8k clusters + * fs size > 16G: 16k clusters + */ + if (cur_part_info.size< 4.1*1024*2) { + printf("The size of the device is too small for FAT16, please use FAT12\n"); + return -1; + } else if (cur_part_info.size> 16*1024*2) { + sect_per_clust=4; + if (cur_part_info.size>128*1024*2) { + sect_per_clust=8; + if (cur_part_info.size>256*1024*2) { + sect_per_clust=16; + if (cur_part_info.size>512*1024*2) { + sect_per_clust=32; + if (cur_part_info.size>1024*1024*2) { + sect_per_clust=64; + if (cur_part_info.size>2*1024*1024*2){ + printf("FAT16 doesn't support larger than 2G\n"); + return -1; + } + } + + } + } + } + + } + //we have to decide sect_per_fat + { + unsigned int RootDirSectors = ((512*32)+(512-1))/512; + unsigned int TmpVal1 = cur_part_info.size-(1+RootDirSectors); + unsigned int TmpVal2 = (256*sect_per_clust)+2;; + unsigned int FATSz = (TmpVal1+TmpVal2-1)/TmpVal2; + sect_per_fat = FATSz; + } + + + debug("sect_per_fat 0x%x\n", sect_per_fat); + debug("sect_per_clust 0x%x\n", sect_per_clust); + bufsize |= 2; // use this instead + bufsize |= sect_per_clust; + buffer = memalign(ARCH_DMA_MINALIGN, bufsize*512); + if (buffer == NULL) { + printf("Error: allocating block\n"); + return -1; + } + + + memset(buffer, 0, bufsize*512); + bs=(void*)buffer; + + strcpy(bs->ignored, "\xeb\x58\x90" "MSWIN4.1"); // system_id[8] included :) + STORE_LE(bs->sector_size[0], 0x00);//(__u16)cur_part_info.blksz); + STORE_LE(bs->sector_size[1], 0x02);//(__u16)cur_part_info.blksz); + STORE_LE(bs->cluster_size, (__u8)sect_per_clust); + // cast in needed on big endian to suppress a warning + STORE_LE(bs->reserved, 1); + STORE_LE(bs->fats, 2); + STORE_LE(bs->dir_entries[0], 0x00); + STORE_LE(bs->dir_entries[1], 0x02); + if (cur_part_info.size <= 0xffff) { + STORE_LE(bs->sectors[0], (__u8)(cur_part_info.size&0xff)); + STORE_LE(bs->sectors[1], (__u8)((cur_part_info.size&0xff00)>>8)); + } + 
STORE_LE(bs->media, 0xf8); + STORE_LE(bs->fat_length, sect_per_fat); + STORE_LE(bs->secs_track, 63); + STORE_LE(bs->heads, 255); + STORE_LE(bs->hidden, 0); + STORE_LE(bs->total_sect, cur_part_info.size); + STORE_LE(bs->drive_number, 0); + STORE_LE(bs->boot_signature, 0x29); + //STORE_LE(bs->volumeID[0], 0); + //STORE_LE(bs->volumeID[1], 0); + //STORE_LE(bs->volumeID[2], 0); + //STORE_LE(bs->volumeID[3], 0); + strncpy(bs->filesystype, "FAT16 ", sizeof(bs->filesystype)); + strncpy(bs->volumelabel, volume_label, sizeof(bs->volumelabel)); + + + //set reserved region + { + *((__u8*)bs+510)=0x55; + *((__u8*)bs+511)=0xAA; + if (disk_write(0, 1, bs) < 0) { + printf("Error: writing block\n"); + goto fail; + } + } + + + { + //set fat + unsigned i=0,j=0; + unsigned char *fat = (void*)buffer; + memset(bs, 0, 512 * 2); + // initial FAT entries + mark_FAT_cluster(16, fat, 0, 0xfffffff8); + mark_FAT_cluster(16, fat, 1, 0xffffffff); + for (i = 0; i < NUM_FATS; i++) { + if (disk_write(1+i*sect_per_fat, 1, buffer) < 0) { + printf("Error: writing block\n"); + goto fail; + } + for (j = 1; j < sect_per_fat; j++) + if (disk_write(1+i*sect_per_fat+j, 1, buffer+512) < 0) { + printf("Error: writing block\n"); + goto fail; + } + } + + //set root directory region + + memset(buffer, 0, 512); + for(j=0;j<32;j++) + if (disk_write(1+NUM_FATS*sect_per_fat+j, 1, buffer) < 0) { + printf("Error: writing block\n"); + goto fail; + } + } + + free(buffer); + printf("succeed\n"); + return 0; +fail: + printf("failed\n"); + free(buffer); + return -1; +} + +int +format_fat12(void) +{ + uint32_t sect_per_clust=0x40; + uint32_t sect_per_fat=1; + uint32_t total_clust; + const char *volume_label = ""; + unsigned bufsize = reserved_sect; + void *buffer; + boot_sector16or12 *bs; + + if ((!strcmp(cur_part_info.type, "5")) || + (!strcmp(cur_part_info.type, "15"))|| + (!strcmp(cur_part_info.type, "133"))){ + printf("Cannot format extended partition!\n"); + return -1; + } + + + + /* For FAT32, try to do the same as 
M$'s format command + * (see http://www.win.tue.nl/~aeb/linux/fs/fat/fatgen103.pdf p. 20): + * fs size <= 260M: 0.5k clusters + * fs size <= 8G: 4k clusters + * fs size <= 16G: 8k clusters + * fs size > 16G: 16k clusters + */ + if (cur_part_info.size > 255*63) { + printf("The size is larger than 7.8MB, Please use the FAT16/FAT32\n"); + return -1; + } + + //we have to decide sect_per_fat + sect_per_fat=0x0c; + sect_per_clust=4; + + bufsize |= 2; // use this instead + bufsize |= sect_per_clust; + buffer = memalign(ARCH_DMA_MINALIGN, bufsize*512); + if (buffer == NULL) { + printf("Error: allocating block\n"); + return -1; + } + + + memset(buffer, 0, bufsize*512); + bs=(void*)buffer; + + strcpy(bs->ignored, "\xeb\x58\x90" "MSWIN4.1"); // system_id[8] included :) + STORE_LE(bs->sector_size[0], 0x00);//(__u16)cur_part_info.blksz); + STORE_LE(bs->sector_size[1], 0x02);//(__u16)cur_part_info.blksz); + STORE_LE(bs->cluster_size, (__u8)sect_per_clust); + // cast in needed on big endian to suppress a warning + STORE_LE(bs->reserved, sect_per_clust); + STORE_LE(bs->fats, 2); + STORE_LE(bs->dir_entries[0], 0x00); + STORE_LE(bs->dir_entries[1], 0x02); + if (cur_part_info.size <= 0xffff) { + STORE_LE(bs->sectors[0], (__u8)(cur_part_info.size&0xff)); + STORE_LE(bs->sectors[1], (__u8)((cur_part_info.size&0xff00)>>8)); + } + STORE_LE(bs->media, 0xf8); + STORE_LE(bs->fat_length, sect_per_fat); + STORE_LE(bs->secs_track, 63); + STORE_LE(bs->heads, 255); + STORE_LE(bs->hidden, 0); + STORE_LE(bs->total_sect, cur_part_info.size); + STORE_LE(bs->drive_number, 0); + STORE_LE(bs->boot_signature, 0x29); + //STORE_LE(bs->volumeID[0], 0); + //STORE_LE(bs->volumeID[1], 0); + //STORE_LE(bs->volumeID[2], 0); + //STORE_LE(bs->volumeID[3], 0); + strncpy(bs->filesystype, "FAT12 ", sizeof(bs->filesystype)); + strncpy(bs->volumelabel, volume_label, sizeof(bs->volumelabel)); + + + //set reserved region + { + *((__u8*)bs+510)=0x55; + *((__u8*)bs+511)=0xAA; + if (disk_write(0, 1, bs) < 0) { + 
printf("Error: writing block\n"); + goto fail; + } + } + + + { + //set fat + unsigned i=0,j=0; + unsigned int reserver12=sect_per_clust; + unsigned char *fat = (void*)buffer; + memset(bs, 0, 512 * 2); + // initial FAT entries + mark_FAT_cluster(12, fat, 0, 0xfffffff8); + mark_FAT_cluster(12, fat, 1, 0xffffffff); + + for (i = 0; i < NUM_FATS; i++) { + if (disk_write(reserver12+i*sect_per_fat, 1, buffer) < 0) { + printf("Error: writing block\n"); + goto fail; + } + for (j = 1; j < sect_per_fat; j++) + if (disk_write(reserver12+i*sect_per_fat+j, 1, buffer+512) < 0) { + printf("Error: writing block\n"); + goto fail; + } + } + + //set root directory region + + memset(buffer, 0, 512); + for(j=0;j<32;j++) + if (disk_write(reserver12+NUM_FATS*sect_per_fat+j, 1, buffer) < 0) { + printf("Error: writing block\n"); + goto fail; + } + } + + free(buffer); + printf("succeed\n"); + return 0; + fail: + printf("failed\n"); + free(buffer); + return -1; + +} + diff --git a/fs/fat/file.c b/fs/fat/file.c new file mode 100755 index 0000000..f999ac5 --- /dev/null +++ b/fs/fat/file.c @@ -0,0 +1,208 @@ +/* + * file.c + * + * Mini "VFS" by Marcus Sundberg + * + * 2002-07-28 - rjones@nexus-tech.net - ported to ppcboot v1.1.6 + * 2003-03-10 - kharris@nexus-tech.net - ported to uboot + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ + +#include <common.h> +#include <config.h> +#include <malloc.h> +#include <fat.h> +#include <linux/stat.h> +#include <linux/time.h> + +#if (CONFIG_COMMANDS & CFG_CMD_FAT) + +/* Supported filesystems */ +static const struct filesystem filesystems[] = { + { file_fat_detectfs, file_fat_ls, file_fat_read, "FAT" }, +}; +#define NUM_FILESYS (sizeof(filesystems)/sizeof(struct filesystem)) + +/* The filesystem which was last detected */ +static int current_filesystem = FSTYPE_NONE; + +/* The current working directory */ +#define CWD_LEN 511 +char file_cwd[CWD_LEN+1] = "/"; + +const char * +file_getfsname(int idx) +{ + if (idx < 0 || idx >= NUM_FILESYS) return NULL; + + return filesystems[idx].name; +} + + +static void +pathcpy(char *dest, const char *src) +{ + char *origdest = dest; + + do { + if (dest-file_cwd >= CWD_LEN) { + *dest = '\0'; + return; + } + *(dest) = *(src); + if (*src == '\0') { + if (dest-- != origdest && ISDIRDELIM(*dest)) { + *dest = '\0'; + } + return; + } + ++dest; + if (ISDIRDELIM(*src)) { + while (ISDIRDELIM(*src)) src++; + } else { + src++; + } + } while (1); +} + + +int +file_cd(const char *path) +{ + if (ISDIRDELIM(*path)) { + while (ISDIRDELIM(*path)) path++; + strncpy(file_cwd+1, path, CWD_LEN-1); + } else { + const char *origpath = path; + char *tmpstr = file_cwd; + int back = 0; + + while (*tmpstr != '\0') tmpstr++; + do { + tmpstr--; + } while (ISDIRDELIM(*tmpstr)); + + while (*path == '.') { + path++; + while (*path == '.') { + path++; + back++; + } + if (*path != '\0' && !ISDIRDELIM(*path)) { + path = origpath; + back = 0; + break; + } + while (ISDIRDELIM(*path)) path++; + origpath = path; + } + + while (back--) { + /* Strip off path component */ + while (!ISDIRDELIM(*tmpstr)) { + tmpstr--; + } + if (tmpstr == file_cwd) 
{ + /* Incremented again right after the loop. */ + tmpstr--; + break; + } + /* Skip delimiters */ + while (ISDIRDELIM(*tmpstr)) tmpstr--; + } + tmpstr++; + if (*path == '\0') { + if (tmpstr == file_cwd) { + *tmpstr = '/'; + tmpstr++; + } + *tmpstr = '\0'; + return 0; + } + *tmpstr = '/'; + pathcpy(tmpstr+1, path); + } + + return 0; +} + + +int +file_detectfs(void) +{ + int i; + + current_filesystem = FSTYPE_NONE; + + for (i = 0; i < NUM_FILESYS; i++) { + if (filesystems[i].detect() == 0) { + strcpy(file_cwd, "/"); + current_filesystem = i; + break; + } + } + + return current_filesystem; +} + + +int +file_ls(const char *dir) +{ + char fullpath[1024]; + const char *arg; + + if (current_filesystem == FSTYPE_NONE) { + printf("Can't list files without a filesystem!\n"); + return -1; + } + + if (ISDIRDELIM(*dir)) { + arg = dir; + } else { + sprintf(fullpath, "%s/%s", file_cwd, dir); + arg = fullpath; + } + return filesystems[current_filesystem].ls(arg); +} + + +long +file_read(const char *filename, void *buffer, unsigned long maxsize) +{ + char fullpath[1024]; + const char *arg; + + if (current_filesystem == FSTYPE_NONE) { + printf("Can't load file without a filesystem!\n"); + return -1; + } + + if (ISDIRDELIM(*filename)) { + arg = filename; + } else { + sprintf(fullpath, "%s/%s", file_cwd, filename); + arg = fullpath; + } + + return filesystems[current_filesystem].read(arg, buffer, maxsize); +} + +#endif /* #if (CONFIG_COMMANDS & CFG_CMD_FAT) */ diff --git a/fs/fdos/Makefile b/fs/fdos/Makefile new file mode 100755 index 0000000..c25e744 --- /dev/null +++ b/fs/fdos/Makefile @@ -0,0 +1,49 @@ +# +# (C) Copyright 2002 +# Stäubli Faverges - <www.staubli.com> +# Pierre AUBERT p.aubert@staubli.com +# +# +# See file CREDITS for list of people who contributed to this +# project. 
+# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# + +include $(TOPDIR)/config.mk + +LIB = libfdos.a + +AOBJS = +COBJS = fat.o vfat.o dev.o fdos.o fs.o subdir.o +OBJS = $(AOBJS) $(COBJS) + +#CPPFLAGS += + +all: $(LIB) $(AOBJS) + +$(LIB): .depend $(OBJS) + $(AR) crv $@ $(OBJS) + + +######################################################################### + +.depend: Makefile $(AOBJS:.o=.S) $(COBJS:.o=.c) + $(CC) -M $(CFLAGS) $(AOBJS:.o=.S) $(COBJS:.o=.c) > $@ + +sinclude .depend + +######################################################################### diff --git a/fs/fdos/dev.c b/fs/fdos/dev.c new file mode 100755 index 0000000..5dea5cd --- /dev/null +++ b/fs/fdos/dev.c @@ -0,0 +1,195 @@ +/* + * (C) Copyright 2002 + * Stäubli Faverges - <www.staubli.com> + * Pierre AUBERT p.aubert@staubli.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ + +#include <common.h> +#include <config.h> + +#include "dos.h" +#include "fdos.h" + +#if (CONFIG_COMMANDS & CFG_CMD_FDOS) + +#define NB_HEADS 2 +#define NB_TRACKS 80 +#define NB_SECTORS 18 + + +static int lastwhere; + +/*----------------------------------------------------------------------------- + * dev_open -- + *----------------------------------------------------------------------------- + */ +int dev_open (void) +{ + lastwhere = 0; + return (0); +} + +/*----------------------------------------------------------------------------- + * dev_read -- len and where are sectors number + *----------------------------------------------------------------------------- + */ +int dev_read (void *buffer, int where, int len) +{ + PRINTF ("dev_read (len = %d, where = %d)\n", len, where); + + /* Si on ne desire pas lire a la position courante, il faut un seek */ + if (where != lastwhere) { + if (!fdc_fdos_seek (where)) { + PRINTF ("seek error in dev_read"); + lastwhere = -1; + return (-1); + } + } + + if (!fdc_fdos_read (buffer, len)) { + PRINTF ("read error\n"); + lastwhere = -1; + return (-1); + } + lastwhere = where + len; + return (0); +} +/*----------------------------------------------------------------------------- + * check_dev -- verify the diskette format + *----------------------------------------------------------------------------- + */ +int check_dev (BootSector_t *boot, Fs_t *fs) +{ + unsigned int heads, sectors, tracks; + int BootP, Infp0, InfpX, InfTm; + int sect_per_track; + + /* Display 
Boot header */ + PRINTF ("Jump to boot code 0x%02x 0x%02x 0x%02x\n", + boot -> jump [0], boot -> jump [1], boot -> jump[2]); + PRINTF ("OEM name & version '%*.*s'\n", + BANNER_LG, BANNER_LG, boot -> banner ); + PRINTF ("Bytes per sector hopefully 512 %d\n", + __le16_to_cpu (boot -> secsiz)); + PRINTF ("Cluster size in sectors %d\n", + boot -> clsiz); + PRINTF ("Number of reserved (boot) sectors %d\n", + __le16_to_cpu (boot -> nrsvsect)); + PRINTF ("Number of FAT tables hopefully 2 %d\n", + boot -> nfat); + PRINTF ("Number of directory slots %d\n", + __le16_to_cpu (boot -> dirents)); + PRINTF ("Total sectors on disk %d\n", + __le16_to_cpu (boot -> psect)); + PRINTF ("Media descriptor=first byte of FAT %d\n", + boot -> descr); + PRINTF ("Sectors in FAT %d\n", + __le16_to_cpu (boot -> fatlen)); + PRINTF ("Sectors/track %d\n", + __le16_to_cpu (boot -> nsect)); + PRINTF ("Heads %d\n", + __le16_to_cpu (boot -> nheads)); + PRINTF ("number of hidden sectors %d\n", + __le32_to_cpu (boot -> nhs)); + PRINTF ("big total sectors %d\n", + __le32_to_cpu (boot -> bigsect)); + PRINTF ("physical drive ? 
%d\n", + boot -> physdrive); + PRINTF ("reserved %d\n", + boot -> reserved); + PRINTF ("dos > 4.0 diskette %d\n", + boot -> dos4); + PRINTF ("serial number %d\n", + __le32_to_cpu (boot -> serial)); + PRINTF ("disk label %*.*s\n", + LABEL_LG, LABEL_LG, boot -> label); + PRINTF ("FAT type %8.8s\n", + boot -> fat_type); + PRINTF ("reserved by 2M %d\n", + boot -> res_2m); + PRINTF ("2M checksum (not used) %d\n", + boot -> CheckSum); + PRINTF ("2MF format version %d\n", + boot -> fmt_2mf); + PRINTF ("1 if write track after format %d\n", + boot -> wt); + PRINTF ("data transfer rate on track 0 %d\n", + boot -> rate_0); + PRINTF ("data transfer rate on track<>0 %d\n", + boot -> rate_any); + PRINTF ("offset to boot program %d\n", + __le16_to_cpu (boot -> BootP)); + PRINTF ("T1: information for track 0 %d\n", + __le16_to_cpu (boot -> Infp0)); + PRINTF ("T2: information for track<>0 %d\n", + __le16_to_cpu (boot -> InfpX)); + PRINTF ("T3: track sectors size table %d\n", + __le16_to_cpu (boot -> InfTm)); + PRINTF ("Format date 0x%04x\n", + __le16_to_cpu (boot -> DateF)); + PRINTF ("Format time 0x%04x\n", + __le16_to_cpu (boot -> TimeF)); + + + /* information is extracted from boot sector */ + heads = __le16_to_cpu (boot -> nheads); + sectors = __le16_to_cpu (boot -> nsect); + fs -> tot_sectors = __le32_to_cpu (boot -> bigsect); + if (__le16_to_cpu (boot -> psect) != 0) { + fs -> tot_sectors = __le16_to_cpu (boot -> psect); + } + + sect_per_track = heads * sectors; + tracks = (fs -> tot_sectors + sect_per_track - 1) / sect_per_track; + + BootP = __le16_to_cpu (boot -> BootP); + Infp0 = __le16_to_cpu (boot -> Infp0); + InfpX = __le16_to_cpu (boot -> InfpX); + InfTm = __le16_to_cpu (boot -> InfTm); + + if (boot -> dos4 == EXTENDED_BOOT && + strncmp( boot->banner,"2M", 2 ) == 0 && + BootP < SZ_STD_SECTOR && + Infp0 < SZ_STD_SECTOR && + InfpX < SZ_STD_SECTOR && + InfTm < SZ_STD_SECTOR && + BootP >= InfTm + 2 && + InfTm >= InfpX && + InfpX >= Infp0 && + Infp0 >= 76 ) { + + return 
(-1); + } + + if (heads != NB_HEADS || + tracks != NB_TRACKS || + sectors != NB_SECTORS || + __le16_to_cpu (boot -> secsiz) != SZ_STD_SECTOR || + fs -> tot_sectors == 0 || + (fs -> tot_sectors % sectors) != 0) { + return (-1); + } + + return (0); +} + + +#endif diff --git a/fs/fdos/dos.h b/fs/fdos/dos.h new file mode 100755 index 0000000..7b27b01 --- /dev/null +++ b/fs/fdos/dos.h @@ -0,0 +1,175 @@ +/* + * (C) Copyright 2002 + * Stäubli Faverges - <www.staubli.com> + * Pierre AUBERT p.aubert@staubli.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ + +#ifndef _DOS_H_ +#define _DOS_H_ + +/* Definitions for Dos diskettes */ + +/* General definitions */ +#define SZ_STD_SECTOR 512 /* Standard sector size */ +#define MDIR_SIZE 32 /* Direntry size */ +#define FAT_BITS 12 /* Diskette use 12 bits fat */ + +#define MAX_PATH 128 /* Max size of the MSDOS PATH */ +#define MAX_DIR_SECS 64 /* Taille max d'un repertoire (en */ + /* secteurs) */ +/* Misc. 
definitions */ +#define DELMARK '\xe5' +#define EXTENDED_BOOT (0x29) +#define MEDIA_STD (0xf0) +#define JUMP_0_1 (0xe9) +#define JUMP_0_2 (0xeb) + +/* Boot size is 256 bytes, but we need to read almost a sector, then + assume bootsize is 512 */ +#define BOOTSIZE 512 + +/* Fat definitions for 12 bits fat */ +#define FAT12_MAX_NB 4086 +#define FAT12_LAST 0x0ff6 +#define FAT12_END 0x0fff + +/* file attributes */ +#define ATTR_READONLY 0x01 +#define ATTR_HIDDEN 0x02 +#define ATTR_SYSTEM 0x04 +#define ATTR_VOLUME 0x08 +#define ATTR_DIRECTORY 0x10 +#define ATTR_ARCHIVE 0x20 +#define ATTR_VSE 0x0f + +/* Name format */ +#define EXTCASE 0x10 +#define BASECASE 0x8 + +/* Definition of the boot sector */ +#define BANNER_LG 8 +#define LABEL_LG 11 + +typedef struct bootsector +{ + unsigned char jump [3]; /* 0 Jump to boot code */ + char banner [BANNER_LG]; /* 3 OEM name & version */ + unsigned short secsiz; /* 11 Bytes per sector hopefully 512 */ + unsigned char clsiz; /* 13 Cluster size in sectors */ + unsigned short nrsvsect; /* 14 Number of reserved (boot) sectors */ + unsigned char nfat; /* 16 Number of FAT tables hopefully 2 */ + unsigned short dirents; /* 17 Number of directory slots */ + unsigned short psect; /* 19 Total sectors on disk */ + unsigned char descr; /* 21 Media descriptor=first byte of FAT */ + unsigned short fatlen; /* 22 Sectors in FAT */ + unsigned short nsect; /* 24 Sectors/track */ + unsigned short nheads; /* 26 Heads */ + unsigned int nhs; /* 28 number of hidden sectors */ + unsigned int bigsect; /* 32 big total sectors */ + unsigned char physdrive; /* 36 physical drive ? 
*/ + unsigned char reserved; /* 37 reserved */ + unsigned char dos4; /* 38 dos > 4.0 diskette */ + unsigned int serial; /* 39 serial number */ + char label [LABEL_LG]; /* 43 disk label */ + char fat_type [8]; /* 54 FAT type */ + unsigned char res_2m; /* 62 reserved by 2M */ + unsigned char CheckSum; /* 63 2M checksum (not used) */ + unsigned char fmt_2mf; /* 64 2MF format version */ + unsigned char wt; /* 65 1 if write track after format */ + unsigned char rate_0; /* 66 data transfer rate on track 0 */ + unsigned char rate_any; /* 67 data transfer rate on track<>0 */ + unsigned short BootP; /* 68 offset to boot program */ + unsigned short Infp0; /* 70 T1: information for track 0 */ + unsigned short InfpX; /* 72 T2: information for track<>0 */ + unsigned short InfTm; /* 74 T3: track sectors size table */ + unsigned short DateF; /* 76 Format date */ + unsigned short TimeF; /* 78 Format time */ + unsigned char junk [BOOTSIZE - 80]; /* 80 remaining data */ +} __attribute__ ((packed)) BootSector_t; + +/* Structure of a directory entry (MDIR_SIZE bytes on disk) */ +typedef struct directory { + char name [8]; /* file name */ + char ext [3]; /* file extension */ + unsigned char attr; /* attribute byte (ATTR_xxx bits) */ + unsigned char Case; /* case of short filename */ + unsigned char reserved [9]; /* ?? 
*/ + unsigned char time [2]; /* time stamp */ + unsigned char date [2]; /* date stamp */ + unsigned short start; /* starting cluster number */ + unsigned int size; /* size of the file */ +} __attribute__ ((packed)) Directory_t; + + +#define MAX_VFAT_SUBENTRIES 20 +#define VSE_NAMELEN 13 + +#define VSE1SIZE 5 +#define VSE2SIZE 6 +#define VSE3SIZE 2 + +#define VBUFSIZE ((MAX_VFAT_SUBENTRIES * VSE_NAMELEN) + 1) + +#define MAX_VNAMELEN (255) + +#define VSE_PRESENT 0x01 +#define VSE_LAST 0x40 +#define VSE_MASK 0x1f + +/* Flags used by vfat_lookup */ +#define DO_OPEN 1 +#define ACCEPT_PLAIN 0x20 +#define ACCEPT_DIR 0x10 +#define ACCEPT_LABEL 0x08 +#define SINGLE 2 +#define MATCH_ANY 0x40 + +struct vfat_subentry { + unsigned char id; /* VSE_LAST flags the last subentry, */ + /* VSE_MASK extracts its number */ + char text1 [VSE1SIZE * 2]; /* Characters encoded on 16 bits */ + unsigned char attribute; /* 0x0f for VFAT entries */ + unsigned char hash1; /* always 0 */ + unsigned char sum; /* Checksum of the short name */ + char text2 [VSE2SIZE * 2]; /* Characters encoded on 16 bits */ + unsigned char sector_l; /* 0 for VFAT entries */ + unsigned char sector_u; /* 0 for VFAT entries */ + char text3 [VSE3SIZE * 2]; /* Characters encoded on 16 bits */ +} __attribute__ ((packed)) ; + +struct vfat_state { + char name [VBUFSIZE]; + int status; /* is now a bit map of 32 bits */ + int subentries; + unsigned char sum; /* no need to remember the sum for each */ + /* entry, it is the same anyways */ +} __attribute__ ((packed)) ; + +/* Conversion macros */ +#define DOS_YEAR(dir) (((dir)->date[1] >> 1) + 1980) +#define DOS_MONTH(dir) (((((dir)->date[1]&0x1) << 3) + ((dir)->date[0] >> 5))) +#define DOS_DAY(dir) ((dir)->date[0] & 0x1f) +#define DOS_HOUR(dir) ((dir)->time[1] >> 3) +#define DOS_MINUTE(dir) (((((dir)->time[1]&0x7) << 3) + ((dir)->time[0] >> 5))) +#define DOS_SEC(dir) (((dir)->time[0] & 0x1f) * 2) + + +#endif diff --git a/fs/fdos/fat.c b/fs/fdos/fat.c new file mode 100755 index 0000000..2ef2371 --- 
/dev/null +++ b/fs/fdos/fat.c @@ -0,0 +1,142 @@ +/* + * (C) Copyright 2002 + * Stäubli Faverges - <www.staubli.com> + * Pierre AUBERT p.aubert@staubli.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ + +#include <common.h> +#include <config.h> +#include <malloc.h> + +#if (CONFIG_COMMANDS & CFG_CMD_FDOS) + +#include "dos.h" +#include "fdos.h" + + +/*----------------------------------------------------------------------------- + * fat_decode -- Return the 12-bit FAT entry of cluster num (1 if num is bad) + *----------------------------------------------------------------------------- + */ +unsigned int fat_decode (Fs_t *fs, unsigned int num) +{ + unsigned int start = num * 3 / 2; + unsigned char *address = fs -> fat_buf + start; + + /* address [1] is read below, so start + 1 must be a valid index */ + if (num < 2 || start + 1 >= (fs -> fat_len * SZ_STD_SECTOR)) + return 1; + + if (num & 1) + return ((address [1] & 0xff) << 4) | ((address [0] & 0xf0 ) >> 4); + else + return ((address [1] & 0xf) << 8) | (address [0] & 0xff ); +} +/*----------------------------------------------------------------------------- + * check_fat -- + *----------------------------------------------------------------------------- + */ +static int check_fat (Fs_t *fs) +{ + int i, f; + + /* Cluster verification */ + for (i = 3 ; i < fs -> num_clus; i++){ + f = 
fat_decode (fs, i); + if (f < FAT12_LAST && f > fs -> num_clus){ + /* Wrong cluster number detected */ + return (-1); + } + } + return (0); +} +/*----------------------------------------------------------------------------- + * read_one_fat -- Read FAT copy nfat into fs -> fat_buf and sanity-check it + *----------------------------------------------------------------------------- + */ +static int read_one_fat (BootSector_t *boot, Fs_t *fs, int nfat) +{ + if (dev_read (fs -> fat_buf, + (fs -> fat_start + nfat * fs -> fat_len), + fs -> fat_len) < 0) { + return (-1); + } + + if (fs -> fat_buf [0] || fs -> fat_buf [1] || fs -> fat_buf [2]) { + if ((fs -> fat_buf [0] != boot -> descr && + (fs -> fat_buf [0] != 0xf9 || boot -> descr != MEDIA_STD)) || + fs -> fat_buf [0] < MEDIA_STD){ + /* Unknown Media */ + return (-1); + } + if (fs -> fat_buf [1] != 0xff || fs -> fat_buf [2] != 0xff){ + /* FAT doesn't start with good values */ + return (-1); + } + } + + if (fs -> num_clus >= FAT12_MAX_NB) { + /* Too many clusters for a 12-bit FAT */ + return (-1); + } + + return check_fat (fs); +} +/*----------------------------------------------------------------------------- + * read_fat -- (Re)allocate the FAT buffer and load the first valid FAT copy + *----------------------------------------------------------------------------- + */ +int read_fat (BootSector_t *boot, Fs_t *fs) +{ + unsigned int buflen; + int i; + + /* Allocate Fat Buffer */ + buflen = fs -> fat_len * SZ_STD_SECTOR; + if (fs -> fat_buf) { + free (fs -> fat_buf); + } + + if ((fs -> fat_buf = malloc (buflen)) == NULL) { + return (-1); + } + + /* Try to read each Fat */ + for (i = 0; i< fs -> nb_fat; i++){ + if (read_one_fat (boot, fs, i) == 0) { + /* Fat is OK */ + fs -> num_fat = i; + break; + } + } + + if (i == fs -> nb_fat){ + return (-1); + } + + if (fs -> fat_len > (((fs -> num_clus + 2) * + (FAT_BITS / 4) -1 ) / 2 / + SZ_STD_SECTOR + 1)) { /* FAT has more sectors than num_clus needs */ + return (-1); + } + return (0); +} + +#endif diff --git a/fs/fdos/fdos.c b/fs/fdos/fdos.c new file mode 100755 index 0000000..a29f43d --- /dev/null +++ b/fs/fdos/fdos.c @@ -0,0 +1,175 @@ +/* + * (C) Copyright 
2002 + * Stäubli Faverges - <www.staubli.com> + * Pierre AUBERT p.aubert@staubli.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ + +#include <common.h> +#include <config.h> + +#if (CONFIG_COMMANDS & CFG_CMD_FDOS) +#include <malloc.h> +#include "dos.h" +#include "fdos.h" + + +const char *month [] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; + +Fs_t fs; +File_t file; + +/*----------------------------------------------------------------------------- + * dos_open -- + *----------------------------------------------------------------------------- + */ +int dos_open(char *name) +{ + int lg; + int entry; + char *fname; + + /* We need to suppress the " char around the name */ + if (name [0] == '"') { + name ++; + } + lg = strlen (name); + if (name [lg - 1] == '"') { + name [lg - 1] = '\0'; + } + + /* Open file system */ + if (fs_init (&fs) < 0) { + return -1; + } + + /* Init the file descriptor */ + file.name = name; + file.fs = &fs; + + /* find the subdirectory containing the file */ + if (open_subdir (&file) < 0) { + return (-1); + } + + fname = basename (name); + + /* if we try to open root directory */ + if (*fname == '\0') { + file.file = file.subdir; + return (0); + 
} + + /* find the file in the subdir */ + entry = 0; + if (vfat_lookup (&file.subdir, + file.fs, + &file.file.dir, + &entry, + 0, + fname, + ACCEPT_DIR | ACCEPT_PLAIN | SINGLE | DO_OPEN, + 0, + &file.file) != 0) { + /* File not found */ + printf ("File not found\n"); + return (-1); + } + + return 0; +} + +/*----------------------------------------------------------------------------- + * dos_read -- + *----------------------------------------------------------------------------- + */ +int dos_read (ulong addr) +{ + int read = 0, nb; + + /* Try to boot a directory ? */ + if (file.file.dir.attr & (ATTR_DIRECTORY | ATTR_VOLUME)) { + printf ("Unable to boot %s !!\n", file.name); + return (-1); + } + while (read < file.file.FileSize) { + PRINTF ("read_file (%ld)\n", (file.file.FileSize - read)); + nb = read_file (&fs, + &file.file, + (char *)addr + read, + read, + (file.file.FileSize - read)); + PRINTF ("read_file -> %d\n", nb); + if (nb < 0) { + printf ("read error\n"); + return (-1); + } + read += nb; + } + return (read); +} +/*----------------------------------------------------------------------------- + * dos_dir -- + *----------------------------------------------------------------------------- + */ +int dos_dir (void) +{ + int entry; + Directory_t dir; + char *name; + + + if ((file.file.dir.attr & ATTR_DIRECTORY) == 0) { + printf ("%s: not a directory !!\n", file.name); + return (1); + } + entry = 0; + if ((name = malloc (MAX_VNAMELEN + 1)) == NULL) { + PRINTF ("Allcation error\n"); + return (1); + } + + while (vfat_lookup (&file.file, + file.fs, + &dir, + &entry, + 0, + NULL, + ACCEPT_DIR | ACCEPT_PLAIN | MATCH_ANY, + name, + NULL) == 0) { + /* Display file info */ + printf ("%3.3s %9d %s %02d %04d %02d:%02d:%02d %s\n", + (dir.attr & ATTR_DIRECTORY) ? 
"dir" : " ", + __le32_to_cpu (dir.size), + month [DOS_MONTH (&dir) - 1], + DOS_DAY (&dir), + DOS_YEAR (&dir), + DOS_HOUR (&dir), + DOS_MINUTE (&dir), + DOS_SEC (&dir), + name); + + } + free (name); + return (0); +} + +#endif diff --git a/fs/fdos/fdos.h b/fs/fdos/fdos.h new file mode 100755 index 0000000..e28c22f --- /dev/null +++ b/fs/fdos/fdos.h @@ -0,0 +1,116 @@ +/* + * (C) Copyright 2002 + * Stäubli Faverges - <www.staubli.com> + * Pierre AUBERT p.aubert@staubli.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ + +#ifndef _FDOS_H_ +#define _FDOS_H_ + + +#undef FDOS_DEBUG + +#ifdef FDOS_DEBUG +#define PRINTF(fmt,args...) printf (fmt ,##args) +#else +#define PRINTF(fmt,args...) 
+#endif + +/* Data structure describing media */ +typedef struct fs +{ + unsigned long tot_sectors; + + int cluster_size; + int num_clus; + + int fat_start; + int fat_len; + int nb_fat; + int num_fat; + + int dir_start; + int dir_len; + + unsigned char *fat_buf; + +} Fs_t; + +/* Data structure describing one file system slot */ +typedef struct slot { + int (*map) (struct fs *fs, + struct slot *file, + int where, + int *len); + unsigned long FileSize; + + unsigned short int FirstAbsCluNr; + unsigned short int PreviousAbsCluNr; + unsigned short int PreviousRelCluNr; + + Directory_t dir; +} Slot_t; + +typedef struct file { + char *name; + int Case; + Fs_t *fs; + Slot_t subdir; + Slot_t file; +} File_t; + + +/* dev.c */ +int dev_read (void *buffer, int where, int len); +int dev_open (void); +int check_dev (BootSector_t *boot, Fs_t *fs); + +/* fat.c */ +unsigned int fat_decode (Fs_t *fs, unsigned int num); +int read_fat (BootSector_t *boot, Fs_t *fs); + +/* vfat.c */ +int vfat_lookup (Slot_t *dir, + Fs_t *fs, + Directory_t *dirent, + int *entry, + int *vfat_start, + char *filename, + int flags, + char *outname, + Slot_t *file); + +/* subdir.c */ +char *basename (char *name); +int open_subdir (File_t *desc); +int open_file (Slot_t *file, Directory_t *dir); +int read_file (Fs_t *fs, + Slot_t *file, + char *buf, + int where, + int len); +void init_subdir (void); + +/* fs.c */ +int fs_init (Fs_t *fs); + + +#endif diff --git a/fs/fdos/fs.c b/fs/fdos/fs.c new file mode 100755 index 0000000..3b9d09e --- /dev/null +++ b/fs/fdos/fs.c @@ -0,0 +1,118 @@ +/* + * (C) Copyright 2002 + * Stäubli Faverges - <www.staubli.com> + * Pierre AUBERT p.aubert@staubli.com + * + * See file CREDITS for list of people who contributed to this + * project. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ + +#include <common.h> +#include <config.h> +#include <malloc.h> + +#if (CONFIG_COMMANDS & CFG_CMD_FDOS) + +#include "dos.h" +#include "fdos.h" + + +/*----------------------------------------------------------------------------- + * fill_fs -- Read info on file system (0 if OK, -1 if cluster size is 0) + *----------------------------------------------------------------------------- + */ +static int fill_fs (BootSector_t *boot, Fs_t *fs) +{ + if (boot -> clsiz == 0) return (-1); /* cluster size is the divisor below */ + fs -> fat_start = __le16_to_cpu (boot -> nrsvsect); + fs -> fat_len = __le16_to_cpu (boot -> fatlen); + fs -> nb_fat = boot -> nfat; + + fs -> dir_start = fs -> fat_start + fs -> nb_fat * fs -> fat_len; + fs -> dir_len = __le16_to_cpu (boot -> dirents) * MDIR_SIZE / SZ_STD_SECTOR; + fs -> cluster_size = boot -> clsiz; + fs -> num_clus = (fs -> tot_sectors - fs -> dir_start - fs -> dir_len) / fs -> cluster_size; + + return (0); +} + +/*----------------------------------------------------------------------------- + * fs_init -- + *----------------------------------------------------------------------------- + */ +int fs_init (Fs_t *fs) +{ + BootSector_t *boot; + + /* Initialize physical device */ + if (dev_open () < 0) { + PRINTF ("Unable to initialize the fdc\n"); + return (-1); + } + init_subdir (); + + /* Allocate space to read the boot 
sector */ + if ((boot = (BootSector_t *)malloc (sizeof (BootSector_t))) == NULL) { + PRINTF ("Unable to allocate space for boot sector\n"); + return (-1); + } + + /* read boot sector */ + if (dev_read (boot, 0, 1)){ /* NOTE(review): other callers test dev_read () < 0 - confirm */ + PRINTF ("Error during boot sector read\n"); + free (boot); + return (-1); + } + + /* verify it is a DOS diskette: first byte must be JUMP_0_1 or JUMP_0_2 */ + if (boot -> jump [0] != JUMP_0_1 && boot -> jump [0] != JUMP_0_2) { + PRINTF ("Not a DOS diskette\n"); + free (boot); + return (-1); + } + + if (boot -> descr < MEDIA_STD) { + /* We handle only media descriptors >= MEDIA_STD (type F0) */ + PRINTF ("unrecognized diskette type\n"); + free (boot); + return (-1); + } + + if (check_dev (boot, fs) < 0) { + PRINTF ("Bad diskette\n"); + free (boot); + return (-1); + } + + if (fill_fs (boot, fs) < 0) { + free (boot); + + return (-1); + } + + /* Read FAT */ + if (read_fat (boot, fs) < 0) { + free (boot); + return (-1); + } + + free (boot); + return (0); +} + +#endif diff --git a/fs/fdos/subdir.c b/fs/fdos/subdir.c new file mode 100755 index 0000000..97b2504 --- /dev/null +++ b/fs/fdos/subdir.c @@ -0,0 +1,348 @@ +/* + * (C) Copyright 2002 + * Stäubli Faverges - <www.staubli.com> + * Pierre AUBERT p.aubert@staubli.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ + +#include <common.h> +#include <config.h> +#include <malloc.h> + +#if (CONFIG_COMMANDS & CFG_CMD_FDOS) + +#include "dos.h" +#include "fdos.h" + +static int cache_sect; +static unsigned char cache [SZ_STD_SECTOR]; + + +#define min(x,y) ((x)<(y)?(x):(y)) + +static int descend (Slot_t *parent, + Fs_t *fs, + char *path); + +/*----------------------------------------------------------------------------- + * init_subdir -- + *----------------------------------------------------------------------------- + */ +void init_subdir (void) +{ + cache_sect = -1; +} +/*----------------------------------------------------------------------------- + * basename -- + *----------------------------------------------------------------------------- + */ +char *basename (char *name) +{ + register char *cptr; + + if (!name || !*name) { + return (""); + } + + for (cptr= name; *cptr++; ); + while (--cptr >= name) { + if (*cptr == '/') { + return (cptr + 1); + } + } + return(name); +} +/*----------------------------------------------------------------------------- + * root_map -- + *----------------------------------------------------------------------------- + */ +static int root_map (Fs_t *fs, Slot_t *file, int where, int *len) +{ + *len = min (*len, fs -> dir_len * SZ_STD_SECTOR - where); + if (*len < 0 ) { + *len = 0; + return (-1); + } + return fs -> dir_start * SZ_STD_SECTOR + where; +} +/*----------------------------------------------------------------------------- + * normal_map -- + *----------------------------------------------------------------------------- + */ +static int normal_map (Fs_t *fs, Slot_t *file, int where, int *len) +{ + int offset; + int NrClu; + unsigned short RelCluNr; + unsigned short CurCluNr; + unsigned short NewCluNr; + unsigned short 
AbsCluNr; + int clus_size; + + clus_size = fs -> cluster_size * SZ_STD_SECTOR; + offset = where % clus_size; + + *len = min (*len, file -> FileSize - where); + + if (*len < 0 ) { + *len = 0; + return (0); + } + + if (file -> FirstAbsCluNr < 2){ + *len = 0; + return (0); + } + + RelCluNr = where / clus_size; + + if (RelCluNr >= file -> PreviousRelCluNr){ + CurCluNr = file -> PreviousRelCluNr; + AbsCluNr = file -> PreviousAbsCluNr; + } else { + CurCluNr = 0; + AbsCluNr = file -> FirstAbsCluNr; + } + + + NrClu = (offset + *len - 1) / clus_size; + while (CurCluNr <= RelCluNr + NrClu) { + if (CurCluNr == RelCluNr){ + /* we have reached the beginning of our zone. Save + * coordinates */ + file -> PreviousRelCluNr = RelCluNr; + file -> PreviousAbsCluNr = AbsCluNr; + } + NewCluNr = fat_decode (fs, AbsCluNr); + if (NewCluNr == 1 || NewCluNr == 0) { + PRINTF("Fat problem while decoding %d %x\n", + AbsCluNr, NewCluNr); + return (-1); + } + if (CurCluNr == RelCluNr + NrClu) { + break; + } + + if (CurCluNr < RelCluNr && NewCluNr == FAT12_END) { + *len = 0; + return 0; + } + + if (CurCluNr >= RelCluNr && NewCluNr != AbsCluNr + 1) + break; + CurCluNr++; + AbsCluNr = NewCluNr; + } + + *len = min (*len, (1 + CurCluNr - RelCluNr) * clus_size - offset); + + return (((file -> PreviousAbsCluNr - 2) * fs -> cluster_size + + fs -> dir_start + fs -> dir_len) * + SZ_STD_SECTOR + offset); +} +/*----------------------------------------------------------------------------- + * open_subdir -- open the subdir containing the file + *----------------------------------------------------------------------------- + */ +int open_subdir (File_t *desc) +{ + char *pathname; + char *tmp, *s, *path; + char terminator; + + if ((pathname = (char *)malloc (MAX_PATH)) == NULL) { + return (-1); + } + + strcpy (pathname, desc -> name); + + /* Suppress file name */ + tmp = basename (pathname); + *tmp = '\0'; + + /* root directory init */ + desc -> subdir.FirstAbsCluNr = 0; + desc -> subdir.FileSize = -1; + desc 
-> subdir.map = root_map; + desc -> subdir.dir.attr = ATTR_DIRECTORY; + + tmp = pathname; + for (s = tmp; ; ++s) { + if (*s == '/' || *s == '\0') { + path = tmp; + terminator = *s; + *s = '\0'; + if (s != tmp && strcmp (path,".")) { + if (descend (&desc -> subdir, desc -> fs, path) < 0) { + free (pathname); + return (-1); + } + } + if (terminator == 0) { + break; + } + tmp = s + 1; + } + } + free (pathname); + return (0); +} +/*----------------------------------------------------------------------------- + * descend -- + *----------------------------------------------------------------------------- + */ +static int descend (Slot_t *parent, + Fs_t *fs, + char *path) +{ + int entry; + Slot_t SubDir; + + if(path[0] == '\0' || strcmp (path, ".") == 0) { + return (0); + } + + + entry = 0; + if (vfat_lookup (parent, + fs, + &(SubDir.dir), + &entry, + 0, + path, + ACCEPT_DIR | SINGLE | DO_OPEN, + 0, + &SubDir) == 0) { + *parent = SubDir; + return (0); + } + + if (strcmp(path, "..") == 0) { + parent -> FileSize = -1; + parent -> FirstAbsCluNr = 0; + parent -> map = root_map; + return (0); + } + return (-1); +} +/*----------------------------------------------------------------------------- + * open_file -- + *----------------------------------------------------------------------------- + */ +int open_file (Slot_t *file, Directory_t *dir) +{ + int first; + unsigned long size; + + first = __le16_to_cpu (dir -> start); + + if(first == 0 && + (dir -> attr & ATTR_DIRECTORY) != 0) { + file -> FirstAbsCluNr = 0; + file -> FileSize = -1; + file -> map = root_map; + return (0); + } + + if ((dir -> attr & ATTR_DIRECTORY) != 0) { + size = (1UL << 31) - 1; + } + else { + size = __le32_to_cpu (dir -> size); + } + + file -> map = normal_map; + file -> FirstAbsCluNr = first; + file -> PreviousRelCluNr = 0xffff; + file -> FileSize = size; + return (0); +} +/*----------------------------------------------------------------------------- + * read_file -- + 
*----------------------------------------------------------------------------- + */ +int read_file (Fs_t *fs, + Slot_t *file, + char *buf, + int where, + int len) +{ + int pos; + int read, nb, sect, offset; + + pos = file -> map (fs, file, where, &len); + if (pos < 0) { + return -1; + } + if (len == 0) { + return (0); + } + + /* Compute sector number */ + sect = pos / SZ_STD_SECTOR; + offset = pos % SZ_STD_SECTOR; + read = 0; + + if (offset) { + /* Read doesn't start at the sector beginning: use our cache */ + if (sect != cache_sect) { + if (dev_read (cache, sect, 1) < 0) { + cache_sect = -1; /* cache content can no longer be trusted */ + return (-1); + } + cache_sect = sect; + } + nb = min (len, SZ_STD_SECTOR - offset); + + memcpy (buf, cache + offset, nb); + read += nb; + len -= nb; + sect += 1; + } + + if (len > SZ_STD_SECTOR) { + nb = (len - 1) / SZ_STD_SECTOR; + if (dev_read (buf + read, sect, nb) < 0) { + return ((read) ? read : -1); + } + /* update sector position */ + sect += nb; + + /* Update byte position */ + nb *= SZ_STD_SECTOR; + read += nb; + len -= nb; + } + + if (len) { + if (sect != cache_sect) { + if (dev_read (cache, sect, 1) < 0) { + cache_sect = -1; /* invalidate before returning (was unreachable) */ + return ((read) ? read : -1); + } + cache_sect = sect; + } + + memcpy (buf + read, cache, len); + read += len; + } + return (read); +} +#endif diff --git a/fs/fdos/vfat.c b/fs/fdos/vfat.c new file mode 100755 index 0000000..46a464b --- /dev/null +++ b/fs/fdos/vfat.c @@ -0,0 +1,357 @@ +/* + * (C) Copyright 2002 + * Stäubli Faverges - <www.staubli.com> + * Pierre AUBERT p.aubert@staubli.com + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ + +#include <common.h> +#include <config.h> + +#if (CONFIG_COMMANDS & CFG_CMD_FDOS) +#include <linux/ctype.h> + +#include "dos.h" +#include "fdos.h" + +static int dir_read (Fs_t *fs, + Slot_t *dir, + Directory_t *dirent, + int num, + struct vfat_state *v); + +static int unicode_read (char *in, char *out, int num); +static int match (const char *s, const char *p); +static unsigned char sum_shortname (char *name); +static int check_vfat (struct vfat_state *v, Directory_t *dir); +static char *conv_name (char *name, char *ext, char Case, char *ans); + + +/*----------------------------------------------------------------------------- + * clear_vfat -- + *----------------------------------------------------------------------------- + */ +static void clear_vfat (struct vfat_state *v) +{ + v -> subentries = 0; + v -> status = 0; +} + +/*----------------------------------------------------------------------------- + * vfat_lookup -- + *----------------------------------------------------------------------------- + */ +int vfat_lookup (Slot_t *dir, + Fs_t *fs, + Directory_t *dirent, + int *entry, + int *vfat_start, + char *filename, + int flags, + char *outname, + Slot_t *file) +{ + int found; + struct vfat_state vfat; + char newfile [VSE_NAMELEN]; + int vfat_present = 0; + + if (*entry == -1) { + return -1; + } + + found = 0; + clear_vfat (&vfat); + while (1) { + if (dir_read (fs, dir, dirent, *entry, &vfat) < 0) { + if (vfat_start) { + *vfat_start = *entry; + } + break; + } + (*entry)++; + + /* Empty slot 
*/ + if (dirent -> name[0] == '\0'){ + if (vfat_start == 0) { + break; + } + continue; + } + + if (dirent -> attr == ATTR_VSE) { + /* VSE entry, continue */ + continue; + } + if ( (dirent -> name [0] == DELMARK) || + ((dirent -> attr & ATTR_DIRECTORY) != 0 && + (flags & ACCEPT_DIR) == 0) || + ((dirent -> attr & ATTR_VOLUME) != 0 && + (flags & ACCEPT_LABEL) == 0) || + (((dirent -> attr & (ATTR_DIRECTORY | ATTR_VOLUME)) == 0) && + (flags & ACCEPT_PLAIN) == 0)) { + clear_vfat (&vfat); + continue; + } + + vfat_present = check_vfat (&vfat, dirent); + if (vfat_start) { + *vfat_start = *entry - 1; + if (vfat_present) { + *vfat_start -= vfat.subentries; + } + } + + if (dirent -> attr & ATTR_VOLUME) { + strncpy (newfile, dirent -> name, 8); + newfile [8] = '\0'; + strncat (newfile, dirent -> ext, 3); + newfile [11] = '\0'; + } + else { + conv_name (dirent -> name, dirent -> ext, dirent -> Case, newfile); + } + + if (flags & MATCH_ANY) { + found = 1; + break; + } + + if ((vfat_present && match (vfat.name, filename)) || + (match (newfile, filename))) { + found = 1; + break; + } + clear_vfat (&vfat); + } + + if (found) { + if ((flags & DO_OPEN) && file) { + if (open_file (file, dirent) < 0) { + return (-1); + } + } + if (outname) { + if (vfat_present) { + strcpy (outname, vfat.name); + } + else { + strcpy (outname, newfile); + } + } + return (0); /* File found */ + } else { + *entry = -1; + return -1; /* File not found */ + } +} + +/*----------------------------------------------------------------------------- + * dir_read -- Read one directory entry + *----------------------------------------------------------------------------- + */ +static int dir_read (Fs_t *fs, + Slot_t *dir, + Directory_t *dirent, + int num, + struct vfat_state *v) +{ + + /* read the directory entry */ + if (read_file (fs, + dir, + (char *)dirent, + num * MDIR_SIZE, + MDIR_SIZE) != MDIR_SIZE) { + return (-1); + } + + if (v && (dirent -> attr == ATTR_VSE)) { + struct vfat_subentry *vse; + unsigned char 
id, last_flag; + char *c; + + vse = (struct vfat_subentry *) dirent; + id = vse -> id & VSE_MASK; + last_flag = (vse -> id & VSE_LAST); + if (id == 0 || id > MAX_VFAT_SUBENTRIES) { + /* Invalid VSE entry (id - 1 is used as shift count and index) */ + return (-1); + } + + + /* Decode VSE */ + if(v -> sum != vse -> sum) { + clear_vfat (v); + v -> sum = vse -> sum; + } + + + v -> status |= 1 << (id - 1); + if (last_flag) { + v -> subentries = id; + } + + c = &(v -> name [VSE_NAMELEN * (id - 1)]); + c += unicode_read (vse->text1, c, VSE1SIZE); + c += unicode_read (vse->text2, c, VSE2SIZE); + c += unicode_read (vse->text3, c, VSE3SIZE); + + if (last_flag) { + *c = '\0'; /* Null terminate long name */ + } + + } + return (0); +} + +/*----------------------------------------------------------------------------- + * unicode_read -- Copy num 2-byte chars to out ('_' if 2nd byte is non-zero) + *----------------------------------------------------------------------------- + */ +static int unicode_read (char *in, char *out, int num) +{ + int j; + + for (j = 0; j < num; ++j) { + if (in [1]) + *out = '_'; + else + *out = in [0]; + out ++; + in += 2; + } + return num; +} + +/*----------------------------------------------------------------------------- + * match -- Return 1 if s equals p ignoring case, 0 otherwise + *----------------------------------------------------------------------------- + */ +static int match (const char *s, const char *p) +{ + + for (; *p != '\0'; ) { + if (toupper (*s) != toupper (*p)) { + return (0); + } + p++; + s++; + } + + if (*s != '\0') { + return (0); + } + else { + return (1); + } +} +/*----------------------------------------------------------------------------- + * sum_shortname -- + *----------------------------------------------------------------------------- + */ +static unsigned char sum_shortname (char *name) +{ + unsigned char sum; + int j; + + for (j = sum = 0; j < 11; ++j) { + sum = ((sum & 1) ? 0x80 : 0) + (sum >> 1) + + (name [j] ? 
name [j] : ' '); + } + return (sum); +} +/*----------------------------------------------------------------------------- + * check_vfat -- + * Return 1 if long name is valid, 0 else + *----------------------------------------------------------------------------- + */ +static int check_vfat (struct vfat_state *v, Directory_t *dir) +{ + char name[12]; + + if (v -> subentries == 0) { + return 0; + } + + strncpy (name, dir -> name, 8); + strncpy (name + 8, dir -> ext, 3); + name [11] = '\0'; + + if (v -> sum != sum_shortname (name)) { + return 0; + } + + if( (v -> status & ((1 << v -> subentries) - 1)) != + (1 << v -> subentries) - 1) { + return 0; + } + v->name [VSE_NAMELEN * v -> subentries] = 0; + + return 1; +} +/*----------------------------------------------------------------------------- + * conv_name -- + *----------------------------------------------------------------------------- + */ +static char *conv_name (char *name, char *ext, char Case, char *ans) +{ + char tname [9], text [4]; + int i; + + i = 0; + while (i < 8 && name [i] != ' ' && name [i] != '\0') { + tname [i] = name [i]; + i++; + } + tname [i] = '\0'; + + if (Case & BASECASE) { + for (i = 0; i < 8 && tname [i]; i++) { + tname [i] = tolower (tname [i]); + } + } + + i = 0; + while (i < 3 && ext [i] != ' ' && ext [i] != '\0') { + text [i] = ext [i]; + i++; + } + text [i] = '\0'; + + if (Case & EXTCASE){ + for (i = 0; i < 3 && text [i]; i++) { + text [i] = tolower (text [i]); + } + } + + if (*text) { + strcpy (ans, tname); + strcat (ans, "."); + strcat (ans, text); + } + else { + strcpy(ans, tname); + } + return (ans); +} + + +#endif diff --git a/fs/jffs2/Makefile b/fs/jffs2/Makefile new file mode 100755 index 0000000..f28b17a --- /dev/null +++ b/fs/jffs2/Makefile @@ -0,0 +1,48 @@ +# +# (C) Copyright 2000, 2001 +# Wolfgang Denk, DENX Software Engineering, wd@denx.de. +# +# See file CREDITS for list of people who contributed to this +# project. 
+# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# + +include $(TOPDIR)/config.mk + +LIB = libjffs2.a + +AOBJS = +COBJS = jffs2_1pass.o compr_rtime.o compr_rubin.o compr_zlib.o mini_inflate.o +COBJS += compr_lzo.o compr_lzari.o +OBJS = $(AOBJS) $(COBJS) + +#CPPFLAGS += + +all: $(LIB) $(AOBJS) + +$(LIB): .depend $(OBJS) + $(AR) crv $@ $(OBJS) + + +######################################################################### + +.depend: Makefile $(AOBJS:.o=.S) $(COBJS:.o=.c) + $(CC) -M $(CFLAGS) $(AOBJS:.o=.S) $(COBJS:.o=.c) > $@ + +sinclude .depend + +######################################################################### diff --git a/fs/jffs2/compr_lzari.c b/fs/jffs2/compr_lzari.c new file mode 100755 index 0000000..828b6e5 --- /dev/null +++ b/fs/jffs2/compr_lzari.c @@ -0,0 +1,262 @@ +/* + * JFFS2 -- Journalling Flash File System, Version 2. + * + * Copyright (C) 2004 Patrik Kluba, + * University of Szeged, Hungary + * + * For licensing information, see the file 'LICENCE' in the + * jffs2 directory. 
+ * + * $Id: compr_lzari.c,v 1.3 2004/06/23 16:34:39 havasi Exp $ + * + */ + +/* + Lempel-Ziv-Arithmetic coding compression module for jffs2 + Based on the LZARI source included in LDS (lossless datacompression sources) +*/ + +/* -*- Mode: C; indent-tabs-mode: t; c-basic-offset: 4; tab-width: 4 -*- */ + +/* +Original copyright follows: + +************************************************************** + LZARI.C -- A Data Compression Program + (tab = 4 spaces) +************************************************************** + 4/7/1989 Haruhiko Okumura + Use, distribute, and modify this program freely. + Please send me your improved versions. + PC-VAN SCIENCE + NIFTY-Serve PAF01022 + CompuServe 74050,1022 +************************************************************** + +LZARI.C (c)1989 by Haruyasu Yoshizaki, Haruhiko Okumura, and Kenji Rikitake. +All rights reserved. Permission granted for non-commercial use. + +*/ + +/* + + 2004-02-18 pajko <pajko(AT)halom(DOT)u-szeged(DOT)hu> + Removed unused variables and fixed no return value + + 2004-02-16 pajko <pajko(AT)halom(DOT)u-szeged(DOT)hu> + Initial release + +*/ + + +#include <config.h> +#if ((CONFIG_COMMANDS & CFG_CMD_JFFS2) && defined(CONFIG_JFFS2_LZO_LZARI)) + +#include <linux/stddef.h> +#include <jffs2/jffs2.h> + + +#define N 4096 /* size of ring buffer */ +#define F 60 /* upper limit for match_length */ +#define THRESHOLD 2 /* encode string into position and length + if match_length is greater than this */ +#define NIL N /* index for root of binary search trees */ + +static unsigned char + text_buf[N + F - 1]; /* ring buffer of size N, + with extra F-1 bytes to facilitate string comparison */ + +/********** Arithmetic Compression **********/ + +/* If you are not familiar with arithmetic compression, you should read + I. E. Witten, R. M. Neal, and J. G. Cleary, + Communications of the ACM, Vol. 30, pp. 520-540 (1987), + from which much have been borrowed. 
*/ + +#define M 15 + +/* Q1 (= 2 to the M) must be sufficiently large, but not so + large as the unsigned long 4 * Q1 * (Q1 - 1) overflows. */ + +#define Q1 (1UL << M) +#define Q2 (2 * Q1) +#define Q3 (3 * Q1) +#define Q4 (4 * Q1) +#define MAX_CUM (Q1 - 1) + +#define N_CHAR (256 - THRESHOLD + F) + /* character code = 0, 1, ..., N_CHAR - 1 */ + +static unsigned long char_to_sym[N_CHAR], sym_to_char[N_CHAR + 1]; +static unsigned long + sym_freq[N_CHAR + 1], /* frequency for symbols */ + sym_cum[N_CHAR + 1], /* cumulative freq for symbols */ + position_cum[N + 1]; /* cumulative freq for positions */ + +static void StartModel(void) /* Initialize model */ +{ + unsigned long ch, sym, i; + + sym_cum[N_CHAR] = 0; + for (sym = N_CHAR; sym >= 1; sym--) { + ch = sym - 1; + char_to_sym[ch] = sym; sym_to_char[sym] = ch; + sym_freq[sym] = 1; + sym_cum[sym - 1] = sym_cum[sym] + sym_freq[sym]; + } + sym_freq[0] = 0; /* sentinel (!= sym_freq[1]) */ + position_cum[N] = 0; + for (i = N; i >= 1; i--) + position_cum[i - 1] = position_cum[i] + 10000 / (i + 200); + /* empirical distribution function (quite tentative) */ + /* Please devise a better mechanism! 
/*
 * Decode -- LZARI decoder: expands srclen bytes at srcbuf into exactly
 * dstlen bytes at dstbuf.
 *
 * Input is consumed one bit at a time, most significant bit of each byte
 * first; once the source is exhausted further bits read as 0.
 *
 * Returns 0 when dstlen bytes have been produced, -1 if a copied string
 * would run past the end of dstbuf.
 */
static int Decode(unsigned char *srcbuf, unsigned char *dstbuf, unsigned long srclen,
					unsigned long dstlen)	/* Just the reverse of Encode(). */
{
	unsigned long i, r, j, k, c, range, sym;
	unsigned char *ip, *op;
	unsigned char *srcend = srcbuf + srclen;
	unsigned char *dstend = dstbuf + dstlen;
	unsigned char buffer = 0;	/* current input byte */
	unsigned char mask = 0;		/* bit of `buffer` consumed last; 0 forces a fetch */
	unsigned long low = 0;
	unsigned long high = Q4;
	unsigned long value = 0;

	ip = srcbuf;
	op = dstbuf;
	/* Prime `value` with the first M + 2 bits of the stream. */
	for (i = 0; i < M + 2; i++) {
		value *= 2;
		if ((mask >>= 1) == 0) {
			buffer = (ip >= srcend) ? 0 : *(ip++);
			mask = 128;
		}
		value += ((buffer & mask) != 0);
	}

	StartModel();
	/* The ring buffer starts out filled with blanks up to position N - F. */
	for (i = 0; i < N - F; i++) text_buf[i] = ' ';
	r = N - F;

	while (op < dstend) {
		/* Decode the next symbol and narrow [low, high) to its interval. */
		range = high - low;
		sym = BinarySearchSym((unsigned long)
				(((value - low + 1) * sym_cum[0] - 1) / range));
		high = low + (range * sym_cum[sym - 1]) / sym_cum[0];
		low +=       (range * sym_cum[sym    ]) / sym_cum[0];
		/* Rescale the interval, shifting in one input bit per doubling. */
		for ( ; ; ) {
			if (low >= Q2) {
				value -= Q2;  low -= Q2;  high -= Q2;
			} else if (low >= Q1 && high <= Q3) {
				value -= Q1;  low -= Q1;  high -= Q1;
			} else if (high > Q2) break;
			low += low;  high += high;
			value *= 2;
			if ((mask >>= 1) == 0) {
				buffer = (ip >= srcend) ? 0 : *(ip++);
				mask = 128;
			}
			value += ((buffer & mask) != 0);
		}
		c = sym_to_char[sym];
		UpdateModel(sym);
		if (c < 256) {
			/* Codes below 256 are literal bytes. */
			if (op >= dstend) return -1;
			*(op++) = c;
			text_buf[r++] = c;
			r &= (N - 1);
		} else {
			/* Other codes give a string length; its position follows. */
			j = c - 255 + THRESHOLD;
			range = high - low;
			i = BinarySearchPos((unsigned long)
				(((value - low + 1) * position_cum[0] - 1) / range));
			high = low + (range * position_cum[i    ]) / position_cum[0];
			low +=       (range * position_cum[i + 1]) / position_cum[0];
			/* Same rescaling step as for symbols. */
			for ( ; ; ) {
				if (low >= Q2) {
					value -= Q2;  low -= Q2;  high -= Q2;
				} else if (low >= Q1 && high <= Q3) {
					value -= Q1;  low -= Q1;  high -= Q1;
				} else if (high > Q2) break;
				low += low;  high += high;
				value *= 2;
				if ((mask >>= 1) == 0) {
					buffer = (ip >= srcend) ? 0 : *(ip++);
					mask = 128;
				}
				value += ((buffer & mask) != 0);
			}
			/* Turn the decoded distance into a ring-buffer index. */
			i = (r - i - 1) & (N - 1);
			/* Copy j bytes out of the ring buffer, re-inserting each one. */
			for (k = 0; k < j; k++) {
				c = text_buf[(i + k) & (N - 1)];
				if (op >= dstend) return -1;
				*(op++) = c;
				text_buf[r++] = c;
				r &= (N - 1);
			}
		}
	}
	return 0;
}
+ + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Markus F.X.J. Oberhumer + <markus@oberhumer.com> +*/ + +/* + + 2004-02-16 pajko <pajko(AT)halom(DOT)u-szeged(DOT)hu> + Initial release + -removed all 16 bit code + -all sensitive data will be on 4 byte boundary + -removed check parts for library use + -removed all but LZO1X-* compression + +*/ + + +#include <config.h> +#if ((CONFIG_COMMANDS & CFG_CMD_JFFS2) && defined(CONFIG_JFFS2_LZO_LZARI)) + +#include <linux/stddef.h> +#include <jffs2/jffs2.h> +#include <jffs2/compr_rubin.h> + +/* Integral types that have *exactly* the same number of bits as a lzo_voidp */ +typedef unsigned long lzo_ptr_t; +typedef long lzo_sptr_t; + +/* data type definitions */ +#define U32 unsigned long +#define S32 signed long +#define I32 long +#define U16 unsigned short +#define S16 signed short +#define I16 short +#define U8 unsigned char +#define S8 signed char +#define I8 char + +#define M1_MAX_OFFSET 0x0400 +#define M2_MAX_OFFSET 0x0800 +#define M3_MAX_OFFSET 0x4000 +#define M4_MAX_OFFSET 0xbfff + +#define __COPY4(dst,src) * (lzo_uint32p)(dst) = * (const lzo_uint32p)(src) +#define COPY4(dst,src) __COPY4((lzo_ptr_t)(dst),(lzo_ptr_t)(src)) + +#define TEST_IP (ip < ip_end) +#define TEST_OP (op <= op_end) + +#define NEED_IP(x) \ + if ((lzo_uint)(ip_end - ip) < 
/*
 * lzo1x_decompress -- bounds-checked LZO1X decoder.
 *
 * in, in_len   compressed input
 * out          output buffer
 * out_len      in: capacity of `out` in bytes; out: number of bytes written
 * wrkmem       not referenced by this function
 *
 * Returns LZO_E_OK when the end-of-stream marker is found exactly at the
 * end of the input, otherwise one of the LZO_E_* error codes.  *out_len is
 * updated on every return path.
 */
static int
lzo1x_decompress (const lzo_byte * in, lzo_uint in_len,
		  lzo_byte * out, lzo_uintp out_len, lzo_voidp wrkmem)
{
	register lzo_byte *op;
	register const lzo_byte *ip;
	register lzo_uint t;

	register const lzo_byte *m_pos;

	const lzo_byte *const ip_end = in + in_len;
	lzo_byte *const op_end = out + *out_len;

	*out_len = 0;

	op = out;
	ip = in;

	/* A first byte above 17 encodes an initial literal run of (byte - 17). */
	if (*ip > 17)
	{
		t = *ip++ - 17;
		if (t < 4)
			goto match_next;
		NEED_OP (t);
		NEED_IP (t + 1);
		do
			*op++ = *ip++;
		while (--t > 0);
		goto first_literal_run;
	}

	while (TEST_IP && TEST_OP)
	{
		t = *ip++;
		if (t >= 16)
			goto match;
		/* t < 16: literal run; t == 0 means the length is extended,
		   each zero byte adding 255. */
		if (t == 0)
		{
			NEED_IP (1);
			while (*ip == 0)
			{
				t += 255;
				ip++;
				NEED_IP (1);
			}
			t += 15 + *ip++;
		}
		/* The run copied below is t + 3 bytes long. */
		NEED_OP (t + 3);
		NEED_IP (t + 4);
		/* Word-wise copy when both pointers are 4-byte aligned. */
		if (PTR_ALIGNED2_4 (op, ip))
		{
			COPY4 (op, ip);

			op += 4;
			ip += 4;
			if (--t > 0)
			{
				if (t >= 4)
				{
					do
					{
						COPY4 (op, ip);
						op += 4;
						ip += 4;
						t -= 4;
					}
					while (t >= 4);
					if (t > 0)
						do
							*op++ = *ip++;
						while (--t > 0);
				}
				else
					do
						*op++ = *ip++;
					while (--t > 0);
			}
		}
		else
		{
			*op++ = *ip++;
			*op++ = *ip++;
			*op++ = *ip++;
			do
				*op++ = *ip++;
			while (--t > 0);
		}
	      first_literal_run:

		t = *ip++;
		if (t >= 16)
			goto match;

		/* t < 16 directly after a literal run: 3-byte match located
		   more than M2_MAX_OFFSET bytes back. */
		m_pos = op - (1 + M2_MAX_OFFSET);
		m_pos -= t >> 2;
		m_pos -= *ip++ << 2;
		TEST_LOOKBEHIND (m_pos, out);
		NEED_OP (3);
		*op++ = *m_pos++;
		*op++ = *m_pos++;
		*op++ = *m_pos;

		goto match_done;

		/* Match loop: the range of t selects how length and distance
		   are encoded. */
		while (TEST_IP && TEST_OP)
		{
		      match:
			if (t >= 64)
			{
				/* length and low distance bits packed in t,
				   one more distance byte follows */
				m_pos = op - 1;
				m_pos -= (t >> 2) & 7;
				m_pos -= *ip++ << 3;
				t = (t >> 5) - 1;
				TEST_LOOKBEHIND (m_pos, out);
				NEED_OP (t + 3 - 1);
				goto copy_match;

			}
			else if (t >= 32)
			{
				/* length in the low 5 bits (0 = extended),
				   distance in the next two bytes */
				t &= 31;
				if (t == 0)
				{
					NEED_IP (1);
					while (*ip == 0)
					{
						t += 255;
						ip++;
						NEED_IP (1);
					}
					t += 31 + *ip++;
				}

				m_pos = op - 1;
				m_pos -= (ip[0] >> 2) + (ip[1] << 6);

				ip += 2;
			}
			else if (t >= 16)
			{
				/* bit 3 of t adds 0x4000 to the distance,
				   length in the low 3 bits (0 = extended) */
				m_pos = op;
				m_pos -= (t & 8) << 11;

				t &= 7;
				if (t == 0)
				{
					NEED_IP (1);
					while (*ip == 0)
					{
						t += 255;
						ip++;
						NEED_IP (1);
					}
					t += 7 + *ip++;
				}

				m_pos -= (ip[0] >> 2) + (ip[1] << 6);

				ip += 2;
				/* a distance of zero is the end-of-stream marker */
				if (m_pos == op)
					goto eof_found;
				m_pos -= 0x4000;
			}
			else
			{

				/* t < 16: 2-byte match close to op */
				m_pos = op - 1;
				m_pos -= t >> 2;
				m_pos -= *ip++ << 2;
				TEST_LOOKBEHIND (m_pos, out);
				NEED_OP (2);
				*op++ = *m_pos++;
				*op++ = *m_pos;

				goto match_done;
			}

			/* Copy t + 2 bytes from m_pos, word-wise if aligned. */
			TEST_LOOKBEHIND (m_pos, out);
			NEED_OP (t + 3 - 1);
			if (t >= 2 * 4 - (3 - 1)
			    && PTR_ALIGNED2_4 (op, m_pos))
			{
				COPY4 (op, m_pos);
				op += 4;
				m_pos += 4;
				t -= 4 - (3 - 1);
				do
				{
					COPY4 (op, m_pos);
					op += 4;
					m_pos += 4;
					t -= 4;
				}
				while (t >= 4);
				if (t > 0)
					do
						*op++ = *m_pos++;
					while (--t > 0);
			}
			else

			{
			      copy_match:
				*op++ = *m_pos++;
				*op++ = *m_pos++;
				do
					*op++ = *m_pos++;
				while (--t > 0);
			}

		      match_done:
			/* The low two bits of the byte two positions back give
			   the number of literals that follow the match. */
			t = ip[-2] & 3;

			if (t == 0)
				break;

		      match_next:
			NEED_OP (t);
			NEED_IP (t + 1);
			do
				*op++ = *ip++;
			while (--t > 0);
			t = *ip++;
		}
	}
	/* Ran out of input or output without seeing the end marker. */
	*out_len = op - out;
	return LZO_E_EOF_NOT_FOUND;

      eof_found:
	*out_len = op - out;
	return (ip == ip_end ? LZO_E_OK :
		(ip <
		 ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN));

      input_overrun:
	*out_len = op - out;
	return LZO_E_INPUT_OVERRUN;

      output_overrun:
	*out_len = op - out;
	return LZO_E_OUTPUT_OVERRUN;

      lookbehind_overrun:
	*out_len = op - out;
	return LZO_E_LOOKBEHIND_OVERRUN;
}
+ * + * The contents of this file are subject to the Red Hat eCos Public + * License Version 1.1 (the "Licence"); you may not use this file + * except in compliance with the Licence. You may obtain a copy of + * the Licence at http://www.redhat.com/ + * + * Software distributed under the Licence is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. + * See the Licence for the specific language governing rights and + * limitations under the Licence. + * + * The Original Code is JFFS2 - Journalling Flash File System, version 2 + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License version 2 (the "GPL"), in + * which case the provisions of the GPL are applicable instead of the + * above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the RHEPL, indicate your decision by + * deleting the provisions above and replace them with the notice and + * other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file + * under either the RHEPL or the GPL. + * + * $Id: compr_rtime.c,v 1.2 2002/01/24 22:58:42 rfeany Exp $ + * + * + * Very simple lz77-ish encoder. + * + * Theory of operation: Both encoder and decoder have a list of "last + * occurances" for every possible source-value; after sending the + * first source-byte, the second byte indicated the "run" length of + * matches + * + * The algorithm is intended to only send "whole bytes", no bit-messing. 
/*
 * rtime_decompress -- expand an "rtime" compressed block.
 *
 * The stream is a sequence of byte pairs (value, repeat).  `value` is
 * emitted verbatim; then `repeat` more bytes are copied from the output
 * position that followed the previous occurrence of `value` (offset 0 if
 * the value has not been seen yet).  The copy source may overlap the
 * bytes being written, so it is done strictly byte by byte, front to back.
 *
 * data_in    compressed input, srclen bytes
 * cpage_out  output buffer, destlen bytes
 *
 * Decoding stops when destlen bytes have been produced or when fewer than
 * two input bytes remain.  Neither buffer is accessed outside its bounds,
 * even if the stream is corrupt; in that case the output is simply short.
 */
void rtime_decompress(unsigned char *data_in, unsigned char *cpage_out,
		      u32 srclen, u32 destlen)
{
	u32 positions[256] = { 0 };
	u32 outpos = 0;
	u32 pos = 0;

	while (outpos < destlen) {
		unsigned char value;
		u32 backoffs;
		u32 repeat;

		/* A token is two bytes long; stop on a truncated stream. */
		if (srclen - pos < 2)
			break;

		value = data_in[pos++];
		cpage_out[outpos++] = value;	/* first the verbatim copied byte */
		repeat = data_in[pos++];
		backoffs = positions[value];
		positions[value] = outpos;

		/* Never let a run extend past the end of the output buffer. */
		if (repeat > destlen - outpos)
			repeat = destlen - outpos;

		/* backoffs is always below outpos, so this reads valid output. */
		for (; repeat > 0; repeat--)
			cpage_out[outpos++] = cpage_out[backoffs++];
	}
}
+ * + * The Original Code is JFFS2 - Journalling Flash File System, version 2 + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License version 2 (the "GPL"), in + * which case the provisions of the GPL are applicable instead of the + * above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the RHEPL, indicate your decision by + * deleting the provisions above and replace them with the notice and + * other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file + * under either the RHEPL or the GPL. + * + * $Id: compr_rubin.c,v 1.2 2002/01/24 22:58:42 rfeany Exp $ + * + */ + +#include <config.h> +#if (CONFIG_COMMANDS & CFG_CMD_JFFS2) + +#include <jffs2/jffs2.h> +#include <jffs2/compr_rubin.h> + + +void rubin_do_decompress(unsigned char *bits, unsigned char *in, + unsigned char *page_out, __u32 destlen) +{ + register char *curr = (char *)page_out; + char *end = (char *)(page_out + destlen); + register unsigned long temp; + register unsigned long result; + register unsigned long p; + register unsigned long q; + register unsigned long rec_q; + register unsigned long bit; + register long i0; + unsigned long i; + + /* init_pushpull */ + temp = *(u32 *) in; + bit = 16; + + /* init_rubin */ + q = 0; + p = (long) (2 * UPPER_BIT_RUBIN); + + /* init_decode */ + rec_q = (in[0] << 8) | in[1]; + + while (curr < end) { + /* in byte */ + + result = 0; + for (i = 0; i < 8; i++) { + /* decode */ + + while ((q & UPPER_BIT_RUBIN) || ((p + q) <= UPPER_BIT_RUBIN)) { + q &= ~UPPER_BIT_RUBIN; + q <<= 1; + p <<= 1; + rec_q &= ~UPPER_BIT_RUBIN; + rec_q <<= 1; + rec_q |= (temp >> (bit++ ^ 7)) & 1; + if (bit > 31) { + u32 *p = (u32 *)in; + bit = 0; + temp = *(++p); + in = (unsigned char *)p; + } + } + i0 = (bits[i] * p) >> 8; + + if (i0 <= 0) i0 = 1; + /* 
if it fails, it fails, we have our crc + if (i0 >= p) i0 = p - 1; */ + + result >>= 1; + if (rec_q < q + i0) { + /* result |= 0x00; */ + p = i0; + } else { + result |= 0x80; + p -= i0; + q += i0; + } + } + *(curr++) = result; + } +} + +void dynrubin_decompress(unsigned char *data_in, unsigned char *cpage_out, + unsigned long sourcelen, unsigned long dstlen) +{ + unsigned char bits[8]; + int c; + + for (c=0; c<8; c++) + bits[c] = (256 - data_in[c]); + + rubin_do_decompress(bits, data_in+8, cpage_out, dstlen); +} + +#endif /* CFG_CMD_JFFS2 */ diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c new file mode 100755 index 0000000..1b35585 --- /dev/null +++ b/fs/jffs2/compr_zlib.c @@ -0,0 +1,52 @@ +/* + * JFFS2 -- Journalling Flash File System, Version 2. + * + * Copyright (C) 2001 Red Hat, Inc. + * + * Created by David Woodhouse <dwmw2@cambridge.redhat.com> + * + * The original JFFS, from which the design for JFFS2 was derived, + * was designed and implemented by Axis Communications AB. + * + * The contents of this file are subject to the Red Hat eCos Public + * License Version 1.1 (the "Licence"); you may not use this file + * except in compliance with the Licence. You may obtain a copy of + * the Licence at http://www.redhat.com/ + * + * Software distributed under the Licence is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. + * See the Licence for the specific language governing rights and + * limitations under the Licence. + * + * The Original Code is JFFS2 - Journalling Flash File System, version 2 + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License version 2 (the "GPL"), in + * which case the provisions of the GPL are applicable instead of the + * above. 
If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the RHEPL, indicate your decision by + * deleting the provisions above and replace them with the notice and + * other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file + * under either the RHEPL or the GPL. + * + * $Id: compr_zlib.c,v 1.2 2002/01/24 22:58:42 rfeany Exp $ + * + */ + +#include <common.h> +#include <config.h> +#if (CONFIG_COMMANDS & CFG_CMD_JFFS2) + +#include <jffs2/jffs2.h> +#include <jffs2/mini_inflate.h> + +long zlib_decompress(unsigned char *data_in, unsigned char *cpage_out, + __u32 srclen, __u32 destlen) +{ + return (decompress_block(cpage_out, data_in + 2, ldr_memcpy)); + +} + +#endif /* CFG_CMD_JFFS2 */ diff --git a/fs/jffs2/jffs2_1pass.c b/fs/jffs2/jffs2_1pass.c new file mode 100755 index 0000000..c6c0c2a --- /dev/null +++ b/fs/jffs2/jffs2_1pass.c @@ -0,0 +1,1370 @@ +/* +------------------------------------------------------------------------- + * Filename: jffs2.c + * Version: $Id: jffs2_1pass.c,v 1.7 2002/01/25 01:56:47 nyet Exp $ + * Copyright: Copyright (C) 2001, Russ Dill + * Author: Russ Dill <Russ.Dill@asu.edu> + * Description: Module to load kernel from jffs2 + *-----------------------------------------------------------------------*/ +/* + * some portions of this code are taken from jffs2, and as such, the + * following copyright notice is included. + * + * JFFS2 -- Journalling Flash File System, Version 2. + * + * Copyright (C) 2001 Red Hat, Inc. + * + * Created by David Woodhouse <dwmw2@cambridge.redhat.com> + * + * The original JFFS, from which the design for JFFS2 was derived, + * was designed and implemented by Axis Communications AB. 
+ * + * The contents of this file are subject to the Red Hat eCos Public + * License Version 1.1 (the "Licence"); you may not use this file + * except in compliance with the Licence. You may obtain a copy of + * the Licence at http://www.redhat.com/ + * + * Software distributed under the Licence is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. + * See the Licence for the specific language governing rights and + * limitations under the Licence. + * + * The Original Code is JFFS2 - Journalling Flash File System, version 2 + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License version 2 (the "GPL"), in + * which case the provisions of the GPL are applicable instead of the + * above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the RHEPL, indicate your decision by + * deleting the provisions above and replace them with the notice and + * other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file + * under either the RHEPL or the GPL. + * + * $Id: jffs2_1pass.c,v 1.7 2002/01/25 01:56:47 nyet Exp $ + * + */ + +/* Ok, so anyone who knows the jffs2 code will probably want to get a papar + * bag to throw up into before reading this code. I looked through the jffs2 + * code, the caching scheme is very elegant. I tried to keep the version + * for a bootloader as small and simple as possible. 
Instead of worrying about + * unnecessary data copies, node scans, etc, I just optimized for the known + * common case, a kernel, which looks like: + * (1) most pages are 4096 bytes + * (2) version numbers are somewhat sorted in ascending order + * (3) multiple compressed blocks making up one page is uncommon + * + * So I create a linked list of descending version numbers (insertions at the + * head), and then for each page, walk down the list, until a matching page + * with 4096 bytes is found, and then decompress the matching pages in + * reverse order. + * + */ + +/* + * Adapted by Nye Liu <nyet@zumanetworks.com> and + * Rex Feany <rfeany@zumanetworks.com> + * on Jan/2002 for U-Boot. + * + * Clipped out all the non-1pass functions, cleaned up warnings, + * wrappers, etc. No major changes to the code. + * Please, he really means it when he said have a paper bag + * handy. We needed it ;). + * + */ + +/* + * Bugfixing by Kai-Uwe Bloem <kai-uwe.bloem@auerswald.de>, (C) Mar/2003 + * + * - overhaul of the memory management. Removed much of the "paper-bagging" + * in that part of the code, fixed several bugs, now frees memory when + * partition is changed. + * It's still ugly :-( + * - fixed a bug in jffs2_1pass_read_inode where the file length calculation + * was incorrect. Removed a bit of the paper-bagging as well. + * - removed double crc calculation for fragment headers in jffs2_private.h + * for speedup. + * - scan_empty rewritten in a more "standard" manner (non-paperbag, that is). + * - spinning wheel now spins depending on how much memory has been scanned + * - lots of small changes all over the place to "improve" readability. + * - implemented fragment sorting to ensure that the newest data is copied + * if there are multiple copies of fragments for a certain file offset. + * + * The fragment sorting feature must be enabled by CFG_JFFS2_SORT_FRAGMENTS. + * Sorting is done while adding fragments to the lists, which is more or less a + * bubble sort.
This takes a lot of time, and is most probably not an issue if + * the boot filesystem is always mounted readonly. + * + * You should define it if the boot filesystem is mounted writable, and updates + * to the boot files are done by copying files to that filesystem. + * + * + * There's a big issue left: endianness is completely ignored in this code. Duh! + * + * + * You still should have paper bags at hand :-(. The code lacks more or less + * any comment, and is still arcane and difficult to read in places. As this + * might be incompatible with any new code from the jffs2 maintainers anyway, + * it should probably be dumped and replaced by something like jffs2reader! + */ + + +#include <common.h> +#include <config.h> +#include <malloc.h> +#include <linux/stat.h> +#include <linux/time.h> + +#if (CONFIG_COMMANDS & CFG_CMD_JFFS2) + +#include <jffs2/jffs2.h> +#include <jffs2/jffs2_1pass.h> + +#include "jffs2_private.h" + + +#define NODE_CHUNK 1024 /* size of memory allocation chunk in b_nodes */ +#define SPIN_BLKSIZE 18 /* spin after having scanned 1<<BLKSIZE bytes */ + +/* Debugging switches */ +#undef DEBUG_DIRENTS /* print directory entry list after scan */ +#undef DEBUG_FRAGMENTS /* print fragment list after scan */ +#undef DEBUG /* enable debugging messages */ + + +#ifdef DEBUG +# define DEBUGF(fmt,args...) printf(fmt ,##args) +#else +# define DEBUGF(fmt,args...) +#endif + +/* keeps pointer to currently processed partition */ +static struct part_info *current_part; + +#if defined(CONFIG_JFFS2_NAND) && (CONFIG_COMMANDS & CFG_CMD_NAND) +/* + * Support for jffs2 on top of NAND-flash + * + * NAND memory isn't mapped in processor's address space, + * so data should be fetched from flash before + * being processed. This is exactly what functions declared + * here do. 
+ * + */ + +/* this one defined in cmd_nand.c */ +int read_jffs2_nand(size_t start, size_t len, + size_t * retlen, u_char * buf, int nanddev); + +#define NAND_PAGE_SIZE 512 +#define NAND_PAGE_SHIFT 9 +#define NAND_PAGE_MASK (~(NAND_PAGE_SIZE-1)) + +#ifndef NAND_CACHE_PAGES +#define NAND_CACHE_PAGES 16 +#endif +#define NAND_CACHE_SIZE (NAND_CACHE_PAGES*NAND_PAGE_SIZE) + +static u8* nand_cache = NULL; +static u32 nand_cache_off = (u32)-1; + +static int read_nand_cached(u32 off, u32 size, u_char *buf) +{ + struct mtdids *id = current_part->dev->id; + u32 bytes_read = 0; + size_t retlen; + int cpy_bytes; + + while (bytes_read < size) { + if ((off + bytes_read < nand_cache_off) || + (off + bytes_read >= nand_cache_off+NAND_CACHE_SIZE)) { + nand_cache_off = (off + bytes_read) & NAND_PAGE_MASK; + if (!nand_cache) { + /* This memory never gets freed but 'cause + it's a bootloader, nobody cares */ + nand_cache = malloc(NAND_CACHE_SIZE); + if (!nand_cache) { + printf("read_nand_cached: can't alloc cache size %d bytes\n", + NAND_CACHE_SIZE); + return -1; + } + } + if (read_jffs2_nand(nand_cache_off, NAND_CACHE_SIZE, + &retlen, nand_cache, id->num) < 0 || + retlen != NAND_CACHE_SIZE) { + printf("read_nand_cached: error reading nand off %#x size %d bytes\n", + nand_cache_off, NAND_CACHE_SIZE); + return -1; + } + } + cpy_bytes = nand_cache_off + NAND_CACHE_SIZE - (off + bytes_read); + if (cpy_bytes > size - bytes_read) + cpy_bytes = size - bytes_read; + memcpy(buf + bytes_read, + nand_cache + off + bytes_read - nand_cache_off, + cpy_bytes); + bytes_read += cpy_bytes; + } + return bytes_read; +} + +static void *get_fl_mem_nand(u32 off, u32 size, void *ext_buf) +{ + u_char *buf = ext_buf ? 
(u_char*)ext_buf : (u_char*)malloc(size); + + if (NULL == buf) { + printf("get_fl_mem_nand: can't alloc %d bytes\n", size); + return NULL; + } + if (read_nand_cached(off, size, buf) < 0) { + if (!ext_buf) + free(buf); + return NULL; + } + + return buf; +} + +static void *get_node_mem_nand(u32 off) +{ + struct jffs2_unknown_node node; + void *ret = NULL; + + if (NULL == get_fl_mem_nand(off, sizeof(node), &node)) + return NULL; + + if (!(ret = get_fl_mem_nand(off, node.magic == + JFFS2_MAGIC_BITMASK ? node.totlen : sizeof(node), + NULL))) { + printf("off = %#x magic %#x type %#x node.totlen = %d\n", + off, node.magic, node.nodetype, node.totlen); + } + return ret; +} + +static void put_fl_mem_nand(void *buf) +{ + free(buf); +} +#endif /* #if defined(CONFIG_JFFS2_NAND) && (CONFIG_COMMANDS & CFG_CMD_NAND) */ + + +#if (CONFIG_COMMANDS & CFG_CMD_FLASH) +/* + * Support for jffs2 on top of NOR-flash + * + * NOR flash memory is mapped in processor's address space, + * just return address. + */ +static inline void *get_fl_mem_nor(u32 off) +{ + u32 addr = off; + struct mtdids *id = current_part->dev->id; + + extern flash_info_t flash_info[]; + flash_info_t *flash = &flash_info[id->num]; + + addr += flash->start[0]; + return (void*)addr; +} + +static inline void *get_node_mem_nor(u32 off) +{ + return (void*)get_fl_mem_nor(off); +} +#endif /* #if (CONFIG_COMMANDS & CFG_CMD_FLASH) */ + + +/* + * Generic jffs2 raw memory and node read routines. 
+ * + */ +static inline void *get_fl_mem(u32 off, u32 size, void *ext_buf) +{ + struct mtdids *id = current_part->dev->id; + +#if (CONFIG_COMMANDS & CFG_CMD_FLASH) + if (id->type == MTD_DEV_TYPE_NOR) + return get_fl_mem_nor(off); +#endif + +#if defined(CONFIG_JFFS2_NAND) && (CONFIG_COMMANDS & CFG_CMD_NAND) + if (id->type == MTD_DEV_TYPE_NAND) + return get_fl_mem_nand(off, size, ext_buf); +#endif + + printf("get_fl_mem: unknown device type, using raw offset!\n"); + return (void*)off; +} + +static inline void *get_node_mem(u32 off) +{ + struct mtdids *id = current_part->dev->id; + +#if (CONFIG_COMMANDS & CFG_CMD_FLASH) + if (id->type == MTD_DEV_TYPE_NOR) + return get_node_mem_nor(off); +#endif + +#if defined(CONFIG_JFFS2_NAND) && (CONFIG_COMMANDS & CFG_CMD_NAND) + if (id->type == MTD_DEV_TYPE_NAND) + return get_node_mem_nand(off); +#endif + + printf("get_node_mem: unknown device type, using raw offset!\n"); + return (void*)off; +} + +static inline void put_fl_mem(void *buf) +{ +#if defined(CONFIG_JFFS2_NAND) && (CONFIG_COMMANDS & CFG_CMD_NAND) + struct mtdids *id = current_part->dev->id; + + if (id->type == MTD_DEV_TYPE_NAND) + return put_fl_mem_nand(buf); +#endif +} + +/* Compression names */ +static char *compr_names[] = { + "NONE", + "ZERO", + "RTIME", + "RUBINMIPS", + "COPY", + "DYNRUBIN", + "ZLIB", +#if defined(CONFIG_JFFS2_LZO_LZARI) + "LZO", + "LZARI", +#endif +}; + +/* Spinning wheel */ +static char spinner[] = { '|', '/', '-', '\\' }; + +/* Memory management */ +struct mem_block { + u32 index; + struct mem_block *next; + struct b_node nodes[NODE_CHUNK]; +}; + + +static void +free_nodes(struct b_list *list) +{ + while (list->listMemBase != NULL) { + struct mem_block *next = list->listMemBase->next; + free( list->listMemBase ); + list->listMemBase = next; + } +} + +static struct b_node * +add_node(struct b_list *list) +{ + u32 index = 0; + struct mem_block *memBase; + struct b_node *b; + + memBase = list->listMemBase; + if (memBase != NULL) + index = 
memBase->index; +#if 0 + putLabeledWord("add_node: index = ", index); + putLabeledWord("add_node: memBase = ", list->listMemBase); +#endif + + if (memBase == NULL || index >= NODE_CHUNK) { + /* we need more space before we continue */ + memBase = mmalloc(sizeof(struct mem_block)); + if (memBase == NULL) { + putstr("add_node: malloc failed\n"); + return NULL; + } + memBase->next = list->listMemBase; + index = 0; +#if 0 + putLabeledWord("add_node: alloced a new membase at ", *memBase); +#endif + + } + /* now we have room to add it. */ + b = &memBase->nodes[index]; + index ++; + + memBase->index = index; + list->listMemBase = memBase; + list->listCount++; + return b; +} + +static struct b_node * +insert_node(struct b_list *list, u32 offset) +{ + struct b_node *new; +#ifdef CFG_JFFS2_SORT_FRAGMENTS + struct b_node *b, *prev; +#endif + + if (!(new = add_node(list))) { + putstr("add_node failed!\r\n"); + return NULL; + } + new->offset = offset; + +#ifdef CFG_JFFS2_SORT_FRAGMENTS + if (list->listTail != NULL && list->listCompare(new, list->listTail)) + prev = list->listTail; + else if (list->listLast != NULL && list->listCompare(new, list->listLast)) + prev = list->listLast; + else + prev = NULL; + + for (b = (prev ? prev->next : list->listHead); + b != NULL && list->listCompare(new, b); + prev = b, b = b->next) { + list->listLoops++; + } + if (b != NULL) + list->listLast = prev; + + if (b != NULL) { + new->next = b; + if (prev != NULL) + prev->next = new; + else + list->listHead = new; + } else +#endif + { + new->next = (struct b_node *) NULL; + if (list->listTail != NULL) { + list->listTail->next = new; + list->listTail = new; + } else { + list->listTail = list->listHead = new; + } + } + + return new; +} + +#ifdef CFG_JFFS2_SORT_FRAGMENTS +/* Sort data entries with the latest version last, so that if there + * is overlapping data the latest version will be used. 
+ */ +static int compare_inodes(struct b_node *new, struct b_node *old) +{ + struct jffs2_raw_inode ojNew; + struct jffs2_raw_inode ojOld; + struct jffs2_raw_inode *jNew = + (struct jffs2_raw_inode *)get_fl_mem(new->offset, sizeof(ojNew), &ojNew); + struct jffs2_raw_inode *jOld = + (struct jffs2_raw_inode *)get_fl_mem(old->offset, sizeof(ojOld), &ojOld); + + return jNew->version > jOld->version; +} + +/* Sort directory entries so all entries in the same directory + * with the same name are grouped together, with the latest version + * last. This makes it easy to eliminate all but the latest version + * by marking the previous version dead by setting the inode to 0. + */ +static int compare_dirents(struct b_node *new, struct b_node *old) +{ + struct jffs2_raw_dirent ojNew; + struct jffs2_raw_dirent ojOld; + struct jffs2_raw_dirent *jNew = + (struct jffs2_raw_dirent *)get_fl_mem(new->offset, sizeof(ojNew), &ojNew); + struct jffs2_raw_dirent *jOld = + (struct jffs2_raw_dirent *)get_fl_mem(old->offset, sizeof(ojOld), &ojOld); + int cmp; + + /* ascending sort by pino */ + if (jNew->pino != jOld->pino) + return jNew->pino > jOld->pino; + + /* pino is the same, so use ascending sort by nsize, so + * we don't do strncmp unless we really must. 
+ */ + if (jNew->nsize != jOld->nsize) + return jNew->nsize > jOld->nsize; + + /* length is also the same, so use ascending sort by name + */ + cmp = strncmp((char *)jNew->name, (char *)jOld->name, jNew->nsize); + if (cmp != 0) + return cmp > 0; + + /* we have duplicate names in this directory, so use ascending + * sort by version + */ + if (jNew->version > jOld->version) { + /* since jNew is newer, we know jOld is not valid, so + * mark it with inode 0 and it will not be used + */ + jOld->ino = 0; + return 1; + } + + return 0; +} +#endif + +static u32 +jffs2_scan_empty(u32 start_offset, struct part_info *part) +{ + char *max = (char *)(part->offset + part->size - sizeof(struct jffs2_raw_inode)); + char *offset = (char *)(part->offset + start_offset); + u32 off; + + while (offset < max && + *(u32*)get_fl_mem((u32)offset, sizeof(u32), &off) == 0xFFFFFFFF) { + offset += sizeof(u32); + /* return if spinning is due */ + if (((u32)offset & ((1 << SPIN_BLKSIZE)-1)) == 0) break; + } + + return (u32)offset - part->offset; +} + +void +jffs2_free_cache(struct part_info *part) +{ + struct b_lists *pL; + + if (part->jffs2_priv != NULL) { + pL = (struct b_lists *)part->jffs2_priv; + free_nodes(&pL->frag); + free_nodes(&pL->dir); + free(pL); + } +} + +static u32 +jffs_init_1pass_list(struct part_info *part) +{ + struct b_lists *pL; + + jffs2_free_cache(part); + + if (NULL != (part->jffs2_priv = malloc(sizeof(struct b_lists)))) { + pL = (struct b_lists *)part->jffs2_priv; + + memset(pL, 0, sizeof(*pL)); +#ifdef CFG_JFFS2_SORT_FRAGMENTS + pL->dir.listCompare = compare_dirents; + pL->frag.listCompare = compare_inodes; +#endif + } + return 0; +} + +/* find the inode from the slashless name given a parent */ +static long +jffs2_1pass_read_inode(struct b_lists *pL, u32 inode, char *dest) +{ + struct b_node *b; + struct jffs2_raw_inode *jNode; + u32 totalSize = 0; + u32 latestVersion = 0; + uchar *lDest; + uchar *src; + long ret; + int i; + u32 counter = 0; +#ifdef 
CFG_JFFS2_SORT_FRAGMENTS + /* Find file size before loading any data, so fragments that + * start past the end of file can be ignored. A fragment + * that is partially in the file is loaded, so extra data may + * be loaded up to the next 4K boundary above the file size. + * This shouldn't cause trouble when loading kernel images, so + * we will live with it. + */ + for (b = pL->frag.listHead; b != NULL; b = b->next) { + jNode = (struct jffs2_raw_inode *) get_fl_mem(b->offset, + sizeof(struct jffs2_raw_inode), NULL); + if ((inode == jNode->ino)) { + /* get actual file length from the newest node */ + if (jNode->version >= latestVersion) { + totalSize = jNode->isize; + latestVersion = jNode->version; + } + } + put_fl_mem(jNode); + } +#endif + + for (b = pL->frag.listHead; b != NULL; b = b->next) { + jNode = (struct jffs2_raw_inode *) get_node_mem(b->offset); + if ((inode == jNode->ino)) { +#if 0 + putLabeledWord("\r\n\r\nread_inode: totlen = ", jNode->totlen); + putLabeledWord("read_inode: inode = ", jNode->ino); + putLabeledWord("read_inode: version = ", jNode->version); + putLabeledWord("read_inode: isize = ", jNode->isize); + putLabeledWord("read_inode: offset = ", jNode->offset); + putLabeledWord("read_inode: csize = ", jNode->csize); + putLabeledWord("read_inode: dsize = ", jNode->dsize); + putLabeledWord("read_inode: compr = ", jNode->compr); + putLabeledWord("read_inode: usercompr = ", jNode->usercompr); + putLabeledWord("read_inode: flags = ", jNode->flags); +#endif + +#ifndef CFG_JFFS2_SORT_FRAGMENTS + /* get actual file length from the newest node */ + if (jNode->version >= latestVersion) { + totalSize = jNode->isize; + latestVersion = jNode->version; + } +#endif + + if(dest) { + src = ((uchar *) jNode) + sizeof(struct jffs2_raw_inode); + /* ignore data behind latest known EOF */ + if (jNode->offset > totalSize) { + put_fl_mem(jNode); + continue; + } + + lDest = (uchar *) (dest + jNode->offset); +#if 0 + putLabeledWord("read_inode: src = ", src); + 
putLabeledWord("read_inode: dest = ", lDest); +#endif + switch (jNode->compr) { + case JFFS2_COMPR_NONE: + ret = (unsigned long) ldr_memcpy(lDest, src, jNode->dsize); + break; + case JFFS2_COMPR_ZERO: + ret = 0; + for (i = 0; i < jNode->dsize; i++) + *(lDest++) = 0; + break; + case JFFS2_COMPR_RTIME: + ret = 0; + rtime_decompress(src, lDest, jNode->csize, jNode->dsize); + break; + case JFFS2_COMPR_DYNRUBIN: + /* this is slow but it works */ + ret = 0; + dynrubin_decompress(src, lDest, jNode->csize, jNode->dsize); + break; + case JFFS2_COMPR_ZLIB: + ret = zlib_decompress(src, lDest, jNode->csize, jNode->dsize); + break; +#if defined(CONFIG_JFFS2_LZO_LZARI) + case JFFS2_COMPR_LZO: + ret = lzo_decompress(src, lDest, jNode->csize, jNode->dsize); + break; + case JFFS2_COMPR_LZARI: + ret = lzari_decompress(src, lDest, jNode->csize, jNode->dsize); + break; +#endif + default: + /* unknown */ + putLabeledWord("UNKOWN COMPRESSION METHOD = ", jNode->compr); + put_fl_mem(jNode); + return -1; + break; + } + } + +#if 0 + putLabeledWord("read_inode: totalSize = ", totalSize); + putLabeledWord("read_inode: compr ret = ", ret); +#endif + } + counter++; + put_fl_mem(jNode); + } + +#if 0 + putLabeledWord("read_inode: returning = ", totalSize); +#endif + return totalSize; +} + +/* find the inode from the slashless name given a parent */ +static u32 +jffs2_1pass_find_inode(struct b_lists * pL, const char *name, u32 pino) +{ + struct b_node *b; + struct jffs2_raw_dirent *jDir; + int len; + u32 counter; + u32 version = 0; + u32 inode = 0; + + /* name is assumed slash free */ + len = strlen(name); + + counter = 0; + /* we need to search all and return the inode with the highest version */ + for(b = pL->dir.listHead; b; b = b->next, counter++) { + jDir = (struct jffs2_raw_dirent *) get_node_mem(b->offset); + if ((pino == jDir->pino) && (len == jDir->nsize) && + (jDir->ino) && /* 0 for unlink */ + (!strncmp((char *)jDir->name, name, len))) { /* a match */ + if (jDir->version < version) { + 
/*
 * Render a file mode as the ten character string known from "ls -l":
 * one file type character followed by three rwx triplets.
 *
 * 'str' must provide room for at least 11 bytes; it is NUL terminated and
 * returned.  Set-uid, set-gid and sticky bits replace the corresponding
 * execute position with s/S respectively t/T (lower case when the execute
 * bit is set as well).  FIFOs are shown as 'f', unknown types as '?'.
 */
char *mkmodestr(unsigned long mode, char *str)
{
	static const char perm[] = "rwx";
	unsigned long fmt = mode & S_IFMT;
	int pos;

	if (fmt == S_IFDIR)
		str[0] = 'd';
	else if (fmt == S_IFBLK)
		str[0] = 'b';
	else if (fmt == S_IFCHR)
		str[0] = 'c';
	else if (fmt == S_IFIFO)
		str[0] = 'f';
	else if (fmt == S_IFLNK)
		str[0] = 'l';
	else if (fmt == S_IFSOCK)
		str[0] = 's';
	else if (fmt == S_IFREG)
		str[0] = '-';
	else
		str[0] = '?';

	/* string positions 1..9 correspond to mode bits 0400 down to 0001 */
	for (pos = 1; pos <= 9; pos++) {
		unsigned long bit = 1UL << (9 - pos);

		str[pos] = (mode & bit) ? perm[(pos - 1) % 3] : '-';
	}

	if (mode & S_ISUID)
		str[3] = (mode & S_IXUSR) ? 's' : 'S';
	if (mode & S_ISGID)
		str[6] = (mode & S_IXGRP) ? 's' : 'S';
	if (mode & S_ISVTX)
		str[9] = (mode & S_IXOTH) ? 't' : 'T';

	str[10] = '\0';
	return str;
}
jffs2_raw_inode *i) +{ + char fname[256]; + struct stat st; + + if(!d || !i) return -1; + + strncpy(fname, (char *)d->name, d->nsize); + fname[d->nsize] = '\0'; + + memset(&st,0,sizeof(st)); + + st.st_mtime = i->mtime; + st.st_mode = i->mode; + st.st_ino = i->ino; + + /* neither dsize nor isize help us.. do it the long way */ + st.st_size = jffs2_1pass_read_inode(pL, i->ino, NULL); + + dump_stat(&st, fname); + + if (d->type == DT_LNK) { + unsigned char *src = (unsigned char *) (&i[1]); + putstr(" -> "); + putnstr(src, (int)i->dsize); + } + + putstr("\r\n"); + + return 0; +} + +/* list inodes with the given pino */ +static u32 +jffs2_1pass_list_inodes(struct b_lists * pL, u32 pino) +{ + struct b_node *b; + struct jffs2_raw_dirent *jDir; + + for (b = pL->dir.listHead; b; b = b->next) { + jDir = (struct jffs2_raw_dirent *) get_node_mem(b->offset); + if ((pino == jDir->pino) && (jDir->ino)) { /* ino=0 -> unlink */ + u32 i_version = 0; + struct jffs2_raw_inode ojNode; + struct jffs2_raw_inode *jNode, *i = NULL; + struct b_node *b2 = pL->frag.listHead; + + while (b2) { + jNode = (struct jffs2_raw_inode *) + get_fl_mem(b2->offset, sizeof(ojNode), &ojNode); + if (jNode->ino == jDir->ino && jNode->version >= i_version) { + if (i) + put_fl_mem(i); + + if (jDir->type == DT_LNK) + i = get_node_mem(b2->offset); + else + i = get_fl_mem(b2->offset, sizeof(*i), NULL); + } + b2 = b2->next; + } + + dump_inode(pL, jDir, i); + put_fl_mem(i); + } + put_fl_mem(jDir); + } + return pino; +} + +static u32 +jffs2_1pass_search_inode(struct b_lists * pL, const char *fname, u32 pino) +{ + int i; + char tmp[256]; + char working_tmp[256]; + char *c; + + /* discard any leading slash */ + i = 0; + while (fname[i] == '/') + i++; + strcpy(tmp, &fname[i]); + + while ((c = (char *) strchr(tmp, '/'))) /* we are still dired searching */ + { + strncpy(working_tmp, tmp, c - tmp); + working_tmp[c - tmp] = '\0'; +#if 0 + putstr("search_inode: tmp = "); + putstr(tmp); + putstr("\r\n"); + 
putstr("search_inode: wtmp = "); + putstr(working_tmp); + putstr("\r\n"); + putstr("search_inode: c = "); + putstr(c); + putstr("\r\n"); +#endif + for (i = 0; i < strlen(c) - 1; i++) + tmp[i] = c[i + 1]; + tmp[i] = '\0'; +#if 0 + putstr("search_inode: post tmp = "); + putstr(tmp); + putstr("\r\n"); +#endif + + if (!(pino = jffs2_1pass_find_inode(pL, working_tmp, pino))) { + putstr("find_inode failed for name="); + putstr(working_tmp); + putstr("\r\n"); + return 0; + } + } + /* this is for the bare filename, directories have already been mapped */ + if (!(pino = jffs2_1pass_find_inode(pL, tmp, pino))) { + putstr("find_inode failed for name="); + putstr(tmp); + putstr("\r\n"); + return 0; + } + return pino; + +} + +static u32 +jffs2_1pass_resolve_inode(struct b_lists * pL, u32 ino) +{ + struct b_node *b; + struct b_node *b2; + struct jffs2_raw_dirent *jDir; + struct jffs2_raw_inode *jNode; + u8 jDirFoundType = 0; + u32 jDirFoundIno = 0; + u32 jDirFoundPino = 0; + char tmp[256]; + u32 version = 0; + u32 pino; + unsigned char *src; + + /* we need to search all and return the inode with the highest version */ + for(b = pL->dir.listHead; b; b = b->next) { + jDir = (struct jffs2_raw_dirent *) get_node_mem(b->offset); + if (ino == jDir->ino) { + if (jDir->version < version) { + put_fl_mem(jDir); + continue; + } + + if (jDir->version == version && jDirFoundType) { + /* I'm pretty sure this isn't legal */ + putstr(" ** ERROR ** "); + putnstr(jDir->name, jDir->nsize); + putLabeledWord(" has dup version (resolve) = ", + version); + } + + jDirFoundType = jDir->type; + jDirFoundIno = jDir->ino; + jDirFoundPino = jDir->pino; + version = jDir->version; + } + put_fl_mem(jDir); + } + /* now we found the right entry again. (shoulda returned inode*) */ + if (jDirFoundType != DT_LNK) + return jDirFoundIno; + + /* it's a soft link so we follow it again. 
*/ + b2 = pL->frag.listHead; + while (b2) { + jNode = (struct jffs2_raw_inode *) get_node_mem(b2->offset); + if (jNode->ino == jDirFoundIno) { + src = (unsigned char *)jNode + sizeof(struct jffs2_raw_inode); + +#if 0 + putLabeledWord("\t\t dsize = ", jNode->dsize); + putstr("\t\t target = "); + putnstr(src, jNode->dsize); + putstr("\r\n"); +#endif + strncpy(tmp, (char *)src, jNode->dsize); + tmp[jNode->dsize] = '\0'; + put_fl_mem(jNode); + break; + } + b2 = b2->next; + put_fl_mem(jNode); + } + /* ok so the name of the new file to find is in tmp */ + /* if it starts with a slash it is root based else shared dirs */ + if (tmp[0] == '/') + pino = 1; + else + pino = jDirFoundPino; + + return jffs2_1pass_search_inode(pL, tmp, pino); +} + +static u32 +jffs2_1pass_search_list_inodes(struct b_lists * pL, const char *fname, u32 pino) +{ + int i; + char tmp[256]; + char working_tmp[256]; + char *c; + + /* discard any leading slash */ + i = 0; + while (fname[i] == '/') + i++; + strcpy(tmp, &fname[i]); + working_tmp[0] = '\0'; + while ((c = (char *) strchr(tmp, '/'))) /* we are still dired searching */ + { + strncpy(working_tmp, tmp, c - tmp); + working_tmp[c - tmp] = '\0'; + for (i = 0; i < strlen(c) - 1; i++) + tmp[i] = c[i + 1]; + tmp[i] = '\0'; + /* only a failure if we arent looking at top level */ + if (!(pino = jffs2_1pass_find_inode(pL, working_tmp, pino)) && + (working_tmp[0])) { + putstr("find_inode failed for name="); + putstr(working_tmp); + putstr("\r\n"); + return 0; + } + } + + if (tmp[0] && !(pino = jffs2_1pass_find_inode(pL, tmp, pino))) { + putstr("find_inode failed for name="); + putstr(tmp); + putstr("\r\n"); + return 0; + } + /* this is for the bare filename, directories have already been mapped */ + if (!(pino = jffs2_1pass_list_inodes(pL, pino))) { + putstr("find_inode failed for name="); + putstr(tmp); + putstr("\r\n"); + return 0; + } + return pino; + +} + +unsigned char +jffs2_1pass_rescan_needed(struct part_info *part) +{ + struct b_node *b; + struct 
jffs2_unknown_node onode; + struct jffs2_unknown_node *node; + struct b_lists *pL = (struct b_lists *)part->jffs2_priv; + + if (part->jffs2_priv == 0){ + DEBUGF ("rescan: First time in use\n"); + return 1; + } + + /* if we have no list, we need to rescan */ + if (pL->frag.listCount == 0) { + DEBUGF ("rescan: fraglist zero\n"); + return 1; + } + + /* but suppose someone reflashed a partition at the same offset... */ + b = pL->dir.listHead; + while (b) { + node = (struct jffs2_unknown_node *) get_fl_mem(b->offset, + sizeof(onode), &onode); + if (node->nodetype != JFFS2_NODETYPE_DIRENT) { + DEBUGF ("rescan: fs changed beneath me? (%lx)\n", + (unsigned long) b->offset); + return 1; + } + b = b->next; + } + return 0; +} + +#ifdef DEBUG_FRAGMENTS +static void +dump_fragments(struct b_lists *pL) +{ + struct b_node *b; + struct jffs2_raw_inode ojNode; + struct jffs2_raw_inode *jNode; + + putstr("\r\n\r\n******The fragment Entries******\r\n"); + b = pL->frag.listHead; + while (b) { + jNode = (struct jffs2_raw_inode *) get_fl_mem(b->offset, + sizeof(ojNode), &ojNode); + putLabeledWord("\r\n\tbuild_list: FLASH_OFFSET = ", b->offset); + putLabeledWord("\tbuild_list: totlen = ", jNode->totlen); + putLabeledWord("\tbuild_list: inode = ", jNode->ino); + putLabeledWord("\tbuild_list: version = ", jNode->version); + putLabeledWord("\tbuild_list: isize = ", jNode->isize); + putLabeledWord("\tbuild_list: atime = ", jNode->atime); + putLabeledWord("\tbuild_list: offset = ", jNode->offset); + putLabeledWord("\tbuild_list: csize = ", jNode->csize); + putLabeledWord("\tbuild_list: dsize = ", jNode->dsize); + putLabeledWord("\tbuild_list: compr = ", jNode->compr); + putLabeledWord("\tbuild_list: usercompr = ", jNode->usercompr); + putLabeledWord("\tbuild_list: flags = ", jNode->flags); + putLabeledWord("\tbuild_list: offset = ", b->offset); /* FIXME: ? 
[RS] */ + b = b->next; + } +} +#endif + +#ifdef DEBUG_DIRENTS +static void +dump_dirents(struct b_lists *pL) +{ + struct b_node *b; + struct jffs2_raw_dirent *jDir; + + putstr("\r\n\r\n******The directory Entries******\r\n"); + b = pL->dir.listHead; + while (b) { + jDir = (struct jffs2_raw_dirent *) get_node_mem(b->offset); + putstr("\r\n"); + putnstr(jDir->name, jDir->nsize); + putLabeledWord("\r\n\tbuild_list: magic = ", jDir->magic); + putLabeledWord("\tbuild_list: nodetype = ", jDir->nodetype); + putLabeledWord("\tbuild_list: hdr_crc = ", jDir->hdr_crc); + putLabeledWord("\tbuild_list: pino = ", jDir->pino); + putLabeledWord("\tbuild_list: version = ", jDir->version); + putLabeledWord("\tbuild_list: ino = ", jDir->ino); + putLabeledWord("\tbuild_list: mctime = ", jDir->mctime); + putLabeledWord("\tbuild_list: nsize = ", jDir->nsize); + putLabeledWord("\tbuild_list: type = ", jDir->type); + putLabeledWord("\tbuild_list: node_crc = ", jDir->node_crc); + putLabeledWord("\tbuild_list: name_crc = ", jDir->name_crc); + putLabeledWord("\tbuild_list: offset = ", b->offset); /* FIXME: ? [RS] */ + b = b->next; + put_fl_mem(jDir); + } +} +#endif + +static u32 +jffs2_1pass_build_lists(struct part_info * part) +{ + struct b_lists *pL; + struct jffs2_unknown_node *node; + u32 offset, oldoffset = 0; + u32 max = part->size - sizeof(struct jffs2_raw_inode); + u32 counter = 0; + u32 counter4 = 0; + u32 counterF = 0; + u32 counterN = 0; + + /* turn off the lcd. Refreshing the lcd adds 50% overhead to the */ + /* jffs2 list building enterprise nope. in newer versions the overhead is */ + /* only about 5 %. not enough to inconvenience people for. */ + /* lcd_off(); */ + + /* if we are building a list we need to refresh the cache. 
*/ + jffs_init_1pass_list(part); + pL = (struct b_lists *)part->jffs2_priv; + offset = 0; + puts ("Scanning JFFS2 FS: "); + + /* start at the beginning of the partition */ + while (offset < max) { + if ((oldoffset >> SPIN_BLKSIZE) != (offset >> SPIN_BLKSIZE)) { + printf("\b\b%c ", spinner[counter++ % sizeof(spinner)]); + oldoffset = offset; + } + + node = (struct jffs2_unknown_node *) get_node_mem((u32)part->offset + offset); + if (node->magic == JFFS2_MAGIC_BITMASK && hdr_crc(node)) { + /* if its a fragment add it */ + if (node->nodetype == JFFS2_NODETYPE_INODE && + inode_crc((struct jffs2_raw_inode *) node)) { + if (insert_node(&pL->frag, (u32) part->offset + + offset) == NULL) { + put_fl_mem(node); + return 0; + } + } else if (node->nodetype == JFFS2_NODETYPE_DIRENT && + dirent_crc((struct jffs2_raw_dirent *) node) && + dirent_name_crc((struct jffs2_raw_dirent *) node)) { + if (! (counterN%100)) + puts ("\b\b. "); + if (insert_node(&pL->dir, (u32) part->offset + + offset) == NULL) { + put_fl_mem(node); + return 0; + } + counterN++; + } else if (node->nodetype == JFFS2_NODETYPE_CLEANMARKER) { + if (node->totlen != sizeof(struct jffs2_unknown_node)) + printf("OOPS Cleanmarker has bad size " + "%d != %d\n", node->totlen, + sizeof(struct jffs2_unknown_node)); + } else if (node->nodetype == JFFS2_NODETYPE_PADDING) { + if (node->totlen < sizeof(struct jffs2_unknown_node)) + printf("OOPS Padding has bad size " + "%d < %d\n", node->totlen, + sizeof(struct jffs2_unknown_node)); + } else { + printf("Unknown node type: %x len %d " + "offset 0x%x\n", node->nodetype, + node->totlen, offset); + } + offset += ((node->totlen + 3) & ~3); + counterF++; + } else if (node->magic == JFFS2_EMPTY_BITMASK && + node->nodetype == JFFS2_EMPTY_BITMASK) { + offset = jffs2_scan_empty(offset, part); + } else { /* if we know nothing, we just step and look. 
*/ + offset += 4; + counter4++; + } +/* printf("unknown node magic %4.4x %4.4x @ %lx\n", node->magic, node->nodetype, (unsigned long)node); */ + put_fl_mem(node); + } + + putstr("\b\b done.\r\n"); /* close off the dots */ + /* turn the lcd back on. */ + /* splash(); */ + +#if 0 + putLabeledWord("dir entries = ", pL->dir.listCount); + putLabeledWord("frag entries = ", pL->frag.listCount); + putLabeledWord("+4 increments = ", counter4); + putLabeledWord("+file_offset increments = ", counterF); + +#endif + +#ifdef DEBUG_DIRENTS + dump_dirents(pL); +#endif + +#ifdef DEBUG_FRAGMENTS + dump_fragments(pL); +#endif + + /* give visual feedback that we are done scanning the flash */ + led_blink(0x0, 0x0, 0x1, 0x1); /* off, forever, on 100ms, off 100ms */ + return 1; +} + + +static u32 +jffs2_1pass_fill_info(struct b_lists * pL, struct b_jffs2_info * piL) +{ + struct b_node *b; + struct jffs2_raw_inode ojNode; + struct jffs2_raw_inode *jNode; + int i; + + for (i = 0; i < JFFS2_NUM_COMPR; i++) { + piL->compr_info[i].num_frags = 0; + piL->compr_info[i].compr_sum = 0; + piL->compr_info[i].decompr_sum = 0; + } + + b = pL->frag.listHead; + while (b) { + jNode = (struct jffs2_raw_inode *) get_fl_mem(b->offset, + sizeof(ojNode), &ojNode); + if (jNode->compr < JFFS2_NUM_COMPR) { + piL->compr_info[jNode->compr].num_frags++; + piL->compr_info[jNode->compr].compr_sum += jNode->csize; + piL->compr_info[jNode->compr].decompr_sum += jNode->dsize; + } + b = b->next; + } + return 0; +} + + +static struct b_lists * +jffs2_get_list(struct part_info * part, const char *who) +{ + /* copy requested part_info struct pointer to global location */ + current_part = part; + + if (jffs2_1pass_rescan_needed(part)) { + if (!jffs2_1pass_build_lists(part)) { + printf("%s: Failed to scan JFFSv2 file structure\n", who); + return NULL; + } + } + return (struct b_lists *)part->jffs2_priv; +} + + +/* Print directory / file contents */ +u32 +jffs2_1pass_ls(struct part_info * part, const char *fname) +{ + struct 
b_lists *pl; + long ret = 1; + u32 inode; + + if (! (pl = jffs2_get_list(part, "ls"))) + return 0; + + if (! (inode = jffs2_1pass_search_list_inodes(pl, fname, 1))) { + putstr("ls: Failed to scan jffs2 file structure\r\n"); + return 0; + } + + +#if 0 + putLabeledWord("found file at inode = ", inode); + putLabeledWord("read_inode returns = ", ret); +#endif + + return ret; +} + + +/* Load a file from flash into memory. fname can be a full path */ +u32 +jffs2_1pass_load(char *dest, struct part_info * part, const char *fname) +{ + + struct b_lists *pl; + long ret = 1; + u32 inode; + + if (! (pl = jffs2_get_list(part, "load"))) + return 0; + + if (! (inode = jffs2_1pass_search_inode(pl, fname, 1))) { + putstr("load: Failed to find inode\r\n"); + return 0; + } + + /* Resolve symlinks */ + if (! (inode = jffs2_1pass_resolve_inode(pl, inode))) { + putstr("load: Failed to resolve inode structure\r\n"); + return 0; + } + + if ((ret = jffs2_1pass_read_inode(pl, inode, dest)) < 0) { + putstr("load: Failed to read inode\r\n"); + return 0; + } + + DEBUGF ("load: loaded '%s' to 0x%lx (%ld bytes)\n", fname, + (unsigned long) dest, ret); + return ret; +} + +/* Return information about the fs on this partition */ +u32 +jffs2_1pass_info(struct part_info * part) +{ + struct b_jffs2_info info; + struct b_lists *pl; + int i; + + if (! 
(pl = jffs2_get_list(part, "info"))) + return 0; + + jffs2_1pass_fill_info(pl, &info); + for (i = 0; i < JFFS2_NUM_COMPR; i++) { + printf ("Compression: %s\n" + "\tfrag count: %d\n" + "\tcompressed sum: %d\n" + "\tuncompressed sum: %d\n", + compr_names[i], + info.compr_info[i].num_frags, + info.compr_info[i].compr_sum, + info.compr_info[i].decompr_sum); + } + return 1; +} + +#endif /* CFG_CMD_JFFS2 */ diff --git a/fs/jffs2/jffs2_private.h b/fs/jffs2/jffs2_private.h new file mode 100755 index 0000000..65ca6eb --- /dev/null +++ b/fs/jffs2/jffs2_private.h @@ -0,0 +1,88 @@ +#ifndef jffs2_private_h +#define jffs2_private_h + +#include <jffs2/jffs2.h> + + +struct b_node { + u32 offset; + struct b_node *next; +}; + +struct b_list { + struct b_node *listTail; + struct b_node *listHead; +#ifdef CFG_JFFS2_SORT_FRAGMENTS + struct b_node *listLast; + int (*listCompare)(struct b_node *new, struct b_node *node); + u32 listLoops; +#endif + u32 listCount; + struct mem_block *listMemBase; +}; + +struct b_lists { + struct b_list dir; + struct b_list frag; + +}; + +struct b_compr_info { + u32 num_frags; + u32 compr_sum; + u32 decompr_sum; +}; + +struct b_jffs2_info { + struct b_compr_info compr_info[JFFS2_NUM_COMPR]; +}; + +static inline int +hdr_crc(struct jffs2_unknown_node *node) +{ +#if 1 + u32 crc = crc32_no_comp(0, (unsigned char *)node, sizeof(struct jffs2_unknown_node) - 4); +#else + /* what's the semantics of this? why is this here? 
*/ + u32 crc = crc32_no_comp(~0, (unsigned char *)node, sizeof(struct jffs2_unknown_node) - 4); + + crc ^= ~0; +#endif + if (node->hdr_crc != crc) { + return 0; + } else { + return 1; + } +} + +static inline int +dirent_crc(struct jffs2_raw_dirent *node) +{ + if (node->node_crc != crc32_no_comp(0, (unsigned char *)node, sizeof(struct jffs2_raw_dirent) - 8)) { + return 0; + } else { + return 1; + } +} + +static inline int +dirent_name_crc(struct jffs2_raw_dirent *node) +{ + if (node->name_crc != crc32_no_comp(0, (unsigned char *)&(node->name), node->nsize)) { + return 0; + } else { + return 1; + } +} + +static inline int +inode_crc(struct jffs2_raw_inode *node) +{ + if (node->node_crc != crc32_no_comp(0, (unsigned char *)node, sizeof(struct jffs2_raw_inode) - 8)) { + return 0; + } else { + return 1; + } +} + +#endif /* jffs2_private.h */ diff --git a/fs/jffs2/mini_inflate.c b/fs/jffs2/mini_inflate.c new file mode 100755 index 0000000..4f511ec --- /dev/null +++ b/fs/jffs2/mini_inflate.c @@ -0,0 +1,396 @@ +/*------------------------------------------------------------------------- + * Filename: mini_inflate.c + * Version: $Id: mini_inflate.c,v 1.3 2002/01/24 22:58:42 rfeany Exp $ + * Copyright: Copyright (C) 2001, Russ Dill + * Author: Russ Dill <Russ.Dill@asu.edu> + * Description: Mini inflate implementation (RFC 1951) + *-----------------------------------------------------------------------*/ +/* + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#if (CONFIG_COMMANDS & CFG_CMD_JFFS2) + +#include <jffs2/mini_inflate.h> + +/* The order that the code lengths in section 3.2.7 are in */ +static unsigned char huffman_order[] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, + 11, 4, 12, 3, 13, 2, 14, 1, 15}; + +inline void cramfs_memset(int *s, const int c, size n) +{ + n--; + for (;n > 0; n--) s[n] = c; + s[0] = c; +} + +/* associate a stream with a block of data and reset the stream */ +static void init_stream(struct bitstream *stream, unsigned char *data, + void *(*inflate_memcpy)(void *, const void *, size)) +{ + stream->error = NO_ERROR; + stream->memcpy = inflate_memcpy; + stream->decoded = 0; + stream->data = data; + stream->bit = 0; /* The first bit of the stream is the lsb of the + * first byte */ + + /* really sorry about all this initialization, think of a better way, + * let me know and it will get cleaned up */ + stream->codes.bits = 8; + stream->codes.num_symbols = 19; + stream->codes.lengths = stream->code_lengths; + stream->codes.symbols = stream->code_symbols; + stream->codes.count = stream->code_count; + stream->codes.first = stream->code_first; + stream->codes.pos = stream->code_pos; + + stream->lengths.bits = 16; + stream->lengths.num_symbols = 288; + stream->lengths.lengths = stream->length_lengths; + stream->lengths.symbols = stream->length_symbols; + stream->lengths.count = stream->length_count; + stream->lengths.first = stream->length_first; + stream->lengths.pos = stream->length_pos; + + stream->distance.bits = 16; + stream->distance.num_symbols = 32; + stream->distance.lengths = stream->distance_lengths; + stream->distance.symbols = stream->distance_symbols; + stream->distance.count = stream->distance_count; + stream->distance.first = 
stream->distance_first; + stream->distance.pos = stream->distance_pos; + +} + +/* pull 'bits' bits out of the stream. The last bit pulled it returned as the + * msb. (section 3.1.1) + */ +inline unsigned long pull_bits(struct bitstream *stream, + const unsigned int bits) +{ + unsigned long ret; + int i; + + ret = 0; + for (i = 0; i < bits; i++) { + ret += ((*(stream->data) >> stream->bit) & 1) << i; + + /* if, before incrementing, we are on bit 7, + * go to the lsb of the next byte */ + if (stream->bit++ == 7) { + stream->bit = 0; + stream->data++; + } + } + return ret; +} + +inline int pull_bit(struct bitstream *stream) +{ + int ret = ((*(stream->data) >> stream->bit) & 1); + if (stream->bit++ == 7) { + stream->bit = 0; + stream->data++; + } + return ret; +} + +/* discard bits up to the next whole byte */ +static void discard_bits(struct bitstream *stream) +{ + if (stream->bit != 0) { + stream->bit = 0; + stream->data++; + } +} + +/* No decompression, the data is all literals (section 3.2.4) */ +static void decompress_none(struct bitstream *stream, unsigned char *dest) +{ + unsigned int length; + + discard_bits(stream); + length = *(stream->data++); + length += *(stream->data++) << 8; + pull_bits(stream, 16); /* throw away the inverse of the size */ + + stream->decoded += length; + stream->memcpy(dest, stream->data, length); + stream->data += length; +} + +/* Read in a symbol from the stream (section 3.2.2) */ +static int read_symbol(struct bitstream *stream, struct huffman_set *set) +{ + int bits = 0; + int code = 0; + while (!(set->count[bits] && code < set->first[bits] + + set->count[bits])) { + code = (code << 1) + pull_bit(stream); + if (++bits > set->bits) { + /* error decoding (corrupted data?) 
*/ + stream->error = CODE_NOT_FOUND; + return -1; + } + } + return set->symbols[set->pos[bits] + code - set->first[bits]]; +} + +/* decompress a stream of data encoded with the passed length and distance + * huffman codes */ +static void decompress_huffman(struct bitstream *stream, unsigned char *dest) +{ + struct huffman_set *lengths = &(stream->lengths); + struct huffman_set *distance = &(stream->distance); + + int symbol, length, dist, i; + + do { + if ((symbol = read_symbol(stream, lengths)) < 0) return; + if (symbol < 256) { + *(dest++) = symbol; /* symbol is a literal */ + stream->decoded++; + } else if (symbol > 256) { + /* Determine the length of the repitition + * (section 3.2.5) */ + if (symbol < 265) length = symbol - 254; + else if (symbol == 285) length = 258; + else { + length = pull_bits(stream, (symbol - 261) >> 2); + length += (4 << ((symbol - 261) >> 2)) + 3; + length += ((symbol - 1) % 4) << + ((symbol - 261) >> 2); + } + + /* Determine how far back to go */ + if ((symbol = read_symbol(stream, distance)) < 0) + return; + if (symbol < 4) dist = symbol + 1; + else { + dist = pull_bits(stream, (symbol - 2) >> 1); + dist += (2 << ((symbol - 2) >> 1)) + 1; + dist += (symbol % 2) << ((symbol - 2) >> 1); + } + stream->decoded += length; + for (i = 0; i < length; i++) { + *dest = dest[-dist]; + dest++; + } + } + } while (symbol != 256); /* 256 is the end of the data block */ +} + +/* Fill the lookup tables (section 3.2.2) */ +static void fill_code_tables(struct huffman_set *set) +{ + int code = 0, i, length; + + /* fill in the first code of each bit length, and the pos pointer */ + set->pos[0] = 0; + for (i = 1; i < set->bits; i++) { + code = (code + set->count[i - 1]) << 1; + set->first[i] = code; + set->pos[i] = set->pos[i - 1] + set->count[i - 1]; + } + + /* Fill in the table of symbols in order of their huffman code */ + for (i = 0; i < set->num_symbols; i++) { + if ((length = set->lengths[i])) + set->symbols[set->pos[length]++] = i; + } + + /* reset 
the pos pointer */ + for (i = 1; i < set->bits; i++) set->pos[i] -= set->count[i]; +} + +static void init_code_tables(struct huffman_set *set) +{ + cramfs_memset(set->lengths, 0, set->num_symbols); + cramfs_memset(set->count, 0, set->bits); + cramfs_memset(set->first, 0, set->bits); +} + +/* read in the huffman codes for dynamic decoding (section 3.2.7) */ +static void decompress_dynamic(struct bitstream *stream, unsigned char *dest) +{ + /* I tried my best to minimize the memory footprint here, while still + * keeping up performance. I really dislike the _lengths[] tables, but + * I see no way of eliminating them without a sizable performance + * impact. The first struct table keeps track of stats on each bit + * length. The _length table keeps a record of the bit length of each + * symbol. The _symbols table is for looking up symbols by the huffman + * code (the pos element points to the first place in the symbol table + * where that bit length occurs). I also hate the initization of these + * structs, if someone knows how to compact these, lemme know. 
*/ + + struct huffman_set *codes = &(stream->codes); + struct huffman_set *lengths = &(stream->lengths); + struct huffman_set *distance = &(stream->distance); + + int hlit = pull_bits(stream, 5) + 257; + int hdist = pull_bits(stream, 5) + 1; + int hclen = pull_bits(stream, 4) + 4; + int length, curr_code, symbol, i, last_code; + + last_code = 0; + + init_code_tables(codes); + init_code_tables(lengths); + init_code_tables(distance); + + /* fill in the count of each bit length' as well as the lengths + * table */ + for (i = 0; i < hclen; i++) { + length = pull_bits(stream, 3); + codes->lengths[huffman_order[i]] = length; + if (length) codes->count[length]++; + + } + fill_code_tables(codes); + + /* Do the same for the length codes, being carefull of wrap through + * to the distance table */ + curr_code = 0; + while (curr_code < hlit) { + if ((symbol = read_symbol(stream, codes)) < 0) return; + if (symbol == 0) { + curr_code++; + last_code = 0; + } else if (symbol < 16) { /* Literal length */ + lengths->lengths[curr_code] = last_code = symbol; + lengths->count[symbol]++; + curr_code++; + } else if (symbol == 16) { /* repeat the last symbol 3 - 6 + * times */ + length = 3 + pull_bits(stream, 2); + for (;length; length--, curr_code++) + if (curr_code < hlit) { + lengths->lengths[curr_code] = + last_code; + lengths->count[last_code]++; + } else { /* wrap to the distance table */ + distance->lengths[curr_code - hlit] = + last_code; + distance->count[last_code]++; + } + } else if (symbol == 17) { /* repeat a bit length 0 */ + curr_code += 3 + pull_bits(stream, 3); + last_code = 0; + } else { /* same, but more times */ + curr_code += 11 + pull_bits(stream, 7); + last_code = 0; + } + } + fill_code_tables(lengths); + + /* Fill the distance table, don't need to worry about wrapthrough + * here */ + curr_code -= hlit; + while (curr_code < hdist) { + if ((symbol = read_symbol(stream, codes)) < 0) return; + if (symbol == 0) { + curr_code++; + last_code = 0; + } else if (symbol < 
16) { + distance->lengths[curr_code] = last_code = symbol; + distance->count[symbol]++; + curr_code++; + } else if (symbol == 16) { + length = 3 + pull_bits(stream, 2); + for (;length; length--, curr_code++) { + distance->lengths[curr_code] = + last_code; + distance->count[last_code]++; + } + } else if (symbol == 17) { + curr_code += 3 + pull_bits(stream, 3); + last_code = 0; + } else { + curr_code += 11 + pull_bits(stream, 7); + last_code = 0; + } + } + fill_code_tables(distance); + + decompress_huffman(stream, dest); +} + +/* fill in the length and distance huffman codes for fixed encoding + * (section 3.2.6) */ +static void decompress_fixed(struct bitstream *stream, unsigned char *dest) +{ + /* let gcc fill in the initial values */ + struct huffman_set *lengths = &(stream->lengths); + struct huffman_set *distance = &(stream->distance); + + cramfs_memset(lengths->count, 0, 16); + cramfs_memset(lengths->first, 0, 16); + cramfs_memset(lengths->lengths, 8, 144); + cramfs_memset(lengths->lengths + 144, 9, 112); + cramfs_memset(lengths->lengths + 256, 7, 24); + cramfs_memset(lengths->lengths + 280, 8, 8); + lengths->count[7] = 24; + lengths->count[8] = 152; + lengths->count[9] = 112; + + cramfs_memset(distance->count, 0, 16); + cramfs_memset(distance->first, 0, 16); + cramfs_memset(distance->lengths, 5, 32); + distance->count[5] = 32; + + + fill_code_tables(lengths); + fill_code_tables(distance); + + + decompress_huffman(stream, dest); +} + +/* returns the number of bytes decoded, < 0 if there was an error. Note that + * this function assumes that the block starts on a byte boundry + * (non-compliant, but I don't see where this would happen). 
section 3.2.3 */ +long decompress_block(unsigned char *dest, unsigned char *source, + void *(*inflate_memcpy)(void *, const void *, size)) +{ + int bfinal, btype; + struct bitstream stream; + + init_stream(&stream, source, inflate_memcpy); + do { + bfinal = pull_bit(&stream); + btype = pull_bits(&stream, 2); + if (btype == NO_COMP) decompress_none(&stream, dest + stream.decoded); + else if (btype == DYNAMIC_COMP) + decompress_dynamic(&stream, dest + stream.decoded); + else if (btype == FIXED_COMP) decompress_fixed(&stream, dest + stream.decoded); + else stream.error = COMP_UNKNOWN; + } while (!bfinal && !stream.error); + +#if 0 + putstr("decompress_block start\r\n"); + putLabeledWord("stream.error = ",stream.error); + putLabeledWord("stream.decoded = ",stream.decoded); + putLabeledWord("dest = ",dest); + putstr("decompress_block end\r\n"); +#endif + return stream.error ? -stream.error : stream.decoded; +} + +#endif /* CFG_CMD_JFFS2 */ diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile new file mode 100755 index 0000000..98a9a8d --- /dev/null +++ b/fs/reiserfs/Makefile @@ -0,0 +1,48 @@ +# +# (C) Copyright 2003 +# Pavel Bartusek, Sysgo Real-Time Solutions AG, pba@sysgo.de +# +# +# See file CREDITS for list of people who contributed to this +# project. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# + +include $(TOPDIR)/config.mk + +LIB = libreiserfs.a + +AOBJS = +COBJS = reiserfs.o dev.o mode_string.o +OBJS = $(AOBJS) $(COBJS) + +#CPPFLAGS += + +all: $(LIB) $(AOBJS) + +$(LIB): .depend $(OBJS) + $(AR) crv $@ $(OBJS) + + +######################################################################### + +.depend: Makefile $(AOBJS:.o=.S) $(COBJS:.o=.c) + $(CC) -M $(CFLAGS) $(AOBJS:.o=.S) $(COBJS:.o=.c) > $@ + +sinclude .depend + +######################################################################### diff --git a/fs/reiserfs/dev.c b/fs/reiserfs/dev.c new file mode 100755 index 0000000..6f6056f --- /dev/null +++ b/fs/reiserfs/dev.c @@ -0,0 +1,123 @@ +/* + * (C) Copyright 2003 - 2004 + * Sysgo AG, <www.elinos.com>, Pavel Bartusek <pba@sysgo.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + + +#include <common.h> +#if (CONFIG_COMMANDS & CFG_CMD_REISER) + +#include <config.h> +#include <reiserfs.h> + +#include "reiserfs_private.h" + +static block_dev_desc_t *reiserfs_block_dev_desc; +static disk_partition_t part_info; + + +int reiserfs_set_blk_dev(block_dev_desc_t *rbdd, int part) +{ + reiserfs_block_dev_desc = rbdd; + + if (part == 0) { + /* disk doesn't use partition table */ + part_info.start = 0; + part_info.size = rbdd->lba; + part_info.blksz = rbdd->blksz; + } else { + if (get_partition_info (reiserfs_block_dev_desc, part, &part_info)) { + return 0; + } + } + return (part_info.size); +} + + +int reiserfs_devread (int sector, int byte_offset, int byte_len, char *buf) +{ + char sec_buf[SECTOR_SIZE]; + unsigned block_len; +/* + unsigned len = byte_len; + u8 *start = buf; +*/ + /* + * Check partition boundaries + */ + if (sector < 0 + || ((sector + ((byte_offset + byte_len - 1) >> SECTOR_BITS)) + >= part_info.size)) { +/* errnum = ERR_OUTSIDE_PART; */ + printf (" ** reiserfs_devread() read outside partition\n"); + return 0; + } + + /* + * Get the read to the beginning of a partition. 
+ */ + sector += byte_offset >> SECTOR_BITS; + byte_offset &= SECTOR_SIZE - 1; + +#if defined(DEBUG) + printf (" <%d, %d, %d> ", sector, byte_offset, byte_len); +#endif + + + if (reiserfs_block_dev_desc == NULL) + return 0; + + + if (byte_offset != 0) { + /* read first part which isn't aligned with start of sector */ + if (reiserfs_block_dev_desc->block_read(reiserfs_block_dev_desc->dev, + part_info.start+sector, 1, (unsigned long *)sec_buf) != 1) { + printf (" ** reiserfs_devread() read error\n"); + return 0; + } + memcpy(buf, sec_buf+byte_offset, min(SECTOR_SIZE-byte_offset, byte_len)); + buf+=min(SECTOR_SIZE-byte_offset, byte_len); + byte_len-=min(SECTOR_SIZE-byte_offset, byte_len); + sector++; + } + + /* read sector aligned part */ + block_len = byte_len & ~(SECTOR_SIZE-1); + if (reiserfs_block_dev_desc->block_read(reiserfs_block_dev_desc->dev, + part_info.start+sector, block_len/SECTOR_SIZE, (unsigned long *)buf) != + block_len/SECTOR_SIZE) { + printf (" ** reiserfs_devread() read error - block\n"); + return 0; + } + buf+=block_len; + byte_len-=block_len; + sector+= block_len/SECTOR_SIZE; + + if ( byte_len != 0 ) { + /* read rest of data which are not in whole sector */ + if (reiserfs_block_dev_desc->block_read(reiserfs_block_dev_desc->dev, + part_info.start+sector, 1, (unsigned long *)sec_buf) != 1) { + printf (" ** reiserfs_devread() read error - last part\n"); + return 0; + } + memcpy(buf, sec_buf, byte_len); + } + + return 1; +} + +#endif /* CFG_CMD_REISERFS */ diff --git a/fs/reiserfs/mode_string.c b/fs/reiserfs/mode_string.c new file mode 100755 index 0000000..bc565fb --- /dev/null +++ b/fs/reiserfs/mode_string.c @@ -0,0 +1,142 @@ +/* vi: set sw=4 ts=4: */ +/* + * mode_string implementation for busybox + * + * Copyright (C) 2003 Manuel Novoa III <mjn3@codepoet.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either 
version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* Aug 13, 2003 + * Fix a bug reported by junkio@cox.net involving the mode_chars index. + */ + + +#include <common.h> +#if (CONFIG_COMMANDS & CFG_CMD_REISER) +#include <linux/stat.h> + +#if ( S_ISUID != 04000 ) || ( S_ISGID != 02000 ) || ( S_ISVTX != 01000 ) \ + || ( S_IRUSR != 00400 ) || ( S_IWUSR != 00200 ) || ( S_IXUSR != 00100 ) \ + || ( S_IRGRP != 00040 ) || ( S_IWGRP != 00020 ) || ( S_IXGRP != 00010 ) \ + || ( S_IROTH != 00004 ) || ( S_IWOTH != 00002 ) || ( S_IXOTH != 00001 ) +#error permission bitflag value assumption(s) violated! +#endif + +#if ( S_IFSOCK!= 0140000 ) || ( S_IFLNK != 0120000 ) \ + || ( S_IFREG != 0100000 ) || ( S_IFBLK != 0060000 ) \ + || ( S_IFDIR != 0040000 ) || ( S_IFCHR != 0020000 ) \ + || ( S_IFIFO != 0010000 ) +#warning mode type bitflag value assumption(s) violated! falling back to larger version + +#if (S_IRWXU | S_IRWXG | S_IRWXO | S_ISUID | S_ISGID | S_ISVTX) == 07777 +#undef mode_t +#define mode_t unsigned short +#endif + +static const mode_t mode_flags[] = { + S_IRUSR, S_IWUSR, S_IXUSR, S_ISUID, + S_IRGRP, S_IWGRP, S_IXGRP, S_ISGID, + S_IROTH, S_IWOTH, S_IXOTH, S_ISVTX +}; + +/* The static const char arrays below are duplicated for the two cases + * because moving them ahead of the mode_flags declaration cause a text + * size increase with the gcc version I'm using. */ + +/* The previous version used "0pcCd?bB-?l?s???". 
However, the '0', 'C', + * and 'B' types don't appear to be available on linux. So I removed them. */ +static const char type_chars[16] = "?pc?d?b?-?l?s???"; +/* 0123456789abcdef */ +static const char mode_chars[7] = "rwxSTst"; + +const char *bb_mode_string(int mode) +{ + static char buf[12]; + char *p = buf; + + int i, j, k; + + *p = type_chars[ (mode >> 12) & 0xf ]; + i = 0; + do { + j = k = 0; + do { + *++p = '-'; + if (mode & mode_flags[i+j]) { + *p = mode_chars[j]; + k = j; + } + } while (++j < 3); + if (mode & mode_flags[i+j]) { + *p = mode_chars[3 + (k & 2) + ((i&8) >> 3)]; + } + i += 4; + } while (i < 12); + + /* Note: We don't bother with nul termination because bss initialization + * should have taken care of that for us. If the user scribbled in buf + * memory, they deserve whatever happens. But we'll at least assert. */ + if (buf[10] != 0) return NULL; + + return buf; +} + +#else + +/* The previous version used "0pcCd?bB-?l?s???". However, the '0', 'C', + * and 'B' types don't appear to be available on linux. So I removed them. */ +static const char type_chars[16] = "?pc?d?b?-?l?s???"; +/* 0123456789abcdef */ +static const char mode_chars[7] = "rwxSTst"; + +const char *bb_mode_string(int mode) +{ + static char buf[12]; + char *p = buf; + + int i, j, k, m; + + *p = type_chars[ (mode >> 12) & 0xf ]; + i = 0; + m = 0400; + do { + j = k = 0; + do { + *++p = '-'; + if (mode & m) { + *p = mode_chars[j]; + k = j; + } + m >>= 1; + } while (++j < 3); + ++i; + if (mode & (010000 >> i)) { + *p = mode_chars[3 + (k & 2) + (i == 3)]; + } + } while (i < 3); + + /* Note: We don't bother with nul termination because bss initialization + * should have taken care of that for us. If the user scribbled in buf + * memory, they deserve whatever happens. But we'll at least assert. 
*/ + if (buf[10] != 0) return NULL; + + return buf; +} + +#endif + +#endif /* CFG_CMD_REISER */ diff --git a/fs/reiserfs/reiserfs.c b/fs/reiserfs/reiserfs.c new file mode 100755 index 0000000..31c25eb --- /dev/null +++ b/fs/reiserfs/reiserfs.c @@ -0,0 +1,986 @@ +/* + * Copyright 2000-2002 by Hans Reiser, licensing governed by reiserfs/README + * + * GRUB -- GRand Unified Bootloader + * Copyright (C) 2000, 2001 Free Software Foundation, Inc. + * + * (C) Copyright 2003 - 2004 + * Sysgo AG, <www.elinos.com>, Pavel Bartusek <pba@sysgo.com> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* An implementation for the ReiserFS filesystem ported from GRUB. + * Some parts of this code (mainly the structures and defines) are + * from the original reiser fs code, as found in the linux kernel. + */ + +#include <common.h> +#if (CONFIG_COMMANDS & CFG_CMD_REISER) + +#include <malloc.h> +#include <linux/ctype.h> +#include <linux/time.h> +#include <asm/byteorder.h> +#include <reiserfs.h> + +#include "reiserfs_private.h" + +#undef REISERDEBUG + +/* Some parts of this code (mainly the structures and defines) are + * from the original reiser fs code, as found in the linux kernel. 
+ */ + +static char fsys_buf[FSYS_BUFLEN]; +static reiserfs_error_t errnum = ERR_NONE; +static int print_possibilities; +static unsigned int filepos, filemax; + +static int +substring (const char *s1, const char *s2) +{ + while (*s1 == *s2) + { + /* The strings match exactly. */ + if (! *(s1++)) + return 0; + s2 ++; + } + + /* S1 is a substring of S2. */ + if (*s1 == 0) + return -1; + + /* S1 isn't a substring. */ + return 1; +} + +static void sd_print_item (struct item_head * ih, char * item) +{ + char filetime[30]; + time_t ttime; + + if (stat_data_v1 (ih)) { + struct stat_data_v1 * sd = (struct stat_data_v1 *)item; + ttime = sd_v1_mtime(sd); + ctime_r(&ttime, filetime); + printf ("%-10s %4hd %6d %6d %9d %24.24s", + bb_mode_string(sd_v1_mode(sd)), sd_v1_nlink(sd),sd_v1_uid(sd), sd_v1_gid(sd), + sd_v1_size(sd), filetime); + } else { + struct stat_data * sd = (struct stat_data *)item; + ttime = sd_v2_mtime(sd); + ctime_r(&ttime, filetime); + printf ("%-10s %4d %6d %6d %9d %24.24s", + bb_mode_string(sd_v2_mode(sd)), sd_v2_nlink(sd),sd_v2_uid(sd),sd_v2_gid(sd), + (__u32) sd_v2_size(sd), filetime); + } +} + +static int +journal_read (int block, int len, char *buffer) +{ + return reiserfs_devread ((INFO->journal_block + block) << INFO->blocksize_shift, + 0, len, buffer); +} + +/* Read a block from ReiserFS file system, taking the journal into + * account. If the block nr is in the journal, the block from the + * journal taken. 
+ */
+static int
+block_read (unsigned int blockNr, int start, int len, char *buffer)
+{ /* blockNr is the filesystem block to read; start and len are handed to
+   * reiserfs_devread () as byte offset and byte count, buffer receives the
+   * data.  Returns 0 if a journal descriptor or commit block cannot be
+   * read, otherwise the result of the final reiserfs_devread ().
+   */
+  int transactions = INFO->journal_transactions;
+  int desc_block = INFO->journal_first_desc;
+  int journal_mask = INFO->journal_block_count - 1; /* wraps journal offsets */
+  int translatedNr = blockNr; /* stays blockNr unless a journal copy is found */
+  __u32 *journal_table = JOURNAL_START;
+  while (transactions-- > 0)
+    {
+      int i = 0;
+      int j_len;
+      if (__le32_to_cpu(*journal_table) != 0xffffffff)
+        {
+          /* Search for the blockNr in cached journal.
+           * Each cached transaction is a length word followed by that
+           * many real block numbers, all stored little endian.
+           */
+          j_len = __le32_to_cpu(*journal_table++);
+          while (i++ < j_len)
+            {
+              if (__le32_to_cpu(*journal_table++) == blockNr)
+                {
+                  journal_table += j_len - i; /* skip the rest of this transaction */
+                  goto found;
+                }
+            }
+        }
+      else
+        {
+          /* This is the end of cached journal marker.  The remaining
+           * transactions are still on disk.
+           */
+          struct reiserfs_journal_desc   desc;
+          struct reiserfs_journal_commit commit;
+
+          if (! journal_read (desc_block, sizeof (desc), (char *) &desc))
+            return 0;
+
+          j_len = __le32_to_cpu(desc.j_len);
+          while (i < j_len && i < JOURNAL_TRANS_HALF)
+            if (__le32_to_cpu(desc.j_realblock[i++]) == blockNr)
+              goto found;
+
+          if (j_len >= JOURNAL_TRANS_HALF)
+            { /* block numbers beyond the first half live in the commit block */
+              int commit_block = (desc_block + 1 + j_len) & journal_mask;
+              if (! journal_read (commit_block,
+                                  sizeof (commit), (char *) &commit))
+                return 0;
+              while (i < j_len)
+                if (__le32_to_cpu(commit.j_realblock[i++ - JOURNAL_TRANS_HALF]) == blockNr)
+                  goto found;
+            }
+        }
+      goto not_found;
+
+    found:
+      /* i was post-incremented past the matching entry, so desc_block + i
+       * is the journal block that follows the descriptor by (index + 1).
+       */
+      translatedNr = INFO->journal_block + ((desc_block + i) & journal_mask);
+#ifdef REISERDEBUG
+      printf ("block_read: block %d is mapped to journal block %d.\n",
+              blockNr, translatedNr - INFO->journal_block);
+#endif
+      /* We must continue the search, as this block may be overwritten
+       * in later transactions.
+       */
+    not_found:
+      desc_block = (desc_block + 2 + j_len) & journal_mask; /* descriptor + j_len blocks + commit */
+    }
+  return reiserfs_devread (translatedNr << INFO->blocksize_shift, start, len, buffer);
+}
+
+/* Init the journal data structure. 
We try to cache as much as + * possible in the JOURNAL_START-JOURNAL_END space, but if it is full + * we can still read the rest from the disk on demand. + * + * The first number of valid transactions and the descriptor block of the + * first valid transaction are held in INFO. The transactions are all + * adjacent, but we must take care of the journal wrap around. + */ +static int +journal_init (void) +{ + unsigned int block_count = INFO->journal_block_count; + unsigned int desc_block; + unsigned int commit_block; + unsigned int next_trans_id; + struct reiserfs_journal_header header; + struct reiserfs_journal_desc desc; + struct reiserfs_journal_commit commit; + __u32 *journal_table = JOURNAL_START; + + journal_read (block_count, sizeof (header), (char *) &header); + desc_block = __le32_to_cpu(header.j_first_unflushed_offset); + if (desc_block >= block_count) + return 0; + + INFO->journal_first_desc = desc_block; + next_trans_id = __le32_to_cpu(header.j_last_flush_trans_id) + 1; + +#ifdef REISERDEBUG + printf ("journal_init: last flushed %d\n", + __le32_to_cpu(header.j_last_flush_trans_id)); +#endif + + while (1) + { + journal_read (desc_block, sizeof (desc), (char *) &desc); + if (substring (JOURNAL_DESC_MAGIC, desc.j_magic) > 0 + || __le32_to_cpu(desc.j_trans_id) != next_trans_id + || __le32_to_cpu(desc.j_mount_id) != __le32_to_cpu(header.j_mount_id)) + /* no more valid transactions */ + break; + + commit_block = (desc_block + __le32_to_cpu(desc.j_len) + 1) & (block_count - 1); + journal_read (commit_block, sizeof (commit), (char *) &commit); + if (__le32_to_cpu(desc.j_trans_id) != commit.j_trans_id + || __le32_to_cpu(desc.j_len) != __le32_to_cpu(commit.j_len)) + /* no more valid transactions */ + break; + +#ifdef REISERDEBUG + printf ("Found valid transaction %d/%d at %d.\n", + __le32_to_cpu(desc.j_trans_id), __le32_to_cpu(desc.j_mount_id), desc_block); +#endif + + next_trans_id++; + if (journal_table < JOURNAL_END) + { + if ((journal_table + 1 + 
__le32_to_cpu(desc.j_len)) >= JOURNAL_END) + { + /* The table is almost full; mark the end of the cached + * journal.*/ + *journal_table = __cpu_to_le32(0xffffffff); + journal_table = JOURNAL_END; + } + else + { + unsigned int i; + /* Cache the length and the realblock numbers in the table. + * The block number of descriptor can easily be computed. + * and need not to be stored here. + */ + + /* both are in the little endian format */ + *journal_table++ = desc.j_len; + for (i = 0; i < __le32_to_cpu(desc.j_len) && i < JOURNAL_TRANS_HALF; i++) + { + /* both are in the little endian format */ + *journal_table++ = desc.j_realblock[i]; +#ifdef REISERDEBUG + printf ("block %d is in journal %d.\n", + __le32_to_cpu(desc.j_realblock[i]), desc_block); +#endif + } + for ( ; i < __le32_to_cpu(desc.j_len); i++) + { + /* both are in the little endian format */ + *journal_table++ = commit.j_realblock[i-JOURNAL_TRANS_HALF]; +#ifdef REISERDEBUG + printf ("block %d is in journal %d.\n", + __le32_to_cpu(commit.j_realblock[i-JOURNAL_TRANS_HALF]), + desc_block); +#endif + } + } + } + desc_block = (commit_block + 1) & (block_count - 1); + } +#ifdef REISERDEBUG + printf ("Transaction %d/%d at %d isn't valid.\n", + __le32_to_cpu(desc.j_trans_id), __le32_to_cpu(desc.j_mount_id), desc_block); +#endif + + INFO->journal_transactions + = next_trans_id - __le32_to_cpu(header.j_last_flush_trans_id) - 1; + return errnum == 0; +} + +/* check filesystem types and read superblock into memory buffer */ +int +reiserfs_mount (unsigned part_length) +{ + struct reiserfs_super_block super; + int superblock = REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS; + + if (part_length < superblock + (sizeof (super) >> SECTOR_BITS) + || ! 
reiserfs_devread (superblock, 0, sizeof (struct reiserfs_super_block), + (char *) &super) + || (substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) > 0 + && substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0 + && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0) + || (/* check that this is not a copy inside the journal log */ + sb_journal_block(&super) * sb_blocksize(&super) + <= REISERFS_DISK_OFFSET_IN_BYTES)) + { + /* Try old super block position */ + superblock = REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS; + if (part_length < superblock + (sizeof (super) >> SECTOR_BITS) + || ! reiserfs_devread (superblock, 0, sizeof (struct reiserfs_super_block), + (char *) &super)) + return 0; + + if (substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0 + && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0) + { + /* pre journaling super block ? */ + if (substring (REISERFS_SUPER_MAGIC_STRING, + (char*) ((int) &super + 20)) > 0) + return 0; + + set_sb_blocksize(&super, REISERFS_OLD_BLOCKSIZE); + set_sb_journal_block(&super, 0); + set_sb_version(&super, 0); + } + } + + /* check the version number. */ + if (sb_version(&super) > REISERFS_MAX_SUPPORTED_VERSION) + return 0; + + INFO->version = sb_version(&super); + INFO->blocksize = sb_blocksize(&super); + INFO->fullblocksize_shift = log2 (sb_blocksize(&super)); + INFO->blocksize_shift = INFO->fullblocksize_shift - SECTOR_BITS; + INFO->cached_slots = + (FSYSREISER_CACHE_SIZE >> INFO->fullblocksize_shift) - 1; + +#ifdef REISERDEBUG + printf ("reiserfs_mount: version=%d, blocksize=%d\n", + INFO->version, INFO->blocksize); +#endif /* REISERDEBUG */ + + /* Clear node cache. */ + memset (INFO->blocks, 0, sizeof (INFO->blocks)); + + if (sb_blocksize(&super) < FSYSREISER_MIN_BLOCKSIZE + || sb_blocksize(&super) > FSYSREISER_MAX_BLOCKSIZE + || (SECTOR_SIZE << INFO->blocksize_shift) != sb_blocksize(&super)) + return 0; + + /* Initialize journal code. 
If something fails we end with zero + * journal_transactions, so we don't access the journal at all. + */ + INFO->journal_transactions = 0; + if (sb_journal_block(&super) != 0 && super.s_journal_dev == 0) + { + INFO->journal_block = sb_journal_block(&super); + INFO->journal_block_count = sb_journal_size(&super); + if (is_power_of_two (INFO->journal_block_count)) + journal_init (); + + /* Read in super block again, maybe it is in the journal */ + block_read (superblock >> INFO->blocksize_shift, + 0, sizeof (struct reiserfs_super_block), (char *) &super); + } + + if (! block_read (sb_root_block(&super), 0, INFO->blocksize, (char*) ROOT)) + return 0; + + INFO->tree_depth = __le16_to_cpu(BLOCKHEAD (ROOT)->blk_level); + +#ifdef REISERDEBUG + printf ("root read_in: block=%d, depth=%d\n", + sb_root_block(&super), INFO->tree_depth); +#endif /* REISERDEBUG */ + + if (INFO->tree_depth >= MAX_HEIGHT) + return 0; + if (INFO->tree_depth == DISK_LEAF_NODE_LEVEL) + { + /* There is only one node in the whole filesystem, + * which is simultanously leaf and root */ + memcpy (LEAF, ROOT, INFO->blocksize); + } + return 1; +} + +/***************** TREE ACCESSING METHODS *****************************/ + +/* I assume you are familiar with the ReiserFS tree, if not go to + * http://www.namesys.com/content_table.html + * + * My tree node cache is organized as following + * 0 ROOT node + * 1 LEAF node (if the ROOT is also a LEAF it is copied here + * 2-n other nodes on current path from bottom to top. + * if there is not enough space in the cache, the top most are + * omitted. + * + * I have only two methods to find a key in the tree: + * search_stat(dir_id, objectid) searches for the stat entry (always + * the first entry) of an object. + * next_key() gets the next key in tree order. + * + * This means, that I can only sequential reads of files are + * efficient, but this really doesn't hurt for grub. + */ + +/* Read in the node at the current path and depth into the node cache. 
+ * You must set INFO->blocks[depth] before. + */ +static char * +read_tree_node (unsigned int blockNr, int depth) +{ + char* cache = CACHE(depth); + int num_cached = INFO->cached_slots; + if (depth < num_cached) + { + /* This is the cached part of the path. Check if same block is + * needed. + */ + if (blockNr == INFO->blocks[depth]) + return cache; + } + else + cache = CACHE(num_cached); + +#ifdef REISERDEBUG + printf (" next read_in: block=%d (depth=%d)\n", + blockNr, depth); +#endif /* REISERDEBUG */ + if (! block_read (blockNr, 0, INFO->blocksize, cache)) + return 0; + /* Make sure it has the right node level */ + if (__le16_to_cpu(BLOCKHEAD (cache)->blk_level) != depth) + { + errnum = ERR_FSYS_CORRUPT; + return 0; + } + + INFO->blocks[depth] = blockNr; + return cache; +} + +/* Get the next key, i.e. the key following the last retrieved key in + * tree order. INFO->current_ih and + * INFO->current_info are adapted accordingly. */ +static int +next_key (void) +{ + int depth; + struct item_head *ih = INFO->current_ih + 1; + char *cache; + +#ifdef REISERDEBUG + printf ("next_key:\n old ih: key %d:%d:%d:%d version:%d\n", + __le32_to_cpu(INFO->current_ih->ih_key.k_dir_id), + __le32_to_cpu(INFO->current_ih->ih_key.k_objectid), + __le32_to_cpu(INFO->current_ih->ih_key.u.v1.k_offset), + __le32_to_cpu(INFO->current_ih->ih_key.u.v1.k_uniqueness), + __le16_to_cpu(INFO->current_ih->ih_version)); +#endif /* REISERDEBUG */ + + if (ih == &ITEMHEAD[__le16_to_cpu(BLOCKHEAD (LEAF)->blk_nr_item)]) + { + depth = DISK_LEAF_NODE_LEVEL; + /* The last item, was the last in the leaf node. + * Read in the next block + */ + do + { + if (depth == INFO->tree_depth) + { + /* There are no more keys at all. 
+ * Return a dummy item with MAX_KEY */ + ih = (struct item_head *) &BLOCKHEAD (LEAF)->blk_right_delim_key; + goto found; + } + depth++; +#ifdef REISERDEBUG + printf (" depth=%d, i=%d\n", depth, INFO->next_key_nr[depth]); +#endif /* REISERDEBUG */ + } + while (INFO->next_key_nr[depth] == 0); + + if (depth == INFO->tree_depth) + cache = ROOT; + else if (depth <= INFO->cached_slots) + cache = CACHE (depth); + else + { + cache = read_tree_node (INFO->blocks[depth], depth); + if (! cache) + return 0; + } + + do + { + int nr_item = __le16_to_cpu(BLOCKHEAD (cache)->blk_nr_item); + int key_nr = INFO->next_key_nr[depth]++; +#ifdef REISERDEBUG + printf (" depth=%d, i=%d/%d\n", depth, key_nr, nr_item); +#endif /* REISERDEBUG */ + if (key_nr == nr_item) + /* This is the last item in this block, set the next_key_nr to 0 */ + INFO->next_key_nr[depth] = 0; + + cache = read_tree_node (dc_block_number(&(DC (cache)[key_nr])), --depth); + if (! cache) + return 0; + } + while (depth > DISK_LEAF_NODE_LEVEL); + + ih = ITEMHEAD; + } + found: + INFO->current_ih = ih; + INFO->current_item = &LEAF[__le16_to_cpu(ih->ih_item_location)]; +#ifdef REISERDEBUG + printf (" new ih: key %d:%d:%d:%d version:%d\n", + __le32_to_cpu(INFO->current_ih->ih_key.k_dir_id), + __le32_to_cpu(INFO->current_ih->ih_key.k_objectid), + __le32_to_cpu(INFO->current_ih->ih_key.u.v1.k_offset), + __le32_to_cpu(INFO->current_ih->ih_key.u.v1.k_uniqueness), + __le16_to_cpu(INFO->current_ih->ih_version)); +#endif /* REISERDEBUG */ + return 1; +} + +/* preconditions: reiserfs_mount already executed, therefore + * INFO block is valid + * returns: 0 if error (errnum is set), + * nonzero iff we were able to find the key successfully. + * postconditions: on a nonzero return, the current_ih and + * current_item fields describe the key that equals the + * searched key. INFO->next_key contains the next key after + * the searched key. + * side effects: messes around with the cache. 
+ */ +static int +search_stat (__u32 dir_id, __u32 objectid) +{ + char *cache; + int depth; + int nr_item; + int i; + struct item_head *ih; +#ifdef REISERDEBUG + printf ("search_stat:\n key %d:%d:0:0\n", dir_id, objectid); +#endif /* REISERDEBUG */ + + depth = INFO->tree_depth; + cache = ROOT; + + while (depth > DISK_LEAF_NODE_LEVEL) + { + struct key *key; + nr_item = __le16_to_cpu(BLOCKHEAD (cache)->blk_nr_item); + + key = KEY (cache); + + for (i = 0; i < nr_item; i++) + { + if (__le32_to_cpu(key->k_dir_id) > dir_id + || (__le32_to_cpu(key->k_dir_id) == dir_id + && (__le32_to_cpu(key->k_objectid) > objectid + || (__le32_to_cpu(key->k_objectid) == objectid + && (__le32_to_cpu(key->u.v1.k_offset) + | __le32_to_cpu(key->u.v1.k_uniqueness)) > 0)))) + break; + key++; + } + +#ifdef REISERDEBUG + printf (" depth=%d, i=%d/%d\n", depth, i, nr_item); +#endif /* REISERDEBUG */ + INFO->next_key_nr[depth] = (i == nr_item) ? 0 : i+1; + cache = read_tree_node (dc_block_number(&(DC (cache)[i])), --depth); + if (! 
cache) + return 0; + } + + /* cache == LEAF */ + nr_item = __le16_to_cpu(BLOCKHEAD (LEAF)->blk_nr_item); + ih = ITEMHEAD; + for (i = 0; i < nr_item; i++) + { + if (__le32_to_cpu(ih->ih_key.k_dir_id) == dir_id + && __le32_to_cpu(ih->ih_key.k_objectid) == objectid + && __le32_to_cpu(ih->ih_key.u.v1.k_offset) == 0 + && __le32_to_cpu(ih->ih_key.u.v1.k_uniqueness) == 0) + { +#ifdef REISERDEBUG + printf (" depth=%d, i=%d/%d\n", depth, i, nr_item); +#endif /* REISERDEBUG */ + INFO->current_ih = ih; + INFO->current_item = &LEAF[__le16_to_cpu(ih->ih_item_location)]; + return 1; + } + ih++; + } + errnum = ERR_FSYS_CORRUPT; + return 0; +} + +int +reiserfs_read (char *buf, unsigned len) +{ + unsigned int blocksize; + unsigned int offset; + unsigned int to_read; + char *prev_buf = buf; + +#ifdef REISERDEBUG + printf ("reiserfs_read: filepos=%d len=%d, offset=%Lx\n", + filepos, len, (__u64) IH_KEY_OFFSET (INFO->current_ih) - 1); +#endif /* REISERDEBUG */ + + if (__le32_to_cpu(INFO->current_ih->ih_key.k_objectid) != INFO->fileinfo.k_objectid + || IH_KEY_OFFSET (INFO->current_ih) > filepos + 1) + { + search_stat (INFO->fileinfo.k_dir_id, INFO->fileinfo.k_objectid); + goto get_next_key; + } + + while (! 
errnum) + { + if (__le32_to_cpu(INFO->current_ih->ih_key.k_objectid) != INFO->fileinfo.k_objectid) { + break; + } + + offset = filepos - IH_KEY_OFFSET (INFO->current_ih) + 1; + blocksize = __le16_to_cpu(INFO->current_ih->ih_item_len); + +#ifdef REISERDEBUG + printf (" loop: filepos=%d len=%d, offset=%d blocksize=%d\n", + filepos, len, offset, blocksize); +#endif /* REISERDEBUG */ + + if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_DIRECT) + && offset < blocksize) + { +#ifdef REISERDEBUG + printf ("direct_read: offset=%d, blocksize=%d\n", + offset, blocksize); +#endif /* REISERDEBUG */ + to_read = blocksize - offset; + if (to_read > len) + to_read = len; + + memcpy (buf, INFO->current_item + offset, to_read); + goto update_buf_len; + } + else if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_INDIRECT)) + { + blocksize = (blocksize >> 2) << INFO->fullblocksize_shift; +#ifdef REISERDEBUG + printf ("indirect_read: offset=%d, blocksize=%d\n", + offset, blocksize); +#endif /* REISERDEBUG */ + + while (offset < blocksize) + { + __u32 blocknr = __le32_to_cpu(((__u32 *) INFO->current_item) + [offset >> INFO->fullblocksize_shift]); + int blk_offset = offset & (INFO->blocksize-1); + to_read = INFO->blocksize - blk_offset; + if (to_read > len) + to_read = len; + + /* Journal is only for meta data. Data blocks can be read + * directly without using block_read + */ + reiserfs_devread (blocknr << INFO->blocksize_shift, + blk_offset, to_read, buf); + update_buf_len: + len -= to_read; + buf += to_read; + offset += to_read; + filepos += to_read; + if (len == 0) + goto done; + } + } + get_next_key: + next_key (); + } + done: + return errnum ? 0 : buf - prev_buf; +} + + +/* preconditions: reiserfs_mount already executed, therefore + * INFO block is valid + * returns: 0 if error, nonzero iff we were able to find the file successfully + * postconditions: on a nonzero return, INFO->fileinfo contains the info + * of the file we were trying to look up, filepos is 0 and filemax is + * the size of the file. 
+ */ +static int +reiserfs_dir (char *dirname) +{ + struct reiserfs_de_head *de_head; + char *rest, ch; + __u32 dir_id, objectid, parent_dir_id = 0, parent_objectid = 0; +#ifndef STAGE1_5 + int do_possibilities = 0; +#endif /* ! STAGE1_5 */ + char linkbuf[PATH_MAX]; /* buffer for following symbolic links */ + int link_count = 0; + int mode; + + dir_id = REISERFS_ROOT_PARENT_OBJECTID; + objectid = REISERFS_ROOT_OBJECTID; + + while (1) + { +#ifdef REISERDEBUG + printf ("dirname=%s\n", dirname); +#endif /* REISERDEBUG */ + + /* Search for the stat info first. */ + if (! search_stat (dir_id, objectid)) + return 0; + +#ifdef REISERDEBUG + printf ("sd_mode=%x sd_size=%d\n", + stat_data_v1(INFO->current_ih) ? sd_v1_mode((struct stat_data_v1 *) INFO->current_item) : + sd_v2_mode((struct stat_data *) (INFO->current_item)), + stat_data_v1(INFO->current_ih) ? sd_v1_size((struct stat_data_v1 *) INFO->current_item) : + sd_v2_size((struct stat_data *) INFO->current_item) + ); + +#endif /* REISERDEBUG */ + mode = stat_data_v1(INFO->current_ih) ? + sd_v1_mode((struct stat_data_v1 *) INFO->current_item) : + sd_v2_mode((struct stat_data *) INFO->current_item); + + /* If we've got a symbolic link, then chase it. */ + if (S_ISLNK (mode)) + { + unsigned int len; + if (++link_count > MAX_LINK_COUNT) + { + errnum = ERR_SYMLINK_LOOP; + return 0; + } + + /* Get the symlink size. */ + filemax = stat_data_v1(INFO->current_ih) ? + sd_v1_size((struct stat_data_v1 *) INFO->current_item) : + sd_v2_size((struct stat_data *) INFO->current_item); + + /* Find out how long our remaining name is. */ + len = 0; + while (dirname[len] && !isspace (dirname[len])) + len++; + + if (filemax + len > sizeof (linkbuf) - 1) + { + errnum = ERR_FILELENGTH; + return 0; + } + + /* Copy the remaining name to the end of the symlink data. + Note that DIRNAME and LINKBUF may overlap! 
*/ + memmove (linkbuf + filemax, dirname, len+1); + + INFO->fileinfo.k_dir_id = dir_id; + INFO->fileinfo.k_objectid = objectid; + filepos = 0; + if (! next_key () + || reiserfs_read (linkbuf, filemax) != filemax) + { + if (! errnum) + errnum = ERR_FSYS_CORRUPT; + return 0; + } + +#ifdef REISERDEBUG + printf ("symlink=%s\n", linkbuf); +#endif /* REISERDEBUG */ + + dirname = linkbuf; + if (*dirname == '/') + { + /* It's an absolute link, so look it up in root. */ + dir_id = REISERFS_ROOT_PARENT_OBJECTID; + objectid = REISERFS_ROOT_OBJECTID; + } + else + { + /* Relative, so look it up in our parent directory. */ + dir_id = parent_dir_id; + objectid = parent_objectid; + } + + /* Now lookup the new name. */ + continue; + } + + /* if we have a real file (and we're not just printing possibilities), + then this is where we want to exit */ + + if (! *dirname || isspace (*dirname)) + { + if (! S_ISREG (mode)) + { + errnum = ERR_BAD_FILETYPE; + return 0; + } + + filepos = 0; + filemax = stat_data_v1(INFO->current_ih) ? + sd_v1_size((struct stat_data_v1 *) INFO->current_item) : + sd_v2_size((struct stat_data *) INFO->current_item); +#if 0 + /* If this is a new stat data and size is > 4GB set filemax to + * maximum + */ + if (__le16_to_cpu(INFO->current_ih->ih_version) == ITEM_VERSION_2 + && sd_size_hi((struct stat_data *) INFO->current_item) > 0) + filemax = 0xffffffff; +#endif + INFO->fileinfo.k_dir_id = dir_id; + INFO->fileinfo.k_objectid = objectid; + return next_key (); + } + + /* continue with the file/directory name interpretation */ + while (*dirname == '/') + dirname++; + if (! S_ISDIR (mode)) + { + errnum = ERR_BAD_FILETYPE; + return 0; + } + for (rest = dirname; (ch = *rest) && ! isspace (ch) && ch != '/'; rest++); + *rest = 0; + +# ifndef STAGE1_5 + if (print_possibilities && ch != '/') + do_possibilities = 1; +# endif /* ! STAGE1_5 */ + + while (1) + { + char *name_end; + int num_entries; + + if (! 
next_key ()) + return 0; +#ifdef REISERDEBUG + printf ("ih: key %d:%d:%d:%d version:%d\n", + __le32_to_cpu(INFO->current_ih->ih_key.k_dir_id), + __le32_to_cpu(INFO->current_ih->ih_key.k_objectid), + __le32_to_cpu(INFO->current_ih->ih_key.u.v1.k_offset), + __le32_to_cpu(INFO->current_ih->ih_key.u.v1.k_uniqueness), + __le16_to_cpu(INFO->current_ih->ih_version)); +#endif /* REISERDEBUG */ + + if (__le32_to_cpu(INFO->current_ih->ih_key.k_objectid) != objectid) + break; + + name_end = INFO->current_item + __le16_to_cpu(INFO->current_ih->ih_item_len); + de_head = (struct reiserfs_de_head *) INFO->current_item; + num_entries = __le16_to_cpu(INFO->current_ih->u.ih_entry_count); + while (num_entries > 0) + { + char *filename = INFO->current_item + deh_location(de_head); + char tmp = *name_end; + if ((deh_state(de_head) & DEH_Visible)) + { + int cmp; + /* Directory names in ReiserFS are not null + * terminated. We write a temporary 0 behind it. + * NOTE: that this may overwrite the first block in + * the tree cache. That doesn't hurt as long as we + * don't call next_key () in between. + */ + *name_end = 0; + cmp = substring (dirname, filename); + *name_end = tmp; +# ifndef STAGE1_5 + if (do_possibilities) + { + if (cmp <= 0) + { + char fn[PATH_MAX]; + struct fsys_reiser_info info_save; + + if (print_possibilities > 0) + print_possibilities = -print_possibilities; + *name_end = 0; + strcpy(fn, filename); + *name_end = tmp; + + /* If NAME is "." or "..", do not count it. */ + if (strcmp (fn, ".") != 0 && strcmp (fn, "..") != 0) { + memcpy(&info_save, INFO, sizeof(struct fsys_reiser_info)); + search_stat (deh_dir_id(de_head), deh_objectid(de_head)); + sd_print_item(INFO->current_ih, INFO->current_item); + printf(" %s\n", fn); + search_stat (dir_id, objectid); + memcpy(INFO, &info_save, sizeof(struct fsys_reiser_info)); + } + } + } + else +# endif /* ! STAGE1_5 */ + if (cmp == 0) + goto found; + } + /* The beginning of this name marks the end of the next name. 
+ */ + name_end = filename; + de_head++; + num_entries--; + } + } + +# ifndef STAGE1_5 + if (print_possibilities < 0) + return 1; +# endif /* ! STAGE1_5 */ + + errnum = ERR_FILE_NOT_FOUND; + *rest = ch; + return 0; + + found: + *rest = ch; + dirname = rest; + + parent_dir_id = dir_id; + parent_objectid = objectid; + dir_id = deh_dir_id(de_head); + objectid = deh_objectid(de_head); + } +} + +/* + * U-Boot interface functions + */ + +/* + * List given directory + * + * RETURN: 0 - OK, else grub_error_t errnum + */ +int +reiserfs_ls (char *dirname) +{ + char *dir_slash; + int res; + + errnum = 0; + dir_slash = malloc(strlen(dirname) + 1); + if (dir_slash == NULL) { + return ERR_NUMBER_OVERFLOW; + } + strcpy(dir_slash, dirname); + /* add "/" to the directory name */ + strcat(dir_slash, "/"); + + print_possibilities = 1; + res = reiserfs_dir (dir_slash); + free(dir_slash); + if (!res || errnum) { + return errnum; + } + + return 0; +} + +/* + * Open file for reading + * + * RETURN: >0 - OK, size of opened file + * <0 - ERROR -grub_error_t errnum + */ +int +reiserfs_open (char *filename) +{ + /* open the file */ + errnum = 0; + print_possibilities = 0; + if (!reiserfs_dir (filename) || errnum) { + return -errnum; + } + return filemax; +} + +#endif /* CFG_CMD_REISER */ diff --git a/fs/reiserfs/reiserfs_private.h b/fs/reiserfs/reiserfs_private.h new file mode 100755 index 0000000..d0197cb --- /dev/null +++ b/fs/reiserfs/reiserfs_private.h @@ -0,0 +1,520 @@ +/* + * Copyright 2000-2002 by Hans Reiser, licensing governed by reiserfs/README + * + * GRUB -- GRand Unified Bootloader + * Copyright (C) 2000, 2001 Free Software Foundation, Inc. 
+ * + * (C) Copyright 2003 - 2004 + * Sysgo AG, <www.elinos.com>, Pavel Bartusek <pba@sysgo.com> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* An implementation for the ReiserFS filesystem ported from GRUB. + * Some parts of this code (mainly the structures and defines) are + * from the original reiser fs code, as found in the linux kernel. + */ + +#ifndef __BYTE_ORDER +#if defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN) +#define __BYTE_ORDER __LITTLE_ENDIAN +#elif defined(__BIG_ENDIAN) && !defined(__LITTLE_ENDIAN) +#define __BYTE_ORDER __BIG_ENDIAN +#else +#error "unable to define __BYTE_ORDER" +#endif +#endif /* not __BYTE_ORDER */ + +#define FSYS_BUFLEN 0x8000 +#define FSYS_BUF fsys_buf + +/* This is the new super block of a journaling reiserfs system */ +struct reiserfs_super_block +{ + __u32 s_block_count; /* blocks count */ + __u32 s_free_blocks; /* free blocks count */ + __u32 s_root_block; /* root block number */ + __u32 s_journal_block; /* journal block number */ + __u32 s_journal_dev; /* journal device number */ + __u32 s_journal_size; /* size of the journal on FS creation. used to make sure they don't overflow it */ + __u32 s_journal_trans_max; /* max number of blocks in a transaction. 
*/ + __u32 s_journal_magic; /* random value made on fs creation */ + __u32 s_journal_max_batch; /* max number of blocks to batch into a trans */ + __u32 s_journal_max_commit_age; /* in seconds, how old can an async commit be */ + __u32 s_journal_max_trans_age; /* in seconds, how old can a transaction be */ + __u16 s_blocksize; /* block size */ + __u16 s_oid_maxsize; /* max size of object id array */ + __u16 s_oid_cursize; /* current size of object id array */ + __u16 s_state; /* valid or error */ + char s_magic[16]; /* reiserfs magic string indicates that file system is reiserfs */ + __u16 s_tree_height; /* height of disk tree */ + __u16 s_bmap_nr; /* amount of bitmap blocks needed to address each block of file system */ + __u16 s_version; + char s_unused[128]; /* zero filled by mkreiserfs */ +}; + + +#define sb_root_block(sbp) (__le32_to_cpu((sbp)->s_root_block)) +#define sb_journal_block(sbp) (__le32_to_cpu((sbp)->s_journal_block)) +#define set_sb_journal_block(sbp,v) ((sbp)->s_journal_block = __cpu_to_le32(v)) +#define sb_journal_size(sbp) (__le32_to_cpu((sbp)->s_journal_size)) +#define sb_blocksize(sbp) (__le16_to_cpu((sbp)->s_blocksize)) +#define set_sb_blocksize(sbp,v) ((sbp)->s_blocksize = __cpu_to_le16(v)) +#define sb_version(sbp) (__le16_to_cpu((sbp)->s_version)) +#define set_sb_version(sbp,v) ((sbp)->s_version = __cpu_to_le16(v)) + + +#define REISERFS_MAX_SUPPORTED_VERSION 2 +#define REISERFS_SUPER_MAGIC_STRING "ReIsErFs" +#define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" +#define REISER3FS_SUPER_MAGIC_STRING "ReIsEr3Fs" + +#define MAX_HEIGHT 7 + +/* must be correct to keep the desc and commit structs at 4k */ +#define JOURNAL_TRANS_HALF 1018 + +/* first block written in a commit. */ +struct reiserfs_journal_desc { + __u32 j_trans_id; /* id of commit */ + __u32 j_len; /* length of commit. 
len +1 is the commit block */ + __u32 j_mount_id; /* mount id of this trans*/ + __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the first blocks */ + char j_magic[12]; +}; + +/* last block written in a commit */ +struct reiserfs_journal_commit { + __u32 j_trans_id; /* must match j_trans_id from the desc block */ + __u32 j_len; /* ditto */ + __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the last blocks */ + char j_digest[16]; /* md5 sum of all the blocks involved, including desc and commit. not used, kill it */ +}; + +/* this header block gets written whenever a transaction is considered + fully flushed, and is more recent than the last fully flushed + transaction. + fully flushed means all the log blocks and all the real blocks are + on disk, and this transaction does not need to be replayed. +*/ +struct reiserfs_journal_header { + /* id of last fully flushed transaction */ + __u32 j_last_flush_trans_id; + /* offset in the log of where to start replay after a crash */ + __u32 j_first_unflushed_offset; + /* mount id to detect very old transactions */ + __u32 j_mount_id; +}; + +/* magic string to find desc blocks in the journal */ +#define JOURNAL_DESC_MAGIC "ReIsErLB" + + +/* + * directories use this key as well as old files + */ +struct offset_v1 +{ + /* + * for regular files this is the offset to the first byte of the + * body, contained in the object-item, as measured from the start of + * the entire body of the object. + * + * for directory entries, k_offset consists of hash derived from + * hashing the name and using few bits (23 or more) of the resulting + * hash, and generation number that allows distinguishing names with + * hash collisions. If number of collisions overflows generation + * number, we return EEXIST. 
High order bit is 0 always + */ + __u32 k_offset; + __u32 k_uniqueness; +}; + +struct offset_v2 { + /* + * for regular files this is the offset to the first byte of the + * body, contained in the object-item, as measured from the start of + * the entire body of the object. + * + * for directory entries, k_offset consists of hash derived from + * hashing the name and using few bits (23 or more) of the resulting + * hash, and generation number that allows distinguishing names with + * hash collisions. If number of collisions overflows generation + * number, we return EEXIST. High order bit is 0 always + */ + +#if defined(__LITTLE_ENDIAN_BITFIELD) + /* little endian version */ + __u64 k_offset:60; + __u64 k_type: 4; +#elif defined(__BIG_ENDIAN_BITFIELD) + /* big endian version */ + __u64 k_type: 4; + __u64 k_offset:60; +#else +#error "__LITTLE_ENDIAN_BITFIELD or __BIG_ENDIAN_BITFIELD must be defined" +#endif +} __attribute__ ((__packed__)); + +#define TYPE_MAXTYPE 3 +#define TYPE_ANY 15 + +#if (__BYTE_ORDER == __BIG_ENDIAN) +typedef union { + struct offset_v2 offset_v2; + __u64 linear; +} __attribute__ ((__packed__)) offset_v2_esafe_overlay; + +static inline __u16 offset_v2_k_type( const struct offset_v2 *v2 ) +{ + offset_v2_esafe_overlay tmp = *(const offset_v2_esafe_overlay *)v2; + tmp.linear = __le64_to_cpu( tmp.linear ); + return (tmp.offset_v2.k_type <= TYPE_MAXTYPE)?tmp.offset_v2.k_type:TYPE_ANY; +} + +static inline loff_t offset_v2_k_offset( const struct offset_v2 *v2 ) +{ + offset_v2_esafe_overlay tmp = *(const offset_v2_esafe_overlay *)v2; + tmp.linear = __le64_to_cpu( tmp.linear ); + return tmp.offset_v2.k_offset; +} +#elif (__BYTE_ORDER == __LITTLE_ENDIAN) +# define offset_v2_k_type(v2) ((v2)->k_type) +# define offset_v2_k_offset(v2) ((v2)->k_offset) +#else +#error "__BYTE_ORDER must be __LITTLE_ENDIAN or __BIG_ENDIAN" +#endif + +struct key +{ + /* packing locality: by default parent directory object id */ + __u32 k_dir_id; + /* object identifier */ + __u32 
k_objectid; + /* the offset and node type (old and new form) */ + union + { + struct offset_v1 v1; + struct offset_v2 v2; + } + u; +}; + +#define KEY_SIZE (sizeof (struct key)) + +/* Header of a disk block. More precisely, header of a formatted leaf + or internal node, and not the header of an unformatted node. */ +struct block_head +{ + __u16 blk_level; /* Level of a block in the tree. */ + __u16 blk_nr_item; /* Number of keys/items in a block. */ + __u16 blk_free_space; /* Block free space in bytes. */ + struct key blk_right_delim_key; /* Right delimiting key for this block (supported for leaf level nodes + only) */ +}; +#define BLKH_SIZE (sizeof (struct block_head)) +#define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */ + +struct item_head +{ + /* Everything in the tree is found by searching for it based on + * its key.*/ + struct key ih_key; + union { + /* The free space in the last unformatted node of an + indirect item if this is an indirect item. This + equals 0xFFFF iff this is a direct item or stat data + item. Note that the key, not this field, is used to + determine the item type, and thus which field this + union contains. */ + __u16 ih_free_space; + /* Iff this is a directory item, this field equals the + number of directory entries in the directory item. */ + __u16 ih_entry_count; + } __attribute__ ((__packed__)) u; + __u16 ih_item_len; /* total size of the item body */ + __u16 ih_item_location; /* an offset to the item body + * within the block */ + __u16 ih_version; /* 0 for all old items, 2 for new + ones. Highest bit is set by fsck + temporary, cleaned after all + done */ +} __attribute__ ((__packed__)); + +/* size of item header */ +#define IH_SIZE (sizeof (struct item_head)) + +#define ITEM_VERSION_1 0 +#define ITEM_VERSION_2 1 + +#define ih_version(ih) (__le16_to_cpu((ih)->ih_version)) + +#define IH_KEY_OFFSET(ih) (ih_version(ih) == ITEM_VERSION_1 \ + ? 
__le32_to_cpu((ih)->ih_key.u.v1.k_offset) \ + : offset_v2_k_offset(&((ih)->ih_key.u.v2))) + +#define IH_KEY_ISTYPE(ih, type) (ih_version(ih) == ITEM_VERSION_1 \ + ? __le32_to_cpu((ih)->ih_key.u.v1.k_uniqueness) == V1_##type \ + : offset_v2_k_type(&((ih)->ih_key.u.v2)) == V2_##type) + +/***************************************************************************/ +/* DISK CHILD */ +/***************************************************************************/ +/* Disk child pointer: The pointer from an internal node of the tree + to a node that is on disk. */ +struct disk_child { + __u32 dc_block_number; /* Disk child's block number. */ + __u16 dc_size; /* Disk child's used space. */ + __u16 dc_reserved; +}; + +#define DC_SIZE (sizeof(struct disk_child)) +#define dc_block_number(dc_p) (__le32_to_cpu((dc_p)->dc_block_number)) + + +/* + * old stat data is 32 bytes long. We are going to distinguish new one by + * different size + */ +struct stat_data_v1 +{ + __u16 sd_mode; /* file type, permissions */ + __u16 sd_nlink; /* number of hard links */ + __u16 sd_uid; /* owner */ + __u16 sd_gid; /* group */ + __u32 sd_size; /* file size */ + __u32 sd_atime; /* time of last access */ + __u32 sd_mtime; /* time file was last modified */ + __u32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ + union { + __u32 sd_rdev; + __u32 sd_blocks; /* number of blocks file uses */ + } __attribute__ ((__packed__)) u; + __u32 sd_first_direct_byte; /* first byte of file which is stored + in a direct item: except that if it + equals 1 it is a symlink and if it + equals ~(__u32)0 there is no + direct item. The existence of this + field really grates on me. Let's + replace it with a macro based on + sd_size and our tail suppression + policy. Someday. 
-Hans */ +} __attribute__ ((__packed__)); + +#define stat_data_v1(ih) (ih_version(ih) == ITEM_VERSION_1) +#define sd_v1_mode(sdp) ((sdp)->sd_mode) +#define sd_v1_nlink(sdp) (__le16_to_cpu((sdp)->sd_nlink)) +#define sd_v1_uid(sdp) (__le16_to_cpu((sdp)->sd_uid)) +#define sd_v1_gid(sdp) (__le16_to_cpu((sdp)->sd_gid)) +#define sd_v1_size(sdp) (__le32_to_cpu((sdp)->sd_size)) +#define sd_v1_mtime(sdp) (__le32_to_cpu((sdp)->sd_mtime)) + +/* Stat Data on disk (reiserfs version of UFS disk inode minus the + address blocks) */ +struct stat_data { + __u16 sd_mode; /* file type, permissions */ + __u16 sd_attrs; /* persistent inode flags */ + __u32 sd_nlink; /* number of hard links */ + __u64 sd_size; /* file size */ + __u32 sd_uid; /* owner */ + __u32 sd_gid; /* group */ + __u32 sd_atime; /* time of last access */ + __u32 sd_mtime; /* time file was last modified */ + __u32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ + __u32 sd_blocks; + union { + __u32 sd_rdev; + __u32 sd_generation; + /*__u32 sd_first_direct_byte; */ + /* first byte of file which is stored in a + direct item: except that if it equals 1 + it is a symlink and if it equals + ~(__u32)0 there is no direct item. The + existence of this field really grates + on me. Let's replace it with a macro + based on sd_size and our tail + suppression policy? 
*/ + } __attribute__ ((__packed__)) u; +} __attribute__ ((__packed__)); + +#define stat_data_v2(ih) (ih_version(ih) == ITEM_VERSION_2) +#define sd_v2_mode(sdp) (__le16_to_cpu((sdp)->sd_mode)) +#define sd_v2_nlink(sdp) (__le32_to_cpu((sdp)->sd_nlink)) +#define sd_v2_size(sdp) (__le64_to_cpu((sdp)->sd_size)) +#define sd_v2_uid(sdp) (__le32_to_cpu((sdp)->sd_uid)) +#define sd_v2_gid(sdp) (__le32_to_cpu((sdp)->sd_gid)) +#define sd_v2_mtime(sdp) (__le32_to_cpu((sdp)->sd_mtime)) + +#define sd_mode(sdp) (__le16_to_cpu((sdp)->sd_mode)) +#define sd_size(sdp) (__le32_to_cpu((sdp)->sd_size)) +#define sd_size_hi(sdp) (__le32_to_cpu((sdp)->sd_size_hi)) + +struct reiserfs_de_head +{ + __u32 deh_offset; /* third component of the directory entry key */ + __u32 deh_dir_id; /* objectid of the parent directory of the + object, that is referenced by directory entry */ + __u32 deh_objectid;/* objectid of the object, that is referenced by + directory entry */ + __u16 deh_location;/* offset of name in the whole item */ + __u16 deh_state; /* whether 1) entry contains stat data (for + future), and 2) whether entry is hidden + (unlinked) */ +}; + +#define DEH_SIZE (sizeof (struct reiserfs_de_head)) +#define deh_offset(p_deh) (__le32_to_cpu((p_deh)->deh_offset)) +#define deh_dir_id(p_deh) (__le32_to_cpu((p_deh)->deh_dir_id)) +#define deh_objectid(p_deh) (__le32_to_cpu((p_deh)->deh_objectid)) +#define deh_location(p_deh) (__le16_to_cpu((p_deh)->deh_location)) +#define deh_state(p_deh) (__le16_to_cpu((p_deh)->deh_state)) + + +#define DEH_Statdata (1 << 0) /* not used now */ +#define DEH_Visible (1 << 2) + +#define SD_OFFSET 0 +#define SD_UNIQUENESS 0 +#define DOT_OFFSET 1 +#define DOT_DOT_OFFSET 2 +#define DIRENTRY_UNIQUENESS 500 + +#define V1_TYPE_STAT_DATA 0x0 +#define V1_TYPE_DIRECT 0xffffffff +#define V1_TYPE_INDIRECT 0xfffffffe +#define V1_TYPE_DIRECTORY_MAX 0xfffffffd +#define V2_TYPE_STAT_DATA 0 +#define V2_TYPE_INDIRECT 1 +#define V2_TYPE_DIRECT 2 +#define V2_TYPE_DIRENTRY 3 + +#define 
REISERFS_ROOT_OBJECTID 2 +#define REISERFS_ROOT_PARENT_OBJECTID 1 +#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024) +/* the spot for the super in versions 3.5 - 3.5.11 (inclusive) */ +#define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024) +#define REISERFS_OLD_BLOCKSIZE 4096 + +#define S_ISREG(mode) (((mode) & 0170000) == 0100000) +#define S_ISDIR(mode) (((mode) & 0170000) == 0040000) +#define S_ISLNK(mode) (((mode) & 0170000) == 0120000) + +#define PATH_MAX 1024 /* include/linux/limits.h */ +#define MAX_LINK_COUNT 5 /* number of symbolic links to follow */ + +/* The size of the node cache */ +#define FSYSREISER_CACHE_SIZE 24*1024 +#define FSYSREISER_MIN_BLOCKSIZE SECTOR_SIZE +#define FSYSREISER_MAX_BLOCKSIZE FSYSREISER_CACHE_SIZE / 3 + +/* Info about currently opened file */ +struct fsys_reiser_fileinfo +{ + __u32 k_dir_id; + __u32 k_objectid; +}; + +/* In memory info about the currently mounted filesystem */ +struct fsys_reiser_info +{ + /* The last read item head */ + struct item_head *current_ih; + /* The last read item */ + char *current_item; + /* The information for the currently opened file */ + struct fsys_reiser_fileinfo fileinfo; + /* The start of the journal */ + __u32 journal_block; + /* The size of the journal */ + __u32 journal_block_count; + /* The first valid descriptor block in journal + (relative to journal_block) */ + __u32 journal_first_desc; + + /* The ReiserFS version. */ + __u16 version; + /* The current depth of the reiser tree. */ + __u16 tree_depth; + /* SECTOR_SIZE << blocksize_shift == blocksize. */ + __u8 blocksize_shift; + /* 1 << full_blocksize_shift == blocksize. 
*/ + __u8 fullblocksize_shift; + /* The reiserfs block size (must be a power of 2) */ + __u16 blocksize; + /* The number of cached tree nodes */ + __u16 cached_slots; + /* The number of valid transactions in journal */ + __u16 journal_transactions; + + unsigned int blocks[MAX_HEIGHT]; + unsigned int next_key_nr[MAX_HEIGHT]; +}; + +/* The cached s+tree blocks in FSYS_BUF, see below + * for a more detailed description. + */ +#define ROOT ((char *) ((int) FSYS_BUF)) +#define CACHE(i) (ROOT + ((i) << INFO->fullblocksize_shift)) +#define LEAF CACHE (DISK_LEAF_NODE_LEVEL) + +#define BLOCKHEAD(cache) ((struct block_head *) cache) +#define ITEMHEAD ((struct item_head *) ((int) LEAF + BLKH_SIZE)) +#define KEY(cache) ((struct key *) ((int) cache + BLKH_SIZE)) +#define DC(cache) ((struct disk_child *) \ + ((int) cache + BLKH_SIZE + KEY_SIZE * nr_item)) +/* The fsys_reiser_info block. + */ +#define INFO \ + ((struct fsys_reiser_info *) ((int) FSYS_BUF + FSYSREISER_CACHE_SIZE)) +/* + * The journal cache. For each transaction it contains the number of + * blocks followed by the real block numbers of this transaction. + * + * If the block numbers of some transaction won't fit in this space, + * this list is stopped with a 0xffffffff marker and the remaining + * uncommitted transactions aren't cached. 
+ */ +#define JOURNAL_START ((__u32 *) (INFO + 1)) +#define JOURNAL_END ((__u32 *) (FSYS_BUF + FSYS_BUFLEN)) + + +static __inline__ unsigned long +log2 (unsigned long word) +{ +#ifdef __I386__ + __asm__ ("bsfl %1,%0" + : "=r" (word) + : "r" (word)); + return word; +#else + int i; + + for(i=0; i<(8*sizeof(word)); i++) + if ((1<<i) & word) + return i; + + return 0; +#endif +} + +static __inline__ int +is_power_of_two (unsigned long word) +{ + return (word & -word) == word; +} + +extern const char *bb_mode_string(int mode); +extern int reiserfs_devread (int sector, int byte_offset, int byte_len, char *buf); diff --git a/fs/ubi/Makefile b/fs/ubi/Makefile new file mode 100755 index 0000000..1feba03 --- /dev/null +++ b/fs/ubi/Makefile @@ -0,0 +1,48 @@ +# +# (C) Copyright 2006 +# Wolfgang Denk, DENX Software Engineering, wd@denx.de. +# +# See file CREDITS for list of people who contributed to this +# project. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# + +include $(TOPDIR)/config.mk + +LIB := $(obj)libubi.a + +AOBJS = +COBJS += build.o vtbl.o vmt.o upd.o kapi.o eba.o io.o wl.o scan.o crc32.o + +COBJS += misc.o + +OBJS := $(AOBJS) $(COBJS) + + +#CPPFLAGS += +all: $(LIB) $(AOBJS) + +$(LIB): .depend $(OBJS) + $(AR) crv $@ $(OBJS) + +######################################################################### + +.depend: Makefile $(AOBJS:.o=.S) $(COBJS:.o=.c) + $(CC) -M $(CFLAGS) $(AOBJS:.o=.S) $(COBJS:.o=.c) > $@ + +sinclude .depend +######################################################################### diff --git a/fs/ubi/build.c b/fs/ubi/build.c new file mode 100755 index 0000000..376ec77 --- /dev/null +++ b/fs/ubi/build.c @@ -0,0 +1,1162 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём), + * Frank Haverkamp + */ + +/* + * This file includes UBI initialization and building of UBI devices.
+ * + * When UBI is initialized, it attaches all the MTD devices specified as the + * module load parameters or the kernel boot parameters. If MTD devices were + * specified, UBI does not attach any MTD device, but it is possible to do + * later using the "UBI control device". + * + * At the moment we only attach UBI devices by scanning, which will become a + * bottleneck when flashes reach certain large size. Then one may improve UBI + * and add other methods, although it does not seem to be easy to do. + */ + +#ifdef UBI_LINUX +#include <linux/err.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/stringify.h> +#include <linux/stat.h> +#include <linux/miscdevice.h> +#include <linux/log2.h> +#include <linux/kthread.h> +#endif +#include <ubi_uboot.h> +#include "ubi.h" +#include "nand.h" +#include <linux/mtd/nand.h> +/* Maximum length of the 'mtd=' parameter */ +extern struct nand_chip nand_dev_desc[]; + +static int ubi_start = 0; +static int ubi_size = 128; + +/** + * struct mtd_dev_param - MTD device parameter description data structure. 
+ * @name: MTD device name or number string + * @vid_hdr_offs: VID header offset + */ +struct mtd_dev_param +{ + char name[20]; + int vid_hdr_offs; +}; + +extern struct nand_chip nand_dev_desc[]; + +/* Numbers of elements set in the @mtd_dev_param array */ +static int mtd_devs = 0; + +/* MTD devices specification parameters */ +static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES]; + +/* Root UBI "class" object (corresponds to '/<sysfs>/class/ubi/') */ +struct class *ubi_class; + +#ifdef UBI_LINUX +/* Slab cache for wear-leveling entries */ +struct kmem_cache *ubi_wl_entry_slab; + +/* UBI control character device */ +static struct miscdevice ubi_ctrl_cdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "ubi_ctrl", + .fops = &ubi_ctrl_cdev_operations, +}; +#endif + +/* All UBI devices in system */ +struct ubi_device *ubi_devices[UBI_MAX_DEVICES]; + +#ifdef UBI_LINUX +/* Serializes UBI devices creations and removals */ +DEFINE_MUTEX(ubi_devices_mutex); + +/* Protects @ubi_devices and @ubi->ref_count */ +static DEFINE_SPINLOCK(ubi_devices_lock); + +/* "Show" method for files in '/<sysfs>/class/ubi/' */ +static ssize_t ubi_version_show(struct class *class, char *buf) +{ + return sprintf(buf, "%d\n", UBI_VERSION); +} + +/* UBI version attribute ('/<sysfs>/class/ubi/version') */ +static struct class_attribute ubi_version = + __ATTR(version, S_IRUGO, ubi_version_show, NULL); + +static ssize_t dev_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf); + +/* UBI device attributes (correspond to files in '/<sysfs>/class/ubi/ubiX') */ +static struct device_attribute dev_eraseblock_size = + __ATTR(eraseblock_size, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_avail_eraseblocks = + __ATTR(avail_eraseblocks, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_total_eraseblocks = + __ATTR(total_eraseblocks, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_volumes_count = + 
__ATTR(volumes_count, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_max_ec = + __ATTR(max_ec, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_reserved_for_bad = + __ATTR(reserved_for_bad, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_bad_peb_count = + __ATTR(bad_peb_count, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_max_vol_count = + __ATTR(max_vol_count, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_min_io_size = + __ATTR(min_io_size, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_bgt_enabled = + __ATTR(bgt_enabled, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_mtd_num = + __ATTR(mtd_num, S_IRUGO, dev_attribute_show, NULL); +#endif + +void cmd_ubi_start(int start) +{ + ubi_start = start; +} + +void cmd_ubi_size(int size) +{ + ubi_size = size; +} + +/** + * ubi_get_device - get UBI device. + * @ubi_num: UBI device number + * + * This function returns UBI device description object for UBI device number + * @ubi_num, or %NULL if the device does not exist. This function increases the + * device reference count to prevent removal of the device. In other words, the + * device cannot be removed if its reference count is not zero. + */ +struct ubi_device *ubi_get_device(int ubi_num) +{ + struct ubi_device *ubi; + + spin_lock(&ubi_devices_lock); + ubi = ubi_devices[ubi_num]; + if (ubi) { + ubi_assert(ubi->ref_count >= 0); + ubi->ref_count += 1; + get_device(&ubi->dev); + } + spin_unlock(&ubi_devices_lock); + + return ubi; +} + +/** + * ubi_put_device - drop an UBI device reference. + * @ubi: UBI device description object + */ +void ubi_put_device(struct ubi_device *ubi) +{ + spin_lock(&ubi_devices_lock); + ubi->ref_count -= 1; + put_device(&ubi->dev); + spin_unlock(&ubi_devices_lock); +} + +/** + * ubi_get_by_major - get UBI device description object by character device + * major number. 
+ * @major: major number + * + * This function is similar to 'ubi_get_device()', but it searches the device + * by its major number. + */ +struct ubi_device *ubi_get_by_major(int major) +{ + int i; + struct ubi_device *ubi; + + spin_lock(&ubi_devices_lock); + for (i = 0; i < UBI_MAX_DEVICES; i++) { + ubi = ubi_devices[i]; + if (ubi && MAJOR(ubi->cdev.dev) == major) { + ubi_assert(ubi->ref_count >= 0); + ubi->ref_count += 1; + get_device(&ubi->dev); + spin_unlock(&ubi_devices_lock); + return ubi; + } + } + spin_unlock(&ubi_devices_lock); + + return NULL; +} + +/** + * ubi_major2num - get UBI device number by character device major number. + * @major: major number + * + * This function searches UBI device number object by its major number. If UBI + * device was not found, this function returns -ENODEV, otherwise the UBI device + * number is returned. + */ +int ubi_major2num(int major) +{ + int i, ubi_num = -ENODEV; + + spin_lock(&ubi_devices_lock); + for (i = 0; i < UBI_MAX_DEVICES; i++) { + struct ubi_device *ubi = ubi_devices[i]; + + if (ubi && MAJOR(ubi->cdev.dev) == major) { + ubi_num = ubi->ubi_num; + break; + } + } + spin_unlock(&ubi_devices_lock); + + return ubi_num; +} + +#ifdef UBI_LINUX +/* "Show" method for files in '/<sysfs>/class/ubi/ubiX/' */ +static ssize_t dev_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t ret; + struct ubi_device *ubi; + + /* + * The below code looks weird, but it actually makes sense. We get the + * UBI device reference from the contained 'struct ubi_device'. But it + * is unclear if the device was removed or not yet. Indeed, if the + * device was removed before we increased its reference count, + * 'ubi_get_device()' will return -ENODEV and we fail. + * + * Remember, 'struct ubi_device' is freed in the release function, so + * we still can use 'ubi->ubi_num'. 
+ */ + ubi = container_of(dev, struct ubi_device, dev); + ubi = ubi_get_device(ubi->ubi_num); + if (!ubi) + return -ENODEV; + + if (attr == &dev_eraseblock_size) + ret = sprintf(buf, "%d\n", ubi->leb_size); + else if (attr == &dev_avail_eraseblocks) + ret = sprintf(buf, "%d\n", ubi->avail_pebs); + else if (attr == &dev_total_eraseblocks) + ret = sprintf(buf, "%d\n", ubi->good_peb_count); + else if (attr == &dev_volumes_count) + ret = sprintf(buf, "%d\n", ubi->vol_count - UBI_INT_VOL_COUNT); + else if (attr == &dev_max_ec) + ret = sprintf(buf, "%d\n", ubi->max_ec); + else if (attr == &dev_reserved_for_bad) + ret = sprintf(buf, "%d\n", ubi->beb_rsvd_pebs); + else if (attr == &dev_bad_peb_count) + ret = sprintf(buf, "%d\n", ubi->bad_peb_count); + else if (attr == &dev_max_vol_count) + ret = sprintf(buf, "%d\n", ubi->vtbl_slots); + else if (attr == &dev_min_io_size) + ret = sprintf(buf, "%d\n", ubi->min_io_size); + else if (attr == &dev_bgt_enabled) + ret = sprintf(buf, "%d\n", ubi->thread_enabled); + else if (attr == &dev_mtd_num) + ret = sprintf(buf, "%d\n", ubi->mtd->index); + else + ret = -EINVAL; + + ubi_put_device(ubi); + return ret; +} + +/* Fake "release" method for UBI devices */ +static void dev_release(struct device *dev) { } + +/** + * ubi_sysfs_init - initialize sysfs for an UBI device. + * @ubi: UBI device description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. 
+ */ +static int ubi_sysfs_init(struct ubi_device *ubi) +{ + int err; + + ubi->dev.release = dev_release; + ubi->dev.devt = ubi->cdev.dev; + ubi->dev.class = ubi_class; + sprintf(&ubi->dev.bus_id[0], UBI_NAME_STR"%d", ubi->ubi_num); + err = device_register(&ubi->dev); + if (err) + return err; + + err = device_create_file(&ubi->dev, &dev_eraseblock_size); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_avail_eraseblocks); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_total_eraseblocks); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_volumes_count); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_max_ec); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_reserved_for_bad); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_bad_peb_count); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_max_vol_count); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_min_io_size); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_bgt_enabled); + if (err) + return err; + err = device_create_file(&ubi->dev, &dev_mtd_num); + return err; +} + +/** + * ubi_sysfs_close - close sysfs for an UBI device. 
+ * @ubi: UBI device description object + */ +static void ubi_sysfs_close(struct ubi_device *ubi) +{ + device_remove_file(&ubi->dev, &dev_mtd_num); + device_remove_file(&ubi->dev, &dev_bgt_enabled); + device_remove_file(&ubi->dev, &dev_min_io_size); + device_remove_file(&ubi->dev, &dev_max_vol_count); + device_remove_file(&ubi->dev, &dev_bad_peb_count); + device_remove_file(&ubi->dev, &dev_reserved_for_bad); + device_remove_file(&ubi->dev, &dev_max_ec); + device_remove_file(&ubi->dev, &dev_volumes_count); + device_remove_file(&ubi->dev, &dev_total_eraseblocks); + device_remove_file(&ubi->dev, &dev_avail_eraseblocks); + device_remove_file(&ubi->dev, &dev_eraseblock_size); + device_unregister(&ubi->dev); +} +#endif + +/** + * kill_volumes - destroy all volumes. + * @ubi: UBI device description object + */ +static void kill_volumes(struct ubi_device *ubi) +{ + int i; + + for (i = 0; i < ubi->vtbl_slots; i++) + if (ubi->volumes[i]) + ubi_free_volume(ubi, ubi->volumes[i]); +} + +/** + * uif_init - initialize user interfaces for an UBI device. + * @ubi: UBI device description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int uif_init(struct ubi_device *ubi) +{ + int i, err; +#ifdef UBI_LINUX + dev_t dev; +#endif + + sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num); + + /* + * Major numbers for the UBI character devices are allocated + * dynamically. Major numbers of volume character devices are + * equivalent to ones of the corresponding UBI character device. Minor + * numbers of UBI character devices are 0, while minor numbers of + * volume character devices start from 1. Thus, we allocate one major + * number and ubi->vtbl_slots + 1 minor numbers. 
+ */ + err = alloc_chrdev_region(&dev, 0, ubi->vtbl_slots + 1, ubi->ubi_name); + if (err) { + ubi_err("cannot register UBI character devices"); + return err; + } + + ubi_assert(MINOR(dev) == 0); + cdev_init(&ubi->cdev, &ubi_cdev_operations); + dbg_msg("%s major is %u", ubi->ubi_name, MAJOR(dev)); + ubi->cdev.owner = THIS_MODULE; + + err = cdev_add(&ubi->cdev, dev, 1); + if (err) { + ubi_err("cannot add character device"); + goto out_unreg; + } + + err = ubi_sysfs_init(ubi); + if (err) + goto out_sysfs; + + for (i = 0; i < ubi->vtbl_slots; i++) + if (ubi->volumes[i]) { + err = ubi_add_volume(ubi, ubi->volumes[i]); + if (err) { + ubi_err("cannot add volume %d", i); + goto out_volumes; + } + } + + return 0; + +out_volumes: + kill_volumes(ubi); +out_sysfs: + ubi_sysfs_close(ubi); + cdev_del(&ubi->cdev); +out_unreg: + unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1); + ubi_err("cannot initialize UBI %s, error %d", ubi->ubi_name, err); + return err; +} + +/** + * uif_close - close user interfaces for an UBI device. + * @ubi: UBI device description object + */ +static void uif_close(struct ubi_device *ubi) +{ + kill_volumes(ubi); + ubi_sysfs_close(ubi); + cdev_del(&ubi->cdev); + unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1); +} + +/** + * attach_by_scanning - attach an MTD device using scanning method. + * @ubi: UBI device descriptor + * + * This function returns zero in case of success and a negative error code in + * case of failure. + * + * Note, currently this is the only method to attach UBI devices. Hopefully in + * the future we'll have more scalable attaching methods and avoid full media + * scanning. But even in this case scanning will be needed as a fall-back + * attaching method if there are some on-flash table corruptions. 
+ */ +static int attach_by_scanning(struct ubi_device *ubi) +{ + int err; + struct ubi_scan_info *si; + + si = ubi_scan(ubi); + if (IS_ERR(si)) + return PTR_ERR(si); + + ubi->bad_peb_count = si->bad_peb_count; + ubi->good_peb_count = ubi->peb_count - ubi->bad_peb_count; + ubi->max_ec = si->max_ec; + ubi->mean_ec = si->mean_ec; + + err = ubi_read_volume_table(ubi, si); + if (err) + goto out_si; + err = ubi_wl_init_scan(ubi, si); + if (err) + goto out_vtbl; + err = ubi_eba_init_scan(ubi, si); + if (err) + goto out_wl; + + ubi_scan_destroy_si(si); + return 0; + +out_wl: + ubi_wl_close(ubi); +out_vtbl: + vfree(ubi->vtbl); +out_si: + ubi_scan_destroy_si(si); + return err; +} + +/** + * io_init - initialize I/O unit for a given UBI device. + * @ubi: UBI device description object + * + * If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are + * assumed: + * o EC header is always at offset zero - this cannot be changed; + * o VID header starts just after the EC header at the closest address + * aligned to @io->hdrs_min_io_size; + * o data starts just after the VID header at the closest address aligned to + * @io->min_io_size + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int io_init(struct ubi_device *ubi) +{ + + if (ubi->vid_hdr_offset < 0) + return -EINVAL; + + /* + * Note, in this implementation we support MTD devices with 0x7FFFFFFF + * physical eraseblocks maximum. + */ + ubi->peb_start = ubi_start; + ubi->peb_size = nand_dev_desc[0].erasesize;; + ubi->flash_size = ubi_size * ubi->peb_size; + ubi->peb_count = ubi_size; + + //if (nand_dev_desc[0].block_isbad && nand_dev_desc[0].block_markbad) + ubi->bad_allowed = 1; + + ubi->min_io_size = nand_dev_desc[0].oobblock; + ubi->hdrs_min_io_size = ubi->min_io_size; + + /* + * Make sure minimal I/O unit is power of 2. Note, there is no + * fundamental reason for this assumption. 
It is just an optimization + * which allows us to avoid costly division operations. + */ + if (!is_power_of_2(ubi->min_io_size)) { + ubi_err("min. I/O unit (%d) is not power of 2", + ubi->min_io_size); + return -EINVAL; + } + + ubi_assert(ubi->hdrs_min_io_size > 0); + ubi_assert(ubi->hdrs_min_io_size <= ubi->min_io_size); + ubi_assert(ubi->min_io_size % ubi->hdrs_min_io_size == 0); + + /* Calculate default aligned sizes of EC and VID headers */ + ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size); + ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size); + + dbg_msg("min_io_size %d", ubi->min_io_size); + dbg_msg("hdrs_min_io_size %d", ubi->hdrs_min_io_size); + dbg_msg("ec_hdr_alsize %d", ubi->ec_hdr_alsize); + dbg_msg("vid_hdr_alsize %d", ubi->vid_hdr_alsize); + + if (ubi->vid_hdr_offset == 0) + /* Default offset */ + ubi->vid_hdr_offset = ubi->vid_hdr_aloffset = + ubi->ec_hdr_alsize; + else { + ubi->vid_hdr_aloffset = ubi->vid_hdr_offset & + ~(ubi->hdrs_min_io_size - 1); + ubi->vid_hdr_shift = ubi->vid_hdr_offset - + ubi->vid_hdr_aloffset; + } + + /* Similar for the data offset */ + ubi->leb_start = ubi->vid_hdr_offset + UBI_EC_HDR_SIZE; + ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size); + + dbg_msg("vid_hdr_offset %d", ubi->vid_hdr_offset); + dbg_msg("vid_hdr_aloffset %d", ubi->vid_hdr_aloffset); + dbg_msg("vid_hdr_shift %d", ubi->vid_hdr_shift); + dbg_msg("leb_start %d", ubi->leb_start); + + /* The shift must be aligned to 32-bit boundary */ + if (ubi->vid_hdr_shift % 4) { + ubi_err("unaligned VID header shift %d", + ubi->vid_hdr_shift); + return -EINVAL; + } + + /* Check sanity */ + if (ubi->vid_hdr_offset < UBI_EC_HDR_SIZE || + ubi->leb_start < ubi->vid_hdr_offset + UBI_VID_HDR_SIZE || + ubi->leb_start > ubi->peb_size - UBI_VID_HDR_SIZE || + ubi->leb_start & (ubi->min_io_size - 1)) { + ubi_err("bad VID header (%d) or data offsets (%d)", + ubi->vid_hdr_offset, ubi->leb_start); + return -EINVAL; + } + + /* + * It may happen 
that EC and VID headers are situated in one minimal + * I/O unit. In this case we can only accept this UBI image in + * read-only mode. + */ + if (ubi->vid_hdr_offset + UBI_VID_HDR_SIZE <= ubi->hdrs_min_io_size) { + ubi_warn("EC and VID headers are in the same minimal I/O unit, " + "switch to read-only mode"); + ubi->ro_mode = 1; + } + + ubi->leb_size = ubi->peb_size - ubi->leb_start; + + ubi_msg("physical eraseblock size: %d bytes (%d KiB)", + ubi->peb_size, ubi->peb_size >> 10); + ubi_msg("logical eraseblock size: %d bytes", ubi->leb_size); + ubi_msg("smallest flash I/O unit: %d", ubi->min_io_size); + if (ubi->hdrs_min_io_size != ubi->min_io_size) + ubi_msg("sub-page size: %d", + ubi->hdrs_min_io_size); + ubi_msg("VID header offset: %d (aligned %d)", + ubi->vid_hdr_offset, ubi->vid_hdr_aloffset); + ubi_msg("data offset: %d", ubi->leb_start); + + /* + * Note, ideally, we have to initialize ubi->bad_peb_count here. But + * unfortunately, MTD does not provide this information. We should loop + * over all physical eraseblocks and invoke mtd->block_is_bad() for + * each physical eraseblock. So, we skip ubi->bad_peb_count + * uninitialized and initialize it after scanning. + */ + + return 0; +} + +/** + * autoresize - re-size the volume which has the "auto-resize" flag set. + * @ubi: UBI device description object + * @vol_id: ID of the volume to re-size + * + * This function re-sizes the volume marked by the @UBI_VTBL_AUTORESIZE_FLG in + * the volume table to the largest possible size. See comments in ubi-header.h + * for more description of the flag. Returns zero in case of success and a + * negative error code in case of failure. 
+ */ +static int autoresize(struct ubi_device *ubi, int vol_id) +{ + struct ubi_volume_desc desc; + struct ubi_volume *vol = ubi->volumes[vol_id]; + int err, old_reserved_pebs = vol->reserved_pebs; + + /* + * Clear the auto-resize flag in the volume in-memory copy of the + * volume table, and 'ubi_resize_volume()' will propogate this change + * to the flash. + */ + ubi->vtbl[vol_id].flags &= ~UBI_VTBL_AUTORESIZE_FLG; + + if (ubi->avail_pebs == 0) { + struct ubi_vtbl_record vtbl_rec; + + /* + * No avalilable PEBs to re-size the volume, clear the flag on + * flash and exit. + */ + memcpy(&vtbl_rec, &ubi->vtbl[vol_id], + sizeof(struct ubi_vtbl_record)); + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + if (err) + ubi_err("cannot clean auto-resize flag for volume %d", + vol_id); + } else { + desc.vol = vol; + err = ubi_resize_volume(&desc, + old_reserved_pebs + ubi->avail_pebs); + if (err) + ubi_err("cannot auto-resize volume %d", vol_id); + } + + if (err) + return err; + + ubi_msg("volume %d (\"%s\") re-sized from %d to %d LEBs", vol_id, + vol->name, old_reserved_pebs, vol->reserved_pebs); + return 0; +} + +/** + * ubi_attach_mtd_dev - attach an MTD device. + * @mtd_dev: MTD device description object + * @ubi_num: number to assign to the new UBI device + * @vid_hdr_offset: VID header offset + * + * This function attaches MTD device @mtd_dev to UBI and assign @ubi_num number + * to the newly created UBI device, unless @ubi_num is %UBI_DEV_NUM_AUTO, in + * which case this function finds a vacant device nubert and assings it + * automatically. Returns the new UBI device number in case of success and a + * negative error code in case of failure. + * + * Note, the invocations of this function has to be serialized by the + * @ubi_devices_mutex. + */ +int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) +{ + struct ubi_device *ubi; + int i, err; + + /* + * Check if we already have the same MTD device attached. 
+ * + * Note, this function assumes that UBI devices creations and deletions + * are serialized, so it does not take the &ubi_devices_lock. + */ + for (i = 0; i < UBI_MAX_DEVICES; i++) { + ubi = ubi_devices[i]; + if (ubi) { + dbg_err("ubi is already attached to ubi%d", i); + return -EEXIST; + } + } + + if (ubi_num == UBI_DEV_NUM_AUTO) { + /* Search for an empty slot in the @ubi_devices array */ + for (ubi_num = 0; ubi_num < UBI_MAX_DEVICES; ubi_num++) + if (!ubi_devices[ubi_num]) + break; + if (ubi_num == UBI_MAX_DEVICES) { + dbg_err("only %d UBI devices may be created", UBI_MAX_DEVICES); + return -ENFILE; + } + } else { + if (ubi_num >= UBI_MAX_DEVICES) + return -EINVAL; + + /* Make sure ubi_num is not busy */ + if (ubi_devices[ubi_num]) { + dbg_err("ubi%d already exists", ubi_num); + return -EEXIST; + } + } + + ubi = kzalloc(sizeof(struct ubi_device), GFP_KERNEL); + if (!ubi) + return -ENOMEM; + + ubi->ubi_num = ubi_num; + ubi->vid_hdr_offset = vid_hdr_offset; + ubi->autoresize_vol_id = -1; + + mutex_init(&ubi->buf_mutex); + mutex_init(&ubi->ckvol_mutex); + mutex_init(&ubi->volumes_mutex); + spin_lock_init(&ubi->volumes_lock); + + ubi_msg("attaching to ubi%d", ubi_num); + + err = io_init(ubi); + if (err) + goto out_free; + + err = -ENOMEM; +// ubi->peb_buf1 = vmalloc(4096); +// if (!ubi->peb_buf1) +// goto out_free; +// printf("\n vmalloc !!!"); +// ubi->peb_buf2 = vmalloc(ubi->peb_size); +// if (!ubi->peb_buf2) +// goto out_free; +// printf("\n vmalloc !!!"); + mutex_init(&ubi->dbg_buf_mutex); + ubi->dbg_peb_buf = vmalloc(ubi->min_io_size * 2); + if (!ubi->dbg_peb_buf) + goto out_free; + err = attach_by_scanning(ubi); + if (err) { + dbg_err("failed to attach by scanning, error %d", err); + goto out_free; + } + + if (ubi->autoresize_vol_id != -1) { + err = autoresize(ubi, ubi->autoresize_vol_id); + if (err) + goto out_detach; + } + + err = uif_init(ubi); + if (err) + goto out_detach; + + ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name); + if 
(IS_ERR(ubi->bgt_thread)) { + err = PTR_ERR(ubi->bgt_thread); + ubi_err("cannot spawn \"%s\", error %d", ubi->bgt_name, + err); + goto out_uif; + } + + ubi_msg("attached to ubi%d",ubi_num); + ubi_msg("MTD device size: %llu MiB", ubi->flash_size >> 20); + ubi_msg("number of good PEBs: %d", ubi->good_peb_count); + ubi_msg("number of bad PEBs: %d", ubi->bad_peb_count); + ubi_msg("max. allowed volumes: %d", ubi->vtbl_slots); + ubi_msg("wear-leveling threshold: %d", CONFIG_MTD_UBI_WL_THRESHOLD); + ubi_msg("number of internal volumes: %d", UBI_INT_VOL_COUNT); + ubi_msg("number of user volumes: %d", + ubi->vol_count - UBI_INT_VOL_COUNT); + ubi_msg("available PEBs: %d", ubi->avail_pebs); + ubi_msg("total number of reserved PEBs: %d", ubi->rsvd_pebs); + ubi_msg("number of PEBs reserved for bad PEB handling: %d", + ubi->beb_rsvd_pebs); + ubi_msg("max/mean erase counter: %d/%d", ubi->max_ec, ubi->mean_ec); + + /* Enable the background thread */ + if (!DBG_DISABLE_BGT) { + ubi->thread_enabled = 1; + wake_up_process(ubi->bgt_thread); + } + + ubi_devices[ubi_num] = ubi; + return ubi_num; + +out_uif: + uif_close(ubi); +out_detach: + ubi_eba_close(ubi); + ubi_wl_close(ubi); + vfree(ubi->vtbl); +out_free: + //vfree(ubi->peb_buf1); + //vfree(ubi->peb_buf2); +#ifdef CONFIG_MTD_UBI_DEBUG + vfree(ubi->dbg_peb_buf); +#endif + kfree(ubi); + return err; +} + +/** + * ubi_detach_mtd_dev - detach an MTD device. + * @ubi_num: UBI device number to detach from + * @anyway: detach MTD even if device reference count is not zero + * + * This function destroys an UBI device number @ubi_num and detaches the + * underlying MTD device. Returns zero in case of success and %-EBUSY if the + * UBI device is busy and cannot be destroyed, and %-EINVAL if it does not + * exist. + * + * Note, the invocations of this function has to be serialized by the + * @ubi_devices_mutex. 
+ */ +int ubi_detach_mtd_dev(int ubi_num, int anyway) +{ + struct ubi_device *ubi; + + if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) + return -EINVAL; + + spin_lock(&ubi_devices_lock); + ubi = ubi_devices[ubi_num]; + if (!ubi) { + spin_unlock(&ubi_devices_lock); + return -EINVAL; + } + + if (ubi->ref_count) { + if (!anyway) { + spin_unlock(&ubi_devices_lock); + return -EBUSY; + } + /* This may only happen if there is a bug */ + ubi_err("%s reference count %d, destroy anyway", + ubi->ubi_name, ubi->ref_count); + } + ubi_devices[ubi_num] = NULL; + spin_unlock(&ubi_devices_lock); + + ubi_assert(ubi_num == ubi->ubi_num); + dbg_msg("detaching mtd%d from ubi%d", ubi->mtd->index, ubi_num); + + /* + * Before freeing anything, we have to stop the background thread to + * prevent it from doing anything on this device while we are freeing. + */ + if (ubi->bgt_thread) + kthread_stop(ubi->bgt_thread); + + uif_close(ubi); + ubi_eba_close(ubi); + ubi_wl_close(ubi); + vfree(ubi->vtbl); + //vfree(ubi->peb_buf1); + //vfree(ubi->peb_buf2); +#ifdef CONFIG_MTD_UBI_DEBUG + vfree(ubi->dbg_peb_buf); +#endif + ubi_msg("device is detached from ubi%d", ubi->ubi_num); + kfree(ubi); + return 0; +} +#if 0 +/** + * find_mtd_device - open an MTD device by its name or number. + * @mtd_dev: name or number of the device + * + * This function tries to open and MTD device described by @mtd_dev string, + * which is first treated as an ASCII number, and if it is not true, it is + * treated as MTD device name. Returns MTD device description object in case of + * success and a negative error code in case of failure. + */ +static struct mtd_info * __init open_mtd_device(const char *mtd_dev) +{ + struct mtd_info *mtd; + int mtd_num; + char *endp; + + mtd_num = simple_strtoul(mtd_dev, &endp, 0); + if (*endp != '\0' || mtd_dev == endp) { + /* + * This does not look like an ASCII integer, probably this is + * MTD device name. 
+ */ + mtd = get_mtd_device_nm(mtd_dev); + } else + mtd = get_mtd_device(NULL, mtd_num); + + return mtd; +} + +#endif +int __init ubi_init(void) +{ + int err, i, k; + + /* Ensure that EC and VID headers have correct size */ + BUILD_BUG_ON(sizeof(struct ubi_ec_hdr) != 64); + BUILD_BUG_ON(sizeof(struct ubi_vid_hdr) != 64); + + if (mtd_devs > UBI_MAX_DEVICES) { + ubi_err("too many MTD devices, maximum is %d", UBI_MAX_DEVICES); + return -EINVAL; + } + + /* Create base sysfs directory and sysfs files */ + ubi_class = class_create(THIS_MODULE, UBI_NAME_STR); + if (IS_ERR(ubi_class)) { + err = PTR_ERR(ubi_class); + ubi_err("cannot create UBI class"); + goto out; + } + + err = class_create_file(ubi_class, &ubi_version); + if (err) { + ubi_err("cannot create sysfs file"); + goto out_class; + } + + err = misc_register(&ubi_ctrl_cdev); + if (err) { + ubi_err("cannot register device"); + goto out_version; + } + +#ifdef UBI_LINUX + ubi_wl_entry_slab = kmem_cache_create("ubi_wl_entry_slab", + sizeof(struct ubi_wl_entry), + 0, 0, NULL); + if (!ubi_wl_entry_slab) + goto out_dev_unreg; +#endif + + /* Attach MTD devices */ + for (i = 0; i < mtd_devs; i++) { + struct mtd_dev_param *p = &mtd_dev_param[i]; + struct mtd_info *mtd; + + cond_resched(); + + // mtd = open_mtd_device(p->name); + // if (IS_ERR(mtd)) { + // err = PTR_ERR(mtd); + // goto out_detach; + // } + + mutex_lock(&ubi_devices_mutex); + err = ubi_attach_mtd_dev(mtd, UBI_DEV_NUM_AUTO, + p->vid_hdr_offs); + mutex_unlock(&ubi_devices_mutex); + if (err < 0) { + ubi_err("cannot attach device"); + goto out_detach; + } + } + + return 0; + +out_detach: + for (k = 0; k < i; k++) + if (ubi_devices[k]) { + mutex_lock(&ubi_devices_mutex); + ubi_detach_mtd_dev(ubi_devices[k]->ubi_num, 1); + mutex_unlock(&ubi_devices_mutex); + } +#ifdef UBI_LINUX + kmem_cache_destroy(ubi_wl_entry_slab); +out_dev_unreg: +#endif + misc_deregister(&ubi_ctrl_cdev); +out_version: + class_remove_file(ubi_class, &ubi_version); +out_class: + 
class_destroy(ubi_class); +out: + ubi_err("UBI error: cannot initialize UBI, error %d", err); + return err; +} +module_init(ubi_init); + +void __exit ubi_exit(void) +{ + int i; + + for (i = 0; i < UBI_MAX_DEVICES; i++) + if (ubi_devices[i]) { + mutex_lock(&ubi_devices_mutex); + ubi_detach_mtd_dev(ubi_devices[i]->ubi_num, 1); + mutex_unlock(&ubi_devices_mutex); + } + kmem_cache_destroy(ubi_wl_entry_slab); + misc_deregister(&ubi_ctrl_cdev); + class_remove_file(ubi_class, &ubi_version); + class_destroy(ubi_class); + mtd_devs = 0; +} +module_exit(ubi_exit); + +/** + * bytes_str_to_int - convert a string representing number of bytes to an + * integer. + * @str: the string to convert + * + * This function returns positive resulting integer in case of success and a + * negative error code in case of failure. + */ +static int __init bytes_str_to_int(const char *str) +{ + char *endp; + unsigned long result; + + result = simple_strtoul(str, &endp, 0); + if (str == endp || result < 0) { + printk(KERN_ERR "UBI error: incorrect bytes count: \"%s\"\n", + str); + return -EINVAL; + } + + switch (*endp) { + case 'G': + result *= 1024; + case 'M': + result *= 1024; + case 'K': + result *= 1024; + if (endp[1] == 'i' && endp[2] == 'B') + endp += 2; + case '\0': + break; + default: + printk(KERN_ERR "UBI error: incorrect bytes count: \"%s\"\n", + str); + return -EINVAL; + } + + return result; +} + +/** + * ubi_mtd_param_parse - parse the 'mtd=' UBI parameter. + * @val: the parameter value to parse + * @kp: not used + * + * This function returns zero in case of success and a negative error code in + * case of error. + */ +int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp) +{ + int i, len; + struct mtd_dev_param *p; + char buf[20]; + char *pbuf = &buf[0]; + char *tokens[2] = {NULL, NULL}; + + if (!val) + return -EINVAL; + + if (mtd_devs == UBI_MAX_DEVICES) { + printk(KERN_ERR "UBI error: too many parameters, max. 
is %d\n", + UBI_MAX_DEVICES); + return -EINVAL; + } + + len = strnlen(val, 20); + if (len == 20) { + printk(KERN_ERR "UBI error: parameter \"%s\" is too long, " + "max. is %d\n", val, 20); + return -EINVAL; + } + + if (len == 0) { + printk(KERN_WARNING "UBI warning: empty 'mtd=' parameter - " + "ignored\n"); + return 0; + } + + strcpy(buf, val); + + /* Get rid of the final newline */ + if (buf[len - 1] == '\n') + buf[len - 1] = '\0'; + + for (i = 0; i < 2; i++) + tokens[i] = strsep(&pbuf, ","); + + if (pbuf) { + printk(KERN_ERR "UBI error: too many arguments at \"%s\"\n", + val); + return -EINVAL; + } + + p = &mtd_dev_param[mtd_devs]; + strcpy(&p->name[0], tokens[0]); + + if (tokens[1]) + p->vid_hdr_offs = bytes_str_to_int(tokens[1]); + + if (p->vid_hdr_offs < 0) + return p->vid_hdr_offs; + + mtd_devs += 1; + return 0; +} + +module_param_call(mtd, ubi_mtd_param_parse, NULL, NULL, 000); +MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: " + "mtd=<name|num>[,<vid_hdr_offs>].\n" + "Multiple \"mtd\" parameters may be specified.\n" + "MTD devices may be specified by their number or name.\n" + "Optional \"vid_hdr_offs\" parameter specifies UBI VID " + "header position and data starting position to be used " + "by UBI.\n" + "Example: mtd=content,1984 mtd=4 - attach MTD device" + "with name \"content\" using VID header offset 1984, and " + "MTD device number 4 with default VID header offset."); + +MODULE_VERSION(__stringify(UBI_VERSION)); +MODULE_DESCRIPTION("UBI - Unsorted Block Images"); +MODULE_AUTHOR("Artem Bityutskiy"); +MODULE_LICENSE("GPL"); diff --git a/fs/ubi/crc32.c b/fs/ubi/crc32.c new file mode 100755 index 0000000..a7e26b0 --- /dev/null +++ b/fs/ubi/crc32.c @@ -0,0 +1,518 @@ +/* + * Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com> + * Nicer crc32 functions/docs submitted by linux@horizon.com. Thanks! + * Code was from the public domain, copyright abandoned. 
Code was + * subsequently included in the kernel, thus was re-licensed under the + * GNU GPL v2. + * + * Oct 12, 2000 Matt Domsch <Matt_Domsch@dell.com> + * Same crc32 function was used in 5 other places in the kernel. + * I made one version, and deleted the others. + * There are various incantations of crc32(). Some use a seed of 0 or ~0. + * Some xor at the end with ~0. The generic crc32() function takes + * seed as an argument, and doesn't xor at the end. Then individual + * users can do whatever they need. + * drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0. + * fs/jffs2 uses seed 0, doesn't xor with ~0. + * fs/partitions/efi.c uses seed ~0, xor's with ~0. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#ifdef UBI_LINUX +#include <linux/crc32.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/compiler.h> +#endif +#include <linux/types.h> + +#include <asm/byteorder.h> + +#ifdef UBI_LINUX +#include <linux/slab.h> +#include <linux/init.h> +#include <asm/atomic.h> +#endif +#include "crc32defs.h" +#define CRC_LE_BITS 8 + +# define __force +#ifndef __constant_cpu_to_le32 +#define __constant_cpu_to_le32(x) ((__force __le32)(__u32)(x)) +#endif +#ifndef __constant_le32_to_cpu +#define __constant_le32_to_cpu(x) ((__force __u32)(__le32)(x)) +#endif + +#if CRC_LE_BITS == 8 +#define tole(x) __constant_cpu_to_le32(x) +#define tobe(x) __constant_cpu_to_be32(x) +#else +#define tole(x) (x) +#define tobe(x) (x) +#endif +#include "crc32table.h" +#ifdef UBI_LINUX +MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>"); +MODULE_DESCRIPTION("Ethernet CRC32 calculations"); +MODULE_LICENSE("GPL"); +#endif +/** + * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32 + * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for + * other uses, or the previous crc32 value if computing incrementally. 
+ * @p: pointer to buffer over which CRC is run + * @len: length of buffer @p + */ +u32 crc32_le(u32 crc, unsigned char const *p, size_t len); + +#if CRC_LE_BITS == 1 +/* + * In fact, the table-based code will work in this case, but it can be + * simplified by inlining the table in ?: form. + */ + +u32 crc32_le(u32 crc, unsigned char const *p, size_t len) +{ + int i; + while (len--) { + crc ^= *p++; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); + } + return crc; +} +#else /* Table-based approach */ + +u32 crc32_le(u32 crc, unsigned char const *p, size_t len) +{ +# if CRC_LE_BITS == 8 + const u32 *b =(u32 *)p; + const u32 *tab = crc32table_le; + +# ifdef __LITTLE_ENDIAN +# define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8) +# else +# define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8) +# endif + /* printf("Crc32_le crc=%x\n",crc); */ + crc = __cpu_to_le32(crc); + /* Align it */ + if((((long)b)&3 && len)){ + do { + u8 *p = (u8 *)b; + DO_CRC(*p++); + b = (void *)p; + } while ((--len) && ((long)b)&3 ); + } + if((len >= 4)){ + /* load data 32 bits wide, xor data 32 bits wide. 
*/ + size_t save_len = len & 3; + len = len >> 2; + --b; /* use pre increment below(*++b) for speed */ + do { + crc ^= *++b; + DO_CRC(0); + DO_CRC(0); + DO_CRC(0); + DO_CRC(0); + } while (--len); + b++; /* point to next byte(s) */ + len = save_len; + } + /* And the last few bytes */ + if(len){ + do { + u8 *p = (u8 *)b; + DO_CRC(*p++); + b = (void *)p; + } while (--len); + } + + return __le32_to_cpu(crc); +#undef ENDIAN_SHIFT +#undef DO_CRC + +# elif CRC_LE_BITS == 4 + while (len--) { + crc ^= *p++; + crc = (crc >> 4) ^ crc32table_le[crc & 15]; + crc = (crc >> 4) ^ crc32table_le[crc & 15]; + } + return crc; +# elif CRC_LE_BITS == 2 + while (len--) { + crc ^= *p++; + crc = (crc >> 2) ^ crc32table_le[crc & 3]; + crc = (crc >> 2) ^ crc32table_le[crc & 3]; + crc = (crc >> 2) ^ crc32table_le[crc & 3]; + crc = (crc >> 2) ^ crc32table_le[crc & 3]; + } + return crc; +# endif +} +#endif +#ifdef UBI_LINUX +/** + * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 + * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for + * other uses, or the previous crc32 value if computing incrementally. + * @p: pointer to buffer over which CRC is run + * @len: length of buffer @p + */ +u32 __attribute_pure__ crc32_be(u32 crc, unsigned char const *p, size_t len); + +#if CRC_BE_BITS == 1 +/* + * In fact, the table-based code will work in this case, but it can be + * simplified by inlining the table in ?: form. + */ + +u32 __attribute_pure__ crc32_be(u32 crc, unsigned char const *p, size_t len) +{ + int i; + while (len--) { + crc ^= *p++ << 24; + for (i = 0; i < 8; i++) + crc = + (crc << 1) ^ ((crc & 0x80000000) ? 
CRCPOLY_BE : + 0); + } + return crc; +} + +#else /* Table-based approach */ +u32 __attribute_pure__ crc32_be(u32 crc, unsigned char const *p, size_t len) +{ +# if CRC_BE_BITS == 8 + const u32 *b =(u32 *)p; + const u32 *tab = crc32table_be; + +# ifdef __LITTLE_ENDIAN +# define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8) +# else +# define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8) +# endif + + crc = __cpu_to_be32(crc); + /* Align it */ + if(unlikely(((long)b)&3 && len)){ + do { + u8 *p = (u8 *)b; + DO_CRC(*p++); + b = (u32 *)p; + } while ((--len) && ((long)b)&3 ); + } + if(likely(len >= 4)){ + /* load data 32 bits wide, xor data 32 bits wide. */ + size_t save_len = len & 3; + len = len >> 2; + --b; /* use pre increment below(*++b) for speed */ + do { + crc ^= *++b; + DO_CRC(0); + DO_CRC(0); + DO_CRC(0); + DO_CRC(0); + } while (--len); + b++; /* point to next byte(s) */ + len = save_len; + } + /* And the last few bytes */ + if(len){ + do { + u8 *p = (u8 *)b; + DO_CRC(*p++); + b = (void *)p; + } while (--len); + } + return __be32_to_cpu(crc); +#undef ENDIAN_SHIFT +#undef DO_CRC + +# elif CRC_BE_BITS == 4 + while (len--) { + crc ^= *p++ << 24; + crc = (crc << 4) ^ crc32table_be[crc >> 28]; + crc = (crc << 4) ^ crc32table_be[crc >> 28]; + } + return crc; +# elif CRC_BE_BITS == 2 + while (len--) { + crc ^= *p++ << 24; + crc = (crc << 2) ^ crc32table_be[crc >> 30]; + crc = (crc << 2) ^ crc32table_be[crc >> 30]; + crc = (crc << 2) ^ crc32table_be[crc >> 30]; + crc = (crc << 2) ^ crc32table_be[crc >> 30]; + } + return crc; +# endif +} +#endif + +EXPORT_SYMBOL(crc32_le); +EXPORT_SYMBOL(crc32_be); +#endif +/* + * A brief CRC tutorial. + * + * A CRC is a long-division remainder. You add the CRC to the message, + * and the whole thing (message+CRC) is a multiple of the given + * CRC polynomial. To check the CRC, you can either check that the + * CRC matches the recomputed value, *or* you can check that the + * remainder computed on the message+CRC is 0. 
This latter approach + * is used by a lot of hardware implementations, and is why so many + * protocols put the end-of-frame flag after the CRC. + * + * It's actually the same long division you learned in school, except that + * - We're working in binary, so the digits are only 0 and 1, and + * - When dividing polynomials, there are no carries. Rather than add and + * subtract, we just xor. Thus, we tend to get a bit sloppy about + * the difference between adding and subtracting. + * + * A 32-bit CRC polynomial is actually 33 bits long. But since it's + * 33 bits long, bit 32 is always going to be set, so usually the CRC + * is written in hex with the most significant bit omitted. (If you're + * familiar with the IEEE 754 floating-point format, it's the same idea.) + * + * Note that a CRC is computed over a string of *bits*, so you have + * to decide on the endianness of the bits within each byte. To get + * the best error-detecting properties, this should correspond to the + * order they're actually sent. For example, standard RS-232 serial is + * little-endian; the most significant bit (sometimes used for parity) + * is sent last. And when appending a CRC word to a message, you should + * do it in the right order, matching the endianness. + * + * Just like with ordinary division, the remainder is always smaller than + * the divisor (the CRC polynomial) you're dividing by. Each step of the + * division, you take one more digit (bit) of the dividend and append it + * to the current remainder. Then you figure out the appropriate multiple + * of the divisor to subtract to bring the remainder back into range. + * In binary, it's easy - it has to be either 0 or 1, and to make the + * XOR cancel, it's just a copy of bit 32 of the remainder. 
+ * + * When computing a CRC, we don't care about the quotient, so we can + * throw the quotient bit away, but subtract the appropriate multiple of + * the polynomial from the remainder and we're back to where we started, + * ready to process the next bit. + * + * A big-endian CRC written this way would be coded like: + * for (i = 0; i < input_bits; i++) { + * multiple = remainder & 0x80000000 ? CRCPOLY : 0; + * remainder = (remainder << 1 | next_input_bit()) ^ multiple; + * } + * Notice how, to get at bit 32 of the shifted remainder, we look + * at bit 31 of the remainder *before* shifting it. + * + * But also notice how the next_input_bit() bits we're shifting into + * the remainder don't actually affect any decision-making until + * 32 bits later. Thus, the first 32 cycles of this are pretty boring. + * Also, to add the CRC to a message, we need a 32-bit-long hole for it at + * the end, so we have to add 32 extra cycles shifting in zeros at the + * end of every message. + * + * So the standard trick is to rearrange merging in the next_input_bit() + * until the moment it's needed. Then the first 32 cycles can be precomputed, + * and merging in the final 32 zero bits to make room for the CRC can be + * skipped entirely. + * This changes the code to: + * for (i = 0; i < input_bits; i++) { + * remainder ^= next_input_bit() << 31; + * multiple = (remainder & 0x80000000) ? CRCPOLY : 0; + * remainder = (remainder << 1) ^ multiple; + * } + * With this optimization, the little-endian code is simpler: + * for (i = 0; i < input_bits; i++) { + * remainder ^= next_input_bit(); + * multiple = (remainder & 1) ? CRCPOLY : 0; + * remainder = (remainder >> 1) ^ multiple; + * } + * + * Note that the other details of endianness have been hidden in CRCPOLY + * (which must be bit-reversed) and next_input_bit(). 
+ * + * However, as long as next_input_bit is returning the bits in a sensible + * order, we can actually do the merging 8 or more bits at a time rather + * than one bit at a time: + * for (i = 0; i < input_bytes; i++) { + * remainder ^= next_input_byte() << 24; + * for (j = 0; j < 8; j++) { + * multiple = (remainder & 0x80000000) ? CRCPOLY : 0; + * remainder = (remainder << 1) ^ multiple; + * } + * } + * Or in little-endian: + * for (i = 0; i < input_bytes; i++) { + * remainder ^= next_input_byte(); + * for (j = 0; j < 8; j++) { + * multiple = (remainder & 1) ? CRCPOLY : 0; + * remainder = (remainder >> 1) ^ multiple; + * } + * } + * If the input is a multiple of 32 bits, you can even XOR in a 32-bit + * word at a time and increase the inner loop count to 32. + * + * You can also mix and match the two loop styles, for example doing the + * bulk of a message byte-at-a-time and adding bit-at-a-time processing + * for any fractional bytes at the end. + * + * The only remaining optimization is to the byte-at-a-time table method. + * Here, rather than just shifting one bit of the remainder to decide + * on the correct multiple to subtract, we can shift a byte at a time. + * This produces a 40-bit (rather than a 33-bit) intermediate remainder, + * but again the multiple of the polynomial to subtract depends only on + * the high bits, the high 8 bits in this case. + * + * The multiple we need in that case is the low 32 bits of a 40-bit + * value whose high 8 bits are given, and which is a multiple of the + * generator polynomial. This is simply the CRC-32 of the given + * one-byte message. + * + * Two more details: normally, appending zero bits to a message which + * is already a multiple of a polynomial produces a larger multiple of that + * polynomial. To enable a CRC to detect this condition, it's common to + * invert the CRC before appending it. This makes the remainder of the + * message+crc come out not as zero, but some fixed non-zero value. 
+ * + * The same problem applies to zero bits prepended to the message, and + * a similar solution is used. Instead of starting with a remainder of + * 0, an initial remainder of all ones is used. As long as you start + * the same way on decoding, it doesn't make a difference. + */ + +#ifdef UNITTEST + +#include <stdlib.h> +#include <stdio.h> + +#ifdef UBI_LINUX /*Not used at present */ +static void +buf_dump(char const *prefix, unsigned char const *buf, size_t len) +{ + fputs(prefix, stdout); + while (len--) + printf(" %02x", *buf++); + putchar('\n'); + +} +#endif + +static void bytereverse(unsigned char *buf, size_t len) +{ + while (len--) { + unsigned char x = bitrev8(*buf); + *buf++ = x; + } +} + +static void random_garbage(unsigned char *buf, size_t len) +{ + while (len--) + *buf++ = (unsigned char) random(); +} + +#ifdef UBI_LINUX /* Not used at present */ +static void store_le(u32 x, unsigned char *buf) +{ + buf[0] = (unsigned char) x; + buf[1] = (unsigned char) (x >> 8); + buf[2] = (unsigned char) (x >> 16); + buf[3] = (unsigned char) (x >> 24); +} +#endif + +static void store_be(u32 x, unsigned char *buf) +{ + buf[0] = (unsigned char) (x >> 24); + buf[1] = (unsigned char) (x >> 16); + buf[2] = (unsigned char) (x >> 8); + buf[3] = (unsigned char) x; +} + +/* + * This checks that CRC(buf + CRC(buf)) = 0, and that + * CRC commutes with bit-reversal. This has the side effect + * of bytewise bit-reversing the input buffer, and returns + * the CRC of the reversed buffer. 
+ */ +static u32 test_step(u32 init, unsigned char *buf, size_t len) +{ + u32 crc1, crc2; + size_t i; + + crc1 = crc32_be(init, buf, len); + store_be(crc1, buf + len); + crc2 = crc32_be(init, buf, len + 4); + if (crc2) + printf("\nCRC cancellation fail: 0x%08x should be 0\n", + crc2); + + for (i = 0; i <= len + 4; i++) { + crc2 = crc32_be(init, buf, i); + crc2 = crc32_be(crc2, buf + i, len + 4 - i); + if (crc2) + printf("\nCRC split fail: 0x%08x\n", crc2); + } + + /* Now swap it around for the other test */ + + bytereverse(buf, len + 4); + init = bitrev32(init); + crc2 = bitrev32(crc1); + if (crc1 != bitrev32(crc2)) + printf("\nBit reversal fail: 0x%08x -> 0x%08x -> 0x%08x\n", + crc1, crc2, bitrev32(crc2)); + crc1 = crc32_le(init, buf, len); + if (crc1 != crc2) + printf("\nCRC endianness fail: 0x%08x != 0x%08x\n", crc1, + crc2); + crc2 = crc32_le(init, buf, len + 4); + if (crc2) + printf("\nCRC cancellation fail: 0x%08x should be 0\n", + crc2); + + for (i = 0; i <= len + 4; i++) { + crc2 = crc32_le(init, buf, i); + crc2 = crc32_le(crc2, buf + i, len + 4 - i); + if (crc2) + printf("\nCRC split fail: 0x%08x\n", crc2); + } + + return crc1; +} + +#define SIZE 64 +#define INIT1 0 +#define INIT2 0 + +int main(void) +{ + unsigned char buf1[SIZE + 4]; + unsigned char buf2[SIZE + 4]; + unsigned char buf3[SIZE + 4]; + int i, j; + u32 crc1, crc2, crc3; + + for (i = 0; i <= SIZE; i++) { + printf("\rTesting length %d...", i); + fflush(stdout); + random_garbage(buf1, i); + random_garbage(buf2, i); + for (j = 0; j < i; j++) + buf3[j] = buf1[j] ^ buf2[j]; + + crc1 = test_step(INIT1, buf1, i); + crc2 = test_step(INIT2, buf2, i); + /* Now check that CRC(buf1 ^ buf2) = CRC(buf1) ^ CRC(buf2) */ + crc3 = test_step(INIT1 ^ INIT2, buf3, i); + if (crc3 != (crc1 ^ crc2)) + printf("CRC XOR fail: 0x%08x != 0x%08x ^ 0x%08x\n", + crc3, crc1, crc2); + } + printf("\nAll test complete. 
No failures expected.\n"); + return 0; +} + +#endif /* UNITTEST */ diff --git a/fs/ubi/crc32defs.h b/fs/ubi/crc32defs.h new file mode 100755 index 0000000..f5a5401 --- /dev/null +++ b/fs/ubi/crc32defs.h @@ -0,0 +1,32 @@ +/* + * There are multiple 16-bit CRC polynomials in common use, but this is + * *the* standard CRC-32 polynomial, first popularized by Ethernet. + * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 + */ +#define CRCPOLY_LE 0xedb88320 +#define CRCPOLY_BE 0x04c11db7 + +/* How many bits at a time to use. Requires a table of 4<<CRC_xx_BITS bytes. */ +/* For less performance-sensitive, use 4 */ +#ifndef CRC_LE_BITS +# define CRC_LE_BITS 8 +#endif +#ifndef CRC_BE_BITS +# define CRC_BE_BITS 8 +#endif + +/* + * Little-endian CRC computation. Used with serial bit streams sent + * lsbit-first. Be sure to use cpu_to_le32() to append the computed CRC. + */ +#if CRC_LE_BITS > 8 || CRC_LE_BITS < 1 || CRC_LE_BITS & CRC_LE_BITS-1 +# error CRC_LE_BITS must be a power of 2 between 1 and 8 +#endif + +/* + * Big-endian CRC computation. Used with serial bit streams sent + * msbit-first. Be sure to use cpu_to_be32() to append the computed CRC. 
+ */ +#if CRC_BE_BITS > 8 || CRC_BE_BITS < 1 || CRC_BE_BITS & CRC_BE_BITS-1 +# error CRC_BE_BITS must be a power of 2 between 1 and 8 +#endif diff --git a/fs/ubi/crc32table.h b/fs/ubi/crc32table.h new file mode 100755 index 0000000..0438af4 --- /dev/null +++ b/fs/ubi/crc32table.h @@ -0,0 +1,136 @@ +/* this file is generated - do not edit */ + +static const u32 crc32table_le[] = { +tole(0x00000000L), tole(0x77073096L), tole(0xee0e612cL), tole(0x990951baL), +tole(0x076dc419L), tole(0x706af48fL), tole(0xe963a535L), tole(0x9e6495a3L), +tole(0x0edb8832L), tole(0x79dcb8a4L), tole(0xe0d5e91eL), tole(0x97d2d988L), +tole(0x09b64c2bL), tole(0x7eb17cbdL), tole(0xe7b82d07L), tole(0x90bf1d91L), +tole(0x1db71064L), tole(0x6ab020f2L), tole(0xf3b97148L), tole(0x84be41deL), +tole(0x1adad47dL), tole(0x6ddde4ebL), tole(0xf4d4b551L), tole(0x83d385c7L), +tole(0x136c9856L), tole(0x646ba8c0L), tole(0xfd62f97aL), tole(0x8a65c9ecL), +tole(0x14015c4fL), tole(0x63066cd9L), tole(0xfa0f3d63L), tole(0x8d080df5L), +tole(0x3b6e20c8L), tole(0x4c69105eL), tole(0xd56041e4L), tole(0xa2677172L), +tole(0x3c03e4d1L), tole(0x4b04d447L), tole(0xd20d85fdL), tole(0xa50ab56bL), +tole(0x35b5a8faL), tole(0x42b2986cL), tole(0xdbbbc9d6L), tole(0xacbcf940L), +tole(0x32d86ce3L), tole(0x45df5c75L), tole(0xdcd60dcfL), tole(0xabd13d59L), +tole(0x26d930acL), tole(0x51de003aL), tole(0xc8d75180L), tole(0xbfd06116L), +tole(0x21b4f4b5L), tole(0x56b3c423L), tole(0xcfba9599L), tole(0xb8bda50fL), +tole(0x2802b89eL), tole(0x5f058808L), tole(0xc60cd9b2L), tole(0xb10be924L), +tole(0x2f6f7c87L), tole(0x58684c11L), tole(0xc1611dabL), tole(0xb6662d3dL), +tole(0x76dc4190L), tole(0x01db7106L), tole(0x98d220bcL), tole(0xefd5102aL), +tole(0x71b18589L), tole(0x06b6b51fL), tole(0x9fbfe4a5L), tole(0xe8b8d433L), +tole(0x7807c9a2L), tole(0x0f00f934L), tole(0x9609a88eL), tole(0xe10e9818L), +tole(0x7f6a0dbbL), tole(0x086d3d2dL), tole(0x91646c97L), tole(0xe6635c01L), +tole(0x6b6b51f4L), tole(0x1c6c6162L), tole(0x856530d8L), tole(0xf262004eL), 
+tole(0x6c0695edL), tole(0x1b01a57bL), tole(0x8208f4c1L), tole(0xf50fc457L), +tole(0x65b0d9c6L), tole(0x12b7e950L), tole(0x8bbeb8eaL), tole(0xfcb9887cL), +tole(0x62dd1ddfL), tole(0x15da2d49L), tole(0x8cd37cf3L), tole(0xfbd44c65L), +tole(0x4db26158L), tole(0x3ab551ceL), tole(0xa3bc0074L), tole(0xd4bb30e2L), +tole(0x4adfa541L), tole(0x3dd895d7L), tole(0xa4d1c46dL), tole(0xd3d6f4fbL), +tole(0x4369e96aL), tole(0x346ed9fcL), tole(0xad678846L), tole(0xda60b8d0L), +tole(0x44042d73L), tole(0x33031de5L), tole(0xaa0a4c5fL), tole(0xdd0d7cc9L), +tole(0x5005713cL), tole(0x270241aaL), tole(0xbe0b1010L), tole(0xc90c2086L), +tole(0x5768b525L), tole(0x206f85b3L), tole(0xb966d409L), tole(0xce61e49fL), +tole(0x5edef90eL), tole(0x29d9c998L), tole(0xb0d09822L), tole(0xc7d7a8b4L), +tole(0x59b33d17L), tole(0x2eb40d81L), tole(0xb7bd5c3bL), tole(0xc0ba6cadL), +tole(0xedb88320L), tole(0x9abfb3b6L), tole(0x03b6e20cL), tole(0x74b1d29aL), +tole(0xead54739L), tole(0x9dd277afL), tole(0x04db2615L), tole(0x73dc1683L), +tole(0xe3630b12L), tole(0x94643b84L), tole(0x0d6d6a3eL), tole(0x7a6a5aa8L), +tole(0xe40ecf0bL), tole(0x9309ff9dL), tole(0x0a00ae27L), tole(0x7d079eb1L), +tole(0xf00f9344L), tole(0x8708a3d2L), tole(0x1e01f268L), tole(0x6906c2feL), +tole(0xf762575dL), tole(0x806567cbL), tole(0x196c3671L), tole(0x6e6b06e7L), +tole(0xfed41b76L), tole(0x89d32be0L), tole(0x10da7a5aL), tole(0x67dd4accL), +tole(0xf9b9df6fL), tole(0x8ebeeff9L), tole(0x17b7be43L), tole(0x60b08ed5L), +tole(0xd6d6a3e8L), tole(0xa1d1937eL), tole(0x38d8c2c4L), tole(0x4fdff252L), +tole(0xd1bb67f1L), tole(0xa6bc5767L), tole(0x3fb506ddL), tole(0x48b2364bL), +tole(0xd80d2bdaL), tole(0xaf0a1b4cL), tole(0x36034af6L), tole(0x41047a60L), +tole(0xdf60efc3L), tole(0xa867df55L), tole(0x316e8eefL), tole(0x4669be79L), +tole(0xcb61b38cL), tole(0xbc66831aL), tole(0x256fd2a0L), tole(0x5268e236L), +tole(0xcc0c7795L), tole(0xbb0b4703L), tole(0x220216b9L), tole(0x5505262fL), +tole(0xc5ba3bbeL), tole(0xb2bd0b28L), tole(0x2bb45a92L), 
tole(0x5cb36a04L), +tole(0xc2d7ffa7L), tole(0xb5d0cf31L), tole(0x2cd99e8bL), tole(0x5bdeae1dL), +tole(0x9b64c2b0L), tole(0xec63f226L), tole(0x756aa39cL), tole(0x026d930aL), +tole(0x9c0906a9L), tole(0xeb0e363fL), tole(0x72076785L), tole(0x05005713L), +tole(0x95bf4a82L), tole(0xe2b87a14L), tole(0x7bb12baeL), tole(0x0cb61b38L), +tole(0x92d28e9bL), tole(0xe5d5be0dL), tole(0x7cdcefb7L), tole(0x0bdbdf21L), +tole(0x86d3d2d4L), tole(0xf1d4e242L), tole(0x68ddb3f8L), tole(0x1fda836eL), +tole(0x81be16cdL), tole(0xf6b9265bL), tole(0x6fb077e1L), tole(0x18b74777L), +tole(0x88085ae6L), tole(0xff0f6a70L), tole(0x66063bcaL), tole(0x11010b5cL), +tole(0x8f659effL), tole(0xf862ae69L), tole(0x616bffd3L), tole(0x166ccf45L), +tole(0xa00ae278L), tole(0xd70dd2eeL), tole(0x4e048354L), tole(0x3903b3c2L), +tole(0xa7672661L), tole(0xd06016f7L), tole(0x4969474dL), tole(0x3e6e77dbL), +tole(0xaed16a4aL), tole(0xd9d65adcL), tole(0x40df0b66L), tole(0x37d83bf0L), +tole(0xa9bcae53L), tole(0xdebb9ec5L), tole(0x47b2cf7fL), tole(0x30b5ffe9L), +tole(0xbdbdf21cL), tole(0xcabac28aL), tole(0x53b39330L), tole(0x24b4a3a6L), +tole(0xbad03605L), tole(0xcdd70693L), tole(0x54de5729L), tole(0x23d967bfL), +tole(0xb3667a2eL), tole(0xc4614ab8L), tole(0x5d681b02L), tole(0x2a6f2b94L), +tole(0xb40bbe37L), tole(0xc30c8ea1L), tole(0x5a05df1bL), tole(0x2d02ef8dL) +}; +#ifdef UBI_LINUX +static const u32 crc32table_be[] = { +tobe(0x00000000L), tobe(0x04c11db7L), tobe(0x09823b6eL), tobe(0x0d4326d9L), +tobe(0x130476dcL), tobe(0x17c56b6bL), tobe(0x1a864db2L), tobe(0x1e475005L), +tobe(0x2608edb8L), tobe(0x22c9f00fL), tobe(0x2f8ad6d6L), tobe(0x2b4bcb61L), +tobe(0x350c9b64L), tobe(0x31cd86d3L), tobe(0x3c8ea00aL), tobe(0x384fbdbdL), +tobe(0x4c11db70L), tobe(0x48d0c6c7L), tobe(0x4593e01eL), tobe(0x4152fda9L), +tobe(0x5f15adacL), tobe(0x5bd4b01bL), tobe(0x569796c2L), tobe(0x52568b75L), +tobe(0x6a1936c8L), tobe(0x6ed82b7fL), tobe(0x639b0da6L), tobe(0x675a1011L), +tobe(0x791d4014L), tobe(0x7ddc5da3L), tobe(0x709f7b7aL), 
tobe(0x745e66cdL), +tobe(0x9823b6e0L), tobe(0x9ce2ab57L), tobe(0x91a18d8eL), tobe(0x95609039L), +tobe(0x8b27c03cL), tobe(0x8fe6dd8bL), tobe(0x82a5fb52L), tobe(0x8664e6e5L), +tobe(0xbe2b5b58L), tobe(0xbaea46efL), tobe(0xb7a96036L), tobe(0xb3687d81L), +tobe(0xad2f2d84L), tobe(0xa9ee3033L), tobe(0xa4ad16eaL), tobe(0xa06c0b5dL), +tobe(0xd4326d90L), tobe(0xd0f37027L), tobe(0xddb056feL), tobe(0xd9714b49L), +tobe(0xc7361b4cL), tobe(0xc3f706fbL), tobe(0xceb42022L), tobe(0xca753d95L), +tobe(0xf23a8028L), tobe(0xf6fb9d9fL), tobe(0xfbb8bb46L), tobe(0xff79a6f1L), +tobe(0xe13ef6f4L), tobe(0xe5ffeb43L), tobe(0xe8bccd9aL), tobe(0xec7dd02dL), +tobe(0x34867077L), tobe(0x30476dc0L), tobe(0x3d044b19L), tobe(0x39c556aeL), +tobe(0x278206abL), tobe(0x23431b1cL), tobe(0x2e003dc5L), tobe(0x2ac12072L), +tobe(0x128e9dcfL), tobe(0x164f8078L), tobe(0x1b0ca6a1L), tobe(0x1fcdbb16L), +tobe(0x018aeb13L), tobe(0x054bf6a4L), tobe(0x0808d07dL), tobe(0x0cc9cdcaL), +tobe(0x7897ab07L), tobe(0x7c56b6b0L), tobe(0x71159069L), tobe(0x75d48ddeL), +tobe(0x6b93dddbL), tobe(0x6f52c06cL), tobe(0x6211e6b5L), tobe(0x66d0fb02L), +tobe(0x5e9f46bfL), tobe(0x5a5e5b08L), tobe(0x571d7dd1L), tobe(0x53dc6066L), +tobe(0x4d9b3063L), tobe(0x495a2dd4L), tobe(0x44190b0dL), tobe(0x40d816baL), +tobe(0xaca5c697L), tobe(0xa864db20L), tobe(0xa527fdf9L), tobe(0xa1e6e04eL), +tobe(0xbfa1b04bL), tobe(0xbb60adfcL), tobe(0xb6238b25L), tobe(0xb2e29692L), +tobe(0x8aad2b2fL), tobe(0x8e6c3698L), tobe(0x832f1041L), tobe(0x87ee0df6L), +tobe(0x99a95df3L), tobe(0x9d684044L), tobe(0x902b669dL), tobe(0x94ea7b2aL), +tobe(0xe0b41de7L), tobe(0xe4750050L), tobe(0xe9362689L), tobe(0xedf73b3eL), +tobe(0xf3b06b3bL), tobe(0xf771768cL), tobe(0xfa325055L), tobe(0xfef34de2L), +tobe(0xc6bcf05fL), tobe(0xc27dede8L), tobe(0xcf3ecb31L), tobe(0xcbffd686L), +tobe(0xd5b88683L), tobe(0xd1799b34L), tobe(0xdc3abdedL), tobe(0xd8fba05aL), +tobe(0x690ce0eeL), tobe(0x6dcdfd59L), tobe(0x608edb80L), tobe(0x644fc637L), +tobe(0x7a089632L), tobe(0x7ec98b85L), 
tobe(0x738aad5cL), tobe(0x774bb0ebL), +tobe(0x4f040d56L), tobe(0x4bc510e1L), tobe(0x46863638L), tobe(0x42472b8fL), +tobe(0x5c007b8aL), tobe(0x58c1663dL), tobe(0x558240e4L), tobe(0x51435d53L), +tobe(0x251d3b9eL), tobe(0x21dc2629L), tobe(0x2c9f00f0L), tobe(0x285e1d47L), +tobe(0x36194d42L), tobe(0x32d850f5L), tobe(0x3f9b762cL), tobe(0x3b5a6b9bL), +tobe(0x0315d626L), tobe(0x07d4cb91L), tobe(0x0a97ed48L), tobe(0x0e56f0ffL), +tobe(0x1011a0faL), tobe(0x14d0bd4dL), tobe(0x19939b94L), tobe(0x1d528623L), +tobe(0xf12f560eL), tobe(0xf5ee4bb9L), tobe(0xf8ad6d60L), tobe(0xfc6c70d7L), +tobe(0xe22b20d2L), tobe(0xe6ea3d65L), tobe(0xeba91bbcL), tobe(0xef68060bL), +tobe(0xd727bbb6L), tobe(0xd3e6a601L), tobe(0xdea580d8L), tobe(0xda649d6fL), +tobe(0xc423cd6aL), tobe(0xc0e2d0ddL), tobe(0xcda1f604L), tobe(0xc960ebb3L), +tobe(0xbd3e8d7eL), tobe(0xb9ff90c9L), tobe(0xb4bcb610L), tobe(0xb07daba7L), +tobe(0xae3afba2L), tobe(0xaafbe615L), tobe(0xa7b8c0ccL), tobe(0xa379dd7bL), +tobe(0x9b3660c6L), tobe(0x9ff77d71L), tobe(0x92b45ba8L), tobe(0x9675461fL), +tobe(0x8832161aL), tobe(0x8cf30badL), tobe(0x81b02d74L), tobe(0x857130c3L), +tobe(0x5d8a9099L), tobe(0x594b8d2eL), tobe(0x5408abf7L), tobe(0x50c9b640L), +tobe(0x4e8ee645L), tobe(0x4a4ffbf2L), tobe(0x470cdd2bL), tobe(0x43cdc09cL), +tobe(0x7b827d21L), tobe(0x7f436096L), tobe(0x7200464fL), tobe(0x76c15bf8L), +tobe(0x68860bfdL), tobe(0x6c47164aL), tobe(0x61043093L), tobe(0x65c52d24L), +tobe(0x119b4be9L), tobe(0x155a565eL), tobe(0x18197087L), tobe(0x1cd86d30L), +tobe(0x029f3d35L), tobe(0x065e2082L), tobe(0x0b1d065bL), tobe(0x0fdc1becL), +tobe(0x3793a651L), tobe(0x3352bbe6L), tobe(0x3e119d3fL), tobe(0x3ad08088L), +tobe(0x2497d08dL), tobe(0x2056cd3aL), tobe(0x2d15ebe3L), tobe(0x29d4f654L), +tobe(0xc5a92679L), tobe(0xc1683bceL), tobe(0xcc2b1d17L), tobe(0xc8ea00a0L), +tobe(0xd6ad50a5L), tobe(0xd26c4d12L), tobe(0xdf2f6bcbL), tobe(0xdbee767cL), +tobe(0xe3a1cbc1L), tobe(0xe760d676L), tobe(0xea23f0afL), tobe(0xeee2ed18L), +tobe(0xf0a5bd1dL), 
tobe(0xf464a0aaL), tobe(0xf9278673L), tobe(0xfde69bc4L), +tobe(0x89b8fd09L), tobe(0x8d79e0beL), tobe(0x803ac667L), tobe(0x84fbdbd0L), +tobe(0x9abc8bd5L), tobe(0x9e7d9662L), tobe(0x933eb0bbL), tobe(0x97ffad0cL), +tobe(0xafb010b1L), tobe(0xab710d06L), tobe(0xa6322bdfL), tobe(0xa2f33668L), +tobe(0xbcb4666dL), tobe(0xb8757bdaL), tobe(0xb5365d03L), tobe(0xb1f740b4L) +}; +#endif diff --git a/fs/ubi/debug.c b/fs/ubi/debug.c new file mode 100755 index 0000000..492ab5c --- /dev/null +++ b/fs/ubi/debug.c @@ -0,0 +1,192 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * Here we keep all the UBI debugging stuff which should normally be disabled + * and compiled-out, but it is extremely helpful when hunting bugs or doing big + * changes. + */ +#include <ubi_uboot.h> + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG + +#include "ubi.h" + +/** + * ubi_dbg_dump_ec_hdr - dump an erase counter header.
+ * @ec_hdr: the erase counter header to dump + */ +void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) +{ + dbg_msg("erase counter header dump:"); + dbg_msg("magic %#08x", be32_to_cpu(ec_hdr->magic)); + dbg_msg("version %d", (int)ec_hdr->version); + dbg_msg("ec %llu", (unsigned long long)be64_to_cpu(ec_hdr->ec)); /* cast must be unsigned to match %llu */ + dbg_msg("vid_hdr_offset %d", be32_to_cpu(ec_hdr->vid_hdr_offset)); + dbg_msg("data_offset %d", be32_to_cpu(ec_hdr->data_offset)); + dbg_msg("hdr_crc %#08x", be32_to_cpu(ec_hdr->hdr_crc)); + dbg_msg("erase counter header hexdump:"); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + ec_hdr, UBI_EC_HDR_SIZE, 1); +} + +/** + * ubi_dbg_dump_vid_hdr - dump a volume identifier header. + * @vid_hdr: the volume identifier header to dump + */ +void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) +{ + dbg_msg("volume identifier header dump:"); + dbg_msg("magic %08x", be32_to_cpu(vid_hdr->magic)); + dbg_msg("version %d", (int)vid_hdr->version); + dbg_msg("vol_type %d", (int)vid_hdr->vol_type); + dbg_msg("copy_flag %d", (int)vid_hdr->copy_flag); + dbg_msg("compat %d", (int)vid_hdr->compat); + dbg_msg("vol_id %d", be32_to_cpu(vid_hdr->vol_id)); + dbg_msg("lnum %d", be32_to_cpu(vid_hdr->lnum)); + dbg_msg("leb_ver %u", be32_to_cpu(vid_hdr->leb_ver)); + dbg_msg("data_size %d", be32_to_cpu(vid_hdr->data_size)); + dbg_msg("used_ebs %d", be32_to_cpu(vid_hdr->used_ebs)); + dbg_msg("data_pad %d", be32_to_cpu(vid_hdr->data_pad)); + dbg_msg("sqnum %llu", + (unsigned long long)be64_to_cpu(vid_hdr->sqnum)); + dbg_msg("hdr_crc %08x", be32_to_cpu(vid_hdr->hdr_crc)); + dbg_msg("volume identifier header hexdump:"); /* NOTE(review): no hex dump follows this message, unlike ubi_dbg_dump_ec_hdr() - confirm whether one is missing */ +} + +/** + * ubi_dbg_dump_vol_info - dump volume information.
+ * @vol: UBI volume description object + */ +void ubi_dbg_dump_vol_info(const struct ubi_volume *vol) +{ + dbg_msg("volume information dump:"); + dbg_msg("vol_id %d", vol->vol_id); + dbg_msg("reserved_pebs %d", vol->reserved_pebs); + dbg_msg("alignment %d", vol->alignment); + dbg_msg("data_pad %d", vol->data_pad); + dbg_msg("vol_type %d", vol->vol_type); + dbg_msg("name_len %d", vol->name_len); + dbg_msg("usable_leb_size %d", vol->usable_leb_size); + dbg_msg("used_ebs %d", vol->used_ebs); + dbg_msg("used_bytes %lld", vol->used_bytes); + dbg_msg("last_eb_bytes %d", vol->last_eb_bytes); + dbg_msg("corrupted %d", vol->corrupted); + dbg_msg("upd_marker %d", vol->upd_marker); + /* Use %s only when name_len is in range and equals the measured string length */ + if (vol->name_len <= UBI_VOL_NAME_MAX && + strnlen(vol->name, vol->name_len + 1) == vol->name_len) { + dbg_msg("name %s", vol->name); + } else { /* otherwise print just the first five bytes, one %c each */ + dbg_msg("the 1st 5 characters of the name: %c%c%c%c%c", + vol->name[0], vol->name[1], vol->name[2], + vol->name[3], vol->name[4]); + } +} + +/** + * ubi_dbg_dump_vtbl_record - dump a &struct ubi_vtbl_record object.
+ * @r: the object to dump + * @idx: volume table index + */ +void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx) +{ + int name_len = be16_to_cpu(r->name_len); /* converted once; reused by the checks below */ + + dbg_msg("volume table record %d dump:", idx); + dbg_msg("reserved_pebs %d", be32_to_cpu(r->reserved_pebs)); + dbg_msg("alignment %d", be32_to_cpu(r->alignment)); + dbg_msg("data_pad %d", be32_to_cpu(r->data_pad)); + dbg_msg("vol_type %d", (int)r->vol_type); + dbg_msg("upd_marker %d", (int)r->upd_marker); + dbg_msg("name_len %d", name_len); + + if (r->name[0] == '\0') { + dbg_msg("name NULL"); + return; /* NOTE(review): this early return also skips the crc line at the end of the function - confirm that is intended */ + } + + if (name_len <= UBI_VOL_NAME_MAX && + strnlen(&r->name[0], name_len + 1) == name_len) { + dbg_msg("name %s", &r->name[0]); + } else { /* name_len and the stored string disagree: print five bytes only */ + dbg_msg("1st 5 characters of the name: %c%c%c%c%c", + r->name[0], r->name[1], r->name[2], r->name[3], + r->name[4]); + } + dbg_msg("crc %#08x", be32_to_cpu(r->crc)); +} + +/** + * ubi_dbg_dump_sv - dump a &struct ubi_scan_volume object. + * @sv: the object to dump + */ +void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv) +{ + dbg_msg("volume scanning information dump:"); + dbg_msg("vol_id %d", sv->vol_id); + dbg_msg("highest_lnum %d", sv->highest_lnum); + dbg_msg("leb_count %d", sv->leb_count); + dbg_msg("compat %d", sv->compat); + dbg_msg("vol_type %d", sv->vol_type); + dbg_msg("used_ebs %d", sv->used_ebs); + dbg_msg("last_data_size %d", sv->last_data_size); + dbg_msg("data_pad %d", sv->data_pad); +} + +/** + * ubi_dbg_dump_seb - dump a &struct ubi_scan_leb object.
+ * @seb: the object to dump + * @type: object type: 0 - not corrupted, 1 - corrupted + */ +void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type) +{ + dbg_msg("eraseblock scanning information dump:"); + dbg_msg("ec %d", seb->ec); + dbg_msg("pnum %d", seb->pnum); + if (type == 0) { /* the remaining fields are printed only for type 0 */ + dbg_msg("lnum %d", seb->lnum); + dbg_msg("scrub %d", seb->scrub); + dbg_msg("sqnum %llu", seb->sqnum); + dbg_msg("leb_ver %u", seb->leb_ver); + } +} + +/** + * ubi_dbg_dump_mkvol_req - dump a &struct ubi_mkvol_req object. + * @req: the object to dump + */ +void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req) +{ + char nm[17]; /* 16 name bytes plus the terminating NUL */ + + dbg_msg("volume creation request dump:"); + dbg_msg("vol_id %d", req->vol_id); + dbg_msg("alignment %d", req->alignment); + dbg_msg("bytes %lld", (long long)req->bytes); + dbg_msg("vol_type %d", req->vol_type); + dbg_msg("name_len %d", req->name_len); + + memcpy(nm, req->name, 16); /* always copies 16 bytes, independent of req->name_len */ + nm[16] = 0; + dbg_msg("the 1st 16 characters of the name: %s", nm); +} + +#endif /* CONFIG_MTD_UBI_DEBUG_MSG */ diff --git a/fs/ubi/debug.h b/fs/ubi/debug.h new file mode 100755 index 0000000..b44380b --- /dev/null +++ b/fs/ubi/debug.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +#ifndef __UBI_DEBUG_H__ +#define __UBI_DEBUG_H__ + +#ifdef CONFIG_MTD_UBI_DEBUG +#ifdef UBI_LINUX +#include <linux/random.h> +#endif + +#define ubi_assert(expr) BUG_ON(!(expr)) +#define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__) +#else +#define ubi_assert(expr) ({}) +#define dbg_err(fmt, ...) ({}) +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT +#define DBG_DISABLE_BGT 1 +#else +#define DBG_DISABLE_BGT 0 +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG +/* Generic debugging message */ +#define dbg_msg(fmt, ...) \ + printk(KERN_DEBUG "UBI DBG: %s: " fmt "\n", \ + __FUNCTION__, ##__VA_ARGS__) + +#define ubi_dbg_dump_stack() dump_stack() + +struct ubi_ec_hdr; +struct ubi_vid_hdr; +struct ubi_volume; +struct ubi_vtbl_record; +struct ubi_scan_volume; +struct ubi_scan_leb; +struct ubi_mkvol_req; + +void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr); +void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr); +void ubi_dbg_dump_vol_info(const struct ubi_volume *vol); +void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx); +void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv); +void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type); +void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req); + +#else + +#define dbg_msg(fmt, ...)
({}) +#define ubi_dbg_dump_stack() ({}) +#define ubi_dbg_dump_ec_hdr(ec_hdr) ({}) +#define ubi_dbg_dump_vid_hdr(vid_hdr) ({}) +#define ubi_dbg_dump_vol_info(vol) ({}) +#define ubi_dbg_dump_vtbl_record(r, idx) ({}) +#define ubi_dbg_dump_sv(sv) ({}) +#define ubi_dbg_dump_seb(seb, type) ({}) +#define ubi_dbg_dump_mkvol_req(req) ({}) + +#endif /* CONFIG_MTD_UBI_DEBUG_MSG */ + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA +/* Messages from the eraseblock association unit */ +#define dbg_eba(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#else +#define dbg_eba(fmt, ...) ({}) +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL +/* Messages from the wear-leveling unit */ +#define dbg_wl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#else +#define dbg_wl(fmt, ...) ({}) +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO +/* Messages from the input/output unit */ +#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#else +#define dbg_io(fmt, ...) ({}) +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG_BLD +/* Initialization and build messages */ +#define dbg_bld(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#else +#define dbg_bld(fmt, ...) ({}) +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS +/** + * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip. + * + * Returns non-zero if a bit-flip should be emulated, otherwise returns zero. + */ +static inline int ubi_dbg_is_bitflip(void) +{ + return !(random32() % 200); +} +#else +#define ubi_dbg_is_bitflip() 0 +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_WRITE_FAILURES +/** + * ubi_dbg_is_write_failure - if it is time to emulate a write failure. + * + * Returns non-zero if a write failure should be emulated, otherwise returns + * zero. + */ +static inline int ubi_dbg_is_write_failure(void) +{ + return !(random32() % 500); +} +#else +#define ubi_dbg_is_write_failure() 0 +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_ERASE_FAILURES +/** + * ubi_dbg_is_erase_failure - if it is time to emulate an erase failure.
+ * + * Returns non-zero if an erase failure should be emulated, otherwise returns + * zero. + */ +static inline int ubi_dbg_is_erase_failure(void) +{ + return !(random32() % 400); +} +#else +#define ubi_dbg_is_erase_failure() 0 +#endif + +#endif /* !__UBI_DEBUG_H__ */ diff --git a/fs/ubi/eba.c b/fs/ubi/eba.c new file mode 100755 index 0000000..7368f1f --- /dev/null +++ b/fs/ubi/eba.c @@ -0,0 +1,1256 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * The UBI Eraseblock Association (EBA) unit. + * + * This unit is responsible for I/O to/from logical eraseblocks. + * + * Although in this implementation the EBA table is fully kept and managed in + * RAM, which assumes poor scalability, it might be (partially) maintained on + * flash in future implementations. + * + * The EBA unit implements per-logical eraseblock locking. Before accessing a + * logical eraseblock it is locked for reading or writing. The per-logical + * eraseblock locking is implemented by means of the lock tree. The lock tree + * is an RB-tree which refers to all the currently locked logical eraseblocks. The + * lock tree elements are &struct ubi_ltree_entry objects.
They are indexed by + * (@vol_id, @lnum) pairs. + * + * EBA also maintains the global sequence counter which is incremented each + * time a logical eraseblock is mapped to a physical eraseblock and it is + * stored in the volume identifier header. This means that each VID header has + * a unique sequence number. The sequence number is only increased and we assume + * 64 bits is enough to never overflow. + */ + +#ifdef UBI_LINUX +#include <linux/slab.h> +#include <linux/crc32.h> +#include <linux/err.h> +#endif + +#include <ubi_uboot.h> +#include "ubi.h" + +/* Number of physical eraseblocks reserved for atomic LEB change operation */ +#define EBA_RESERVED_PEBS 1 + +/** + * next_sqnum - get next sequence number. + * @ubi: UBI device description object + * + * This function returns next sequence number to use, which is just the current + * global sequence counter value. It also increases the global sequence + * counter. + */ +static unsigned long long next_sqnum(struct ubi_device *ubi) +{ + unsigned long long sqnum; + + spin_lock(&ubi->ltree_lock); + sqnum = ubi->global_sqnum++; /* post-increment: the value before the bump is returned */ + spin_unlock(&ubi->ltree_lock); + + return sqnum; +} + +/** + * ubi_get_compat - get compatibility flags of a volume. + * @ubi: UBI device description object + * @vol_id: volume ID + * + * This function returns compatibility flags for an internal volume. User + * volumes have no compatibility flags, so %0 is returned. + */ +static int ubi_get_compat(const struct ubi_device *ubi, int vol_id) +{ + if (vol_id == UBI_LAYOUT_VOLUME_ID) + return UBI_LAYOUT_VOLUME_COMPAT; + return 0; +} + +/** + * ltree_lookup - look up the lock tree. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function returns a pointer to the corresponding &struct ubi_ltree_entry + * object if the logical eraseblock is locked and %NULL if it is not. + * @ubi->ltree_lock has to be locked.
+ */ +static struct ubi_ltree_entry *ltree_lookup(struct ubi_device *ubi, int vol_id, + int lnum) +{ + struct rb_node *p; + + p = ubi->ltree.rb_node; + while (p) { + struct ubi_ltree_entry *le; + + le = rb_entry(p, struct ubi_ltree_entry, rb); + + if (vol_id < le->vol_id) + p = p->rb_left; + else if (vol_id > le->vol_id) + p = p->rb_right; + else { + if (lnum < le->lnum) + p = p->rb_left; + else if (lnum > le->lnum) + p = p->rb_right; + else + return le; + } + } + + return NULL; +} + +/** + * ltree_add_entry - add new entry to the lock tree. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function adds new entry for logical eraseblock (@vol_id, @lnum) to the + * lock tree. If such entry is already there, its usage counter is increased. + * Returns pointer to the lock tree entry, or ERR_PTR(-ENOMEM) if memory + * allocation failed (callers test the result with IS_ERR()). + */ +static struct ubi_ltree_entry *ltree_add_entry(struct ubi_device *ubi, + int vol_id, int lnum) +{ + struct ubi_ltree_entry *le, *le1, *le_free; + + le = kmalloc(sizeof(struct ubi_ltree_entry), GFP_NOFS); + if (!le) + return ERR_PTR(-ENOMEM); + + le->users = 0; + init_rwsem(&le->mutex); + le->vol_id = vol_id; + le->lnum = lnum; + + spin_lock(&ubi->ltree_lock); + le1 = ltree_lookup(ubi, vol_id, lnum); + + if (le1) { + /* + * This logical eraseblock is already locked. The newly + * allocated lock entry is not needed. + */ + le_free = le; + le = le1; + } else { + struct rb_node **p, *parent = NULL; + + /* + * No lock entry, add the newly allocated one to the + * @ubi->ltree RB-tree.
+ */ + le_free = NULL; + + p = &ubi->ltree.rb_node; + while (*p) { + parent = *p; + le1 = rb_entry(parent, struct ubi_ltree_entry, rb); + + if (vol_id < le1->vol_id) + p = &(*p)->rb_left; + else if (vol_id > le1->vol_id) + p = &(*p)->rb_right; + else { + ubi_assert(lnum != le1->lnum); + if (lnum < le1->lnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + } + + rb_link_node(&le->rb, parent, p); + rb_insert_color(&le->rb, &ubi->ltree); + } + le->users += 1; /* counted for a reused entry and for a freshly inserted one alike */ + spin_unlock(&ubi->ltree_lock); + + if (le_free) + kfree(le_free); + + return le; +} + +/** + * leb_read_lock - lock logical eraseblock for reading. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function locks a logical eraseblock for reading. Returns zero in case + * of success and a negative error code in case of failure. + */ +static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum) +{ + struct ubi_ltree_entry *le; + + le = ltree_add_entry(ubi, vol_id, lnum); + if (IS_ERR(le)) + return PTR_ERR(le); + down_read(&le->mutex); + return 0; +} + +/** + * leb_read_unlock - unlock logical eraseblock. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + */ +static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum) +{ + int _free = 0; + struct ubi_ltree_entry *le; + + spin_lock(&ubi->ltree_lock); + le = ltree_lookup(ubi, vol_id, lnum); /* NOTE(review): dereferenced without a NULL check - presumably only called for LEBs that are currently locked; confirm */ + le->users -= 1; + ubi_assert(le->users >= 0); + if (le->users == 0) { + rb_erase(&le->rb, &ubi->ltree); + _free = 1; + } + spin_unlock(&ubi->ltree_lock); + + up_read(&le->mutex); + if (_free) + kfree(le); +} + +/** + * leb_write_lock - lock logical eraseblock for writing. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function locks a logical eraseblock for writing. Returns zero in case + * of success and a negative error code in case of failure.
+ */ +static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum) +{ + struct ubi_ltree_entry *le; + + le = ltree_add_entry(ubi, vol_id, lnum); + if (IS_ERR(le)) + return PTR_ERR(le); + down_write(&le->mutex); + return 0; +} + +/** + * leb_write_trylock - try to lock logical eraseblock for writing. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function locks a logical eraseblock for writing if there is no + * contention and does nothing if there is contention. Returns %0 in case of + * success, %1 in case of contention, and a negative error code in case of + * failure. + */ +static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum) +{ + int _free; + struct ubi_ltree_entry *le; + + le = ltree_add_entry(ubi, vol_id, lnum); + if (IS_ERR(le)) + return PTR_ERR(le); + if (down_write_trylock(&le->mutex)) + return 0; + + /* Contention, cancel: give back the user count taken by ltree_add_entry() */ + spin_lock(&ubi->ltree_lock); + le->users -= 1; + ubi_assert(le->users >= 0); + if (le->users == 0) { + rb_erase(&le->rb, &ubi->ltree); + _free = 1; + } else + _free = 0; + spin_unlock(&ubi->ltree_lock); + if (_free) + kfree(le); + + return 1; +} + +/** + * leb_write_unlock - unlock logical eraseblock. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + */ +static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum) +{ + int _free; + struct ubi_ltree_entry *le; + + spin_lock(&ubi->ltree_lock); + le = ltree_lookup(ubi, vol_id, lnum); + le->users -= 1; + ubi_assert(le->users >= 0); + if (le->users == 0) { + rb_erase(&le->rb, &ubi->ltree); + _free = 1; + } else + _free = 0; + spin_unlock(&ubi->ltree_lock); + + up_write(&le->mutex); + if (_free) + kfree(le); +} + +/** + * ubi_eba_unmap_leb - un-map logical eraseblock.
+ * @ubi: UBI device description object + * @vol: volume description object + * @lnum: logical eraseblock number + * + * This function un-maps logical eraseblock @lnum and schedules corresponding + * physical eraseblock for erasure. Returns zero in case of success and a + * negative error code in case of failure. + */ +int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol, + int lnum) +{ + int err, pnum, vol_id = vol->vol_id; + + if (ubi->ro_mode) + return -EROFS; + + err = leb_write_lock(ubi, vol_id, lnum); + if (err) + return err; + + pnum = vol->eba_tbl[lnum]; + if (pnum < 0) + /* This logical eraseblock is already unmapped */ + goto out_unlock; + + dbg_eba("erase LEB %d:%d, PEB %d", vol_id, lnum, pnum); + + vol->eba_tbl[lnum] = UBI_LEB_UNMAPPED; + err = ubi_wl_put_peb(ubi, pnum, 0); + +out_unlock: + leb_write_unlock(ubi, vol_id, lnum); + return err; +} + +/** + * ubi_eba_read_leb - read data. + * @ubi: UBI device description object + * @vol: volume description object + * @lnum: logical eraseblock number + * @buf: buffer to store the read data + * @offset: offset from where to read + * @len: how many bytes to read + * @check: data CRC check flag + * + * If the logical eraseblock @lnum is unmapped, @buf is filled with 0xFF + * bytes. The @check flag only makes sense for static volumes and forces + * eraseblock data CRC checking. + * + * In case of success this function returns zero. In case of a static volume, + * if data CRC mismatches - %-EBADMSG is returned. %-EBADMSG may also be + * returned for any volume type if an ECC error was detected by the MTD device + * driver. Other negative error cored may be returned in case of other errors. 
+ */ +int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, + void *buf, int offset, int len, int check) +{ + int err, pnum, scrub = 0, vol_id = vol->vol_id; + struct ubi_vid_hdr *vid_hdr; + uint32_t uninitialized_var(crc); + + err = leb_read_lock(ubi, vol_id, lnum); + if (err) + return err; + + pnum = vol->eba_tbl[lnum]; + if (pnum < 0) { + /* + * The logical eraseblock is not mapped, fill the whole buffer + * with 0xFF bytes. The exception is static volumes for which + * it is an error to read unmapped logical eraseblocks. + */ + dbg_eba("read %d bytes from offset %d of LEB %d:%d (unmapped)", + len, offset, vol_id, lnum); + leb_read_unlock(ubi, vol_id, lnum); + ubi_assert(vol->vol_type != UBI_STATIC_VOLUME); + memset(buf, 0xFF, len); + return 0; + } + + dbg_eba("read %d bytes from offset %d of LEB %d:%d, PEB %d", + len, offset, vol_id, lnum, pnum); + + if (vol->vol_type == UBI_DYNAMIC_VOLUME) + check = 0; + +retry: + if (check) { + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); + if (!vid_hdr) { + err = -ENOMEM; + goto out_unlock; + } + + err = ubi_io_read_vid_hdr(ubi, pnum, vid_hdr, 1); + if (err && err != UBI_IO_BITFLIPS) { + if (err > 0) { + /* + * The header is either absent or corrupted. + * The former case means there is a bug - + * switch to read-only mode just in case. + * The latter case means a real corruption - we + * may try to recover data. FIXME: but this is + * not implemented. 
+ */ + if (err == UBI_IO_BAD_VID_HDR) { + ubi_warn("bad VID header at PEB %d, LEB" + "%d:%d", pnum, vol_id, lnum); + err = -EBADMSG; + } else + ubi_ro_mode(ubi); + } + goto out_free; + } else if (err == UBI_IO_BITFLIPS) + scrub = 1; + + ubi_assert(lnum < be32_to_cpu(vid_hdr->used_ebs)); + ubi_assert(len == be32_to_cpu(vid_hdr->data_size)); + + crc = be32_to_cpu(vid_hdr->data_crc); + ubi_free_vid_hdr(ubi, vid_hdr); + } + + err = ubi_io_read_data(ubi, buf, pnum, offset, len); + if (err) { + if (err == UBI_IO_BITFLIPS) { + scrub = 1; + err = 0; + } else if (err == -EBADMSG) { + if (vol->vol_type == UBI_DYNAMIC_VOLUME) + goto out_unlock; + scrub = 1; + if (!check) { + ubi_msg("force data checking"); + check = 1; + goto retry; + } + } else + goto out_unlock; + } + + if (check) { + uint32_t crc1 = crc32(UBI_CRC32_INIT, buf, len); + if (crc1 != crc) { + ubi_warn("CRC error: calculated %#08x, must be %#08x", + crc1, crc); + err = -EBADMSG; + goto out_unlock; + } + } + + if (scrub) + err = ubi_wl_scrub_peb(ubi, pnum); + + leb_read_unlock(ubi, vol_id, lnum); + return err; + +out_free: + ubi_free_vid_hdr(ubi, vid_hdr); +out_unlock: + leb_read_unlock(ubi, vol_id, lnum); + return err; +} + +/** + * recover_peb - recover from write failure. + * @ubi: UBI device description object + * @pnum: the physical eraseblock to recover + * @vol_id: volume ID + * @lnum: logical eraseblock number + * @buf: data which was not written because of the write failure + * @offset: offset of the failed write + * @len: how many bytes should have been written + * + * This function is called in case of a write failure and moves all good data + * from the potentially bad physical eraseblock to a good physical eraseblock. + * This function also writes the data which was not written due to the failure. + * Returns new physical eraseblock number in case of success, and a negative + * error code in case of failure. 
+ */ +static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum, + const void *buf, int offset, int len) +{ + int err, idx = vol_id2idx(ubi, vol_id), new_pnum, data_size, tries = 0; + struct ubi_volume *vol = ubi->volumes[idx]; + struct ubi_vid_hdr *vid_hdr; + + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); + if (!vid_hdr) { + return -ENOMEM; + } + + mutex_lock(&ubi->buf_mutex); + +retry: + new_pnum = ubi_wl_get_peb(ubi, UBI_UNKNOWN); + if (new_pnum < 0) { + mutex_unlock(&ubi->buf_mutex); + ubi_free_vid_hdr(ubi, vid_hdr); + return new_pnum; + } + + ubi_msg("recover PEB %d, move data to PEB %d", pnum, new_pnum); + + err = ubi_io_read_vid_hdr(ubi, pnum, vid_hdr, 1); + if (err && err != UBI_IO_BITFLIPS) { + if (err > 0) + err = -EIO; + goto out_put; + } + + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr); + if (err) + goto write_error; + + data_size = offset + len; + memset(ubi->peb_buf1 + offset, 0xFF, len); + + /* Read everything before the area where the write failure happened */ + if (offset > 0) { + err = ubi_io_read_data(ubi, ubi->peb_buf1, pnum, 0, offset); + if (err && err != UBI_IO_BITFLIPS) + goto out_put; + } + + memcpy(ubi->peb_buf1 + offset, buf, len); + + err = ubi_io_write_data(ubi, ubi->peb_buf1, new_pnum, 0, data_size); + if (err) + goto write_error; + + mutex_unlock(&ubi->buf_mutex); + ubi_free_vid_hdr(ubi, vid_hdr); + + vol->eba_tbl[lnum] = new_pnum; + ubi_wl_put_peb(ubi, pnum, 1); + + ubi_msg("data was successfully recovered"); + return 0; + +out_put: + mutex_unlock(&ubi->buf_mutex); + ubi_wl_put_peb(ubi, new_pnum, 1); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + +write_error: + /* + * Bad luck? This physical eraseblock is bad too? Crud. Let's try to + * get another one. 
+ */ + ubi_warn("failed to write to PEB %d", new_pnum); + ubi_wl_put_peb(ubi, new_pnum, 1); + if (++tries > UBI_IO_RETRIES) { + mutex_unlock(&ubi->buf_mutex); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + ubi_msg("try again"); + goto retry; +} + +/** + * ubi_eba_write_leb - write data to dynamic volume. + * @ubi: UBI device description object + * @vol: volume description object + * @lnum: logical eraseblock number + * @buf: the data to write + * @offset: offset within the logical eraseblock where to write + * @len: how many bytes to write + * @dtype: data type + * + * This function writes data to logical eraseblock @lnum of a dynamic volume + * @vol. Returns zero in case of success and a negative error code in case + * of failure. In case of error, it is possible that something was still + * written to the flash media, but may be some garbage. + */ +int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, + const void *buf, int offset, int len, int dtype) +{ + int err, pnum, tries = 0, vol_id = vol->vol_id; + struct ubi_vid_hdr *vid_hdr; + + if (ubi->ro_mode) + return -EROFS; + + err = leb_write_lock(ubi, vol_id, lnum); + if (err) + return err; + + pnum = vol->eba_tbl[lnum]; + if (pnum >= 0) { + dbg_eba("write %d bytes at offset %d of LEB %d:%d, PEB %d", + len, offset, vol_id, lnum, pnum); + + err = ubi_io_write_data(ubi, buf, pnum, offset, len); + if (err) { + ubi_warn("failed to write data to PEB %d", pnum); + if (err == -EIO && ubi->bad_allowed) + err = recover_peb(ubi, pnum, vol_id, lnum, buf, + offset, len); + if (err) + ubi_ro_mode(ubi); + } + leb_write_unlock(ubi, vol_id, lnum); + return err; + } + + /* + * The logical eraseblock is not mapped. We have to get a free physical + * eraseblock and write the volume identifier header there first. 
+ */ + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); + if (!vid_hdr) { + leb_write_unlock(ubi, vol_id, lnum); + return -ENOMEM; + } + + vid_hdr->vol_type = UBI_VID_DYNAMIC; + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + vid_hdr->vol_id = cpu_to_be32(vol_id); + vid_hdr->lnum = cpu_to_be32(lnum); + vid_hdr->compat = ubi_get_compat(ubi, vol_id); + vid_hdr->data_pad = cpu_to_be32(vol->data_pad); + +retry: + pnum = ubi_wl_get_peb(ubi, dtype); + if (pnum < 0) { + ubi_free_vid_hdr(ubi, vid_hdr); + leb_write_unlock(ubi, vol_id, lnum); + return pnum; + } + + dbg_eba("write VID hdr and %d bytes at offset %d of LEB %d:%d, PEB %d", + len, offset, vol_id, lnum, pnum); + + err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr); + if (err) { + ubi_warn("failed to write VID header to LEB %d:%d, PEB %d", + vol_id, lnum, pnum); + goto write_error; + } + + if (len) { + err = ubi_io_write_data(ubi, buf, pnum, offset, len); + if (err) { + ubi_warn("failed to write %d bytes at offset %d of " + "LEB %d:%d, PEB %d", len, offset, vol_id, + lnum, pnum); + goto write_error; + } + } + + vol->eba_tbl[lnum] = pnum; + + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return 0; + +write_error: + if (err != -EIO || !ubi->bad_allowed) { + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + /* + * Fortunately, this is the first write operation to this physical + * eraseblock, so just put it and request a new one. We assume that if + * this physical eraseblock went bad, the erase code will handle that. + */ + err = ubi_wl_put_peb(ubi, pnum, 1); + if (err || ++tries > UBI_IO_RETRIES) { + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + ubi_msg("try another PEB"); + goto retry; +} + +/** + * ubi_eba_write_leb_st - write data to static volume. 
+ * @ubi: UBI device description object + * @vol: volume description object + * @lnum: logical eraseblock number + * @buf: data to write + * @len: how many bytes to write + * @dtype: data type + * @used_ebs: how many logical eraseblocks will this volume contain + * + * This function writes data to logical eraseblock @lnum of static volume + * @vol. The @used_ebs argument should contain total number of logical + * eraseblock in this static volume. + * + * When writing to the last logical eraseblock, the @len argument doesn't have + * to be aligned to the minimal I/O unit size. Instead, it has to be equivalent + * to the real data size, although the @buf buffer has to contain the + * alignment. In all other cases, @len has to be aligned. + * + * It is prohibited to write more then once to logical eraseblocks of static + * volumes. This function returns zero in case of success and a negative error + * code in case of failure. + */ +int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol, + int lnum, const void *buf, int len, int dtype, + int used_ebs) +{ + int err, pnum, tries = 0, data_size = len, vol_id = vol->vol_id; + struct ubi_vid_hdr *vid_hdr; + uint32_t crc; + + if (ubi->ro_mode) + return -EROFS; + + if (lnum == used_ebs - 1) + /* If this is the last LEB @len may be unaligned */ + len = ALIGN(data_size, ubi->min_io_size); + else + ubi_assert(!(len & (ubi->min_io_size - 1))); + + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); + if (!vid_hdr) + return -ENOMEM; + + err = leb_write_lock(ubi, vol_id, lnum); + if (err) { + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + vid_hdr->vol_id = cpu_to_be32(vol_id); + vid_hdr->lnum = cpu_to_be32(lnum); + vid_hdr->compat = ubi_get_compat(ubi, vol_id); + vid_hdr->data_pad = cpu_to_be32(vol->data_pad); + + crc = crc32(UBI_CRC32_INIT, buf, data_size); + vid_hdr->vol_type = UBI_VID_STATIC; + vid_hdr->data_size = cpu_to_be32(data_size); + vid_hdr->used_ebs = 
cpu_to_be32(used_ebs); + vid_hdr->data_crc = cpu_to_be32(crc); + +retry: + pnum = ubi_wl_get_peb(ubi, dtype); + if (pnum < 0) { + ubi_free_vid_hdr(ubi, vid_hdr); + leb_write_unlock(ubi, vol_id, lnum); + return pnum; + } + + dbg_eba("write VID hdr and %d bytes at LEB %d:%d, PEB %d, used_ebs %d", + len, vol_id, lnum, pnum, used_ebs); + + err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr); + if (err) { + ubi_warn("failed to write VID header to LEB %d:%d, PEB %d", + vol_id, lnum, pnum); + goto write_error; + } + + err = ubi_io_write_data(ubi, buf, pnum, 0, len); + if (err) { + ubi_warn("failed to write %d bytes of data to PEB %d", + len, pnum); + goto write_error; + } + + ubi_assert(vol->eba_tbl[lnum] < 0); + vol->eba_tbl[lnum] = pnum; + + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return 0; + +write_error: + if (err != -EIO || !ubi->bad_allowed) { + /* + * This flash device does not admit of bad eraseblocks or + * something nasty and unexpected happened. Switch to read-only + * mode just in case. + */ + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + err = ubi_wl_put_peb(ubi, pnum, 1); + if (err || ++tries > UBI_IO_RETRIES) { + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + ubi_msg("try another PEB"); + goto retry; +} + +/* + * ubi_eba_atomic_leb_change - change logical eraseblock atomically. + * @ubi: UBI device description object + * @vol: volume description object + * @lnum: logical eraseblock number + * @buf: data to write + * @len: how many bytes to write + * @dtype: data type + * + * This function changes the contents of a logical eraseblock atomically. @buf + * has to contain new logical eraseblock data, and @len - the length of the + * data, which has to be aligned. This function guarantees that in case of an + * unclean reboot the old contents is preserved. 
Returns zero in case of + * success and a negative error code in case of failure. + * + * UBI reserves one LEB for the "atomic LEB change" operation, so only one + * LEB change may be done at a time. This is ensured by @ubi->alc_mutex. + */ +int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, + int lnum, const void *buf, int len, int dtype) +{ + int err, pnum, tries = 0, vol_id = vol->vol_id; + struct ubi_vid_hdr *vid_hdr; + uint32_t crc; + + if (ubi->ro_mode) + return -EROFS; + + if (len == 0) { + /* + * Special case when data length is zero. In this case the LEB + * has to be unmapped and mapped somewhere else. + */ + err = ubi_eba_unmap_leb(ubi, vol, lnum); + if (err) + return err; + return ubi_eba_write_leb(ubi, vol, lnum, NULL, 0, 0, dtype); + } + + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); + if (!vid_hdr) + return -ENOMEM; + + mutex_lock(&ubi->alc_mutex); + err = leb_write_lock(ubi, vol_id, lnum); + if (err) + goto out_mutex; + + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + vid_hdr->vol_id = cpu_to_be32(vol_id); + vid_hdr->lnum = cpu_to_be32(lnum); + vid_hdr->compat = ubi_get_compat(ubi, vol_id); + vid_hdr->data_pad = cpu_to_be32(vol->data_pad); + + crc = crc32(UBI_CRC32_INIT, buf, len); + vid_hdr->vol_type = UBI_VID_DYNAMIC; + vid_hdr->data_size = cpu_to_be32(len); + vid_hdr->copy_flag = 1; + vid_hdr->data_crc = cpu_to_be32(crc); + +retry: + pnum = ubi_wl_get_peb(ubi, dtype); + if (pnum < 0) { + err = pnum; + goto out_leb_unlock; + } + + dbg_eba("change LEB %d:%d, PEB %d, write VID hdr to PEB %d", + vol_id, lnum, vol->eba_tbl[lnum], pnum); + + err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr); + if (err) { + ubi_warn("failed to write VID header to LEB %d:%d, PEB %d", + vol_id, lnum, pnum); + goto write_error; + } + + err = ubi_io_write_data(ubi, buf, pnum, 0, len); + if (err) { + ubi_warn("failed to write %d bytes of data to PEB %d", + len, pnum); + goto write_error; + } + + if (vol->eba_tbl[lnum] >= 0) { + err = 
ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1); + if (err) + goto out_leb_unlock; + } + + vol->eba_tbl[lnum] = pnum; + +out_leb_unlock: + leb_write_unlock(ubi, vol_id, lnum); +out_mutex: + mutex_unlock(&ubi->alc_mutex); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + +write_error: + if (err != -EIO || !ubi->bad_allowed) { + /* + * This flash device does not admit of bad eraseblocks or + * something nasty and unexpected happened. Switch to read-only + * mode just in case. + */ + ubi_ro_mode(ubi); + goto out_leb_unlock; + } + + err = ubi_wl_put_peb(ubi, pnum, 1); + if (err || ++tries > UBI_IO_RETRIES) { + ubi_ro_mode(ubi); + goto out_leb_unlock; + } + + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + ubi_msg("try another PEB"); + goto retry; +} +#if 0 +/** + * ubi_eba_copy_leb - copy logical eraseblock. + * @ubi: UBI device description object + * @from: physical eraseblock number from where to copy + * @to: physical eraseblock number where to copy + * @vid_hdr: VID header of the @from physical eraseblock + * + * This function copies logical eraseblock from physical eraseblock @from to + * physical eraseblock @to. The @vid_hdr buffer may be changed by this + * function. Returns: + * o %0 in case of success; + * o %1 if the operation was canceled and should be tried later (e.g., + * because a bit-flip was detected at the target PEB); + * o %2 if the volume is being deleted and this LEB should not be moved. 
+ */ +int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, + struct ubi_vid_hdr *vid_hdr) +{ + int err, vol_id, lnum, data_size, aldata_size, idx; + struct ubi_volume *vol; + uint32_t crc; + + vol_id = be32_to_cpu(vid_hdr->vol_id); + lnum = be32_to_cpu(vid_hdr->lnum); + + dbg_eba("copy LEB %d:%d, PEB %d to PEB %d", vol_id, lnum, from, to); + + if (vid_hdr->vol_type == UBI_VID_STATIC) { + data_size = be32_to_cpu(vid_hdr->data_size); + aldata_size = ALIGN(data_size, ubi->min_io_size); + } else + data_size = aldata_size = + ubi->leb_size - be32_to_cpu(vid_hdr->data_pad); + + idx = vol_id2idx(ubi, vol_id); + spin_lock(&ubi->volumes_lock); + /* + * Note, we may race with volume deletion, which means that the volume + * this logical eraseblock belongs to might be being deleted. Since the + * volume deletion unmaps all the volume's logical eraseblocks, it will + * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish. + */ + vol = ubi->volumes[idx]; + if (!vol) { + /* No need to do further work, cancel */ + dbg_eba("volume %d is being removed, cancel", vol_id); + spin_unlock(&ubi->volumes_lock); + return 2; + } + spin_unlock(&ubi->volumes_lock); + + /* + * We do not want anybody to write to this logical eraseblock while we + * are moving it, so lock it. + * + * Note, we are using non-waiting locking here, because we cannot sleep + * on the LEB, since it may cause deadlocks. Indeed, imagine a task is + * unmapping the LEB which is mapped to the PEB we are going to move + * (@from). This task locks the LEB and goes sleep in the + * 'ubi_wl_put_peb()' function on the @ubi->move_mutex. In turn, we are + * holding @ubi->move_mutex and go sleep on the LEB lock. So, if the + * LEB is already locked, we just do not move it and return %1. 
+ */ + err = leb_write_trylock(ubi, vol_id, lnum); + if (err) { + dbg_eba("contention on LEB %d:%d, cancel", vol_id, lnum); + return err; + } + + /* + * The LEB might have been put meanwhile, and the task which put it is + * probably waiting on @ubi->move_mutex. No need to continue the work, + * cancel it. + */ + if (vol->eba_tbl[lnum] != from) { + dbg_eba("LEB %d:%d is no longer mapped to PEB %d, mapped to " + "PEB %d, cancel", vol_id, lnum, from, + vol->eba_tbl[lnum]); + err = 1; + goto out_unlock_leb; + } + + /* + * OK, now the LEB is locked and we can safely start moving iy. Since + * this function utilizes thie @ubi->peb1_buf buffer which is shared + * with some other functions, so lock the buffer by taking the + * @ubi->buf_mutex. + */ + mutex_lock(&ubi->buf_mutex); + dbg_eba("read %d bytes of data", aldata_size); + err = ubi_io_read_data(ubi, ubi->peb_buf1, from, 0, aldata_size); + if (err && err != UBI_IO_BITFLIPS) { + ubi_warn("error %d while reading data from PEB %d", + err, from); + goto out_unlock_buf; + } + + /* + * Now we have got to calculate how much data we have to to copy. In + * case of a static volume it is fairly easy - the VID header contains + * the data size. In case of a dynamic volume it is more difficult - we + * have to read the contents, cut 0xFF bytes from the end and copy only + * the first part. We must do this to avoid writing 0xFF bytes as it + * may have some side-effects. And not only this. It is important not + * to include those 0xFFs to CRC because later the they may be filled + * by data. + */ + if (vid_hdr->vol_type == UBI_VID_DYNAMIC) + aldata_size = data_size = + ubi_calc_data_len(ubi, ubi->peb_buf1, data_size); + + cond_resched(); + crc = crc32(UBI_CRC32_INIT, ubi->peb_buf1, data_size); + cond_resched(); + + /* + * It may turn out to me that the whole @from physical eraseblock + * contains only 0xFF bytes. Then we have to only write the VID header + * and do not write any data. 
This also means we should not set + * @vid_hdr->copy_flag, @vid_hdr->data_size, and @vid_hdr->data_crc. + */ + if (data_size > 0) { + vid_hdr->copy_flag = 1; + vid_hdr->data_size = cpu_to_be32(data_size); + vid_hdr->data_crc = cpu_to_be32(crc); + } + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + + err = ubi_io_write_vid_hdr(ubi, to, vid_hdr); + if (err) + goto out_unlock_buf; + + cond_resched(); + + /* Read the VID header back and check if it was written correctly */ + err = ubi_io_read_vid_hdr(ubi, to, vid_hdr, 1); + if (err) { + if (err != UBI_IO_BITFLIPS) + ubi_warn("cannot read VID header back from PEB %d", to); + else + err = 1; + goto out_unlock_buf; + } + + if (data_size > 0) { + err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size); + if (err) + goto out_unlock_buf; + + cond_resched(); + + /* + * We've written the data and are going to read it back to make + * sure it was written correctly. + */ + + err = ubi_io_read_data(ubi, ubi->peb_buf2, to, 0, aldata_size); + if (err) { + if (err != UBI_IO_BITFLIPS) + ubi_warn("cannot read data back from PEB %d", + to); + else + err = 1; + goto out_unlock_buf; + } + + cond_resched(); + + if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) { + ubi_warn("read data back from PEB %d - it is different", + to); + goto out_unlock_buf; + } + } + + ubi_assert(vol->eba_tbl[lnum] == from); + vol->eba_tbl[lnum] = to; + +out_unlock_buf: + mutex_unlock(&ubi->buf_mutex); +out_unlock_leb: + leb_write_unlock(ubi, vol_id, lnum); + return err; +} +#endif +/** + * ubi_eba_init_scan - initialize the EBA unit using scanning information. + * @ubi: UBI device description object + * @si: scanning information + * + * This function returns zero in case of success and a negative error code in + * case of failure. 
+ */ +int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) +{ + int i, j, err, num_volumes; + struct ubi_scan_volume *sv; + struct ubi_volume *vol; + struct ubi_scan_leb *seb; + struct rb_node *rb; + + dbg_eba("initialize EBA unit"); + + spin_lock_init(&ubi->ltree_lock); + mutex_init(&ubi->alc_mutex); + ubi->ltree = RB_ROOT; + + ubi->global_sqnum = si->max_sqnum + 1; + num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; + + for (i = 0; i < num_volumes; i++) { + vol = ubi->volumes[i]; + if (!vol) + continue; + + cond_resched(); + + vol->eba_tbl = kmalloc(vol->reserved_pebs * sizeof(int), + GFP_KERNEL); + if (!vol->eba_tbl) { + err = -ENOMEM; + goto out_free; + } + + for (j = 0; j < vol->reserved_pebs; j++) + vol->eba_tbl[j] = UBI_LEB_UNMAPPED; + + sv = ubi_scan_find_sv(si, idx2vol_id(ubi, i)); + if (!sv) + continue; + + ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) { + if (seb->lnum >= vol->reserved_pebs) + /* + * This may happen in case of an unclean reboot + * during re-size. 
+ */ + ubi_scan_move_to_list(sv, seb, &si->erase); + vol->eba_tbl[seb->lnum] = seb->pnum; + } + } + + if (ubi->avail_pebs < EBA_RESERVED_PEBS) { + ubi_err("no enough physical eraseblocks (%d, need %d)", + ubi->avail_pebs, EBA_RESERVED_PEBS); + err = -ENOSPC; + goto out_free; + } + ubi->avail_pebs -= EBA_RESERVED_PEBS; + ubi->rsvd_pebs += EBA_RESERVED_PEBS; + + if (ubi->bad_allowed) { + ubi_calculate_reserved(ubi); + + if (ubi->avail_pebs < ubi->beb_rsvd_level) { + /* No enough free physical eraseblocks */ + ubi->beb_rsvd_pebs = ubi->avail_pebs; + ubi_warn("cannot reserve enough PEBs for bad PEB " + "handling, reserved %d, need %d", + ubi->beb_rsvd_pebs, ubi->beb_rsvd_level); + } else + ubi->beb_rsvd_pebs = ubi->beb_rsvd_level; + + ubi->avail_pebs -= ubi->beb_rsvd_pebs; + ubi->rsvd_pebs += ubi->beb_rsvd_pebs; + } + + dbg_eba("EBA unit is initialized"); + return 0; + +out_free: + for (i = 0; i < num_volumes; i++) { + if (!ubi->volumes[i]) + continue; + kfree(ubi->volumes[i]->eba_tbl); + } + return err; +} + +/** + * ubi_eba_close - close EBA unit. + * @ubi: UBI device description object + */ +void ubi_eba_close(const struct ubi_device *ubi) +{ + int i, num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; + + dbg_eba("close EBA unit"); + + for (i = 0; i < num_volumes; i++) { + if (!ubi->volumes[i]) + continue; + kfree(ubi->volumes[i]->eba_tbl); + } +} diff --git a/fs/ubi/io.c b/fs/ubi/io.c new file mode 100755 index 0000000..cc5d2dc --- /dev/null +++ b/fs/ubi/io.c @@ -0,0 +1,1294 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2006, 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * UBI input/output unit. + * + * This unit provides a uniform way to work with all kinds of the underlying + * MTD devices. It also implements handy functions for reading and writing UBI + * headers. + * + * We are trying to have a paranoid mindset and not to trust what we read + * from the flash media in order to be more secure and robust. So this unit + * validates every single header it reads from the flash media. + * + * Some words about how the eraseblock headers are stored. + * + * The erase counter header is always stored at offset zero. By default, the + * VID header is stored after the EC header at the closest aligned offset + * (i.e. aligned to the minimum I/O unit size). Data starts next to the VID + * header at the closest aligned offset. But this default layout may be + * changed. For example, for different reasons (e.g., optimization) UBI may be + * asked to put the VID header at further offset, and even at an unaligned + * offset. Of course, if the offset of the VID header is unaligned, UBI adds + * proper padding in front of it. Data offset may also be changed but it has to + * be aligned. + * + * About minimal I/O units. In general, UBI assumes flash device model where + * there is only one minimal I/O unit size. E.g., in case of NOR flash it is 1, + * in case of NAND flash it is a NAND page, etc. This is reported by MTD in the + * @ubi->mtd->writesize field.
But as an exception, UBI admits of using another + * (smaller) minimal I/O unit size for EC and VID headers to make it possible + * to do different optimizations. + * + * This is extremely useful in case of NAND flashes which admit of several + * write operations to one NAND page. In this case UBI can fit EC and VID + * headers at one NAND page. Thus, UBI may use "sub-page" size as the minimal + * I/O unit for the headers (the @ubi->hdrs_min_io_size field). But it still + * reports NAND page size (@ubi->min_io_size) as a minimal I/O unit for the UBI + * users. + * + * Example: some Samsung NANDs with 2KiB pages allow 4x 512-byte writes, so + * although the minimal I/O unit is 2K, UBI uses 512 bytes for EC and VID + * headers. + * + * Q: why not just to treat sub-page as a minimal I/O unit of this flash + * device, e.g., make @ubi->min_io_size = 512 in the example above? + * + * A: because when writing a sub-page, MTD still writes a full 2K page but the + * bytes which are not relevant to the sub-page are 0xFF. So, basically, writing + * 4x512 sub-pages is 4 times slower than writing one 2KiB NAND page. Thus, we + * prefer to use sub-pages only for EC and VID headers. + * + * As it was noted above, the VID header may start at a non-aligned offset. + * For example, in case of a 2KiB page NAND flash with a 512 bytes sub-page, + * the VID header may reside at offset 1984 which is the last 64 bytes of the + * last sub-page (EC header is always at offset zero). This causes some + * difficulties when reading and writing VID headers. + * + * Suppose we have a 64-byte buffer and we read a VID header at it. We change + * the data and want to write this VID header out. As we can only write in + * 512-byte chunks, we have to allocate one more buffer and copy our VID header + * to offset 448 of this buffer. + * + * The I/O unit does the following trick in order to avoid this extra copy.
+ * It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID header + * and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. When the + * VID header is being written out, it shifts the VID header pointer back and + * writes the whole sub-page. + */ + +#ifdef UBI_LINUX +#include <linux/crc32.h> +#include <linux/err.h> +#endif + +#include <ubi_uboot.h> +#include "ubi.h" + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum); +static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum); +static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_ec_hdr *ec_hdr); +static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum); +static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_vid_hdr *vid_hdr); +static int paranoid_check_all_ff(struct ubi_device *ubi, int pnum, int offset, + int len); +#else +#define paranoid_check_not_bad(ubi, pnum) 0 +#define paranoid_check_peb_ec_hdr(ubi, pnum) 0 +#define paranoid_check_ec_hdr(ubi, pnum, ec_hdr) 0 +#define paranoid_check_peb_vid_hdr(ubi, pnum) 0 +#define paranoid_check_vid_hdr(ubi, pnum, vid_hdr) 0 +#define paranoid_check_all_ff(ubi, pnum, offset, len) 0 +#endif + +extern int read_ubi_nand(unsigned int addr, unsigned int len, size_t * retlen, u_char * buf); +extern int write_ubi_nand(unsigned int addr, unsigned int len, size_t * retlen, u_char * buf); +extern int erase_ubi_nand(unsigned int block); +extern void print_nand_buf(unsigned char * rvalue, int length); + +extern int nand_block_isbad(unsigned int block); +extern int nand_block_markbad(unsigned int block); + +/** + * ubi_io_read - read data from a physical eraseblock. 
+ * @ubi: UBI device description object + * @buf: buffer where to store the read data + * @pnum: physical eraseblock number to read from + * @offset: offset within the physical eraseblock from where to read + * @len: how many bytes to read + * + * This function reads data from offset @offset of physical eraseblock @pnum + * and stores the read data in the @buf buffer. The following return codes are + * possible: + * + * o %0 if all the requested data were successfully read; + * o %UBI_IO_BITFLIPS if all the requested data were successfully read, but + * correctable bit-flips were detected; this is harmless but may indicate + * that this eraseblock may become bad soon (but do not have to); + * o %-EBADMSG if the MTD subsystem reported about data integrity problems, for + * example it can be an ECC error in case of NAND; this most probably means + * that the data is corrupted; + * o %-EIO if some I/O error occurred; + * o other negative error codes in case of other errors. + */ +int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, + int len) +{ + int err, retries = 0; + size_t read; + loff_t addr; + + dbg_io("read %d bytes from PEB %d:%d", len, pnum, offset); + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + ubi_assert(offset >= 0 && offset + len <= ubi->peb_size); + ubi_assert(len > 0); + pnum += ubi->peb_start; + err = paranoid_check_not_bad(ubi, pnum); + if (err){ + + printk("\nerr is happened in block %d", pnum); + + return err > 0 ? -EINVAL : err; + + } + addr = (loff_t)pnum * ubi->peb_size + offset; +retry: + err = read_ubi_nand(addr, len, &read, buf); + //err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf); + if (err) { + if (err == -EUCLEAN) { + /* + * -EUCLEAN is reported if there was a bit-flip which + * was corrected, so this is harmless. 
+ */ + ubi_msg("fixable bit-flip detected at PEB %d", pnum); + ubi_assert(len == read); + return UBI_IO_BITFLIPS; + } + + if (read != len && retries++ < UBI_IO_RETRIES) { + dbg_io("error %d while reading %d bytes from PEB %d:%d, " + "read only %zd bytes, retry", + err, len, pnum, offset, read); + yield(); + goto retry; + } + + ubi_err("error %d while reading %d bytes from PEB %d:%d, " + "read %zd bytes", err, len, pnum, offset, read); + ubi_dbg_dump_stack(); + + /* + * The driver should never return -EBADMSG if it failed to read + * all the requested data. But some buggy drivers might do + * this, so we change it to -EIO. + */ + if (read != len && err == -EBADMSG) { + ubi_assert(0); + printk("%s[%d] not here\n", __func__, __LINE__); +/* err = -EIO; */ + } + } else { + ubi_assert(len == read); + + if (ubi_dbg_is_bitflip()) { + dbg_msg("bit-flip (emulated)"); + err = UBI_IO_BITFLIPS; + } + } + + return err; +} + +/** + * ubi_io_write - write data to a physical eraseblock. + * @ubi: UBI device description object + * @buf: buffer with the data to write + * @pnum: physical eraseblock number to write to + * @offset: offset within the physical eraseblock where to write + * @len: how many bytes to write + * + * This function writes @len bytes of data from buffer @buf to offset @offset + * of physical eraseblock @pnum. If all the data were successfully written, + * zero is returned. If an error occurred, this function returns a negative + * error code. If %-EIO is returned, the physical eraseblock most probably went + * bad. + * + * Note, in case of an error, it is possible that something was still written + * to the flash media, but may be some garbage. 
+ */ +int ubi_io_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, + int len) +{ + int err; + size_t written; + loff_t addr; + + dbg_io("write %d bytes to PEB %d:%d", len, pnum, offset); + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + ubi_assert(offset >= 0 && offset + len <= ubi->peb_size); + ubi_assert(offset % ubi->hdrs_min_io_size == 0); + ubi_assert(len > 0 && len % ubi->hdrs_min_io_size == 0); + + if (ubi->ro_mode) { + ubi_err("read-only mode"); + return -EROFS; + } + pnum += ubi->peb_start; + /* The below has to be compiled out if paranoid checks are disabled */ + + err = paranoid_check_not_bad(ubi, pnum); + if (err) + return err > 0 ? -EINVAL : err; + + /* The area we are writing to has to contain all 0xFF bytes */ + err = paranoid_check_all_ff(ubi, pnum, offset, len); + if (err) + return err > 0 ? -EINVAL : err; + + if (offset >= ubi->leb_start) { + /* + * We write to the data area of the physical eraseblock. Make + * sure it has valid EC and VID headers. + */ + err = paranoid_check_peb_ec_hdr(ubi, pnum); + if (err) + return err > 0 ? -EINVAL : err; + err = paranoid_check_peb_vid_hdr(ubi, pnum); + if (err) + return err > 0 ? -EINVAL : err; + } + + if (ubi_dbg_is_write_failure()) { + dbg_err("cannot write %d bytes to PEB %d:%d " + "(emulated)", len, pnum, offset); + ubi_dbg_dump_stack(); + return -EIO; + } + + addr = (loff_t)pnum * ubi->peb_size + offset; + err = write_ubi_nand(addr, len, &written, buf); +// err = ubi->mtd->write(ubi->mtd, addr, len, &written, buf); + if (err) { + ubi_err("error %d while writing %d bytes to PEB %d:%d, written" + " %zd bytes", err, len, pnum, offset, written); + ubi_dbg_dump_stack(); + } else + ubi_assert(written == len); + + return err; +} + +/** + * erase_callback - MTD erasure call-back. + * @ei: MTD erase information object. + * + * Note, even though MTD erase interface is asynchronous, all the current + * implementations are synchronous anyway. 
+ */ +static void erase_callback(struct erase_info *ei) +{ + wake_up_interruptible((wait_queue_head_t *)ei->priv); +} + +/** + * do_sync_erase - synchronously erase a physical eraseblock. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to erase + * + * This function synchronously erases physical eraseblock @pnum and returns + * zero in case of success and a negative error code in case of failure. If + * %-EIO is returned, the physical eraseblock most probably went bad. + */ +static int do_sync_erase(struct ubi_device *ubi, int pnum) +{ + int err, retries = 0; + struct erase_info ei; + wait_queue_head_t wq; + + dbg_io("erase PEB %d", pnum); + pnum += ubi->peb_start; +retry: + init_waitqueue_head(&wq); + memset(&ei, 0, sizeof(struct erase_info)); + + ei.addr = (loff_t)pnum * ubi->peb_size; + ei.len = ubi->peb_size; + ei.callback = erase_callback; + ei.priv = (unsigned long)&wq; + err = erase_ubi_nand(pnum); + //err = ubi->mtd->erase(ubi->mtd, &ei); + if (err) { + if (retries++ < UBI_IO_RETRIES) { + dbg_io("error %d while erasing PEB %d, retry", + err, pnum); + yield(); + goto retry; + } + ubi_err("cannot erase PEB %d, error %d", pnum, err); + ubi_dbg_dump_stack(); + return err; + } + + err = wait_event_interruptible(wq, ei.state == MTD_ERASE_DONE || + ei.state == MTD_ERASE_FAILED); + if (err) { + ubi_err("interrupted PEB %d erasure", pnum); + return -EINTR; + } + +/* if (ei.state == MTD_ERASE_FAILED) { + if (retries++ < UBI_IO_RETRIES) { + dbg_io("error while erasing PEB %d, retry", pnum); + yield(); + goto retry; + } + ubi_err("cannot erase PEB %d", pnum); + ubi_dbg_dump_stack(); + return -EIO; + } +*/ + err = paranoid_check_all_ff(ubi, pnum, 0, ubi->peb_size); + if (err) + return err > 0 ? -EINVAL : err; + + if (ubi_dbg_is_erase_failure() && !err) { + dbg_err("cannot erase PEB %d (emulated)", pnum); + return -EIO; + } + + return 0; +} + +/** + * check_pattern - check if buffer contains only a certain byte pattern. 
+ * @buf: buffer to check + * @patt: the pattern to check + * @size: buffer size in bytes + * + * This function returns %1 in there are only @patt bytes in @buf, and %0 if + * something else was also found. + */ +static int check_pattern(const void *buf, uint8_t patt, int size) +{ + int i; + + for (i = 0; i < size; i++) + if (((const uint8_t *)buf)[i] != patt) + return 0; + return 1; +} + +/* Patterns to write to a physical eraseblock when torturing it */ +static uint8_t patterns[] = {0xa5, 0x5a, 0x0}; + +/** + * torture_peb - test a supposedly bad physical eraseblock. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to test + * + * This function returns %-EIO if the physical eraseblock did not pass the + * test, a positive number of erase operations done if the test was + * successfully passed, and other negative error codes in case of other errors. + */ +static int torture_peb(struct ubi_device *ubi, int pnum) +{ + int err, i, patt_count; + + patt_count = ARRAY_SIZE(patterns); + ubi_assert(patt_count > 0); + + mutex_lock(&ubi->buf_mutex); + for (i = 0; i < patt_count; i++) { + err = do_sync_erase(ubi, pnum); + if (err) + goto out; + + /* Make sure the PEB contains only 0xFF bytes */ + err = ubi_io_read(ubi, ubi->peb_buf1, pnum, 0, ubi->peb_size); + if (err) + goto out; + + err = check_pattern(ubi->peb_buf1, 0xFF, ubi->peb_size); + if (err == 0) { + ubi_err("erased PEB %d, but a non-0xFF byte found", + pnum); + err = -EIO; + goto out; + } + + /* Write a pattern and check it */ + memset(ubi->peb_buf1, patterns[i], ubi->peb_size); + err = ubi_io_write(ubi, ubi->peb_buf1, pnum, 0, ubi->peb_size); + if (err) + goto out; + + memset(ubi->peb_buf1, ~patterns[i], ubi->peb_size); + err = ubi_io_read(ubi, ubi->peb_buf1, pnum, 0, ubi->peb_size); + if (err) + goto out; + + err = check_pattern(ubi->peb_buf1, patterns[i], ubi->peb_size); + if (err == 0) { + ubi_err("pattern %x checking failed for PEB %d", + patterns[i], pnum); + err = -EIO; + 
goto out; + } + } + + err = patt_count; + +out: + mutex_unlock(&ubi->buf_mutex); + if (err == UBI_IO_BITFLIPS || err == -EBADMSG) { + /* + * If a bit-flip or data integrity error was detected, the test + * has not passed because it happened on a freshly erased + * physical eraseblock which means something is wrong with it. + */ + ubi_err("read problems on freshly erased PEB %d, must be bad", + pnum); + err = -EIO; + } + return err; +} + +/** + * ubi_io_sync_erase - synchronously erase a physical eraseblock. + * @ubi: UBI device description object + * @pnum: physical eraseblock number to erase + * @torture: if this physical eraseblock has to be tortured + * + * This function synchronously erases physical eraseblock @pnum. If @torture + * flag is not zero, the physical eraseblock is checked by means of writing + * different patterns to it and reading them back. If the torturing is enabled, + * the physical eraseblock is erased more then once. + * + * This function returns the number of erasures made in case of success, %-EIO + * if the erasure failed or the torturing test failed, and other negative error + * codes in case of other errors. Note, %-EIO means that the physical + * eraseblock is bad. + */ +int ubi_io_sync_erase(struct ubi_device *ubi, int pnum, int torture) +{ + int err, ret = 0; + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + err = paranoid_check_not_bad(ubi, pnum); + if (err != 0) + return err > 0 ? -EINVAL : err; + + if (ubi->ro_mode) { + ubi_err("read-only mode"); + return -EROFS; + } + + if (torture) { + ret = torture_peb(ubi, pnum); + if (ret < 0) + return ret; + } + + err = do_sync_erase(ubi, pnum); + if (err) + return err; + + return ret + 1; +} + +/** + * ubi_io_is_bad - check if a physical eraseblock is bad. 
+ * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * + * This function returns a positive number if the physical eraseblock is bad, + * zero if not, and a negative error code if an error occurred. + */ +int ubi_io_is_bad(const struct ubi_device *ubi, int pnum) +{ + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + pnum += ubi->peb_start; + if (ubi->bad_allowed) { + int ret; + + ret = nand_block_isbad(pnum); + +// ret = mtd->block_isbad(mtd, (loff_t)pnum * ubi->peb_size); + if (ret < 0) + ubi_err("error %d while checking if PEB %d is bad", + ret, pnum); + else if (ret) + dbg_io("PEB %d is bad", pnum); + return ret; + } + + return 0; +} + +/** + * ubi_io_mark_bad - mark a physical eraseblock as bad. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to mark + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +int ubi_io_mark_bad(const struct ubi_device *ubi, int pnum) +{ + int err; + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + if (ubi->ro_mode) { + ubi_err("read-only mode"); + return -EROFS; + } + + if (!ubi->bad_allowed) + return 0; + pnum += ubi->peb_start; + err = nand_block_markbad(pnum); +// err = mtd->block_markbad(mtd, (loff_t)pnum * ubi->peb_size); + if (err) + ubi_err("cannot mark PEB %d bad, error %d", pnum, err); + return err; +} + +/** + * validate_ec_hdr - validate an erase counter header. + * @ubi: UBI device description object + * @ec_hdr: the erase counter header to check + * + * This function returns zero if the erase counter header is OK, and %1 if + * not. 
+ */
+static int validate_ec_hdr(const struct ubi_device *ubi,
+			   const struct ubi_ec_hdr *ec_hdr)
+{
+	/* On-flash fields are big-endian; convert them once up front */
+	long long erase_cnt = be64_to_cpu(ec_hdr->ec);
+	int vid_off = be32_to_cpu(ec_hdr->vid_hdr_offset);
+	int data_off = be32_to_cpu(ec_hdr->data_offset);
+
+	if (ec_hdr->version != UBI_VERSION) {
+		ubi_err("node with incompatible UBI version found: "
+			"this UBI version is %d, image version is %d",
+			UBI_VERSION, (int)ec_hdr->version);
+		goto invalid;
+	}
+
+	/* Both offsets must agree with what this UBI device uses */
+	if (vid_off != ubi->vid_hdr_offset) {
+		ubi_err("bad VID header offset %d, expected %d",
+			vid_off, ubi->vid_hdr_offset);
+		goto invalid;
+	}
+
+	if (data_off != ubi->leb_start) {
+		ubi_err("bad data offset %d, expected %d",
+			data_off, ubi->leb_start);
+		goto invalid;
+	}
+
+	if (erase_cnt < 0 || erase_cnt > UBI_MAX_ERASECOUNTER) {
+		ubi_err("bad erase counter %lld", erase_cnt);
+		goto invalid;
+	}
+
+	return 0;
+
+invalid:
+	ubi_err("bad EC header");
+	ubi_dbg_dump_ec_hdr(ec_hdr);
+	ubi_dbg_dump_stack();
+	return 1;
+}
+
+/**
+ * ubi_io_read_ec_hdr - read and check an erase counter header.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock to read from
+ * @ec_hdr: a &struct ubi_ec_hdr object where to store the read erase counter
+ * header
+ * @verbose: be verbose if the header is corrupted or was not found
+ *
+ * This function reads erase counter header from physical eraseblock @pnum and
+ * stores it in @ec_hdr. This function also checks CRC checksum of the read
+ * erase counter header.
+ * The following codes may be returned:
+ *
+ * o %0 if the CRC checksum is correct and the header was successfully read;
+ * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected
+ *   and corrected by the flash driver; this is harmless but may indicate that
+ *   this eraseblock may become bad soon (but may be not);
+ * o %UBI_IO_BAD_EC_HDR if the erase counter header is corrupted (a CRC error);
+ * o %UBI_IO_PEB_EMPTY if the physical eraseblock is empty;
+ * o a negative error code in case of failure.
+ */
+int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum,
+		       struct ubi_ec_hdr *ec_hdr, int verbose)
+{
+	uint32_t magic, calc_crc, stored_crc;
+	int read_err;
+	int err;
+
+	dbg_io("read EC header from PEB %d", pnum);
+	ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
+	if (UBI_IO_DEBUG)
+		verbose = 1;
+
+	err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE);
+	if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG)
+		return err;
+
+	/*
+	 * We read all the data, but either a correctable bit-flip occurred, or
+	 * MTD reported about some data integrity error, like an ECC error in
+	 * case of NAND. The former is harmless, the latter may mean that the
+	 * read data is corrupted. But we have a CRC check-sum and we will
+	 * detect this. If the EC header is still OK, we just report this as
+	 * there was a bit-flip. (Zero here simply means a clean read.)
+	 */
+	read_err = err;
+
+	magic = be32_to_cpu(ec_hdr->magic);
+	if (magic != UBI_EC_HDR_MAGIC) {
+		/*
+		 * The magic field is wrong. Let's check if we have read all
+		 * 0xFF. If yes, this physical eraseblock is assumed to be
+		 * empty.
+		 *
+		 * But if there was a read error, we do not test it for all
+		 * 0xFFs. Even if it does contain all 0xFFs, this error
+		 * indicates that something is still wrong with this physical
+		 * eraseblock and we anyway cannot treat it as empty.
+		 */
+		int blank = read_err != -EBADMSG &&
+			    check_pattern(ec_hdr, 0xFF, UBI_EC_HDR_SIZE);
+
+		if (!blank) {
+			/*
+			 * This is not a valid erase counter header, and these
+			 * are not 0xFF bytes. Report that the header is
+			 * corrupted.
+			 */
+			if (verbose) {
+				ubi_warn("bad magic number at PEB %d: %08x instead of "
+					 "%08x", pnum, magic, UBI_EC_HDR_MAGIC);
+				ubi_dbg_dump_ec_hdr(ec_hdr);
+			}
+			return UBI_IO_BAD_EC_HDR;
+		}
+
+		/*
+		 * The physical eraseblock is supposedly empty. The below is
+		 * just a paranoid check, it has to be compiled out if paranoid
+		 * checks are disabled.
+		 */
+		err = paranoid_check_all_ff(ubi, pnum, 0, ubi->peb_size);
+		if (err)
+			return err > 0 ? UBI_IO_BAD_EC_HDR : err;
+
+		if (verbose)
+			ubi_warn("no EC header found at PEB %d, "
+				 "only 0xFF bytes", pnum);
+		return UBI_IO_PEB_EMPTY;
+	}
+
+	calc_crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC);
+	stored_crc = be32_to_cpu(ec_hdr->hdr_crc);
+
+	if (stored_crc != calc_crc) {
+		if (verbose) {
+			ubi_warn("bad EC header CRC at PEB %d, calculated %#08x,"
+				 " read %#08x", pnum, calc_crc, stored_crc);
+			ubi_dbg_dump_ec_hdr(ec_hdr);
+		}
+		return UBI_IO_BAD_EC_HDR;
+	}
+
+	/* And of course validate what has just been read from the media */
+	if (validate_ec_hdr(ubi, ec_hdr)) {
+		ubi_err("validation failed for PEB %d", pnum);
+		return -EINVAL;
+	}
+
+	return read_err ? UBI_IO_BITFLIPS : 0;
+}
+
+/**
+ * ubi_io_write_ec_hdr - write an erase counter header.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock to write to
+ * @ec_hdr: the erase counter header to write
+ *
+ * This function writes erase counter header described by @ec_hdr to physical
+ * eraseblock @pnum. It also fills most fields of @ec_hdr before writing, so
+ * callers do not have to fill them. Callers must only fill the @ec_hdr->ec
+ * field.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure. If %-EIO is returned, the physical eraseblock most probably
+ * went bad.
+ */
+int ubi_io_write_ec_hdr(struct ubi_device *ubi, int pnum,
+			struct ubi_ec_hdr *ec_hdr)
+{
+	uint32_t hdr_crc;
+
+	dbg_io("write EC header to PEB %d", pnum);
+	ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
+
+	/* Fill in everything but the erase counter, which the caller set */
+	ec_hdr->magic = cpu_to_be32(UBI_EC_HDR_MAGIC);
+	ec_hdr->version = UBI_VERSION;
+	ec_hdr->vid_hdr_offset = cpu_to_be32(ubi->vid_hdr_offset);
+	ec_hdr->data_offset = cpu_to_be32(ubi->leb_start);
+
+	/* The CRC covers the header up to, but not including, hdr_crc */
+	hdr_crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC);
+	ec_hdr->hdr_crc = cpu_to_be32(hdr_crc);
+
+	if (paranoid_check_ec_hdr(ubi, pnum, ec_hdr))
+		return -EINVAL;
+
+	return ubi_io_write(ubi, ec_hdr, pnum, 0, ubi->ec_hdr_alsize);
+}
+
+/**
+ * validate_vid_hdr - validate a volume identifier header.
+ * @ubi: UBI device description object
+ * @vid_hdr: the volume identifier header to check
+ *
+ * This function checks that data stored in the volume identifier header
+ * @vid_hdr. Returns zero if the VID header is OK and %1 if not.
+ */
+static int validate_vid_hdr(const struct ubi_device *ubi,
+			    const struct ubi_vid_hdr *vid_hdr)
+{
+	int vol_type = vid_hdr->vol_type;
+	int copy_flag = vid_hdr->copy_flag;
+	int vol_id = be32_to_cpu(vid_hdr->vol_id);
+	int lnum = be32_to_cpu(vid_hdr->lnum);
+	int compat = vid_hdr->compat;
+	int data_size = be32_to_cpu(vid_hdr->data_size);
+	int used_ebs = be32_to_cpu(vid_hdr->used_ebs);
+	int data_pad = be32_to_cpu(vid_hdr->data_pad);
+	/* A CRC is a plain 32-bit pattern, keep it unsigned */
+	uint32_t data_crc = be32_to_cpu(vid_hdr->data_crc);
+	int usable_leb_size = ubi->leb_size - data_pad;
+
+	if (copy_flag != 0 && copy_flag != 1) {
+		dbg_err("bad copy_flag");
+		goto bad;
+	}
+
+	if (vol_id < 0 || lnum < 0 || data_size < 0 || used_ebs < 0 ||
+	    data_pad < 0) {
+		dbg_err("negative values");
+		goto bad;
+	}
+
+	/* IDs between the user range and the internal range are invalid */
+	if (vol_id >= UBI_MAX_VOLUMES && vol_id < UBI_INTERNAL_VOL_START) {
+		dbg_err("bad vol_id");
+		goto bad;
+	}
+
+	/* User volumes must not carry a compatibility flag ... */
+	if (vol_id < UBI_INTERNAL_VOL_START && compat != 0) {
+		dbg_err("bad compat");
+		goto bad;
+	}
+
+	/* ... and internal volumes must carry one of the known flags */
+	if (vol_id >= UBI_INTERNAL_VOL_START && compat != UBI_COMPAT_DELETE &&
+	    compat != UBI_COMPAT_RO && compat != UBI_COMPAT_PRESERVE &&
+	    compat != UBI_COMPAT_REJECT) {
+		dbg_err("bad compat");
+		goto bad;
+	}
+
+	if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) {
+		dbg_err("bad vol_type");
+		goto bad;
+	}
+
+	if (data_pad >= ubi->leb_size / 2) {
+		dbg_err("bad data_pad");
+		goto bad;
+	}
+
+	if (vol_type == UBI_VID_STATIC) {
+		/*
+		 * Although from high-level point of view static volumes may
+		 * contain zero bytes of data, but no VID headers can contain
+		 * zero at these fields, because empty volumes do not have
+		 * mapped logical eraseblocks.
+		 */
+		if (used_ebs == 0) {
+			dbg_err("zero used_ebs");
+			goto bad;
+		}
+		if (data_size == 0) {
+			dbg_err("zero data_size");
+			goto bad;
+		}
+		if (lnum < used_ebs - 1) {
+			/* Every LEB but the last one must be full */
+			if (data_size != usable_leb_size) {
+				dbg_err("bad data_size");
+				goto bad;
+			}
+		} else if (lnum == used_ebs - 1) {
+			/*
+			 * A zero data_size was already rejected above; this
+			 * check is kept as a defensive duplicate.
+			 */
+			if (data_size == 0) {
+				dbg_err("bad data_size at last LEB");
+				goto bad;
+			}
+		} else {
+			dbg_err("too high lnum");
+			goto bad;
+		}
+	} else {
+		if (copy_flag == 0) {
+			if (data_crc != 0) {
+				/*
+				 * Printed unconditionally (dbg_err() may be
+				 * compiled out). Show the CRC as hex and end
+				 * the line so the message does not run into
+				 * the next one.
+				 */
+				printf("data_crc is %#08x\n",
+				       (unsigned int)data_crc);
+				dbg_err("non-zero data CRC");
+				goto bad;
+			}
+			if (data_size != 0) {
+				dbg_err("non-zero data_size");
+				goto bad;
+			}
+		} else {
+			if (data_size == 0) {
+				dbg_err("zero data_size of copy");
+				goto bad;
+			}
+		}
+		if (used_ebs != 0) {
+			dbg_err("bad used_ebs");
+			goto bad;
+		}
+	}
+
+	return 0;
+
+bad:
+	ubi_err("bad VID header");
+	ubi_dbg_dump_vid_hdr(vid_hdr);
+	ubi_dbg_dump_stack();
+	return 1;
+}
+
+/**
+ * ubi_io_read_vid_hdr - read and check a volume identifier header.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock number to read from
+ * @vid_hdr: &struct ubi_vid_hdr object where to store the read volume
+ * identifier header
+ * @verbose: be verbose if the header is corrupted or wasn't found
+ *
+ * This function reads the volume identifier header from physical eraseblock
+ * @pnum and stores it in @vid_hdr. It also checks CRC checksum of the read
+ * volume identifier header.
+ * The following codes may be returned:
+ *
+ * o %0 if the CRC checksum is correct and the header was successfully read;
+ * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected
+ *   and corrected by the flash driver; this is harmless but may indicate that
+ *   this eraseblock may become bad soon;
+ * o %UBI_IO_BAD_VID_HDR if the volume identifier header is corrupted (a CRC
+ *   error detected);
+ * o %UBI_IO_PEB_FREE if the physical eraseblock is free (i.e., there is no VID
+ *   header there);
+ * o a negative error code in case of failure.
+ */
+int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum,
+			struct ubi_vid_hdr *vid_hdr, int verbose)
+{
+	uint32_t magic, calc_crc, stored_crc;
+	int read_err;
+	int err;
+	void *buf;
+
+	dbg_io("read VID header from PEB %d", pnum);
+	ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
+	if (UBI_IO_DEBUG)
+		verbose = 1;
+
+	/* The read starts vid_hdr_shift bytes before the header itself */
+	buf = (char *)vid_hdr - ubi->vid_hdr_shift;
+	err = ubi_io_read(ubi, buf, pnum, ubi->vid_hdr_aloffset,
+			  ubi->vid_hdr_alsize);
+	if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG)
+		return err;
+
+	/*
+	 * We read all the data, but either a correctable bit-flip occurred, or
+	 * MTD reported about some data integrity error, like an ECC error in
+	 * case of NAND. The former is harmless, the latter may mean the read
+	 * data is corrupted. But we have a CRC check-sum and we will identify
+	 * this. If the VID header is still OK, we just report this as there
+	 * was a bit-flip. (Zero here simply means a clean read.)
+	 */
+	read_err = err;
+
+	magic = be32_to_cpu(vid_hdr->magic);
+	if (magic != UBI_VID_HDR_MAGIC) {
+		/*
+		 * If we have read all 0xFF bytes, the VID header probably does
+		 * not exist and the physical eraseblock is assumed to be free.
+		 *
+		 * But if there was a read error, we do not test the data for
+		 * 0xFFs. Even if it does contain all 0xFFs, this error
+		 * indicates that something is still wrong with this physical
+		 * eraseblock and it cannot be regarded as free.
+		 */
+		int blank = read_err != -EBADMSG &&
+			    check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE);
+
+		if (!blank) {
+			/*
+			 * This is not a valid VID header, and these are not
+			 * 0xFF bytes. Report that the header is corrupted.
+			 */
+			if (verbose) {
+				ubi_warn("bad magic number at PEB %d: %08x instead of "
+					 "%08x", pnum, magic, UBI_VID_HDR_MAGIC);
+				ubi_dbg_dump_vid_hdr(vid_hdr);
+			}
+			return UBI_IO_BAD_VID_HDR;
+		}
+
+		/*
+		 * The physical eraseblock is supposedly free. The below is
+		 * just a paranoid check, it has to be compiled out if paranoid
+		 * checks are disabled.
+		 */
+		err = paranoid_check_all_ff(ubi, pnum, ubi->leb_start,
+					    ubi->leb_size);
+		if (err)
+			return err > 0 ? UBI_IO_BAD_VID_HDR : err;
+
+		if (verbose)
+			ubi_warn("no VID header found at PEB %d, "
+				 "only 0xFF bytes", pnum);
+		return UBI_IO_PEB_FREE;
+	}
+
+	calc_crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC);
+	stored_crc = be32_to_cpu(vid_hdr->hdr_crc);
+
+	if (stored_crc != calc_crc) {
+		if (verbose) {
+			ubi_warn("bad CRC at PEB %d, calculated %#08x, "
+				 "read %#08x", pnum, calc_crc, stored_crc);
+			ubi_dbg_dump_vid_hdr(vid_hdr);
+		}
+		return UBI_IO_BAD_VID_HDR;
+	}
+
+	/* Validate the VID header that we have just read */
+	if (validate_vid_hdr(ubi, vid_hdr)) {
+		ubi_err("validation failed for PEB %d", pnum);
+		return -EINVAL;
+	}
+
+	return read_err ? UBI_IO_BITFLIPS : 0;
+}
+
+/**
+ * ubi_io_write_vid_hdr - write a volume identifier header.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to write to
+ * @vid_hdr: the volume identifier header to write
+ *
+ * This function writes the volume identifier header described by @vid_hdr to
+ * physical eraseblock @pnum. This function automatically fills the
+ * @vid_hdr->magic and the @vid_hdr->version fields, as well as calculates
+ * header CRC checksum and stores it at vid_hdr->hdr_crc.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ * If %-EIO is returned, the physical eraseblock probably went
+ * bad.
+ */
+int ubi_io_write_vid_hdr(struct ubi_device *ubi, int pnum,
+			 struct ubi_vid_hdr *vid_hdr)
+{
+	uint32_t hdr_crc;
+	void *buf;
+	int err;
+
+	dbg_io("write VID header to PEB %d", pnum);
+	ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
+
+	/* The EC header must already be in place and sane */
+	err = paranoid_check_peb_ec_hdr(ubi, pnum);
+	if (err > 0)
+		return -EINVAL;
+	if (err < 0)
+		return err;
+
+	vid_hdr->magic = cpu_to_be32(UBI_VID_HDR_MAGIC);
+	vid_hdr->version = UBI_VERSION;
+	hdr_crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC);
+	vid_hdr->hdr_crc = cpu_to_be32(hdr_crc);
+
+	if (paranoid_check_vid_hdr(ubi, pnum, vid_hdr))
+		return -EINVAL;
+
+	/* The write starts vid_hdr_shift bytes before the header itself */
+	buf = (char *)vid_hdr - ubi->vid_hdr_shift;
+	return ubi_io_write(ubi, buf, pnum, ubi->vid_hdr_aloffset,
+			    ubi->vid_hdr_alsize);
+}
+
+#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
+
+/**
+ * paranoid_check_not_bad - ensure that a physical eraseblock is not bad.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock number to check
+ *
+ * This function returns zero if the physical eraseblock is good, a positive
+ * number if it is bad and a negative error code if an error occurred.
+ */
+static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum)
+{
+	int bad = ubi_io_is_bad(ubi, pnum);
+
+	/* Anything but zero (bad block or query error) fails the check */
+	if (bad) {
+		ubi_err("paranoid check failed for PEB %d", pnum);
+		ubi_dbg_dump_stack();
+	}
+
+	return bad;
+}
+
+/**
+ * paranoid_check_ec_hdr - check if an erase counter header is all right.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock number the erase counter header belongs to
+ * @ec_hdr: the erase counter header to check
+ *
+ * This function returns zero if the erase counter header contains valid
+ * values, and %1 if not.
+ */
+static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum,
+				 const struct ubi_ec_hdr *ec_hdr)
+{
+	uint32_t magic = be32_to_cpu(ec_hdr->magic);
+	int failed = 0;
+
+	if (magic != UBI_EC_HDR_MAGIC) {
+		ubi_err("bad magic %#08x, must be %#08x",
+			magic, UBI_EC_HDR_MAGIC);
+		failed = 1;
+	} else if (validate_ec_hdr(ubi, ec_hdr)) {
+		ubi_err("paranoid check failed for PEB %d", pnum);
+		failed = 1;
+	}
+
+	if (!failed)
+		return 0;
+
+	ubi_dbg_dump_ec_hdr(ec_hdr);
+	ubi_dbg_dump_stack();
+	return 1;
+}
+
+/**
+ * paranoid_check_peb_ec_hdr - check that the erase counter header of a
+ * physical eraseblock is in-place and is all right.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to check
+ *
+ * This function returns zero if the erase counter header is all right, %1 if
+ * not, and a negative error code if an error occurred.
+ */
+static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum)
+{
+	struct ubi_ec_hdr *hdr;
+	uint32_t calc_crc, stored_crc;
+	int err;
+
+	hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
+	if (!hdr)
+		return -ENOMEM;
+
+	/* Bit-flips and ECC errors are tolerated; the CRC decides below */
+	err = ubi_io_read(ubi, hdr, pnum, 0, UBI_EC_HDR_SIZE);
+	if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG)
+		goto out_free;
+
+	calc_crc = crc32(UBI_CRC32_INIT, hdr, UBI_EC_HDR_SIZE_CRC);
+	stored_crc = be32_to_cpu(hdr->hdr_crc);
+	if (stored_crc != calc_crc) {
+		ubi_err("bad CRC, calculated %#08x, read %#08x",
+			calc_crc, stored_crc);
+		ubi_err("paranoid check failed for PEB %d", pnum);
+		ubi_dbg_dump_ec_hdr(hdr);
+		ubi_dbg_dump_stack();
+		err = 1;
+		goto out_free;
+	}
+
+	err = paranoid_check_ec_hdr(ubi, pnum, hdr);
+
+out_free:
+	kfree(hdr);
+	return err;
+}
+
+/**
+ * paranoid_check_vid_hdr - check that a volume identifier header is all right.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock number the volume identifier header belongs to
+ * @vid_hdr: the volume identifier header to check
+ *
+ * This function returns zero if the volume identifier header is all right, and
+ * %1 if not.
+ */
+static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum,
+				  const struct ubi_vid_hdr *vid_hdr)
+{
+	uint32_t magic;
+
+	magic = be32_to_cpu(vid_hdr->magic);
+	if (magic != UBI_VID_HDR_MAGIC) {
+		ubi_err("bad VID header magic %#08x at PEB %d, must be %#08x",
+			magic, pnum, UBI_VID_HDR_MAGIC);
+		goto fail;
+	}
+
+	/*
+	 * validate_vid_hdr() reports what exactly is wrong; the generic
+	 * "paranoid check failed" line is printed once, at the fail label.
+	 */
+	if (validate_vid_hdr(ubi, vid_hdr))
+		goto fail;
+
+	return 0;
+
+fail:
+	ubi_err("paranoid check failed for PEB %d", pnum);
+	ubi_dbg_dump_vid_hdr(vid_hdr);
+	ubi_dbg_dump_stack();
+	return 1;
+}
+
+/**
+ * paranoid_check_peb_vid_hdr - check that the volume identifier header of a
+ * physical eraseblock is in-place and is all right.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to check
+ *
+ * This function returns zero if the volume identifier header is all right,
+ * %1 if not, and a negative error code if an error occurred.
+ */
+static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum)
+{
+	int err;
+	uint32_t crc, hdr_crc;
+	struct ubi_vid_hdr *vid_hdr;
+	void *p;
+
+	vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
+	if (!vid_hdr)
+		return -ENOMEM;
+
+	/* The read starts vid_hdr_shift bytes before the header itself */
+	p = (char *)vid_hdr - ubi->vid_hdr_shift;
+	err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset,
+			  ubi->vid_hdr_alsize);
+	/* Bit-flips and ECC errors are tolerated; the CRC decides below */
+	if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG)
+		goto exit;
+
+	/*
+	 * Use the VID header CRC length here, not the EC header one, to match
+	 * how ubi_io_write_vid_hdr() and ubi_io_read_vid_hdr() compute it.
+	 */
+	crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC);
+	hdr_crc = be32_to_cpu(vid_hdr->hdr_crc);
+	if (hdr_crc != crc) {
+		ubi_err("bad VID header CRC at PEB %d, calculated %#08x, "
+			"read %#08x", pnum, crc, hdr_crc);
+		ubi_err("paranoid check failed for PEB %d", pnum);
+		ubi_dbg_dump_vid_hdr(vid_hdr);
+		ubi_dbg_dump_stack();
+		err = 1;
+		goto exit;
+	}
+
+	err = paranoid_check_vid_hdr(ubi, pnum, vid_hdr);
+
+exit:
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	return err;
+}
+
+/**
+ * paranoid_check_all_ff - check that a region of flash is empty.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to check
+ * @offset: the starting offset within the physical eraseblock to check
+ * @len: the length of the region to check
+ *
+ * This function returns zero if only 0xFF bytes are present at offset
+ * @offset of the physical eraseblock @pnum, %1 if not, and a negative error
+ * code if an error occurred.
+ */
+static int paranoid_check_all_ff(struct ubi_device *ubi, int pnum, int offset,
+				 int len)
+{
+	/* Zeroed so the error message never prints an indeterminate value */
+	size_t read = 0;
+	int err;
+	loff_t addr = (loff_t)pnum * ubi->peb_size + offset;
+
+	/*
+	 * NOTE: the caller-supplied @len is deliberately overridden: only the
+	 * first two minimal I/O units starting at @offset are read and
+	 * checked, whatever region length was requested.
+	 */
+	len = ubi->min_io_size * 2;
+
+	mutex_lock(&ubi->dbg_buf_mutex);
+	/* read_ubi_nand() is used in place of the MTD read() hook */
+	err = read_ubi_nand(addr, len, &read, ubi->dbg_peb_buf);
+	if (err && err != -EUCLEAN) {
+		ubi_err("error %d while reading %d bytes from PEB %d:%d, "
+			"read %zu bytes", err, len, pnum, offset, read);
+		goto error;
+	}
+
+	/* check_pattern() returns zero when a non-0xFF byte is found */
+	err = check_pattern(ubi->dbg_peb_buf, 0xFF, len);
+	if (err == 0) {
+		ubi_err("flash region at PEB %d:%d, length %d does not "
+			"contain all 0xFF bytes", pnum, offset, len);
+		goto fail;
+	}
+	mutex_unlock(&ubi->dbg_buf_mutex);
+
+	return 0;
+
+fail:
+	ubi_err("paranoid check failed for PEB %d", pnum);
+	dbg_msg("hex dump of the %d-%d region", offset, offset + len);
+	print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
+		       ubi->dbg_peb_buf, len, 1);
+	err = 1;
+error:
+	ubi_dbg_dump_stack();
+	mutex_unlock(&ubi->dbg_buf_mutex);
+	return err;
+}
+
+#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */
diff --git a/fs/ubi/kapi.c b/fs/ubi/kapi.c
new file mode 100755
index 0000000..423d479
--- /dev/null
+++ b/fs/ubi/kapi.c
@@ -0,0 +1,638 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/* This file mostly implements UBI kernel API functions */
+
+#ifdef UBI_LINUX
+#include <linux/module.h>
+#include <linux/err.h>
+#include <asm/div64.h>
+#endif
+
+#include <ubi_uboot.h>
+#include "ubi.h"
+
+/**
+ * ubi_get_device_info - get information about UBI device.
+ * @ubi_num: UBI device number
+ * @di: the information is stored here
+ *
+ * This function returns %0 in case of success, %-EINVAL if the UBI device
+ * number is invalid, and %-ENODEV if there is no such UBI device.
+ */
+int ubi_get_device_info(int ubi_num, struct ubi_device_info *di)
+{
+	struct ubi_device *dev;
+
+	if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES)
+		return -EINVAL;
+
+	dev = ubi_get_device(ubi_num);
+	if (!dev)
+		return -ENODEV;
+
+	/* Copy the fields out while the device reference is held */
+	di->ubi_num = dev->ubi_num;
+	di->leb_size = dev->leb_size;
+	di->min_io_size = dev->min_io_size;
+	di->ro_mode = dev->ro_mode;
+	di->cdev = dev->cdev.dev;
+
+	ubi_put_device(dev);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ubi_get_device_info);
+
+/**
+ * ubi_get_volume_info - get information about UBI volume.
+ * @desc: volume descriptor
+ * @vi: the information is stored here
+ */
+void ubi_get_volume_info(struct ubi_volume_desc *desc,
+			 struct ubi_volume_info *vi)
+{
+	const struct ubi_volume *volume = desc->vol;
+
+	/* Identification */
+	vi->vol_id = volume->vol_id;
+	vi->ubi_num = volume->ubi->ubi_num;
+
+	/* Size and state */
+	vi->size = volume->reserved_pebs;
+	vi->used_bytes = volume->used_bytes;
+	vi->vol_type = volume->vol_type;
+	vi->corrupted = volume->corrupted;
+	vi->upd_marker = volume->upd_marker;
+	vi->alignment = volume->alignment;
+	vi->usable_leb_size = volume->usable_leb_size;
+
+	/* The name is shared with the volume object, not copied */
+	vi->name_len = volume->name_len;
+	vi->name = volume->name;
+	vi->cdev = volume->cdev.dev;
+}
+EXPORT_SYMBOL_GPL(ubi_get_volume_info);
+
+/**
+ * ubi_open_volume - open UBI volume.
+ * @ubi_num: UBI device number
+ * @vol_id: volume ID
+ * @mode: open mode
+ *
+ * The @mode parameter specifies if the volume should be opened in read-only
+ * mode, read-write mode, or exclusive mode. The exclusive mode guarantees that
+ * nobody else will be able to open this volume. UBI allows to have many volume
+ * readers and one writer at a time.
+ *
+ * If a static volume is being opened for the first time since boot, it will be
+ * checked by this function, which means it will be fully read and the CRC
+ * checksum of each logical eraseblock will be checked.
+ *
+ * This function returns volume descriptor in case of success and a negative
+ * error code in case of failure.
+ */
+struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode)
+{
+	struct ubi_volume_desc *desc;
+	struct ubi_device *ubi;
+	struct ubi_volume *vol;
+	int err;
+
+	dbg_msg("open device %d volume %d, mode %d", ubi_num, vol_id, mode);
+
+	if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES)
+		return ERR_PTR(-EINVAL);
+
+	/* Only the three known open modes are accepted */
+	switch (mode) {
+	case UBI_READONLY:
+	case UBI_READWRITE:
+	case UBI_EXCLUSIVE:
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	/*
+	 * First of all, we have to get the UBI device to prevent its removal.
+	 */
+	ubi = ubi_get_device(ubi_num);
+	if (!ubi)
+		return ERR_PTR(-ENODEV);
+
+	if (vol_id < 0 || vol_id >= ubi->vtbl_slots) {
+		err = -EINVAL;
+		goto out_put_ubi;
+	}
+
+	desc = kmalloc(sizeof(*desc), GFP_KERNEL);
+	if (!desc) {
+		err = -ENOMEM;
+		goto out_put_ubi;
+	}
+
+	/* -ENODEV covers both a failed module get and a missing volume */
+	err = -ENODEV;
+	if (!try_module_get(THIS_MODULE))
+		goto out_free;
+
+	spin_lock(&ubi->volumes_lock);
+	vol = ubi->volumes[vol_id];
+	if (!vol)
+		goto out_unlock;
+
+	/* From here on, any refusal means the volume is busy */
+	err = -EBUSY;
+	switch (mode) {
+	case UBI_READONLY:
+		if (vol->exclusive)
+			goto out_unlock;
+		vol->readers++;
+		break;
+
+	case UBI_READWRITE:
+		if (vol->exclusive || vol->writers > 0)
+			goto out_unlock;
+		vol->writers++;
+		break;
+
+	case UBI_EXCLUSIVE:
+		if (vol->exclusive || vol->writers || vol->readers)
+			goto out_unlock;
+		vol->exclusive = 1;
+		break;
+	}
+	get_device(&vol->dev);
+	vol->ref_count++;
+	spin_unlock(&ubi->volumes_lock);
+
+	desc->vol = vol;
+	desc->mode = mode;
+
+	mutex_lock(&ubi->ckvol_mutex);
+	if (!vol->checked) {
+		/* This is the first open - check the volume */
+		err = ubi_check_volume(ubi, vol_id);
+		if (err < 0) {
+			mutex_unlock(&ubi->ckvol_mutex);
+			/* Undoes the counters and references taken above */
+			ubi_close_volume(desc);
+			return ERR_PTR(err);
+		}
+		if (err == 1) {
+			ubi_warn("volume %d on UBI device %d is corrupted",
+				 vol_id, ubi->ubi_num);
+			vol->corrupted = 1;
+		}
+		vol->checked = 1;
+	}
+	mutex_unlock(&ubi->ckvol_mutex);
+
+	return desc;
+
+out_unlock:
+	spin_unlock(&ubi->volumes_lock);
+	module_put(THIS_MODULE);
+out_free:
+	kfree(desc);
+out_put_ubi:
+	ubi_put_device(ubi);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(ubi_open_volume);
+
+/**
+ * ubi_open_volume_nm - open UBI volume by name.
+ * @ubi_num: UBI device number
+ * @name: volume name
+ * @mode: open mode
+ *
+ * This function is similar to 'ubi_open_volume()', but opens a volume by name.
+ */
+struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name,
+					   int mode)
+{
+	struct ubi_volume_desc *desc;
+	struct ubi_device *ubi;
+	int found_id = -1;
+	int name_len;
+	int slot;
+
+	dbg_msg("open volume %s, mode %d", name, mode);
+
+	if (!name)
+		return ERR_PTR(-EINVAL);
+
+	/* One byte past the limit is enough to detect an over-long name */
+	name_len = strnlen(name, UBI_VOL_NAME_MAX + 1);
+	if (name_len > UBI_VOL_NAME_MAX)
+		return ERR_PTR(-EINVAL);
+
+	if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES)
+		return ERR_PTR(-EINVAL);
+
+	ubi = ubi_get_device(ubi_num);
+	if (!ubi)
+		return ERR_PTR(-ENODEV);
+
+	spin_lock(&ubi->volumes_lock);
+	/* Walk all volumes of this UBI device */
+	for (slot = 0; slot < ubi->vtbl_slots; slot++) {
+		struct ubi_volume *vol = ubi->volumes[slot];
+
+		if (!vol || name_len != vol->name_len)
+			continue;
+		if (strcmp(name, vol->name))
+			continue;
+
+		found_id = slot;
+		break;
+	}
+	spin_unlock(&ubi->volumes_lock);
+
+	desc = found_id >= 0 ? ubi_open_volume(ubi_num, found_id, mode)
+			     : ERR_PTR(-ENODEV);
+
+	/*
+	 * We should put the UBI device even in case of success, because
+	 * 'ubi_open_volume()' took a reference as well.
+	 */
+	ubi_put_device(ubi);
+	return desc;
+}
+EXPORT_SYMBOL_GPL(ubi_open_volume_nm);
+
+/**
+ * ubi_close_volume - close UBI volume.
+ * @desc: volume descriptor
+ *
+ * Drops the reader/writer/exclusive claim recorded in @desc->mode, releases
+ * the references taken when the volume was opened and frees @desc.
+ */
+void ubi_close_volume(struct ubi_volume_desc *desc)
+{
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+
+	dbg_msg("close volume %d, mode %d", vol->vol_id, desc->mode);
+
+	spin_lock(&ubi->volumes_lock);
+	switch (desc->mode) {
+	case UBI_READONLY:
+		vol->readers--;
+		break;
+	case UBI_READWRITE:
+		vol->writers--;
+		break;
+	case UBI_EXCLUSIVE:
+		vol->exclusive = 0;
+	}
+	vol->ref_count--;
+	spin_unlock(&ubi->volumes_lock);
+
+	kfree(desc);
+	put_device(&vol->dev);
+	ubi_put_device(ubi);
+	module_put(THIS_MODULE);
+}
+EXPORT_SYMBOL_GPL(ubi_close_volume);
+
+/**
+ * ubi_leb_read - read data.
+ * @desc: volume descriptor
+ * @lnum: logical eraseblock number to read from
+ * @buf: buffer where to store the read data
+ * @offset: offset within the logical eraseblock to read from
+ * @len: how many bytes to read
+ * @check: whether UBI has to check the read data's CRC or not.
+ *
+ * This function reads data from offset @offset of logical eraseblock @lnum and
+ * stores the data at @buf. When reading from static volumes, @check specifies
+ * whether the data has to be checked or not. If yes, the whole logical
+ * eraseblock will be read and its CRC checksum will be checked (i.e., the CRC
+ * checksum is per-eraseblock). So checking may substantially slow down the
+ * read speed. The @check argument is ignored for dynamic volumes.
+ *
+ * In case of success, this function returns zero. In case of failure, this
+ * function returns a negative error code.
+ *
+ * %-EBADMSG error code is returned:
+ * o for both static and dynamic volumes if MTD driver has detected a data
+ *   integrity problem (unrecoverable ECC checksum mismatch in case of NAND);
+ * o for static volumes in case of data CRC mismatch.
+ *
+ * If the volume is damaged because of an interrupted update this function just
+ * returns immediately with %-EBADF error code.
+ */
+int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
+		 int len, int check)
+{
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	int err, vol_id = vol->vol_id;
+
+	dbg_msg("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset);
+
+	/*
+	 * The range check is written as "len > size - offset" rather than
+	 * "offset + len > size" so that huge caller-supplied values cannot
+	 * overflow the signed addition before they are rejected.
+	 */
+	if (vol_id < 0 || vol_id >= ubi->vtbl_slots || lnum < 0 ||
+	    lnum >= vol->used_ebs || offset < 0 || len < 0 ||
+	    offset > vol->usable_leb_size ||
+	    len > vol->usable_leb_size - offset)
+		return -EINVAL;
+
+	if (vol->vol_type == UBI_STATIC_VOLUME) {
+		/*
+		 * NOTE: with used_ebs == 0 every non-negative lnum has already
+		 * been rejected by the "lnum >= vol->used_ebs" test above, so
+		 * this branch cannot be reached as the checks are ordered.
+		 */
+		if (vol->used_ebs == 0)
+			/* Empty static UBI volume */
+			return 0;
+		/* offset + len is bounded by usable_leb_size at this point */
+		if (lnum == vol->used_ebs - 1 &&
+		    offset + len > vol->last_eb_bytes)
+			return -EINVAL;
+	}
+
+	if (vol->upd_marker)
+		return -EBADF;
+	if (len == 0)
+		return 0;
+
+	err = ubi_eba_read_leb(ubi, vol, lnum, buf, offset, len, check);
+	if (err == -EBADMSG && vol->vol_type == UBI_STATIC_VOLUME) {
+		ubi_warn("mark volume %d as corrupted", vol_id);
+		vol->corrupted = 1;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(ubi_leb_read);
+
+/**
+ * ubi_leb_write - write data.
+ * @desc: volume descriptor
+ * @lnum: logical eraseblock number to write to
+ * @buf: data to write
+ * @offset: offset within the logical eraseblock where to write
+ * @len: how many bytes to write
+ * @dtype: expected data type
+ *
+ * This function writes @len bytes of data from @buf to offset @offset of
+ * logical eraseblock @lnum. The @dtype argument describes expected lifetime of
+ * the data.
+ *
+ * This function takes care of physical eraseblock write failures. If write to
+ * the physical eraseblock write operation fails, the logical eraseblock is
+ * re-mapped to another physical eraseblock, the data is recovered, and the
+ * write finishes. UBI has a pool of reserved physical eraseblocks for this.
+ *
+ * If all the data were successfully written, zero is returned. If an error
+ * occurred and UBI has not been able to recover from it, this function returns
+ * a negative error code.
+ * Note, in case of an error, it is possible that
+ * something was still written to the flash media, but that may be some
+ * garbage.
+ *
+ * If the volume is damaged because of an interrupted update this function just
+ * returns immediately with %-EBADF code.
+ */
+int ubi_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
+		  int offset, int len, int dtype)
+{
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	int vol_id = vol->vol_id;
+
+	dbg_msg("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset);
+
+	if (vol_id < 0 || vol_id >= ubi->vtbl_slots)
+		return -EINVAL;
+
+	if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
+		return -EROFS;
+
+	/*
+	 * The range check is written as "len > size - offset" rather than
+	 * "offset + len > size" so that huge caller-supplied values cannot
+	 * overflow the signed addition before they are rejected. Both @offset
+	 * and @len must also be multiples of the minimal I/O unit (the mask
+	 * test relies on min_io_size being a power of two).
+	 */
+	if (lnum < 0 || lnum >= vol->reserved_pebs || offset < 0 || len < 0 ||
+	    offset > vol->usable_leb_size ||
+	    len > vol->usable_leb_size - offset ||
+	    offset & (ubi->min_io_size - 1) || len & (ubi->min_io_size - 1))
+		return -EINVAL;
+
+	if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM &&
+	    dtype != UBI_UNKNOWN)
+		return -EINVAL;
+
+	if (vol->upd_marker)
+		return -EBADF;
+
+	if (len == 0)
+		return 0;
+
+	return ubi_eba_write_leb(ubi, vol, lnum, buf, offset, len, dtype);
+}
+EXPORT_SYMBOL_GPL(ubi_leb_write);
+
+/**
+ * ubi_leb_change - change logical eraseblock atomically.
+ * @desc: volume descriptor
+ * @lnum: logical eraseblock number to change
+ * @buf: data to write
+ * @len: how many bytes to write
+ * @dtype: expected data type
+ *
+ * This function changes the contents of a logical eraseblock atomically. @buf
+ * has to contain new logical eraseblock data, and @len - the length of the
+ * data, which has to be aligned. The length may be shorter than the logical
+ * eraseblock size, and the logical eraseblock may be appended to more times
+ * later on. This function guarantees that in case of an unclean reboot the old
+ * contents is preserved. Returns zero in case of success and a negative error
+ * code in case of failure.
+ */ +int ubi_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, + int len, int dtype) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int vol_id = vol->vol_id; + + dbg_msg("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum); + + if (vol_id < 0 || vol_id >= ubi->vtbl_slots) + return -EINVAL; + + if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + + if (lnum < 0 || lnum >= vol->reserved_pebs || len < 0 || + len > vol->usable_leb_size || len & (ubi->min_io_size - 1)) + return -EINVAL; + + if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM && + dtype != UBI_UNKNOWN) + return -EINVAL; + + if (vol->upd_marker) + return -EBADF; + + if (len == 0) + return 0; + + return ubi_eba_atomic_leb_change(ubi, vol, lnum, buf, len, dtype); +} +EXPORT_SYMBOL_GPL(ubi_leb_change); + +/** + * ubi_leb_erase - erase logical eraseblock. + * @desc: volume descriptor + * @lnum: logical eraseblock number + * + * This function un-maps logical eraseblock @lnum and synchronously erases the + * correspondent physical eraseblock. Returns zero in case of success and a + * negative error code in case of failure. + * + * If the volume is damaged because of an interrupted update this function just + * returns immediately with %-EBADF code. + */ +int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int err; + + dbg_msg("erase LEB %d:%d", vol->vol_id, lnum); + + if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + + if (lnum < 0 || lnum >= vol->reserved_pebs) + return -EINVAL; + + if (vol->upd_marker) + return -EBADF; + + err = ubi_eba_unmap_leb(ubi, vol, lnum); + if (err) + return err; + + return ubi_wl_flush(ubi); +} +EXPORT_SYMBOL_GPL(ubi_leb_erase); + +/** + * ubi_leb_unmap - un-map logical eraseblock. 
+ * @desc: volume descriptor + * @lnum: logical eraseblock number + * + * This function un-maps logical eraseblock @lnum and schedules the + * corresponding physical eraseblock for erasure, so that it will eventually be + * physically erased in background. This operation is much faster than the + * erase operation. + * + * Unlike erase, the un-map operation does not guarantee that the logical + * eraseblock will contain all 0xFF bytes when UBI is initialized again. For + * example, if several logical eraseblocks are un-mapped, and an unclean reboot + * happens after this, the logical eraseblocks will not necessarily be + * un-mapped again when this MTD device is attached. They may actually be + * mapped to the same physical eraseblocks again. So, this function has to be + * used with care. + * + * In other words, when un-mapping a logical eraseblock, UBI does not store + * any information about this on the flash media, it just marks the logical + * eraseblock as "un-mapped" in RAM. If UBI is detached before the physical + * eraseblock is physically erased, it will be mapped again to the same logical + * eraseblock when the MTD device is attached again. + * + * The main and obvious use-case of this function is when the contents of a + * logical eraseblock has to be re-written. Then it is much more efficient to + * first un-map it, then write new data, rather than first erase it, then write + * new data. Note, once new data has been written to the logical eraseblock, + * UBI guarantees that the old contents has gone forever. In other words, if an + * unclean reboot happens after the logical eraseblock has been un-mapped and + * then written to, it will contain the last written data. + * + * This function returns zero in case of success and a negative error code in + * case of failure. If the volume is damaged because of an interrupted update + * this function just returns immediately with %-EBADF code. 
+ */ +int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + + dbg_msg("unmap LEB %d:%d", vol->vol_id, lnum); + + if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + + if (lnum < 0 || lnum >= vol->reserved_pebs) + return -EINVAL; + + if (vol->upd_marker) + return -EBADF; + + return ubi_eba_unmap_leb(ubi, vol, lnum); +} +EXPORT_SYMBOL_GPL(ubi_leb_unmap); + +/** + * ubi_leb_map - map logical erasblock to a physical eraseblock. + * @desc: volume descriptor + * @lnum: logical eraseblock number + * @dtype: expected data type + * + * This function maps an un-mapped logical eraseblock @lnum to a physical + * eraseblock. This means, that after a successfull invocation of this + * function the logical eraseblock @lnum will be empty (contain only %0xFF + * bytes) and be mapped to a physical eraseblock, even if an unclean reboot + * happens. + * + * This function returns zero in case of success, %-EBADF if the volume is + * damaged because of an interrupted update, %-EBADMSG if the logical + * eraseblock is already mapped, and other negative error codes in case of + * other failures. + */ +int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + + dbg_msg("unmap LEB %d:%d", vol->vol_id, lnum); + + if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + + if (lnum < 0 || lnum >= vol->reserved_pebs) + return -EINVAL; + + if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM && + dtype != UBI_UNKNOWN) + return -EINVAL; + + if (vol->upd_marker) + return -EBADF; + + if (vol->eba_tbl[lnum] >= 0) + return -EBADMSG; + + return ubi_eba_write_leb(ubi, vol, lnum, NULL, 0, 0, dtype); +} +EXPORT_SYMBOL_GPL(ubi_leb_map); + +/** + * ubi_is_mapped - check if logical eraseblock is mapped. 
+ * @desc: volume descriptor + * @lnum: logical eraseblock number + * + * This function checks if logical eraseblock @lnum is mapped to a physical + * eraseblock. If a logical eraseblock is un-mapped, this does not necessarily + * mean it will still be un-mapped after the UBI device is re-attached. The + * logical eraseblock may become mapped to the physical eraseblock it was last + * mapped to. + * + * This function returns %1 if the LEB is mapped, %0 if not, and a negative + * error code in case of failure. If the volume is damaged because of an + * interrupted update this function just returns immediately with %-EBADF error + * code. + */ +int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum) +{ + struct ubi_volume *vol = desc->vol; + + dbg_msg("test LEB %d:%d", vol->vol_id, lnum); + + if (lnum < 0 || lnum >= vol->reserved_pebs) + return -EINVAL; + + if (vol->upd_marker) + return -EBADF; + + return vol->eba_tbl[lnum] >= 0; +} +EXPORT_SYMBOL_GPL(ubi_is_mapped); diff --git a/fs/ubi/misc.c b/fs/ubi/misc.c new file mode 100755 index 0000000..a6410bf --- /dev/null +++ b/fs/ubi/misc.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Ðртём) + */ + +/* Here we keep miscellaneous functions which are used all over the UBI code */ + +#include <ubi_uboot.h> +#include "ubi.h" + +/** + * calc_data_len - calculate how much real data is stored in a buffer. + * @ubi: UBI device description object + * @buf: a buffer with the contents of the physical eraseblock + * @length: the buffer length + * + * This function calculates how much "real data" is stored in @buf and returnes + * the length. Continuous 0xFF bytes at the end of the buffer are not + * considered as "real data". + */ +int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, + int length) +{ + int i; + + ubi_assert(!(length & (ubi->min_io_size - 1))); + + for (i = length - 1; i >= 0; i--) + if (((const uint8_t *)buf)[i] != 0xFF) + break; + + /* The resulting length must be aligned to the minimum flash I/O size */ + length = ALIGN(i + 1, ubi->min_io_size); + return length; +} + +/** + * ubi_check_volume - check the contents of a static volume. + * @ubi: UBI device description object + * @vol_id: ID of the volume to check + * + * This function checks if static volume @vol_id is corrupted by fully reading + * it and checking data CRC. This function returns %0 if the volume is not + * corrupted, %1 if it is corrupted and a negative error code in case of + * failure. Dynamic volumes are not checked and zero is returned immediately. 
+ */ +int ubi_check_volume(struct ubi_device *ubi, int vol_id) +{ + void *buf; + int err = 0, i; + struct ubi_volume *vol = ubi->volumes[vol_id]; + + if (vol->vol_type != UBI_STATIC_VOLUME) + return 0; + + buf = vmalloc(vol->usable_leb_size); + if (!buf) + return -ENOMEM; + + for (i = 0; i < vol->used_ebs; i++) { + int size; + + if (i == vol->used_ebs - 1) + size = vol->last_eb_bytes; + else + size = vol->usable_leb_size; + + err = ubi_eba_read_leb(ubi, vol, i, buf, 0, size, 1); + if (err) { + if (err == -EBADMSG) + err = 1; + break; + } + } + + vfree(buf); + return err; +} + +/** + * ubi_calculate_rsvd_pool - calculate how many PEBs must be reserved for bad + * eraseblock handling. + * @ubi: UBI device description object + */ +void ubi_calculate_reserved(struct ubi_device *ubi) +{ + ubi->beb_rsvd_level = ubi->good_peb_count/100; + ubi->beb_rsvd_level *= CONFIG_MTD_UBI_BEB_RESERVE; + if (ubi->beb_rsvd_level < MIN_RESEVED_PEBS) + ubi->beb_rsvd_level = MIN_RESEVED_PEBS; +} diff --git a/fs/ubi/scan.c b/fs/ubi/scan.c new file mode 100755 index 0000000..a068179 --- /dev/null +++ b/fs/ubi/scan.c @@ -0,0 +1,1363 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * UBI scanning unit. + * + * This unit is responsible for scanning the flash media, checking UBI + * headers and providing complete information about the UBI flash image. + * + * The scanning information is represented by a &struct ubi_scan_info object. + * Information about found volumes is represented by &struct ubi_scan_volume + * objects which are kept in volume RB-tree with root at the @volumes field. + * The RB-tree is indexed by the volume ID. + * + * Found logical eraseblocks are represented by &struct ubi_scan_leb objects. + * These objects are kept in per-volume RB-trees with the root at the + * corresponding &struct ubi_scan_volume object. To put it differently, we keep + * an RB-tree of per-volume objects and each of these objects is the root of + * RB-tree of per-eraseblock objects. + * + * Corrupted physical eraseblocks are put to the @corr list, free physical + * eraseblocks are put to the @free list and the physical eraseblocks to be + * erased are put to the @erase list. + */ + +#ifdef UBI_LINUX +#include <linux/err.h> +#include <linux/crc32.h> +#include <asm/div64.h> +#endif + +#include <ubi_uboot.h> +#include "ubi.h" + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si); +#else +#define paranoid_check_si(ubi, si) 0 +#endif + +/* Temporary variables used during scanning */ +static struct ubi_ec_hdr *ech; +static struct ubi_vid_hdr *vidh; + +/** + * add_to_list - add physical eraseblock to a list. 
+ * @si: scanning information + * @pnum: physical eraseblock number to add + * @ec: erase counter of the physical eraseblock + * @list: the list to add to + * + * This function adds physical eraseblock @pnum to free, erase, corrupted or + * alien lists. Returns zero in case of success and a negative error code in + * case of failure. + */ +static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, + struct list_head *list) +{ + struct ubi_scan_leb *seb; + + if (list == &si->free) + dbg_bld("add to free: PEB %d, EC %d", pnum, ec); + else if (list == &si->erase) + dbg_bld("add to erase: PEB %d, EC %d", pnum, ec); + else if (list == &si->corr) + dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec); + else if (list == &si->alien) + dbg_bld("add to alien: PEB %d, EC %d", pnum, ec); +// else +// BUG(); + + seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL); + if (!seb) + return -ENOMEM; + + seb->pnum = pnum; + seb->ec = ec; + list_add_tail(&seb->u.list, list); + return 0; +} + +/** + * validate_vid_hdr - check that volume identifier header is correct and + * consistent. + * @vid_hdr: the volume identifier header to check + * @sv: information about the volume this logical eraseblock belongs to + * @pnum: physical eraseblock number the VID header came from + * + * This function checks that data stored in @vid_hdr is consistent. Returns + * non-zero if an inconsistency was found and zero if not. + * + * Note, UBI does sanity check of everything it reads from the flash media. + * Most of the checks are done in the I/O unit. Here we check that the + * information in the VID header is consistent to the information in other VID + * headers of the same volume. 
+ */ +static int validate_vid_hdr(const struct ubi_vid_hdr *vid_hdr, + const struct ubi_scan_volume *sv, int pnum) +{ + int vol_type = vid_hdr->vol_type; + int vol_id = be32_to_cpu(vid_hdr->vol_id); + int used_ebs = be32_to_cpu(vid_hdr->used_ebs); + int data_pad = be32_to_cpu(vid_hdr->data_pad); + + if (sv->leb_count != 0) { + int sv_vol_type; + + /* + * This is not the first logical eraseblock belonging to this + * volume. Ensure that the data in its VID header is consistent + * to the data in previous logical eraseblock headers. + */ + + if (vol_id != sv->vol_id) { + dbg_err("inconsistent vol_id"); + goto bad; + } + + if (sv->vol_type == UBI_STATIC_VOLUME) + sv_vol_type = UBI_VID_STATIC; + else + sv_vol_type = UBI_VID_DYNAMIC; + + if (vol_type != sv_vol_type) { + dbg_err("inconsistent vol_type"); + goto bad; + } + + if (used_ebs != sv->used_ebs) { + dbg_err("inconsistent used_ebs"); + goto bad; + } + + if (data_pad != sv->data_pad) { + dbg_err("inconsistent data_pad"); + goto bad; + } + } + + return 0; + +bad: + ubi_err("inconsistent VID header at PEB %d", pnum); + ubi_dbg_dump_vid_hdr(vid_hdr); + ubi_dbg_dump_sv(sv); + return -EINVAL; +} + +/** + * add_volume - add volume to the scanning information. + * @si: scanning information + * @vol_id: ID of the volume to add + * @pnum: physical eraseblock number + * @vid_hdr: volume identifier header + * + * If the volume corresponding to the @vid_hdr logical eraseblock is already + * present in the scanning information, this function does nothing. Otherwise + * it adds corresponding volume to the scanning information. Returns a pointer + * to the scanning volume object in case of success and a negative error code + * in case of failure. 
+ */ +static struct ubi_scan_volume *add_volume(struct ubi_scan_info *si, int vol_id, + int pnum, + const struct ubi_vid_hdr *vid_hdr) +{ + struct ubi_scan_volume *sv; + struct rb_node **p = &si->volumes.rb_node, *parent = NULL; + + ubi_assert(vol_id == be32_to_cpu(vid_hdr->vol_id)); + + /* Walk the volume RB-tree to look if this volume is already present */ + while (*p) { + parent = *p; + sv = rb_entry(parent, struct ubi_scan_volume, rb); + + if (vol_id == sv->vol_id) + return sv; + + if (vol_id > sv->vol_id) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + /* The volume is absent - add it */ + sv = kmalloc(sizeof(struct ubi_scan_volume), GFP_KERNEL); + if (!sv) + return ERR_PTR(-ENOMEM); + + sv->highest_lnum = sv->leb_count = 0; + sv->vol_id = vol_id; + sv->root = RB_ROOT; + sv->used_ebs = be32_to_cpu(vid_hdr->used_ebs); + sv->data_pad = be32_to_cpu(vid_hdr->data_pad); + sv->compat = vid_hdr->compat; + sv->vol_type = vid_hdr->vol_type == UBI_VID_DYNAMIC ? UBI_DYNAMIC_VOLUME + : UBI_STATIC_VOLUME; + if (vol_id > si->highest_vol_id) + si->highest_vol_id = vol_id; + + rb_link_node(&sv->rb, parent, p); + rb_insert_color(&sv->rb, &si->volumes); + si->vols_found += 1; + dbg_bld("added volume %d", vol_id); + return sv; +} + +/** + * compare_lebs - find out which logical eraseblock is newer. + * @ubi: UBI device description object + * @seb: first logical eraseblock to compare + * @pnum: physical eraseblock number of the second logical eraseblock to + * compare + * @vid_hdr: volume identifier header of the second logical eraseblock + * + * This function compares 2 copies of a LEB and informs which one is newer. In + * case of success this function returns a positive value, in case of failure, a + * negative error code is returned. 
* The success return codes use the following
 * bits:
 *     o bit 0 is cleared: the first PEB (described by @seb) is newer than the
 *       second PEB (described by @pnum and @vid_hdr);
 *     o bit 0 is set: the second PEB is newer;
 *     o bit 1 is cleared: no bit-flips were detected in the newer LEB;
 *     o bit 1 is set: bit-flips were detected in the newer LEB;
 *     o bit 2 is cleared: the older LEB is not corrupted;
 *     o bit 2 is set: the older LEB is corrupted.
 */
static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb,
			int pnum, const struct ubi_vid_hdr *vid_hdr)
{
	void *buf;
	int len, err, second_is_newer, bitflips = 0, corrupted = 0;
	uint32_t data_crc, crc;
	/* Only allocated when the first PEB's VID header has to be re-read */
	struct ubi_vid_hdr *vh = NULL;
	unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum);

	if (seb->sqnum == 0 && sqnum2 == 0) {
		long long abs, v1 = seb->leb_ver, v2 = be32_to_cpu(vid_hdr->leb_ver);

		/*
		 * UBI constantly increases the logical eraseblock version
		 * number and it can overflow. Thus, we have to bear in mind
		 * that versions that are close to %0xFFFFFFFF are less than
		 * versions that are close to %0.
		 *
		 * The UBI WL unit guarantees that the number of pending tasks
		 * is not greater than %0x7FFFFFFF. So, if the difference
		 * between any two versions is greater or equivalent to
		 * %0x7FFFFFFF, there was an overflow and the logical
		 * eraseblock with lower version is actually newer than the one
		 * with higher version.
		 *
		 * FIXME: but this is anyway obsolete and will be removed at
		 * some point.
		 */
		dbg_bld("using old crappy leb_ver stuff");

		if (v1 == v2) {
			ubi_err("PEB %d and PEB %d have the same version %lld",
				seb->pnum, pnum, v1);
			return -EINVAL;
		}

		/* Absolute distance between the two versions */
		abs = v1 - v2;
		if (abs < 0)
			abs = -abs;

		if (abs < 0x7FFFFFFF)
			/* Non-overflow situation */
			second_is_newer = (v2 > v1);
		else
			second_is_newer = (v2 < v1);
	} else
		/* Obviously the LEB with lower sequence counter is older */
		second_is_newer = sqnum2 > seb->sqnum;

	/*
	 * Now we know which copy is newer. If the copy flag of the PEB with
	 * newer version is not set, then we just return, otherwise we have to
	 * check data CRC. For the second PEB we already have the VID header,
	 * for the first one - we'll need to re-read it from flash.
	 *
	 * FIXME: this may be optimized so that we wouldn't read twice.
	 */

	if (second_is_newer) {
		if (!vid_hdr->copy_flag) {
			/* It is not a copy, so it is newer */
			dbg_bld("second PEB %d is newer, copy_flag is unset",
				pnum);
			return 1;
		}
	} else {
		/* From here on @pnum and @vid_hdr describe the first PEB */
		pnum = seb->pnum;

		vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
		if (!vh)
			return -ENOMEM;

		err = ubi_io_read_vid_hdr(ubi, pnum, vh, 0);
		if (err) {
			if (err == UBI_IO_BITFLIPS)
				bitflips = 1;
			else {
				dbg_err("VID of PEB %d header is bad, but it "
					"was OK earlier", pnum);
				/* Positive I/O status codes become -EIO */
				if (err > 0)
					err = -EIO;

				goto out_free_vidh;
			}
		}

		if (!vh->copy_flag) {
			/* It is not a copy, so it is newer */
			dbg_bld("first PEB %d is newer, copy_flag is unset",
				pnum);
			/* Bit 0 stays clear (first is newer); bit 1 = bit-flips */
			err = bitflips << 1;
			goto out_free_vidh;
		}

		vid_hdr = vh;
	}

	/* Read the data of the copy and check the CRC */

	len = be32_to_cpu(vid_hdr->data_size);
	buf = vmalloc(len);
	if (!buf) {
		err = -ENOMEM;
		goto out_free_vidh;
	}

	err = ubi_io_read_data(ubi, buf, pnum, 0, len);
	if (err && err != UBI_IO_BITFLIPS)
		goto out_free_buf;

	data_crc = be32_to_cpu(vid_hdr->data_crc);
	crc = crc32(UBI_CRC32_INIT, buf, len);
	if (crc != data_crc) {
		dbg_bld("PEB %d CRC error: calculated %#08x, must be %#08x",
			pnum, crc, data_crc);
		/*
		 * The supposedly newer copy is bad: report it as the corrupted
		 * older one and let the other PEB win.
		 */
		corrupted = 1;
		bitflips = 0;
		second_is_newer = !second_is_newer;
	} else {
		dbg_bld("PEB %d CRC is OK", pnum);
		/* A non-zero @err can only be UBI_IO_BITFLIPS at this point */
		bitflips = !!err;
	}

	vfree(buf);
	ubi_free_vid_hdr(ubi, vh);

	if (second_is_newer)
		dbg_bld("second PEB %d is newer, copy_flag is set", pnum);
	else
		dbg_bld("first PEB %d is newer, copy_flag is set", pnum);

	return second_is_newer | (bitflips << 1) | (corrupted << 2);

out_free_buf:
	vfree(buf);
out_free_vidh:
	ubi_free_vid_hdr(ubi, vh);
	return err;
}

/**
 * ubi_scan_add_used - add information about a physical eraseblock to the
 * scanning information.
 * @ubi: UBI device description object
 * @si: scanning information
 * @pnum: the physical eraseblock number
 * @ec: erase counter
 * @vid_hdr: the volume identifier header
 * @bitflips: if bit-flips were detected when this physical eraseblock was read
 *
 * This function adds information about a used physical eraseblock to the
 * 'used' tree of the corresponding volume. The function is rather complex
 * because it has to handle cases when this is not the first physical
 * eraseblock belonging to the same logical eraseblock, and the newer one has
 * to be picked, while the older one has to be dropped. This function returns
 * zero in case of success and a negative error code in case of failure.
+ */ +int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, + int pnum, int ec, const struct ubi_vid_hdr *vid_hdr, + int bitflips) +{ + int err, vol_id, lnum; + uint32_t leb_ver; + unsigned long long sqnum; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *seb; + struct rb_node **p, *parent = NULL; + + vol_id = be32_to_cpu(vid_hdr->vol_id); + lnum = be32_to_cpu(vid_hdr->lnum); + sqnum = be64_to_cpu(vid_hdr->sqnum); + leb_ver = be32_to_cpu(vid_hdr->leb_ver); + + dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, ver %u, bitflips %d", + pnum, vol_id, lnum, ec, sqnum, leb_ver, bitflips); + + sv = add_volume(si, vol_id, pnum, vid_hdr); + if (IS_ERR(sv) < 0) + return PTR_ERR(sv); + + if (si->max_sqnum < sqnum) + si->max_sqnum = sqnum; + + /* + * Walk the RB-tree of logical eraseblocks of volume @vol_id to look + * if this is the first instance of this logical eraseblock or not. + */ + p = &sv->root.rb_node; + while (*p) { + int cmp_res; + + parent = *p; + seb = rb_entry(parent, struct ubi_scan_leb, u.rb); + if (lnum != seb->lnum) { + if (lnum < seb->lnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + continue; + } + + /* + * There is already a physical eraseblock describing the same + * logical eraseblock present. + */ + + dbg_bld("this LEB already exists: PEB %d, sqnum %llu, " + "LEB ver %u, EC %d", seb->pnum, seb->sqnum, + seb->leb_ver, seb->ec); + + /* + * Make sure that the logical eraseblocks have different + * versions. Otherwise the image is bad. + */ + if (seb->leb_ver == leb_ver && leb_ver != 0) { + ubi_err("two LEBs with same version %u", leb_ver); + ubi_dbg_dump_seb(seb, 0); + ubi_dbg_dump_vid_hdr(vid_hdr); + return -EINVAL; + } + + /* + * Make sure that the logical eraseblocks have different + * sequence numbers. Otherwise the image is bad. + * + * FIXME: remove 'sqnum != 0' check when leb_ver is removed. 
+ */ + if (seb->sqnum == sqnum && sqnum != 0) { + ubi_err("two LEBs with same sequence number %llu", + sqnum); + ubi_dbg_dump_seb(seb, 0); + ubi_dbg_dump_vid_hdr(vid_hdr); + return -EINVAL; + } + + /* + * Now we have to drop the older one and preserve the newer + * one. + */ + cmp_res = compare_lebs(ubi, seb, pnum, vid_hdr); + if (cmp_res < 0) + return cmp_res; + + if (cmp_res & 1) { + /* + * This logical eraseblock is newer then the one + * found earlier. + */ + err = validate_vid_hdr(vid_hdr, sv, pnum); + if (err) + return err; + + if (cmp_res & 4) + err = add_to_list(si, seb->pnum, seb->ec, + &si->corr); + else + err = add_to_list(si, seb->pnum, seb->ec, + &si->erase); + if (err) + return err; + + seb->ec = ec; + seb->pnum = pnum; + seb->scrub = ((cmp_res & 2) || bitflips); + seb->sqnum = sqnum; + seb->leb_ver = leb_ver; + + if (sv->highest_lnum == lnum) + sv->last_data_size = + be32_to_cpu(vid_hdr->data_size); + + return 0; + } else { + /* + * This logical eraseblock is older then the one found + * previously. + */ + if (cmp_res & 4) + return add_to_list(si, pnum, ec, &si->corr); + else + return add_to_list(si, pnum, ec, &si->erase); + } + } + + /* + * We've met this logical eraseblock for the first time, add it to the + * scanning information. + */ + + err = validate_vid_hdr(vid_hdr, sv, pnum); + if (err) + return err; + + seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL); + if (!seb) + return -ENOMEM; + + seb->ec = ec; + seb->pnum = pnum; + seb->lnum = lnum; + seb->sqnum = sqnum; + seb->scrub = bitflips; + seb->leb_ver = leb_ver; + + if (sv->highest_lnum <= lnum) { + sv->highest_lnum = lnum; + sv->last_data_size = be32_to_cpu(vid_hdr->data_size); + } + + sv->leb_count += 1; + rb_link_node(&seb->u.rb, parent, p); + rb_insert_color(&seb->u.rb, &sv->root); + return 0; +} + +/** + * ubi_scan_find_sv - find information about a particular volume in the + * scanning information. 
+ * @si: scanning information + * @vol_id: the requested volume ID + * + * This function returns a pointer to the volume description or %NULL if there + * are no data about this volume in the scanning information. + */ +struct ubi_scan_volume *ubi_scan_find_sv(const struct ubi_scan_info *si, + int vol_id) +{ + struct ubi_scan_volume *sv; + struct rb_node *p = si->volumes.rb_node; + + while (p) { + sv = rb_entry(p, struct ubi_scan_volume, rb); + + if (vol_id == sv->vol_id) + return sv; + + if (vol_id > sv->vol_id) + p = p->rb_left; + else + p = p->rb_right; + } + + return NULL; +} + +/** + * ubi_scan_find_seb - find information about a particular logical + * eraseblock in the volume scanning information. + * @sv: a pointer to the volume scanning information + * @lnum: the requested logical eraseblock + * + * This function returns a pointer to the scanning logical eraseblock or %NULL + * if there are no data about it in the scanning volume information. + */ +struct ubi_scan_leb *ubi_scan_find_seb(const struct ubi_scan_volume *sv, + int lnum) +{ + struct ubi_scan_leb *seb; + struct rb_node *p = sv->root.rb_node; + + while (p) { + seb = rb_entry(p, struct ubi_scan_leb, u.rb); + + if (lnum == seb->lnum) + return seb; + + if (lnum > seb->lnum) + p = p->rb_left; + else + p = p->rb_right; + } + + return NULL; +} + +/** + * ubi_scan_rm_volume - delete scanning information about a volume. 
+ * @si: scanning information + * @sv: the volume scanning information to delete + */ +void ubi_scan_rm_volume(struct ubi_scan_info *si, struct ubi_scan_volume *sv) +{ + struct rb_node *rb; + struct ubi_scan_leb *seb; + + dbg_bld("remove scanning information about volume %d", sv->vol_id); + + while ((rb = rb_first(&sv->root))) { + seb = rb_entry(rb, struct ubi_scan_leb, u.rb); + rb_erase(&seb->u.rb, &sv->root); + list_add_tail(&seb->u.list, &si->erase); + } + + rb_erase(&sv->rb, &si->volumes); + kfree(sv); + si->vols_found -= 1; +} + +/** + * ubi_scan_erase_peb - erase a physical eraseblock. + * @ubi: UBI device description object + * @si: scanning information + * @pnum: physical eraseblock number to erase; + * @ec: erase counter value to write (%UBI_SCAN_UNKNOWN_EC if it is unknown) + * + * This function erases physical eraseblock 'pnum', and writes the erase + * counter header to it. This function should only be used on UBI device + * initialization stages, when the EBA unit had not been yet initialized. This + * function returns zero in case of success and a negative error code in case + * of failure. + */ +int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si, + int pnum, int ec) +{ + int err; + struct ubi_ec_hdr *ec_hdr; + + if ((long long)ec >= UBI_MAX_ERASECOUNTER) { + /* + * Erase counter overflow. Upgrade UBI and use 64-bit + * erase counters internally. + */ + ubi_err("erase counter overflow at PEB %d, EC %d", pnum, ec); + return -EINVAL; + } + + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ec_hdr) + return -ENOMEM; + + ec_hdr->ec = cpu_to_be64(ec); + + err = ubi_io_sync_erase(ubi, pnum, 0); + if (err < 0) + goto out_free; + + err = ubi_io_write_ec_hdr(ubi, pnum, ec_hdr); + +out_free: + kfree(ec_hdr); + return err; +} + +/** + * ubi_scan_get_free_peb - get a free physical eraseblock. + * @ubi: UBI device description object + * @si: scanning information + * + * This function returns a free physical eraseblock. 
It is supposed to be + * called on the UBI initialization stages when the wear-leveling unit is not + * initialized yet. This function picks a physical eraseblocks from one of the + * lists, writes the EC header if it is needed, and removes it from the list. + * + * This function returns scanning physical eraseblock information in case of + * success and an error code in case of failure. + */ +struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi, + struct ubi_scan_info *si) +{ + int err = 0, i; + struct ubi_scan_leb *seb; + + if (!list_empty(&si->free)) { + seb = list_entry(si->free.next, struct ubi_scan_leb, u.list); + list_del(&seb->u.list); + dbg_bld("return free PEB %d, EC %d", seb->pnum, seb->ec); + return seb; + } + + for (i = 0; i < 2; i++) { + struct list_head *head; + struct ubi_scan_leb *tmp_seb; + + if (i == 0) + head = &si->erase; + else + head = &si->corr; + + /* + * We try to erase the first physical eraseblock from the @head + * list and pick it if we succeed, or try to erase the + * next one if not. And so forth. We don't want to take care + * about bad eraseblocks here - they'll be handled later. + */ + list_for_each_entry_safe(seb, tmp_seb, head, u.list) { + if (seb->ec == UBI_SCAN_UNKNOWN_EC) + seb->ec = si->mean_ec; + + err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1); + if (err) + continue; + + seb->ec += 1; + list_del(&seb->u.list); + dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec); + return seb; + } + } + + ubi_err("no eraseblocks found"); + return ERR_PTR(-ENOSPC); +} + +/** + * process_eb - read UBI headers, check them and add corresponding data + * to the scanning information. + * @ubi: UBI device description object + * @si: scanning information + * @pnum: the physical eraseblock number + * + * This function returns a zero if the physical eraseblock was successfully + * handled and a negative error code in case of failure. 
+ */ +static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum) +{ + long long uninitialized_var(ec); + int err, bitflips = 0, vol_id, ec_corr = 0; + + dbg_bld("scan PEB %d", pnum); + + /* Skip bad physical eraseblocks */ + err = ubi_io_is_bad(ubi, pnum); + if (err < 0) + return err; + else if (err) { + /* + * FIXME: this is actually duty of the I/O unit to initialize + * this, but MTD does not provide enough information. + */ + si->bad_peb_count += 1; + return 0; + } + + err = ubi_io_read_ec_hdr(ubi, pnum, ech, 0); + if (err < 0) + return err; + else if (err == UBI_IO_BITFLIPS) + { + printk("\n UBI_IO_BITFLIPS"); + bitflips = 1; + } + else if (err == UBI_IO_PEB_EMPTY) + return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, &si->erase); + else if (err == UBI_IO_BAD_EC_HDR) { + printk("\n UBI_IO_BAD_EC_HDR"); + /* + * We have to also look at the VID header, possibly it is not + * corrupted. Set %bitflips flag in order to make this PEB be + * moved and EC be re-created. + */ + ec_corr = 1; + ec = UBI_SCAN_UNKNOWN_EC; + bitflips = 1; + } + si->is_empty = 0; + + if (!ec_corr) { + /* Make sure UBI version is OK */ + if (ech->version != UBI_VERSION) { + ubi_err("this UBI version is %d, image version is %d", + UBI_VERSION, (int)ech->version); + return -EINVAL; + } + + ec = be64_to_cpu(ech->ec); + if (ec > UBI_MAX_ERASECOUNTER) { + /* + * Erase counter overflow. The EC headers have 64 bits + * reserved, but we anyway make use of only 31 bit + * values, as this seems to be enough for any existing + * flash. Upgrade UBI and use 64-bit erase counters + * internally. 
+ */ + ubi_err("erase counter overflow, max is %d", + UBI_MAX_ERASECOUNTER); + ubi_dbg_dump_ec_hdr(ech); + return -EINVAL; + } + } + + /* OK, we've done with the EC header, let's look at the VID header */ + + err = ubi_io_read_vid_hdr(ubi, pnum, vidh, 0); + if (err < 0) + return err; + else if (err == UBI_IO_BITFLIPS) + bitflips = 1; + else if (err == UBI_IO_BAD_VID_HDR || + (err == UBI_IO_PEB_FREE && ec_corr)) { + /* VID header is corrupted */ + err = add_to_list(si, pnum, ec, &si->corr); + if (err) + return err; + goto adjust_mean_ec; + } else if (err == UBI_IO_PEB_FREE) { + /* No VID header - the physical eraseblock is free */ + err = add_to_list(si, pnum, ec, &si->free); + if (err) + return err; + goto adjust_mean_ec; + } + + vol_id = be32_to_cpu(vidh->vol_id); + if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOLUME_ID) { + int lnum = be32_to_cpu(vidh->lnum); + + /* Unsupported internal volume */ + switch (vidh->compat) { + case UBI_COMPAT_DELETE: + ubi_msg("\"delete\" compatible internal volume %d:%d" + " found, remove it", vol_id, lnum); + err = add_to_list(si, pnum, ec, &si->corr); + if (err) + return err; + break; + + case UBI_COMPAT_RO: + ubi_msg("read-only compatible internal volume %d:%d" + " found, switch to read-only mode", + vol_id, lnum); + ubi->ro_mode = 1; + break; + + case UBI_COMPAT_PRESERVE: + ubi_msg("\"preserve\" compatible internal volume %d:%d" + " found", vol_id, lnum); + err = add_to_list(si, pnum, ec, &si->alien); + if (err) + return err; + si->alien_peb_count += 1; + return 0; + + case UBI_COMPAT_REJECT: + ubi_err("incompatible internal volume %d:%d found", + vol_id, lnum); + return -EINVAL; + } + } + + /* Both UBI headers seem to be fine */ + err = ubi_scan_add_used(ubi, si, pnum, ec, vidh, bitflips); + if (err) + return err; + +adjust_mean_ec: + if (!ec_corr) { + si->ec_sum += ec; + si->ec_count += 1; + if (ec > si->max_ec) + si->max_ec = ec; + if (ec < si->min_ec) + si->min_ec = ec; + } + + return 0; +} + +/** + * ubi_scan - 
scan an MTD device. + * @ubi: UBI device description object + * + * This function does full scanning of an MTD device and returns complete + * information about it. In case of failure, an error code is returned. + */ +struct ubi_scan_info *ubi_scan(struct ubi_device *ubi) +{ + int err, pnum; + struct rb_node *rb1, *rb2; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *seb; + struct ubi_scan_info *si; + + si = kzalloc(sizeof(struct ubi_scan_info), GFP_KERNEL); + if (!si) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&si->corr); + INIT_LIST_HEAD(&si->free); + INIT_LIST_HEAD(&si->erase); + INIT_LIST_HEAD(&si->alien); + si->volumes = RB_ROOT; + si->is_empty = 1; + + err = -ENOMEM; + ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ech) + goto out_si; + + vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); + if (!vidh) + goto out_ech; + + for (pnum = 0; pnum < ubi->peb_count; pnum++) { + cond_resched(); + + dbg_msg("process PEB %d", pnum); + err = process_eb(ubi, si, pnum); + if (err < 0) + goto out_vidh; + } + + dbg_msg("scanning is finished"); + + /* Calculate mean erase counter */ + if (si->ec_count) { + do_div(si->ec_sum, si->ec_count); + si->mean_ec = si->ec_sum; + } + + if (si->is_empty) + ubi_msg("empty MTD device detected"); + + /* + * In case of unknown erase counter we use the mean erase counter + * value. 
+ */ + ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { + ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) + if (seb->ec == UBI_SCAN_UNKNOWN_EC) + seb->ec = si->mean_ec; + } + + list_for_each_entry(seb, &si->free, u.list) { + if (seb->ec == UBI_SCAN_UNKNOWN_EC) + seb->ec = si->mean_ec; + } + + list_for_each_entry(seb, &si->corr, u.list) + if (seb->ec == UBI_SCAN_UNKNOWN_EC) + seb->ec = si->mean_ec; + + list_for_each_entry(seb, &si->erase, u.list) + if (seb->ec == UBI_SCAN_UNKNOWN_EC) + seb->ec = si->mean_ec; + + err = paranoid_check_si(ubi, si); + if (err) { + if (err > 0) + err = -EINVAL; + goto out_vidh; + } + + ubi_free_vid_hdr(ubi, vidh); + kfree(ech); + + return si; + +out_vidh: + ubi_free_vid_hdr(ubi, vidh); +out_ech: + kfree(ech); +out_si: + ubi_scan_destroy_si(si); + return ERR_PTR(err); +} + +/** + * destroy_sv - free the scanning volume information + * @sv: scanning volume information + * + * This function destroys the volume RB-tree (@sv->root) and the scanning + * volume information. + */ +static void destroy_sv(struct ubi_scan_volume *sv) +{ + struct ubi_scan_leb *seb; + struct rb_node *this = sv->root.rb_node; + + while (this) { + if (this->rb_left) + this = this->rb_left; + else if (this->rb_right) + this = this->rb_right; + else { + seb = rb_entry(this, struct ubi_scan_leb, u.rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &seb->u.rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + + kfree(seb); + } + } + kfree(sv); +} + +/** + * ubi_scan_destroy_si - destroy scanning information. 
+ * @si: scanning information + */ +void ubi_scan_destroy_si(struct ubi_scan_info *si) +{ + struct ubi_scan_leb *seb, *seb_tmp; + struct ubi_scan_volume *sv; + struct rb_node *rb; + + list_for_each_entry_safe(seb, seb_tmp, &si->alien, u.list) { + list_del(&seb->u.list); + kfree(seb); + } + list_for_each_entry_safe(seb, seb_tmp, &si->erase, u.list) { + list_del(&seb->u.list); + kfree(seb); + } + list_for_each_entry_safe(seb, seb_tmp, &si->corr, u.list) { + list_del(&seb->u.list); + kfree(seb); + } + list_for_each_entry_safe(seb, seb_tmp, &si->free, u.list) { + list_del(&seb->u.list); + kfree(seb); + } + + /* Destroy the volume RB-tree */ + rb = si->volumes.rb_node; + while (rb) { + if (rb->rb_left) + rb = rb->rb_left; + else if (rb->rb_right) + rb = rb->rb_right; + else { + sv = rb_entry(rb, struct ubi_scan_volume, rb); + + rb = rb_parent(rb); + if (rb) { + if (rb->rb_left == &sv->rb) + rb->rb_left = NULL; + else + rb->rb_right = NULL; + } + + destroy_sv(sv); + } + } + + kfree(si); +} + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + +/** + * paranoid_check_si - check if the scanning information is correct and + * consistent. + * @ubi: UBI device description object + * @si: scanning information + * + * This function returns zero if the scanning information is all right, %1 if + * not and a negative error code if an error occurred. + */ +static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si) +{ + int pnum, err, vols_found = 0; + struct rb_node *rb1, *rb2; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *seb, *last_seb; + uint8_t *buf; + + /* + * At first, check that scanning information is OK. 
+ */ + ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { + int leb_count = 0; + + cond_resched(); + + vols_found += 1; + + if (si->is_empty) { + ubi_err("bad is_empty flag"); + goto bad_sv; + } + + if (sv->vol_id < 0 || sv->highest_lnum < 0 || + sv->leb_count < 0 || sv->vol_type < 0 || sv->used_ebs < 0 || + sv->data_pad < 0 || sv->last_data_size < 0) { + ubi_err("negative values"); + goto bad_sv; + } + + if (sv->vol_id >= UBI_MAX_VOLUMES && + sv->vol_id < UBI_INTERNAL_VOL_START) { + ubi_err("bad vol_id"); + goto bad_sv; + } + + if (sv->vol_id > si->highest_vol_id) { + ubi_err("highest_vol_id is %d, but vol_id %d is there", + si->highest_vol_id, sv->vol_id); + goto out; + } + + if (sv->vol_type != UBI_DYNAMIC_VOLUME && + sv->vol_type != UBI_STATIC_VOLUME) { + ubi_err("bad vol_type"); + goto bad_sv; + } + + if (sv->data_pad > ubi->leb_size / 2) { + ubi_err("bad data_pad"); + goto bad_sv; + } + + last_seb = NULL; + ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { + cond_resched(); + + last_seb = seb; + leb_count += 1; + + if (seb->pnum < 0 || seb->ec < 0) { + ubi_err("negative values"); + goto bad_seb; + } + + if (seb->ec < si->min_ec) { + ubi_err("bad si->min_ec (%d), %d found", + si->min_ec, seb->ec); + goto bad_seb; + } + + if (seb->ec > si->max_ec) { + ubi_err("bad si->max_ec (%d), %d found", + si->max_ec, seb->ec); + goto bad_seb; + } + + if (seb->pnum >= ubi->peb_count) { + ubi_err("too high PEB number %d, total PEBs %d", + seb->pnum, ubi->peb_count); + goto bad_seb; + } + + if (sv->vol_type == UBI_STATIC_VOLUME) { + if (seb->lnum >= sv->used_ebs) { + ubi_err("bad lnum or used_ebs"); + goto bad_seb; + } + } else { + if (sv->used_ebs != 0) { + ubi_err("non-zero used_ebs"); + goto bad_seb; + } + } + + if (seb->lnum > sv->highest_lnum) { + ubi_err("incorrect highest_lnum or lnum"); + goto bad_seb; + } + } + + if (sv->leb_count != leb_count) { + ubi_err("bad leb_count, %d objects in the tree", + leb_count); + goto bad_sv; + } + + if (!last_seb) + continue; + + 
seb = last_seb; + + if (seb->lnum != sv->highest_lnum) { + ubi_err("bad highest_lnum"); + goto bad_seb; + } + } + + if (vols_found != si->vols_found) { + ubi_err("bad si->vols_found %d, should be %d", + si->vols_found, vols_found); + goto out; + } + + /* Check that scanning information is correct */ + ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { + last_seb = NULL; + ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { + int vol_type; + + cond_resched(); + + last_seb = seb; + + err = ubi_io_read_vid_hdr(ubi, seb->pnum, vidh, 1); + if (err && err != UBI_IO_BITFLIPS) { + ubi_err("VID header is not OK (%d)", err); + if (err > 0) + err = -EIO; + return err; + } + + vol_type = vidh->vol_type == UBI_VID_DYNAMIC ? + UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME; + if (sv->vol_type != vol_type) { + ubi_err("bad vol_type"); + goto bad_vid_hdr; + } + + if (seb->sqnum != be64_to_cpu(vidh->sqnum)) { + ubi_err("bad sqnum %llu", seb->sqnum); + goto bad_vid_hdr; + } + + if (sv->vol_id != be32_to_cpu(vidh->vol_id)) { + ubi_err("bad vol_id %d", sv->vol_id); + goto bad_vid_hdr; + } + + if (sv->compat != vidh->compat) { + ubi_err("bad compat %d", vidh->compat); + goto bad_vid_hdr; + } + + if (seb->lnum != be32_to_cpu(vidh->lnum)) { + ubi_err("bad lnum %d", seb->lnum); + goto bad_vid_hdr; + } + + if (sv->used_ebs != be32_to_cpu(vidh->used_ebs)) { + ubi_err("bad used_ebs %d", sv->used_ebs); + goto bad_vid_hdr; + } + + if (sv->data_pad != be32_to_cpu(vidh->data_pad)) { + ubi_err("bad data_pad %d", sv->data_pad); + goto bad_vid_hdr; + } + + if (seb->leb_ver != be32_to_cpu(vidh->leb_ver)) { + ubi_err("bad leb_ver %u", seb->leb_ver); + goto bad_vid_hdr; + } + } + + if (!last_seb) + continue; + + if (sv->highest_lnum != be32_to_cpu(vidh->lnum)) { + ubi_err("bad highest_lnum %d", sv->highest_lnum); + goto bad_vid_hdr; + } + + if (sv->last_data_size != be32_to_cpu(vidh->data_size)) { + ubi_err("bad last_data_size %d", sv->last_data_size); + goto bad_vid_hdr; + } + } + + /* + * Make sure that all 
the physical eraseblocks are in one of the lists + * or trees. + */ + buf = kzalloc(ubi->peb_count, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + for (pnum = 0; pnum < ubi->peb_count; pnum++) { + err = ubi_io_is_bad(ubi, pnum); + if (err < 0) { + kfree(buf); + return err; + } + else if (err) + buf[pnum] = 1; + } + + ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) + ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) + buf[seb->pnum] = 1; + + list_for_each_entry(seb, &si->free, u.list) + buf[seb->pnum] = 1; + + list_for_each_entry(seb, &si->corr, u.list) + buf[seb->pnum] = 1; + + list_for_each_entry(seb, &si->erase, u.list) + buf[seb->pnum] = 1; + + list_for_each_entry(seb, &si->alien, u.list) + buf[seb->pnum] = 1; + + err = 0; + for (pnum = 0; pnum < ubi->peb_count; pnum++) + if (!buf[pnum]) { + ubi_err("PEB %d is not referred", pnum); + err = 1; + } + + kfree(buf); + if (err) + goto out; + return 0; + +bad_seb: + ubi_err("bad scanning information about LEB %d", seb->lnum); + ubi_dbg_dump_seb(seb, 0); + ubi_dbg_dump_sv(sv); + goto out; + +bad_sv: + ubi_err("bad scanning information about volume %d", sv->vol_id); + ubi_dbg_dump_sv(sv); + goto out; + +bad_vid_hdr: + ubi_err("bad scanning information about volume %d", sv->vol_id); + ubi_dbg_dump_sv(sv); + ubi_dbg_dump_vid_hdr(vidh); + +out: + ubi_dbg_dump_stack(); + return 1; +} + +#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ diff --git a/fs/ubi/scan.h b/fs/ubi/scan.h new file mode 100755 index 0000000..966b9b6 --- /dev/null +++ b/fs/ubi/scan.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +#ifndef __UBI_SCAN_H__ +#define __UBI_SCAN_H__ + +/* The erase counter value for this physical eraseblock is unknown */ +#define UBI_SCAN_UNKNOWN_EC (-1) + +/** + * struct ubi_scan_leb - scanning information about a physical eraseblock. + * @ec: erase counter (%UBI_SCAN_UNKNOWN_EC if it is unknown) + * @pnum: physical eraseblock number + * @lnum: logical eraseblock number + * @scrub: if this physical eraseblock needs scrubbing + * @sqnum: sequence number + * @u: unions RB-tree or @list links + * @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects + * @u.list: link in one of the eraseblock lists + * @leb_ver: logical eraseblock version (obsolete) + * + * One object of this type is allocated for each physical eraseblock during + * scanning. + */ +struct ubi_scan_leb { + int ec; + int pnum; + int lnum; + int scrub; + unsigned long long sqnum; + union { + struct rb_node rb; + struct list_head list; + } u; + uint32_t leb_ver; +}; + +/** + * struct ubi_scan_volume - scanning information about a volume. 
+ * @vol_id: volume ID + * @highest_lnum: highest logical eraseblock number in this volume + * @leb_count: number of logical eraseblocks in this volume + * @vol_type: volume type + * @used_ebs: number of used logical eraseblocks in this volume (only for + * static volumes) + * @last_data_size: amount of data in the last logical eraseblock of this + * volume (always equivalent to the usable logical eraseblock size in case of + * dynamic volumes) + * @data_pad: how many bytes at the end of logical eraseblocks of this volume + * are not used (due to volume alignment) + * @compat: compatibility flags of this volume + * @rb: link in the volume RB-tree + * @root: root of the RB-tree containing all the eraseblock belonging to this + * volume (&struct ubi_scan_leb objects) + * + * One object of this type is allocated for each volume during scanning. + */ +struct ubi_scan_volume { + int vol_id; + int highest_lnum; + int leb_count; + int vol_type; + int used_ebs; + int last_data_size; + int data_pad; + int compat; + struct rb_node rb; + struct rb_root root; +}; + +/** + * struct ubi_scan_info - UBI scanning information. 
+ * @volumes: root of the volume RB-tree + * @corr: list of corrupted physical eraseblocks + * @free: list of free physical eraseblocks + * @erase: list of physical eraseblocks which have to be erased + * @alien: list of physical eraseblocks which should not be used by UBI (e.g., + * those belonging to "preserve"-compatible internal volumes) + * @bad_peb_count: count of bad physical eraseblocks + * @vols_found: number of volumes found during scanning + * @highest_vol_id: highest volume ID + * @alien_peb_count: count of physical eraseblocks in the @alien list + * @is_empty: flag indicating whether the MTD device is empty or not + * @min_ec: lowest erase counter value + * @max_ec: highest erase counter value + * @max_sqnum: highest sequence number value + * @mean_ec: mean erase counter value + * @ec_sum: a temporary variable used when calculating @mean_ec + * @ec_count: a temporary variable used when calculating @mean_ec + * + * This data structure contains the result of scanning and may be used by other + * UBI units to build final UBI data structures, further error-recovery and so + * on. + */ +struct ubi_scan_info { + struct rb_root volumes; + struct list_head corr; + struct list_head free; + struct list_head erase; + struct list_head alien; + int bad_peb_count; + int vols_found; + int highest_vol_id; + int alien_peb_count; + int is_empty; + int min_ec; + int max_ec; + unsigned long long max_sqnum; + int mean_ec; + uint64_t ec_sum; + int ec_count; +}; + +struct ubi_device; +struct ubi_vid_hdr; + +/* + * ubi_scan_move_to_list - move a physical eraseblock from the volume tree to a + * list. 
+ * + * @sv: volume scanning information + * @seb: scanning eraseblock information + * @list: the list to move to + */ +static inline void ubi_scan_move_to_list(struct ubi_scan_volume *sv, + struct ubi_scan_leb *seb, + struct list_head *list) +{ + rb_erase(&seb->u.rb, &sv->root); + list_add_tail(&seb->u.list, list); +} + +int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, + int pnum, int ec, const struct ubi_vid_hdr *vid_hdr, + int bitflips); +struct ubi_scan_volume *ubi_scan_find_sv(const struct ubi_scan_info *si, + int vol_id); +struct ubi_scan_leb *ubi_scan_find_seb(const struct ubi_scan_volume *sv, + int lnum); +void ubi_scan_rm_volume(struct ubi_scan_info *si, struct ubi_scan_volume *sv); +struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi, + struct ubi_scan_info *si); +int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si, + int pnum, int ec); +struct ubi_scan_info *ubi_scan(struct ubi_device *ubi); +void ubi_scan_destroy_si(struct ubi_scan_info *si); + +#endif /* !__UBI_SCAN_H__ */ diff --git a/fs/ubi/ubi-media.h b/fs/ubi/ubi-media.h new file mode 100755 index 0000000..c3185d9 --- /dev/null +++ b/fs/ubi/ubi-media.h @@ -0,0 +1,372 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Thomas Gleixner + * Frank Haverkamp + * Oliver Lohmann + * Andreas Arnez + */ + +/* + * This file defines the layout of UBI headers and all the other UBI on-flash + * data structures. + */ + +#ifndef __UBI_MEDIA_H__ +#define __UBI_MEDIA_H__ + +#include <asm/byteorder.h> + +/* The version of UBI images supported by this implementation */ +#define UBI_VERSION 1 + +/* The highest erase counter value supported by this implementation */ +#define UBI_MAX_ERASECOUNTER 0x7FFFFFFF + +/* The initial CRC32 value used when calculating CRC checksums */ +#define UBI_CRC32_INIT 0xFFFFFFFFU + +/* Erase counter header magic number (ASCII "UBI#") */ +#define UBI_EC_HDR_MAGIC 0x55424923 +/* Volume identifier header magic number (ASCII "UBI!") */ +#define UBI_VID_HDR_MAGIC 0x55424921 + +/* + * Volume type constants used in the volume identifier header. + * + * @UBI_VID_DYNAMIC: dynamic volume + * @UBI_VID_STATIC: static volume + */ +enum { + UBI_VID_DYNAMIC = 1, + UBI_VID_STATIC = 2 +}; + +/* + * Volume flags used in the volume table record. + * + * @UBI_VTBL_AUTORESIZE_FLG: auto-resize this volume + * + * %UBI_VTBL_AUTORESIZE_FLG flag can be set only for one volume in the volume + * table. UBI automatically re-sizes the volume which has this flag and makes + * the volume to be of largest possible size. This means that if after the + * initialization UBI finds out that there are available physical eraseblocks + * present on the device, it automatically appends all of them to the volume + * (the physical eraseblocks reserved for bad eraseblocks handling and other + * reserved physical eraseblocks are not taken). 
So, if there is a volume with + * the %UBI_VTBL_AUTORESIZE_FLG flag set, the amount of available logical + * eraseblocks will be zero after UBI is loaded, because all of them will be + * reserved for this volume. Note, the %UBI_VTBL_AUTORESIZE_FLG bit is cleared + * after the volume had been initialized. + * + * The auto-resize feature is useful for device production purposes. For + * example, different NAND flash chips may have different amount of initial bad + * eraseblocks, depending of particular chip instance. Manufacturers of NAND + * chips usually guarantee that the amount of initial bad eraseblocks does not + * exceed certain percent, e.g. 2%. When one creates an UBI image which will be + * flashed to the end devices in production, he does not know the exact amount + * of good physical eraseblocks the NAND chip on the device will have, but this + * number is required to calculate the volume sized and put them to the volume + * table of the UBI image. In this case, one of the volumes (e.g., the one + * which will store the root file system) is marked as "auto-resizable", and + * UBI will adjust its size on the first boot if needed. + * + * Note, first UBI reserves some amount of physical eraseblocks for bad + * eraseblock handling, and then re-sizes the volume, not vice-versa. This + * means that the pool of reserved physical eraseblocks will always be present. + */ +enum { + UBI_VTBL_AUTORESIZE_FLG = 0x01, +}; + +/* + * Compatibility constants used by internal volumes. 
+ * + * @UBI_COMPAT_DELETE: delete this internal volume before anything is written + * to the flash + * @UBI_COMPAT_RO: attach this device in read-only mode + * @UBI_COMPAT_PRESERVE: preserve this internal volume - do not touch its + * physical eraseblocks, don't allow the wear-leveling unit to move them + * @UBI_COMPAT_REJECT: reject this UBI image + */ +enum { + UBI_COMPAT_DELETE = 1, + UBI_COMPAT_RO = 2, + UBI_COMPAT_PRESERVE = 4, + UBI_COMPAT_REJECT = 5 +}; + +/* Sizes of UBI headers */ +#define UBI_EC_HDR_SIZE sizeof(struct ubi_ec_hdr) +#define UBI_VID_HDR_SIZE sizeof(struct ubi_vid_hdr) + +/* Sizes of UBI headers without the ending CRC */ +#define UBI_EC_HDR_SIZE_CRC (UBI_EC_HDR_SIZE - sizeof(__be32)) +#define UBI_VID_HDR_SIZE_CRC (UBI_VID_HDR_SIZE - sizeof(__be32)) + +/** + * struct ubi_ec_hdr - UBI erase counter header. + * @magic: erase counter header magic number (%UBI_EC_HDR_MAGIC) + * @version: version of UBI implementation which is supposed to accept this + * UBI image + * @padding1: reserved for future, zeroes + * @ec: the erase counter + * @vid_hdr_offset: where the VID header starts + * @data_offset: where the user data start + * @padding2: reserved for future, zeroes + * @hdr_crc: erase counter header CRC checksum + * + * The erase counter header takes 64 bytes and has a plenty of unused space for + * future usage. The unused fields are zeroed. The @version field is used to + * indicate the version of UBI implementation which is supposed to be able to + * work with this UBI image. If @version is greater then the current UBI + * version, the image is rejected. This may be useful in future if something + * is changed radically. This field is duplicated in the volume identifier + * header. + * + * The @vid_hdr_offset and @data_offset fields contain the offset of the the + * volume identifier header and user data, relative to the beginning of the + * physical eraseblock. These values have to be the same for all physical + * eraseblocks. 
+ */ +struct ubi_ec_hdr { + __be32 magic; + __u8 version; + __u8 padding1[3]; + __be64 ec; /* Warning: the current limit is 31-bit anyway! */ + __be32 vid_hdr_offset; + __be32 data_offset; + __u8 padding2[36]; + __be32 hdr_crc; +} __attribute__ ((packed)); + +/** + * struct ubi_vid_hdr - on-flash UBI volume identifier header. + * @magic: volume identifier header magic number (%UBI_VID_HDR_MAGIC) + * @version: UBI implementation version which is supposed to accept this UBI + * image (%UBI_VERSION) + * @vol_type: volume type (%UBI_VID_DYNAMIC or %UBI_VID_STATIC) + * @copy_flag: if this logical eraseblock was copied from another physical + * eraseblock (for wear-leveling reasons) + * @compat: compatibility of this volume (%0, %UBI_COMPAT_DELETE, + * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT) + * @vol_id: ID of this volume + * @lnum: logical eraseblock number + * @leb_ver: version of this logical eraseblock (IMPORTANT: obsolete, to be + * removed, kept only for not breaking older UBI users) + * @data_size: how many bytes of data this logical eraseblock contains + * @used_ebs: total number of used logical eraseblocks in this volume + * @data_pad: how many bytes at the end of this physical eraseblock are not + * used + * @data_crc: CRC checksum of the data stored in this logical eraseblock + * @padding1: reserved for future, zeroes + * @sqnum: sequence number + * @padding2: reserved for future, zeroes + * @hdr_crc: volume identifier header CRC checksum + * + * The @sqnum is the value of the global sequence counter at the time when this + * VID header was created. The global sequence counter is incremented each time + * UBI writes a new VID header to the flash, i.e. when it maps a logical + * eraseblock to a new physical eraseblock. The global sequence counter is an + * unsigned 64-bit integer and we assume it never overflows. The @sqnum + * (sequence number) is used to distinguish between older and newer versions of + * logical eraseblocks. 
+ * + * There are 2 situations when there may be more then one physical eraseblock + * corresponding to the same logical eraseblock, i.e., having the same @vol_id + * and @lnum values in the volume identifier header. Suppose we have a logical + * eraseblock L and it is mapped to the physical eraseblock P. + * + * 1. Because UBI may erase physical eraseblocks asynchronously, the following + * situation is possible: L is asynchronously erased, so P is scheduled for + * erasure, then L is written to,i.e. mapped to another physical eraseblock P1, + * so P1 is written to, then an unclean reboot happens. Result - there are 2 + * physical eraseblocks P and P1 corresponding to the same logical eraseblock + * L. But P1 has greater sequence number, so UBI picks P1 when it attaches the + * flash. + * + * 2. From time to time UBI moves logical eraseblocks to other physical + * eraseblocks for wear-leveling reasons. If, for example, UBI moves L from P + * to P1, and an unclean reboot happens before P is physically erased, there + * are two physical eraseblocks P and P1 corresponding to L and UBI has to + * select one of them when the flash is attached. The @sqnum field says which + * PEB is the original (obviously P will have lower @sqnum) and the copy. But + * it is not enough to select the physical eraseblock with the higher sequence + * number, because the unclean reboot could have happen in the middle of the + * copying process, so the data in P is corrupted. It is also not enough to + * just select the physical eraseblock with lower sequence number, because the + * data there may be old (consider a case if more data was added to P1 after + * the copying). Moreover, the unclean reboot may happen when the erasure of P + * was just started, so it result in unstable P, which is "mostly" OK, but + * still has unstable bits. + * + * UBI uses the @copy_flag field to indicate that this logical eraseblock is a + * copy. 
UBI also calculates data CRC when the data is moved and stores it at + * the @data_crc field of the copy (P1). So when UBI needs to pick one physical + * eraseblock of two (P or P1), the @copy_flag of the newer one (P1) is + * examined. If it is cleared, the situation* is simple and the newer one is + * picked. If it is set, the data CRC of the copy (P1) is examined. If the CRC + * checksum is correct, this physical eraseblock is selected (P1). Otherwise + * the older one (P) is selected. + * + * Note, there is an obsolete @leb_ver field which was used instead of @sqnum + * in the past. But it is not used anymore and we keep it in order to be able + * to deal with old UBI images. It will be removed at some point. + * + * There are 2 sorts of volumes in UBI: user volumes and internal volumes. + * Internal volumes are not seen from outside and are used for various internal + * UBI purposes. In this implementation there is only one internal volume - the + * layout volume. Internal volumes are the main mechanism of UBI extensions. + * For example, in future one may introduce a journal internal volume. Internal + * volumes have their own reserved range of IDs. + * + * The @compat field is only used for internal volumes and contains the "degree + * of their compatibility". It is always zero for user volumes. This field + * provides a mechanism to introduce UBI extensions and to be still compatible + * with older UBI binaries. For example, if someone introduced a journal in + * future, he would probably use %UBI_COMPAT_DELETE compatibility for the + * journal volume. And in this case, older UBI binaries, which know nothing + * about the journal volume, would just delete this volume and work perfectly + * fine. This is similar to what Ext2fs does when it is fed by an Ext3fs image + * - it just ignores the Ext3fs journal. + * + * The @data_crc field contains the CRC checksum of the contents of the logical + * eraseblock if this is a static volume. 
In case of dynamic volumes, it does + * not contain the CRC checksum as a rule. The only exception is when the + * data of the physical eraseblock was moved by the wear-leveling unit, then + * the wear-leveling unit calculates the data CRC and stores it in the + * @data_crc field. And of course, the @copy_flag is %in this case. + * + * The @data_size field is used only for static volumes because UBI has to know + * how many bytes of data are stored in this eraseblock. For dynamic volumes, + * this field usually contains zero. The only exception is when the data of the + * physical eraseblock was moved to another physical eraseblock for + * wear-leveling reasons. In this case, UBI calculates CRC checksum of the + * contents and uses both @data_crc and @data_size fields. In this case, the + * @data_size field contains data size. + * + * The @used_ebs field is used only for static volumes and indicates how many + * eraseblocks the data of the volume takes. For dynamic volumes this field is + * not used and always contains zero. + * + * The @data_pad is calculated when volumes are created using the alignment + * parameter. So, effectively, the @data_pad field reduces the size of logical + * eraseblocks of this volume. This is very handy when one uses block-oriented + * software (say, cramfs) on top of the UBI volume. + */ +struct ubi_vid_hdr { + __be32 magic; + __u8 version; + __u8 vol_type; + __u8 copy_flag; + __u8 compat; + __be32 vol_id; + __be32 lnum; + __be32 leb_ver; /* obsolete, to be removed, don't use */ + __be32 data_size; + __be32 used_ebs; + __be32 data_pad; + __be32 data_crc; + __u8 padding1[4]; + __be64 sqnum; + __u8 padding2[12]; + __be32 hdr_crc; +} __attribute__ ((packed)); + +/* Internal UBI volumes count */ +#define UBI_INT_VOL_COUNT 1 + +/* + * Starting ID of internal volumes. There is reserved room for 4096 internal + * volumes. 
+ */ +#define UBI_INTERNAL_VOL_START (0x7FFFFFFF - 4096) + +/* The layout volume contains the volume table */ + +#define UBI_LAYOUT_VOLUME_ID UBI_INTERNAL_VOL_START +#define UBI_LAYOUT_VOLUME_TYPE UBI_VID_DYNAMIC +#define UBI_LAYOUT_VOLUME_ALIGN 1 +#define UBI_LAYOUT_VOLUME_EBS 2 +#define UBI_LAYOUT_VOLUME_NAME "layout volume" +#define UBI_LAYOUT_VOLUME_COMPAT UBI_COMPAT_REJECT + +/* The maximum number of volumes per one UBI device */ +#define UBI_MAX_VOLUMES 128 + +/* The maximum volume name length */ +#define UBI_VOL_NAME_MAX 127 + +/* Size of the volume table record */ +#define UBI_VTBL_RECORD_SIZE sizeof(struct ubi_vtbl_record) + +/* Size of the volume table record without the ending CRC */ +#define UBI_VTBL_RECORD_SIZE_CRC (UBI_VTBL_RECORD_SIZE - sizeof(__be32)) + +/** + * struct ubi_vtbl_record - a record in the volume table. + * @reserved_pebs: how many physical eraseblocks are reserved for this volume + * @alignment: volume alignment + * @data_pad: how many bytes are unused at the end of the each physical + * eraseblock to satisfy the requested alignment + * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) + * @upd_marker: if volume update was started but not finished + * @name_len: volume name length + * @name: the volume name + * @flags: volume flags (%UBI_VTBL_AUTORESIZE_FLG) + * @padding: reserved, zeroes + * @crc: a CRC32 checksum of the record + * + * The volume table records are stored in the volume table, which is stored in + * the layout volume. The layout volume consists of 2 logical eraseblock, each + * of which contains a copy of the volume table (i.e., the volume table is + * duplicated). The volume table is an array of &struct ubi_vtbl_record + * objects indexed by the volume ID. + * + * If the size of the logical eraseblock is large enough to fit + * %UBI_MAX_VOLUMES records, the volume table contains %UBI_MAX_VOLUMES + * records. 
Otherwise, it contains as many records as it can fit (i.e., size of + * logical eraseblock divided by sizeof(struct ubi_vtbl_record)). + * + * The @upd_marker flag is used to implement volume update. It is set to %1 + * before update and set to %0 after the update. So if the update operation was + * interrupted, UBI knows that the volume is corrupted. + * + * The @alignment field is specified when the volume is created and cannot be + * later changed. It may be useful, for example, when a block-oriented file + * system works on top of UBI. The @data_pad field is calculated using the + * logical eraseblock size and @alignment. The alignment must be multiple to the + * minimal flash I/O unit. If @alignment is 1, all the available space of + * the physical eraseblocks is used. + * + * Empty records contain all zeroes and the CRC checksum of those zeroes. + */ +struct ubi_vtbl_record { + __be32 reserved_pebs; + __be32 alignment; + __be32 data_pad; + __u8 vol_type; + __u8 upd_marker; + __be16 name_len; + __u8 name[UBI_VOL_NAME_MAX+1]; + __u8 flags; + __u8 padding[23]; + __be32 crc; +} __attribute__ ((packed)); + +#endif /* !__UBI_MEDIA_H__ */ diff --git a/fs/ubi/ubi.h b/fs/ubi/ubi.h new file mode 100755 index 0000000..dbf0bb7 --- /dev/null +++ b/fs/ubi/ubi.h @@ -0,0 +1,712 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2006, 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Ðртём) + */ + +#ifndef __UBI_UBI_H__ +#define __UBI_UBI_H__ + +#ifdef UBI_LINUX +#include <linux/init.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/mutex.h> +#include <linux/rwsem.h> +#include <linux/spinlock.h> +#include <linux/fs.h> +#include <linux/cdev.h> +#include <linux/device.h> +#include <linux/string.h> +#include <linux/vmalloc.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/ubi.h> +#endif + +#include <linux/types.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/string.h> +#include <linux/mtd/ubi.h> + +#include "ubi-media.h" +#include "scan.h" +#include "debug.h" + +#define kmalloc(size, flags) malloc(size) +#define kfree(size) free(size) +#define kzalloc(size, flags) calloc(size, 1) + +#define printk printf + +#define vmalloc(a) malloc(a) +#define vfree(ptr) do { if (ptr != NULL) free(ptr); } while(0) + +/*Johnny Liu*/ +#define CONFIG_MTD_DEVICE 1 +#define CONFIG_MTD_UBIFS +#define CONFIG_CMD_UBI +#define CONFIG_LZO 1 +#define CONFIG_RBTREE 1 + +/* Maximum number of supported UBI devices */ +#define UBI_MAX_DEVICES 32 + +/* UBI name used for character devices, sysfs, etc */ +#define UBI_NAME_STR "ubi" + +/* Normal UBI messages */ +#define ubi_msg(fmt, ...) printk(KERN_NOTICE "UBI: " fmt "\n", ##__VA_ARGS__) +/* UBI warning messages */ +#define ubi_warn(fmt, ...) printk(KERN_WARNING "UBI warning: %s: " fmt "\n", \ + __func__, ##__VA_ARGS__) +/* UBI error messages */ +#define ubi_err(fmt, ...) 
printk(KERN_ERR "UBI error: %s: " fmt "\n", \ + __func__, ##__VA_ARGS__) + +/* Lowest number PEBs reserved for bad PEB handling */ +#define MIN_RESEVED_PEBS 2 + +/* Background thread name pattern */ +#define UBI_BGT_NAME_PATTERN "ubi_bgt%dd" + +/* This marker in the EBA table means that the LEB is um-mapped */ +#define UBI_LEB_UNMAPPED -1 + +/* + * In case of errors, UBI tries to repeat the operation several times before + * returning error. The below constant defines how many times UBI re-tries. + */ +#define UBI_IO_RETRIES 3 + +/* + * Error codes returned by the I/O unit. + * + * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only + * 0xFF bytes + * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a + * valid erase counter header, and the rest are %0xFF bytes + * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC) + * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or + * CRC) + * UBI_IO_BITFLIPS: bit-flips were detected and corrected + */ +enum { + UBI_IO_PEB_EMPTY = 1, + UBI_IO_PEB_FREE, + UBI_IO_BAD_EC_HDR, + UBI_IO_BAD_VID_HDR, + UBI_IO_BITFLIPS +}; + + +struct mtd_info { + u_char type; + u_int32_t flags; + uint64_t size; /* Total size of the MTD */ + + u_int32_t erasesize; + u_int32_t writesize; + + u_int32_t oobsize; /* Amount of OOB data per block (e.g. 
16) */ + u_int32_t oobavail; /* Available OOB bytes per block */ + + const char *name; + int index; + + int numeraseregions; + + int subpage_sft; + + void *priv; + + struct module *owner; + int usecount; + +}; + + +struct mtd_partition { + char *name; /* identifier string */ + uint64_t size; /* partition size */ + uint64_t offset; /* offset within the master MTD space */ + u_int32_t mask_flags; /* master MTD flags to mask out for this partition */ + struct nand_ecclayout *ecclayout; /* out of band layout for this partition (NAND only)*/ + struct mtd_info **mtdp; /* pointer to store the MTD object */ +}; + +#define MTDPART_OFS_NXTBLK (-2) +#define MTDPART_OFS_APPEND (-1) +#define MTDPART_SIZ_FULL (0) + + +int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, int); +int del_mtd_partitions(struct mtd_info *); + + +struct erase_info { + uint64_t addr; + uint64_t len; + uint64_t fail_addr; + u_long time; + u_long retries; + u_int dev; + u_int cell; + void (*callback) (struct erase_info *self); + u_long priv; + u_char state; + struct erase_info *next; +}; +/** + * struct ubi_wl_entry - wear-leveling entry. + * @rb: link in the corresponding RB-tree + * @ec: erase counter + * @pnum: physical eraseblock number + * + * This data structure is used in the WL unit. Each physical eraseblock has a + * corresponding &struct wl_entry object which may be kept in different + * RB-trees. See WL unit for details. + */ +struct ubi_wl_entry { + struct rb_node rb; + int ec; + int pnum; +}; + +/** + * struct ubi_ltree_entry - an entry in the lock tree. + * @rb: links RB-tree nodes + * @vol_id: volume ID of the locked logical eraseblock + * @lnum: locked logical eraseblock number + * @users: how many tasks are using this logical eraseblock or wait for it + * @mutex: read/write mutex to implement read/write access serialization to + * the (@vol_id, @lnum) logical eraseblock + * + * This data structure is used in the EBA unit to implement per-LEB locking. 
+ * When a logical eraseblock is being locked - corresponding + * &struct ubi_ltree_entry object is inserted to the lock tree (@ubi->ltree). + * See EBA unit for details. + */ +struct ubi_ltree_entry { + struct rb_node rb; + int vol_id; + int lnum; + int users; + struct rw_semaphore mutex; +}; + +struct ubi_volume_desc; + +/** + * struct ubi_volume - UBI volume description data structure. + * @dev: device object to make use of the the Linux device model + * @cdev: character device object to create character device + * @ubi: reference to the UBI device description object + * @vol_id: volume ID + * @ref_count: volume reference count + * @readers: number of users holding this volume in read-only mode + * @writers: number of users holding this volume in read-write mode + * @exclusive: whether somebody holds this volume in exclusive mode + * + * @reserved_pebs: how many physical eraseblocks are reserved for this volume + * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) + * @usable_leb_size: logical eraseblock size without padding + * @used_ebs: how many logical eraseblocks in this volume contain data + * @last_eb_bytes: how many bytes are stored in the last logical eraseblock + * @used_bytes: how many bytes of data this volume contains + * @alignment: volume alignment + * @data_pad: how many bytes are not used at the end of physical eraseblocks to + * satisfy the requested alignment + * @name_len: volume name length + * @name: volume name + * + * @upd_ebs: how many eraseblocks are expected to be updated + * @ch_lnum: LEB number which is being changing by the atomic LEB change + * operation + * @ch_dtype: data persistency type which is being changing by the atomic LEB + * change operation + * @upd_bytes: how many bytes are expected to be received for volume update or + * atomic LEB change + * @upd_received: how many bytes were already received for volume update or + * atomic LEB change + * @upd_buf: update buffer which is used to collect update data 
or data for + * atomic LEB change + * + * @eba_tbl: EBA table of this volume (LEB->PEB mapping) + * @checked: %1 if this static volume was checked + * @corrupted: %1 if the volume is corrupted (static volumes only) + * @upd_marker: %1 if the update marker is set for this volume + * @updating: %1 if the volume is being updated + * @changing_leb: %1 if the atomic LEB change ioctl command is in progress + * + * @gluebi_desc: gluebi UBI volume descriptor + * @gluebi_refcount: reference count of the gluebi MTD device + * @gluebi_mtd: MTD device description object of the gluebi MTD device + * + * The @corrupted field indicates that the volume's contents is corrupted. + * Since UBI protects only static volumes, this field is not relevant to + * dynamic volumes - it is user's responsibility to assure their data + * integrity. + * + * The @upd_marker flag indicates that this volume is either being updated at + * the moment or is damaged because of an unclean reboot. + */ +struct ubi_volume { + struct device dev; + struct cdev cdev; + struct ubi_device *ubi; + int vol_id; + int ref_count; + int readers; + int writers; + int exclusive; + + int reserved_pebs; + int vol_type; + int usable_leb_size; + int used_ebs; + int last_eb_bytes; + long long used_bytes; + int alignment; + int data_pad; + int name_len; + char name[UBI_VOL_NAME_MAX+1]; + + int upd_ebs; + int ch_lnum; + int ch_dtype; + long long upd_bytes; + long long upd_received; + void *upd_buf; + + int *eba_tbl; + unsigned int checked:1; + unsigned int corrupted:1; + unsigned int upd_marker:1; + unsigned int updating:1; + unsigned int changing_leb:1; + +#ifdef CONFIG_MTD_UBI_GLUEBI + /* + * Gluebi-related stuff may be compiled out. + * TODO: this should not be built into UBI but should be a separate + * ubimtd driver which works on top of UBI and emulates MTD devices. 
+ */ + struct ubi_volume_desc *gluebi_desc; + int gluebi_refcount; + struct mtd_info gluebi_mtd; +#endif +}; + +/** + * struct ubi_volume_desc - descriptor of the UBI volume returned when it is + * opened. + * @vol: reference to the corresponding volume description object + * @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE) + */ +struct ubi_volume_desc { + struct ubi_volume *vol; + int mode; +}; + +struct ubi_wl_entry; + +/** + * struct ubi_device - UBI device description structure + * @dev: UBI device object to use the the Linux device model + * @cdev: character device object to create character device + * @ubi_num: UBI device number + * @ubi_name: UBI device name + * @vol_count: number of volumes in this UBI device + * @volumes: volumes of this UBI device + * @volumes_lock: protects @volumes, @rsvd_pebs, @avail_pebs, beb_rsvd_pebs, + * @beb_rsvd_level, @bad_peb_count, @good_peb_count, @vol_count, + * @vol->readers, @vol->writers, @vol->exclusive, + * @vol->ref_count, @vol->mapping and @vol->eba_tbl. 
+ * @ref_count: count of references on the UBI device + * + * @rsvd_pebs: count of reserved physical eraseblocks + * @avail_pebs: count of available physical eraseblocks + * @beb_rsvd_pebs: how many physical eraseblocks are reserved for bad PEB + * handling + * @beb_rsvd_level: normal level of PEBs reserved for bad PEB handling + * + * @autoresize_vol_id: ID of the volume which has to be auto-resized at the end + * of UBI initialization + * @vtbl_slots: how many slots are available in the volume table + * @vtbl_size: size of the volume table in bytes + * @vtbl: in-RAM volume table copy + * @volumes_mutex: protects on-flash volume table and serializes volume + * changes, like creation, deletion, update, resize + * + * @max_ec: current highest erase counter value + * @mean_ec: current mean erase counter value + * + * @global_sqnum: global sequence number + * @ltree_lock: protects the lock tree and @global_sqnum + * @ltree: the lock tree + * @alc_mutex: serializes "atomic LEB change" operations + * + * @used: RB-tree of used physical eraseblocks + * @free: RB-tree of free physical eraseblocks + * @scrub: RB-tree of physical eraseblocks which need scrubbing + * @prot: protection trees + * @prot.pnum: protection tree indexed by physical eraseblock numbers + * @prot.aec: protection tree indexed by absolute erase counter value + * @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from, + * @move_to, @move_to_put, @erase_pending, @wl_scheduled, and @works + * fields + * @move_mutex: serializes eraseblock moves + * @wl_scheduled: non-zero if the wear-leveling was scheduled + * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any + * physical eraseblock + * @abs_ec: absolute erase counter + * @move_from: physical eraseblock from where the data is being moved + * @move_to: physical eraseblock where the data is being moved to + * @move_to_put: if the "to" PEB was put + * @works: list of pending works + * @works_count: count of pending
works + * @bgt_thread: background thread description object + * @thread_enabled: if the background thread is enabled + * @bgt_name: background thread name + * + * @flash_size: underlying MTD device size (in bytes) + * @peb_count: count of physical eraseblocks on the MTD device + * @peb_size: physical eraseblock size + * @bad_peb_count: count of bad physical eraseblocks + * @good_peb_count: count of good physical eraseblocks + * @min_io_size: minimal input/output unit size of the underlying MTD device + * @hdrs_min_io_size: minimal I/O unit size used for VID and EC headers + * @ro_mode: if the UBI device is in read-only mode + * @leb_size: logical eraseblock size + * @leb_start: starting offset of logical eraseblocks within physical + * eraseblocks + * @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size + * @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size + * @vid_hdr_offset: starting offset of the volume identifier header (might be + * unaligned) + * @vid_hdr_aloffset: starting offset of the VID header aligned to + * @hdrs_min_io_size + * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset + * @bad_allowed: whether the MTD device admits of bad physical eraseblocks or + * not + * @mtd: MTD device descriptor + * + * @peb_buf1: a buffer of PEB size used for different purposes + * @peb_buf2: another buffer of PEB size used for different purposes + * @buf_mutex: proptects @peb_buf1 and @peb_buf2 + * @dbg_peb_buf: buffer of PEB size used for debugging + * @dbg_buf_mutex: proptects @dbg_peb_buf + */ +struct ubi_device { + struct cdev cdev; + struct device dev; + int ubi_num; + char ubi_name[sizeof(UBI_NAME_STR)+5]; + int vol_count; + struct ubi_volume *volumes[UBI_MAX_VOLUMES+UBI_INT_VOL_COUNT]; + spinlock_t volumes_lock; + int ref_count; + + int rsvd_pebs; + int avail_pebs; + int beb_rsvd_pebs; + int beb_rsvd_level; + + int autoresize_vol_id; + int vtbl_slots; + int vtbl_size; + struct ubi_vtbl_record *vtbl; + struct mutex 
volumes_mutex; + + int max_ec; + /* TODO: mean_ec is not updated run-time, fix */ + int mean_ec; + + /* EBA unit's stuff */ + unsigned long long global_sqnum; + spinlock_t ltree_lock; + struct rb_root ltree; + struct mutex alc_mutex; + + /* Wear-leveling unit's stuff */ + struct rb_root used; + struct rb_root free; + struct rb_root scrub; + struct { + struct rb_root pnum; + struct rb_root aec; + } prot; + spinlock_t wl_lock; + struct mutex move_mutex; + struct rw_semaphore work_sem; + int wl_scheduled; + struct ubi_wl_entry **lookuptbl; + unsigned long long abs_ec; + struct ubi_wl_entry *move_from; + struct ubi_wl_entry *move_to; + int move_to_put; + struct list_head works; + int works_count; + struct task_struct *bgt_thread; + int thread_enabled; + char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2]; + + /* I/O unit's stuff */ + long long flash_size; + int peb_start; // add by Johnny Liu + int peb_count; + int peb_size; + int bad_peb_count; + int good_peb_count; + int min_io_size; + int hdrs_min_io_size; + int ro_mode; + int leb_size; + int leb_start; + int ec_hdr_alsize; + int vid_hdr_alsize; + int vid_hdr_offset; + int vid_hdr_aloffset; + int vid_hdr_shift; + int bad_allowed; + struct mtd_info *mtd; + + void *peb_buf1; + void *peb_buf2; + struct mutex buf_mutex; + struct mutex ckvol_mutex; + void *dbg_peb_buf; + struct mutex dbg_buf_mutex; +}; + +extern struct kmem_cache *ubi_wl_entry_slab; +extern struct file_operations ubi_ctrl_cdev_operations; +extern struct file_operations ubi_cdev_operations; +extern struct file_operations ubi_vol_cdev_operations; +extern struct class *ubi_class; +extern struct mutex ubi_devices_mutex; + +/* vtbl.c */ +int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, + struct ubi_vtbl_record *vtbl_rec); +int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si); + +/* vmt.c */ +int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req); +int ubi_remove_volume(struct ubi_volume_desc *desc); +int 
ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs); +int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol); +void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol); + +/* upd.c */ +int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol, + long long bytes); +int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol, + const void __user *buf, int count); +int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, + const struct ubi_leb_change_req *req); +int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol, + const void __user *buf, int count); + +/* misc.c */ +int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, int length); +int ubi_check_volume(struct ubi_device *ubi, int vol_id); +void ubi_calculate_reserved(struct ubi_device *ubi); + +/* gluebi.c */ +#ifdef CONFIG_MTD_UBI_GLUEBI +int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol); +int ubi_destroy_gluebi(struct ubi_volume *vol); +void ubi_gluebi_updated(struct ubi_volume *vol); +#else +#define ubi_create_gluebi(ubi, vol) 0 +#define ubi_destroy_gluebi(vol) 0 +#define ubi_gluebi_updated(vol) +#endif + +/* eba.c */ +int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol, + int lnum); +int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, + void *buf, int offset, int len, int check); +int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, + const void *buf, int offset, int len, int dtype); +int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol, + int lnum, const void *buf, int len, int dtype, + int used_ebs); +int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, + int lnum, const void *buf, int len, int dtype); +int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, + struct ubi_vid_hdr *vid_hdr); +int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info 
*si);
+void ubi_eba_close(const struct ubi_device *ubi);
+
+/* wl.c */
+int ubi_wl_get_peb(struct ubi_device *ubi, int dtype);
+int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture);
+int ubi_wl_flush(struct ubi_device *ubi);
+int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum);
+int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si);
+void ubi_wl_close(struct ubi_device *ubi);
+int ubi_thread(void *u);
+
+/* io.c */
+int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset,
+		int len);
+int ubi_io_write(struct ubi_device *ubi, const void *buf, int pnum, int offset,
+		 int len);
+int ubi_io_sync_erase(struct ubi_device *ubi, int pnum, int torture);
+int ubi_io_is_bad(const struct ubi_device *ubi, int pnum);
+int ubi_io_mark_bad(const struct ubi_device *ubi, int pnum);
+int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum,
+		       struct ubi_ec_hdr *ec_hdr, int verbose);
+int ubi_io_write_ec_hdr(struct ubi_device *ubi, int pnum,
+			struct ubi_ec_hdr *ec_hdr);
+int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum,
+			struct ubi_vid_hdr *vid_hdr, int verbose);
+int ubi_io_write_vid_hdr(struct ubi_device *ubi, int pnum,
+			 struct ubi_vid_hdr *vid_hdr);
+
+/* build.c */
+int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset);
+int ubi_detach_mtd_dev(int ubi_num, int anyway);
+struct ubi_device *ubi_get_device(int ubi_num);
+void ubi_put_device(struct ubi_device *ubi);
+struct ubi_device *ubi_get_by_major(int major);
+int ubi_major2num(int major);
+
+/*
+ * ubi_rb_for_each_entry - walk an RB-tree.
+ * @rb: a pointer to type 'struct rb_node' to use as a loop counter
+ * @pos: a pointer to RB-tree entry type to use as a loop counter
+ * @root: RB-tree's root
+ * @member: the name of the 'struct rb_node' within the RB-tree entry
+ *
+ * Both the initialisation and the step guard the container_of() conversion,
+ * so @pos is %NULL (never a pointer derived from a NULL node) once the walk
+ * has run past the last node.
+ */
+#define ubi_rb_for_each_entry(rb, pos, root, member)                         \
+	for (rb = rb_first(root),                                            \
+	     pos = (rb ? container_of(rb, typeof(*pos), member) : NULL);     \
+	     rb;                                                             \
+	     rb = rb_next(rb),                                               \
+	     pos = (rb ? container_of(rb, typeof(*pos), member) : NULL))
+
+/**
+ * ubi_zalloc_vid_hdr - allocate a volume identifier header object.
+ * @ubi: UBI device description object
+ * @gfp_flags: GFP flags to allocate with
+ *
+ * This function returns a pointer to the newly allocated and zero-filled
+ * volume identifier header object in case of success and %NULL in case of
+ * failure. The returned pointer is offset by @ubi->vid_hdr_shift from the
+ * start of the allocation, so it must be released with ubi_free_vid_hdr()
+ * and never passed to kfree() directly.
+ */
+static inline struct ubi_vid_hdr *
+ubi_zalloc_vid_hdr(const struct ubi_device *ubi, gfp_t gfp_flags)
+{
+	void *vid_hdr;
+
+	/* Honour the caller's allocation flags instead of a fixed GFP_KERNEL */
+	vid_hdr = kzalloc(ubi->vid_hdr_alsize, gfp_flags);
+	if (!vid_hdr)
+		return NULL;
+
+	/*
+	 * VID headers may be stored at un-aligned flash offsets, so we shift
+	 * the pointer.
+	 */
+	return vid_hdr + ubi->vid_hdr_shift;
+}
+
+/**
+ * ubi_free_vid_hdr - free a volume identifier header object.
+ * @ubi: UBI device description object
+ * @vid_hdr: the object to free (may be %NULL, in which case nothing is done)
+ */
+static inline void ubi_free_vid_hdr(const struct ubi_device *ubi,
+				    struct ubi_vid_hdr *vid_hdr)
+{
+	void *p = vid_hdr;
+
+	/*
+	 * The NULL check is required here: un-shifting a NULL pointer would
+	 * hand a bogus non-NULL address to kfree().
+	 */
+	if (!p)
+		return;
+
+	/* Undo the shift applied by ubi_zalloc_vid_hdr() */
+	kfree(p - ubi->vid_hdr_shift);
+}
+
+/*
+ * This function is equivalent to 'ubi_io_read()', but @offset is relative to
+ * the beginning of the logical eraseblock, not to the beginning of the
+ * physical eraseblock.
+ */
+static inline int ubi_io_read_data(const struct ubi_device *ubi, void *buf,
+				   int pnum, int offset, int len)
+{
+	ubi_assert(offset >= 0);
+	return ubi_io_read(ubi, buf, pnum, offset + ubi->leb_start, len);
+}
+
+/*
+ * This function is equivalent to 'ubi_io_write()', but @offset is relative to
+ * the beginning of the logical eraseblock, not to the beginning of the
+ * physical eraseblock.
+ */
+static inline int ubi_io_write_data(struct ubi_device *ubi, const void *buf,
+				    int pnum, int offset, int len)
+{
+	ubi_assert(offset >= 0);
+	return ubi_io_write(ubi, buf, pnum, offset + ubi->leb_start, len);
+}
+
+/**
+ * ubi_ro_mode - switch to read-only mode.
+ * @ubi: UBI device description object
+ */
+static inline void ubi_ro_mode(struct ubi_device *ubi)
+{
+	/* Already read-only: nothing to switch and nothing to report */
+	if (ubi->ro_mode)
+		return;
+
+	ubi->ro_mode = 1;
+	ubi_warn("switch to read-only mode");
+}
+
+/**
+ * vol_id2idx - get table index by volume ID.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ *
+ * IDs below %UBI_INTERNAL_VOL_START are returned unchanged; IDs at or above
+ * it are mapped to indices starting at @ubi->vtbl_slots.
+ */
+static inline int vol_id2idx(const struct ubi_device *ubi, int vol_id)
+{
+	if (vol_id < UBI_INTERNAL_VOL_START)
+		return vol_id;
+
+	return vol_id - UBI_INTERNAL_VOL_START + ubi->vtbl_slots;
+}
+
+/**
+ * idx2vol_id - get volume ID by table index.
+ * @ubi: UBI device description object
+ * @idx: table index
+ *
+ * Inverse of vol_id2idx(): indices below @ubi->vtbl_slots are returned
+ * unchanged; the others are mapped to IDs starting at
+ * %UBI_INTERNAL_VOL_START.
+ */
+static inline int idx2vol_id(const struct ubi_device *ubi, int idx)
+{
+	if (idx < ubi->vtbl_slots)
+		return idx;
+
+	return idx - ubi->vtbl_slots + UBI_INTERNAL_VOL_START;
+}
+#endif /* !__UBI_UBI_H__ */
diff --git a/fs/ubi/upd.c b/fs/ubi/upd.c
new file mode 100755
index 0000000..5f7ed7b
--- /dev/null
+++ b/fs/ubi/upd.c
@@ -0,0 +1,441 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ * Copyright (c) Nokia Corporation, 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ *
+ * Jan 2007: Alexander Schmidt, hacked per-volume update.
+ */
+
+/*
+ * This file contains implementation of the volume update and atomic LEB change
+ * functionality.
+ *
+ * The update operation is based on the per-volume update marker which is
+ * stored in the volume table. The update marker is set before the update
+ * starts, and removed after the update has been finished. So if the update was
+ * interrupted by an unclean re-boot or due to some other reasons, the update
+ * marker stays on the flash media and UBI finds it when it attaches the MTD
+ * device next time. If the update marker is set for a volume, the volume is
+ * treated as damaged and most I/O operations are prohibited. Only a new update
+ * operation is allowed.
+ *
+ * Note, in general it is possible to implement the update operation as a
+ * transaction with a roll-back capability.
+ */
+
+#ifdef UBI_LINUX
+#include <linux/err.h>
+#include <asm/uaccess.h>
+#include <asm/div64.h>
+#endif
+
+#include <ubi_uboot.h>
+#include "ubi.h"
+
+/**
+ * set_update_marker - set update marker.
+ * @ubi: UBI device description object
+ * @vol: volume description object
+ *
+ * Raises the update marker of volume @vol, both in the volume table record
+ * and in the in-RAM volume description. Returns zero in case of success
+ * (including when the marker was already set) and a negative error code in
+ * case of failure.
+ */
+static int set_update_marker(struct ubi_device *ubi, struct ubi_volume *vol)
+{
+	struct ubi_vtbl_record rec;
+	int ret;
+
+	dbg_msg("set update marker for volume %d", vol->vol_id);
+
+	if (vol->upd_marker) {
+		/* The in-RAM flag and the volume table copy must agree */
+		ubi_assert(ubi->vtbl[vol->vol_id].upd_marker);
+		dbg_msg("already set");
+		return 0;
+	}
+
+	/* Modify a private copy of the record and hand it to the vtbl code */
+	rec = ubi->vtbl[vol->vol_id];
+	rec.upd_marker = 1;
+
+	mutex_lock(&ubi->volumes_mutex);
+	ret = ubi_change_vtbl_record(ubi, vol->vol_id, &rec);
+	mutex_unlock(&ubi->volumes_mutex);
+
+	/* The in-RAM flag is raised whether or not the table write succeeded */
+	vol->upd_marker = 1;
+	return ret;
+}
+
+/**
+ * clear_update_marker - clear update marker.
+ * @ubi: UBI device description object
+ * @vol: volume description object
+ * @bytes: new data size in bytes
+ *
+ * Drops the update marker of volume @vol. For static volumes it also records
+ * the new data size (@used_bytes, @used_ebs, @last_eb_bytes) and resets the
+ * "corrupted" flag. Returns zero in case of success and a negative error
+ * code in case of failure.
+ */
+static int clear_update_marker(struct ubi_device *ubi, struct ubi_volume *vol,
+			       long long bytes)
+{
+	struct ubi_vtbl_record rec;
+	uint64_t quot;
+	int ret;
+
+	dbg_msg("clear update marker for volume %d", vol->vol_id);
+
+	rec = ubi->vtbl[vol->vol_id];
+	ubi_assert(vol->upd_marker && rec.upd_marker);
+	rec.upd_marker = 0;
+
+	if (vol->vol_type == UBI_STATIC_VOLUME) {
+		vol->corrupted = 0;
+		quot = bytes;
+		vol->used_bytes = quot;
+		/* do_div() leaves the quotient in 'quot', returns remainder */
+		vol->last_eb_bytes = do_div(quot, vol->usable_leb_size);
+		vol->used_ebs = quot;
+		if (!vol->last_eb_bytes)
+			/* Data ends on a LEB boundary: last LEB is full */
+			vol->last_eb_bytes = vol->usable_leb_size;
+		else
+			/* A partially filled LEB follows the full ones */
+			vol->used_ebs += 1;
+	}
+
+	mutex_lock(&ubi->volumes_mutex);
+	ret = ubi_change_vtbl_record(ubi, vol->vol_id, &rec);
+	mutex_unlock(&ubi->volumes_mutex);
+
+	/* The in-RAM flag is dropped whether or not the table write succeeded */
+	vol->upd_marker = 0;
+	return ret;
+}
+
+/**
+ * ubi_start_update - start volume update.
+ * @ubi: UBI device description object
+ * @vol: volume description object
+ * @bytes: update bytes
+ *
+ * This function starts volume update operation. If @bytes is zero, the volume
+ * is just wiped out. Returns zero in case of success and a negative error code
+ * in case of failure.
+ */
+int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol,
+		     long long bytes)
+{
+	int i, err;
+	uint64_t tmp;
+
+	dbg_msg("start update of volume %d, %llu bytes", vol->vol_id, bytes);
+	ubi_assert(!vol->updating && !vol->changing_leb);
+	vol->updating = 1;
+
+	err = set_update_marker(ubi, vol);
+	if (err)
+		return err;
+
+	/* Before updating - wipe out the volume */
+	for (i = 0; i < vol->reserved_pebs; i++) {
+		err = ubi_eba_unmap_leb(ubi, vol, i);
+		if (err)
+			return err;
+	}
+
+	if (bytes == 0) {
+		/*
+		 * Nothing will be written: the wipe above is the whole
+		 * update. Finish it here and return, so that no update
+		 * buffer is allocated for an update that is already over
+		 * and a flush failure is reported to the caller instead of
+		 * being silently dropped.
+		 */
+		err = clear_update_marker(ubi, vol, 0);
+		if (err)
+			return err;
+		err = ubi_wl_flush(ubi);
+		if (!err)
+			vol->updating = 0;
+		return err;
+	}
+
+	vol->upd_buf = vmalloc(ubi->leb_size);
+	if (!vol->upd_buf)
+		return -ENOMEM;
+
+	/* upd_ebs = ceil(bytes / usable_leb_size); do_div() returns remainder */
+	tmp = bytes;
+	vol->upd_ebs = !!do_div(tmp, vol->usable_leb_size);
+	vol->upd_ebs += tmp;
+	vol->upd_bytes = bytes;
+	vol->upd_received = 0;
+	return 0;
+}
+
+/**
+ * ubi_start_leb_change - start atomic LEB change.
+ * @ubi: UBI device description object
+ * @vol: volume description object
+ * @req: operation request
+ *
+ * This function starts atomic LEB change operation. A request of zero bytes
+ * is carried out immediately; otherwise the volume is marked as "changing
+ * LEB" and a buffer of @req->bytes bytes is allocated to collect the data.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol,
+			 const struct ubi_leb_change_req *req)
+{
+	ubi_assert(!vol->updating && !vol->changing_leb);
+
+	dbg_msg("start changing LEB %d:%d, %u bytes",
+		vol->vol_id, req->lnum, req->bytes);
+	if (req->bytes == 0)
+		/* No data to wait for: perform the change right away */
+		return ubi_eba_atomic_leb_change(ubi, vol, req->lnum, NULL, 0,
+						 req->dtype);
+
+	vol->upd_bytes = req->bytes;
+	vol->upd_received = 0;
+	vol->changing_leb = 1;
+	vol->ch_lnum = req->lnum;
+	vol->ch_dtype = req->dtype;
+
+	/*
+	 * NOTE(review): on allocation failure @changing_leb stays set and
+	 * @upd_buf is NULL; presumably the caller cleans this state up -
+	 * confirm against the callers.
+	 */
+	vol->upd_buf = vmalloc(req->bytes);
+	if (!vol->upd_buf)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * write_leb - write update data.
+ * @ubi: UBI device description object
+ * @vol: volume description object
+ * @lnum: logical eraseblock number
+ * @buf: data to write
+ * @len: data size
+ * @used_ebs: how many logical eraseblocks will this volume contain (static
+ *            volumes only)
+ *
+ * This function writes update data to corresponding logical eraseblock. In
+ * case of dynamic volume, this function checks if the data contains 0xFF bytes
+ * at the end. If yes, the 0xFF bytes are cut and not written. So if the whole
+ * buffer contains only 0xFF bytes, the LEB is left unmapped.
+ *
+ * The reason why we skip the trailing 0xFF bytes in case of dynamic volume is
+ * that we want to make sure that more data may be appended to the logical
+ * eraseblock in future. Indeed, writing 0xFF bytes may have side effects and
+ * this PEB won't be writable anymore. So if one writes the file-system image
+ * to the UBI volume where 0xFFs mean free space - UBI makes sure this free
+ * space is writable after the update.
+ *
+ * We do not do this for static volumes because they are read-only. But this
+ * also cannot be done because we have to store per-LEB CRC and the correct
+ * data length.
+ *
+ * Note that @buf is padded in place beyond @len (with 0xFF up to the next
+ * min. I/O unit boundary for dynamic volumes, with zeroes up to the usable
+ * LEB size for static ones), so it must be large enough for that.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
+		     void *buf, int len, int used_ebs)
+{
+	int err;
+
+	if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
+		int l = ALIGN(len, ubi->min_io_size);
+
+		memset(buf + len, 0xFF, l - len);
+		len = ubi_calc_data_len(ubi, buf, l);
+		if (len == 0) {
+			/* Report the examined size; 'len' is zero here */
+			dbg_msg("all %d bytes contain 0xFF - skip", l);
+			return 0;
+		}
+
+		err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len, UBI_UNKNOWN);
+	} else {
+		/*
+		 * When writing static volume, and this is the last logical
+		 * eraseblock, the length (@len) does not have to be aligned to
+		 * the minimal flash I/O unit. The 'ubi_eba_write_leb_st()'
+		 * function accepts exact (unaligned) length and stores it in
+		 * the VID header. And it takes care of proper alignment by
+		 * padding the buffer. Here we just make sure the padding will
+		 * contain zeros, not random trash.
+		 */
+		memset(buf + len, 0, vol->usable_leb_size - len);
+		err = ubi_eba_write_leb_st(ubi, vol, lnum, buf, len,
+					   UBI_UNKNOWN, used_ebs);
+	}
+
+	return err;
+}
+
+/**
+ * ubi_more_update_data - write more update data.
+ * @ubi: UBI device description object
+ * @vol: volume description object
+ * @buf: write data (user-space memory buffer)
+ * @count: how many bytes to write
+ *
+ * This function writes more data to the volume which is being updated. It may
+ * be called arbitrary number of times until all the update data arrives. Data
+ * beyond the announced update size is ignored. This function returns %0 in
+ * case of success, number of bytes written during the last call if the whole
+ * volume update has been successfully finished, and a negative error code in
+ * case of failure (%-EROFS if the device is in read-only mode).
+ */
+int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol,
+			 const void __user *buf, int count)
+{
+	uint64_t tmp;
+	int lnum, offs, err = 0, len, to_write = count;
+
+	dbg_msg("write %d of %lld bytes, %lld already passed",
+		count, vol->upd_bytes, vol->upd_received);
+
+	if (ubi->ro_mode)
+		return -EROFS;
+
+	/* Split the amount received so far into LEB number and LEB offset */
+	tmp = vol->upd_received;
+	offs = do_div(tmp, vol->usable_leb_size);
+	lnum = tmp;
+
+	/* Never accept more than what was announced for this update */
+	if (vol->upd_received + count > vol->upd_bytes)
+		to_write = count = vol->upd_bytes - vol->upd_received;
+
+	/*
+	 * When updating volumes, we accumulate whole logical eraseblock of
+	 * data and write it at once.
+	 */
+	if (offs != 0) {
+		/*
+		 * This is a write to the middle of the logical eraseblock. We
+		 * copy the data to our update buffer and wait for more data or
+		 * flush it if the whole eraseblock is written or the update
+		 * is finished.
+		 */
+
+		len = vol->usable_leb_size - offs;
+		if (len > count)
+			len = count;
+
+		err = copy_from_user(vol->upd_buf + offs, buf, len);
+		if (err)
+			return -EFAULT;
+
+		if (offs + len == vol->usable_leb_size ||
+		    vol->upd_received + len == vol->upd_bytes) {
+			int flush_len = offs + len;
+
+			/*
+			 * OK, we gathered either the whole eraseblock or this
+			 * is the last chunk, it's time to flush the buffer.
+			 */
+			ubi_assert(flush_len <= vol->usable_leb_size);
+			err = write_leb(ubi, vol, lnum, vol->upd_buf, flush_len,
+					vol->upd_ebs);
+			if (err)
+				return err;
+		}
+
+		vol->upd_received += len;
+		count -= len;
+		buf += len;
+		lnum += 1;
+	}
+
+	/*
+	 * If we've got more to write, let's continue. At this point we know we
+	 * are starting from the beginning of an eraseblock.
+	 */
+	while (count) {
+		if (count > vol->usable_leb_size)
+			len = vol->usable_leb_size;
+		else
+			len = count;
+
+		err = copy_from_user(vol->upd_buf, buf, len);
+		if (err)
+			return -EFAULT;
+
+		if (len == vol->usable_leb_size ||
+		    vol->upd_received + len == vol->upd_bytes) {
+			err = write_leb(ubi, vol, lnum, vol->upd_buf,
+					len, vol->upd_ebs);
+			if (err)
+				break;
+		}
+
+		vol->upd_received += len;
+		count -= len;
+		lnum += 1;
+		buf += len;
+	}
+
+	ubi_assert(vol->upd_received <= vol->upd_bytes);
+	if (vol->upd_received == vol->upd_bytes) {
+		/* The update is finished, clear the update marker */
+		err = clear_update_marker(ubi, vol, vol->upd_bytes);
+		if (err)
+			return err;
+		err = ubi_wl_flush(ubi);
+		if (err == 0) {
+			vol->updating = 0;
+			err = to_write;
+			vfree(vol->upd_buf);
+			/* Do not leave a dangling pointer behind */
+			vol->upd_buf = NULL;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * ubi_more_leb_change_data - accept more data for atomic LEB change.
+ * @vol: volume description object
+ * @buf: write data (user-space memory buffer)
+ * @count: how much bytes to write
+ *
+ * This function accepts more data to the volume which is being under the
+ * "atomic LEB change" operation. It may be called arbitrary number of times
+ * until all data arrives.
This function returns %0 in case of success, number + * of bytes written during the last call if the whole "atomic LEB change" + * operation has been successfully finished, and a negative error code in case + * of failure. + */ +int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol, + const void __user *buf, int count) +{ + int err; + + dbg_msg("write %d of %lld bytes, %lld already passed", + count, vol->upd_bytes, vol->upd_received); + + if (ubi->ro_mode) + return -EROFS; + + if (vol->upd_received + count > vol->upd_bytes) + count = vol->upd_bytes - vol->upd_received; + + err = copy_from_user(vol->upd_buf + vol->upd_received, buf, count); + if (err) + return -EFAULT; + + vol->upd_received += count; + + if (vol->upd_received == vol->upd_bytes) { + int len = ALIGN((int)vol->upd_bytes, ubi->min_io_size); + + memset(vol->upd_buf + vol->upd_bytes, 0xFF, len - vol->upd_bytes); + len = ubi_calc_data_len(ubi, vol->upd_buf, len); + err = ubi_eba_atomic_leb_change(ubi, vol, vol->ch_lnum, + vol->upd_buf, len, UBI_UNKNOWN); + if (err) + return err; + } + + ubi_assert(vol->upd_received <= vol->upd_bytes); + if (vol->upd_received == vol->upd_bytes) { + vol->changing_leb = 0; + err = count; + vfree(vol->upd_buf); + } + + return err; +} diff --git a/fs/ubi/vmt.c b/fs/ubi/vmt.c new file mode 100755 index 0000000..061da64 --- /dev/null +++ b/fs/ubi/vmt.c @@ -0,0 +1,862 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Ðртём) + */ + +/* + * This file contains implementation of volume creation, deletion, updating and + * resizing. + */ + +#ifdef UBI_LINUX +#include <linux/err.h> +#include <asm/div64.h> +#endif + +#include <ubi_uboot.h> +#include "ubi.h" + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +static void paranoid_check_volumes(struct ubi_device *ubi); +#else +#define paranoid_check_volumes(ubi) +#endif + +#ifdef UBI_LINUX +static ssize_t vol_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf); + +/* Device attributes corresponding to files in '/<sysfs>/class/ubi/ubiX_Y' */ +static struct device_attribute attr_vol_reserved_ebs = + __ATTR(reserved_ebs, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_type = + __ATTR(type, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_name = + __ATTR(name, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_corrupted = + __ATTR(corrupted, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_alignment = + __ATTR(alignment, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_usable_eb_size = + __ATTR(usable_eb_size, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_data_bytes = + __ATTR(data_bytes, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_upd_marker = + __ATTR(upd_marker, S_IRUGO, vol_attribute_show, NULL); + +/* + * "Show" method for files in '/<sysfs>/class/ubi/ubiX_Y/'. + * + * Consider a situation: + * A. process 1 opens a sysfs file related to volume Y, say + * /<sysfs>/class/ubi/ubiX_Y/reserved_ebs; + * B. process 2 removes volume Y; + * C. 
process 1 starts reading the /<sysfs>/class/ubi/ubiX_Y/reserved_ebs file; + * + * In this situation, this function will return %-ENODEV because it will find + * out that the volume was removed from the @ubi->volumes array. + */ +static ssize_t vol_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); + struct ubi_device *ubi; + + ubi = ubi_get_device(vol->ubi->ubi_num); + if (!ubi) + return -ENODEV; + + spin_lock(&ubi->volumes_lock); + if (!ubi->volumes[vol->vol_id]) { + spin_unlock(&ubi->volumes_lock); + ubi_put_device(ubi); + return -ENODEV; + } + /* Take a reference to prevent volume removal */ + vol->ref_count += 1; + spin_unlock(&ubi->volumes_lock); + + if (attr == &attr_vol_reserved_ebs) + ret = sprintf(buf, "%d\n", vol->reserved_pebs); + else if (attr == &attr_vol_type) { + const char *tp; + + if (vol->vol_type == UBI_DYNAMIC_VOLUME) + tp = "dynamic"; + else + tp = "static"; + ret = sprintf(buf, "%s\n", tp); + } else if (attr == &attr_vol_name) + ret = sprintf(buf, "%s\n", vol->name); + else if (attr == &attr_vol_corrupted) + ret = sprintf(buf, "%d\n", vol->corrupted); + else if (attr == &attr_vol_alignment) + ret = sprintf(buf, "%d\n", vol->alignment); + else if (attr == &attr_vol_usable_eb_size) + ret = sprintf(buf, "%d\n", vol->usable_leb_size); + else if (attr == &attr_vol_data_bytes) + ret = sprintf(buf, "%lld\n", vol->used_bytes); + else if (attr == &attr_vol_upd_marker) + ret = sprintf(buf, "%d\n", vol->upd_marker); + else + /* This must be a bug */ + ret = -EINVAL; + + /* We've done the operation, drop volume and UBI device references */ + spin_lock(&ubi->volumes_lock); + vol->ref_count -= 1; + ubi_assert(vol->ref_count >= 0); + spin_unlock(&ubi->volumes_lock); + ubi_put_device(ubi); + return ret; +} +#endif + +/* Release method for volume devices */ +static void vol_release(struct device *dev) +{ + struct ubi_volume *vol = container_of(dev, 
struct ubi_volume, dev); + + kfree(vol); +} + +#ifdef UBI_LINUX +/** + * volume_sysfs_init - initialize sysfs for new volume. + * @ubi: UBI device description object + * @vol: volume description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + * + * Note, this function does not free allocated resources in case of failure - + * the caller does it. This is because this would cause release() here and the + * caller would oops. + */ +static int volume_sysfs_init(struct ubi_device *ubi, struct ubi_volume *vol) +{ + int err; + + err = device_create_file(&vol->dev, &attr_vol_reserved_ebs); + if (err) + return err; + err = device_create_file(&vol->dev, &attr_vol_type); + if (err) + return err; + err = device_create_file(&vol->dev, &attr_vol_name); + if (err) + return err; + err = device_create_file(&vol->dev, &attr_vol_corrupted); + if (err) + return err; + err = device_create_file(&vol->dev, &attr_vol_alignment); + if (err) + return err; + err = device_create_file(&vol->dev, &attr_vol_usable_eb_size); + if (err) + return err; + err = device_create_file(&vol->dev, &attr_vol_data_bytes); + if (err) + return err; + err = device_create_file(&vol->dev, &attr_vol_upd_marker); + return err; +} + +/** + * volume_sysfs_close - close sysfs for a volume. + * @vol: volume description object + */ +static void volume_sysfs_close(struct ubi_volume *vol) +{ + device_remove_file(&vol->dev, &attr_vol_upd_marker); + device_remove_file(&vol->dev, &attr_vol_data_bytes); + device_remove_file(&vol->dev, &attr_vol_usable_eb_size); + device_remove_file(&vol->dev, &attr_vol_alignment); + device_remove_file(&vol->dev, &attr_vol_corrupted); + device_remove_file(&vol->dev, &attr_vol_name); + device_remove_file(&vol->dev, &attr_vol_type); + device_remove_file(&vol->dev, &attr_vol_reserved_ebs); + device_unregister(&vol->dev); +} +#endif + +/** + * ubi_create_volume - create volume. 
+ * @ubi: UBI device description object + * @req: volume creation request + * + * This function creates volume described by @req. If @req->vol_id id + * %UBI_VOL_NUM_AUTO, this function automatically assign ID to the new volume + * and saves it in @req->vol_id. Returns zero in case of success and a negative + * error code in case of failure. Note, the caller has to have the + * @ubi->volumes_mutex locked. + */ +int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) +{ + int i, err, vol_id = req->vol_id, dont_free = 0; + struct ubi_volume *vol; + struct ubi_vtbl_record vtbl_rec; + uint64_t bytes; + dev_t dev; + + if (ubi->ro_mode) + return -EROFS; + + vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); + if (!vol) + return -ENOMEM; + + spin_lock(&ubi->volumes_lock); + if (vol_id == UBI_VOL_NUM_AUTO) { + /* Find unused volume ID */ + dbg_msg("search for vacant volume ID"); + for (i = 0; i < ubi->vtbl_slots; i++) + if (!ubi->volumes[i]) { + vol_id = i; + break; + } + + if (vol_id == UBI_VOL_NUM_AUTO) { + dbg_err("out of volume IDs"); + err = -ENFILE; + goto out_unlock; + } + req->vol_id = vol_id; + } + + dbg_msg("volume ID %d, %llu bytes, type %d, name %s", + vol_id, (unsigned long long)req->bytes, + (int)req->vol_type, req->name); + + /* Ensure that this volume does not exist */ + err = -EEXIST; + if (ubi->volumes[vol_id]) { + dbg_err("volume %d already exists", vol_id); + goto out_unlock; + } + + /* Ensure that the name is unique */ + for (i = 0; i < ubi->vtbl_slots; i++) + if (ubi->volumes[i] && + ubi->volumes[i]->name_len == req->name_len && + !strcmp(ubi->volumes[i]->name, req->name)) { + dbg_err("volume \"%s\" exists (ID %d)", req->name, i); + goto out_unlock; + } + + /* Calculate how many eraseblocks are requested */ + vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment; + bytes = req->bytes; + if (do_div(bytes, vol->usable_leb_size)) + vol->reserved_pebs = 1; + vol->reserved_pebs += bytes; + + /* Reserve physical 
eraseblocks */ + if (vol->reserved_pebs > ubi->avail_pebs) { + dbg_err("not enough PEBs, only %d available", ubi->avail_pebs); + err = -ENOSPC; + goto out_unlock; + } + ubi->avail_pebs -= vol->reserved_pebs; + ubi->rsvd_pebs += vol->reserved_pebs; + spin_unlock(&ubi->volumes_lock); + + vol->vol_id = vol_id; + vol->alignment = req->alignment; + vol->data_pad = ubi->leb_size % vol->alignment; + vol->vol_type = req->vol_type; + vol->name_len = req->name_len; + memcpy(vol->name, req->name, vol->name_len + 1); + vol->ubi = ubi; + + /* + * Finish all pending erases because there may be some LEBs belonging + * to the same volume ID. + */ + err = ubi_wl_flush(ubi); + if (err) + goto out_acc; + + vol->eba_tbl = kmalloc(vol->reserved_pebs * sizeof(int), GFP_KERNEL); + if (!vol->eba_tbl) { + err = -ENOMEM; + goto out_acc; + } + + for (i = 0; i < vol->reserved_pebs; i++) + vol->eba_tbl[i] = UBI_LEB_UNMAPPED; + + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + vol->used_ebs = vol->reserved_pebs; + vol->last_eb_bytes = vol->usable_leb_size; + vol->used_bytes = + (long long)vol->used_ebs * vol->usable_leb_size; + } else { + bytes = vol->used_bytes; + vol->last_eb_bytes = do_div(bytes, vol->usable_leb_size); + vol->used_ebs = bytes; + if (vol->last_eb_bytes) + vol->used_ebs += 1; + else + vol->last_eb_bytes = vol->usable_leb_size; + } + + /* Register character device for the volume */ + cdev_init(&vol->cdev, &ubi_vol_cdev_operations); + vol->cdev.owner = THIS_MODULE; + dev = MKDEV(MAJOR(ubi->cdev.dev), vol_id + 1); + err = cdev_add(&vol->cdev, dev, 1); + if (err) { + ubi_err("cannot add character device"); + goto out_mapping; + } + + err = ubi_create_gluebi(ubi, vol); + if (err) + goto out_cdev; + + vol->dev.release = vol_release; + vol->dev.parent = &ubi->dev; + vol->dev.devt = dev; + vol->dev.class = ubi_class; + + sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id); + err = device_register(&vol->dev); + if (err) { + ubi_err("cannot register device"); + goto 
out_gluebi; + } + + err = volume_sysfs_init(ubi, vol); + if (err) + goto out_sysfs; + + /* Fill volume table record */ + memset(&vtbl_rec, 0, sizeof(struct ubi_vtbl_record)); + vtbl_rec.reserved_pebs = cpu_to_be32(vol->reserved_pebs); + vtbl_rec.alignment = cpu_to_be32(vol->alignment); + vtbl_rec.data_pad = cpu_to_be32(vol->data_pad); + vtbl_rec.name_len = cpu_to_be16(vol->name_len); + if (vol->vol_type == UBI_DYNAMIC_VOLUME) + vtbl_rec.vol_type = UBI_VID_DYNAMIC; + else + vtbl_rec.vol_type = UBI_VID_STATIC; + memcpy(vtbl_rec.name, vol->name, vol->name_len + 1); + + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + if (err) + goto out_sysfs; + + spin_lock(&ubi->volumes_lock); + ubi->volumes[vol_id] = vol; + ubi->vol_count += 1; + spin_unlock(&ubi->volumes_lock); + + paranoid_check_volumes(ubi); + return 0; + +out_sysfs: + /* + * We have registered our device, we should not free the volume* + * description object in this function in case of an error - it is + * freed by the release function. + * + * Get device reference to prevent the release function from being + * called just after sysfs has been closed. + */ + dont_free = 1; + get_device(&vol->dev); + volume_sysfs_close(vol); +out_gluebi: + if (ubi_destroy_gluebi(vol)) + dbg_err("cannot destroy gluebi for volume %d:%d", + ubi->ubi_num, vol_id); +out_cdev: + cdev_del(&vol->cdev); +out_mapping: + kfree(vol->eba_tbl); +out_acc: + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs -= vol->reserved_pebs; + ubi->avail_pebs += vol->reserved_pebs; +out_unlock: + spin_unlock(&ubi->volumes_lock); + if (dont_free) + put_device(&vol->dev); + else + kfree(vol); + ubi_err("cannot create volume %d, error %d", vol_id, err); + return err; +} + +/** + * ubi_remove_volume - remove volume. + * @desc: volume descriptor + * + * This function removes volume described by @desc. The volume has to be opened + * in "exclusive" mode. Returns zero in case of success and a negative error + * code in case of failure. 
The caller has to have the @ubi->volumes_mutex + * locked. + */ +int ubi_remove_volume(struct ubi_volume_desc *desc) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs; + + dbg_msg("remove UBI volume %d", vol_id); + ubi_assert(desc->mode == UBI_EXCLUSIVE); + ubi_assert(vol == ubi->volumes[vol_id]); + + if (ubi->ro_mode) + return -EROFS; + + spin_lock(&ubi->volumes_lock); + if (vol->ref_count > 1) { + /* + * The volume is busy, probably someone is reading one of its + * sysfs files. + */ + err = -EBUSY; + goto out_unlock; + } + ubi->volumes[vol_id] = NULL; + spin_unlock(&ubi->volumes_lock); + + err = ubi_destroy_gluebi(vol); + if (err) + goto out_err; + + err = ubi_change_vtbl_record(ubi, vol_id, NULL); + if (err) + goto out_err; + + for (i = 0; i < vol->reserved_pebs; i++) { + err = ubi_eba_unmap_leb(ubi, vol, i); + if (err) + goto out_err; + } + + kfree(vol->eba_tbl); + vol->eba_tbl = NULL; + cdev_del(&vol->cdev); + volume_sysfs_close(vol); + + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs -= reserved_pebs; + ubi->avail_pebs += reserved_pebs; + i = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs; + if (i > 0) { + i = ubi->avail_pebs >= i ? i : ubi->avail_pebs; + ubi->avail_pebs -= i; + ubi->rsvd_pebs += i; + ubi->beb_rsvd_pebs += i; + if (i > 0) + ubi_msg("reserve more %d PEBs", i); + } + ubi->vol_count -= 1; + spin_unlock(&ubi->volumes_lock); + + paranoid_check_volumes(ubi); + return 0; + +out_err: + ubi_err("cannot remove volume %d, error %d", vol_id, err); + spin_lock(&ubi->volumes_lock); + ubi->volumes[vol_id] = vol; +out_unlock: + spin_unlock(&ubi->volumes_lock); + return err; +} + +/** + * ubi_resize_volume - re-size volume. + * @desc: volume descriptor + * @reserved_pebs: new size in physical eraseblocks + * + * This function re-sizes the volume and returns zero in case of success, and a + * negative error code in case of failure. 
The caller has to have the + * @ubi->volumes_mutex locked. + */ +int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) +{ + int i, err, pebs, *new_mapping; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + struct ubi_vtbl_record vtbl_rec; + int vol_id = vol->vol_id; + + if (ubi->ro_mode) + return -EROFS; + + dbg_msg("re-size volume %d to from %d to %d PEBs", + vol_id, vol->reserved_pebs, reserved_pebs); + + if (vol->vol_type == UBI_STATIC_VOLUME && + reserved_pebs < vol->used_ebs) { + dbg_err("too small size %d, %d LEBs contain data", + reserved_pebs, vol->used_ebs); + return -EINVAL; + } + + /* If the size is the same, we have nothing to do */ + if (reserved_pebs == vol->reserved_pebs) + return 0; + + new_mapping = kmalloc(reserved_pebs * sizeof(int), GFP_KERNEL); + if (!new_mapping) + return -ENOMEM; + + for (i = 0; i < reserved_pebs; i++) + new_mapping[i] = UBI_LEB_UNMAPPED; + + spin_lock(&ubi->volumes_lock); + if (vol->ref_count > 1) { + spin_unlock(&ubi->volumes_lock); + err = -EBUSY; + goto out_free; + } + spin_unlock(&ubi->volumes_lock); + + /* Reserve physical eraseblocks */ + pebs = reserved_pebs - vol->reserved_pebs; + if (pebs > 0) { + spin_lock(&ubi->volumes_lock); + if (pebs > ubi->avail_pebs) { + dbg_err("not enough PEBs: requested %d, available %d", + pebs, ubi->avail_pebs); + spin_unlock(&ubi->volumes_lock); + err = -ENOSPC; + goto out_free; + } + ubi->avail_pebs -= pebs; + ubi->rsvd_pebs += pebs; + for (i = 0; i < vol->reserved_pebs; i++) + new_mapping[i] = vol->eba_tbl[i]; + kfree(vol->eba_tbl); + vol->eba_tbl = new_mapping; + spin_unlock(&ubi->volumes_lock); + } + + /* Change volume table record */ + memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record)); + vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs); + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + if (err) + goto out_acc; + + if (pebs < 0) { + for (i = 0; i < -pebs; i++) { + err = ubi_eba_unmap_leb(ubi, vol, 
reserved_pebs + i); + if (err) + goto out_acc; + } + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs += pebs; + ubi->avail_pebs -= pebs; + pebs = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs; + if (pebs > 0) { + pebs = ubi->avail_pebs >= pebs ? pebs : ubi->avail_pebs; + ubi->avail_pebs -= pebs; + ubi->rsvd_pebs += pebs; + ubi->beb_rsvd_pebs += pebs; + if (pebs > 0) + ubi_msg("reserve more %d PEBs", pebs); + } + for (i = 0; i < reserved_pebs; i++) + new_mapping[i] = vol->eba_tbl[i]; + kfree(vol->eba_tbl); + vol->eba_tbl = new_mapping; + spin_unlock(&ubi->volumes_lock); + } + + vol->reserved_pebs = reserved_pebs; + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + vol->used_ebs = reserved_pebs; + vol->last_eb_bytes = vol->usable_leb_size; + vol->used_bytes = + (long long)vol->used_ebs * vol->usable_leb_size; + } + + paranoid_check_volumes(ubi); + return 0; + +out_acc: + if (pebs > 0) { + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs -= pebs; + ubi->avail_pebs += pebs; + spin_unlock(&ubi->volumes_lock); + } +out_free: + kfree(new_mapping); + return err; +} + +/** + * ubi_add_volume - add volume. + * @ubi: UBI device description object + * @vol: volume description object + * + * This function adds an existing volume and initializes all its data + * structures. Returns zero in case of success and a negative error code in + * case of failure. 
+ */ +int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol) +{ + int err, vol_id = vol->vol_id; + dev_t dev; + + dbg_msg("add volume %d", vol_id); + ubi_dbg_dump_vol_info(vol); + + /* Register character device for the volume */ + cdev_init(&vol->cdev, &ubi_vol_cdev_operations); + vol->cdev.owner = THIS_MODULE; + dev = MKDEV(MAJOR(ubi->cdev.dev), vol->vol_id + 1); + err = cdev_add(&vol->cdev, dev, 1); + if (err) { + ubi_err("cannot add character device for volume %d, error %d", + vol_id, err); + return err; + } + + err = ubi_create_gluebi(ubi, vol); + if (err) + goto out_cdev; + + vol->dev.release = vol_release; + vol->dev.parent = &ubi->dev; + vol->dev.devt = dev; + vol->dev.class = ubi_class; + sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id); + err = device_register(&vol->dev); + if (err) + goto out_gluebi; + + err = volume_sysfs_init(ubi, vol); + if (err) { + cdev_del(&vol->cdev); + err = ubi_destroy_gluebi(vol); + volume_sysfs_close(vol); + return err; + } + + paranoid_check_volumes(ubi); + return 0; + +out_gluebi: + err = ubi_destroy_gluebi(vol); +out_cdev: + cdev_del(&vol->cdev); + return err; +} + +/** + * ubi_free_volume - free volume. + * @ubi: UBI device description object + * @vol: volume description object + * + * This function frees all resources for volume @vol but does not remove it. + * Used only when the UBI device is detached. + */ +void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol) +{ + int err; + + dbg_msg("free volume %d", vol->vol_id); + + ubi->volumes[vol->vol_id] = NULL; + err = ubi_destroy_gluebi(vol); + cdev_del(&vol->cdev); + volume_sysfs_close(vol); +} + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + +/** + * paranoid_check_volume - check volume information. 
+ * @ubi: UBI device description object + * @vol_id: volume ID + */ +static void paranoid_check_volume(struct ubi_device *ubi, int vol_id) +{ + int idx = vol_id2idx(ubi, vol_id); + int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker; + const struct ubi_volume *vol; + long long n; + const char *name; + + spin_lock(&ubi->volumes_lock); + reserved_pebs = be32_to_cpu(ubi->vtbl[vol_id].reserved_pebs); + vol = ubi->volumes[idx]; + + if (!vol) { + if (reserved_pebs) { + ubi_err("no volume info, but volume exists"); + goto fail; + } + spin_unlock(&ubi->volumes_lock); + return; + } + + if (vol->exclusive) { + /* + * The volume may be being created at the moment, do not check + * it (e.g., it may be in the middle of ubi_create_volume(). + */ + spin_unlock(&ubi->volumes_lock); + return; + } + + if (vol->reserved_pebs < 0 || vol->alignment < 0 || vol->data_pad < 0 || + vol->name_len < 0) { + ubi_err("negative values"); + goto fail; + } + if (vol->alignment > ubi->leb_size || vol->alignment == 0) { + ubi_err("bad alignment"); + goto fail; + } + + n = vol->alignment & (ubi->min_io_size - 1); + if (vol->alignment != 1 && n) { + ubi_err("alignment is not multiple of min I/O unit"); + goto fail; + } + + n = ubi->leb_size % vol->alignment; + if (vol->data_pad != n) { + ubi_err("bad data_pad, has to be %lld", n); + goto fail; + } + + if (vol->vol_type != UBI_DYNAMIC_VOLUME && + vol->vol_type != UBI_STATIC_VOLUME) { + ubi_err("bad vol_type"); + goto fail; + } + + if (vol->upd_marker && vol->corrupted) { + dbg_err("update marker and corrupted simultaneously"); + goto fail; + } + + if (vol->reserved_pebs > ubi->good_peb_count) { + ubi_err("too large reserved_pebs"); + goto fail; + } + + n = ubi->leb_size - vol->data_pad; + if (vol->usable_leb_size != ubi->leb_size - vol->data_pad) { + ubi_err("bad usable_leb_size, has to be %lld", n); + goto fail; + } + + if (vol->name_len > UBI_VOL_NAME_MAX) { + ubi_err("too long volume name, max is %d", UBI_VOL_NAME_MAX); + goto 
fail; + } + + if (!vol->name) { + ubi_err("NULL volume name"); + goto fail; + } + + n = strnlen(vol->name, vol->name_len + 1); + if (n != vol->name_len) { + ubi_err("bad name_len %lld", n); + goto fail; + } + + n = (long long)vol->used_ebs * vol->usable_leb_size; + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + if (vol->corrupted) { + ubi_err("corrupted dynamic volume"); + goto fail; + } + if (vol->used_ebs != vol->reserved_pebs) { + ubi_err("bad used_ebs"); + goto fail; + } + if (vol->last_eb_bytes != vol->usable_leb_size) { + ubi_err("bad last_eb_bytes"); + goto fail; + } + if (vol->used_bytes != n) { + ubi_err("bad used_bytes"); + goto fail; + } + } else { + if (vol->used_ebs < 0 || vol->used_ebs > vol->reserved_pebs) { + ubi_err("bad used_ebs"); + goto fail; + } + if (vol->last_eb_bytes < 0 || + vol->last_eb_bytes > vol->usable_leb_size) { + ubi_err("bad last_eb_bytes"); + goto fail; + } + if (vol->used_bytes < 0 || vol->used_bytes > n || + vol->used_bytes < n - vol->usable_leb_size) { + ubi_err("bad used_bytes"); + goto fail; + } + } + + alignment = be32_to_cpu(ubi->vtbl[vol_id].alignment); + data_pad = be32_to_cpu(ubi->vtbl[vol_id].data_pad); + name_len = be16_to_cpu(ubi->vtbl[vol_id].name_len); + upd_marker = ubi->vtbl[vol_id].upd_marker; + name = &ubi->vtbl[vol_id].name[0]; + if (ubi->vtbl[vol_id].vol_type == UBI_VID_DYNAMIC) + vol_type = UBI_DYNAMIC_VOLUME; + else + vol_type = UBI_STATIC_VOLUME; + + if (alignment != vol->alignment || data_pad != vol->data_pad || + upd_marker != vol->upd_marker || vol_type != vol->vol_type || + name_len!= vol->name_len || strncmp(name, vol->name, name_len)) { + ubi_err("volume info is different"); + goto fail; + } + + spin_unlock(&ubi->volumes_lock); + return; + +fail: + ubi_err("paranoid check failed for volume %d", vol_id); + ubi_dbg_dump_vol_info(vol); + ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id); + spin_unlock(&ubi->volumes_lock); + BUG(); +} + +/** + * paranoid_check_volumes - check information about all 
volumes. + * @ubi: UBI device description object + */ +static void paranoid_check_volumes(struct ubi_device *ubi) +{ + int i; + + for (i = 0; i < ubi->vtbl_slots; i++) + paranoid_check_volume(ubi, i); +} +#endif diff --git a/fs/ubi/vtbl.c b/fs/ubi/vtbl.c new file mode 100755 index 0000000..6da6400 --- /dev/null +++ b/fs/ubi/vtbl.c @@ -0,0 +1,835 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2006, 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Ðртём) + */ + +/* + * This file includes volume table manipulation code. The volume table is an + * on-flash table containing volume meta-data like name, number of reserved + * physical eraseblocks, type, etc. The volume table is stored in the so-called + * "layout volume". + * + * The layout volume is an internal volume which is organized as follows. It + * consists of two logical eraseblocks - LEB 0 and LEB 1. Each logical + * eraseblock stores one volume table copy, i.e. LEB 0 and LEB 1 duplicate each + * other. This redundancy guarantees robustness to unclean reboots. The volume + * table is basically an array of volume table records. Each record contains + * full information about the volume and protected by a CRC checksum. 
+ * + * The volume table is changed, it is first changed in RAM. Then LEB 0 is + * erased, and the updated volume table is written back to LEB 0. Then same for + * LEB 1. This scheme guarantees recoverability from unclean reboots. + * + * In this UBI implementation the on-flash volume table does not contain any + * information about how many data static volumes contain. This information may + * be found from the scanning data. + * + * But it would still be beneficial to store this information in the volume + * table. For example, suppose we have a static volume X, and all its physical + * eraseblocks became bad for some reasons. Suppose we are attaching the + * corresponding MTD device, the scanning has found no logical eraseblocks + * corresponding to the volume X. According to the volume table volume X does + * exist. So we don't know whether it is just empty or all its physical + * eraseblocks went bad. So we cannot alarm the user about this corruption. + * + * The volume table also stores so-called "update marker", which is used for + * volume updates. Before updating the volume, the update marker is set, and + * after the update operation is finished, the update marker is cleared. So if + * the update operation was interrupted (e.g. by an unclean reboot) - the + * update marker is still there and we know that the volume's contents is + * damaged. + */ + +#ifdef UBI_LINUX +#include <linux/crc32.h> +#include <linux/err.h> +#include <asm/div64.h> +#endif + +#include <ubi_uboot.h> +#include "ubi.h" + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +static void paranoid_vtbl_check(const struct ubi_device *ubi); +#else +#define paranoid_vtbl_check(ubi) +#endif + +/* Empty volume table record */ +static struct ubi_vtbl_record empty_vtbl_record; + + +/** + * ubi_change_vtbl_record - change volume table record. + * @ubi: UBI device description object + * @idx: table index to change + * @vtbl_rec: new volume table record + * + * This function changes volume table record @idx. 
If @vtbl_rec is %NULL, empty + * volume table record is written. The caller does not have to calculate CRC of + * the record as it is done by this function. Returns zero in case of success + * and a negative error code in case of failure. + */ +int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, + struct ubi_vtbl_record *vtbl_rec) +{ + int i, err; + uint32_t crc; + struct ubi_volume *layout_vol; + + ubi_assert(idx >= 0 && idx < ubi->vtbl_slots); + layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)]; + + if (!vtbl_rec) + vtbl_rec = &empty_vtbl_record; + else { + crc = crc32(UBI_CRC32_INIT, vtbl_rec, UBI_VTBL_RECORD_SIZE_CRC); + vtbl_rec->crc = cpu_to_be32(crc); + } + + memcpy(&ubi->vtbl[idx], vtbl_rec, sizeof(struct ubi_vtbl_record)); + for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { + err = ubi_eba_unmap_leb(ubi, layout_vol, i); + if (err) + return err; + + err = ubi_eba_write_leb(ubi, layout_vol, i, ubi->vtbl, 0, + ubi->vtbl_size, UBI_LONGTERM); + if (err) + return err; + } + + paranoid_vtbl_check(ubi); + return 0; +} + +/** + * vtbl_check - check if volume table is not corrupted and contains sensible + * data. + * @ubi: UBI device description object + * @vtbl: volume table + * + * This function returns zero if @vtbl is all right, %1 if CRC is incorrect, + * and %-EINVAL if it contains inconsistent data. 
+ */ +static int vtbl_check(const struct ubi_device *ubi, + const struct ubi_vtbl_record *vtbl) +{ + int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len; + int upd_marker, err; + uint32_t crc; + const char *name; + + for (i = 0; i < ubi->vtbl_slots; i++) { + cond_resched(); + + reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs); + alignment = be32_to_cpu(vtbl[i].alignment); + data_pad = be32_to_cpu(vtbl[i].data_pad); + upd_marker = vtbl[i].upd_marker; + vol_type = vtbl[i].vol_type; + name_len = be16_to_cpu(vtbl[i].name_len); + name = (const char *) &vtbl[i].name[0]; + + crc = crc32(UBI_CRC32_INIT, &vtbl[i], UBI_VTBL_RECORD_SIZE_CRC); + if (be32_to_cpu(vtbl[i].crc) != crc) { + ubi_err("bad CRC at record %u: %#08x, not %#08x", + i, crc, be32_to_cpu(vtbl[i].crc)); + ubi_dbg_dump_vtbl_record(&vtbl[i], i); + return 1; + } + + if (reserved_pebs == 0) { + if (memcmp(&vtbl[i], &empty_vtbl_record, + UBI_VTBL_RECORD_SIZE)) { + err = 2; + goto bad; + } + continue; + } + + if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 || + name_len < 0) { + err = 3; + goto bad; + } + + if (alignment > ubi->leb_size || alignment == 0) { + err = 4; + goto bad; + } + + n = alignment & (ubi->min_io_size - 1); + if (alignment != 1 && n) { + err = 5; + goto bad; + } + + n = ubi->leb_size % alignment; + if (data_pad != n) { + dbg_err("bad data_pad, has to be %d", n); + err = 6; + goto bad; + } + + if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) { + err = 7; + goto bad; + } + + if (upd_marker != 0 && upd_marker != 1) { + err = 8; + goto bad; + } + + if (reserved_pebs > ubi->good_peb_count) { + dbg_err("too large reserved_pebs, good PEBs %d", + ubi->good_peb_count); + err = 9; + goto bad; + } + + if (name_len > UBI_VOL_NAME_MAX) { + err = 10; + goto bad; + } + + if (name[0] == '\0') { + err = 11; + goto bad; + } + + if (name_len != strnlen(name, name_len + 1)) { + err = 12; + goto bad; + } + } + + /* Checks that all names are unique */ + for (i = 0; i < 
ubi->vtbl_slots - 1; i++) { + for (n = i + 1; n < ubi->vtbl_slots; n++) { + int len1 = be16_to_cpu(vtbl[i].name_len); + int len2 = be16_to_cpu(vtbl[n].name_len); + + if (len1 > 0 && len1 == len2 && + !strncmp((char *)vtbl[i].name, (char *)vtbl[n].name, len1)) { + ubi_err("volumes %d and %d have the same name" + " \"%s\"", i, n, vtbl[i].name); + ubi_dbg_dump_vtbl_record(&vtbl[i], i); + ubi_dbg_dump_vtbl_record(&vtbl[n], n); + return -EINVAL; + } + } + } + + return 0; + +bad: + ubi_err("volume table check failed: record %d, error %d", i, err); + ubi_dbg_dump_vtbl_record(&vtbl[i], i); + return -EINVAL; +} + +/** + * create_vtbl - create a copy of volume table. + * @ubi: UBI device description object + * @si: scanning information + * @copy: number of the volume table copy + * @vtbl: contents of the volume table + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si, + int copy, void *vtbl) +{ + int err, tries = 0; + static struct ubi_vid_hdr *vid_hdr; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *new_seb, *old_seb = NULL; + + ubi_msg("create volume table (copy #%d)", copy + 1); + + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); + if (!vid_hdr) + return -ENOMEM; + /* + * Check if there is a logical eraseblock which would have to contain + * this volume table copy was found during scanning. It has to be wiped + * out. 
+ */ + sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID); + if (sv) + old_seb = ubi_scan_find_seb(sv, copy); + +retry: + new_seb = ubi_scan_get_free_peb(ubi, si); + if (IS_ERR(new_seb)) { + err = PTR_ERR(new_seb); + goto out_free; + } + + vid_hdr->vol_type = UBI_VID_DYNAMIC; + vid_hdr->vol_id = cpu_to_be32(UBI_LAYOUT_VOLUME_ID); + vid_hdr->compat = UBI_LAYOUT_VOLUME_COMPAT; + vid_hdr->data_size = vid_hdr->used_ebs = + vid_hdr->data_pad = cpu_to_be32(0); + vid_hdr->lnum = cpu_to_be32(copy); + vid_hdr->sqnum = cpu_to_be64(++si->max_sqnum); + vid_hdr->leb_ver = cpu_to_be32(old_seb ? old_seb->leb_ver + 1: 0); + /* The EC header is already there, write the VID header */ + err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr); + if (err) + goto write_error; + + /* Write the layout volume contents */ + err = ubi_io_write_data(ubi, vtbl, new_seb->pnum, 0, ubi->vtbl_size); + if (err) + goto write_error; + + /* + * And add it to the scanning information. Don't delete the old + * @old_seb as it will be deleted and freed in 'ubi_scan_add_used()'. + */ + err = ubi_scan_add_used(ubi, si, new_seb->pnum, new_seb->ec, + vid_hdr, 0); + kfree(new_seb); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + +write_error: + if (err == -EIO && ++tries <= 5) { + /* + * Probably this physical eraseblock went bad, try to pick + * another one. + */ + list_add_tail(&new_seb->u.list, &si->corr); + goto retry; + } + kfree(new_seb); +out_free: + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + +} + +/** + * process_lvol - process the layout volume. + * @ubi: UBI device description object + * @si: scanning information + * @sv: layout volume scanning information + * + * This function is responsible for reading the layout volume, ensuring it is + * not corrupted, and recovering from corruptions if needed. Returns volume + * table in case of success and a negative error code in case of failure. 
+ */ +static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi, + struct ubi_scan_info *si, + struct ubi_scan_volume *sv) +{ + int err; + struct rb_node *rb; + struct ubi_scan_leb *seb; + struct ubi_vtbl_record *leb[UBI_LAYOUT_VOLUME_EBS] = { NULL, NULL }; + int leb_corrupted[UBI_LAYOUT_VOLUME_EBS] = {1, 1}; + + /* + * UBI goes through the following steps when it changes the layout + * volume: + * a. erase LEB 0; + * b. write new data to LEB 0; + * c. erase LEB 1; + * d. write new data to LEB 1. + * + * Before the change, both LEBs contain the same data. + * + * Due to unclean reboots, the contents of LEB 0 may be lost, but there + * should LEB 1. So it is OK if LEB 0 is corrupted while LEB 1 is not. + * Similarly, LEB 1 may be lost, but there should be LEB 0. And + * finally, unclean reboots may result in a situation when neither LEB + * 0 nor LEB 1 are corrupted, but they are different. In this case, LEB + * 0 contains more recent information. + * + * So the plan is to first check LEB 0. Then + * a. if LEB 0 is OK, it must be containing the most resent data; then + * we compare it with LEB 1, and if they are different, we copy LEB + * 0 to LEB 1; + * b. if LEB 0 is corrupted, but LEB 1 has to be OK, and we copy LEB 1 + * to LEB 0. + */ + + dbg_msg("check layout volume"); + + /* Read both LEB 0 and LEB 1 into memory */ + ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) { + leb[seb->lnum] = vmalloc(ubi->vtbl_size); + if (!leb[seb->lnum]) { + err = -ENOMEM; + goto out_free; + } + memset(leb[seb->lnum], 0, ubi->vtbl_size); + + err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0, + ubi->vtbl_size); + if (err == UBI_IO_BITFLIPS || err == -EBADMSG) + /* + * Scrub the PEB later. Note, -EBADMSG indicates an + * uncorrectable ECC error, but we have our own CRC and + * the data will be checked later. If the data is OK, + * the PEB will be scrubbed (because we set + * seb->scrub). 
If the data is not OK, the contents of + * the PEB will be recovered from the second copy, and + * seb->scrub will be cleared in + * 'ubi_scan_add_used()'. + */ + seb->scrub = 1; + else if (err) + goto out_free; + } + + err = -EINVAL; + if (leb[0]) { + leb_corrupted[0] = vtbl_check(ubi, leb[0]); + if (leb_corrupted[0] < 0) + goto out_free; + } + + if (!leb_corrupted[0]) { + /* LEB 0 is OK */ + if (leb[1]) + leb_corrupted[1] = memcmp(leb[0], leb[1], ubi->vtbl_size); + if (leb_corrupted[1]) { + ubi_warn("volume table copy #2 is corrupted"); + err = create_vtbl(ubi, si, 1, leb[0]); + if (err) + goto out_free; + ubi_msg("volume table was restored"); + } + + /* Both LEB 1 and LEB 2 are OK and consistent */ + vfree(leb[1]); + return leb[0]; + } else { + /* LEB 0 is corrupted or does not exist */ + if (leb[1]) { + leb_corrupted[1] = vtbl_check(ubi, leb[1]); + if (leb_corrupted[1] < 0) + goto out_free; + } + if (leb_corrupted[1]) { + /* Both LEB 0 and LEB 1 are corrupted */ + ubi_err("both volume tables are corrupted"); + goto out_free; + } + + ubi_warn("volume table copy #1 is corrupted"); + err = create_vtbl(ubi, si, 0, leb[1]); + if (err) + goto out_free; + ubi_msg("volume table was restored"); + + vfree(leb[0]); + return leb[1]; + } + +out_free: + vfree(leb[0]); + vfree(leb[1]); + return ERR_PTR(err); +} + +/** + * create_empty_lvol - create empty layout volume. + * @ubi: UBI device description object + * @si: scanning information + * + * This function returns volume table contents in case of success and a + * negative error code in case of failure. 
+ */ +static struct ubi_vtbl_record *create_empty_lvol(struct ubi_device *ubi, + struct ubi_scan_info *si) +{ + int i; + struct ubi_vtbl_record *vtbl; + + vtbl = vmalloc(ubi->vtbl_size); + if (!vtbl) + return ERR_PTR(-ENOMEM); + memset(vtbl, 0, ubi->vtbl_size); + + for (i = 0; i < ubi->vtbl_slots; i++) + memcpy(&vtbl[i], &empty_vtbl_record, UBI_VTBL_RECORD_SIZE); + + for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { + int err; + + err = create_vtbl(ubi, si, i, vtbl); + if (err) { + vfree(vtbl); + return ERR_PTR(err); + } + } + + return vtbl; +} + +/** + * init_volumes - initialize volume information for existing volumes. + * @ubi: UBI device description object + * @si: scanning information + * @vtbl: volume table + * + * This function allocates volume description objects for existing volumes. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, + const struct ubi_vtbl_record *vtbl) +{ + int i, reserved_pebs = 0; + struct ubi_scan_volume *sv; + struct ubi_volume *vol; + + for (i = 0; i < ubi->vtbl_slots; i++) { + cond_resched(); + + if (be32_to_cpu(vtbl[i].reserved_pebs) == 0) + continue; /* Empty record */ + + vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); + if (!vol) + return -ENOMEM; + + vol->reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs); + vol->alignment = be32_to_cpu(vtbl[i].alignment); + vol->data_pad = be32_to_cpu(vtbl[i].data_pad); + vol->vol_type = vtbl[i].vol_type == UBI_VID_DYNAMIC ? 
+ UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME; + vol->name_len = be16_to_cpu(vtbl[i].name_len); + vol->usable_leb_size = ubi->leb_size - vol->data_pad; + memcpy(vol->name, vtbl[i].name, vol->name_len); + vol->name[vol->name_len] = '\0'; + vol->vol_id = i; + + if (vtbl[i].flags & UBI_VTBL_AUTORESIZE_FLG) { + /* Auto re-size flag may be set only for one volume */ + if (ubi->autoresize_vol_id != -1) { + ubi_err("more then one auto-resize volume (%d " + "and %d)", ubi->autoresize_vol_id, i); + kfree(vol); + return -EINVAL; + } + + ubi->autoresize_vol_id = i; + } + + ubi_assert(!ubi->volumes[i]); + ubi->volumes[i] = vol; + ubi->vol_count += 1; + vol->ubi = ubi; + reserved_pebs += vol->reserved_pebs; + + /* + * In case of dynamic volume UBI knows nothing about how many + * data is stored there. So assume the whole volume is used. + */ + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + vol->used_ebs = vol->reserved_pebs; + vol->last_eb_bytes = vol->usable_leb_size; + vol->used_bytes = + (long long)vol->used_ebs * vol->usable_leb_size; + continue; + } + + /* Static volumes only */ + sv = ubi_scan_find_sv(si, i); + if (!sv) { + /* + * No eraseblocks belonging to this volume found. We + * don't actually know whether this static volume is + * completely corrupted or just contains no data. And + * we cannot know this as long as data size is not + * stored on flash. So we just assume the volume is + * empty. FIXME: this should be handled. + */ + continue; + } + + if (sv->leb_count != sv->used_ebs) { + /* + * We found a static volume which misses several + * eraseblocks. Treat it as corrupted. 
+ */ + ubi_warn("static volume %d misses %d LEBs - corrupted", + sv->vol_id, sv->used_ebs - sv->leb_count); + vol->corrupted = 1; + continue; + } + + vol->used_ebs = sv->used_ebs; + vol->used_bytes = + (long long)(vol->used_ebs - 1) * vol->usable_leb_size; + vol->used_bytes += sv->last_data_size; + vol->last_eb_bytes = sv->last_data_size; + } + + /* And add the layout volume */ + vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); + if (!vol) + return -ENOMEM; + + vol->reserved_pebs = UBI_LAYOUT_VOLUME_EBS; + vol->alignment = 1; + vol->vol_type = UBI_DYNAMIC_VOLUME; + vol->name_len = sizeof(UBI_LAYOUT_VOLUME_NAME) - 1; + memcpy(vol->name, UBI_LAYOUT_VOLUME_NAME, vol->name_len + 1); + vol->usable_leb_size = ubi->leb_size; + vol->used_ebs = vol->reserved_pebs; + vol->last_eb_bytes = vol->reserved_pebs; + vol->used_bytes = + (long long)vol->used_ebs * (ubi->leb_size - vol->data_pad); + vol->vol_id = UBI_LAYOUT_VOLUME_ID; + vol->ref_count = 1; + + ubi_assert(!ubi->volumes[i]); + ubi->volumes[vol_id2idx(ubi, vol->vol_id)] = vol; + reserved_pebs += vol->reserved_pebs; + ubi->vol_count += 1; + vol->ubi = ubi; + + if (reserved_pebs > ubi->avail_pebs) + ubi_err("not enough PEBs, required %d, available %d", + reserved_pebs, ubi->avail_pebs); + ubi->rsvd_pebs += reserved_pebs; + ubi->avail_pebs -= reserved_pebs; + + return 0; +} + +/** + * check_sv - check volume scanning information. + * @vol: UBI volume description object + * @sv: volume scanning information + * + * This function returns zero if the volume scanning information is consistent + * to the data read from the volume tabla, and %-EINVAL if not. 
+ */ +static int check_sv(const struct ubi_volume *vol, + const struct ubi_scan_volume *sv) +{ + int err; + + if (sv->highest_lnum >= vol->reserved_pebs) { + err = 1; + goto bad; + } + if (sv->leb_count > vol->reserved_pebs) { + err = 2; + goto bad; + } + if (sv->vol_type != vol->vol_type) { + err = 3; + goto bad; + } + if (sv->used_ebs > vol->reserved_pebs) { + err = 4; + goto bad; + } + if (sv->data_pad != vol->data_pad) { + err = 5; + goto bad; + } + return 0; + +bad: + ubi_err("bad scanning information, error %d", err); + ubi_dbg_dump_sv(sv); + ubi_dbg_dump_vol_info(vol); + return -EINVAL; +} + +/** + * check_scanning_info - check that scanning information. + * @ubi: UBI device description object + * @si: scanning information + * + * Even though we protect on-flash data by CRC checksums, we still don't trust + * the media. This function ensures that scanning information is consistent to + * the information read from the volume table. Returns zero if the scanning + * information is OK and %-EINVAL if it is not. 
+ */ +static int check_scanning_info(const struct ubi_device *ubi, + struct ubi_scan_info *si) +{ + int err, i; + struct ubi_scan_volume *sv; + struct ubi_volume *vol; + + if (si->vols_found > UBI_INT_VOL_COUNT + ubi->vtbl_slots) { + ubi_err("scanning found %d volumes, maximum is %d + %d", + si->vols_found, UBI_INT_VOL_COUNT, ubi->vtbl_slots); + return -EINVAL; + } + + if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT && + si->highest_vol_id < UBI_INTERNAL_VOL_START) { + ubi_err("too large volume ID %d found by scanning", + si->highest_vol_id); + return -EINVAL; + } + + for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { + cond_resched(); + + sv = ubi_scan_find_sv(si, i); + vol = ubi->volumes[i]; + if (!vol) { + if (sv) + ubi_scan_rm_volume(si, sv); + continue; + } + + if (vol->reserved_pebs == 0) { + ubi_assert(i < ubi->vtbl_slots); + + if (!sv) + continue; + + /* + * During scanning we found a volume which does not + * exist according to the information in the volume + * table. This must have happened due to an unclean + * reboot while the volume was being removed. Discard + * these eraseblocks. + */ + ubi_msg("finish volume %d removal", sv->vol_id); + ubi_scan_rm_volume(si, sv); + } else if (sv) { + err = check_sv(vol, sv); + if (err) + return err; + } + } + + return 0; +} + +/** + * ubi_read_volume_table - read volume table. + * information. + * @ubi: UBI device description object + * @si: scanning information + * + * This function reads volume table, checks it, recover from errors if needed, + * or creates it if needed. Returns zero in case of success and a negative + * error code in case of failure. + */ +int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si) +{ + int i, err; + struct ubi_scan_volume *sv; + + empty_vtbl_record.crc = cpu_to_be32(0xf116c36b); + /* + * The number of supported volumes is limited by the eraseblock size + * and by the UBI_MAX_VOLUMES constant. 
+ */ + ubi->vtbl_slots = ubi->leb_size / UBI_VTBL_RECORD_SIZE; + if (ubi->vtbl_slots > UBI_MAX_VOLUMES) + ubi->vtbl_slots = UBI_MAX_VOLUMES; + + ubi->vtbl_size = ubi->vtbl_slots * UBI_VTBL_RECORD_SIZE; + ubi->vtbl_size = ALIGN(ubi->vtbl_size, ubi->min_io_size); + + sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID); + if (!sv) { + /* + * No logical eraseblocks belonging to the layout volume were + * found. This could mean that the flash is just empty. In + * this case we create empty layout volume. + * + * But if flash is not empty this must be a corruption or the + * MTD device just contains garbage. + */ + if (si->is_empty) { + ubi->vtbl = create_empty_lvol(ubi, si); + if (IS_ERR(ubi->vtbl)) + return PTR_ERR(ubi->vtbl); + } else { + ubi_err("the layout volume was not found"); + return -EINVAL; + } + } else { + if (sv->leb_count > UBI_LAYOUT_VOLUME_EBS) { + /* This must not happen with proper UBI images */ + dbg_err("too many LEBs (%d) in layout volume", + sv->leb_count); + return -EINVAL; + } + + ubi->vtbl = process_lvol(ubi, si, sv); + if (IS_ERR(ubi->vtbl)) + return PTR_ERR(ubi->vtbl); + } + + ubi->avail_pebs = ubi->good_peb_count; + + /* + * The layout volume is OK, initialize the corresponding in-RAM data + * structures. + */ + err = init_volumes(ubi, si, ubi->vtbl); + if (err) + goto out_free; + + /* + * Get sure that the scanning information is consistent to the + * information stored in the volume table. + */ + err = check_scanning_info(ubi, si); + if (err) + goto out_free; + + return 0; + +out_free: + vfree(ubi->vtbl); + for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) + if (ubi->volumes[i]) { + kfree(ubi->volumes[i]); + ubi->volumes[i] = NULL; + } + return err; +} + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + +/** + * paranoid_vtbl_check - check volume table. 
+ * @ubi: UBI device description object + */ +static void paranoid_vtbl_check(const struct ubi_device *ubi) +{ + if (vtbl_check(ubi, ubi->vtbl)) { + ubi_err("paranoid check failed"); + BUG(); + } +} + +#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ diff --git a/fs/ubi/wl.c b/fs/ubi/wl.c new file mode 100755 index 0000000..3a70e41 --- /dev/null +++ b/fs/ubi/wl.c @@ -0,0 +1,1232 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Authors: Artem Bityutskiy (Битюцкий Ðртём), Thomas Gleixner + */ + +/* + * UBI wear-leveling unit. + * + * This unit is responsible for wear-leveling. It works in terms of physical + * eraseblocks and erase counters and knows nothing about logical eraseblocks, + * volumes, etc. From this unit's perspective all physical eraseblocks are of + * two types - used and free. Used physical eraseblocks are those that were + * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are + * those that were put by the 'ubi_wl_put_peb()' function. + * + * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter + * header. The rest of the physical eraseblock contains only 0xFF bytes. 
+ * + * When physical eraseblocks are returned to the WL unit by means of the + * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is + * done asynchronously in context of the per-UBI device background thread, + * which is also managed by the WL unit. + * + * The wear-leveling is ensured by means of moving the contents of used + * physical eraseblocks with low erase counter to free physical eraseblocks + * with high erase counter. + * + * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick + * an "optimal" physical eraseblock. For example, when it is known that the + * physical eraseblock will be "put" soon because it contains short-term data, + * the WL unit may pick a free physical eraseblock with low erase counter, and + * so forth. + * + * If the WL unit fails to erase a physical eraseblock, it marks it as bad. + * + * This unit is also responsible for scrubbing. If a bit-flip is detected in a + * physical eraseblock, it has to be moved. Technically this is the same as + * moving it for wear-leveling reasons. + * + * As it was said, for the UBI unit all physical eraseblocks are either "free" + * or "used". Free eraseblock are kept in the @wl->free RB-tree, while used + * eraseblocks are kept in a set of different RB-trees: @wl->used, + * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub. + * + * Note, in this implementation, we keep a small in-RAM object for each physical + * eraseblock. This is surely not a scalable solution. But it appears to be good + * enough for moderately large flashes and it is simple. In future, one may + * re-work this unit and make it more scalable. + * + * At the moment this unit does not utilize the sequence number, which was + * introduced relatively recently. But it would be wise to do this because the + * sequence number of a logical eraseblock characterizes how old is it. 
For + * example, when we move a PEB with low erase counter, and we need to pick the + * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we + * pick target PEB with an average EC if our PEB is not very "old". This is a + * room for future re-works of the WL unit. + * + * FIXME: looks too complex, should be simplified (later). + */ + +#ifdef UBI_LINUX +#include <linux/slab.h> +#include <linux/crc32.h> +#include <linux/freezer.h> +#include <linux/kthread.h> +#endif + +#include <ubi_uboot.h> +#include "ubi.h" + +/* Number of physical eraseblocks reserved for wear-leveling purposes */ +#define WL_RESERVED_PEBS 1 + +/* + * How many erase cycles are short term, unknown, and long term physical + * eraseblocks protected. + */ +#define ST_PROTECTION 16 +#define U_PROTECTION 10 +#define LT_PROTECTION 4 + +/* + * Maximum difference between two erase counters. If this threshold is + * exceeded, the WL unit starts moving data from used physical eraseblocks with + * low erase counter to free physical eraseblocks with high erase counter. + */ +#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD*1000 + +/* + * When a physical eraseblock is moved, the WL unit has to pick the target + * physical eraseblock to move to. The simplest way would be just to pick the + * one with the highest erase counter. But in certain workloads this could lead + * to an unlimited wear of one or few physical eraseblock. Indeed, imagine a + * situation when the picked physical eraseblock is constantly erased after the + * data is written to it. So, we have a constant which limits the highest erase + * counter of the free physical eraseblock to pick. Namely, the WL unit does + * not pick eraseblocks with erase counter greater then the lowest erase + * counter plus %WL_FREE_MAX_DIFF. + */ +#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) + +/* + * Maximum number of consecutive background thread failures which is enough to + * switch to read-only mode. 
+ */ +#define WL_MAX_FAILURES 32 + +/** + * struct ubi_wl_prot_entry - PEB protection entry. + * @rb_pnum: link in the @wl->prot.pnum RB-tree + * @rb_aec: link in the @wl->prot.aec RB-tree + * @abs_ec: the absolute erase counter value when the protection ends + * @e: the wear-leveling entry of the physical eraseblock under protection + * + * When the WL unit returns a physical eraseblock, the physical eraseblock is + * protected from being moved for some "time". For this reason, the physical + * eraseblock is not directly moved from the @wl->free tree to the @wl->used + * tree. There is one more tree in between where this physical eraseblock is + * temporarily stored (@wl->prot). + * + * All this protection stuff is needed because: + * o we don't want to move physical eraseblocks just after we have given them + * to the user; instead, we first want to let users fill them up with data; + * + * o there is a chance that the user will put the physical eraseblock very + * soon, so it makes sense not to move it for some time, but wait; this is + * especially important in case of "short term" physical eraseblocks. + * + * Physical eraseblocks stay protected only for limited time. But the "time" is + * measured in erase cycles in this case. This is implemented with help of the + * absolute erase counter (@wl->abs_ec). When it reaches certain value, the + * physical eraseblocks are moved from the protection trees (@wl->prot.*) to + * the @wl->used tree. + * + * Protected physical eraseblocks are searched by physical eraseblock number + * (when they are put) and by the absolute erase counter (to check if it is + * time to move them to the @wl->used tree). So there are actually 2 RB-trees + * storing the protected physical eraseblocks: @wl->prot.pnum and + * @wl->prot.aec. They are referred to as the "protection" trees. The + * first one is indexed by the physical eraseblock number. The second one is + * indexed by the absolute erase counter. 
Both trees store + * &struct ubi_wl_prot_entry objects. + * + * Each physical eraseblock has 2 main states: free and used. The former state + * corresponds to the @wl->free tree. The latter state is split up on several + * sub-states: + * o the WL movement is allowed (@wl->used tree); + * o the WL movement is temporarily prohibited (@wl->prot.pnum and + * @wl->prot.aec trees); + * o scrubbing is needed (@wl->scrub tree). + * + * Depending on the sub-state, wear-leveling entries of the used physical + * eraseblocks may be kept in one of those trees. + */ +struct ubi_wl_prot_entry { + struct rb_node rb_pnum; + struct rb_node rb_aec; + unsigned long long abs_ec; + struct ubi_wl_entry *e; +}; + +/** + * struct ubi_work - UBI work description data structure. + * @list: a link in the list of pending works + * @func: worker function + * @priv: private data of the worker function + * + * @e: physical eraseblock to erase + * @torture: if the physical eraseblock has to be tortured + * + * The @func pointer points to the worker function. If the @cancel argument is + * not zero, the worker has to free the resources and exit immediately. The + * worker has to return zero in case of success and a negative error code in + * case of failure. + */ +struct ubi_work { + struct list_head list; + int (*func)(struct ubi_device *ubi, struct ubi_work *wrk, int cancel); + /* The below fields are only relevant to erasure works */ + struct ubi_wl_entry *e; + int torture; +}; + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec); +static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, + struct rb_root *root); +#else +#define paranoid_check_ec(ubi, pnum, ec) 0 +#define paranoid_check_in_wl_tree(e, root) +#endif + +/** + * wl_tree_add - add a wear-leveling entry to a WL RB-tree. 
+ * @e: the wear-leveling entry to add + * @root: the root of the tree + * + * Note, we use (erase counter, physical eraseblock number) pairs as keys in + * the @ubi->used and @ubi->free RB-trees. + */ +static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root) +{ + struct rb_node **p, *parent = NULL; + + p = &root->rb_node; + while (*p) { + struct ubi_wl_entry *e1; + + parent = *p; + e1 = rb_entry(parent, struct ubi_wl_entry, rb); + + if (e->ec < e1->ec) + p = &(*p)->rb_left; + else if (e->ec > e1->ec) + p = &(*p)->rb_right; + else { + if (e->pnum < e1->pnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + } + + rb_link_node(&e->rb, parent, p); + rb_insert_color(&e->rb, root); +} + +/** + * do_work - do one pending work. + * @ubi: UBI device description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int do_work(struct ubi_device *ubi) +{ + int err; + struct ubi_work *wrk; + + cond_resched(); + + /* + * @ubi->work_sem is used to synchronize with the workers. Workers take + * it in read mode, so many of them may be doing works at a time. But + * the queue flush code has to be sure the whole queue of works is + * done, and it takes the mutex in write mode. + */ + down_read(&ubi->work_sem); + spin_lock(&ubi->wl_lock); + if (list_empty(&ubi->works)) { + spin_unlock(&ubi->wl_lock); + up_read(&ubi->work_sem); + return 0; + } + + wrk = list_entry(ubi->works.next, struct ubi_work, list); + list_del(&wrk->list); + ubi->works_count -= 1; + spin_unlock(&ubi->wl_lock); + + /* + * Call the worker function. Do not touch the work structure + * after this call as it will have been freed or reused by that + * time by the worker function. + */ + err = wrk->func(ubi, wrk, 0); + if (err) + ubi_err("work failed with error code %d", err); + up_read(&ubi->work_sem); + + return err; +} + +/** + * produce_free_peb - produce a free physical eraseblock. 
+ * @ubi: UBI device description object + * + * This function tries to make a free PEB by means of synchronous execution of + * pending works. This may be needed if, for example the background thread is + * disabled. Returns zero in case of success and a negative error code in case + * of failure. + */ +static int produce_free_peb(struct ubi_device *ubi) +{ + int err; + + spin_lock(&ubi->wl_lock); + while (!ubi->free.rb_node) { + spin_unlock(&ubi->wl_lock); + + err = do_work(ubi); + if (err) + return err; + + spin_lock(&ubi->wl_lock); + } + spin_unlock(&ubi->wl_lock); + + return 0; +} + +/** + * in_wl_tree - check if wear-leveling entry is present in a WL RB-tree. + * @e: the wear-leveling entry to check + * @root: the root of the tree + * + * This function returns non-zero if @e is in the @root RB-tree and zero if it + * is not. + */ +static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root) +{ + struct rb_node *p; + + p = root->rb_node; + while (p) { + struct ubi_wl_entry *e1; + + e1 = rb_entry(p, struct ubi_wl_entry, rb); + + if (e->pnum == e1->pnum) { + return 1; + } + + if (e->ec < e1->ec) + p = p->rb_left; + else if (e->ec > e1->ec) + p = p->rb_right; + else { + if (e->pnum < e1->pnum) + p = p->rb_left; + else + p = p->rb_right; + } + } + + return 0; +} + +/** + * find_wl_entry - find wear-leveling entry closest to certain erase counter. + * @root: the RB-tree where to look for + * @max: highest possible erase counter + * + * This function looks for a wear leveling entry with erase counter closest to + * @max and less then @max. 
+ */ +static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max) +{ + struct rb_node *p; + struct ubi_wl_entry *e; + + e = rb_entry(rb_first(root), struct ubi_wl_entry, rb); + max += e->ec; + + p = root->rb_node; + while (p) { + struct ubi_wl_entry *e1; + + e1 = rb_entry(p, struct ubi_wl_entry, rb); + if (e1->ec >= max) + p = p->rb_left; + else { + p = p->rb_right; + e = e1; + } + } + + return e; +} + +/** + * ubi_wl_get_peb - get a physical eraseblock. + * @ubi: UBI device description object + * @dtype: type of data which will be stored in this physical eraseblock + * + * This function returns a physical eraseblock in case of success and a + * negative error code in case of failure. Might sleep. + */ +int ubi_wl_get_peb(struct ubi_device *ubi, int dtype) +{ + int err, protect, medium_ec; + struct ubi_wl_entry *e, *first, *last; + struct ubi_wl_prot_entry *pe; + + + pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS); + if (!pe) + return -ENOMEM; + +retry: + spin_lock(&ubi->wl_lock); + if (!ubi->free.rb_node) { + if (ubi->works_count == 0) { + ubi_err("no free eraseblocks"); + spin_unlock(&ubi->wl_lock); + kfree(pe); + return -ENOSPC; + } + spin_unlock(&ubi->wl_lock); + + err = produce_free_peb(ubi); + if (err < 0) { + kfree(pe); + return err; + } + goto retry; + } + + switch (dtype) { + case UBI_LONGTERM: + /* + * For long term data we pick a physical eraseblock + * with high erase counter. But the highest erase + * counter we can pick is bounded by the the lowest + * erase counter plus %WL_FREE_MAX_DIFF. + */ + e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); + protect = LT_PROTECTION; + break; + case UBI_UNKNOWN: + /* + * For unknown data we pick a physical eraseblock with + * medium erase counter. But we by no means can pick a + * physical eraseblock with erase counter greater or + * equivalent than the lowest erase counter plus + * %WL_FREE_MAX_DIFF. 
+ */ + first = rb_entry(rb_first(&ubi->free), + struct ubi_wl_entry, rb); + last = rb_entry(rb_last(&ubi->free), + struct ubi_wl_entry, rb); + + if (last->ec - first->ec < WL_FREE_MAX_DIFF) + e = rb_entry(ubi->free.rb_node, + struct ubi_wl_entry, rb); + else { + medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2; + e = find_wl_entry(&ubi->free, medium_ec); + } + protect = U_PROTECTION; + break; + case UBI_SHORTTERM: + /* + * For short term data we pick a physical eraseblock + * with the lowest erase counter as we expect it will + * be erased soon. + */ + e = rb_entry(rb_first(&ubi->free), + struct ubi_wl_entry, rb); + protect = ST_PROTECTION; + break; + default: + protect = 0; + e = NULL; + } + + /* + * Move the physical eraseblock to the protection trees where it will + * be protected from being moved for some time. + */ + paranoid_check_in_wl_tree(e, &ubi->free); + rb_erase(&e->rb, &ubi->free); + + wl_tree_add(e, &ubi->used); + + spin_unlock(&ubi->wl_lock); + + return e->pnum; +} + +/** + * sync_erase - synchronously erase a physical eraseblock. + * @ubi: UBI device description object + * @e: the the physical eraseblock to erase + * @torture: if the physical eraseblock has to be tortured + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture) +{ + int err; + struct ubi_ec_hdr *ec_hdr; + unsigned long long ec = e->ec; + + + err = paranoid_check_ec(ubi, e->pnum, e->ec); + if (err > 0) + return -EINVAL; + + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); + if (!ec_hdr) + return -ENOMEM; + + err = ubi_io_sync_erase(ubi, e->pnum, torture); + if (err < 0) + goto out_free; + + ec += err; + if (ec > UBI_MAX_ERASECOUNTER) { + /* + * Erase counter overflow. Upgrade UBI and use 64-bit + * erase counters internally. 
+ */ + ubi_err("erase counter overflow at PEB %d, EC %llu", + e->pnum, ec); + err = -EINVAL; + goto out_free; + } + + + ec_hdr->ec = cpu_to_be64(ec); + + err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr); + if (err) + goto out_free; + + e->ec = ec; + spin_lock(&ubi->wl_lock); + if (e->ec > ubi->max_ec) + ubi->max_ec = e->ec; + spin_unlock(&ubi->wl_lock); + +out_free: + kfree(ec_hdr); + return err; +} + +/** + * schedule_ubi_work - schedule a work. + * @ubi: UBI device description object + * @wrk: the work to schedule + * + * This function enqueues a work defined by @wrk to the tail of the pending + * works list. + */ +static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) +{ + spin_lock(&ubi->wl_lock); + list_add_tail(&wrk->list, &ubi->works); + ubi->works_count += 1; + + /* + * U-Boot special: We have no bgt_thread in U-Boot! + * So just call do_work() here directly. + */ + do_work(ubi); + + spin_unlock(&ubi->wl_lock); +} + +static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, + int cancel); + +/** + * schedule_erase - schedule an erase work. + * @ubi: UBI device description object + * @e: the WL entry of the physical eraseblock to erase + * @torture: if the physical eraseblock has to be tortured + * + * This function returns zero in case of success and a %-ENOMEM in case of + * failure. + */ +static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, + int torture) +{ + struct ubi_work *wl_wrk; + + + wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS); + if (!wl_wrk) + return -ENOMEM; + + wl_wrk->func = &erase_worker; + wl_wrk->e = e; + wl_wrk->torture = torture; + + schedule_ubi_work(ubi, wl_wrk); + return 0; +} + +/** + * wear_leveling_worker - wear-leveling worker function. + * @ubi: UBI device description object + * @wrk: the work object + * @cancel: non-zero if the worker has to free memory and exit + * + * This function copies a more worn out physical eraseblock to a less worn out + * one. 
Returns zero in case of success and a negative error code in case of + * failure. + */ +static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, + int cancel) +{ + + kfree(wrk); + + return 0; +} + +/** + * ensure_wear_leveling - schedule wear-leveling if it is needed. + * @ubi: UBI device description object + * + * This function checks if it is time to start wear-leveling and schedules it + * if yes. This function returns zero in case of success and a negative error + * code in case of failure. + */ +static int ensure_wear_leveling(struct ubi_device *ubi) +{ + return 0; +} + +/** + * erase_worker - physical eraseblock erase worker function. + * @ubi: UBI device description object + * @wl_wrk: the work object + * @cancel: non-zero if the worker has to free memory and exit + * + * This function erases a physical eraseblock and perform torture testing if + * needed. It also takes care about marking the physical eraseblock bad if + * needed. Returns zero in case of success and a negative error code in case of + * failure. + */ +static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, + int cancel) +{ + struct ubi_wl_entry *e = wl_wrk->e; + int pnum = e->pnum, err, need; + + if (cancel) { + kfree(wl_wrk); + kmem_cache_free(ubi_wl_entry_slab, e); + return 0; + } + + + err = sync_erase(ubi, e, wl_wrk->torture); + if (!err) { + /* Fine, we've erased it successfully */ + kfree(wl_wrk); + + spin_lock(&ubi->wl_lock); + ubi->abs_ec += 1; + wl_tree_add(e, &ubi->free); + spin_unlock(&ubi->wl_lock); + + /* + * One more erase operation has happened, take care about protected + * physical eraseblocks. 
+ */ + + /* And take care about wear-leveling */ + err = ensure_wear_leveling(ubi); + return err; + } + + ubi_err("failed to erase PEB %d, error %d", pnum, err); + kfree(wl_wrk); + kmem_cache_free(ubi_wl_entry_slab, e); + + if (err == -EINTR || err == -ENOMEM || err == -EAGAIN || + err == -EBUSY) { + int err1; + + /* Re-schedule the LEB for erasure */ + err1 = schedule_erase(ubi, e, 0); + if (err1) { + err = err1; + goto out_ro; + } + return err; + } else if (err != -EIO) { + /* + * If this is not %-EIO, we have no idea what to do. Scheduling + * this physical eraseblock for erasure again would cause + * errors again and again. Well, lets switch to RO mode. + */ + goto out_ro; + } + + /* It is %-EIO, the PEB went bad */ + + if (!ubi->bad_allowed) { + ubi_err("bad physical eraseblock %d detected", pnum); + goto out_ro; + } + + spin_lock(&ubi->volumes_lock); + need = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs + 1; + if (need > 0) { + need = ubi->avail_pebs >= need ? need : ubi->avail_pebs; + ubi->avail_pebs -= need; + ubi->rsvd_pebs += need; + ubi->beb_rsvd_pebs += need; + if (need > 0) + ubi_msg("reserve more %d PEBs", need); + } + + if (ubi->beb_rsvd_pebs == 0) { + spin_unlock(&ubi->volumes_lock); + ubi_err("no reserved physical eraseblocks"); + goto out_ro; + } + + spin_unlock(&ubi->volumes_lock); + ubi_msg("mark PEB %d as bad", pnum); + + err = ubi_io_mark_bad(ubi, pnum); + if (err) + goto out_ro; + + spin_lock(&ubi->volumes_lock); + ubi->beb_rsvd_pebs -= 1; + ubi->bad_peb_count += 1; + ubi->good_peb_count -= 1; + ubi_calculate_reserved(ubi); + if (ubi->beb_rsvd_pebs == 0) + ubi_warn("last PEB from the reserved pool was used"); + spin_unlock(&ubi->volumes_lock); + + return err; + +out_ro: + ubi_ro_mode(ubi); + return err; +} + +/** + * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit. 
+ * @ubi: UBI device description object + * @pnum: physical eraseblock to return + * @torture: if this physical eraseblock has to be tortured + * + * This function is called to return physical eraseblock @pnum to the pool of + * free physical eraseblocks. The @torture flag has to be set if an I/O error + * occurred to this @pnum and it has to be tested. This function returns zero + * in case of success, and a negative error code in case of failure. + */ +int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) +{ + int err; + struct ubi_wl_entry *e; + +retry: + spin_lock(&ubi->wl_lock); + e = ubi->lookuptbl[pnum]; + if (e == ubi->move_from) { + /* + * User is putting the physical eraseblock which was selected to + * be moved. It will be scheduled for erasure in the + * wear-leveling worker. + */ + spin_unlock(&ubi->wl_lock); + + /* Wait for the WL worker by taking the @ubi->move_mutex */ + mutex_lock(&ubi->move_mutex); + mutex_unlock(&ubi->move_mutex); + goto retry; + } else if (e == ubi->move_to) { + /* + * User is putting the physical eraseblock which was selected + * as the target the data is moved to. It may happen if the EBA + * unit already re-mapped the LEB in 'ubi_eba_copy_leb()' but + * the WL unit has not put the PEB to the "used" tree yet, but + * it is about to do this. So we just set a flag which will + * tell the WL worker that the PEB is not needed anymore and + * should be scheduled for erasure. 
+ */ + ubi->move_to_put = 1; + spin_unlock(&ubi->wl_lock); + return 0; + } else { + if (in_wl_tree(e, &ubi->used)) { + paranoid_check_in_wl_tree(e, &ubi->used); + rb_erase(&e->rb, &ubi->used); + } else if (in_wl_tree(e, &ubi->scrub)) { + paranoid_check_in_wl_tree(e, &ubi->scrub); + rb_erase(&e->rb, &ubi->scrub); + } + } + spin_unlock(&ubi->wl_lock); + + err = schedule_erase(ubi, e, torture); + if (err) { + spin_lock(&ubi->wl_lock); + wl_tree_add(e, &ubi->used); + spin_unlock(&ubi->wl_lock); + } + + return err; +} + +/** + * ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing. + * @ubi: UBI device description object + * @pnum: the physical eraseblock to schedule + * + * If a bit-flip in a physical eraseblock is detected, this physical eraseblock + * needs scrubbing. This function schedules a physical eraseblock for + * scrubbing which is done in background. This function returns zero in case of + * success and a negative error code in case of failure. + */ +int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum) +{ + struct ubi_wl_entry *e; + + ubi_msg("schedule PEB %d for scrubbing", pnum); + +retry: + spin_lock(&ubi->wl_lock); + e = ubi->lookuptbl[pnum]; + if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub)) { + spin_unlock(&ubi->wl_lock); + return 0; + } + + if (e == ubi->move_to) { + /* + * This physical eraseblock was used to move data to. The data + * was moved but the PEB was not yet inserted to the proper + * tree. We should just wait a little and let the WL worker + * proceed. + */ + spin_unlock(&ubi->wl_lock); + yield(); + goto retry; + } + + if (in_wl_tree(e, &ubi->used)) { + paranoid_check_in_wl_tree(e, &ubi->used); + rb_erase(&e->rb, &ubi->used); + } + wl_tree_add(e, &ubi->scrub); + spin_unlock(&ubi->wl_lock); + + /* + * Technically scrubbing is the same as wear-leveling, so it is done + * by the WL worker. + */ + return ensure_wear_leveling(ubi); +} + +/** + * ubi_wl_flush - flush all pending works. 
+ * @ubi: UBI device description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +int ubi_wl_flush(struct ubi_device *ubi) +{ + int err; + + /* + * Erase while the pending works queue is not empty, but not more then + * the number of currently pending works. + */ + while (ubi->works_count) { + err = do_work(ubi); + if (err) + return err; + } + + /* + * Make sure all the works which have been done in parallel are + * finished. + */ + down_write(&ubi->work_sem); + up_write(&ubi->work_sem); + + /* + * And in case last was the WL worker and it cancelled the LEB + * movement, flush again. + */ + while (ubi->works_count) { + err = do_work(ubi); + if (err) + return err; + } + + return 0; +} + +/** + * tree_destroy - destroy an RB-tree. + * @root: the root of the tree to destroy + */ +static void tree_destroy(struct rb_root *root) +{ + struct rb_node *rb; + struct ubi_wl_entry *e; + + rb = root->rb_node; + while (rb) { + if (rb->rb_left) + rb = rb->rb_left; + else if (rb->rb_right) + rb = rb->rb_right; + else { + e = rb_entry(rb, struct ubi_wl_entry, rb); + + rb = rb_parent(rb); + if (rb) { + if (rb->rb_left == &e->rb) + rb->rb_left = NULL; + else + rb->rb_right = NULL; + } + + kmem_cache_free(ubi_wl_entry_slab, e); + } + } +} + +/** + * ubi_thread - UBI background thread. 
+ * @u: the UBI device description object pointer + */ +int ubi_thread(void *u) +{ + int failures = 0; + struct ubi_device *ubi = u; + + ubi_msg("background thread \"%s\" started, PID %d", + ubi->bgt_name, task_pid_nr(current)); + + set_freezable(); + for (;;) { + int err; + + if (kthread_should_stop()) + break; + + if (try_to_freeze()) + continue; + + spin_lock(&ubi->wl_lock); + if (list_empty(&ubi->works) || ubi->ro_mode || + !ubi->thread_enabled) { + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock(&ubi->wl_lock); + schedule(); + continue; + } + spin_unlock(&ubi->wl_lock); + + err = do_work(ubi); + if (err) { + ubi_err("%s: work failed with error code %d", + ubi->bgt_name, err); + if (failures++ > WL_MAX_FAILURES) { + /* + * Too many failures, disable the thread and + * switch to read-only mode. + */ + ubi_msg("%s: %d consecutive failures", + ubi->bgt_name, WL_MAX_FAILURES); + ubi_ro_mode(ubi); + break; + } + } else + failures = 0; + + cond_resched(); + } + + return 0; +} + +/** + * cancel_pending - cancel all pending works. + * @ubi: UBI device description object + */ +static void cancel_pending(struct ubi_device *ubi) +{ + while (!list_empty(&ubi->works)) { + struct ubi_work *wrk; + + wrk = list_entry(ubi->works.next, struct ubi_work, list); + list_del(&wrk->list); + wrk->func(ubi, wrk, 1); + ubi->works_count -= 1; + } +} + +/** + * ubi_wl_init_scan - initialize the wear-leveling unit using scanning + * information. + * @ubi: UBI device description object + * @si: scanning information + * + * This function returns zero in case of success, and a negative error code in + * case of failure. 
+ */ +int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) +{ + int err; + struct rb_node *rb1, *rb2; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *seb, *tmp; + struct ubi_wl_entry *e; + + + ubi->used = ubi->free = ubi->scrub = RB_ROOT; + ubi->prot.pnum = ubi->prot.aec = RB_ROOT; + spin_lock_init(&ubi->wl_lock); + mutex_init(&ubi->move_mutex); + init_rwsem(&ubi->work_sem); + ubi->max_ec = si->max_ec; + INIT_LIST_HEAD(&ubi->works); + + sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num); + + err = -ENOMEM; + ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL); + if (!ubi->lookuptbl) + return err; + + list_for_each_entry_safe(seb, tmp, &si->erase, u.list) { + cond_resched(); + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + goto out_free; + + e->pnum = seb->pnum; + e->ec = seb->ec; + ubi->lookuptbl[e->pnum] = e; + if (schedule_erase(ubi, e, 0)) { + kmem_cache_free(ubi_wl_entry_slab, e); + goto out_free; + } + } + + list_for_each_entry(seb, &si->free, u.list) { + cond_resched(); + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + goto out_free; + + e->pnum = seb->pnum; + e->ec = seb->ec; + wl_tree_add(e, &ubi->free); + ubi->lookuptbl[e->pnum] = e; + } + + list_for_each_entry(seb, &si->corr, u.list) { + cond_resched(); + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + goto out_free; + + e->pnum = seb->pnum; + e->ec = seb->ec; + ubi->lookuptbl[e->pnum] = e; + if (schedule_erase(ubi, e, 0)) { + kmem_cache_free(ubi_wl_entry_slab, e); + goto out_free; + } + } + + ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { + ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { + cond_resched(); + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + goto out_free; + + e->pnum = seb->pnum; + e->ec = seb->ec; + ubi->lookuptbl[e->pnum] = e; + if (!seb->scrub) { + wl_tree_add(e, &ubi->used); + } else { + wl_tree_add(e, &ubi->scrub); + } + } + } + + if (ubi->avail_pebs < 
WL_RESERVED_PEBS) { + ubi_err("no enough physical eraseblocks (%d, need %d)", + ubi->avail_pebs, WL_RESERVED_PEBS); + goto out_free; + } + ubi->avail_pebs -= WL_RESERVED_PEBS; + ubi->rsvd_pebs += WL_RESERVED_PEBS; + + /* Schedule wear-leveling if needed */ + err = ensure_wear_leveling(ubi); + if (err) + goto out_free; + + return 0; + +out_free: + cancel_pending(ubi); + tree_destroy(&ubi->used); + tree_destroy(&ubi->free); + tree_destroy(&ubi->scrub); + kfree(ubi->lookuptbl); + return err; +} + +/** + * protection_trees_destroy - destroy the protection RB-trees. + * @ubi: UBI device description object + */ +static void protection_trees_destroy(struct ubi_device *ubi) +{ + struct rb_node *rb; + struct ubi_wl_prot_entry *pe; + + rb = ubi->prot.aec.rb_node; + while (rb) { + if (rb->rb_left) + rb = rb->rb_left; + else if (rb->rb_right) + rb = rb->rb_right; + else { + pe = rb_entry(rb, struct ubi_wl_prot_entry, rb_aec); + + rb = rb_parent(rb); + if (rb) { + if (rb->rb_left == &pe->rb_aec) + rb->rb_left = NULL; + else + rb->rb_right = NULL; + } + + kmem_cache_free(ubi_wl_entry_slab, pe->e); + kfree(pe); + } + } +} + +/** + * ubi_wl_close - close the wear-leveling unit. + * @ubi: UBI device description object + */ +void ubi_wl_close(struct ubi_device *ubi) +{ + cancel_pending(ubi); + protection_trees_destroy(ubi); + tree_destroy(&ubi->used); + tree_destroy(&ubi->free); + tree_destroy(&ubi->scrub); + kfree(ubi->lookuptbl); +} + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + +/** + * paranoid_check_ec - make sure that the erase counter of a physical eraseblock + * is correct. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * @ec: the erase counter to check + * + * This function returns zero if the erase counter of physical eraseblock @pnum + * is equivalent to @ec, %1 if not, and a negative error code if an error + * occurred. 
+ */ +static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec) +{ + int err; + long long read_ec; + struct ubi_ec_hdr *ec_hdr; + + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); + if (!ec_hdr) + return -ENOMEM; + + err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0); + if (err && err != UBI_IO_BITFLIPS) { + /* The header does not have to exist */ + err = 0; + goto out_free; + } + + read_ec = be64_to_cpu(ec_hdr->ec); + if (ec != read_ec) { + ubi_err("paranoid check failed for PEB %d", pnum); + ubi_err("read EC is %lld, should be %d", read_ec, ec); + err = 1; + } else + err = 0; + +out_free: + kfree(ec_hdr); + return err; +} + +/** + * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present + * in a WL RB-tree. + * @e: the wear-leveling entry to check + * @root: the root of the tree + * + * This function returns zero if @e is in the @root RB-tree and %1 if it + * is not. + */ +static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, + struct rb_root *root) +{ + if (in_wl_tree(e, root)) + return 0; + + ubi_err("paranoid check failed for PEB %d, EC %d, RB-tree %p ", + e->pnum, e->ec, root); + return 1; +} + +#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile new file mode 100755 index 0000000..74b92b6 --- /dev/null +++ b/fs/ubifs/Makefile @@ -0,0 +1,53 @@ +# +# (C) Copyright 2006 +# Wolfgang Denk, DENX Software Engineering, wd@denx.de. +# +# (C) Copyright 2003 +# Pavel Bartusek, Sysgo Real-Time Solutions AG, pba@sysgo.de +# +# +# See file CREDITS for list of people who contributed to this +# project. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# + +include $(TOPDIR)/config.mk + +LIB = libubifs.a + +COBJS = ubifs.o io.o super.o sb.o master.o +COBJS += scan.o +COBJS += tnc.o tnc_misc.o budget.o +COBJS += log.o recovery.o replay.o + +OBJS = $(AOBJS) $(COBJS) + +#CPPFLAGS += +all: $(LIB) $(AOBJS) + +$(LIB): .depend $(OBJS) + $(AR) crv $@ $(OBJS) + + +######################################################################### + +.depend: Makefile $(AOBJS:.o=.S) $(COBJS:.o=.c) + $(CC) -M $(CFLAGS) $(AOBJS:.o=.S) $(COBJS:.o=.c) > $@ + +sinclude .depend +######################################################################### + diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c new file mode 100755 index 0000000..85377ea --- /dev/null +++ b/fs/ubifs/budget.c @@ -0,0 +1,113 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Ðртём) + */ + +/* + * This file implements the budgeting sub-system which is responsible for UBIFS + * space management. + * + * Factors such as compression, wasted space at the ends of LEBs, space in other + * journal heads, the effect of updates on the index, and so on, make it + * impossible to accurately predict the amount of space needed. Consequently + * approximations are used. + */ + +#include "ubifs.h" +#include <linux/math64.h> + +/** + * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index. + * @c: UBIFS file-system description object + * + * This function calculates and returns the number of eraseblocks which should + * be kept for index usage. + */ +int ubifs_calc_min_idx_lebs(struct ubifs_info *c) +{ + int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz; + long long idx_size; + + idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; + + /* And make sure we have thrice the index size of space reserved */ + idx_size = idx_size + (idx_size << 1); + + /* + * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' + * pair, nor similarly the two variables for the new index size, so we + * have to do this costly 64-bit division on fast-path. + */ + idx_size += eff_leb_size - 1; + idx_lebs = div_u64(idx_size, eff_leb_size); + /* + * The index head is not available for the in-the-gaps method, so add an + * extra LEB to compensate. + */ + idx_lebs += 1; + if (idx_lebs < MIN_INDEX_LEBS) + idx_lebs = MIN_INDEX_LEBS; + return idx_lebs; +} + +/** + * ubifs_reported_space - calculate reported free space. 
+ * @c: the UBIFS file-system description object + * @free: amount of free space + * + * This function calculates amount of free space which will be reported to + * user-space. User-space application tend to expect that if the file-system + * (e.g., via the 'statfs()' call) reports that it has N bytes available, they + * are able to write a file of size N. UBIFS attaches node headers to each data + * node and it has to write indexing nodes as well. This introduces additional + * overhead, and UBIFS has to report slightly less free space to meet the above + * expectations. + * + * This function assumes free space is made up of uncompressed data nodes and + * full index nodes (one per data node, tripled because we always allow enough + * space to write the index thrice). + * + * Note, the calculation is pessimistic, which means that most of the time + * UBIFS reports less space than it actually has. + */ +long long ubifs_reported_space(const struct ubifs_info *c, long long free) +{ + int divisor, factor, f; + + /* + * Reported space size is @free * X, where X is UBIFS block size + * divided by UBIFS block size + all overhead one data block + * introduces. The overhead is the node header + indexing overhead. + * + * Indexing overhead calculations are based on the following formula: + * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number + * of data nodes, f - fanout. Because effective UBIFS fanout is twice + * as less than maximum fanout, we assume that each data node + * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. + * Note, the multiplier 3 is because UBIFS reserves thrice as more space + * for the index. + */ + f = c->fanout > 3 ? 
c->fanout >> 1 : 2; + factor = UBIFS_BLOCK_SIZE; + divisor = UBIFS_MAX_DATA_NODE_SZ; + divisor += (c->max_idx_node_sz * 3) / (f - 1); + free *= factor; + return div_u64(free, divisor); +} diff --git a/fs/ubifs/crc16.c b/fs/ubifs/crc16.c new file mode 100755 index 0000000..443ccf8 --- /dev/null +++ b/fs/ubifs/crc16.c @@ -0,0 +1,60 @@ +/* + * crc16.c + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/types.h> +#include "crc16.h" + +/** CRC table for the CRC-16. The poly is 0x8005 (x^16 + x^15 + x^2 + 1) */ +u16 const crc16_table[256] = { + 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, + 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, + 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, + 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, + 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, + 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, + 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, + 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, + 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, + 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, + 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, + 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, + 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, + 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, + 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, + 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, + 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, + 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, + 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, + 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, + 0x7800, 0xB8C1, 0xB981, 0x7940, 
0xBB01, 0x7BC0, 0x7A80, 0xBA41, + 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, + 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, + 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, + 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, + 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, + 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, + 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, + 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, + 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, + 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, + 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 +}; + +/** + * crc16 - compute the CRC-16 for the data buffer + * @crc: previous CRC value + * @buffer: data pointer + * @len: number of bytes in the buffer + * + * Returns the updated CRC value. + */ +u16 crc16(u16 crc, u8 const *buffer, size_t len) +{ + while (len--) + crc = crc16_byte(crc, *buffer++); + return crc; +} diff --git a/fs/ubifs/crc16.h b/fs/ubifs/crc16.h new file mode 100755 index 0000000..052fd33 --- /dev/null +++ b/fs/ubifs/crc16.h @@ -0,0 +1,29 @@ +/* + * crc16.h - CRC-16 routine + * + * Implements the standard CRC-16: + * Width 16 + * Poly 0x8005 (x^16 + x^15 + x^2 + 1) + * Init 0 + * + * Copyright (c) 2005 Ben Gardner <bgardner@wabtec.com> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ + +#ifndef __CRC16_H +#define __CRC16_H + +#include <linux/types.h> + +extern u16 const crc16_table[256]; + +extern u16 crc16(u16 crc, const u8 *buffer, size_t len); + +static inline u16 crc16_byte(u16 crc, const u8 data) +{ + return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff]; +} + +#endif /* __CRC16_H */ diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c new file mode 100755 index 0000000..6afb883 --- /dev/null +++ b/fs/ubifs/debug.c @@ -0,0 +1,156 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Ðртём) + * Adrian Hunter + */ + +/* + * This file implements most of the debugging stuff which is compiled in only + * when it is enabled. But some debugging check functions are implemented in + * corresponding subsystem, just because they are closely related and utilize + * various local functions of those subsystems. 
+ */ + +#define UBIFS_DBG_PRESERVE_UBI + +#include "ubifs.h" + +#ifdef CONFIG_UBIFS_FS_DEBUG + +DEFINE_SPINLOCK(dbg_lock); + +static char dbg_key_buf0[128]; +static char dbg_key_buf1[128]; + +unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT; +unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT; +unsigned int ubifs_tst_flags; + +module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); +module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); +module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); + +MODULE_PARM_DESC(debug_msgs, "Debug message type flags"); +MODULE_PARM_DESC(debug_chks, "Debug check flags"); +MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); + +static const char *get_key_type(int type) +{ + switch (type) { + case UBIFS_INO_KEY: + return "inode"; + case UBIFS_DENT_KEY: + return "direntry"; + case UBIFS_XENT_KEY: + return "xentry"; + case UBIFS_DATA_KEY: + return "data"; + case UBIFS_TRUN_KEY: + return "truncate"; + default: + return "unknown/invalid key"; + } +} + +static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, + char *buffer) +{ + char *p = buffer; + int type = key_type(c, key); + + if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) { + switch (type) { + case UBIFS_INO_KEY: + sprintf(p, "(%lu, %s)", (unsigned long)key_inum(c, key), + get_key_type(type)); + break; + case UBIFS_DENT_KEY: + case UBIFS_XENT_KEY: + sprintf(p, "(%lu, %s, %#08x)", + (unsigned long)key_inum(c, key), + get_key_type(type), key_hash(c, key)); + break; + case UBIFS_DATA_KEY: + sprintf(p, "(%lu, %s, %u)", + (unsigned long)key_inum(c, key), + get_key_type(type), key_block(c, key)); + break; + case UBIFS_TRUN_KEY: + sprintf(p, "(%lu, %s)", + (unsigned long)key_inum(c, key), + get_key_type(type)); + break; + default: + sprintf(p, "(bad key type: %#08x, %#08x)", + key->u32[0], key->u32[1]); + } + } else + sprintf(p, "bad key format %d", c->key_fmt); +} + +const char *dbg_key_str0(const struct 
ubifs_info *c, const union ubifs_key *key) +{ + /* dbg_lock must be held */ + sprintf_key(c, key, dbg_key_buf0); + return dbg_key_buf0; +} + +const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key) +{ + /* dbg_lock must be held */ + sprintf_key(c, key, dbg_key_buf1); + return dbg_key_buf1; +} + +/** + * ubifs_debugging_init - initialize UBIFS debugging. + * @c: UBIFS file-system description object + * + * This function initializes debugging-related data for the file system. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_debugging_init(struct ubifs_info *c) +{ + c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); + if (!c->dbg) + return -ENOMEM; + + c->dbg->buf = vmalloc(c->leb_size); + if (!c->dbg->buf) + goto out; + + return 0; + +out: + kfree(c->dbg); + return -ENOMEM; +} + +/** + * ubifs_debugging_exit - free debugging data. + * @c: UBIFS file-system description object + */ +void ubifs_debugging_exit(struct ubifs_info *c) +{ + vfree(c->dbg->buf); + kfree(c->dbg); +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h new file mode 100755 index 0000000..47c8047 --- /dev/null +++ b/fs/ubifs/debug.h @@ -0,0 +1,392 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +#ifndef __UBIFS_DEBUG_H__ +#define __UBIFS_DEBUG_H__ + +#ifdef CONFIG_UBIFS_FS_DEBUG + +/** + * ubifs_debug_info - per-FS debugging information. + * @buf: a buffer of LEB size, used for various purposes + * @old_zroot: old index root - used by 'dbg_check_old_index()' + * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' + * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' + * @failure_mode: failure mode for recovery testing + * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls + * @fail_timeout: time in jiffies when delay of failure mode expires + * @fail_cnt: current number of calls to failure mode I/O functions + * @fail_cnt_max: number of calls by which to delay failure mode + * @chk_lpt_sz: used by LPT tree size checker + * @chk_lpt_sz2: used by LPT tree size checker + * @chk_lpt_wastage: used by LPT tree size checker + * @chk_lpt_lebs: used by LPT tree size checker + * @new_nhead_offs: used by LPT tree size checker + * @new_ihead_lnum: used by debugging to check @c->ihead_lnum + * @new_ihead_offs: used by debugging to check @c->ihead_offs + * + * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') + * @saved_free: saved free space (used by 'dbg_save_space_info()') + * + * @dfs_dir_name: name of debugfs directory containing this file-system's files + * @dfs_dir: direntry object of the file-system debugfs directory + * @dfs_dump_lprops: "dump lprops" debugfs knob + * @dfs_dump_budg: "dump budgeting information" debugfs knob + * @dfs_dump_tnc: "dump TNC" debugfs knob + */ +struct ubifs_debug_info { + void *buf; + struct ubifs_zbranch old_zroot; + int old_zroot_level; + unsigned long long
old_zroot_sqnum; + int failure_mode; + int fail_delay; + unsigned long fail_timeout; + unsigned int fail_cnt; + unsigned int fail_cnt_max; + long long chk_lpt_sz; + long long chk_lpt_sz2; + long long chk_lpt_wastage; + int chk_lpt_lebs; + int new_nhead_offs; + int new_ihead_lnum; + int new_ihead_offs; + + struct ubifs_lp_stats saved_lst; + long long saved_free; + + char dfs_dir_name[100]; + struct dentry *dfs_dir; + struct dentry *dfs_dump_lprops; + struct dentry *dfs_dump_budg; + struct dentry *dfs_dump_tnc; +}; + +#define UBIFS_DBG(op) op + +#define ubifs_assert(expr) do { \ + if (unlikely(!(expr))) { \ + printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ + __func__, __LINE__, 0); \ + dbg_dump_stack(); \ + } \ +} while (0) + +#define ubifs_assert_cmt_locked(c) do { \ + if (unlikely(down_write_trylock(&(c)->commit_sem))) { \ + up_write(&(c)->commit_sem); \ + printk(KERN_CRIT "commit lock is not locked!\n"); \ + ubifs_assert(0); \ + } \ +} while (0) + +#define dbg_dump_stack() do { \ + if (!dbg_failure_mode) \ + dump_stack(); \ +} while (0) + +/* Generic debugging messages */ +#define dbg_msg(fmt, ...) do { \ + spin_lock(&dbg_lock); \ + printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", 0, \ + __func__, ##__VA_ARGS__); \ + spin_unlock(&dbg_lock); \ +} while (0) + +#define dbg_do_msg(typ, fmt, ...) do { \ + if (ubifs_msg_flags & typ) \ + dbg_msg(fmt, ##__VA_ARGS__); \ +} while (0) + +#define dbg_err(fmt, ...) do { \ + spin_lock(&dbg_lock); \ + ubifs_err(fmt, ##__VA_ARGS__); \ + spin_unlock(&dbg_lock); \ +} while (0) + +const char *dbg_key_str0(const struct ubifs_info *c, + const union ubifs_key *key); +const char *dbg_key_str1(const struct ubifs_info *c, + const union ubifs_key *key); + +/* + * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message + * macros. + */ +#define DBGKEY(key) dbg_key_str0(c, (key)) +#define DBGKEY1(key) dbg_key_str1(c, (key)) + +/* General messages */ +#define dbg_gen(fmt, ...) 
dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) + +/* Additional journal messages */ +#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) + +/* Additional TNC messages */ +#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) + +/* Additional lprops messages */ +#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) + +/* Additional LEB find messages */ +#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) + +/* Additional mount messages */ +#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) + +/* Additional I/O messages */ +#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) + +/* Additional commit messages */ +#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) + +/* Additional budgeting messages */ +#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) + +/* Additional log messages */ +#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) + +/* Additional gc messages */ +#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) + +/* Additional scan messages */ +#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) + +/* Additional recovery messages */ +#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) + +/* + * Debugging message type flags (must match msg_type_names in debug.c). 
+ * + * UBIFS_MSG_GEN: general messages + * UBIFS_MSG_JNL: journal messages + * UBIFS_MSG_MNT: mount messages + * UBIFS_MSG_CMT: commit messages + * UBIFS_MSG_FIND: LEB find messages + * UBIFS_MSG_BUDG: budgeting messages + * UBIFS_MSG_GC: garbage collection messages + * UBIFS_MSG_TNC: TNC messages + * UBIFS_MSG_LP: lprops messages + * UBIFS_MSG_IO: I/O messages + * UBIFS_MSG_LOG: log messages + * UBIFS_MSG_SCAN: scan messages + * UBIFS_MSG_RCVRY: recovery messages + */ +enum { + UBIFS_MSG_GEN = 0x1, + UBIFS_MSG_JNL = 0x2, + UBIFS_MSG_MNT = 0x4, + UBIFS_MSG_CMT = 0x8, + UBIFS_MSG_FIND = 0x10, + UBIFS_MSG_BUDG = 0x20, + UBIFS_MSG_GC = 0x40, + UBIFS_MSG_TNC = 0x80, + UBIFS_MSG_LP = 0x100, + UBIFS_MSG_IO = 0x200, + UBIFS_MSG_LOG = 0x400, + UBIFS_MSG_SCAN = 0x800, + UBIFS_MSG_RCVRY = 0x1000, +}; + +/* Debugging message type flags for each default debug message level */ +#define UBIFS_MSG_LVL_0 0 +#define UBIFS_MSG_LVL_1 0x1 +#define UBIFS_MSG_LVL_2 0x7f +#define UBIFS_MSG_LVL_3 0xffff + +/* + * Debugging check flags (must match chk_names in debug.c). + * + * UBIFS_CHK_GEN: general checks + * UBIFS_CHK_TNC: check TNC + * UBIFS_CHK_IDX_SZ: check index size + * UBIFS_CHK_ORPH: check orphans + * UBIFS_CHK_OLD_IDX: check the old index + * UBIFS_CHK_LPROPS: check lprops + * UBIFS_CHK_FS: check the file-system + */ +enum { + UBIFS_CHK_GEN = 0x1, + UBIFS_CHK_TNC = 0x2, + UBIFS_CHK_IDX_SZ = 0x4, + UBIFS_CHK_ORPH = 0x8, + UBIFS_CHK_OLD_IDX = 0x10, + UBIFS_CHK_LPROPS = 0x20, + UBIFS_CHK_FS = 0x40, +}; + +/* + * Special testing flags (must match tst_names in debug.c). 
+ * + * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method + * UBIFS_TST_RCVRY: failure mode for recovery testing + */ +enum { + UBIFS_TST_FORCE_IN_THE_GAPS = 0x2, + UBIFS_TST_RCVRY = 0x4, +}; + +#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1 +#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1 +#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2 +#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2 +#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3 +#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3 +#else +#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3 +#endif + +#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS +#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff +#else +#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff +#endif + +#define dbg_ntype(type) "" +#define dbg_cstate(cmt_state) "" +#define dbg_get_key_dump(c, key) ({}) +#define dbg_dump_inode(c, inode) ({}) +#define dbg_dump_node(c, node) ({}) +#define dbg_dump_budget_req(req) ({}) +#define dbg_dump_lstats(lst) ({}) +#define dbg_dump_budg(c) ({}) +#define dbg_dump_lprop(c, lp) ({}) +#define dbg_dump_lprops(c) ({}) +#define dbg_dump_lpt_info(c) ({}) +#define dbg_dump_leb(c, lnum) ({}) +#define dbg_dump_znode(c, znode) ({}) +#define dbg_dump_heap(c, heap, cat) ({}) +#define dbg_dump_pnode(c, pnode, parent, iip) ({}) +#define dbg_dump_tnc(c) ({}) +#define dbg_dump_index(c) ({}) + +#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 +#define dbg_old_index_check_init(c, zroot) 0 +#define dbg_check_old_index(c, zroot) 0 +#define dbg_check_cats(c) 0 +#define dbg_check_ltab(c) 0 +#define dbg_chk_lpt_free_spc(c) 0 +#define dbg_chk_lpt_sz(c, action, len) 0 +#define dbg_check_synced_i_size(inode) 0 +#define dbg_check_dir_size(c, dir) 0 +#define dbg_check_tnc(c, x) 0 +#define dbg_check_idx_size(c, idx_size) 0 +#define dbg_check_filesystem(c) 0 +#define dbg_check_heap(c, heap, cat, add_pos) ({}) +#define dbg_check_lprops(c) 0 +#define dbg_check_lpt_nodes(c, cnode, row, col) 0 +#define dbg_force_in_the_gaps_enabled 0 +#define dbg_force_in_the_gaps() 0 +#define 
dbg_failure_mode 0 +#define dbg_failure_mode_registration(c) ({}) +#define dbg_failure_mode_deregistration(c) ({}) + +int ubifs_debugging_init(struct ubifs_info *c); +void ubifs_debugging_exit(struct ubifs_info *c); + +#else /* !CONFIG_UBIFS_FS_DEBUG */ + +#define UBIFS_DBG(op) + +/* Use "if (0)" to make compiler check arguments even if debugging is off */ +#define ubifs_assert(expr) do { \ + if (0 && (expr)) \ + printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ + __func__, __LINE__, 0); \ +} while (0) + +#define dbg_err(fmt, ...) do { \ + if (0) \ + ubifs_err(fmt, ##__VA_ARGS__); \ +} while (0) + +#define dbg_msg(fmt, ...) do { \ + if (0) \ + printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ + 0, __func__, ##__VA_ARGS__); \ +} while (0) + +#define dbg_dump_stack() +#define ubifs_assert_cmt_locked(c) + +#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_rcvry(fmt, ...) 
dbg_msg(fmt, ##__VA_ARGS__) + +#define DBGKEY(key) ((char *)(key)) +#define DBGKEY1(key) ((char *)(key)) + +#define ubifs_debugging_init(c) 0 +#define ubifs_debugging_exit(c) ({}) + +#define dbg_ntype(type) "" +#define dbg_cstate(cmt_state) "" +#define dbg_get_key_dump(c, key) ({}) +#define dbg_dump_inode(c, inode) ({}) +#define dbg_dump_node(c, node) ({}) +#define dbg_dump_budget_req(req) ({}) +#define dbg_dump_lstats(lst) ({}) +#define dbg_dump_budg(c) ({}) +#define dbg_dump_lprop(c, lp) ({}) +#define dbg_dump_lprops(c) ({}) +#define dbg_dump_lpt_info(c) ({}) +#define dbg_dump_leb(c, lnum) ({}) +#define dbg_dump_znode(c, znode) ({}) +#define dbg_dump_heap(c, heap, cat) ({}) +#define dbg_dump_pnode(c, pnode, parent, iip) ({}) +#define dbg_dump_tnc(c) ({}) +#define dbg_dump_index(c) ({}) + +#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 +#define dbg_old_index_check_init(c, zroot) 0 +#define dbg_check_old_index(c, zroot) 0 +#define dbg_check_cats(c) 0 +#define dbg_check_ltab(c) 0 +#define dbg_chk_lpt_free_spc(c) 0 +#define dbg_chk_lpt_sz(c, action, len) 0 +#define dbg_check_synced_i_size(inode) 0 +#define dbg_check_dir_size(c, dir) 0 +#define dbg_check_tnc(c, x) 0 +#define dbg_check_idx_size(c, idx_size) 0 +#define dbg_check_filesystem(c) 0 +#define dbg_check_heap(c, heap, cat, add_pos) ({}) +#define dbg_check_lprops(c) 0 +#define dbg_check_lpt_nodes(c, cnode, row, col) 0 +#define dbg_force_in_the_gaps_enabled 0 +#define dbg_force_in_the_gaps() 0 +#define dbg_failure_mode 0 +#define dbg_failure_mode_registration(c) ({}) +#define dbg_failure_mode_deregistration(c) ({}) + +#endif /* !CONFIG_UBIFS_FS_DEBUG */ + +#endif /* !__UBIFS_DEBUG_H__ */ diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c new file mode 100755 index 0000000..aae5c65 --- /dev/null +++ b/fs/ubifs/io.c @@ -0,0 +1,316 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. 
+ * Copyright (C) 2006, 2007 University of Szeged, Hungary + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + * Zoltan Sogor + */ + +/* + * This file implements UBIFS I/O subsystem which provides various I/O-related + * helper functions (reading/writing/checking/validating nodes) and implements + * write-buffering support. Write buffers help to save space which otherwise + * would have been wasted for padding to the nearest minimal I/O unit boundary. + * Instead, data first goes to the write-buffer and is flushed when the + * buffer is full or when it is not used for some time (by timer). This is + * similar to the mechanism used by JFFS2. + * + * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by + * mutexes defined inside these objects. Since sometimes upper-level code + * has to lock the write-buffer (e.g. journal space reservation code), many + * functions related to write-buffers have "nolock" suffix which means that the + * caller has to lock the write-buffer before calling this function. + * + * UBIFS stores nodes at 64 bit-aligned addresses. If the node length is not + * aligned, UBIFS starts the next node from the aligned address, and the padded + * bytes may contain any rubbish. In other words, UBIFS does not put padding + * bytes in those small gaps.
Common headers of nodes store real node lengths, + * not aligned lengths. Indexing nodes also store real lengths in branches. + * + * UBIFS uses padding when it pads to the next min. I/O unit. In this case it + * uses padding nodes or padding bytes, if the padding node does not fit. + * + * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes + * every time they are read from the flash media. + */ + +#include "ubifs.h" + +/** + * ubifs_ro_mode - switch UBIFS to read-only mode. + * @c: UBIFS file-system description object + * @err: error code which is the reason of switching to R/O mode + * + * Does nothing if @c->ro_media is already set. Otherwise it sets + * @c->ro_media, clears @c->no_chk_data_crc, prints a warning that includes + * @err and calls dbg_dump_stack(). + */ +void ubifs_ro_mode(struct ubifs_info *c, int err) +{ + if (!c->ro_media) { + c->ro_media = 1; + c->no_chk_data_crc = 0; /* data node CRC checks are no longer skipped */ + ubifs_warn("switched to read-only mode, error %d", err); + dbg_dump_stack(); + } +} + +/** + * ubifs_check_node - check node. + * @c: UBIFS file-system description object + * @buf: node to check + * @lnum: logical eraseblock number + * @offs: offset within the logical eraseblock + * @quiet: print no messages + * @must_chk_crc: indicates whether to always check the CRC + * + * This function checks node magic number and CRC checksum. This function also + * validates node length to prevent UBIFS from becoming crazy when an attacker + * feeds it a file-system image with incorrect nodes. For example, too large + * node length in the common header could cause UBIFS to read memory outside of + * allocated buffer when checking the CRC checksum. + * + * This function may skip data nodes CRC checking if @c->no_chk_data_crc is + * true, which is controlled by corresponding UBIFS mount option. However, if + * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is + * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is + * ignored and CRC is checked. + * + * This function returns zero in case of success and %-EUCLEAN in case of bad + * CRC or magic. A bad node type or node length yields %-EINVAL.
+ */ +int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, + int offs, int quiet, int must_chk_crc) +{ + int err = -EINVAL, type, node_len; + uint32_t crc, node_crc, magic; + const struct ubifs_ch *ch = buf; + + ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); + ubifs_assert(!(offs & 7) && offs < c->leb_size); + + magic = le32_to_cpu(ch->magic); + if (magic != UBIFS_NODE_MAGIC) { + if (!quiet) + ubifs_err("bad magic %#08x, expected %#08x", + magic, UBIFS_NODE_MAGIC); + err = -EUCLEAN; + goto out; + } + + type = ch->node_type; + if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) { + if (!quiet) + ubifs_err("bad node type %d", type); + goto out; + } + + node_len = le32_to_cpu(ch->len); + if (node_len + offs > c->leb_size) + goto out_len; + + if (c->ranges[type].max_len == 0) { + if (node_len != c->ranges[type].len) + goto out_len; + } else if (node_len < c->ranges[type].min_len || + node_len > c->ranges[type].max_len) + goto out_len; + + if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc && + c->no_chk_data_crc) + return 0; + + crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); + node_crc = le32_to_cpu(ch->crc); + if (crc != node_crc) { + if (!quiet) + ubifs_err("bad CRC: calculated %#08x, read %#08x", + crc, node_crc); + err = -EUCLEAN; + goto out; + } + + return 0; + +out_len: + if (!quiet) + ubifs_err("bad node length %d", node_len); +out: + if (!quiet) { + ubifs_err("bad node at LEB %d:%d", lnum, offs); + dbg_dump_node(c, buf); + dbg_dump_stack(); + } + return err; +} + +/** + * ubifs_pad - pad flash space. + * @c: UBIFS file-system description object + * @buf: buffer to put padding to + * @pad: how many bytes to pad + * + * The flash media obliges us to write only in chunks of %c->min_io_size and + * when we have to write less data we add padding node to the write-buffer and + * pad it to the next minimal I/O unit's boundary. Padding nodes help when the + * media is being scanned. 
If the amount of wasted space is not enough to fit a + * padding node which takes %UBIFS_PAD_NODE_SZ bytes, we write padding bytes + * pattern (%UBIFS_PADDING_BYTE). + * + * Padding nodes are also used to fill gaps when the "commit-in-gaps" method is + * used. + */ +void ubifs_pad(const struct ubifs_info *c, void *buf, int pad) +{ + uint32_t crc; + + ubifs_assert(pad >= 0 && !(pad & 7)); + + if (pad >= UBIFS_PAD_NODE_SZ) { + struct ubifs_ch *ch = buf; + struct ubifs_pad_node *pad_node = buf; + + ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); + ch->node_type = UBIFS_PAD_NODE; + ch->group_type = UBIFS_NO_NODE_GROUP; + ch->padding[0] = ch->padding[1] = 0; + ch->sqnum = 0; + ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ); + pad -= UBIFS_PAD_NODE_SZ; + pad_node->pad_len = cpu_to_le32(pad); + crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8); + ch->crc = cpu_to_le32(crc); + memset(buf + UBIFS_PAD_NODE_SZ, 0, pad); + } else if (pad > 0) + /* Too little space, padding node won't fit */ + memset(buf, UBIFS_PADDING_BYTE, pad); +} + +/** + * next_sqnum - get next sequence number. + * @c: UBIFS file-system description object + */ +static unsigned long long next_sqnum(struct ubifs_info *c) +{ + unsigned long long sqnum; + + spin_lock(&c->cnt_lock); + sqnum = ++c->max_sqnum; + spin_unlock(&c->cnt_lock); + + if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) { + if (sqnum >= SQNUM_WATERMARK) { + ubifs_err("sequence number overflow %llu, end of life", + sqnum); + ubifs_ro_mode(c, -EINVAL); + } + ubifs_warn("running out of sequence numbers, end of life soon"); + } + + return sqnum; +} + +/** + * ubifs_prepare_node - prepare node to be written to flash. 
+ * @c: UBIFS file-system description object + * @node: the node to pad + * @len: node length + * @pad: if the buffer has to be padded + * + * This function prepares node at @node to be written to the media - it + * calculates node CRC, fills the common header, and adds proper padding up to + * the next minimum I/O unit if @pad is not zero. + */ +void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad) +{ + uint32_t crc; + struct ubifs_ch *ch = node; + unsigned long long sqnum = next_sqnum(c); + + ubifs_assert(len >= UBIFS_CH_SZ); + + ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); + ch->len = cpu_to_le32(len); + ch->group_type = UBIFS_NO_NODE_GROUP; + ch->sqnum = cpu_to_le64(sqnum); + ch->padding[0] = ch->padding[1] = 0; + crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); /* CRC covers the node except its first 8 bytes */ + ch->crc = cpu_to_le32(crc); + + if (pad) { + len = ALIGN(len, 8); /* padding begins at the 8-byte aligned end of the node */ + pad = ALIGN(len, c->min_io_size) - len; + ubifs_pad(c, node + len, pad); + } +} + +/** + * ubifs_read_node - read node. + * @c: UBIFS file-system description object + * @buf: buffer to read to + * @type: node type + * @len: node length (not aligned) + * @lnum: logical eraseblock number + * @offs: offset within the logical eraseblock + * + * This function reads a node of known type and length, checks it and + * stores it in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched + * and a negative error code in case of failure.
+ */ +int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, + int lnum, int offs) +{ + int err, l; + struct ubifs_ch *ch = buf; + + dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); + ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); + ubifs_assert(len >= UBIFS_CH_SZ && offs + len <= c->leb_size); + ubifs_assert(!(offs & 7) && offs < c->leb_size); + ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); + + err = ubi_read(c->ubi, lnum, buf, offs, len); + if (err && err != -EBADMSG) { + ubifs_err("cannot read node %d from LEB %d:%d, error %d", + type, lnum, offs, err); + return err; + } + + if (type != ch->node_type) { + ubifs_err("bad node type (%d but expected %d)", + ch->node_type, type); + goto out; + } + + err = ubifs_check_node(c, buf, lnum, offs, 0, 0); + if (err) { + ubifs_err("expected node type %d", type); + return err; + } + + l = le32_to_cpu(ch->len); + if (l != len) { + ubifs_err("bad node length %d, expected %d", l, len); + goto out; + } + + return 0; + +out: + ubifs_err("bad node at LEB %d:%d", lnum, offs); + dbg_dump_node(c, buf); + dbg_dump_stack(); + return -EINVAL; +} diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h new file mode 100755 index 0000000..20941f9 --- /dev/null +++ b/fs/ubifs/key.h @@ -0,0 +1,557 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This header contains various key-related definitions and helper functions. + * UBIFS allows several key schemes, so we access key fields only via these + * helpers. At the moment only one key scheme is supported. + * + * Simple key scheme + * ~~~~~~~~~~~~~~~~~ + * + * Keys are 64-bits long. First 32-bits are inode number (parent inode number + * in case of direntry key). Next 3 bits are node type. The last 29 bits are + * 4KiB offset in case of data node, and direntry hash in case of a direntry + * node. We use "r5" hash borrowed from reiserfs. + */ + +#ifndef __UBIFS_KEY_H__ +#define __UBIFS_KEY_H__ + +/** + * key_mask_hash - mask a valid hash value. + * @hash: hash value to be masked + * + * We use hash values as offset in directories, so values %0 and %1 are + * reserved for "." and "..". %2 is reserved for "end of readdir" marker. This + * function makes sure the reserved values are not used. + */ +static inline uint32_t key_mask_hash(uint32_t hash) +{ + hash &= UBIFS_S_KEY_HASH_MASK; + if (unlikely(hash <= 2)) + hash += 3; + return hash; +} + +/** + * key_r5_hash - R5 hash function (borrowed from reiserfs). + * @s: direntry name + * @len: name length + * + * Note: @len is not used by this implementation; the loop runs until the + * terminating NUL byte, so @s must be NUL-terminated. + */ +static inline uint32_t key_r5_hash(const char *s, int len) +{ + uint32_t a = 0; + const signed char *str = (const signed char *)s; + + while (*str) { + a += *str << 4; + a += *str >> 4; + a *= 11; + str++; + } + + return key_mask_hash(a); +} + +/** + * key_test_hash - testing hash function.
+ * @str: direntry name + * @len: name length + */ +static inline uint32_t key_test_hash(const char *str, int len) +{ + uint32_t a = 0; + + len = min(len, 4); + memcpy(&a, str, len); + return key_mask_hash(a); +} + +/** + * ino_key_init - initialize inode key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + */ +static inline void ino_key_init(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS; +} + +/** + * ino_key_init_flash - initialize on-flash inode key. + * @c: UBIFS file-system description object + * @k: key to initialize + * @inum: inode number + */ +static inline void ino_key_init_flash(const struct ubifs_info *c, void *k, + ino_t inum) +{ + union ubifs_key *key = k; + + key->j32[0] = cpu_to_le32(inum); + key->j32[1] = cpu_to_le32(UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS); + memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * lowest_ino_key - get the lowest possible inode key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + */ +static inline void lowest_ino_key(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = 0; +} + +/** + * highest_ino_key - get the highest possible inode key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + */ +static inline void highest_ino_key(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = 0xffffffff; +} + +/** + * dent_key_init - initialize directory entry key. 
+ * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: parent inode number + * @nm: direntry name and length + */ +static inline void dent_key_init(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum, + const struct qstr *nm) +{ + uint32_t hash = c->key_hash(nm->name, nm->len); + + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->u32[0] = inum; + key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); +} + +/** + * dent_key_init_hash - initialize directory entry key without re-calculating + * hash function. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: parent inode number + * @hash: direntry name hash + */ +static inline void dent_key_init_hash(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum, + uint32_t hash) +{ + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->u32[0] = inum; + key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); +} + +/** + * dent_key_init_flash - initialize on-flash directory entry key. + * @c: UBIFS file-system description object + * @k: key to initialize + * @inum: parent inode number + * @nm: direntry name and length + */ +static inline void dent_key_init_flash(const struct ubifs_info *c, void *k, + ino_t inum, const struct qstr *nm) +{ + union ubifs_key *key = k; + uint32_t hash = c->key_hash(nm->name, nm->len); + + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->j32[0] = cpu_to_le32(inum); + key->j32[1] = cpu_to_le32(hash | + (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS)); + memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * lowest_dent_key - get the lowest possible directory entry key. 
+ * @c: UBIFS file-system description object + * @key: where to store the lowest key + * @inum: parent inode number + */ +static inline void lowest_dent_key(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS; +} + +/** + * xent_key_init - initialize extended attribute entry key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: host inode number + * @nm: extended attribute entry name and length + */ +static inline void xent_key_init(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum, + const struct qstr *nm) +{ + uint32_t hash = c->key_hash(nm->name, nm->len); + + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->u32[0] = inum; + key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); +} + +/** + * xent_key_init_hash - initialize extended attribute entry key without + * re-calculating hash function. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: host inode number + * @hash: extended attribute entry name hash + */ +static inline void xent_key_init_hash(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum, + uint32_t hash) +{ + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->u32[0] = inum; + key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); +} + +/** + * xent_key_init_flash - initialize on-flash extended attribute entry key. 
+ * @c: UBIFS file-system description object + * @k: key to initialize + * @inum: host inode number + * @nm: extended attribute entry name and length + */ +static inline void xent_key_init_flash(const struct ubifs_info *c, void *k, + ino_t inum, const struct qstr *nm) +{ + union ubifs_key *key = k; + uint32_t hash = c->key_hash(nm->name, nm->len); + + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->j32[0] = cpu_to_le32(inum); + key->j32[1] = cpu_to_le32(hash | + (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS)); + memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * lowest_xent_key - get the lowest possible extended attribute entry key. + * @c: UBIFS file-system description object + * @key: where to store the lowest key + * @inum: host inode number + */ +static inline void lowest_xent_key(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS; +} + +/** + * data_key_init - initialize data key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + * @block: block number + */ +static inline void data_key_init(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum, + unsigned int block) +{ + ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); + key->u32[0] = inum; + key->u32[1] = block | (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS); +} + +/** + * data_key_init_flash - initialize on-flash data key. 
+ * @c: UBIFS file-system description object + * @k: key to initialize + * @inum: inode number + * @block: block number + */ +static inline void data_key_init_flash(const struct ubifs_info *c, void *k, + ino_t inum, unsigned int block) +{ + union ubifs_key *key = k; + + ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); /* block number must fit inside the block mask */ + key->j32[0] = cpu_to_le32(inum); + key->j32[1] = cpu_to_le32(block | + (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS)); + memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); /* zero the key bytes after the two 32-bit words */ +} + +/** + * trun_key_init - initialize truncation node key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + * + * Note, UBIFS does not have truncation keys on the media and this function is + * only used for purposes of replay. + */ +static inline void trun_key_init(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = UBIFS_TRUN_KEY << UBIFS_S_KEY_BLOCK_BITS; +} + +/** + * key_type - get key type. + * @c: UBIFS file-system description object + * @key: key to get type of + * + * Returns the second 32-bit key word shifted right by + * %UBIFS_S_KEY_BLOCK_BITS. + */ +static inline int key_type(const struct ubifs_info *c, + const union ubifs_key *key) +{ + return key->u32[1] >> UBIFS_S_KEY_BLOCK_BITS; +} + +/** + * key_type_flash - get type of an on-flash formatted key. + * @c: UBIFS file-system description object + * @k: key to get type of + * + * Same as key_type(), but the second key word is first converted from + * little-endian to CPU byte order. + */ +static inline int key_type_flash(const struct ubifs_info *c, const void *k) +{ + const union ubifs_key *key = k; + + return le32_to_cpu(key->j32[1]) >> UBIFS_S_KEY_BLOCK_BITS; +} + +/** + * key_inum - fetch inode number from key. + * @c: UBIFS file-system description object + * @k: key to fetch inode number from + * + * Returns the first 32-bit word of the in-memory key. + */ +static inline ino_t key_inum(const struct ubifs_info *c, const void *k) +{ + const union ubifs_key *key = k; + + return key->u32[0]; +} + +/** + * key_inum_flash - fetch inode number from an on-flash formatted key.
+ * @c: UBIFS file-system description object + * @k: key to fetch inode number from + */ +static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k) +{ + const union ubifs_key *key = k; + + return le32_to_cpu(key->j32[0]); +} + +/** + * key_hash - get directory entry hash. + * @c: UBIFS file-system description object + * @key: the key to get hash from + * + * Returns the second 32-bit key word masked with %UBIFS_S_KEY_HASH_MASK. + */ +static inline int key_hash(const struct ubifs_info *c, + const union ubifs_key *key) +{ + return key->u32[1] & UBIFS_S_KEY_HASH_MASK; +} + +/** + * key_hash_flash - get directory entry hash from an on-flash formatted key. + * @c: UBIFS file-system description object + * @k: the key to get hash from + * + * Same as key_hash(), but the second key word is first converted from + * little-endian to CPU byte order. + */ +static inline int key_hash_flash(const struct ubifs_info *c, const void *k) +{ + const union ubifs_key *key = k; + + return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_HASH_MASK; +} + +/** + * key_block - get data block number. + * @c: UBIFS file-system description object + * @key: the key to get the block number from + * + * Returns the second 32-bit key word masked with %UBIFS_S_KEY_BLOCK_MASK. + */ +static inline unsigned int key_block(const struct ubifs_info *c, + const union ubifs_key *key) +{ + return key->u32[1] & UBIFS_S_KEY_BLOCK_MASK; +} + +/** + * key_block_flash - get data block number from an on-flash formatted key. + * @c: UBIFS file-system description object + * @k: the key to get the block number from + * + * Same as key_block(), but the second key word is first converted from + * little-endian to CPU byte order. + */ +static inline unsigned int key_block_flash(const struct ubifs_info *c, + const void *k) +{ + const union ubifs_key *key = k; + + return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_BLOCK_MASK; +} + +/** + * key_read - transform a key to in-memory format. + * @c: UBIFS file-system description object + * @from: the key to transform + * @to: the key to store the result + * + * Both 32-bit words of @from are converted from little-endian to CPU byte + * order and stored in @to. + */ +static inline void key_read(const struct ubifs_info *c, const void *from, + union ubifs_key *to) +{ + const union ubifs_key *f = from; + + to->u32[0] = le32_to_cpu(f->j32[0]); + to->u32[1] = le32_to_cpu(f->j32[1]); +} + +/** + * key_write - transform a key from in-memory format.
+ * @c: UBIFS file-system description object + * @from: the key to transform + * @to: the key to store the result + */ +static inline void key_write(const struct ubifs_info *c, + const union ubifs_key *from, void *to) +{ + union ubifs_key *t = to; + + t->j32[0] = cpu_to_le32(from->u32[0]); + t->j32[1] = cpu_to_le32(from->u32[1]); + memset(to + 8, 0, UBIFS_MAX_KEY_LEN - 8); /* zero the key bytes after the two 32-bit words */ +} + +/** + * key_write_idx - transform a key from in-memory format for the index. + * @c: UBIFS file-system description object + * @from: the key to transform + * @to: the key to store the result + * + * Only the two 32-bit key words are written, in little-endian byte order; no + * bytes after them are touched. + */ +static inline void key_write_idx(const struct ubifs_info *c, + const union ubifs_key *from, void *to) +{ + union ubifs_key *t = to; + + t->j32[0] = cpu_to_le32(from->u32[0]); + t->j32[1] = cpu_to_le32(from->u32[1]); +} + +/** + * key_copy - copy a key. + * @c: UBIFS file-system description object + * @from: the key to copy from + * @to: the key to copy to + * + * Only the first 64-bit word of the key is copied. + */ +static inline void key_copy(const struct ubifs_info *c, + const union ubifs_key *from, union ubifs_key *to) +{ + to->u64[0] = from->u64[0]; +} + +/** + * keys_cmp - compare keys. + * @c: UBIFS file-system description object + * @key1: the first key to compare + * @key2: the second key to compare + * + * This function compares 2 keys and returns %-1 if @key1 is less than + * @key2, %0 if the keys are equivalent and %1 if @key1 is greater than @key2. + * The first 32-bit key words are compared first; the second words are only + * compared when the first ones are equal. + */ +static inline int keys_cmp(const struct ubifs_info *c, + const union ubifs_key *key1, + const union ubifs_key *key2) +{ + if (key1->u32[0] < key2->u32[0]) + return -1; + if (key1->u32[0] > key2->u32[0]) + return 1; + if (key1->u32[1] < key2->u32[1]) + return -1; + if (key1->u32[1] > key2->u32[1]) + return 1; + + return 0; +} + +/** + * keys_eq - determine if keys are equivalent.
+ * @c: UBIFS file-system description object + * @key1: the first key to compare + * @key2: the second key to compare + * + * This function compares 2 keys and returns %1 if @key1 is equal to @key2 and + * %0 if not. + */ +static inline int keys_eq(const struct ubifs_info *c, + const union ubifs_key *key1, + const union ubifs_key *key2) +{ + if (key1->u32[0] != key2->u32[0]) + return 0; + if (key1->u32[1] != key2->u32[1]) + return 0; + return 1; +} + +/** + * is_hash_key - is a key vulnerable to hash collisions. + * @c: UBIFS file-system description object + * @key: key + * + * This function returns %1 if @key is a hashed key or %0 otherwise. A key is + * treated as hashed when its type is %UBIFS_DENT_KEY or %UBIFS_XENT_KEY. + */ +static inline int is_hash_key(const struct ubifs_info *c, + const union ubifs_key *key) +{ + int type = key_type(c, key); + + return type == UBIFS_DENT_KEY || type == UBIFS_XENT_KEY; +} + +/** + * key_max_inode_size - get maximum file size allowed by current key format. + * @c: UBIFS file-system description object + * + * Returns 2^%UBIFS_S_KEY_BLOCK_BITS blocks of %UBIFS_BLOCK_SIZE bytes for + * %UBIFS_SIMPLE_KEY_FMT and %0 for any other key format. + */ +static inline unsigned long long key_max_inode_size(const struct ubifs_info *c) +{ + switch (c->key_fmt) { + case UBIFS_SIMPLE_KEY_FMT: + return (1ULL << UBIFS_S_KEY_BLOCK_BITS) * UBIFS_BLOCK_SIZE; + default: + return 0; + } +} +#endif /* !__UBIFS_KEY_H__ */ diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c new file mode 100755 index 0000000..61c4196 --- /dev/null +++ b/fs/ubifs/log.c @@ -0,0 +1,100 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details.
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file is a part of UBIFS journal implementation and contains various + * functions which manipulate the log. The log is a fixed area on the flash + * which does not contain any data but refers to buds. The log is a part of the + * journal. + */ + +#include "ubifs.h" + +/** + * ubifs_search_bud - search bud LEB. + * @c: UBIFS file-system description object + * @lnum: logical eraseblock number to search + * + * This function searches bud LEB @lnum. Returns bud description object in case + * of success and %NULL if there is no bud with this LEB number. The buds tree + * is walked with @c->buds_lock held; the lock is released before returning. + */ +struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum) +{ + struct rb_node *p; + struct ubifs_bud *bud; + + spin_lock(&c->buds_lock); + p = c->buds.rb_node; + while (p) { + bud = rb_entry(p, struct ubifs_bud, rb); + if (lnum < bud->lnum) + p = p->rb_left; + else if (lnum > bud->lnum) + p = p->rb_right; + else { + spin_unlock(&c->buds_lock); + return bud; + } + } + spin_unlock(&c->buds_lock); + return NULL; +} + +/** + * ubifs_add_bud - add bud LEB to the tree of buds and its journal head list.
+ * @c: UBIFS file-system description object + * @bud: the bud to add + */ +void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud) +{ + struct rb_node **p, *parent = NULL; + struct ubifs_bud *b; + struct ubifs_jhead *jhead; + + spin_lock(&c->buds_lock); + p = &c->buds.rb_node; + while (*p) { + parent = *p; + b = rb_entry(parent, struct ubifs_bud, rb); + ubifs_assert(bud->lnum != b->lnum); + if (bud->lnum < b->lnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&bud->rb, parent, p); + rb_insert_color(&bud->rb, &c->buds); + ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY)); + + /* + * Note, although this is a new bud, we anyway account this space now, + * before any data has been written to it, because this is about to + * guarantee fixed mount time, and this bud will anyway be read and + * scanned. + */ + c->bud_bytes += c->leb_size - bud->start; + + dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum, + bud->start, bud->jhead, c->bud_bytes); + spin_unlock(&c->buds_lock); +} diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c new file mode 100755 index 0000000..8ce4949 --- /dev/null +++ b/fs/ubifs/lprops.c @@ -0,0 +1,842 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Ðртём) + */ + +/* + * This file implements the functions that access LEB properties and their + * categories. LEBs are categorized based on the needs of UBIFS, and the + * categories are stored as either heaps or lists to provide a fast way of + * finding a LEB in a particular category. For example, UBIFS may need to find + * an empty LEB for the journal, or a very dirty LEB for garbage collection. + */ + +#include "ubifs.h" + +/** + * get_heap_comp_val - get the LEB properties value for heap comparisons. + * @lprops: LEB properties + * @cat: LEB category + */ +static int get_heap_comp_val(struct ubifs_lprops *lprops, int cat) +{ + switch (cat) { + case LPROPS_FREE: + return lprops->free; + case LPROPS_DIRTY_IDX: + return lprops->free + lprops->dirty; + default: + return lprops->dirty; + } +} + +/** + * move_up_lpt_heap - move a new heap entry up as far as possible. + * @c: UBIFS file-system description object + * @heap: LEB category heap + * @lprops: LEB properties to move + * @cat: LEB category + * + * New entries to a heap are added at the bottom and then moved up until the + * parent's value is greater. In the case of LPT's category heaps, the value + * is either the amount of free space or the amount of dirty space, depending + * on the category. 
+ */ +static void move_up_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, + struct ubifs_lprops *lprops, int cat) +{ + int val1, val2, hpos; + + hpos = lprops->hpos; + if (!hpos) + return; /* Already top of the heap */ + val1 = get_heap_comp_val(lprops, cat); + /* Compare to parent and, if greater, move up the heap */ + do { + int ppos = (hpos - 1) / 2; /* array index of the parent entry */ + + val2 = get_heap_comp_val(heap->arr[ppos], cat); + if (val2 >= val1) + return; /* Parent is not smaller, so the entry is in place */ + /* Greater than parent so move up */ + heap->arr[ppos]->hpos = hpos; + heap->arr[hpos] = heap->arr[ppos]; + heap->arr[ppos] = lprops; + lprops->hpos = ppos; + hpos = ppos; + } while (hpos); +} + +/** + * adjust_lpt_heap - move a changed heap entry up or down the heap. + * @c: UBIFS file-system description object + * @heap: LEB category heap + * @lprops: LEB properties to move + * @hpos: heap position of @lprops + * @cat: LEB category + * + * Changed entries in a heap are moved up or down until the parent's value is + * greater. In the case of LPT's category heaps, the value is either the amount + * of free space or the amount of dirty space, depending on the category.
+ */ +static void adjust_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, + struct ubifs_lprops *lprops, int hpos, int cat) +{ + int val1, val2, val3, cpos; + + val1 = get_heap_comp_val(lprops, cat); + /* Compare to parent and, if greater than parent, move up the heap */ + if (hpos) { + int ppos = (hpos - 1) / 2; /* array index of the parent entry */ + + val2 = get_heap_comp_val(heap->arr[ppos], cat); + if (val1 > val2) { + /* Greater than parent so move up */ + while (1) { + heap->arr[ppos]->hpos = hpos; + heap->arr[hpos] = heap->arr[ppos]; + heap->arr[ppos] = lprops; + lprops->hpos = ppos; + hpos = ppos; + if (!hpos) + return; /* Reached the top of the heap */ + ppos = (hpos - 1) / 2; + val2 = get_heap_comp_val(heap->arr[ppos], cat); + if (val1 <= val2) + return; /* Parent is not smaller, so the entry is in place */ + /* Still greater than parent so keep going */ + } + } + } + + /* Not greater than parent, so compare to children */ + while (1) { + /* Compare to left child */ + cpos = hpos * 2 + 1; + if (cpos >= heap->cnt) + return; /* No left child, so there are no children at all */ + val2 = get_heap_comp_val(heap->arr[cpos], cat); + if (val1 < val2) { + /* Less than left child, so promote biggest child */ + if (cpos + 1 < heap->cnt) { + val3 = get_heap_comp_val(heap->arr[cpos + 1], + cat); + if (val3 > val2) + cpos += 1; /* Right child is bigger */ + } + heap->arr[cpos]->hpos = hpos; + heap->arr[hpos] = heap->arr[cpos]; + heap->arr[cpos] = lprops; + lprops->hpos = cpos; + hpos = cpos; + continue; + } + /* Compare to right child */ + cpos += 1; + if (cpos >= heap->cnt) + return; /* No right child */ + val3 = get_heap_comp_val(heap->arr[cpos], cat); + if (val1 < val3) { + /* Less than right child, so promote right child */ + heap->arr[cpos]->hpos = hpos; + heap->arr[hpos] = heap->arr[cpos]; + heap->arr[cpos] = lprops; + lprops->hpos = cpos; + hpos = cpos; + continue; + } + return; /* Not less than either child, so the entry is in place */ + } +} + +/** + * add_to_lpt_heap - add LEB properties to a LEB category heap.
+ * @c: UBIFS file-system description object + * @lprops: LEB properties to add + * @cat: LEB category + * + * This function returns %1 if @lprops is added to the heap for LEB category + * @cat, otherwise %0 is returned because the heap is full. + */ +static int add_to_lpt_heap(struct ubifs_info *c, struct ubifs_lprops *lprops, + int cat) +{ + struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1]; + + if (heap->cnt >= heap->max_cnt) { + const int b = LPT_HEAP_SZ / 2 - 1; + int cpos, val1, val2; + + /* Compare to some other LEB on the bottom of heap */ + /* Pick a position kind of randomly */ + cpos = (((size_t)lprops >> 4) & b) + b; /* address bits of lprops select a slot between b and 2 * b */ + ubifs_assert(cpos >= b); + ubifs_assert(cpos < LPT_HEAP_SZ); + ubifs_assert(cpos < heap->cnt); + + val1 = get_heap_comp_val(lprops, cat); + val2 = get_heap_comp_val(heap->arr[cpos], cat); + if (val1 > val2) { + struct ubifs_lprops *lp; + + lp = heap->arr[cpos]; /* entry being displaced from the heap */ + lp->flags &= ~LPROPS_CAT_MASK; + lp->flags |= LPROPS_UNCAT; + list_add(&lp->list, &c->uncat_list); /* the displaced entry becomes uncategorized */ + lprops->hpos = cpos; + heap->arr[cpos] = lprops; + move_up_lpt_heap(c, heap, lprops, cat); + dbg_check_heap(c, heap, cat, lprops->hpos); + return 1; /* Added to heap */ + } + dbg_check_heap(c, heap, cat, -1); + return 0; /* Not added to heap */ + } else { + lprops->hpos = heap->cnt++; /* append at the bottom, then sift up */ + heap->arr[lprops->hpos] = lprops; + move_up_lpt_heap(c, heap, lprops, cat); + dbg_check_heap(c, heap, cat, lprops->hpos); + return 1; /* Added to heap */ + } +} + +/** + * remove_from_lpt_heap - remove LEB properties from a LEB category heap.
+ * @c: UBIFS file-system description object + * @lprops: LEB properties to remove + * @cat: LEB category + */ +static void remove_from_lpt_heap(struct ubifs_info *c, + struct ubifs_lprops *lprops, int cat) +{ + struct ubifs_lpt_heap *heap; + int hpos = lprops->hpos; + + heap = &c->lpt_heap[cat - 1]; + ubifs_assert(hpos >= 0 && hpos < heap->cnt); + ubifs_assert(heap->arr[hpos] == lprops); + heap->cnt -= 1; + if (hpos < heap->cnt) { /* not the last entry: move the last entry into the hole and re-sift it */ + heap->arr[hpos] = heap->arr[heap->cnt]; + heap->arr[hpos]->hpos = hpos; + adjust_lpt_heap(c, heap, heap->arr[hpos], hpos, cat); + } + dbg_check_heap(c, heap, cat, -1); +} + +/** + * lpt_heap_replace - replace lprops in a category heap. + * @c: UBIFS file-system description object + * @old_lprops: LEB properties to replace + * @new_lprops: LEB properties with which to replace + * @cat: LEB category + * + * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode) + * and the lprops that the pnode contains. When that happens, references in + * the category heaps to those lprops must be updated to point to the new + * lprops. This function does that. + * + * The heap slot to update is taken from @new_lprops->hpos; @old_lprops is not + * accessed by this function. + */ +static void lpt_heap_replace(struct ubifs_info *c, + struct ubifs_lprops *old_lprops, + struct ubifs_lprops *new_lprops, int cat) +{ + struct ubifs_lpt_heap *heap; + int hpos = new_lprops->hpos; + + heap = &c->lpt_heap[cat - 1]; + heap->arr[hpos] = new_lprops; +} + +/** + * ubifs_add_to_cat - add LEB properties to a category list or heap. + * @c: UBIFS file-system description object + * @lprops: LEB properties to add + * @cat: LEB category to which to add + * + * LEB properties are categorized to enable fast find operations.
+ */ +void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, + int cat) +{ + switch (cat) { + case LPROPS_DIRTY: + case LPROPS_DIRTY_IDX: + case LPROPS_FREE: + if (add_to_lpt_heap(c, lprops, cat)) + break; + /* No more room on heap so make it uncategorized */ + cat = LPROPS_UNCAT; + /* Fall through */ + case LPROPS_UNCAT: + list_add(&lprops->list, &c->uncat_list); + break; + case LPROPS_EMPTY: + list_add(&lprops->list, &c->empty_list); + break; + case LPROPS_FREEABLE: + list_add(&lprops->list, &c->freeable_list); + c->freeable_cnt += 1; + break; + case LPROPS_FRDI_IDX: + list_add(&lprops->list, &c->frdi_idx_list); + break; + default: + ubifs_assert(0); + } + lprops->flags &= ~LPROPS_CAT_MASK; /* record the category actually used, which may have become LPROPS_UNCAT */ + lprops->flags |= cat; +} + +/** + * ubifs_remove_from_cat - remove LEB properties from a category list or heap. + * @c: UBIFS file-system description object + * @lprops: LEB properties to remove + * @cat: LEB category from which to remove + * + * LEB properties are categorized to enable fast find operations. + * + * Removing from %LPROPS_FREEABLE also decrements @c->freeable_cnt. The + * category bits in @lprops->flags are not changed by this function. + */ +static void ubifs_remove_from_cat(struct ubifs_info *c, + struct ubifs_lprops *lprops, int cat) +{ + switch (cat) { + case LPROPS_DIRTY: + case LPROPS_DIRTY_IDX: + case LPROPS_FREE: + remove_from_lpt_heap(c, lprops, cat); + break; + case LPROPS_FREEABLE: + c->freeable_cnt -= 1; + ubifs_assert(c->freeable_cnt >= 0); + /* Fall through */ + case LPROPS_UNCAT: + case LPROPS_EMPTY: + case LPROPS_FRDI_IDX: + ubifs_assert(!list_empty(&lprops->list)); + list_del(&lprops->list); + break; + default: + ubifs_assert(0); + } +} + +/** + * ubifs_replace_cat - replace lprops in a category list or heap. + * @c: UBIFS file-system description object + * @old_lprops: LEB properties to replace + * @new_lprops: LEB properties with which to replace + * + * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode) + * and the lprops that the pnode contains. When that happens, references in + * category lists and heaps must be replaced.
This function does that. + */ +void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, + struct ubifs_lprops *new_lprops) +{ + int cat; + + cat = new_lprops->flags & LPROPS_CAT_MASK; /* the category is read from the new copy */ + switch (cat) { + case LPROPS_DIRTY: + case LPROPS_DIRTY_IDX: + case LPROPS_FREE: + lpt_heap_replace(c, old_lprops, new_lprops, cat); + break; + case LPROPS_UNCAT: + case LPROPS_EMPTY: + case LPROPS_FREEABLE: + case LPROPS_FRDI_IDX: + list_replace(&old_lprops->list, &new_lprops->list); + break; + default: + ubifs_assert(0); + } +} + +/** + * ubifs_ensure_cat - ensure LEB properties are categorized. + * @c: UBIFS file-system description object + * @lprops: LEB properties + * + * A LEB may have fallen off of the bottom of a heap, and ended up as + * uncategorized even though it has enough space for us now. If that is the case + * this function will put the LEB back onto a heap. + */ +void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops) +{ + int cat = lprops->flags & LPROPS_CAT_MASK; + + if (cat != LPROPS_UNCAT) + return; /* already in a category */ + cat = ubifs_categorize_lprops(c, lprops); + if (cat == LPROPS_UNCAT) + return; /* still belongs to no category */ + ubifs_remove_from_cat(c, lprops, LPROPS_UNCAT); + ubifs_add_to_cat(c, lprops, cat); +} + +/** + * ubifs_categorize_lprops - categorize LEB properties. + * @c: UBIFS file-system description object + * @lprops: LEB properties to categorize + * + * LEB properties are categorized to enable fast find operations. This function + * returns the LEB category to which the LEB properties belong. Note however + * that if the LEB category is stored as a heap and the heap is full, the + * LEB properties may have their category changed to %LPROPS_UNCAT.
+ */ +int ubifs_categorize_lprops(const struct ubifs_info *c, + const struct ubifs_lprops *lprops) +{ + if (lprops->flags & LPROPS_TAKEN) + return LPROPS_UNCAT; + + if (lprops->free == c->leb_size) { + ubifs_assert(!(lprops->flags & LPROPS_INDEX)); + return LPROPS_EMPTY; + } + + if (lprops->free + lprops->dirty == c->leb_size) { + if (lprops->flags & LPROPS_INDEX) + return LPROPS_FRDI_IDX; + else + return LPROPS_FREEABLE; + } + + if (lprops->flags & LPROPS_INDEX) { + if (lprops->dirty + lprops->free >= c->min_idx_node_sz) + return LPROPS_DIRTY_IDX; + } else { + if (lprops->dirty >= c->dead_wm && + lprops->dirty > lprops->free) + return LPROPS_DIRTY; + if (lprops->free > 0) + return LPROPS_FREE; + } + + return LPROPS_UNCAT; +} + +/** + * change_category - change LEB properties category. + * @c: UBIFS file-system description object + * @lprops: LEB properties to recategorize + * + * LEB properties are categorized to enable fast find operations. When the LEB + * properties change they must be recategorized. + * + * If the category is unchanged and is a heap category, the entry is re-sifted + * within its heap; if it is unchanged and not a heap category, nothing is + * done. Otherwise @lprops is removed from the old category and added to the + * new one. + */ +static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) +{ + int old_cat = lprops->flags & LPROPS_CAT_MASK; + int new_cat = ubifs_categorize_lprops(c, lprops); + + if (old_cat == new_cat) { + struct ubifs_lpt_heap *heap; + + /* lprops on a heap now must be moved up or down */ + if (new_cat < 1 || new_cat > LPROPS_HEAP_CNT) + return; /* Not on a heap */ + heap = &c->lpt_heap[new_cat - 1]; /* only index the heap array once new_cat is known to be in range */ + adjust_lpt_heap(c, heap, lprops, lprops->hpos, new_cat); + } else { + ubifs_remove_from_cat(c, lprops, old_cat); + ubifs_add_to_cat(c, lprops, new_cat); + } +} + +/** + * calc_dark - calculate LEB dark space size. + * @c: the UBIFS file-system description object + * @spc: amount of free and dirty space in the LEB + * + * This function calculates amount of dark space in an LEB which has @spc bytes + * of free and dirty space. Returns the calculations result.
+ * + * Dark space is the space which is not always usable - it depends on which + * nodes are written in which order. E.g., if an LEB has only 512 free bytes, + * it is dark space, because it cannot fit a large data node. So UBIFS cannot + * count on this LEB and treat these 512 bytes as usable because it is not true + * if, for example, only big chunks of uncompressible data will be written to + * the FS. + */ +static int calc_dark(struct ubifs_info *c, int spc) +{ + ubifs_assert(!(spc & 7)); /* space amounts are expected to be multiples of 8 */ + + if (spc < c->dark_wm) + return spc; /* everything below the dark watermark is dark */ + + /* + * If we have slightly more space than the dark space watermark, we can + * anyway safely assume that we'll be able to write a node of the + * smallest size there. + */ + if (spc - c->dark_wm < MIN_WRITE_SZ) + return spc - MIN_WRITE_SZ; + + return c->dark_wm; +} + +/** + * is_lprops_dirty - determine if LEB properties are dirty. + * @c: the UBIFS file-system description object + * @lprops: LEB properties to test + * + * Returns non-zero when the pnode that contains @lprops has %DIRTY_CNODE set + * and %COW_ZNODE clear, and zero otherwise. + * + * NOTE(review): the copy-on-write test uses %COW_ZNODE although the other + * flag tested on the same pnode is %DIRTY_CNODE - confirm this is intended. + */ +static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) +{ + struct ubifs_pnode *pnode; + int pos; + + pos = (lprops->lnum - c->main_first) & (UBIFS_LPT_FANOUT - 1); /* index of lprops within its pnode's lprops[] array */ + pnode = (struct ubifs_pnode *)container_of(lprops - pos, + struct ubifs_pnode, + lprops[0]); + return !test_bit(COW_ZNODE, &pnode->flags) && + test_bit(DIRTY_CNODE, &pnode->flags); +} + +/** + * ubifs_change_lp - change LEB properties. + * @c: the UBIFS file-system description object + * @lp: LEB properties to change + * @free: new free space amount + * @dirty: new dirty space amount + * @flags: new flags + * @idx_gc_cnt: change to the count of idx_gc list + * + * This function changes LEB properties (@free, @dirty or @flags). However, the + * property which has the %LPROPS_NC value is not changed. Returns a pointer to + * the updated LEB properties on success and a negative error code on failure.
+ * + * Note, the LEB properties may have had to be copied (due to COW) and + * consequently the pointer returned may not be the same as the pointer + * passed. + */ +const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, + const struct ubifs_lprops *lp, + int free, int dirty, int flags, + int idx_gc_cnt) +{ + /* + * This is the only function that is allowed to change lprops, so we + * discard the const qualifier. + */ + struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp; + + dbg_lp("LEB %d, free %d, dirty %d, flags %d", + lprops->lnum, free, dirty, flags); + + ubifs_assert(mutex_is_locked(&c->lp_mutex)); + ubifs_assert(c->lst.empty_lebs >= 0 && + c->lst.empty_lebs <= c->main_lebs); + ubifs_assert(c->freeable_cnt >= 0); + ubifs_assert(c->freeable_cnt <= c->main_lebs); + ubifs_assert(c->lst.taken_empty_lebs >= 0); + ubifs_assert(c->lst.taken_empty_lebs <= c->lst.empty_lebs); + ubifs_assert(!(c->lst.total_free & 7) && !(c->lst.total_dirty & 7)); + ubifs_assert(!(c->lst.total_dead & 7) && !(c->lst.total_dark & 7)); + ubifs_assert(!(c->lst.total_used & 7)); + ubifs_assert(free == LPROPS_NC || free >= 0); + ubifs_assert(dirty == LPROPS_NC || dirty >= 0); + + if (!is_lprops_dirty(c, lprops)) { + lprops = ubifs_lpt_lookup_dirty(c, lprops->lnum); + if (IS_ERR(lprops)) + return lprops; /* hand the error pointer back to the caller */ + } else + ubifs_assert(lprops == ubifs_lpt_lookup_dirty(c, lprops->lnum)); + + ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7)); + + spin_lock(&c->space_lock); + /* Take the old values out of the totals; the new ones are added back below */ + if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) + c->lst.taken_empty_lebs -= 1; + + if (!(lprops->flags & LPROPS_INDEX)) { + int old_spc; + + old_spc = lprops->free + lprops->dirty; + if (old_spc < c->dead_wm) + c->lst.total_dead -= old_spc; + else + c->lst.total_dark -= calc_dark(c, old_spc); + + c->lst.total_used -= c->leb_size - old_spc; + } + + if (free != LPROPS_NC) { + free = ALIGN(free, 8); + c->lst.total_free += free - lprops->free; + + /* Increase or decrease empty LEBs counter if
needed */ + if (free == c->leb_size) { + if (lprops->free != c->leb_size) + c->lst.empty_lebs += 1; + } else if (lprops->free == c->leb_size) + c->lst.empty_lebs -= 1; + lprops->free = free; + } + + if (dirty != LPROPS_NC) { + dirty = ALIGN(dirty, 8); + c->lst.total_dirty += dirty - lprops->dirty; + lprops->dirty = dirty; + } + + if (flags != LPROPS_NC) { + /* Take care about indexing LEBs counter if needed */ + if ((lprops->flags & LPROPS_INDEX)) { + if (!(flags & LPROPS_INDEX)) + c->lst.idx_lebs -= 1; + } else if (flags & LPROPS_INDEX) + c->lst.idx_lebs += 1; + lprops->flags = flags; + } + + /* Add the new values back into the totals */ + if (!(lprops->flags & LPROPS_INDEX)) { + int new_spc; + + new_spc = lprops->free + lprops->dirty; + if (new_spc < c->dead_wm) + c->lst.total_dead += new_spc; + else + c->lst.total_dark += calc_dark(c, new_spc); + + c->lst.total_used += c->leb_size - new_spc; + } + + if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) + c->lst.taken_empty_lebs += 1; + + change_category(c, lprops); + c->idx_gc_cnt += idx_gc_cnt; + spin_unlock(&c->space_lock); + return lprops; +} + +/** + * ubifs_get_lp_stats - get lprops statistics. + * @c: UBIFS file-system description object + * @lst: where to return the statistics + * + * The statistics are copied from @c->lst while holding @c->space_lock. + */ +void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst) +{ + spin_lock(&c->space_lock); + memcpy(lst, &c->lst, sizeof(struct ubifs_lp_stats)); + spin_unlock(&c->space_lock); +} + +/** + * ubifs_change_one_lp - change LEB properties. + * @c: the UBIFS file-system description object + * @lnum: LEB to change properties for + * @free: amount of free space + * @dirty: amount of dirty space + * @flags_set: flags to set + * @flags_clean: flags to clean + * @idx_gc_cnt: change to the count of idx_gc list + * + * This function changes properties of LEB @lnum. It is a helper wrapper over + * 'ubifs_change_lp()' which hides lprops get/release. The arguments are the + * same as in case of 'ubifs_change_lp()'.
Returns zero in case of success and + * a negative error code in case of failure. + */ +int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, + int flags_set, int flags_clean, int idx_gc_cnt) +{ + int err = 0, flags; + const struct ubifs_lprops *lp; + + ubifs_get_lprops(c); + + lp = ubifs_lpt_lookup_dirty(c, lnum); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + + flags = (lp->flags | flags_set) & ~flags_clean; /* set first, then clear, so @flags_clean wins on overlap */ + lp = ubifs_change_lp(c, lp, free, dirty, flags, idx_gc_cnt); + if (IS_ERR(lp)) + err = PTR_ERR(lp); + +out: + ubifs_release_lprops(c); + return err; +} + +/** + * ubifs_update_one_lp - update LEB properties. + * @c: the UBIFS file-system description object + * @lnum: LEB to change properties for + * @free: amount of free space + * @dirty: amount of dirty space to add + * @flags_set: flags to set + * @flags_clean: flags to clean + * + * This function is the same as 'ubifs_change_one_lp()' but @dirty is added to + * the current dirty space rather than replacing it, and the idx_gc count + * change passed to 'ubifs_change_lp()' is always zero. Returns zero in case of + * success and a negative error code in case of failure. + */ +int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, + int flags_set, int flags_clean) +{ + int err = 0, flags; + const struct ubifs_lprops *lp; + + ubifs_get_lprops(c); + + lp = ubifs_lpt_lookup_dirty(c, lnum); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + + flags = (lp->flags | flags_set) & ~flags_clean; /* set first, then clear, so @flags_clean wins on overlap */ + lp = ubifs_change_lp(c, lp, free, lp->dirty + dirty, flags, 0); + if (IS_ERR(lp)) + err = PTR_ERR(lp); + +out: + ubifs_release_lprops(c); + return err; +} + +/** + * ubifs_read_one_lp - read LEB properties. + * @c: the UBIFS file-system description object + * @lnum: LEB to read properties for + * @lp: where to store read properties + * + * This helper function reads properties of a LEB @lnum and stores them in @lp. + * Returns zero in case of success and a negative error code in case of + * failure.
+ */ +int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp) +{ + int err = 0; + const struct ubifs_lprops *lpp; + + ubifs_get_lprops(c); + + lpp = ubifs_lpt_lookup(c, lnum); + if (IS_ERR(lpp)) { + err = PTR_ERR(lpp); + goto out; + } + + memcpy(lp, lpp, sizeof(struct ubifs_lprops)); /* the caller gets a copy, not a pointer to the looked-up lprops */ + +out: + ubifs_release_lprops(c); + return err; +} + +/** + * ubifs_fast_find_free - try to find a LEB with free space quickly. + * @c: the UBIFS file-system description object + * + * This function returns LEB properties for a LEB with free space or %NULL if + * the function is unable to find a LEB quickly. + * + * The entry at the top of the %LPROPS_FREE heap is returned. The caller is + * asserted to hold @c->lp_mutex. + */ +const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + struct ubifs_lpt_heap *heap; + + ubifs_assert(mutex_is_locked(&c->lp_mutex)); + + heap = &c->lpt_heap[LPROPS_FREE - 1]; + if (heap->cnt == 0) + return NULL; + + lprops = heap->arr[0]; + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert(!(lprops->flags & LPROPS_INDEX)); + return lprops; +} + +/** + * ubifs_fast_find_empty - try to find an empty LEB quickly. + * @c: the UBIFS file-system description object + * + * This function returns LEB properties for an empty LEB or %NULL if the + * function is unable to find an empty LEB quickly. + * + * The first entry of @c->empty_list is returned. The caller is asserted to + * hold @c->lp_mutex. + */ +const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + + ubifs_assert(mutex_is_locked(&c->lp_mutex)); + + if (list_empty(&c->empty_list)) + return NULL; + + lprops = list_entry(c->empty_list.next, struct ubifs_lprops, list); + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert(!(lprops->flags & LPROPS_INDEX)); + ubifs_assert(lprops->free == c->leb_size); + return lprops; +} + +/** + * ubifs_fast_find_freeable - try to find a freeable LEB quickly. + * @c: the UBIFS file-system description object + * + * This function returns LEB properties for a freeable LEB or %NULL if the + * function is unable to find a freeable LEB quickly.
+ */ +const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + + ubifs_assert(mutex_is_locked(&c->lp_mutex)); + + if (list_empty(&c->freeable_list)) + return NULL; + + lprops = list_entry(c->freeable_list.next, struct ubifs_lprops, list); + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert(!(lprops->flags & LPROPS_INDEX)); + ubifs_assert(lprops->free + lprops->dirty == c->leb_size); + ubifs_assert(c->freeable_cnt > 0); + return lprops; +} + +/** + * ubifs_fast_find_frdi_idx - try to find a freeable index LEB quickly. + * @c: the UBIFS file-system description object + * + * This function returns LEB properties for a freeable index LEB or %NULL if the + * function is unable to find a freeable index LEB quickly. + */ +const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + + ubifs_assert(mutex_is_locked(&c->lp_mutex)); + + if (list_empty(&c->frdi_idx_list)) + return NULL; + + lprops = list_entry(c->frdi_idx_list.next, struct ubifs_lprops, list); + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert((lprops->flags & LPROPS_INDEX)); + ubifs_assert(lprops->free + lprops->dirty == c->leb_size); + return lprops; +} diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c new file mode 100755 index 0000000..b56c8ce --- /dev/null +++ b/fs/ubifs/lpt.c @@ -0,0 +1,1105 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements the LEB properties tree (LPT) area. The LPT area + * contains the LEB properties tree, a table of LPT area eraseblocks (ltab), and + * (for the "big" model) a table of saved LEB numbers (lsave). The LPT area sits + * between the log and the orphan area. + * + * The LPT area is like a miniature self-contained file system. It is required + * that it never runs out of space, is fast to access and update, and scales + * logarithmically. The LEB properties tree is implemented as a wandering tree + * much like the TNC, and the LPT area has its own garbage collection. + * + * The LPT has two slightly different forms called the "small model" and the + * "big model". The small model is used when the entire LEB properties table + * can be written into a single eraseblock. In that case, garbage collection + * consists of just writing the whole table, which therefore makes all other + * eraseblocks reusable. In the case of the big model, dirty eraseblocks are + * selected for garbage collection, which consists of marking the clean nodes in + * that LEB as dirty, and then only the dirty nodes are written out. Also, in + * the case of the big model, a table of LEB numbers is saved so that the entire + * LPT does not need to be scanned looking for empty eraseblocks when UBIFS is first + * mounted. + */ + +#include "ubifs.h" +#include "crc16.h" +#include <linux/math64.h> + +/** + * do_calc_lpt_geom - calculate sizes for the LPT area. + * @c: the UBIFS file-system description object + * + * Calculate the sizes of LPT bit fields, nodes, and tree, based on the + * properties of the flash and whether LPT is "big" (c->big_lpt). 
+ */ +static void do_calc_lpt_geom(struct ubifs_info *c) +{ + int i, n, bits, per_leb_wastage, max_pnode_cnt; + long long sz, tot_wastage; + + n = c->main_lebs + c->max_leb_cnt - c->leb_cnt; + max_pnode_cnt = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT); + + c->lpt_hght = 1; + n = UBIFS_LPT_FANOUT; + while (n < max_pnode_cnt) { + c->lpt_hght += 1; + n <<= UBIFS_LPT_FANOUT_SHIFT; + } + + c->pnode_cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); + + n = DIV_ROUND_UP(c->pnode_cnt, UBIFS_LPT_FANOUT); + c->nnode_cnt = n; + for (i = 1; i < c->lpt_hght; i++) { + n = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT); + c->nnode_cnt += n; + } + + c->space_bits = fls(c->leb_size) - 3; + c->lpt_lnum_bits = fls(c->lpt_lebs); + c->lpt_offs_bits = fls(c->leb_size - 1); + c->lpt_spc_bits = fls(c->leb_size); + + n = DIV_ROUND_UP(c->max_leb_cnt, UBIFS_LPT_FANOUT); + c->pcnt_bits = fls(n - 1); + + c->lnum_bits = fls(c->max_leb_cnt - 1); + + bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + + (c->big_lpt ? c->pcnt_bits : 0) + + (c->space_bits * 2 + 1) * UBIFS_LPT_FANOUT; + c->pnode_sz = (bits + 7) / 8; + + bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + + (c->big_lpt ? 
c->pcnt_bits : 0) + + (c->lpt_lnum_bits + c->lpt_offs_bits) * UBIFS_LPT_FANOUT; + c->nnode_sz = (bits + 7) / 8; + + bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + + c->lpt_lebs * c->lpt_spc_bits * 2; + c->ltab_sz = (bits + 7) / 8; + + bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + + c->lnum_bits * c->lsave_cnt; + c->lsave_sz = (bits + 7) / 8; + + /* Calculate the minimum LPT size */ + c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; + c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz; + c->lpt_sz += c->ltab_sz; + if (c->big_lpt) + c->lpt_sz += c->lsave_sz; + + /* Add wastage */ + sz = c->lpt_sz; + per_leb_wastage = max(c->pnode_sz, c->nnode_sz); + sz += per_leb_wastage; + tot_wastage = per_leb_wastage; + while (sz > c->leb_size) { + sz += per_leb_wastage; + sz -= c->leb_size; + tot_wastage += per_leb_wastage; + } + tot_wastage += ALIGN(sz, c->min_io_size) - sz; + c->lpt_sz += tot_wastage; +} + +/** + * ubifs_calc_lpt_geom - calculate and check sizes for the LPT area. + * @c: the UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_calc_lpt_geom(struct ubifs_info *c) +{ + int lebs_needed; + long long sz; + + do_calc_lpt_geom(c); + + /* Verify that lpt_lebs is big enough */ + sz = c->lpt_sz * 2; /* Must have at least 2 times the size */ + lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size); + if (lebs_needed > c->lpt_lebs) { + ubifs_err("too few LPT LEBs"); + return -EINVAL; + } + + /* Verify that ltab fits in a single LEB (since ltab is a single node) */ + if (c->ltab_sz > c->leb_size) { + ubifs_err("LPT ltab too big"); + return -EINVAL; + } + + c->check_lpt_free = c->big_lpt; + return 0; +} + +/** + * ubifs_unpack_bits - unpack bit fields. 
+ * @addr: address at which to unpack (passed and next address returned) + * @pos: bit position at which to unpack (passed and next position returned) + * @nrbits: number of bits of value to unpack (1-32) + * + * This function returns the value unpacked. + */ +uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits) +{ + const int k = 32 - nrbits; + uint8_t *p = *addr; + int b = *pos; + uint32_t uninitialized_var(val); + const int bytes = (nrbits + b + 7) >> 3; + + ubifs_assert(nrbits > 0); + ubifs_assert(nrbits <= 32); + ubifs_assert(*pos >= 0); + ubifs_assert(*pos < 8); + if (b) { + switch (bytes) { + case 2: + val = p[1]; + break; + case 3: + val = p[1] | ((uint32_t)p[2] << 8); + break; + case 4: + val = p[1] | ((uint32_t)p[2] << 8) | + ((uint32_t)p[3] << 16); + break; + case 5: + val = p[1] | ((uint32_t)p[2] << 8) | + ((uint32_t)p[3] << 16) | + ((uint32_t)p[4] << 24); + } + val <<= (8 - b); + val |= *p >> b; + nrbits += b; + } else { + switch (bytes) { + case 1: + val = p[0]; + break; + case 2: + val = p[0] | ((uint32_t)p[1] << 8); + break; + case 3: + val = p[0] | ((uint32_t)p[1] << 8) | + ((uint32_t)p[2] << 16); + break; + case 4: + val = p[0] | ((uint32_t)p[1] << 8) | + ((uint32_t)p[2] << 16) | + ((uint32_t)p[3] << 24); + break; + } + } + val <<= k; + val >>= k; + b = nrbits & 7; + p += nrbits >> 3; + *addr = p; + *pos = b; + ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32); + return val; +} + +/** + * ubifs_add_lpt_dirt - add dirty space to LPT LEB properties. 
+ * @c: UBIFS file-system description object + * @lnum: LEB number to which to add dirty space + * @dirty: amount of dirty space to add + */ +void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty) +{ + if (!dirty || !lnum) + return; + dbg_lp("LEB %d add %d to %d", + lnum, dirty, c->ltab[lnum - c->lpt_first].dirty); + ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); + c->ltab[lnum - c->lpt_first].dirty += dirty; +} + +/** + * ubifs_add_nnode_dirt - add dirty space to LPT LEB properties. + * @c: UBIFS file-system description object + * @nnode: nnode for which to add dirt + */ +void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode) +{ + struct ubifs_nnode *np = nnode->parent; + + if (np) + ubifs_add_lpt_dirt(c, np->nbranch[nnode->iip].lnum, + c->nnode_sz); + else { + ubifs_add_lpt_dirt(c, c->lpt_lnum, c->nnode_sz); + if (!(c->lpt_drty_flgs & LTAB_DIRTY)) { + c->lpt_drty_flgs |= LTAB_DIRTY; + ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz); + } + } +} + +/** + * add_pnode_dirt - add dirty space to LPT LEB properties. + * @c: UBIFS file-system description object + * @pnode: pnode for which to add dirt + */ +static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode) +{ + ubifs_add_lpt_dirt(c, pnode->parent->nbranch[pnode->iip].lnum, + c->pnode_sz); +} + +/** + * calc_nnode_num_from_parent - calculate nnode number. + * @c: UBIFS file-system description object + * @parent: parent nnode + * @iip: index in parent + * + * The nnode number is a number that uniquely identifies a nnode and can be used + * easily to traverse the tree from the root to that nnode. + * + * This function calculates and returns the nnode number based on the parent's + * nnode number and the index in parent. 
+ */ +static int calc_nnode_num_from_parent(const struct ubifs_info *c, + struct ubifs_nnode *parent, int iip) +{ + int num, shft; + + if (!parent) + return 1; + shft = (c->lpt_hght - parent->level) * UBIFS_LPT_FANOUT_SHIFT; + num = parent->num ^ (1 << shft); + num |= (UBIFS_LPT_FANOUT + iip) << shft; + return num; +} + +/** + * calc_pnode_num_from_parent - calculate pnode number. + * @c: UBIFS file-system description object + * @parent: parent nnode + * @iip: index in parent + * + * The pnode number is a number that uniquely identifies a pnode and can be used + * easily to traverse the tree from the root to that pnode. + * + * This function calculates and returns the pnode number based on the parent's + * nnode number and the index in parent. + */ +static int calc_pnode_num_from_parent(const struct ubifs_info *c, + struct ubifs_nnode *parent, int iip) +{ + int i, n = c->lpt_hght - 1, pnum = parent->num, num = 0; + + for (i = 0; i < n; i++) { + num <<= UBIFS_LPT_FANOUT_SHIFT; + num |= pnum & (UBIFS_LPT_FANOUT - 1); + pnum >>= UBIFS_LPT_FANOUT_SHIFT; + } + num <<= UBIFS_LPT_FANOUT_SHIFT; + num |= iip; + return num; +} + +/** + * update_cats - add LEB properties of a pnode to LEB category lists and heaps. + * @c: UBIFS file-system description object + * @pnode: pnode + * + * When a pnode is loaded into memory, the LEB properties it contains are added, + * by this function, to the LEB category lists and heaps. + */ +static void update_cats(struct ubifs_info *c, struct ubifs_pnode *pnode) +{ + int i; + + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + int cat = pnode->lprops[i].flags & LPROPS_CAT_MASK; + int lnum = pnode->lprops[i].lnum; + + if (!lnum) + return; + ubifs_add_to_cat(c, &pnode->lprops[i], cat); + } +} + +/** + * replace_cats - replace LEB properties of a pnode in LEB category lists and heaps. 
+ * @c: UBIFS file-system description object + * @old_pnode: pnode copied + * @new_pnode: pnode copy + * + * During commit it is sometimes necessary to copy a pnode + * (see dirty_cow_pnode). When that happens, references in + * category lists and heaps must be replaced. This function does that. + */ +static void replace_cats(struct ubifs_info *c, struct ubifs_pnode *old_pnode, + struct ubifs_pnode *new_pnode) +{ + int i; + + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + if (!new_pnode->lprops[i].lnum) + return; + ubifs_replace_cat(c, &old_pnode->lprops[i], + &new_pnode->lprops[i]); + } +} + +/** + * check_lpt_crc - check LPT node crc is correct. + * @c: UBIFS file-system description object + * @buf: buffer containing node + * @len: length of node + * + * This function returns %0 on success and a negative error code on failure. + */ +static int check_lpt_crc(void *buf, int len) +{ + int pos = 0; + uint8_t *addr = buf; + uint16_t crc, calc_crc; + + crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS); + calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + len - UBIFS_LPT_CRC_BYTES); + if (crc != calc_crc) { + ubifs_err("invalid crc in LPT node: crc %hx calc %hx", crc, + calc_crc); + dbg_dump_stack(); + return -EINVAL; + } + return 0; +} + +/** + * check_lpt_type - check LPT node type is correct. + * @c: UBIFS file-system description object + * @addr: address of type bit field is passed and returned updated here + * @pos: position of type bit field is passed and returned updated here + * @type: expected type + * + * This function returns %0 on success and a negative error code on failure. + */ +static int check_lpt_type(uint8_t **addr, int *pos, int type) +{ + int node_type; + + node_type = ubifs_unpack_bits(addr, pos, UBIFS_LPT_TYPE_BITS); + if (node_type != type) { + ubifs_err("invalid type (%d) in LPT node type %d", node_type, + type); + dbg_dump_stack(); + return -EINVAL; + } + return 0; +} + +/** + * unpack_pnode - unpack a pnode. 
+ * @c: UBIFS file-system description object + * @buf: buffer containing packed pnode to unpack + * @pnode: pnode structure to fill + * + * This function returns %0 on success and a negative error code on failure. + */ +static int unpack_pnode(const struct ubifs_info *c, void *buf, + struct ubifs_pnode *pnode) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0, err; + + err = check_lpt_type(&addr, &pos, UBIFS_LPT_PNODE); + if (err) + return err; + if (c->big_lpt) + pnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_lprops * const lprops = &pnode->lprops[i]; + + lprops->free = ubifs_unpack_bits(&addr, &pos, c->space_bits); + lprops->free <<= 3; + lprops->dirty = ubifs_unpack_bits(&addr, &pos, c->space_bits); + lprops->dirty <<= 3; + + if (ubifs_unpack_bits(&addr, &pos, 1)) + lprops->flags = LPROPS_INDEX; + else + lprops->flags = 0; + lprops->flags |= ubifs_categorize_lprops(c, lprops); + } + err = check_lpt_crc(buf, c->pnode_sz); + return err; +} + +/** + * ubifs_unpack_nnode - unpack a nnode. + * @c: UBIFS file-system description object + * @buf: buffer containing packed nnode to unpack + * @nnode: nnode structure to fill + * + * This function returns %0 on success and a negative error code on failure. 
+ */ +int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf, + struct ubifs_nnode *nnode) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0, err; + + err = check_lpt_type(&addr, &pos, UBIFS_LPT_NNODE); + if (err) + return err; + if (c->big_lpt) + nnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + int lnum; + + lnum = ubifs_unpack_bits(&addr, &pos, c->lpt_lnum_bits) + + c->lpt_first; + if (lnum == c->lpt_last + 1) + lnum = 0; + nnode->nbranch[i].lnum = lnum; + nnode->nbranch[i].offs = ubifs_unpack_bits(&addr, &pos, + c->lpt_offs_bits); + } + err = check_lpt_crc(buf, c->nnode_sz); + return err; +} + +/** + * unpack_ltab - unpack the LPT's own lprops table. + * @c: UBIFS file-system description object + * @buf: buffer from which to unpack + * + * This function returns %0 on success and a negative error code on failure. + */ +static int unpack_ltab(const struct ubifs_info *c, void *buf) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0, err; + + err = check_lpt_type(&addr, &pos, UBIFS_LPT_LTAB); + if (err) + return err; + for (i = 0; i < c->lpt_lebs; i++) { + int free = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits); + int dirty = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits); + + if (free < 0 || free > c->leb_size || dirty < 0 || + dirty > c->leb_size || free + dirty > c->leb_size) + return -EINVAL; + + c->ltab[i].free = free; + c->ltab[i].dirty = dirty; + c->ltab[i].tgc = 0; + c->ltab[i].cmt = 0; + } + err = check_lpt_crc(buf, c->ltab_sz); + return err; +} + +/** + * validate_nnode - validate a nnode. + * @c: UBIFS file-system description object + * @nnode: nnode to validate + * @parent: parent nnode (or NULL for the root nnode) + * @iip: index in parent + * + * This function returns %0 on success and a negative error code on failure. 
+ */ +static int validate_nnode(const struct ubifs_info *c, struct ubifs_nnode *nnode, + struct ubifs_nnode *parent, int iip) +{ + int i, lvl, max_offs; + + if (c->big_lpt) { + int num = calc_nnode_num_from_parent(c, parent, iip); + + if (nnode->num != num) + return -EINVAL; + } + lvl = parent ? parent->level - 1 : c->lpt_hght; + if (lvl < 1) + return -EINVAL; + if (lvl == 1) + max_offs = c->leb_size - c->pnode_sz; + else + max_offs = c->leb_size - c->nnode_sz; + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + int lnum = nnode->nbranch[i].lnum; + int offs = nnode->nbranch[i].offs; + + if (lnum == 0) { + if (offs != 0) + return -EINVAL; + continue; + } + if (lnum < c->lpt_first || lnum > c->lpt_last) + return -EINVAL; + if (offs < 0 || offs > max_offs) + return -EINVAL; + } + return 0; +} + +/** + * validate_pnode - validate a pnode. + * @c: UBIFS file-system description object + * @pnode: pnode to validate + * @parent: parent nnode + * @iip: index in parent + * + * This function returns %0 on success and a negative error code on failure. + */ +static int validate_pnode(const struct ubifs_info *c, struct ubifs_pnode *pnode, + struct ubifs_nnode *parent, int iip) +{ + int i; + + if (c->big_lpt) { + int num = calc_pnode_num_from_parent(c, parent, iip); + + if (pnode->num != num) + return -EINVAL; + } + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + int free = pnode->lprops[i].free; + int dirty = pnode->lprops[i].dirty; + + if (free < 0 || free > c->leb_size || free % c->min_io_size || + (free & 7)) + return -EINVAL; + if (dirty < 0 || dirty > c->leb_size || (dirty & 7)) + return -EINVAL; + if (dirty + free > c->leb_size) + return -EINVAL; + } + return 0; +} + +/** + * set_pnode_lnum - set LEB numbers on a pnode. + * @c: UBIFS file-system description object + * @pnode: pnode to update + * + * This function calculates the LEB numbers for the LEB properties it contains + * based on the pnode number. 
+ */ +static void set_pnode_lnum(const struct ubifs_info *c, + struct ubifs_pnode *pnode) +{ + int i, lnum; + + lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + c->main_first; + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + if (lnum >= c->leb_cnt) + return; + pnode->lprops[i].lnum = lnum++; + } +} + +/** + * ubifs_read_nnode - read a nnode from flash and link it to the tree in memory. + * @c: UBIFS file-system description object + * @parent: parent nnode (or NULL for the root) + * @iip: index in parent + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch = NULL; + struct ubifs_nnode *nnode = NULL; + void *buf = c->lpt_nod_buf; + int err, lnum, offs; + + if (parent) { + branch = &parent->nbranch[iip]; + lnum = branch->lnum; + offs = branch->offs; + } else { + lnum = c->lpt_lnum; + offs = c->lpt_offs; + } + nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_NOFS); + if (!nnode) { + err = -ENOMEM; + goto out; + } + if (lnum == 0) { + /* + * This nnode was not written which just means that the LEB + * properties in the subtree below it describe empty LEBs. We + * make the nnode as though we had read it, which in fact means + * doing almost nothing. 
+ */ + if (c->big_lpt) + nnode->num = calc_nnode_num_from_parent(c, parent, iip); + } else { + err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); + if (err) + goto out; + err = ubifs_unpack_nnode(c, buf, nnode); + if (err) + goto out; + } + err = validate_nnode(c, nnode, parent, iip); + if (err) + goto out; + if (!c->big_lpt) + nnode->num = calc_nnode_num_from_parent(c, parent, iip); + if (parent) { + branch->nnode = nnode; + nnode->level = parent->level - 1; + } else { + c->nroot = nnode; + nnode->level = c->lpt_hght; + } + nnode->parent = parent; + nnode->iip = iip; + return 0; + +out: + ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); + kfree(nnode); + return err; +} + +/** + * read_pnode - read a pnode from flash and link it to the tree in memory. + * @c: UBIFS file-system description object + * @parent: parent nnode + * @iip: index in parent + * + * This function returns %0 on success and a negative error code on failure. + */ +static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch; + struct ubifs_pnode *pnode = NULL; + void *buf = c->lpt_nod_buf; + int err, lnum, offs; + + branch = &parent->nbranch[iip]; + lnum = branch->lnum; + offs = branch->offs; + pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS); + if (!pnode) { + err = -ENOMEM; + goto out; + } + if (lnum == 0) { + /* + * This pnode was not written which just means that the LEB + * properties in it describe empty LEBs. We make the pnode as + * though we had read it. 
+ */ + int i; + + if (c->big_lpt) + pnode->num = calc_pnode_num_from_parent(c, parent, iip); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_lprops * const lprops = &pnode->lprops[i]; + + lprops->free = c->leb_size; + lprops->flags = ubifs_categorize_lprops(c, lprops); + } + } else { + err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz); + if (err) + goto out; + err = unpack_pnode(c, buf, pnode); + if (err) + goto out; + } + err = validate_pnode(c, pnode, parent, iip); + if (err) + goto out; + if (!c->big_lpt) + pnode->num = calc_pnode_num_from_parent(c, parent, iip); + branch->pnode = pnode; + pnode->parent = parent; + pnode->iip = iip; + set_pnode_lnum(c, pnode); + c->pnodes_have += 1; + return 0; + +out: + ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); + dbg_dump_pnode(c, pnode, parent, iip); + dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); + kfree(pnode); + return err; +} + +/** + * read_ltab - read LPT's own lprops table. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int read_ltab(struct ubifs_info *c) +{ + int err; + void *buf; + + buf = vmalloc(c->ltab_sz); + if (!buf) + return -ENOMEM; + err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz); + if (err) + goto out; + err = unpack_ltab(c, buf); +out: + vfree(buf); + return err; +} + +/** + * ubifs_get_nnode - get a nnode. + * @c: UBIFS file-system description object + * @parent: parent nnode (or NULL for the root) + * @iip: index in parent + * + * This function returns a pointer to the nnode on success or a negative error + * code on failure. 
+ */ +struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c, + struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch; + struct ubifs_nnode *nnode; + int err; + + branch = &parent->nbranch[iip]; + nnode = branch->nnode; + if (nnode) + return nnode; + err = ubifs_read_nnode(c, parent, iip); + if (err) + return ERR_PTR(err); + return branch->nnode; +} + +/** + * ubifs_get_pnode - get a pnode. + * @c: UBIFS file-system description object + * @parent: parent nnode + * @iip: index in parent + * + * This function returns a pointer to the pnode on success or a negative error + * code on failure. + */ +struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c, + struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch; + struct ubifs_pnode *pnode; + int err; + + branch = &parent->nbranch[iip]; + pnode = branch->pnode; + if (pnode) + return pnode; + err = read_pnode(c, parent, iip); + if (err) + return ERR_PTR(err); + update_cats(c, branch->pnode); + return branch->pnode; +} + +/** + * ubifs_lpt_lookup - lookup LEB properties in the LPT. + * @c: UBIFS file-system description object + * @lnum: LEB number to lookup + * + * This function returns a pointer to the LEB properties on success or a + * negative error code on failure. 
+ */ +struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum) +{ + int err, i, h, iip, shft; + struct ubifs_nnode *nnode; + struct ubifs_pnode *pnode; + + if (!c->nroot) { + err = ubifs_read_nnode(c, NULL, 0); + if (err) + return ERR_PTR(err); + } + nnode = c->nroot; + i = lnum - c->main_first; + shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; + for (h = 1; h < c->lpt_hght; h++) { + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) + return ERR_PTR(PTR_ERR(nnode)); + } + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + pnode = ubifs_get_pnode(c, nnode, iip); + if (IS_ERR(pnode)) + return ERR_PTR(PTR_ERR(pnode)); + iip = (i & (UBIFS_LPT_FANOUT - 1)); + dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, + pnode->lprops[iip].free, pnode->lprops[iip].dirty, + pnode->lprops[iip].flags); + return &pnode->lprops[iip]; +} + +/** + * dirty_cow_nnode - ensure a nnode is not being committed. + * @c: UBIFS file-system description object + * @nnode: nnode to check + * + * Returns dirtied nnode on success or negative error code on failure. 
+ */ +static struct ubifs_nnode *dirty_cow_nnode(struct ubifs_info *c, + struct ubifs_nnode *nnode) +{ + struct ubifs_nnode *n; + int i; + + if (!test_bit(COW_CNODE, &nnode->flags)) { + /* nnode is not being committed */ + if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { + c->dirty_nn_cnt += 1; + ubifs_add_nnode_dirt(c, nnode); + } + return nnode; + } + + /* nnode is being committed, so copy it */ + n = kmalloc(sizeof(struct ubifs_nnode), GFP_NOFS); + if (unlikely(!n)) + return ERR_PTR(-ENOMEM); + + memcpy(n, nnode, sizeof(struct ubifs_nnode)); + n->cnext = NULL; + __set_bit(DIRTY_CNODE, &n->flags); + __clear_bit(COW_CNODE, &n->flags); + + /* The children now have new parent */ + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_nbranch *branch = &n->nbranch[i]; + + if (branch->cnode) + branch->cnode->parent = n; + } + + ubifs_assert(!test_bit(OBSOLETE_CNODE, &nnode->flags)); + __set_bit(OBSOLETE_CNODE, &nnode->flags); + + c->dirty_nn_cnt += 1; + ubifs_add_nnode_dirt(c, nnode); + if (nnode->parent) + nnode->parent->nbranch[n->iip].nnode = n; + else + c->nroot = n; + return n; +} + +/** + * dirty_cow_pnode - ensure a pnode is not being committed. + * @c: UBIFS file-system description object + * @pnode: pnode to check + * + * Returns dirtied pnode on success or negative error code on failure. 
+ */ +static struct ubifs_pnode *dirty_cow_pnode(struct ubifs_info *c, + struct ubifs_pnode *pnode) +{ + struct ubifs_pnode *p; + + if (!test_bit(COW_CNODE, &pnode->flags)) { + /* pnode is not being committed */ + if (!test_and_set_bit(DIRTY_CNODE, &pnode->flags)) { + c->dirty_pn_cnt += 1; + add_pnode_dirt(c, pnode); + } + return pnode; + } + + /* pnode is being committed, so copy it */ + p = kmalloc(sizeof(struct ubifs_pnode), GFP_NOFS); + if (unlikely(!p)) + return ERR_PTR(-ENOMEM); + + memcpy(p, pnode, sizeof(struct ubifs_pnode)); + p->cnext = NULL; + __set_bit(DIRTY_CNODE, &p->flags); + __clear_bit(COW_CNODE, &p->flags); + replace_cats(c, pnode, p); + + ubifs_assert(!test_bit(OBSOLETE_CNODE, &pnode->flags)); + __set_bit(OBSOLETE_CNODE, &pnode->flags); + + c->dirty_pn_cnt += 1; + add_pnode_dirt(c, pnode); + pnode->parent->nbranch[p->iip].pnode = p; + return p; +} + +/** + * ubifs_lpt_lookup_dirty - lookup LEB properties in the LPT. + * @c: UBIFS file-system description object + * @lnum: LEB number to lookup + * + * This function returns a pointer to the LEB properties on success or a + * negative error code on failure. 
+ */ +struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum) +{ + int err, i, h, iip, shft; + struct ubifs_nnode *nnode; + struct ubifs_pnode *pnode; + + if (!c->nroot) { + err = ubifs_read_nnode(c, NULL, 0); + if (err) + return ERR_PTR(err); + } + nnode = c->nroot; + nnode = dirty_cow_nnode(c, nnode); + if (IS_ERR(nnode)) + return ERR_PTR(PTR_ERR(nnode)); + i = lnum - c->main_first; + shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; + for (h = 1; h < c->lpt_hght; h++) { + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) + return ERR_PTR(PTR_ERR(nnode)); + nnode = dirty_cow_nnode(c, nnode); + if (IS_ERR(nnode)) + return ERR_PTR(PTR_ERR(nnode)); + } + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + pnode = ubifs_get_pnode(c, nnode, iip); + if (IS_ERR(pnode)) + return ERR_PTR(PTR_ERR(pnode)); + pnode = dirty_cow_pnode(c, pnode); + if (IS_ERR(pnode)) + return ERR_PTR(PTR_ERR(pnode)); + iip = (i & (UBIFS_LPT_FANOUT - 1)); + dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, + pnode->lprops[iip].free, pnode->lprops[iip].dirty, + pnode->lprops[iip].flags); + ubifs_assert(test_bit(DIRTY_CNODE, &pnode->flags)); + return &pnode->lprops[iip]; +} + +/** + * lpt_init_rd - initialize the LPT for reading. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. 
+ */ +static int lpt_init_rd(struct ubifs_info *c) +{ + int err, i; + + c->ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); + if (!c->ltab) + return -ENOMEM; + + i = max(c->nnode_sz, c->pnode_sz); + c->lpt_nod_buf = kmalloc(i, GFP_KERNEL); + if (!c->lpt_nod_buf) + return -ENOMEM; + + for (i = 0; i < LPROPS_HEAP_CNT; i++) { + c->lpt_heap[i].arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ, + GFP_KERNEL); + if (!c->lpt_heap[i].arr) + return -ENOMEM; + c->lpt_heap[i].cnt = 0; + c->lpt_heap[i].max_cnt = LPT_HEAP_SZ; + } + + c->dirty_idx.arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ, GFP_KERNEL); + if (!c->dirty_idx.arr) + return -ENOMEM; + c->dirty_idx.cnt = 0; + c->dirty_idx.max_cnt = LPT_HEAP_SZ; + + err = read_ltab(c); + if (err) + return err; + + dbg_lp("space_bits %d", c->space_bits); + dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits); + dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits); + dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits); + dbg_lp("pcnt_bits %d", c->pcnt_bits); + dbg_lp("lnum_bits %d", c->lnum_bits); + dbg_lp("pnode_sz %d", c->pnode_sz); + dbg_lp("nnode_sz %d", c->nnode_sz); + dbg_lp("ltab_sz %d", c->ltab_sz); + dbg_lp("lsave_sz %d", c->lsave_sz); + dbg_lp("lsave_cnt %d", c->lsave_cnt); + dbg_lp("lpt_hght %d", c->lpt_hght); + dbg_lp("big_lpt %d", c->big_lpt); + dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); + dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); + dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); + if (c->big_lpt) + dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); + + return 0; +} + +/** + * ubifs_lpt_init - initialize the LPT. + * @c: UBIFS file-system description object + * @rd: whether to initialize lpt for reading + * @wr: whether to initialize lpt for writing + * + * For mounting 'rw', @rd and @wr are both true. For mounting 'ro', @rd is true + * and @wr is false. For mounting from 'ro' to 'rw', @rd is false and @wr is + * true. 
+ * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr) +{ + int err; + + if (rd) { + err = lpt_init_rd(c); + if (err) + return err; + } + + return 0; +} diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c new file mode 100755 index 0000000..c0af818 --- /dev/null +++ b/fs/ubifs/lpt_commit.c @@ -0,0 +1,171 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements commit-related functionality of the LEB properties + * subsystem. + */ + +#include "crc16.h" +#include "ubifs.h" + +/** + * free_obsolete_cnodes - free obsolete cnodes for commit end. + * @c: UBIFS file-system description object + */ +static void free_obsolete_cnodes(struct ubifs_info *c) +{ + struct ubifs_cnode *cnode, *cnext; + + cnext = c->lpt_cnext; + if (!cnext) + return; + do { + cnode = cnext; + cnext = cnode->cnext; + if (test_bit(OBSOLETE_CNODE, &cnode->flags)) + kfree(cnode); + else + cnode->cnext = NULL; + } while (cnext != c->lpt_cnext); + c->lpt_cnext = NULL; +} + +/** + * first_nnode - find the first nnode in memory. 
+ * @c: UBIFS file-system description object + * @hght: height of tree where nnode found is returned here + * + * This function returns a pointer to the nnode found or %NULL if no nnode is + * found. This function is a helper to 'ubifs_lpt_free()'. + */ +static struct ubifs_nnode *first_nnode(struct ubifs_info *c, int *hght) +{ + struct ubifs_nnode *nnode; + int h, i, found; + + nnode = c->nroot; + *hght = 0; + if (!nnode) + return NULL; + for (h = 1; h < c->lpt_hght; h++) { + found = 0; + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + if (nnode->nbranch[i].nnode) { + found = 1; + nnode = nnode->nbranch[i].nnode; + *hght = h; + break; + } + } + if (!found) + break; + } + return nnode; +} + +/** + * next_nnode - find the next nnode in memory. + * @c: UBIFS file-system description object + * @nnode: nnode from which to start. + * @hght: height of tree where nnode is, is passed and returned here + * + * This function returns a pointer to the nnode found or %NULL if no nnode is + * found. This function is a helper to 'ubifs_lpt_free()'. + */ +static struct ubifs_nnode *next_nnode(struct ubifs_info *c, + struct ubifs_nnode *nnode, int *hght) +{ + struct ubifs_nnode *parent; + int iip, h, i, found; + + parent = nnode->parent; + if (!parent) + return NULL; + if (nnode->iip == UBIFS_LPT_FANOUT - 1) { + *hght -= 1; + return parent; + } + for (iip = nnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) { + nnode = parent->nbranch[iip].nnode; + if (nnode) + break; + } + if (!nnode) { + *hght -= 1; + return parent; + } + for (h = *hght + 1; h < c->lpt_hght; h++) { + found = 0; + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + if (nnode->nbranch[i].nnode) { + found = 1; + nnode = nnode->nbranch[i].nnode; + *hght = h; + break; + } + } + if (!found) + break; + } + return nnode; +} + +/** + * ubifs_lpt_free - free resources owned by the LPT. 
+ * @c: UBIFS file-system description object + * @wr_only: free only resources used for writing + */ +void ubifs_lpt_free(struct ubifs_info *c, int wr_only) +{ + struct ubifs_nnode *nnode; + int i, hght; + + /* Free write-only things first */ + + free_obsolete_cnodes(c); /* Leftover from a failed commit */ + + vfree(c->ltab_cmt); + c->ltab_cmt = NULL; + vfree(c->lpt_buf); + c->lpt_buf = NULL; + kfree(c->lsave); + c->lsave = NULL; + + if (wr_only) + return; + + /* Now free the rest */ + + nnode = first_nnode(c, &hght); + while (nnode) { + for (i = 0; i < UBIFS_LPT_FANOUT; i++) + kfree(nnode->nbranch[i].nnode); + nnode = next_nnode(c, nnode, &hght); + } + for (i = 0; i < LPROPS_HEAP_CNT; i++) + kfree(c->lpt_heap[i].arr); + kfree(c->dirty_idx.arr); + kfree(c->nroot); + vfree(c->ltab); + kfree(c->lpt_nod_buf); +} diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c new file mode 100755 index 0000000..8b3c1b3 --- /dev/null +++ b/fs/ubifs/master.c @@ -0,0 +1,276 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Ðртём) + * Adrian Hunter + */ + +/* This file implements reading and writing the master node */ + +#include "ubifs.h" + +/** + * scan_for_master - search the valid master node. 
+ * @c: UBIFS file-system description object + * + * This function scans the master node LEBs and search for the latest master + * node. Returns zero in case of success and a negative error code in case of + * failure. + */ +static int scan_for_master(struct ubifs_info *c) +{ + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + int lnum, offs = 0, nodes_cnt; + + lnum = UBIFS_MST_LNUM; + + sleb = ubifs_scan(c, lnum, 0, c->sbuf); + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + nodes_cnt = sleb->nodes_cnt; + if (nodes_cnt > 0) { + snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, + list); + if (snod->type != UBIFS_MST_NODE) + goto out; + memcpy(c->mst_node, snod->node, snod->len); + offs = snod->offs; + } + ubifs_scan_destroy(sleb); + + lnum += 1; + + sleb = ubifs_scan(c, lnum, 0, c->sbuf); + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + if (sleb->nodes_cnt != nodes_cnt) + goto out; + if (!sleb->nodes_cnt) + goto out; + snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list); + if (snod->type != UBIFS_MST_NODE) + goto out; + if (snod->offs != offs) + goto out; + if (memcmp((void *)c->mst_node + UBIFS_CH_SZ, + (void *)snod->node + UBIFS_CH_SZ, + UBIFS_MST_NODE_SZ - UBIFS_CH_SZ)) + goto out; + c->mst_offs = offs; + ubifs_scan_destroy(sleb); + return 0; + +out: + ubifs_scan_destroy(sleb); + return -EINVAL; +} + +/** + * validate_master - validate master node. + * @c: UBIFS file-system description object + * + * This function validates data which was read from master node. Returns zero + * if the data is all right and %-EINVAL if not. 
+ */ +static int validate_master(const struct ubifs_info *c) +{ + long long main_sz; + int err; + + if (c->max_sqnum >= SQNUM_WATERMARK) { + err = 1; + goto out; + } + + if (c->cmt_no >= c->max_sqnum) { + err = 2; + goto out; + } + + if (c->highest_inum >= INUM_WATERMARK) { + err = 3; + goto out; + } + + if (c->lhead_lnum < UBIFS_LOG_LNUM || + c->lhead_lnum >= UBIFS_LOG_LNUM + c->log_lebs || + c->lhead_offs < 0 || c->lhead_offs >= c->leb_size || + c->lhead_offs & (c->min_io_size - 1)) { + err = 4; + goto out; + } + + if (c->zroot.lnum >= c->leb_cnt || c->zroot.lnum < c->main_first || + c->zroot.offs >= c->leb_size || c->zroot.offs & 7) { + err = 5; + goto out; + } + + if (c->zroot.len < c->ranges[UBIFS_IDX_NODE].min_len || + c->zroot.len > c->ranges[UBIFS_IDX_NODE].max_len) { + err = 6; + goto out; + } + + if (c->gc_lnum >= c->leb_cnt || c->gc_lnum < c->main_first) { + err = 7; + goto out; + } + + if (c->ihead_lnum >= c->leb_cnt || c->ihead_lnum < c->main_first || + c->ihead_offs % c->min_io_size || c->ihead_offs < 0 || + c->ihead_offs > c->leb_size || c->ihead_offs & 7) { + err = 8; + goto out; + } + + main_sz = (long long)c->main_lebs * c->leb_size; + if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { + err = 9; + goto out; + } + + if (c->lpt_lnum < c->lpt_first || c->lpt_lnum > c->lpt_last || + c->lpt_offs < 0 || c->lpt_offs + c->nnode_sz > c->leb_size) { + err = 10; + goto out; + } + + if (c->nhead_lnum < c->lpt_first || c->nhead_lnum > c->lpt_last || + c->nhead_offs < 0 || c->nhead_offs % c->min_io_size || + c->nhead_offs > c->leb_size) { + err = 11; + goto out; + } + + if (c->ltab_lnum < c->lpt_first || c->ltab_lnum > c->lpt_last || + c->ltab_offs < 0 || + c->ltab_offs + c->ltab_sz > c->leb_size) { + err = 12; + goto out; + } + + if (c->big_lpt && (c->lsave_lnum < c->lpt_first || + c->lsave_lnum > c->lpt_last || c->lsave_offs < 0 || + c->lsave_offs + c->lsave_sz > c->leb_size)) { + err = 13; + goto out; + } + + if (c->lscan_lnum < c->main_first || 
c->lscan_lnum >= c->leb_cnt) { + err = 14; + goto out; + } + + + return 0; + +out: + ubifs_err("bad master node at offset %d error %d", c->mst_offs, err); + dbg_dump_node(c, c->mst_node); + return -EINVAL; +} + +/** + * ubifs_read_master - read master node. + * @c: UBIFS file-system description object + * + * This function finds and reads the master node during file-system mount. If + * the flash is empty, it creates default master node as well. Returns zero in + * case of success and a negative error code in case of failure. + */ +int ubifs_read_master(struct ubifs_info *c) +{ + int err, old_leb_cnt; + + c->mst_node = kzalloc(c->mst_node_alsz, GFP_KERNEL); + if (!c->mst_node) + return -ENOMEM; + + err = scan_for_master(c); + if (err) { + err = ubifs_recover_master_node(c); + if (err) + /* + * Note, we do not free 'c->mst_node' here because the + * unmount routine will take care of this. + */ + return err; + } + + /* Make sure that the recovery flag is clear */ + c->mst_node->flags &= cpu_to_le32(~UBIFS_MST_RCVRY); + + c->max_sqnum = le64_to_cpu(c->mst_node->ch.sqnum); + c->highest_inum = le64_to_cpu(c->mst_node->highest_inum); + c->cmt_no = le64_to_cpu(c->mst_node->cmt_no); + c->zroot.lnum = le32_to_cpu(c->mst_node->root_lnum); + c->zroot.offs = le32_to_cpu(c->mst_node->root_offs); + c->zroot.len = le32_to_cpu(c->mst_node->root_len); + c->lhead_lnum = le32_to_cpu(c->mst_node->log_lnum); + c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); + c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); + c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); + c->old_idx_sz = le64_to_cpu(c->mst_node->index_size); + c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); + c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); + c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); + c->nhead_offs = le32_to_cpu(c->mst_node->nhead_offs); + c->ltab_lnum = le32_to_cpu(c->mst_node->ltab_lnum); + c->ltab_offs = le32_to_cpu(c->mst_node->ltab_offs); + c->lsave_lnum = 
le32_to_cpu(c->mst_node->lsave_lnum); + c->lsave_offs = le32_to_cpu(c->mst_node->lsave_offs); + c->lscan_lnum = le32_to_cpu(c->mst_node->lscan_lnum); + + c->calc_idx_sz = c->old_idx_sz; + + if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) + c->no_orphs = 1; + + if (old_leb_cnt != c->leb_cnt) { + /* The file system has been resized */ + int growth = c->leb_cnt - old_leb_cnt; + + if (c->leb_cnt < old_leb_cnt || + c->leb_cnt < UBIFS_MIN_LEB_CNT) { + ubifs_err("bad leb_cnt on master node"); + dbg_dump_node(c, c->mst_node); + return -EINVAL; + } + + dbg_mnt("Auto resizing (master) from %d LEBs to %d LEBs", + old_leb_cnt, c->leb_cnt); + + /* + * Reflect changes back onto the master node. N.B. the master + * node gets written immediately whenever mounting (or + * remounting) in read-write mode, so we do not need to write it + * here. + */ + c->mst_node->leb_cnt = cpu_to_le32(c->leb_cnt); + } + + err = validate_master(c); + if (err) + return err; + + err = dbg_old_index_check_init(c, &c->zroot); + + return err; +} diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h new file mode 100755 index 0000000..d453cbc --- /dev/null +++ b/fs/ubifs/misc.h @@ -0,0 +1,241 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Ðртём) + * Adrian Hunter + */ + +/* + * This file contains miscellaneous helper functions. + */ + +#ifndef __UBIFS_MISC_H__ +#define __UBIFS_MISC_H__ + +/** + * ubifs_zn_dirty - check if znode is dirty. + * @znode: znode to check + * + * This helper function returns %1 if @znode is dirty and %0 otherwise. + */ +static inline int ubifs_zn_dirty(const struct ubifs_znode *znode) +{ + return !!test_bit(DIRTY_ZNODE, &znode->flags); +} + +/** + * ubifs_wake_up_bgt - wake up background thread. + * @c: UBIFS file-system description object + */ +static inline void ubifs_wake_up_bgt(struct ubifs_info *c) +{ + if (c->bgt && !c->need_bgt) { + c->need_bgt = 1; + wake_up_process(c->bgt); + } +} + +/** + * ubifs_tnc_find_child - find next child in znode. + * @znode: znode to search at + * @start: the zbranch index to start at + * + * This helper function looks for znode child starting at index @start. Returns + * the child or %NULL if no children were found. + */ +static inline struct ubifs_znode * +ubifs_tnc_find_child(struct ubifs_znode *znode, int start) +{ + while (start < znode->child_cnt) { + if (znode->zbranch[start].znode) + return znode->zbranch[start].znode; + start += 1; + } + + return NULL; +} + +/** + * ubifs_inode - get UBIFS inode information by VFS 'struct inode' object. + * @inode: the VFS 'struct inode' pointer + */ +static inline struct ubifs_inode *ubifs_inode(const struct inode *inode) +{ + return container_of(inode, struct ubifs_inode, vfs_inode); +} + +/** + * ubifs_leb_unmap - unmap an LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number to unmap + * + * This function returns %0 on success and a negative error code on failure. 
+ */ +static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum) +{ + int err; + + /* Refuse to modify media that is flagged read-only */ + if (c->ro_media) + return -EROFS; + err = ubi_leb_unmap(c->ubi, lnum); + if (err) { + ubifs_err("unmap LEB %d failed, error %d", lnum, err); + return err; + } + + return 0; +} + +/** + * ubifs_leb_write - write to a LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number to write + * @buf: buffer to write from + * @offs: offset within LEB to write to + * @len: length to write + * @dtype: data type + * + * This function returns %0 on success and a negative error code on failure. + * If @c->ro_media is set, %-EROFS is returned and nothing is written. An + * error from ubi_leb_write() is logged and returned unchanged. + */ +static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum, + const void *buf, int offs, int len, int dtype) +{ + int err; + + if (c->ro_media) + return -EROFS; + err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); + if (err) { + ubifs_err("writing %d bytes at %d:%d, error %d", + len, lnum, offs, err); + return err; + } + + return 0; +} + +/** + * ubifs_leb_change - atomic LEB change. + * @c: UBIFS file-system description object + * @lnum: LEB number to write + * @buf: buffer to write from + * @len: length to write + * @dtype: data type + * + * This function returns %0 on success and a negative error code on failure. + * If @c->ro_media is set, %-EROFS is returned and nothing is changed. An + * error from ubi_leb_change() is logged and returned unchanged. + */ +static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum, + const void *buf, int len, int dtype) +{ + int err; + + if (c->ro_media) + return -EROFS; + err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); + if (err) { + ubifs_err("changing %d bytes in LEB %d, error %d", + len, lnum, err); + return err; + } + + return 0; +} + +/** + * ubifs_add_dirt - add dirty space to LEB properties. + * @c: the UBIFS file-system description object + * @lnum: LEB to add dirty space for + * @dirty: dirty space to add + * + * This helper is named for increasing the amount of dirty LEB space, but in + * this header it is a no-op stub: all arguments are ignored and zero is always + * returned. 
+ */ +static inline int ubifs_add_dirt(struct ubifs_info *c, int lnum, int dirty) +{ + return 0; +} + +/** + * ubifs_return_leb - return LEB to lprops. + * @c: the UBIFS file-system description object + * @lnum: LEB to return + * + * This helper is named for clearing the "taken" flag of a logical eraseblock + * in the lprops, but in this header it is a no-op stub: all arguments are + * ignored and zero is always returned. + */ +static inline int ubifs_return_leb(struct ubifs_info *c, int lnum) +{ + return 0; +} + +/** + * ubifs_idx_node_sz - return index node size. + * @c: the UBIFS file-system description object + * @child_cnt: number of children of this index node + * + * Returns %UBIFS_IDX_NODE_SZ plus @child_cnt times the per-branch size + * (%UBIFS_BRANCH_SZ + @c->key_len). + */ +static inline int ubifs_idx_node_sz(const struct ubifs_info *c, int child_cnt) +{ + return UBIFS_IDX_NODE_SZ + (UBIFS_BRANCH_SZ + c->key_len) * child_cnt; +} + +/** + * ubifs_idx_branch - return pointer to an index branch. + * @c: the UBIFS file-system description object + * @idx: index node + * @bnum: branch number + * + * Returns the address @bnum branch slots past @idx->branches, where one slot + * is %UBIFS_BRANCH_SZ + @c->key_len. No bounds check is made on @bnum. + */ +static inline +struct ubifs_branch *ubifs_idx_branch(const struct ubifs_info *c, + const struct ubifs_idx_node *idx, + int bnum) +{ + return (struct ubifs_branch *)((void *)idx->branches + + (UBIFS_BRANCH_SZ + c->key_len) * bnum); +} + +/** + * ubifs_idx_key - return pointer to an index key. + * @c: the UBIFS file-system description object + * @idx: index node + * + * Returns the key of the first branch of @idx. @c is not used. + */ +static inline void *ubifs_idx_key(const struct ubifs_info *c, + const struct ubifs_idx_node *idx) +{ + return (void *)((struct ubifs_branch *)idx->branches)->key; +} + +/** + * ubifs_tnc_lookup - look up a file-system node. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * + * This function looks up and reads the node with key @key. The caller has to + * make sure the @node buffer is large enough to fit the node. Returns zero in + * case of success, %-ENOENT if the node was not found, and a negative error + * code in case of failure. 
+ */ +static inline int ubifs_tnc_lookup(struct ubifs_info *c, + const union ubifs_key *key, void *node) +{ + return ubifs_tnc_locate(c, key, node, NULL, NULL); +} + + +#endif /* __UBIFS_MISC_H__ */ diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c new file mode 100755 index 0000000..d091031 --- /dev/null +++ b/fs/ubifs/orphan.c @@ -0,0 +1,316 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Author: Adrian Hunter + */ + +#include "ubifs.h" + +/* + * An orphan is an inode number whose inode node has been committed to the index + * with a link count of zero. That happens when an open file is deleted + * (unlinked) and then a commit is run. In the normal course of events the inode + * would be deleted when the file is closed. However in the case of an unclean + * unmount, orphans need to be accounted for. After an unclean unmount, the + * orphans' inodes must be deleted which means either scanning the entire index + * looking for them, or keeping a list on flash somewhere. This unit implements + * the latter approach. + * + * The orphan area is a fixed number of LEBs situated between the LPT area and + * the main area. The number of orphan area LEBs is specified when the file + * system is created. The minimum number is 1. 
The size of the orphan area + * should be so that it can hold the maximum number of orphans that are expected + * to ever exist at one time. + * + * The number of orphans that can fit in a LEB is: + * + * (c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64) + * + * For example: a 15872 byte LEB can fit 1980 orphans so 1 LEB may be enough. + * + * Orphans are accumulated in a rb-tree. When an inode's link count drops to + * zero, the inode number is added to the rb-tree. It is removed from the tree + * when the inode is deleted. Any new orphans that are in the orphan tree when + * the commit is run, are written to the orphan area in 1 or more orphan nodes. + * If the orphan area is full, it is consolidated to make space. There is + * always enough space because validation prevents the user from creating more + * than the maximum number of orphans allowed. + */ + +/** + * tot_avail_orphs - calculate total space. + * @c: UBIFS file-system description object + * + * This function returns the number of orphans that can be written in half + * the total space. That leaves half the space for adding new orphans. + */ +static int tot_avail_orphs(struct ubifs_info *c) +{ + int avail_lebs, avail; + + avail_lebs = c->orph_lebs; + avail = avail_lebs * + ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)); + return avail / 2; +} + +/** + * ubifs_clear_orphans - erase all LEBs used for orphans. + * @c: UBIFS file-system description object + * + * If recovery is not required, then the orphans from the previous session + * are not needed. This function locates the LEBs used to record + * orphans, and un-maps them. + */ +int ubifs_clear_orphans(struct ubifs_info *c) +{ + int lnum, err; + + for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + c->ohead_lnum = c->orph_first; + c->ohead_offs = 0; + return 0; +} + +/** + * insert_dead_orphan - insert an orphan. 
+ * @c: UBIFS file-system description object + * @inum: orphan inode number + * + * This function is a helper to the 'do_kill_orphans()' function. The orphan + * must be kept until the next commit, so it is added to the rb-tree and the + * deletion list. + */ +static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) +{ + struct ubifs_orphan *orphan, *o; + struct rb_node **p, *parent = NULL; + + orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_KERNEL); + if (!orphan) + return -ENOMEM; + orphan->inum = inum; + + p = &c->orph_tree.rb_node; + while (*p) { + parent = *p; + o = rb_entry(parent, struct ubifs_orphan, rb); + if (inum < o->inum) + p = &(*p)->rb_left; + else if (inum > o->inum) + p = &(*p)->rb_right; + else { + /* Already added - no problem */ + kfree(orphan); + return 0; + } + } + c->tot_orphans += 1; + rb_link_node(&orphan->rb, parent, p); + rb_insert_color(&orphan->rb, &c->orph_tree); + list_add_tail(&orphan->list, &c->orph_list); + orphan->dnext = c->orph_dnext; + c->orph_dnext = orphan; + dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum, + c->new_orphans, c->tot_orphans); + return 0; +} + +/** + * do_kill_orphans - remove orphan inodes from the index. + * @c: UBIFS file-system description object + * @sleb: scanned LEB + * @last_cmt_no: cmt_no of last orphan node read is passed and returned here + * @outofdate: whether the LEB is out of date is returned here + * @last_flagged: whether the end orphan node is encountered + * + * This function is a helper to the 'kill_orphans()' function. It goes through + * every orphan node in a LEB and for every inode number recorded, removes + * all keys for that inode from the TNC. 
+ */ +static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, + unsigned long long *last_cmt_no, int *outofdate, + int *last_flagged) +{ + struct ubifs_scan_node *snod; + struct ubifs_orph_node *orph; + unsigned long long cmt_no; + ino_t inum; + int i, n, err, first = 1; + + list_for_each_entry(snod, &sleb->nodes, list) { + if (snod->type != UBIFS_ORPH_NODE) { + ubifs_err("invalid node type %d in orphan area at " + "%d:%d", snod->type, sleb->lnum, snod->offs); + dbg_dump_node(c, snod->node); + return -EINVAL; + } + + orph = snod->node; + + /* Check commit number */ + cmt_no = le64_to_cpu(orph->cmt_no) & LLONG_MAX; + /* + * The commit number on the master node may be less, because + * of a failed commit. If there are several failed commits in a + * row, the commit number written on orphan nodes will continue + * to increase (because the commit number is adjusted here) even + * though the commit number on the master node stays the same + * because the master node has not been re-written. + */ + if (cmt_no > c->cmt_no) + c->cmt_no = cmt_no; + if (cmt_no < *last_cmt_no && *last_flagged) { + /* + * The last orphan node had a higher commit number and + * was flagged as the last written for that commit + * number. That makes this orphan node, out of date. 
+ */ + if (!first) { + ubifs_err("out of order commit number %llu in " + "orphan node at %d:%d", + cmt_no, sleb->lnum, snod->offs); + dbg_dump_node(c, snod->node); + return -EINVAL; + } + dbg_rcvry("out of date LEB %d", sleb->lnum); + *outofdate = 1; + return 0; + } + + if (first) + first = 0; + + n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; + for (i = 0; i < n; i++) { + inum = le64_to_cpu(orph->inos[i]); + dbg_rcvry("deleting orphaned inode %lu", + (unsigned long)inum); + err = ubifs_tnc_remove_ino(c, inum); + if (err) + return err; + err = insert_dead_orphan(c, inum); + if (err) + return err; + } + + *last_cmt_no = cmt_no; + if (le64_to_cpu(orph->cmt_no) & (1ULL << 63)) { + dbg_rcvry("last orph node for commit %llu at %d:%d", + cmt_no, sleb->lnum, snod->offs); + *last_flagged = 1; + } else + *last_flagged = 0; + } + + return 0; +} + +/** + * kill_orphans - remove all orphan inodes from the index. + * @c: UBIFS file-system description object + * + * If recovery is required, then orphan inodes recorded during the previous + * session (which ended with an unclean unmount) must be deleted from the index. + * This is done by updating the TNC, but since the index is not updated until + * the next commit, the LEBs where the orphan information is recorded are not + * erased until the next commit. + */ +static int kill_orphans(struct ubifs_info *c) +{ + unsigned long long last_cmt_no = 0; + int lnum, err = 0, outofdate = 0, last_flagged = 0; + + c->ohead_lnum = c->orph_first; + c->ohead_offs = 0; + /* Check no-orphans flag and skip this if no orphans */ + if (c->no_orphs) { + dbg_rcvry("no orphans"); + return 0; + } + /* + * Orph nodes always start at c->orph_first and are written to each + * successive LEB in turn. Generally unused LEBs will have been unmapped + * but may contain out of date orphan nodes if the unmap didn't go + * through. In addition, the last orphan node written for each commit is + * marked (top bit of orph->cmt_no is set to 1). 
It is possible that + * there are orphan nodes from the next commit (i.e. the commit did not + * complete successfully). In that case, no orphans will have been lost + * due to the way that orphans are written, and any orphans added will + * be valid orphans anyway and so can be deleted. + */ + for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { + struct ubifs_scan_leb *sleb; + + dbg_rcvry("LEB %d", lnum); + sleb = ubifs_scan(c, lnum, 0, c->sbuf); + if (IS_ERR(sleb)) { + sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); + if (IS_ERR(sleb)) { + err = PTR_ERR(sleb); + break; + } + } + err = do_kill_orphans(c, sleb, &last_cmt_no, &outofdate, + &last_flagged); + if (err || outofdate) { + ubifs_scan_destroy(sleb); + break; + } + if (sleb->endpt) { + c->ohead_lnum = lnum; + c->ohead_offs = sleb->endpt; + } + ubifs_scan_destroy(sleb); + } + return err; +} + +/** + * ubifs_mount_orphans - delete orphan inodes and erase LEBs that recorded them. + * @c: UBIFS file-system description object + * @unclean: indicates recovery from unclean unmount + * @read_only: indicates read only mount + * + * This function is called when mounting to erase orphans from the previous + * session. If UBIFS was not unmounted cleanly, then the inodes recorded as + * orphans are deleted. + */ +int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only) +{ + int err = 0; + + c->max_orphans = tot_avail_orphs(c); + + if (!read_only) { + c->orph_buf = vmalloc(c->leb_size); + if (!c->orph_buf) + return -ENOMEM; + } + + if (unclean) + err = kill_orphans(c); + else if (!read_only) + err = ubifs_clear_orphans(c); + + return err; +} diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c new file mode 100755 index 0000000..422561e --- /dev/null +++ b/fs/ubifs/recovery.c @@ -0,0 +1,1225 @@ +/* + * This file is part of UBIFS. 
+ * + * Copyright (C) 2006-2008 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements functions needed to recover from unclean un-mounts. + * When UBIFS is mounted, it checks a flag on the master node to determine if + * an un-mount was completed successfully. If not, the process of mounting + * incorporates additional checking and fixing of on-flash data structures. + * UBIFS always cleans away all remnants of an unclean un-mount, so that + * errors do not accumulate. However UBIFS defers recovery if it is mounted + * read-only, and the flash is not modified in that case. + */ + +#include "ubifs.h" + +/** + * is_empty - determine whether a buffer is empty (contains all 0xff). + * @buf: buffer to check (it is only read, never modified) + * @len: number of bytes of @buf to examine + * + * This function returns %1 if the buffer is empty (contains all 0xff) otherwise + * %0 is returned. If @len is zero or negative no byte is examined and %1 is + * returned. + */ +static int is_empty(void *buf, int len) +{ + uint8_t *p = buf; + int i; + + /* Stop at the first byte that is not 0xff */ + for (i = 0; i < len; i++) + if (*p++ != 0xff) + return 0; + return 1; +} + +/** + * get_master_node - get the last valid master node allowing for corruption. 
+ * @c: UBIFS file-system description object + * @lnum: LEB number + * @pbuf: buffer containing the LEB read, is returned here + * @mst: master node, if found, is returned here + * @cor: corruption, if found, is returned here + * + * This function allocates a buffer, reads the LEB into it, and finds and + * returns the last valid master node allowing for one area of corruption. + * The corrupt area, if there is one, must be consistent with the assumption + * that it is the result of an unclean unmount while the master node was being + * written. Under those circumstances, it is valid to use the previously written + * master node. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf, + struct ubifs_mst_node **mst, void **cor) +{ + const int sz = c->mst_node_alsz; + int err, offs, len; + void *sbuf, *buf; + + sbuf = vmalloc(c->leb_size); + if (!sbuf) + return -ENOMEM; + + err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size); + if (err && err != -EBADMSG) + goto out_free; + + /* Find the first position that is definitely not a node */ + offs = 0; + buf = sbuf; + len = c->leb_size; + while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) { + struct ubifs_ch *ch = buf; + + if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) + break; + offs += sz; + buf += sz; + len -= sz; + } + /* See if there was a valid master node before that */ + if (offs) { + int ret; + + offs -= sz; + buf -= sz; + len += sz; + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); + if (ret != SCANNED_A_NODE && offs) { + /* Could have been corruption so check one place back */ + offs -= sz; + buf -= sz; + len += sz; + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); + if (ret != SCANNED_A_NODE) + /* + * We accept only one area of corruption because + * we are assuming that it was caused while + * trying to write a master node. 
+ */ + goto out_err; + } + if (ret == SCANNED_A_NODE) { + struct ubifs_ch *ch = buf; + + if (ch->node_type != UBIFS_MST_NODE) + goto out_err; + dbg_rcvry("found a master node at %d:%d", lnum, offs); + *mst = buf; + offs += sz; + buf += sz; + len -= sz; + } + } + /* Check for corruption */ + if (offs < c->leb_size) { + if (!is_empty(buf, min(len, sz))) { + *cor = buf; + dbg_rcvry("found corruption at %d:%d", lnum, offs); + } + offs += sz; + buf += sz; + len -= sz; + } + /* Check remaining empty space */ + if (offs < c->leb_size) + if (!is_empty(buf, len)) + goto out_err; + *pbuf = sbuf; + return 0; + +out_err: + err = -EINVAL; +out_free: + vfree(sbuf); + *mst = NULL; + *cor = NULL; + return err; +} + +/** + * write_rcvrd_mst_node - write recovered master node. + * @c: UBIFS file-system description object + * @mst: master node + * + * This function returns %0 on success and a negative error code on failure. + */ +static int write_rcvrd_mst_node(struct ubifs_info *c, + struct ubifs_mst_node *mst) +{ + int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz; + __le32 save_flags; + + dbg_rcvry("recovery"); + + save_flags = mst->flags; + mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY); + + ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); + err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); + if (err) + goto out; + err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM); + if (err) + goto out; +out: + mst->flags = save_flags; + return err; +} + +/** + * ubifs_recover_master_node - recover the master node. + * @c: UBIFS file-system description object + * + * This function recovers the master node from corruption that may occur due to + * an unclean unmount. + * + * This function returns %0 on success and a negative error code on failure. 
+ */
+int ubifs_recover_master_node(struct ubifs_info *c)
+{
+	void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL;
+	struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst;
+	const int sz = c->mst_node_alsz;
+	int err, offs1, offs2;
+
+	dbg_rcvry("recovery");
+
+	/* Fetch the last master node (and any corrupt area) of each LEB */
+	err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1);
+	if (err)
+		goto out_free;
+
+	err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2);
+	if (err)
+		goto out_free;
+
+	if (mst1) {
+		/* Byte offset of the node within the buffer of the first LEB */
+		offs1 = (void *)mst1 - buf1;
+		if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) &&
+		    (offs1 == 0 && !cor1)) {
+			/*
+			 * mst1 was written by recovery at offset 0 with no
+			 * corruption.
+			 */
+			dbg_rcvry("recovery recovery");
+			mst = mst1;
+		} else if (mst2) {
+			offs2 = (void *)mst2 - buf2;
+			if (offs1 == offs2) {
+				/* Same offset, so must be the same */
+				if (memcmp((void *)mst1 + UBIFS_CH_SZ,
+					   (void *)mst2 + UBIFS_CH_SZ,
+					   UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
+					goto out_err;
+				mst = mst1;
+			} else if (offs2 + sz == offs1) {
+				/* 1st LEB was written, 2nd was not */
+				if (cor1)
+					goto out_err;
+				mst = mst1;
+			} else if (offs1 == 0 && offs2 + sz >= c->leb_size) {
+				/* 1st LEB was unmapped and written, 2nd not */
+				if (cor1)
+					goto out_err;
+				mst = mst1;
+			} else
+				goto out_err;
+		} else {
+			/*
+			 * 2nd LEB was unmapped and about to be written, so
+			 * there must be only one master node in the first LEB
+			 * and no corruption.
+			 */
+			if (offs1 != 0 || cor1)
+				goto out_err;
+			mst = mst1;
+		}
+	} else {
+		if (!mst2)
+			goto out_err;
+		/*
+		 * 1st LEB was unmapped and about to be written, so there must
+		 * be no room left in 2nd LEB.
+		 */
+		offs2 = (void *)mst2 - buf2;
+		if (offs2 + sz + sz <= c->leb_size)
+			goto out_err;
+		mst = mst2;
+	}
+
+	dbg_rcvry("recovered master node from LEB %d",
+		  (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
+
+	/* The chosen node becomes the in-memory master node */
+	memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
+
+	if ((c->vfs_sb->s_flags & MS_RDONLY)) {
+		/* Read-only mode. Keep a copy for switching to rw mode */
+		c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
+		if (!c->rcvrd_mst_node) {
+			err = -ENOMEM;
+			goto out_free;
+		}
+		memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
+	}
+
+	vfree(buf2);
+	vfree(buf1);
+
+	return 0;
+
+out_err:
+	err = -EINVAL;
+out_free:
+	/* Shared failure path: report, dump whatever was found, free buffers */
+	ubifs_err("failed to recover master node");
+	if (mst1) {
+		dbg_err("dumping first master node");
+		dbg_dump_node(c, mst1);
+	}
+	if (mst2) {
+		dbg_err("dumping second master node");
+		dbg_dump_node(c, mst2);
+	}
+	vfree(buf2);
+	vfree(buf1);
+	return err;
+}
+
+/**
+ * ubifs_write_rcvrd_mst_node - write the recovered master node.
+ * @c: UBIFS file-system description object
+ *
+ * This function writes the master node that was recovered during mounting in
+ * read-only mode and must now be written because we are remounting rw.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
+{
+	int err;
+
+	/* Nothing was saved by recovery, so there is nothing to write */
+	if (!c->rcvrd_mst_node)
+		return 0;
+	c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
+	c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
+	err = write_rcvrd_mst_node(c, c->rcvrd_mst_node);
+	if (err)
+		return err;
+	/* The saved copy is released only after a successful write */
+	kfree(c->rcvrd_mst_node);
+	c->rcvrd_mst_node = NULL;
+	return 0;
+}
+
+/**
+ * is_last_write - determine if an offset was in the last write to a LEB.
+ * @c: UBIFS file-system description object
+ * @buf: buffer to check
+ * @offs: offset to check
+ *
+ * This function returns %1 if @offs was in the last write to the LEB whose data
+ * is in @buf, otherwise %0 is returned. The determination is made by checking
+ * for subsequent empty space starting from the next min_io_size boundary (or a
+ * bit less than the common header size if min_io_size is one).
+ */
+static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
+{
+	int empty_offs;
+	int check_len;
+	uint8_t *p;
+
+	if (c->min_io_size == 1) {
+		/*
+		 * Walk backwards from the end of the LEB over trailing 0xff
+		 * bytes; what is left in 'check_len' is the non-empty part.
+		 */
+		check_len = c->leb_size - offs;
+		p = buf + check_len;
+		for (; check_len > 0; check_len--)
+			if (*--p != 0xff)
+				break;
+		/*
+		 * 'check_len' is the size of the corruption which cannot be
+		 * more than the size of 1 node if it was caused by an unclean
+		 * unmount.
+		 */
+		if (check_len > UBIFS_MAX_NODE_SZ)
+			return 0;
+		return 1;
+	}
+
+	/*
+	 * Round up to the next c->min_io_size boundary i.e. 'offs' is in the
+	 * last wbuf written. After that should be empty space.
+	 */
+	empty_offs = ALIGN(offs + 1, c->min_io_size);
+	check_len = c->leb_size - empty_offs;
+	/* @buf corresponds to LEB offset @offs, hence the '- offs' */
+	p = buf + empty_offs - offs;
+
+	for (; check_len > 0; check_len--)
+		if (*p++ != 0xff)
+			return 0;
+	return 1;
+}
+
+/**
+ * clean_buf - clean the data from an LEB sitting in a buffer.
+ * @c: UBIFS file-system description object
+ * @buf: buffer to clean
+ * @lnum: LEB number to clean
+ * @offs: offset from which to clean
+ * @len: length of buffer
+ *
+ * This function pads up to the next min_io_size boundary (if there is one) and
+ * sets empty space to all 0xff. @buf, @offs and @len are updated to the next
+ * min_io_size boundary (if there is one).
+ */
+static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
+		      int *offs, int *len)
+{
+	int empty_offs, pad_len;
+
+	/*
+	 * Self-assignment: presumably keeps 'lnum' referenced when
+	 * dbg_rcvry() expands to nothing - TODO confirm.
+	 */
+	lnum = lnum;
+	dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
+
+	if (c->min_io_size == 1) {
+		/* No boundary to pad to: blank everything from @offs onwards */
+		memset(*buf, 0xff, c->leb_size - *offs);
+		return;
+	}
+
+	ubifs_assert(!(*offs & 7));
+	empty_offs = ALIGN(*offs, c->min_io_size);
+	pad_len = empty_offs - *offs;
+	ubifs_pad(c, *buf, pad_len);
+	/* Advance the caller's cursor past the padding just written */
+	*offs += pad_len;
+	*buf += pad_len;
+	*len -= pad_len;
+	memset(*buf, 0xff, c->leb_size - empty_offs);
+}
+
+/**
+ * no_more_nodes - determine if there are no more nodes in a buffer.
+ * @c: UBIFS file-system description object
+ * @buf: buffer to check
+ * @len: length of buffer
+ * @lnum: LEB number of the LEB from which @buf was read
+ * @offs: offset from which @buf was read
+ *
+ * This function ensures that the corrupted node at @offs is the last thing
+ * written to a LEB. This function returns %1 if more data is not found and
+ * %0 if more data is found.
+ */
+static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
+			int lnum, int offs)
+{
+	struct ubifs_ch *ch = buf;
+	/* 'dlen' is read up front but only used once the header is checked */
+	int skip, dlen = le32_to_cpu(ch->len);
+
+	/* Check for empty space after the corrupt node's common header */
+	skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs;
+	if (is_empty(buf + skip, len - skip))
+		return 1;
+	/*
+	 * The area after the common header size is not empty, so the common
+	 * header must be intact. Check it.
+	 */
+	if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) {
+		dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs);
+		return 0;
+	}
+	/* Now we know the corrupt node's length we can skip over it */
+	skip = ALIGN(offs + dlen, c->min_io_size) - offs;
+	/* After which there should be empty space */
+	if (is_empty(buf + skip, len - skip))
+		return 1;
+	dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip);
+	return 0;
+}
+
+/**
+ * fix_unclean_leb - fix an unclean LEB.
+ * @c: UBIFS file-system description object
+ * @sleb: scanned LEB information
+ * @start: offset where scan started
+ *
+ * When the file-system is mounted read-only and is not being remounted
+ * read-write, the LEB is queued on the list of unclean LEBs together with the
+ * end offset of the last node that is kept. Otherwise nothing is done here.
+ *
+ * This function returns %0 on success and %-ENOMEM if the list entry cannot be
+ * allocated.
+ */
+static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
+			   int start)
+{
+	int lnum = sleb->lnum, endpt = start;
+
+	/* Get the end offset of the last node we are keeping */
+	if (!list_empty(&sleb->nodes)) {
+		struct ubifs_scan_node *snod;
+
+		snod = list_entry(sleb->nodes.prev,
+				  struct ubifs_scan_node, list);
+		endpt = snod->offs + snod->len;
+	}
+
+	if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) {
+		/* Add to recovery list */
+		struct ubifs_unclean_leb *ucleb;
+
+		/*
+		 * NOTE(review): the message prints sleb->endpt while the value
+		 * recorded below is the locally computed 'endpt' - confirm
+		 * which one is meant to be logged.
+		 */
+		dbg_rcvry("need to fix LEB %d start %d endpt %d",
+			  lnum, start, sleb->endpt);
+		ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS);
+		if (!ucleb)
+			return -ENOMEM;
+		ucleb->lnum = lnum;
+		ucleb->endpt = endpt;
+		list_add_tail(&ucleb->list, &c->unclean_leb_list);
+	}
+	return 0;
+}
+
+/**
+ * drop_incomplete_group - drop nodes from an incomplete group.
+ * @sleb: scanned LEB information
+ * @offs: offset of dropped nodes is returned here
+ *
+ * This function returns %1 if nodes are dropped and %0 otherwise.
+ */
+static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
+{
+	int dropped = 0;
+
+	/* Work backwards from the tail of the scanned node list */
+	while (!list_empty(&sleb->nodes)) {
+		struct ubifs_scan_node *snod;
+		struct ubifs_ch *ch;
+
+		snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
+				  list);
+		ch = snod->node;
+		/* Stop at the first node not marked UBIFS_IN_NODE_GROUP */
+		if (ch->group_type != UBIFS_IN_NODE_GROUP)
+			return dropped;
+		dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs);
+		*offs = snod->offs;
+		list_del(&snod->list);
+		kfree(snod);
+		sleb->nodes_cnt -= 1;
+		dropped = 1;
+	}
+	return dropped;
+}
+
+/**
+ * ubifs_recover_leb - scan and recover a LEB.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number
+ * @offs: offset
+ * @sbuf: LEB-sized buffer to use
+ * @grouped: nodes may be grouped for recovery
+ *
+ * This function does a scan of a LEB, but caters for errors that might have
+ * been caused by the unclean unmount from which we are attempting to recover.
+ *
+ * This function returns the scanned LEB information on success and an error
+ * pointer (see ERR_PTR()) carrying a negative error code on failure.
+ */
+struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
+					 int offs, void *sbuf, int grouped)
+{
+	int err, len = c->leb_size - offs, need_clean = 0, quiet = 1;
+	int empty_chkd = 0, start = offs;
+	struct ubifs_scan_leb *sleb;
+	void *buf = sbuf + offs;
+
+	dbg_rcvry("%d:%d", lnum, offs);
+
+	sleb = ubifs_start_scan(c, lnum, offs, sbuf);
+	if (IS_ERR(sleb))
+		return sleb;
+
+	if (sleb->ecc)
+		need_clean = 1;
+
+	while (len >= 8) {
+		int ret;
+
+		dbg_scan("look at LEB %d:%d (%d bytes left)",
+			 lnum, offs, len);
+
+		cond_resched();
+
+		/*
+		 * Scan quietly until there is an error from which we cannot
+		 * recover
+		 */
+		ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
+
+		if (ret == SCANNED_A_NODE) {
+			/* A valid node, and not a padding node */
+			struct ubifs_ch *ch = buf;
+			int node_len;
+
+			err = ubifs_add_snod(c, sleb, buf, offs);
+			if (err)
+				goto error;
+			node_len = ALIGN(le32_to_cpu(ch->len), 8);
+			offs += node_len;
+			buf += node_len;
+			len -= node_len;
+			continue;
+		}
+
+		if (ret > 0) {
+			/* Padding bytes or a valid padding node */
+			offs += ret;
+			buf += ret;
+			len -= ret;
+			continue;
+		}
+
+		if (ret == SCANNED_EMPTY_SPACE) {
+			if (!is_empty(buf, len)) {
+				/*
+				 * Leaving the loop with 'empty_chkd' still
+				 * zero makes the check after the loop report
+				 * the corruption.
+				 */
+				if (!is_last_write(c, buf, offs))
+					break;
+				clean_buf(c, &buf, lnum, &offs, &len);
+				need_clean = 1;
+			}
+			empty_chkd = 1;
+			break;
+		}
+
+		if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
+			if (is_last_write(c, buf, offs)) {
+				clean_buf(c, &buf, lnum, &offs, &len);
+				need_clean = 1;
+				empty_chkd = 1;
+				break;
+			}
+
+		if (ret == SCANNED_A_CORRUPT_NODE)
+			if (no_more_nodes(c, buf, len, lnum, offs)) {
+				clean_buf(c, &buf, lnum, &offs, &len);
+				need_clean = 1;
+				empty_chkd = 1;
+				break;
+			}
+
+		if (quiet) {
+			/* Redo the last scan but noisily */
+			quiet = 0;
+			continue;
+		}
+
+		/* Second (noisy) pass failed as well: give up on this LEB */
+		switch (ret) {
+		case SCANNED_GARBAGE:
+			dbg_err("garbage");
+			goto corrupted;
+		case SCANNED_A_CORRUPT_NODE:
+		case SCANNED_A_BAD_PAD_NODE:
+			dbg_err("bad node");
+			goto corrupted;
+		default:
+			dbg_err("unknown");
+			goto corrupted;
+		}
+	}
+
+	/* Whatever remains after the last node must be empty or cleanable */
+	if (!empty_chkd && !is_empty(buf, len)) {
+		if (is_last_write(c, buf, offs)) {
+			clean_buf(c, &buf, lnum, &offs, &len);
+			need_clean = 1;
+		} else {
+			ubifs_err("corrupt empty space at LEB %d:%d",
+				  lnum, offs);
+			goto corrupted;
+		}
+	}
+
+	/* Drop nodes from incomplete group */
+	if (grouped && drop_incomplete_group(sleb, &offs)) {
+		buf = sbuf + offs;
+		len = c->leb_size - offs;
+		clean_buf(c, &buf, lnum, &offs, &len);
+		need_clean = 1;
+	}
+
+	/* The end offset must sit on a min_io_size boundary */
+	if (offs % c->min_io_size) {
+		clean_buf(c, &buf, lnum, &offs, &len);
+		need_clean = 1;
+	}
+
+	ubifs_end_scan(c, sleb, lnum, offs);
+
+	if (need_clean) {
+		err = fix_unclean_leb(c, sleb, start);
+		if (err)
+			goto error;
+	}
+
+	return sleb;
+
+corrupted:
+	ubifs_scanned_corruption(c, lnum, offs, buf);
+	err = -EUCLEAN;
+error:
+	ubifs_err("LEB %d scanning failed", lnum);
+	ubifs_scan_destroy(sleb);
+	return ERR_PTR(err);
+}
+
+/**
+ * get_cs_sqnum - get commit start sequence number.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number of commit start node
+ * @offs: offset of commit start node
+ * @cs_sqnum: commit start sequence number is returned here
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
+			unsigned long long *cs_sqnum)
+{
+	struct ubifs_cs_node *cs_node = NULL;
+	int err, ret;
+
+	dbg_rcvry("at %d:%d", lnum, offs);
+	/*
+	 * Reject an offset that leaves no room for a commit start node before
+	 * allocating anything; 'cs_node' is still NULL, which kfree() accepts.
+	 */
+	if (c->leb_size - offs < UBIFS_CS_NODE_SZ)
+		goto out_err;
+	cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL);
+	if (!cs_node)
+		return -ENOMEM;
+	/* -EBADMSG is tolerated here; the node is validated by the scan below */
+	err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ);
+	if (err && err != -EBADMSG)
+		goto out_free;
+	ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
+	if (ret != SCANNED_A_NODE) {
+		dbg_err("Not a valid node");
+		goto out_err;
+	}
+	if (cs_node->ch.node_type != UBIFS_CS_NODE) {
+		dbg_err("Not a CS node, type is %d", cs_node->ch.node_type);
+		goto out_err;
+	}
+	/* The commit start node must belong to the current commit */
+	if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
+		dbg_err("CS node cmt_no %llu != current cmt_no %llu",
+			(unsigned long long)le64_to_cpu(cs_node->cmt_no),
+			c->cmt_no);
+		goto out_err;
+	}
+	*cs_sqnum = le64_to_cpu(cs_node->ch.sqnum);
+	dbg_rcvry("commit start sqnum %llu", *cs_sqnum);
+	kfree(cs_node);
+	return 0;
+
+out_err:
+	err = -EINVAL;
+out_free:
+	ubifs_err("failed to get CS sqnum");
+	kfree(cs_node);
+	return err;
+}
+
+/**
+ * ubifs_recover_log_leb - scan and recover a log LEB.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number
+ * @offs: offset
+ * @sbuf: LEB-sized buffer to use
+ *
+ * This function does a scan of a LEB, but caters for errors that might have
+ * been caused by the unclean unmount from which we are attempting to recover.
+ *
+ * This function returns the scanned LEB information on success and an error
+ * pointer (see ERR_PTR()) carrying a negative error code on failure.
+ */
+struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
+					     int offs, void *sbuf)
+{
+	int next_lnum = lnum + 1;
+
+	dbg_rcvry("LEB %d", lnum);
+	/* The log is circular: wrap back to its first LEB */
+	if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs)
+		next_lnum = UBIFS_LOG_LNUM;
+
+	if (next_lnum != c->ltail_lnum) {
+		struct ubifs_scan_leb *next;
+		int err = 0;
+
+		/*
+		 * We can only recover at the end of the log, so check that the
+		 * next log LEB is empty or out of date.
+		 */
+		next = ubifs_scan(c, next_lnum, 0, sbuf);
+		if (IS_ERR(next))
+			return next;
+		if (next->nodes_cnt) {
+			unsigned long long cs_sqnum = c->cs_sqnum;
+			struct ubifs_scan_node *first;
+
+			first = list_entry(next->nodes.next,
+					   struct ubifs_scan_node, list);
+			/* Commit start sqnum not known yet: read it from flash */
+			if (cs_sqnum == 0)
+				err = get_cs_sqnum(c, lnum, offs, &cs_sqnum);
+			if (!err && first->sqnum > cs_sqnum) {
+				ubifs_err("unrecoverable log corruption "
+					  "in LEB %d", lnum);
+				err = -EUCLEAN;
+			}
+		}
+		/* Single release point for the scan of the next LEB */
+		ubifs_scan_destroy(next);
+		if (err)
+			return ERR_PTR(err);
+	}
+	return ubifs_recover_leb(c, lnum, offs, sbuf, 0);
+}
+
+/**
+ * recover_head - recover a head.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number of head to recover
+ * @offs: offset of head to recover
+ * @sbuf: LEB-sized buffer to use
+ *
+ * This function ensures that there is no data on the flash at a head location.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int recover_head(const struct ubifs_info *c, int lnum, int offs,
+			void *sbuf)
+{
+	/* Probe one min. I/O unit, or 512 bytes when there is no such unit */
+	int len = (c->min_io_size > 1) ? c->min_io_size : 512;
+	int err, dirty;
+
+	/* Do not probe past the end of the LEB */
+	if (offs + len > c->leb_size)
+		len = c->leb_size - offs;
+	if (!len)
+		return 0;
+
+	/* Read at the head location and check it is empty flash */
+	err = ubi_read(c->ubi, lnum, sbuf, offs, len);
+	dirty = err ? 1 : 0;
+	if (!dirty) {
+		uint8_t *byte = sbuf;
+
+		while (len--)
+			if (*byte++ != 0xff) {
+				dirty = 1;
+				break;
+			}
+	}
+	if (!dirty)
+		return 0;
+
+	dbg_rcvry("cleaning head at %d:%d", lnum, offs);
+	/* Nothing precedes the head, so the whole LEB can simply go */
+	if (offs == 0)
+		return ubifs_leb_unmap(c, lnum);
+	/* Keep only the data in front of the head and rewrite the LEB */
+	err = ubi_read(c->ubi, lnum, sbuf, 0, offs);
+	if (err)
+		return err;
+	return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN);
+}
+
+/**
+ * ubifs_recover_inl_heads - recover index and LPT heads.
+ * @c: UBIFS file-system description object
+ * @sbuf: LEB-sized buffer to use
+ *
+ * This function ensures that there is no data on the flash at the index and
+ * LPT head locations.
+ *
+ * This deals with the recovery of a half-completed journal commit. UBIFS is
+ * careful never to overwrite the last version of the index or the LPT. Because
+ * the index and LPT are wandering trees, data from a half-completed commit will
+ * not be referenced anywhere in UBIFS. The data will be either in LEBs that are
+ * assumed to be empty and will be unmapped anyway before use, or in the index
+ * and LPT heads.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf)
+{
+	int ret;
+
+	ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw);
+
+	/* Index head first; the LPT head is only looked at if that succeeds */
+	dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
+	ret = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
+	if (ret)
+		return ret;
+
+	dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs);
+	return recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
+}
+
+/**
+ * clean_an_unclean_leb - read and write a LEB to remove corruption.
+ * @c: UBIFS file-system description object
+ * @ucleb: unclean LEB information
+ * @sbuf: LEB-sized buffer to use
+ *
+ * This function reads a LEB up to a point pre-determined by the mount recovery,
+ * checks the nodes, and writes the result back to the flash, thereby cleaning
+ * off any following corruption, or non-fatal ECC errors.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int clean_an_unclean_leb(const struct ubifs_info *c,
+				struct ubifs_unclean_leb *ucleb, void *sbuf)
+{
+	int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
+	void *buf = sbuf;
+
+	dbg_rcvry("LEB %d len %d", lnum, len);
+
+	if (len == 0) {
+		/* Nothing to read, just unmap it */
+		err = ubifs_leb_unmap(c, lnum);
+		if (err)
+			return err;
+		return 0;
+	}
+
+	/* -EBADMSG is tolerated; the nodes are validated by the scan below */
+	err = ubi_read(c->ubi, lnum, buf, offs, len);
+	if (err && err != -EBADMSG)
+		return err;
+
+	/* Every byte up to the recorded end point must parse as nodes/padding */
+	while (len >= 8) {
+		int ret;
+
+		cond_resched();
+
+		/* Scan quietly until there is an error */
+		ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
+
+		if (ret == SCANNED_A_NODE) {
+			/* A valid node, and not a padding node */
+			struct ubifs_ch *ch = buf;
+			int node_len;
+
+			node_len = ALIGN(le32_to_cpu(ch->len), 8);
+			offs += node_len;
+			buf += node_len;
+			len -= node_len;
+			continue;
+		}
+
+		if (ret > 0) {
+			/* Padding bytes or a valid padding node */
+			offs += ret;
+			buf += ret;
+			len -= ret;
+			continue;
+		}
+
+		if (ret == SCANNED_EMPTY_SPACE) {
+			ubifs_err("unexpected empty space at %d:%d",
+				  lnum, offs);
+			return -EUCLEAN;
+		}
+
+		if (quiet) {
+			/* Redo the last scan but noisily */
+			quiet = 0;
+			continue;
+		}
+
+		ubifs_scanned_corruption(c, lnum, offs, buf);
+		return -EUCLEAN;
+	}
+
+	/* Pad to min_io_size */
+	len = ALIGN(ucleb->endpt, c->min_io_size);
+	if (len > ucleb->endpt) {
+		int pad_len = len - ALIGN(ucleb->endpt, 8);
+
+		if (pad_len > 0) {
+			/*
+			 * NOTE(review): the padding is placed via c->sbuf while
+			 * the data was read into @sbuf; this presumably relies
+			 * on callers passing c->sbuf - confirm.
+			 */
+			buf = c->sbuf + len - pad_len;
+			ubifs_pad(c, buf, pad_len);
+		}
+	}
+
+	/* Write back the LEB atomically */
+	err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN);
+	if (err)
+		return err;
+
+	dbg_rcvry("cleaned LEB %d", lnum);
+
+	return 0;
+}
+
+/**
+ * ubifs_clean_lebs - clean LEBs recovered during read-only mount.
+ * @c: UBIFS file-system description object + * @sbuf: LEB-sized buffer to use + * + * This function cleans a LEB identified during recovery that needs to be + * written but was not because UBIFS was mounted read-only. This happens when + * remounting to read-write mode. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) +{ + dbg_rcvry("recovery"); + while (!list_empty(&c->unclean_leb_list)) { + struct ubifs_unclean_leb *ucleb; + int err; + + ucleb = list_entry(c->unclean_leb_list.next, + struct ubifs_unclean_leb, list); + err = clean_an_unclean_leb(c, ucleb, sbuf); + if (err) + return err; + list_del(&ucleb->list); + kfree(ucleb); + } + return 0; +} + +/** + * struct size_entry - inode size information for recovery. + * @rb: link in the RB-tree of sizes + * @inum: inode number + * @i_size: size on inode + * @d_size: maximum size based on data nodes + * @exists: indicates whether the inode exists + * @inode: inode if pinned in memory awaiting rw mode to fix it + */ +struct size_entry { + struct rb_node rb; + ino_t inum; + loff_t i_size; + loff_t d_size; + int exists; + struct inode *inode; +}; + +/** + * add_ino - add an entry to the size tree. 
+ * @c: UBIFS file-system description object + * @inum: inode number + * @i_size: size on inode + * @d_size: maximum size based on data nodes + * @exists: indicates whether the inode exists + */ +static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size, + loff_t d_size, int exists) +{ + struct rb_node **p = &c->size_tree.rb_node, *parent = NULL; + struct size_entry *e; + + while (*p) { + parent = *p; + e = rb_entry(parent, struct size_entry, rb); + if (inum < e->inum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + e = kzalloc(sizeof(struct size_entry), GFP_KERNEL); + if (!e) + return -ENOMEM; + + e->inum = inum; + e->i_size = i_size; + e->d_size = d_size; + e->exists = exists; + + rb_link_node(&e->rb, parent, p); + rb_insert_color(&e->rb, &c->size_tree); + + return 0; +} + +/** + * find_ino - find an entry on the size tree. + * @c: UBIFS file-system description object + * @inum: inode number + */ +static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum) +{ + struct rb_node *p = c->size_tree.rb_node; + struct size_entry *e; + + while (p) { + e = rb_entry(p, struct size_entry, rb); + if (inum < e->inum) + p = p->rb_left; + else if (inum > e->inum) + p = p->rb_right; + else + return e; + } + return NULL; +} + +/** + * remove_ino - remove an entry from the size tree. + * @c: UBIFS file-system description object + * @inum: inode number + */ +static void remove_ino(struct ubifs_info *c, ino_t inum) +{ + struct size_entry *e = find_ino(c, inum); + + if (!e) + return; + rb_erase(&e->rb, &c->size_tree); + kfree(e); +} + +/** + * ubifs_recover_size_accum - accumulate inode sizes for recovery. 
+ * @c: UBIFS file-system description object
+ * @key: node key
+ * @deletion: node is for a deletion
+ * @new_size: inode size
+ *
+ * This function has two purposes:
+ *   1) to ensure there are no data nodes that fall outside the inode size
+ *   2) to ensure there are no data nodes for inodes that do not exist
+ * To accomplish those purposes, an rb-tree is constructed containing an entry
+ * for each inode number in the journal that has not been deleted, and recording
+ * the size from the inode node, the maximum size of any data node (also altered
+ * by truncations) and a flag indicating an inode number for which no inode node
+ * was present in the journal.
+ *
+ * Note that there is still the possibility that there are data nodes that have
+ * been committed that are beyond the inode size, however the only way to find
+ * them would be to scan the entire index. Alternatively, some provision could
+ * be made to record the size of inodes at the start of commit, which would seem
+ * very cumbersome for a scenario that is quite unlikely and the only negative
+ * consequence of which is wasted space.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
+			     int deletion, loff_t new_size)
+{
+	ino_t inum = key_inum(c, key);
+	struct size_entry *entry;
+
+	switch (key_type(c, key)) {
+	case UBIFS_INO_KEY:
+		if (deletion) {
+			remove_ino(c, inum);
+			break;
+		}
+		entry = find_ino(c, inum);
+		/* First sighting of this inode: create its entry */
+		if (!entry)
+			return add_ino(c, inum, new_size, 0, 1);
+		entry->i_size = new_size;
+		entry->exists = 1;
+		break;
+	case UBIFS_DATA_KEY:
+		entry = find_ino(c, inum);
+		/* Data seen before any inode node: entry starts as !exists */
+		if (!entry)
+			return add_ino(c, inum, 0, new_size, 0);
+		if (new_size > entry->d_size)
+			entry->d_size = new_size;
+		break;
+	case UBIFS_TRUN_KEY:
+		/* A truncation overrides the data size of a known inode */
+		entry = find_ino(c, inum);
+		if (entry)
+			entry->d_size = new_size;
+		break;
+	}
+	return 0;
+}
+
+/**
+ * ubifs_recover_size - recover inode size.
+ * @c: UBIFS file-system description object
+ *
+ * This function attempts to fix inode size discrepancies identified by the
+ * 'ubifs_recover_size_accum()' function.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */ +int ubifs_recover_size(struct ubifs_info *c) +{ + struct rb_node *this = rb_first(&c->size_tree); + + while (this) { + struct size_entry *e; + int err; + + e = rb_entry(this, struct size_entry, rb); + if (!e->exists) { + union ubifs_key key; + + ino_key_init(c, &key, e->inum); + err = ubifs_tnc_lookup(c, &key, c->sbuf); + if (err && err != -ENOENT) + return err; + if (err == -ENOENT) { + /* Remove data nodes that have no inode */ + dbg_rcvry("removing ino %lu", + (unsigned long)e->inum); + err = ubifs_tnc_remove_ino(c, e->inum); + if (err) + return err; + } else { + struct ubifs_ino_node *ino = c->sbuf; + + e->exists = 1; + e->i_size = le64_to_cpu(ino->size); + } + } + if (e->exists && e->i_size < e->d_size) { + if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) { + /* Fix the inode size and pin it in memory */ + struct inode *inode; + + inode = ubifs_iget(c->vfs_sb, e->inum); + if (IS_ERR(inode)) + return PTR_ERR(inode); + if (inode->i_size < e->d_size) { + dbg_rcvry("ino %lu size %lld -> %lld", + (unsigned long)e->inum, + e->d_size, inode->i_size); + inode->i_size = e->d_size; + ubifs_inode(inode)->ui_size = e->d_size; + e->inode = inode; + this = rb_next(this); + continue; + } + iput(inode); + } + } + this = rb_next(this); + rb_erase(&e->rb, &c->size_tree); + kfree(e); + } + return 0; +} diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c new file mode 100755 index 0000000..085a185 --- /dev/null +++ b/fs/ubifs/replay.c @@ -0,0 +1,981 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Adrian Hunter
+ *          Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file contains journal replay code. It runs when the file-system is being
+ * mounted and requires no locking.
+ *
+ * The larger the journal, the longer it takes to scan it, so the longer it
+ * takes to mount UBIFS. This is why the journal has a limited size which may be
+ * changed depending on the system requirements. But a larger journal gives
+ * faster I/O speed because it writes the index less frequently. So this is a
+ * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the
+ * larger the journal, the more memory its index may consume.
+ */
+
+#include "ubifs.h"
+
+/*
+ * Replay flags.
+ *
+ * REPLAY_DELETION: node was deleted
+ * REPLAY_REF: node is a reference node
+ */
+enum {
+	REPLAY_DELETION = 1,
+	REPLAY_REF = 2,
+};
+
+/**
+ * struct replay_entry - replay tree entry.
+ * @lnum: logical eraseblock number of the node
+ * @offs: node offset
+ * @len: node length
+ * @sqnum: node sequence number
+ * @flags: replay flags
+ * @rb: links the replay tree
+ * @key: node key
+ * @nm: directory entry name
+ * @old_size: truncation old size
+ * @new_size: truncation new size
+ * @free: amount of free space in a bud
+ * @dirty: amount of dirty space in a bud from padding and deletion nodes
+ *
+ * UBIFS journal replay must compare node sequence numbers, which means it must
+ * build a tree of node information to insert into the TNC.
+ */
+struct replay_entry {
+	int lnum;
+	int offs;
+	int len;
+	unsigned long long sqnum;
+	int flags;
+	struct rb_node rb;
+	union ubifs_key key;
+	/* The members below share storage; only one group is valid per entry */
+	union {
+		struct qstr nm;
+		struct {
+			loff_t old_size;
+			loff_t new_size;
+		};
+		struct {
+			int free;
+			int dirty;
+		};
+	};
+};
+
+/**
+ * struct bud_entry - entry in the list of buds to replay.
+ * @list: next bud in the list
+ * @bud: bud description object
+ * @free: free bytes in the bud
+ * @sqnum: reference node sequence number
+ */
+struct bud_entry {
+	struct list_head list;
+	struct ubifs_bud *bud;
+	int free;
+	unsigned long long sqnum;
+};
+
+/**
+ * set_bud_lprops - set free and dirty space used by a bud.
+ * @c: UBIFS file-system description object
+ * @r: replay entry of bud
+ *
+ * In this version the function is a stub: it changes nothing and always
+ * returns %0.
+ */
+static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
+{
+	return 0;
+}
+
+/**
+ * trun_remove_range - apply a replay entry for a truncation to the TNC.
+ * @c: UBIFS file-system description object
+ * @r: replay entry of truncation
+ *
+ * Returns whatever 'ubifs_tnc_remove_range()' returns for the computed range
+ * of data keys.
+ */
+static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
+{
+	unsigned min_blk, max_blk;
+	union ubifs_key min_key, max_key;
+	ino_t ino;
+
+	/* First block to remove: round the new size up to a block boundary */
+	min_blk = r->new_size / UBIFS_BLOCK_SIZE;
+	if (r->new_size & (UBIFS_BLOCK_SIZE - 1))
+		min_blk += 1;
+
+	/* Last block to remove: the block holding the last byte of old size */
+	max_blk = r->old_size / UBIFS_BLOCK_SIZE;
+	if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0)
+		max_blk -= 1;
+
+	ino = key_inum(c, &r->key);
+
+	data_key_init(c, &min_key, ino, min_blk);
+	data_key_init(c, &max_key, ino, max_blk);
+
+	return ubifs_tnc_remove_range(c, &min_key, &max_key);
+}
+
+/**
+ * apply_replay_entry - apply a replay entry to the TNC.
+ * @c: UBIFS file-system description object
+ * @r: replay entry to apply
+ *
+ * Apply a replay entry to the TNC.
+ */
+static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
+{
+	int err, deletion = ((r->flags & REPLAY_DELETION) != 0);
+
+	dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum,
+		r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key));
+
+	/* Set c->replay_sqnum to help deal with dangling branches. */
+	c->replay_sqnum = r->sqnum;
+
+	if (r->flags & REPLAY_REF)
+		err = set_bud_lprops(c, r);
+	else if (is_hash_key(c, &r->key)) {
+		/* Hashed keys are added/removed together with the name */
+		if (deletion)
+			err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
+		else
+			err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
+					       r->len, &r->nm);
+	} else {
+		if (deletion)
+			switch (key_type(c, &r->key)) {
+			case UBIFS_INO_KEY:
+			{
+				ino_t inum = key_inum(c, &r->key);
+
+				err = ubifs_tnc_remove_ino(c, inum);
+				break;
+			}
+			case UBIFS_TRUN_KEY:
+				err = trun_remove_range(c, r);
+				break;
+			default:
+				err = ubifs_tnc_remove(c, &r->key);
+				break;
+			}
+		else
+			err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs,
+					    r->len);
+		if (err)
+			return err;
+
+		/* Size bookkeeping is only done when recovery is needed */
+		if (c->need_recovery)
+			err = ubifs_recover_size_accum(c, &r->key, deletion,
+						       r->new_size);
+	}
+
+	return err;
+}
+
+/**
+ * destroy_replay_tree - destroy the replay.
+ * @c: UBIFS file-system description object
+ *
+ * Destroy the replay tree.
+ */
+static void destroy_replay_tree(struct ubifs_info *c)
+{
+	struct rb_node *this = c->replay_tree.rb_node;
+	struct replay_entry *r;
+
+	/*
+	 * Descend to a leaf, free it, unlink it from its parent and carry on
+	 * from the parent; no rebalancing is done.
+	 */
+	while (this) {
+		if (this->rb_left) {
+			this = this->rb_left;
+			continue;
+		} else if (this->rb_right) {
+			this = this->rb_right;
+			continue;
+		}
+		r = rb_entry(this, struct replay_entry, rb);
+		this = rb_parent(this);
+		if (this) {
+			if (this->rb_left == &r->rb)
+				this->rb_left = NULL;
+			else
+				this->rb_right = NULL;
+		}
+		/* Entries with hashed keys also own their name buffer */
+		if (is_hash_key(c, &r->key))
+			kfree((void *)r->nm.name);
+		kfree(r);
+	}
+	c->replay_tree = RB_ROOT;
+}
+
+/**
+ * apply_replay_tree - apply the replay tree to the TNC.
+ * @c: UBIFS file-system description object
+ *
+ * Apply the replay tree.
+ * Returns zero in case of success and a negative error code in case of + * failure. + */ +static int apply_replay_tree(struct ubifs_info *c) +{ + struct rb_node *this = rb_first(&c->replay_tree); + + while (this) { + struct replay_entry *r; + int err; + + cond_resched(); + + r = rb_entry(this, struct replay_entry, rb); + err = apply_replay_entry(c, r); + if (err) + return err; + this = rb_next(this); + } + return 0; +} + +/** + * insert_node - insert a node to the replay tree. + * @c: UBIFS file-system description object + * @lnum: node logical eraseblock number + * @offs: node offset + * @len: node length + * @key: node key + * @sqnum: sequence number + * @deletion: non-zero if this is a deletion + * @used: number of bytes in use in a LEB + * @old_size: truncation old size + * @new_size: truncation new size + * + * This function inserts a scanned non-direntry node to the replay tree. The + * replay tree is an RB-tree containing @struct replay_entry elements which are + * indexed by the sequence number. The replay tree is applied at the very end + * of the replay process. Since the tree is sorted in sequence number order, + * the older modifications are applied first. This function returns zero in + * case of success and a negative error code in case of failure. 
+ */ +static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, + union ubifs_key *key, unsigned long long sqnum, + int deletion, int *used, loff_t old_size, + loff_t new_size) +{ + struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; + struct replay_entry *r; + + if (key_inum(c, key) >= c->highest_inum) + c->highest_inum = key_inum(c, key); + + dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); + while (*p) { + parent = *p; + r = rb_entry(parent, struct replay_entry, rb); + if (sqnum < r->sqnum) { + p = &(*p)->rb_left; + continue; + } else if (sqnum > r->sqnum) { + p = &(*p)->rb_right; + continue; + } + ubifs_err("duplicate sqnum in replay"); + return -EINVAL; + } + + r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); + if (!r) + return -ENOMEM; + + if (!deletion) + *used += ALIGN(len, 8); + r->lnum = lnum; + r->offs = offs; + r->len = len; + r->sqnum = sqnum; + r->flags = (deletion ? REPLAY_DELETION : 0); + r->old_size = old_size; + r->new_size = new_size; + key_copy(c, key, &r->key); + + rb_link_node(&r->rb, parent, p); + rb_insert_color(&r->rb, &c->replay_tree); + return 0; +} + +/** + * insert_dent - insert a directory entry node into the replay tree. + * @c: UBIFS file-system description object + * @lnum: node logical eraseblock number + * @offs: node offset + * @len: node length + * @key: node key + * @name: directory entry name + * @nlen: directory entry name length + * @sqnum: sequence number + * @deletion: non-zero if this is a deletion + * @used: number of bytes in use in a LEB + * + * This function inserts a scanned directory entry node to the replay tree. + * Returns zero in case of success and a negative error code in case of + * failure. + * + * This function is also used for extended attribute entries because they are + * implemented as directory entry nodes. 
+ */ +static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, + union ubifs_key *key, const char *name, int nlen, + unsigned long long sqnum, int deletion, int *used) +{ + struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; + struct replay_entry *r; + char *nbuf; + + if (key_inum(c, key) >= c->highest_inum) + c->highest_inum = key_inum(c, key); + + dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); + while (*p) { + parent = *p; + r = rb_entry(parent, struct replay_entry, rb); + if (sqnum < r->sqnum) { + p = &(*p)->rb_left; + continue; + } + if (sqnum > r->sqnum) { + p = &(*p)->rb_right; + continue; + } + ubifs_err("duplicate sqnum in replay"); + return -EINVAL; + } + + r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); + if (!r) + return -ENOMEM; + nbuf = kmalloc(nlen + 1, GFP_KERNEL); + if (!nbuf) { + kfree(r); + return -ENOMEM; + } + + if (!deletion) + *used += ALIGN(len, 8); + r->lnum = lnum; + r->offs = offs; + r->len = len; + r->sqnum = sqnum; + r->nm.len = nlen; + memcpy(nbuf, name, nlen); + nbuf[nlen] = '\0'; + r->nm.name = nbuf; + r->flags = (deletion ? REPLAY_DELETION : 0); + key_copy(c, key, &r->key); + + ubifs_assert(!*p); + rb_link_node(&r->rb, parent, p); + rb_insert_color(&r->rb, &c->replay_tree); + return 0; +} + +/** + * ubifs_validate_entry - validate directory or extended attribute entry node. + * @c: UBIFS file-system description object + * @dent: the node to validate + * + * This function validates directory or extended attribute entry node @dent. + * Returns zero if the node is all right and a %-EINVAL if not. 
 */
int ubifs_validate_entry(struct ubifs_info *c,
			 const struct ubifs_dent_node *dent)
{
	int key_type = key_type_flash(c, dent->key);
	int nlen = le16_to_cpu(dent->nlen);

	/*
	 * Structural checks. The 'nlen > UBIFS_MAX_NLEN' test comes before
	 * 'dent->name[nlen]' is read, so short-circuit evaluation keeps that
	 * index within the maximum name length.
	 */
	if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 ||
	    dent->type >= UBIFS_ITYPES_CNT ||
	    nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 ||
	    strnlen((char *)dent->name, nlen) != nlen ||
	    le64_to_cpu(dent->inum) > MAX_INUM) {
		ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ?
			  "directory entry" : "extended attribute entry");
		return -EINVAL;
	}

	/* Only directory entry and extended attribute entry keys are valid */
	if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) {
		ubifs_err("bad key type %d", key_type);
		return -EINVAL;
	}

	return 0;
}

/**
 * replay_bud - replay a bud logical eraseblock.
 * @c: UBIFS file-system description object
 * @lnum: bud logical eraseblock number to replay
 * @offs: bud start offset
 * @jhead: journal head to which this bud belongs
 * @free: amount of free space in the bud is returned here
 * @dirty: amount of dirty space from padding and deletion nodes is returned
 *         here
 *
 * This function returns zero in case of success and a negative error code in
 * case of failure. @free and @dirty are written only on the success path.
 */
static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
		      int *free, int *dirty)
{
	int err = 0, used = 0;
	struct ubifs_scan_leb *sleb;
	struct ubifs_scan_node *snod;
	struct ubifs_bud *bud;

	dbg_mnt("replay bud LEB %d, head %d", lnum, jhead);
	/* When recovery is needed, use the recovering scan instead */
	if (c->need_recovery)
		sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
	else
		sleb = ubifs_scan(c, lnum, offs, c->sbuf);
	if (IS_ERR(sleb))
		return PTR_ERR(sleb);

	/*
	 * The bud does not have to start from offset zero - the beginning of
	 * the 'lnum' LEB may contain previously committed data. One of the
	 * things we have to do in replay is to correctly update lprops with
	 * newer information about this LEB.
	 *
	 * At this point lprops thinks that this LEB has 'c->leb_size - offs'
	 * bytes of free space because it only contains information about
	 * committed data.
	 *
	 * But we know that real amount of free space is 'c->leb_size -
	 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and
	 * 'sleb->endpt' is used by bud data. We have to correctly calculate
	 * how much of these data are dirty and update lprops with this
	 * information.
	 *
	 * The dirt in that LEB region is comprised of padding nodes, deletion
	 * nodes, truncation nodes and nodes which are obsoleted by subsequent
	 * nodes in this LEB. So instead of calculating clean space, we
	 * calculate used space ('used' variable).
	 */

	list_for_each_entry(snod, &sleb->nodes, list) {
		int deletion = 0;

		cond_resched();

		if (snod->sqnum >= SQNUM_WATERMARK) {
			ubifs_err("file system's life ended");
			goto out_dump;
		}

		/* Keep track of the highest sequence number seen so far */
		if (snod->sqnum > c->max_sqnum)
			c->max_sqnum = snod->sqnum;

		switch (snod->type) {
		case UBIFS_INO_NODE:
		{
			struct ubifs_ino_node *ino = snod->node;
			loff_t new_size = le64_to_cpu(ino->size);

			/* An inode node with zero link count is a deletion */
			if (le32_to_cpu(ino->nlink) == 0)
				deletion = 1;
			err = insert_node(c, lnum, snod->offs, snod->len,
					  &snod->key, snod->sqnum, deletion,
					  &used, 0, new_size);
			break;
		}
		case UBIFS_DATA_NODE:
		{
			struct ubifs_data_node *dn = snod->node;
			/* Size implied by this block: block start + data size */
			loff_t new_size = le32_to_cpu(dn->size) +
					  key_block(c, &snod->key) *
					  UBIFS_BLOCK_SIZE;

			err = insert_node(c, lnum, snod->offs, snod->len,
					  &snod->key, snod->sqnum, deletion,
					  &used, 0, new_size);
			break;
		}
		case UBIFS_DENT_NODE:
		case UBIFS_XENT_NODE:
		{
			struct ubifs_dent_node *dent = snod->node;

			err = ubifs_validate_entry(c, dent);
			if (err)
				goto out_dump;

			/* An entry whose target inode number is zero is a deletion */
			err = insert_dent(c, lnum, snod->offs, snod->len,
					  &snod->key, (char *)dent->name,
					  le16_to_cpu(dent->nlen), snod->sqnum,
					  !le64_to_cpu(dent->inum), &used);
			break;
		}
		case UBIFS_TRUN_NODE:
		{
			struct ubifs_trun_node *trun = snod->node;
			loff_t old_size = le64_to_cpu(trun->old_size);
			loff_t new_size = le64_to_cpu(trun->new_size);
			union ubifs_key key;

			/* Validate truncation node */
			if (old_size < 0 || old_size > c->max_inode_sz ||
			    new_size < 0 || new_size > c->max_inode_sz ||
			    old_size <= new_size) {
				ubifs_err("bad truncation node");
				goto out_dump;
			}

			/*
			 * Create a fake truncation key just to use the same
			 * functions which expect nodes to have keys.
			 */
			trun_key_init(c, &key, le32_to_cpu(trun->inum));
			/* Truncations are always inserted as deletions */
			err = insert_node(c, lnum, snod->offs, snod->len,
					  &key, snod->sqnum, 1, &used,
					  old_size, new_size);
			break;
		}
		default:
			ubifs_err("unexpected node type %d in bud LEB %d:%d",
				  snod->type, lnum, snod->offs);
			err = -EINVAL;
			goto out_dump;
		}
		if (err)
			goto out;
	}

	/*
	 * NOTE(review): the lookup result is not used - the check that
	 * consumed it is commented out below.
	 */
	bud = ubifs_search_bud(c, lnum);
//	if (!bud)
//		BUG();

	ubifs_assert(sleb->endpt - offs >= used);
	ubifs_assert(sleb->endpt % c->min_io_size == 0);

	/* Everything between 'offs' and the scan end point that is not used */
	*dirty = sleb->endpt - offs - used;
	*free = c->leb_size - sleb->endpt;

out:
	ubifs_scan_destroy(sleb);
	return err;

out_dump:
	/* Any failure routed here is reported as -EINVAL */
	ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs);
	dbg_dump_node(c, snod->node);
	ubifs_scan_destroy(sleb);
	return -EINVAL;
}

/**
 * insert_ref_node - insert a reference node to the replay tree.
 * @c: UBIFS file-system description object
 * @lnum: node logical eraseblock number
 * @offs: node offset
 * @sqnum: sequence number
 * @free: amount of free space in bud
 * @dirty: amount of dirty space from padding and deletion nodes
 *
 * This function inserts a reference node to the replay tree and returns zero
 * in case of success or a negative error code in case of failure.
+ */ +static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, + unsigned long long sqnum, int free, int dirty) +{ + struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; + struct replay_entry *r; + + dbg_mnt("add ref LEB %d:%d", lnum, offs); + while (*p) { + parent = *p; + r = rb_entry(parent, struct replay_entry, rb); + if (sqnum < r->sqnum) { + p = &(*p)->rb_left; + continue; + } else if (sqnum > r->sqnum) { + p = &(*p)->rb_right; + continue; + } + ubifs_err("duplicate sqnum in replay tree"); + return -EINVAL; + } + + r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); + if (!r) + return -ENOMEM; + + r->lnum = lnum; + r->offs = offs; + r->sqnum = sqnum; + r->flags = REPLAY_REF; + r->free = free; + r->dirty = dirty; + + rb_link_node(&r->rb, parent, p); + rb_insert_color(&r->rb, &c->replay_tree); + return 0; +} + +/** + * replay_buds - replay all buds. + * @c: UBIFS file-system description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int replay_buds(struct ubifs_info *c) +{ + struct bud_entry *b; + int err, uninitialized_var(free), uninitialized_var(dirty); + + list_for_each_entry(b, &c->replay_buds, list) { + err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, + &free, &dirty); + if (err) + return err; + err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, + free, dirty); + if (err) + return err; + } + + return 0; +} + +/** + * destroy_bud_list - destroy the list of buds to replay. + * @c: UBIFS file-system description object + */ +static void destroy_bud_list(struct ubifs_info *c) +{ + struct bud_entry *b; + + while (!list_empty(&c->replay_buds)) { + b = list_entry(c->replay_buds.next, struct bud_entry, list); + list_del(&b->list); + kfree(b); + } +} + +/** + * add_replay_bud - add a bud to the list of buds to replay. 
 * @c: UBIFS file-system description object
 * @lnum: bud logical eraseblock number to replay
 * @offs: bud start offset
 * @jhead: journal head to which this bud belongs
 * @sqnum: reference node sequence number
 *
 * This function returns zero in case of success and a negative error code in
 * case of failure.
 */
static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
			  unsigned long long sqnum)
{
	struct ubifs_bud *bud;
	struct bud_entry *b;

	dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead);

	bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL);
	if (!bud)
		return -ENOMEM;

	b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL);
	if (!b) {
		kfree(bud);
		return -ENOMEM;
	}

	/* The bud is handed to ubifs_add_bud(); it is not freed in this file */
	bud->lnum = lnum;
	bud->start = offs;
	bud->jhead = jhead;
	ubifs_add_bud(c, bud);

	/* The list entry records the bud and its reference node sqnum */
	b->bud = bud;
	b->sqnum = sqnum;
	list_add_tail(&b->list, &c->replay_buds);

	return 0;
}

/**
 * validate_ref - validate a reference node.
 * @c: UBIFS file-system description object
 * @ref: the reference node to validate
 *
 * This function returns %1 if a bud reference already exists for the LEB. %0 is
 * returned if the reference node is new, otherwise %-EINVAL is returned if
 * validation failed.
 */
static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
{
	struct ubifs_bud *bud;
	int lnum = le32_to_cpu(ref->lnum);
	unsigned int offs = le32_to_cpu(ref->offs);
	unsigned int jhead = le32_to_cpu(ref->jhead);

	/*
	 * ref->offs may point to the end of LEB when the journal head points
	 * to the end of LEB and we write reference node for it during commit.
	 * So this is why 'offs == c->leb_size' is accepted and only
	 * 'offs > c->leb_size' is rejected.
	 */
	if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt ||
	    lnum < c->main_first || offs > c->leb_size ||
	    offs & (c->min_io_size - 1))
		return -EINVAL;

	/* Make sure we have not already looked at this bud */
	bud = ubifs_search_bud(c, lnum);
	if (bud) {
		if (bud->jhead == jhead && bud->start <= offs)
			return 1;
		ubifs_err("bud at LEB %d:%d was already referred", lnum, offs);
		return -EINVAL;
	}

	return 0;
}

/**
 * replay_log_leb - replay a log logical eraseblock.
 * @c: UBIFS file-system description object
 * @lnum: log logical eraseblock to replay
 * @offs: offset to start replaying from
 * @sbuf: scan buffer
 *
 * This function replays a log LEB and returns zero in case of success, %1 if
 * this is the last LEB in the log, and a negative error code in case of
 * failure.
 */
static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
{
	int err;
	struct ubifs_scan_leb *sleb;
	struct ubifs_scan_node *snod;
	const struct ubifs_cs_node *node;

	dbg_mnt("replay log LEB %d:%d", lnum, offs);
	sleb = ubifs_scan(c, lnum, offs, sbuf);
	if (IS_ERR(sleb)) {
		/* Retry with log recovery only when recovery is needed */
		if (c->need_recovery)
			sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
		if (IS_ERR(sleb))
			return PTR_ERR(sleb);
	}

	/* An LEB without nodes is reported as the end of the log */
	if (sleb->nodes_cnt == 0) {
		err = 1;
		goto out;
	}

	node = sleb->buf;

	snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
	if (c->cs_sqnum == 0) {
		/*
		 * This is the first log LEB we are looking at, make sure that
		 * the first node is a commit start node. Also record its
		 * sequence number so that UBIFS can determine where the log
		 * ends: nodes with a lower sequence number than the commit
		 * start node are treated below as older, already committed
		 * log data.
		 */
		if (snod->type != UBIFS_CS_NODE) {
			dbg_err("first log node at LEB %d:%d is not CS node",
				lnum, offs);
			goto out_dump;
		}
		if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
			dbg_err("first CS node at LEB %d:%d has wrong "
				"commit number %llu expected %llu",
				lnum, offs,
				(unsigned long long)le64_to_cpu(node->cmt_no),
				c->cmt_no);
			goto out_dump;
		}

		c->cs_sqnum = le64_to_cpu(node->ch.sqnum);
		dbg_mnt("commit start sqnum %llu", c->cs_sqnum);
	}

	if (snod->sqnum < c->cs_sqnum) {
		/*
		 * This means that we reached end of log and now
		 * look to the older log data, which was already
		 * committed but the eraseblock was not erased (UBIFS
		 * only un-maps it). So this basically means we have to
		 * exit with "end of log" code.
		 */
		err = 1;
		goto out;
	}

	/* Make sure the first node sits at offset zero of the LEB */
	if (snod->offs != 0) {
		dbg_err("first node is not at zero offset");
		goto out_dump;
	}

	list_for_each_entry(snod, &sleb->nodes, list) {

		cond_resched();

		if (snod->sqnum >= SQNUM_WATERMARK) {
			ubifs_err("file system's life ended");
			goto out_dump;
		}

		if (snod->sqnum < c->cs_sqnum) {
			dbg_err("bad sqnum %llu, commit sqnum %llu",
				snod->sqnum, c->cs_sqnum);
			goto out_dump;
		}

		/* Keep track of the highest sequence number seen so far */
		if (snod->sqnum > c->max_sqnum)
			c->max_sqnum = snod->sqnum;

		switch (snod->type) {
		case UBIFS_REF_NODE: {
			const struct ubifs_ref_node *ref = snod->node;

			err = validate_ref(c, ref);
			if (err == 1)
				break; /* Already have this bud */
			if (err)
				goto out_dump;

			err = add_replay_bud(c, le32_to_cpu(ref->lnum),
					     le32_to_cpu(ref->offs),
					     le32_to_cpu(ref->jhead),
					     snod->sqnum);
			if (err)
				goto out;

			break;
		}
		case UBIFS_CS_NODE:
			/* Make sure it sits at the beginning of LEB */
			if (snod->offs != 0) {
				ubifs_err("unexpected node in log");
				goto out_dump;
			}
			break;
		default:
			ubifs_err("unexpected node in log");
			goto out_dump;
		}
	}

	/* Record the position where scanning of this log LEB ended */
	if (sleb->endpt || c->lhead_offs >= c->leb_size) {
		c->lhead_lnum = lnum;
		c->lhead_offs = sleb->endpt;
	}

	/* A zero end point yields %1, i.e. "last LEB in the log" */
	err = !sleb->endpt;
out:
	ubifs_scan_destroy(sleb);
	return err;

out_dump:
	/* Any failure routed here is reported as -EINVAL */
	ubifs_err("log error detected while replying the log at LEB %d:%d",
		  lnum, offs + snod->offs);
	dbg_dump_node(c, snod->node);
	ubifs_scan_destroy(sleb);
	return -EINVAL;
}

/**
 * take_ihead - update the status of the index head in lprops to 'taken'.
 * @c: UBIFS file-system description object
 *
 * This function is documented as returning the amount of free space in the
 * index head LEB or a negative error code. In this file it is a stub that
 * does nothing and always returns zero.
 */
static int take_ihead(struct ubifs_info *c)
{
	return 0;
}

/**
 * ubifs_replay_journal - replay journal.
 * @c: UBIFS file-system description object
 *
 * This function scans the journal, replays and cleans it up. It makes sure all
 * memory data structures related to uncommitted journal are built (dirty TNC
 * tree, tree of buds, modified lprops, etc).
 *
 * Returns zero in case of success and a negative error code in case of
 * failure. The replay tree and the bud list are destroyed on both paths.
 */
int ubifs_replay_journal(struct ubifs_info *c)
{
	int err, i, lnum, offs, _free;

	BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);

	/* Update the status of the index head in lprops to 'taken' */
	/* The checks on the result are commented out, so '_free' is unused */
	_free = take_ihead(c);
//	if (_free < 0)
//		return _free; /* Error code */
//	if (c->ihead_offs != c->leb_size - _free) {
//		ubifs_err("bad index head LEB %d:%d", c->ihead_lnum,
//			  c->ihead_offs);
//		return -EINVAL;
//	}

	c->replaying = 1;

	/* Start scanning the log from the recorded log head position */
	lnum = c->ltail_lnum = c->lhead_lnum;
	offs = c->lhead_offs;

	for (i = 0; i < c->log_lebs; i++, lnum++) {
		if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) {
			/*
			 * The log is logically circular, we reached the last
			 * LEB, switch to the first one.
			 */
			lnum = UBIFS_LOG_LNUM;
			offs = 0;
		}
		err = replay_log_leb(c, lnum, offs, c->sbuf);
		if (err == 1)
			/* We hit the end of the log */
			break;
		if (err)
			goto out;
		/* Every LEB after the first one is scanned from offset zero */
		offs = 0;
	}
	err = replay_buds(c);
	if (err)
		goto out;

	err = apply_replay_tree(c);
	if (err)
		goto out;

	ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
	dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
		"highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
		(unsigned long)c->highest_inum);
out:
	destroy_replay_tree(c);
	destroy_bud_list(c);
	c->replaying = 0;
	return err;
}
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
new file mode 100755
index 0000000..2d6524c
--- /dev/null
+++ b/fs/ubifs/sb.c
@@ -0,0 +1,346 @@
/*
 * This file is part of UBIFS.
 *
 * Copyright (C) 2006-2008 Nokia Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Authors: Artem Bityutskiy (Битюцкий Артём)
 *          Adrian Hunter
 */

/*
 * This file implements UBIFS superblock. The superblock is stored at the first
 * LEB of the volume and is never changed by UBIFS. Only user-space tools may
 * change it. The superblock node mostly contains geometry information.
 */

#include "ubifs.h"

/*
 * Default journal size in logical eraseblocks as a percent of total
 * flash size.
+ */ +#define DEFAULT_JNL_PERCENT 5 + +/* Default maximum journal size in bytes */ +#define DEFAULT_MAX_JNL (32*1024*1024) + +/* Default indexing tree fanout */ +#define DEFAULT_FANOUT 8 + +/* Default number of data journal heads */ +#define DEFAULT_JHEADS_CNT 1 + +/* Default positions of different LEBs in the main area */ +#define DEFAULT_IDX_LEB 0 +#define DEFAULT_DATA_LEB 1 +#define DEFAULT_GC_LEB 2 + +/* Default number of LEB numbers in LPT's save table */ +#define DEFAULT_LSAVE_CNT 256 + +/* Default reserved pool size as a percent of maximum free space */ +#define DEFAULT_RP_PERCENT 5 + +/* The default maximum size of reserved pool in bytes */ +#define DEFAULT_MAX_RP_SIZE (5*1024*1024) + +/* Default time granularity in nanoseconds */ +#define DEFAULT_TIME_GRAN 1000000000 + +/** + * validate_sb - validate superblock node. + * @c: UBIFS file-system description object + * @sup: superblock node + * + * This function validates superblock node @sup. Since most of data was read + * from the superblock and stored in @c, the function validates fields in @c + * instead. Returns zero in case of success and %-EINVAL in case of validation + * failure. + */ +static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) +{ + long long max_bytes; + int err = 1, min_leb_cnt; + + if (!c->key_hash) { + err = 2; + goto failed; + } + + if (sup->key_fmt != UBIFS_SIMPLE_KEY_FMT) { + err = 3; + goto failed; + } + + if (le32_to_cpu(sup->min_io_size) != c->min_io_size) { + ubifs_err("min. 
I/O unit mismatch: %d in superblock, %d real", + le32_to_cpu(sup->min_io_size), c->min_io_size); + goto failed; + } + + if (le32_to_cpu(sup->leb_size) != c->leb_size) { + ubifs_err("LEB size mismatch: %d in superblock, %d real", + le32_to_cpu(sup->leb_size), c->leb_size); + goto failed; + } + + if (c->log_lebs < UBIFS_MIN_LOG_LEBS || + c->lpt_lebs < UBIFS_MIN_LPT_LEBS || + c->orph_lebs < UBIFS_MIN_ORPH_LEBS || + c->main_lebs < UBIFS_MIN_MAIN_LEBS) { + err = 4; + goto failed; + } + + /* + * Calculate minimum allowed amount of main area LEBs. This is very + * similar to %UBIFS_MIN_LEB_CNT, but we take into account real what we + * have just read from the superblock. + */ + min_leb_cnt = UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs; + min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6; + + if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) { + ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, " + "%d minimum required", c->leb_cnt, c->vi.size, + min_leb_cnt); + goto failed; + } + + if (c->max_leb_cnt < c->leb_cnt) { + ubifs_err("max. 
LEB count %d less than LEB count %d", + c->max_leb_cnt, c->leb_cnt); + goto failed; + } + + if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) { + err = 7; + goto failed; + } + + if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS || + c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) { + err = 8; + goto failed; + } + + if (c->jhead_cnt < NONDATA_JHEADS_CNT + 1 || + c->jhead_cnt > NONDATA_JHEADS_CNT + UBIFS_MAX_JHEADS) { + err = 9; + goto failed; + } + + if (c->fanout < UBIFS_MIN_FANOUT || + ubifs_idx_node_sz(c, c->fanout) > c->leb_size) { + err = 10; + goto failed; + } + + if (c->lsave_cnt < 0 || (c->lsave_cnt > DEFAULT_LSAVE_CNT && + c->lsave_cnt > c->max_leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - + c->log_lebs - c->lpt_lebs - c->orph_lebs)) { + err = 11; + goto failed; + } + + if (UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs + c->lpt_lebs + + c->orph_lebs + c->main_lebs != c->leb_cnt) { + err = 12; + goto failed; + } + + if (c->default_compr < 0 || c->default_compr >= UBIFS_COMPR_TYPES_CNT) { + err = 13; + goto failed; + } + + max_bytes = c->main_lebs * (long long)c->leb_size; + if (c->rp_size < 0 || max_bytes < c->rp_size) { + err = 14; + goto failed; + } + + if (le32_to_cpu(sup->time_gran) > 1000000000 || + le32_to_cpu(sup->time_gran) < 1) { + err = 15; + goto failed; + } + + return 0; + +failed: + ubifs_err("bad superblock, error %d", err); + dbg_dump_node(c, sup); + return -EINVAL; +} + +/** + * ubifs_read_sb_node - read superblock node. + * @c: UBIFS file-system description object + * + * This function returns a pointer to the superblock node or a negative error + * code. 
 */
struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
{
	struct ubifs_sb_node *sup;
	int err;

	/* The buffer is rounded up to a multiple of the min. I/O unit size */
	sup = kmalloc(ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size), GFP_NOFS);
	if (!sup)
		return ERR_PTR(-ENOMEM);

	err = ubifs_read_node(c, sup, UBIFS_SB_NODE, UBIFS_SB_NODE_SZ,
			      UBIFS_SB_LNUM, 0);
	if (err) {
		kfree(sup);
		return ERR_PTR(err);
	}

	/* On success the caller owns the buffer and has to kfree() it */
	return sup;
}

/**
 * ubifs_read_superblock - read superblock.
 * @c: UBIFS file-system description object
 *
 * This function finds, reads and checks the superblock. If an empty UBI volume
 * is being mounted (@c->empty is set), no default superblock is created here:
 * the function prints a message and returns -1. Otherwise it returns zero in
 * case of success, and a negative error code in case of failure.
 */
int ubifs_read_superblock(struct ubifs_info *c)
{
	int err, sup_flags;
	struct ubifs_sb_node *sup;

	if (c->empty) {
		printf("No UBIFS filesystem found!\n");
		return -1;
	}

	sup = ubifs_read_sb_node(c);
	if (IS_ERR(sup))
		return PTR_ERR(sup);

	c->fmt_version = le32_to_cpu(sup->fmt_version);
	c->ro_compat_version = le32_to_cpu(sup->ro_compat_version);

	/*
	 * The software supports all previous versions but not future versions,
	 * due to the unavailability of time-travelling equipment.
	 */
	if (c->fmt_version > UBIFS_FORMAT_VERSION) {
		struct super_block *sb = c->vfs_sb;
		int mounting_ro = sb->s_flags & MS_RDONLY;

		ubifs_assert(!c->ro_media || mounting_ro);
		if (!mounting_ro ||
		    c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) {
			ubifs_err("on-flash format version is w%d/r%d, but "
				  "software only supports up to version "
				  "w%d/r%d", c->fmt_version,
				  c->ro_compat_version, UBIFS_FORMAT_VERSION,
				  UBIFS_RO_COMPAT_VERSION);
			/* A R/O-compatible format fails with -EROFS instead */
			if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) {
				ubifs_msg("only R/O mounting is possible");
				err = -EROFS;
			} else
				err = -EINVAL;
			goto out;
		}

		/*
		 * The FS is mounted R/O, and the media format is
		 * R/O-compatible with the UBIFS implementation, so we can
		 * mount.
		 */
		c->rw_incompat = 1;
	}

	if (c->fmt_version < 3) {
		ubifs_err("on-flash format version %d is not supported",
			  c->fmt_version);
		err = -EINVAL;
		goto out;
	}

	/*
	 * There is no default case: for any other hash type 'c->key_hash' is
	 * not assigned here. validate_sb() fails with error 2 when
	 * 'c->key_hash' is NULL.
	 */
	switch (sup->key_hash) {
	case UBIFS_KEY_HASH_R5:
		c->key_hash = key_r5_hash;
		c->key_hash_type = UBIFS_KEY_HASH_R5;
		break;

	case UBIFS_KEY_HASH_TEST:
		c->key_hash = key_test_hash;
		c->key_hash_type = UBIFS_KEY_HASH_TEST;
		break;
	};

	c->key_fmt = sup->key_fmt;

	switch (c->key_fmt) {
	case UBIFS_SIMPLE_KEY_FMT:
		c->key_len = UBIFS_SK_LEN;
		break;
	default:
		ubifs_err("unsupported key format");
		err = -EINVAL;
		goto out;
	}

	/* Copy the superblock fields into @c, converted to CPU byte order */
	c->leb_cnt       = le32_to_cpu(sup->leb_cnt);
	c->max_leb_cnt   = le32_to_cpu(sup->max_leb_cnt);
	c->max_bud_bytes = le64_to_cpu(sup->max_bud_bytes);
	c->log_lebs      = le32_to_cpu(sup->log_lebs);
	c->lpt_lebs      = le32_to_cpu(sup->lpt_lebs);
	c->orph_lebs     = le32_to_cpu(sup->orph_lebs);
	/* The on-flash count is stored without the non-data journal heads */
	c->jhead_cnt     = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT;
	c->fanout        = le32_to_cpu(sup->fanout);
	c->lsave_cnt     = le32_to_cpu(sup->lsave_cnt);
	c->default_compr = le16_to_cpu(sup->default_compr);
	c->rp_size       = le64_to_cpu(sup->rp_size);
	c->rp_uid        = le32_to_cpu(sup->rp_uid);
	c->rp_gid        = le32_to_cpu(sup->rp_gid);
	sup_flags        = le32_to_cpu(sup->flags);

	c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
	memcpy(&c->uuid, &sup->uuid, 16);
	c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);

	/* Automatically increase file system size to the maximum size */
	/* Only the in-memory 'c->leb_cnt' is changed by this function */
	c->old_leb_cnt = c->leb_cnt;
	if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) {
		c->leb_cnt = min(c->max_leb_cnt, c->vi.size);
		dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs",
			c->old_leb_cnt, c->leb_cnt);
	}

	/* Derive the area layout: log, LPT, orphan area, then main area */
	c->log_bytes = (long long)c->log_lebs * c->leb_size;
	c->log_last = UBIFS_LOG_LNUM + c->log_lebs - 1;
	c->lpt_first = UBIFS_LOG_LNUM + c->log_lebs;
	c->lpt_last = c->lpt_first + c->lpt_lebs - 1;
	c->orph_first = c->lpt_last + 1;
	c->orph_last = c->orph_first + c->orph_lebs - 1;
	c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS;
	c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs;
	c->main_first = c->leb_cnt - c->main_lebs;
	c->report_rp_size = ubifs_reported_space(c, c->rp_size);

	err = validate_sb(c, sup);
out:
	/* The superblock buffer is freed on every path that reaches here */
	kfree(sup);
	return err;
}
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
new file mode 100755
index 0000000..eab6762
--- /dev/null
+++ b/fs/ubifs/scan.c
@@ -0,0 +1,362 @@
/*
 * This file is part of UBIFS.
 *
 * Copyright (C) 2006-2008 Nokia Corporation
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Authors: Adrian Hunter
 *          Artem Bityutskiy (Битюцкий Артём)
 */

/*
 * This file implements the scan which is a general-purpose function for
 * determining what nodes are in an eraseblock. The scan is used to replay the
 * journal, to do garbage collection, for the TNC in-the-gaps method, and by
 * debugging functions.
 */

#include "ubifs.h"

/**
 * scan_padding_bytes - scan for padding bytes.
 * @buf: buffer to scan
 * @len: length of buffer
 *
 * This function returns the number of padding bytes on success and
 * %SCANNED_GARBAGE on failure.
+ */ +static int scan_padding_bytes(void *buf, int len) +{ + int pad_len = 0, max_pad_len = min(UBIFS_PAD_NODE_SZ, len); + uint8_t *p = buf; + + dbg_scan("not a node"); + + while (pad_len < max_pad_len && *p++ == UBIFS_PADDING_BYTE) + pad_len += 1; + + if (!pad_len || (pad_len & 7)) + return SCANNED_GARBAGE; + + dbg_scan("%d padding bytes", pad_len); + + return pad_len; +} + +/** + * ubifs_scan_a_node - scan for a node or padding. + * @c: UBIFS file-system description object + * @buf: buffer to scan + * @len: length of buffer + * @lnum: logical eraseblock number + * @offs: offset within the logical eraseblock + * @quiet: print no messages + * + * This function returns a scanning code to indicate what was scanned. + */ +int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, + int offs, int quiet) +{ + struct ubifs_ch *ch = buf; + uint32_t magic; + + magic = le32_to_cpu(ch->magic); + + if (magic == 0xFFFFFFFF) { + dbg_scan("hit empty space"); + return SCANNED_EMPTY_SPACE; + } + + if (magic != UBIFS_NODE_MAGIC) + return scan_padding_bytes(buf, len); + + if (len < UBIFS_CH_SZ) + return SCANNED_GARBAGE; + + dbg_scan("scanning %s", dbg_ntype(ch->node_type)); + + if (ubifs_check_node(c, buf, lnum, offs, quiet, 1)) + return SCANNED_A_CORRUPT_NODE; + + if (ch->node_type == UBIFS_PAD_NODE) { + struct ubifs_pad_node *pad = buf; + int pad_len = le32_to_cpu(pad->pad_len); + int node_len = le32_to_cpu(ch->len); + + /* Validate the padding node */ + if (pad_len < 0 || + offs + node_len + pad_len > c->leb_size) { + if (!quiet) { + ubifs_err("bad pad node at LEB %d:%d", + lnum, offs); + dbg_dump_node(c, pad); + } + return SCANNED_A_BAD_PAD_NODE; + } + + /* Make the node pads to 8-byte boundary */ + if ((node_len + pad_len) & 7) { + if (!quiet) { + dbg_err("bad padding length %d - %d", + offs, offs + node_len + pad_len); + } + return SCANNED_A_BAD_PAD_NODE; + } + + dbg_scan("%d bytes padded, offset now %d", + pad_len, ALIGN(offs + node_len + pad_len, 8)); + 
+ return node_len + pad_len; + } + + return SCANNED_A_NODE; +} + +/** + * ubifs_start_scan - create LEB scanning information at start of scan. + * @c: UBIFS file-system description object + * @lnum: logical eraseblock number + * @offs: offset to start at (usually zero) + * @sbuf: scan buffer (must be c->leb_size) + * + * This function returns %0 on success and a negative error code on failure. + */ +struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, + int offs, void *sbuf) +{ + struct ubifs_scan_leb *sleb; + int err; + + dbg_scan("scan LEB %d:%d", lnum, offs); + + sleb = kzalloc(sizeof(struct ubifs_scan_leb), GFP_NOFS); + if (!sleb) + return ERR_PTR(-ENOMEM); + + sleb->lnum = lnum; + INIT_LIST_HEAD(&sleb->nodes); + sleb->buf = sbuf; + + err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs); + if (err && err != -EBADMSG) { + ubifs_err("cannot read %d bytes from LEB %d:%d," + " error %d", c->leb_size - offs, lnum, offs, err); + kfree(sleb); + return ERR_PTR(err); + } + + if (err == -EBADMSG) + sleb->ecc = 1; + + return sleb; +} + +/** + * ubifs_end_scan - update LEB scanning information at end of scan. + * @c: UBIFS file-system description object + * @sleb: scanning information + * @lnum: logical eraseblock number + * @offs: offset to start at (usually zero) + * + * This function returns %0 on success and a negative error code on failure. + */ +void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, + int lnum, int offs) +{ + lnum = lnum; + dbg_scan("stop scanning LEB %d at offset %d", lnum, offs); + ubifs_assert(offs % c->min_io_size == 0); + + sleb->endpt = ALIGN(offs, c->min_io_size); +} + +/** + * ubifs_add_snod - add a scanned node to LEB scanning information. + * @c: UBIFS file-system description object + * @sleb: scanning information + * @buf: buffer containing node + * @offs: offset of node on flash + * + * This function returns %0 on success and a negative error code on failure. 
+ */ +int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, + void *buf, int offs) +{ + struct ubifs_ch *ch = buf; + struct ubifs_ino_node *ino = buf; + struct ubifs_scan_node *snod; + + snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS); + if (!snod) + return -ENOMEM; + + snod->sqnum = le64_to_cpu(ch->sqnum); + snod->type = ch->node_type; + snod->offs = offs; + snod->len = le32_to_cpu(ch->len); + snod->node = buf; + + switch (ch->node_type) { + case UBIFS_INO_NODE: + case UBIFS_DENT_NODE: + case UBIFS_XENT_NODE: + case UBIFS_DATA_NODE: + case UBIFS_TRUN_NODE: + /* + * The key is in the same place in all keyed + * nodes. + */ + key_read(c, &ino->key, &snod->key); + break; + } + list_add_tail(&snod->list, &sleb->nodes); + sleb->nodes_cnt += 1; + return 0; +} + +/** + * ubifs_scanned_corruption - print information after UBIFS scanned corruption. + * @c: UBIFS file-system description object + * @lnum: LEB number of corruption + * @offs: offset of corruption + * @buf: buffer containing corruption + */ +void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, + void *buf) +{ + int len; + + ubifs_err("corrupted data at LEB %d:%d", lnum, offs); + if (dbg_failure_mode) + return; + len = c->leb_size - offs; + if (len > 4096) + len = 4096; + dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); +} + +/** + * ubifs_scan - scan a logical eraseblock. + * @c: UBIFS file-system description object + * @lnum: logical eraseblock number + * @offs: offset to start at (usually zero) + * @sbuf: scan buffer (must be c->leb_size) + * + * This function scans LEB number @lnum and returns complete information about + * its contents. Returns an error code in case of failure. 
+ */ +struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, + int offs, void *sbuf) +{ + void *buf = sbuf + offs; + int err, len = c->leb_size - offs; + struct ubifs_scan_leb *sleb; + + sleb = ubifs_start_scan(c, lnum, offs, sbuf); + if (IS_ERR(sleb)) + return sleb; + + while (len >= 8) { + struct ubifs_ch *ch = buf; + int node_len, ret; + + dbg_scan("look at LEB %d:%d (%d bytes left)", + lnum, offs, len); + + cond_resched(); + + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); + + if (ret > 0) { + /* Padding bytes or a valid padding node */ + offs += ret; + buf += ret; + len -= ret; + continue; + } + + if (ret == SCANNED_EMPTY_SPACE) + /* Empty space is checked later */ + break; + + switch (ret) { + case SCANNED_GARBAGE: + dbg_err("garbage"); + goto corrupted; + case SCANNED_A_NODE: + break; + case SCANNED_A_CORRUPT_NODE: + case SCANNED_A_BAD_PAD_NODE: + dbg_err("bad node"); + goto corrupted; + default: + dbg_err("unknown"); + goto corrupted; + } + + err = ubifs_add_snod(c, sleb, buf, offs); + if (err) + goto error; + + node_len = ALIGN(le32_to_cpu(ch->len), 8); + offs += node_len; + buf += node_len; + len -= node_len; + } + + if (offs % c->min_io_size) + goto corrupted; + + ubifs_end_scan(c, sleb, lnum, offs); + + for (; len > 4; offs += 4, buf = buf + 4, len -= 4) + if (*(uint32_t *)buf != 0xffffffff) + break; + for (; len; offs++, buf++, len--) + if (*(uint8_t *)buf != 0xff) { + ubifs_err("corrupt empty space at LEB %d:%d", + lnum, offs); + goto corrupted; + } + + return sleb; + +corrupted: + ubifs_scanned_corruption(c, lnum, offs, buf); + err = -EUCLEAN; +error: + ubifs_err("LEB %d scanning failed", lnum); + ubifs_scan_destroy(sleb); + return ERR_PTR(err); +} + +/** + * ubifs_scan_destroy - destroy LEB scanning information. 
+ * @sleb: scanning information to free + */ +void ubifs_scan_destroy(struct ubifs_scan_leb *sleb) +{ + struct ubifs_scan_node *node; + struct list_head *head; + + head = &sleb->nodes; + while (!list_empty(head)) { + node = list_entry(head->next, struct ubifs_scan_node, list); + list_del(&node->list); + kfree(node); + } + kfree(sleb); +} diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c new file mode 100755 index 0000000..c868a52 --- /dev/null +++ b/fs/ubifs/super.c @@ -0,0 +1,1127 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Ðртём) + * Adrian Hunter + */ + +/* + * This file implements UBIFS initialization and VFS superblock operations. Some + * initialization stuff which is rather large and complex is placed at + * corresponding subsystems, but most of it is here. 
+ */ + +#include "ubifs.h" +#include <linux/math64.h> + +#define INODE_LOCKED_MAX 64 + +struct super_block *ubifs_sb; +static struct inode *inodes_locked_down[INODE_LOCKED_MAX]; + +/* shrinker.c */ + +/* List of all UBIFS file-system instances */ +struct list_head ubifs_infos; + +/* linux/fs/super.c */ + +static int sb_set(struct super_block *sb, void *data) +{ + dev_t *dev = data; + + sb->s_dev = *dev; + return 0; +} + +/** + * sget - find or create a superblock + * @type: filesystem type superblock should belong to + * @test: comparison callback + * @set: setup callback + * @data: argument to each of them + */ +struct super_block *sget(struct file_system_type *type, + int (*test)(struct super_block *,void *), + int (*set)(struct super_block *,void *), + void *data) +{ + struct super_block *s = NULL; + int err; + + s = kzalloc(sizeof(struct super_block), GFP_USER); + if (!s) { + err = -ENOMEM; + return ERR_PTR(err); + } + + INIT_LIST_HEAD(&s->s_instances); + INIT_LIST_HEAD(&s->s_inodes); + s->s_time_gran = 1000000000; + + err = set(s, data); + if (err) { + return ERR_PTR(err); + } + s->s_type = type; + strncpy(s->s_id, type->name, sizeof(s->s_id)); + list_add(&s->s_instances, &type->fs_supers); + return s; +} + +/** + * validate_inode - validate inode. + * @c: UBIFS file-system description object + * @inode: the inode to validate + * + * This is a helper function for 'ubifs_iget()' which validates various fields + * of a newly built inode to make sure they contain sane values and prevent + * possible vulnerabilities. Returns zero if the inode is all right and + * a non-zero error code if not. 
+ */ +static int validate_inode(struct ubifs_info *c, const struct inode *inode) +{ + int err; + const struct ubifs_inode *ui = ubifs_inode(inode); + + if (inode->i_size > c->max_inode_sz) { + ubifs_err("inode is too large (%lld)", + (long long)inode->i_size); + return 1; + } + + if (ui->compr_type < 0 || ui->compr_type >= UBIFS_COMPR_TYPES_CNT) { + ubifs_err("unknown compression type %d", ui->compr_type); + return 2; + } + + if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA) + return 4; + + err = dbg_check_dir_size(c, inode); + return err; +} + +struct inode *iget_locked(struct super_block *sb, unsigned long ino) +{ + struct inode *inode; + + inode = (struct inode *)malloc(sizeof(struct ubifs_inode)); + if (inode) { + inode->i_ino = ino; + inode->i_sb = sb; + list_add(&inode->i_sb_list, &sb->s_inodes); + inode->i_state = I_LOCK | I_NEW; + } + + return inode; +} + +int ubifs_iput(struct inode *inode) +{ + list_del_init(&inode->i_sb_list); + + free(inode); + return 0; +} + +/* + * Lock (save) inode in inode array for readback after recovery + */ +void iput(struct inode *inode) +{ + int i; + struct inode *ino; + + /* + * Search end of list + */ + for (i = 0; i < INODE_LOCKED_MAX; i++) { + if (inodes_locked_down[i] == NULL) + break; + } + + if (i >= INODE_LOCKED_MAX) { + ubifs_err("Error, can't lock (save) more inodes while recovery!!!"); + return; + } + + /* + * Allocate and use new inode + */ + ino = (struct inode *)malloc(sizeof(struct ubifs_inode)); + memcpy(ino, inode, sizeof(struct ubifs_inode)); + + /* + * Finally save inode in array + */ + inodes_locked_down[i] = ino; +} + +struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) +{ + int err; + union ubifs_key key; + struct ubifs_ino_node *ino; + struct ubifs_info *c = sb->s_fs_info; + struct inode *inode; + struct ubifs_inode *ui; + int i; + + dbg_gen("inode %lu", inum); + + /* + * U-Boot special handling of locked down inodes via recovery + * e.g. 
ubifs_recover_size() + */ + for (i = 0; i < INODE_LOCKED_MAX; i++) { + /* + * Exit on last entry (NULL), inode not found in list + */ + if (inodes_locked_down[i] == NULL) + break; + + if (inodes_locked_down[i]->i_ino == inum) { + /* + * We found the locked down inode in our array, + * so just return this pointer instead of creating + * a new one. + */ + return inodes_locked_down[i]; + } + } + + inode = iget_locked(sb, inum); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + ui = ubifs_inode(inode); + + ino = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); + if (!ino) { + err = -ENOMEM; + goto out; + } + + ino_key_init(c, &key, inode->i_ino); + + err = ubifs_tnc_lookup(c, &key, ino); + if (err) + goto out_ino; + + inode->i_flags |= (S_NOCMTIME | S_NOATIME); + inode->i_nlink = le32_to_cpu(ino->nlink); + inode->i_uid = le32_to_cpu(ino->uid); + inode->i_gid = le32_to_cpu(ino->gid); + inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec); + inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec); + inode->i_mtime.tv_sec = (int64_t)le64_to_cpu(ino->mtime_sec); + inode->i_mtime.tv_nsec = le32_to_cpu(ino->mtime_nsec); + inode->i_ctime.tv_sec = (int64_t)le64_to_cpu(ino->ctime_sec); + inode->i_ctime.tv_nsec = le32_to_cpu(ino->ctime_nsec); + inode->i_mode = le32_to_cpu(ino->mode); + inode->i_size = le64_to_cpu(ino->size); + + ui->data_len = le32_to_cpu(ino->data_len); + ui->flags = le32_to_cpu(ino->flags); + ui->compr_type = le16_to_cpu(ino->compr_type); + ui->creat_sqnum = le64_to_cpu(ino->creat_sqnum); + ui->synced_i_size = ui->ui_size = inode->i_size; + + err = validate_inode(c, inode); + if (err) + goto out_invalid; + + if ((inode->i_mode & S_IFMT) == S_IFLNK) { + if (ui->data_len <= 0 || ui->data_len > UBIFS_MAX_INO_DATA) { + err = 12; + goto out_invalid; + } + ui->data = kmalloc(ui->data_len + 1, GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_ino; + } + memcpy(ui->data, ino->data, ui->data_len); + ((char 
*)ui->data)[ui->data_len] = '\0'; + } + + kfree(ino); + inode->i_state &= ~(I_LOCK | I_NEW); + return inode; + +out_invalid: + ubifs_err("inode %lu validation failed, error %d", inode->i_ino, err); + dbg_dump_node(c, ino); + dbg_dump_inode(c, inode); + err = -EINVAL; +out_ino: + kfree(ino); +out: + ubifs_err("failed to read inode %lu, error %d", inode->i_ino, err); + return ERR_PTR(err); +} + +/** + * init_constants_early - initialize UBIFS constants. + * @c: UBIFS file-system description object + * + * This function initialize UBIFS constants which do not need the superblock to + * be read. It also checks that the UBI volume satisfies basic UBIFS + * requirements. Returns zero in case of success and a negative error code in + * case of failure. + */ +static int init_constants_early(struct ubifs_info *c) +{ + if (c->vi.corrupted) { + ubifs_warn("UBI volume is corrupted - read-only mode"); + c->ro_media = 1; + } + + if (c->di.ro_mode) { + ubifs_msg("read-only UBI device"); + c->ro_media = 1; + } + + if (c->vi.vol_type == UBI_STATIC_VOLUME) { + ubifs_msg("static UBI volume - read-only mode"); + c->ro_media = 1; + } + + c->leb_cnt = c->vi.size; + c->leb_size = c->vi.usable_leb_size; + c->half_leb_size = c->leb_size / 2; + c->min_io_size = c->di.min_io_size; + c->min_io_shift = fls(c->min_io_size) - 1; + + if (c->leb_size < UBIFS_MIN_LEB_SZ) { + ubifs_err("too small LEBs (%d bytes), min. is %d bytes", + c->leb_size, UBIFS_MIN_LEB_SZ); + return -EINVAL; + } + + if (c->leb_cnt < UBIFS_MIN_LEB_CNT) { + ubifs_err("too few LEBs (%d), min. is %d", + c->leb_cnt, UBIFS_MIN_LEB_CNT); + return -EINVAL; + } + + if (!is_power_of_2(c->min_io_size)) { + ubifs_err("bad min. I/O size %d", c->min_io_size); + return -EINVAL; + } + + /* + * UBIFS aligns all node to 8-byte boundary, so to make function in + * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is + * less than 8. 
+ */ + if (c->min_io_size < 8) { + c->min_io_size = 8; + c->min_io_shift = 3; + } + + c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size); + c->mst_node_alsz = ALIGN(UBIFS_MST_NODE_SZ, c->min_io_size); + + /* + * Initialize node length ranges which are mostly needed for node + * length validation. + */ + c->ranges[UBIFS_PAD_NODE].len = UBIFS_PAD_NODE_SZ; + c->ranges[UBIFS_SB_NODE].len = UBIFS_SB_NODE_SZ; + c->ranges[UBIFS_MST_NODE].len = UBIFS_MST_NODE_SZ; + c->ranges[UBIFS_REF_NODE].len = UBIFS_REF_NODE_SZ; + c->ranges[UBIFS_TRUN_NODE].len = UBIFS_TRUN_NODE_SZ; + c->ranges[UBIFS_CS_NODE].len = UBIFS_CS_NODE_SZ; + + c->ranges[UBIFS_INO_NODE].min_len = UBIFS_INO_NODE_SZ; + c->ranges[UBIFS_INO_NODE].max_len = UBIFS_MAX_INO_NODE_SZ; + c->ranges[UBIFS_ORPH_NODE].min_len = + UBIFS_ORPH_NODE_SZ + sizeof(__le64); + c->ranges[UBIFS_ORPH_NODE].max_len = c->leb_size; + c->ranges[UBIFS_DENT_NODE].min_len = UBIFS_DENT_NODE_SZ; + c->ranges[UBIFS_DENT_NODE].max_len = UBIFS_MAX_DENT_NODE_SZ; + c->ranges[UBIFS_XENT_NODE].min_len = UBIFS_XENT_NODE_SZ; + c->ranges[UBIFS_XENT_NODE].max_len = UBIFS_MAX_XENT_NODE_SZ; + c->ranges[UBIFS_DATA_NODE].min_len = UBIFS_DATA_NODE_SZ; + c->ranges[UBIFS_DATA_NODE].max_len = UBIFS_MAX_DATA_NODE_SZ; + /* + * Minimum indexing node size is amended later when superblock is + * read and the key length is known. + */ + c->ranges[UBIFS_IDX_NODE].min_len = UBIFS_IDX_NODE_SZ + UBIFS_BRANCH_SZ; + /* + * Maximum indexing node size is amended later when superblock is + * read and the fanout is known. + */ + c->ranges[UBIFS_IDX_NODE].max_len = INT_MAX; + + /* + * Initialize dead and dark LEB space watermarks. See gc.c for comments + * about these values. + */ + c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); + c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); + + /* + * Calculate how many bytes would be wasted at the end of LEB if it was + * fully filled with data nodes of maximum size. This is used in + * calculations when reporting free space. 
+ */ + c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; + + return 0; +} + +/* + * init_constants_sb - initialize UBIFS constants. + * @c: UBIFS file-system description object + * + * This is a helper function which initializes various UBIFS constants after + * the superblock has been read. It also checks various UBIFS parameters and + * makes sure they are all right. Returns zero in case of success and a + * negative error code in case of failure. + */ +static int init_constants_sb(struct ubifs_info *c) +{ + int tmp; + long long tmp64; + + c->main_bytes = (long long)c->main_lebs * c->leb_size; + c->max_znode_sz = sizeof(struct ubifs_znode) + + c->fanout * sizeof(struct ubifs_zbranch); + + tmp = ubifs_idx_node_sz(c, 1); + c->ranges[UBIFS_IDX_NODE].min_len = tmp; + c->min_idx_node_sz = ALIGN(tmp, 8); + + tmp = ubifs_idx_node_sz(c, c->fanout); + c->ranges[UBIFS_IDX_NODE].max_len = tmp; + c->max_idx_node_sz = ALIGN(tmp, 8); + + /* Make sure LEB size is large enough to fit full commit */ + tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt; + tmp = ALIGN(tmp, c->min_io_size); + if (tmp > c->leb_size) { + dbg_err("too small LEB size %d, at least %d needed", + c->leb_size, tmp); + return -EINVAL; + } + + /* + * Make sure that the log is large enough to fit reference nodes for + * all buds plus one reserved LEB. + */ + tmp64 = c->max_bud_bytes + c->leb_size - 1; + c->max_bud_cnt = div_u64(tmp64, c->leb_size); + tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1); + tmp /= c->leb_size; + tmp += 1; + if (c->log_lebs < tmp) { + dbg_err("too small log %d LEBs, required min. %d LEBs", + c->log_lebs, tmp); + return -EINVAL; + } + + /* + * When budgeting we assume worst-case scenarios when the pages are not + * be compressed and direntries are of the maximum size. + * + * Note, data, which may be stored in inodes is budgeted separately, so + * it is not included into 'c->inode_budget'. 
+ */ + c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; + c->inode_budget = UBIFS_INO_NODE_SZ; + c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; + + /* + * When the amount of flash space used by buds becomes + * 'c->max_bud_bytes', UBIFS just blocks all writers and starts commit. + * The writers are unblocked when the commit is finished. To avoid + * writers to be blocked UBIFS initiates background commit in advance, + * when number of bud bytes becomes above the limit defined below. + */ + c->bg_bud_bytes = (c->max_bud_bytes * 13) >> 4; + + /* + * Ensure minimum journal size. All the bytes in the journal heads are + * considered to be used, when calculating the current journal usage. + * Consequently, if the journal is too small, UBIFS will treat it as + * always full. + */ + tmp64 = (long long)(c->jhead_cnt + 1) * c->leb_size + 1; + if (c->bg_bud_bytes < tmp64) + c->bg_bud_bytes = tmp64; + if (c->max_bud_bytes < tmp64 + c->leb_size) + c->max_bud_bytes = tmp64 + c->leb_size; + + + return 0; +} + +/* + * init_constants_master - initialize UBIFS constants. + * @c: UBIFS file-system description object + * + * This is a helper function which initializes various UBIFS constants after + * the master node has been read. It also checks various UBIFS parameters and + * makes sure they are all right. + */ +static void init_constants_master(struct ubifs_info *c) +{ + long long tmp64; + + c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + + /* + * Calculate total amount of FS blocks. This number is not used + * internally because it does not make much sense for UBIFS, but it is + * necessary to report something for the 'statfs()' call. + * + * Subtract the LEB reserved for GC, the LEB which is reserved for + * deletions, minimum LEBs for the index, and assume only one journal + * head is available. 
+ */ + tmp64 = c->main_lebs - 1 - 1 - MIN_INDEX_LEBS - c->jhead_cnt + 1; + tmp64 *= (long long)c->leb_size - c->leb_overhead; + tmp64 = ubifs_reported_space(c, tmp64); + c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; +} + + +/** + * check_volume_empty - check if the UBI volume is empty. + * @c: UBIFS file-system description object + * + * This function checks if the UBIFS volume is empty by looking if its LEBs are + * mapped or not. The result of checking is stored in the @c->empty variable. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +static int check_volume_empty(struct ubifs_info *c) +{ + int lnum, err; + + c->empty = 1; + for (lnum = 0; lnum < c->leb_cnt; lnum++) { + err = ubi_is_mapped(c->ubi, lnum); + if (unlikely(err < 0)) + return err; + if (err == 1) { + c->empty = 0; + break; + } + + cond_resched(); + } + + return 0; +} + +/** + * mount_ubifs - mount UBIFS file-system. + * @c: UBIFS file-system description object + * + * This function mounts UBIFS file system. Returns zero in case of success and + * a negative error code in case of failure. + * + * Note, the function does not de-allocate resources it it fails half way + * through, and the caller has to do this instead. + */ +static int mount_ubifs(struct ubifs_info *c) +{ + struct super_block *sb = c->vfs_sb; + int err, mounted_read_only = (sb->s_flags & MS_RDONLY); + long long x; + size_t sz; + + err = init_constants_early(c); + if (err) + return err; + err = ubifs_debugging_init(c); + if (err) + return err; + err = check_volume_empty(c); + if (err) + goto out_free; + if (c->empty && (mounted_read_only || c->ro_media)) { + /* + * This UBI volume is empty, and read-only, or the file system + * is mounted read-only - we cannot format it. + */ + ubifs_err("can't format empty UBI volume: read-only %s", + c->ro_media ? 
"UBI volume" : "mount"); + err = -EROFS; + goto out_free; + } + + if (c->ro_media && !mounted_read_only) { + ubifs_err("cannot mount read-write - read-only media"); + err = -EROFS; + goto out_free; + } + + /* + * The requirement for the buffer is that it should fit indexing B-tree + * height amount of integers. We assume the height if the TNC tree will + * never exceed 64. + */ + err = -ENOMEM; + c->bottom_up_buf = kmalloc(BOTTOM_UP_HEIGHT * sizeof(int), GFP_KERNEL); + if (!c->bottom_up_buf) + goto out_free; + c->sbuf = vmalloc(c->leb_size); + if (!c->sbuf) + goto out_free; + /* + * We have to check all CRCs, even for data nodes, when we mount the FS + * (specifically, when we are replaying). + */ + c->always_chk_crc = 1; + err = ubifs_read_superblock(c); + if (err) + goto out_free; + /* + * Make sure the compressor which is set as default in the superblock + * or overridden by mount options is actually compiled in. + */ + + dbg_failure_mode_registration(c); + + err = init_constants_sb(c); + if (err) + goto out_free; + sz = ALIGN(c->max_idx_node_sz, c->min_io_size); + sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size); + c->cbuf = kmalloc(sz, GFP_NOFS); + if (!c->cbuf) { + err = -ENOMEM; + goto out_free; + } + + sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); + err = ubifs_read_master(c); + if (err) + goto out_master; + init_constants_master(c); + if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { + ubifs_msg("recovery needed"); + c->need_recovery = 1; + } + + err = dbg_check_idx_size(c, c->old_idx_sz); + if (err) + goto out_lpt; + err = ubifs_replay_journal(c); + if (err) + goto out_journal; + if (c->need_recovery) { + err = ubifs_recover_size(c); + if (err) + goto out_orphans; + } + + spin_lock(&ubifs_infos_lock); + list_add_tail(&c->infos_list, &ubifs_infos); + spin_unlock(&ubifs_infos_lock); + if (c->need_recovery) { + if (mounted_read_only) + ubifs_msg("recovery deferred"); + else { + c->need_recovery = 0; + 
ubifs_msg("recovery completed"); + } + } + + err = dbg_check_filesystem(c); + if (err) + goto out_infos; + + c->always_chk_crc = 0; + + ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", + c->vi.ubi_num, c->vi.vol_id, c->vi.name); + if (mounted_read_only) + ubifs_msg("mounted read-only"); + x = (long long)c->main_lebs * c->leb_size; + ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d " + "LEBs)", x, x >> 10, x >> 20, c->main_lebs); + x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; + ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " + "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); + ubifs_msg("media format: w%d/r%d (latest is w%d/r%d)", + c->fmt_version, c->ro_compat_version, + UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); + ubifs_msg("reserved for root: %llu bytes (%llu KiB)", + c->report_rp_size, c->report_rp_size >> 10); + + dbg_msg("compiled on: " __DATE__ " at " __TIME__); + dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); + dbg_msg("LEB size: %d bytes (%d KiB)", + c->leb_size, c->leb_size >> 10); + dbg_msg("data journal heads: %d", + c->jhead_cnt - NONDATA_JHEADS_CNT); + dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X" + "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X", + c->uuid[0], c->uuid[1], c->uuid[2], c->uuid[3], + c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7], + c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11], + c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]); + dbg_msg("big_lpt %d", c->big_lpt); + dbg_msg("log LEBs: %d (%d - %d)", + c->log_lebs, UBIFS_LOG_LNUM, c->log_last); + dbg_msg("LPT area LEBs: %d (%d - %d)", + c->lpt_lebs, c->lpt_first, c->lpt_last); + dbg_msg("orphan area LEBs: %d (%d - %d)", + c->orph_lebs, c->orph_first, c->orph_last); + dbg_msg("main area LEBs: %d (%d - %d)", + c->main_lebs, c->main_first, c->leb_cnt - 1); + dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", + c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); + dbg_msg("key hash type: %d", 
c->key_hash_type); + dbg_msg("tree fanout: %d", c->fanout); + dbg_msg("reserved GC LEB: %d", c->gc_lnum); + dbg_msg("first main LEB: %d", c->main_first); + dbg_msg("max. znode size %d", c->max_znode_sz); + dbg_msg("max. index node size %d", c->max_idx_node_sz); + dbg_msg("node sizes: data %zu, inode %zu, dentry %zu", + UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ); + dbg_msg("node sizes: trun %zu, sb %zu, master %zu", + UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); + dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", + UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); + dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu", + UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, + UBIFS_MAX_DENT_NODE_SZ); + dbg_msg("dead watermark: %d", c->dead_wm); + dbg_msg("dark watermark: %d", c->dark_wm); + dbg_msg("LEB overhead: %d", c->leb_overhead); + x = (long long)c->main_lebs * c->dark_wm; + dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)", + x, x >> 10, x >> 20); + dbg_msg("maximum bud bytes: %lld (%lld KiB, %lld MiB)", + c->max_bud_bytes, c->max_bud_bytes >> 10, + c->max_bud_bytes >> 20); + dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)", + c->bg_bud_bytes, c->bg_bud_bytes >> 10, + c->bg_bud_bytes >> 20); + dbg_msg("current bud bytes %lld (%lld KiB, %lld MiB)", + c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20); + dbg_msg("max. seq. number: %llu", c->max_sqnum); + dbg_msg("commit number: %llu", c->cmt_no); + + return 0; + +out_infos: + spin_lock(&ubifs_infos_lock); + list_del(&c->infos_list); + spin_unlock(&ubifs_infos_lock); +out_journal: +out_lpt: +out_orphans: +out_master: + kfree(c->mst_node); + kfree(c->rcvrd_mst_node); + if (c->bgt) + kthread_stop(c->bgt); + kfree(c->cbuf); +out_free: + vfree(c->ileb_buf); + vfree(c->sbuf); + kfree(c->bottom_up_buf); + ubifs_debugging_exit(c); + return err; +} + +/** + * ubifs_umount - un-mount UBIFS file-system. 
+ * @c: UBIFS file-system description object + * + * Note, this function is called to free allocated resourced when un-mounting, + * as well as free resources when an error occurred while we were half way + * through mounting (error path cleanup function). So it has to make sure the + * resource was actually allocated before freeing it. + */ +static void ubifs_umount(struct ubifs_info *c) +{ + dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num, + c->vi.vol_id); + + spin_lock(&ubifs_infos_lock); + list_del(&c->infos_list); + spin_unlock(&ubifs_infos_lock); + + if (c->bgt) + kthread_stop(c->bgt); + + + kfree(c->cbuf); + kfree(c->rcvrd_mst_node); + kfree(c->mst_node); + vfree(c->ileb_buf); + vfree(c->sbuf); + kfree(c->bottom_up_buf); + ubifs_debugging_exit(c); + + /* Finally free U-Boot's global copy of superblock */ + free(ubifs_sb->s_fs_info); + free(ubifs_sb); +} + +/** + * open_ubi - parse UBI device name string and open the UBI device. + * @name: UBI volume name + * @mode: UBI volume open mode + * + * There are several ways to specify UBI volumes when mounting UBIFS: + * o ubiX_Y - UBI device number X, volume Y; + * o ubiY - UBI device number 0, volume Y; + * o ubiX:NAME - mount UBI device X, volume with name NAME; + * o ubi:NAME - mount UBI device 0, volume with name NAME. + * + * Alternative '!' separator may be used instead of ':' (because some shells + * like busybox may interpret ':' as an NFS host name separator). This function + * returns ubi volume object in case of success and a negative error code in + * case of failure. 
+ */ +static struct ubi_volume_desc *open_ubi(const char *name, int mode) +{ + int dev, vol; + char *endptr; + + if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i') + return ERR_PTR(-EINVAL); + + /* ubi:NAME method */ + if ((name[3] == ':' || name[3] == '!') && name[4] != '\0') + return ubi_open_volume_nm(0, name + 4, mode); + + if (!isdigit(name[3])) + return ERR_PTR(-EINVAL); + + dev = simple_strtoul(name + 3, &endptr, 0); + + /* ubiY method */ + if (*endptr == '\0') + return ubi_open_volume(0, dev, mode); + + /* ubiX_Y method */ + if (*endptr == '_' && isdigit(endptr[1])) { + vol = simple_strtoul(endptr + 1, &endptr, 0); + if (*endptr != '\0') + return ERR_PTR(-EINVAL); + return ubi_open_volume(dev, vol, mode); + } + + /* ubiX:NAME method */ + if ((*endptr == ':' || *endptr == '!') && endptr[1] != '\0') + return ubi_open_volume_nm(dev, ++endptr, mode); + + return ERR_PTR(-EINVAL); +} + +static int ubifs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct ubi_volume_desc *ubi = sb->s_fs_info; + struct ubifs_info *c; + struct inode *root; + int err; + + c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL); + if (!c) + return -ENOMEM; + + spin_lock_init(&c->cnt_lock); + spin_lock_init(&c->cs_lock); + spin_lock_init(&c->buds_lock); + spin_lock_init(&c->space_lock); + spin_lock_init(&c->orphan_lock); + init_rwsem(&c->commit_sem); + mutex_init(&c->lp_mutex); + mutex_init(&c->tnc_mutex); + mutex_init(&c->log_mutex); + mutex_init(&c->mst_mutex); + mutex_init(&c->umount_mutex); + init_waitqueue_head(&c->cmt_wq); + c->buds = RB_ROOT; + c->old_idx = RB_ROOT; + c->size_tree = RB_ROOT; + c->orph_tree = RB_ROOT; + INIT_LIST_HEAD(&c->infos_list); + INIT_LIST_HEAD(&c->idx_gc); + INIT_LIST_HEAD(&c->replay_list); + INIT_LIST_HEAD(&c->replay_buds); + INIT_LIST_HEAD(&c->uncat_list); + INIT_LIST_HEAD(&c->empty_list); + INIT_LIST_HEAD(&c->freeable_list); + INIT_LIST_HEAD(&c->frdi_idx_list); + INIT_LIST_HEAD(&c->unclean_leb_list); + INIT_LIST_HEAD(&c->old_buds); + 
INIT_LIST_HEAD(&c->orph_list); + INIT_LIST_HEAD(&c->orph_new); + + c->highest_inum = UBIFS_FIRST_INO; + c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; + + ubi_get_volume_info(ubi, &c->vi); + ubi_get_device_info(c->vi.ubi_num, &c->di); + + /* Re-open the UBI device in read-write mode */ + c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READONLY); + if (IS_ERR(c->ubi)) { + err = PTR_ERR(c->ubi); + goto out_free; + } + c->vfs_sb = sb; + sb->s_fs_info = c; + sb->s_magic = UBIFS_SUPER_MAGIC; + sb->s_blocksize = UBIFS_BLOCK_SIZE; + sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT; + sb->s_dev = c->vi.cdev; + sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c); + if (c->max_inode_sz > MAX_LFS_FILESIZE) + sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; + + if (c->rw_incompat) { + ubifs_err("the file-system is not R/W-compatible"); + ubifs_msg("on-flash format version is w%d/r%d, but software " + "only supports up to version w%d/r%d", c->fmt_version, + c->ro_compat_version, UBIFS_FORMAT_VERSION, + UBIFS_RO_COMPAT_VERSION); + return -EROFS; + } + + mutex_lock(&c->umount_mutex); + err = mount_ubifs(c); + if (err) { + ubifs_assert(err < 0); + goto out_unlock; + } + /* Read the root inode */ + root = ubifs_iget(sb, UBIFS_ROOT_INO); + if (IS_ERR(root)) { + err = PTR_ERR(root); + goto out_umount; + } + sb->s_root = NULL; + + mutex_unlock(&c->umount_mutex); + return 0; + +out_umount: + ubifs_umount(c); +out_unlock: + mutex_unlock(&c->umount_mutex); + ubi_close_volume(c->ubi); +out_free: + kfree(c); + return err; +} + +static int sb_test(struct super_block *sb, void *data) +{ + dev_t *dev = data; + + return sb->s_dev == *dev; +} + +static int ubifs_get_sb(struct file_system_type *fs_type, int flags, + const char *name, void *data, struct vfsmount *mnt) +{ + struct ubi_volume_desc *ubi; + struct ubi_volume_info vi; + struct super_block *sb; + int err; + + dbg_gen("name %s, flags %#x", name, flags); + /* + * Get UBI device number and volume ID. 
Mount it read-only so far + * because this might be a new mount point, and UBI allows only one + * read-write user at a time. + */ + ubi = open_ubi(name, UBI_READONLY); + if (IS_ERR(ubi)) { + ubifs_err("cannot open \"%s\", error %d", + name, (int)PTR_ERR(ubi)); + return PTR_ERR(ubi); + } + ubi_get_volume_info(ubi, &vi); + + dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id); + + sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev); + if (IS_ERR(sb)) { + err = PTR_ERR(sb); + goto out_close; + } + + if (sb->s_root) { + /* A new mount point for already mounted UBIFS */ + printf("this ubi volume is already mounted"); + if ((flags ^ sb->s_flags) & MS_RDONLY) { + err = -EBUSY; + goto out_deact; + } + } else { + sb->s_flags = flags; + /* + * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is + * replaced by 'c'. + */ + sb->s_fs_info = ubi; + err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); + if (err) + goto out_deact; + /* We do not support atime */ + sb->s_flags |= MS_ACTIVE | MS_NOATIME; + } + + /* 'fill_super()' opens ubi again so we must close it here */ + ubi_close_volume(ubi); + + ubifs_sb = sb; + return 0; + +out_deact: + up_write(&sb->s_umount); +out_close: + ubi_close_volume(ubi); + return err; +} + +int __init ubifs_init(void) +{ + int err; + + BUILD_BUG_ON(sizeof(struct ubifs_ch) != 24); + + /* Make sure node sizes are 8-byte aligned */ + BUILD_BUG_ON(UBIFS_CH_SZ & 7); + BUILD_BUG_ON(UBIFS_INO_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_DENT_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_XENT_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_DATA_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_SB_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_MST_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_REF_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_CS_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_ORPH_NODE_SZ & 7); + + BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ & 7); + 
BUILD_BUG_ON(UBIFS_MAX_NODE_SZ & 7); + BUILD_BUG_ON(MIN_WRITE_SZ & 7); + + /* Check min. node size */ + BUILD_BUG_ON(UBIFS_INO_NODE_SZ < MIN_WRITE_SZ); + BUILD_BUG_ON(UBIFS_DENT_NODE_SZ < MIN_WRITE_SZ); + BUILD_BUG_ON(UBIFS_XENT_NODE_SZ < MIN_WRITE_SZ); + BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ < MIN_WRITE_SZ); + + BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ > UBIFS_MAX_NODE_SZ); + BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ > UBIFS_MAX_NODE_SZ); + BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ > UBIFS_MAX_NODE_SZ); + BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ > UBIFS_MAX_NODE_SZ); + + /* Defined node sizes */ + BUILD_BUG_ON(UBIFS_SB_NODE_SZ != 4096); + BUILD_BUG_ON(UBIFS_MST_NODE_SZ != 512); + BUILD_BUG_ON(UBIFS_INO_NODE_SZ != 160); + BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64); + + /* + * We use 2 bit wide bit-fields to store compression type, which should + * be amended if more compressors are added. The bit-fields are: + * @compr_type in 'struct ubifs_inode', @default_compr in + * 'struct ubifs_info' and @compr_type in 'struct ubifs_mount_opts'. + */ + BUILD_BUG_ON(UBIFS_COMPR_TYPES_CNT > 4); + + /* + * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to + * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. + */ + if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) { + ubifs_err("VFS page cache size is %u bytes, but UBIFS requires" + " at least 4096 bytes", + (unsigned int)PAGE_CACHE_SIZE); + return -EINVAL; + } + + + + return 0; + +out_shrinker: + return err; +} + +/* + * ubifsmount... 
+ */ + +static struct file_system_type ubifs_fs_type = { + .name = "ubifs", + .owner = THIS_MODULE, + .get_sb = ubifs_get_sb, +}; + +int ubifs_mount(char *vol_name) +{ + int flags; + void *data; + char * name; + char * buf = "ubi0:"; + struct vfsmount *mnt; + int ret; + struct ubifs_info *c; + + /* + * First unmount if allready mounted + */ + if (ubifs_sb) + ubifs_umount(ubifs_sb->s_fs_info); + INIT_LIST_HEAD(&ubifs_infos); + INIT_LIST_HEAD(&ubifs_fs_type.fs_supers); + + /* + * Mount in read-only mode + */ + flags = MS_RDONLY; + //strcat(name, vol_name); + name = malloc(20); + memcpy(name,buf,6); + //memcpy(name+5,vol_name,5); + strcat(name, vol_name); + data = NULL; + mnt = NULL; + ret = ubifs_get_sb(&ubifs_fs_type, flags, name, data, mnt); + if (ret) { + printf("Error reading superblock on volume '%s'!\n", name); + return -1; + } + c = ubifs_sb->s_fs_info; + ubi_close_volume(c->ubi); + + return 0; +} diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c new file mode 100755 index 0000000..82163d2 --- /dev/null +++ b/fs/ubifs/tnc.c @@ -0,0 +1,2767 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Ðртём) + */ + +/* + * This file implements TNC (Tree Node Cache) which caches indexing nodes of + * the UBIFS B-tree. 
+ * + * At the moment the locking rules of the TNC tree are quite simple and + * straightforward. We just have a mutex and lock it when we traverse the + * tree. If a znode is not in memory, we read it from flash while still having + * the mutex locked. + */ + +#include "ubifs.h" + +/* + * Returned codes of 'matches_name()' and 'fallible_matches_name()' functions. + * @NAME_LESS: name referred to by the zbranch is less than the name to match + * @NAME_MATCHES: names match + * @NAME_GREATER: name referred to by the zbranch is greater than the name to + * match + * @NOT_ON_MEDIA: node referred by zbranch does not exist on the media + * + * These constants were introduced to improve readability. + */ +enum { + NAME_LESS = 0, + NAME_MATCHES = 1, + NAME_GREATER = 2, + NOT_ON_MEDIA = 3, +}; + +/** + * insert_old_idx - record an index node obsoleted since the last commit start. + * @c: UBIFS file-system description object + * @lnum: LEB number of obsoleted index node + * @offs: offset of obsoleted index node + * + * Returns %0 on success, and a negative error code on failure. + * + * For recovery, there must always be a complete intact version of the index on + * flash at all times. That is called the "old index". It is the index as at the + * time of the last successful commit. Many of the index nodes in the old index + * may be dirty, but they must not be erased until the next successful commit + * (at which point that index becomes the old index). + * + * That means that the garbage collection and the in-the-gaps method of + * committing must be able to determine if an index node is in the old index. + * Most of the old index nodes can be found by looking up the TNC using the + * 'lookup_znode()' function. However, some of the old index nodes may have + * been deleted from the current index or may have been changed so much that + * they cannot be easily found. In those cases, an entry is added to an RB-tree. + * That is what this function does. 
The RB-tree is ordered by LEB number and + * offset because they uniquely identify the old index node. + */ +static int insert_old_idx(struct ubifs_info *c, int lnum, int offs) +{ + struct ubifs_old_idx *old_idx, *o; + struct rb_node **p, *parent = NULL; + + old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS); + if (unlikely(!old_idx)) + return -ENOMEM; + old_idx->lnum = lnum; + old_idx->offs = offs; + + p = &c->old_idx.rb_node; + while (*p) { + parent = *p; + o = rb_entry(parent, struct ubifs_old_idx, rb); + if (lnum < o->lnum) + p = &(*p)->rb_left; + else if (lnum > o->lnum) + p = &(*p)->rb_right; + else if (offs < o->offs) + p = &(*p)->rb_left; + else if (offs > o->offs) + p = &(*p)->rb_right; + else { + ubifs_err("old idx added twice!"); + kfree(old_idx); + return 0; + } + } + rb_link_node(&old_idx->rb, parent, p); + rb_insert_color(&old_idx->rb, &c->old_idx); + return 0; +} + +/** + * insert_old_idx_znode - record a znode obsoleted since last commit start. + * @c: UBIFS file-system description object + * @znode: znode of obsoleted index node + * + * Returns %0 on success, and a negative error code on failure. + */ +int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode) +{ + if (znode->parent) { + struct ubifs_zbranch *zbr; + + zbr = &znode->parent->zbranch[znode->iip]; + if (zbr->len) + return insert_old_idx(c, zbr->lnum, zbr->offs); + } else + if (c->zroot.len) + return insert_old_idx(c, c->zroot.lnum, + c->zroot.offs); + return 0; +} + +/** + * ins_clr_old_idx_znode - record a znode obsoleted since last commit start. + * @c: UBIFS file-system description object + * @znode: znode of obsoleted index node + * + * Returns %0 on success, and a negative error code on failure. 
+ */ +static int ins_clr_old_idx_znode(struct ubifs_info *c, + struct ubifs_znode *znode) +{ + int err; + + if (znode->parent) { + struct ubifs_zbranch *zbr; + + zbr = &znode->parent->zbranch[znode->iip]; + if (zbr->len) { + err = insert_old_idx(c, zbr->lnum, zbr->offs); + if (err) + return err; + zbr->lnum = 0; + zbr->offs = 0; + zbr->len = 0; + } + } else + if (c->zroot.len) { + err = insert_old_idx(c, c->zroot.lnum, c->zroot.offs); + if (err) + return err; + c->zroot.lnum = 0; + c->zroot.offs = 0; + c->zroot.len = 0; + } + return 0; +} + +/** + * destroy_old_idx - destroy the old_idx RB-tree. + * @c: UBIFS file-system description object + * + * During start commit, the old_idx RB-tree is used to avoid overwriting index + * nodes that were in the index last commit but have since been deleted. This + * is necessary for recovery i.e. the old index must be kept intact until the + * new index is successfully written. The old-idx RB-tree is used for the + * in-the-gaps method of writing index nodes and is destroyed every commit. + */ +void destroy_old_idx(struct ubifs_info *c) +{ + struct rb_node *this = c->old_idx.rb_node; + struct ubifs_old_idx *old_idx; + + while (this) { + if (this->rb_left) { + this = this->rb_left; + continue; + } else if (this->rb_right) { + this = this->rb_right; + continue; + } + old_idx = rb_entry(this, struct ubifs_old_idx, rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &old_idx->rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + kfree(old_idx); + } + c->old_idx = RB_ROOT; +} + +/** + * copy_znode - copy a dirty znode. + * @c: UBIFS file-system description object + * @znode: znode to copy + * + * A dirty znode being committed may not be changed, so it is copied. 
+ */ +static struct ubifs_znode *copy_znode(struct ubifs_info *c, + struct ubifs_znode *znode) +{ + struct ubifs_znode *zn; + + zn = kmalloc(c->max_znode_sz, GFP_NOFS); + if (unlikely(!zn)) + return ERR_PTR(-ENOMEM); + + memcpy(zn, znode, c->max_znode_sz); + zn->cnext = NULL; + __set_bit(DIRTY_ZNODE, &zn->flags); + __clear_bit(COW_ZNODE, &zn->flags); + + ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); + __set_bit(OBSOLETE_ZNODE, &znode->flags); + + if (znode->level != 0) { + int i; + const int n = zn->child_cnt; + + /* The children now have new parent */ + for (i = 0; i < n; i++) { + struct ubifs_zbranch *zbr = &zn->zbranch[i]; + + if (zbr->znode) + zbr->znode->parent = zn; + } + } + + atomic_long_inc(&c->dirty_zn_cnt); + return zn; +} + +/** + * add_idx_dirt - add dirt due to a dirty znode. + * @c: UBIFS file-system description object + * @lnum: LEB number of index node + * @dirt: size of index node + * + * This function updates lprops dirty space and the new size of the index. + */ +static int add_idx_dirt(struct ubifs_info *c, int lnum, int dirt) +{ + c->calc_idx_sz -= ALIGN(dirt, 8); + return ubifs_add_dirt(c, lnum, dirt); +} + +/** + * dirty_cow_znode - ensure a znode is not being committed. + * @c: UBIFS file-system description object + * @zbr: branch of znode to check + * + * Returns dirtied znode on success or negative error code on failure. 
+ */ +static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, + struct ubifs_zbranch *zbr) +{ + struct ubifs_znode *znode = zbr->znode; + struct ubifs_znode *zn; + int err; + + if (!test_bit(COW_ZNODE, &znode->flags)) { + /* znode is not being committed */ + if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) { + atomic_long_inc(&c->dirty_zn_cnt); + atomic_long_dec(&c->clean_zn_cnt); + atomic_long_dec(&ubifs_clean_zn_cnt); + err = add_idx_dirt(c, zbr->lnum, zbr->len); + if (unlikely(err)) + return ERR_PTR(err); + } + return znode; + } + + zn = copy_znode(c, znode); + if (IS_ERR(zn)) + return zn; + + if (zbr->len) { + err = insert_old_idx(c, zbr->lnum, zbr->offs); + if (unlikely(err)) + return ERR_PTR(err); + err = add_idx_dirt(c, zbr->lnum, zbr->len); + } else + err = 0; + + zbr->znode = zn; + zbr->lnum = 0; + zbr->offs = 0; + zbr->len = 0; + + if (unlikely(err)) + return ERR_PTR(err); + return zn; +} + +/** + * lnc_add - add a leaf node to the leaf node cache. + * @c: UBIFS file-system description object + * @zbr: zbranch of leaf node + * @node: leaf node + * + * Leaf nodes are non-index nodes directory entry nodes or data nodes. The + * purpose of the leaf node cache is to save re-reading the same leaf node over + * and over again. Most things are cached by VFS, however the file system must + * cache directory entries for readdir and for resolving hash collisions. The + * present implementation of the leaf node cache is extremely simple, and + * allows for error returns that are not used but that may be needed if a more + * complex implementation is created. + * + * Note, this function does not add the @node object to LNC directly, but + * allocates a copy of the object and adds the copy to LNC. The reason for this + * is that @node has been allocated outside of the TNC subsystem and will be + * used with @c->tnc_mutex unlock upon return from the TNC subsystem. But LNC + * may be changed at any time, e.g. freed by the shrinker. 
+ */ +static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr, + const void *node) +{ + int err; + void *lnc_node; + const struct ubifs_dent_node *dent = node; + + ubifs_assert(!zbr->leaf); + ubifs_assert(zbr->len != 0); + ubifs_assert(is_hash_key(c, &zbr->key)); + + err = ubifs_validate_entry(c, dent); + if (err) { + dbg_dump_stack(); + dbg_dump_node(c, dent); + return err; + } + + lnc_node = kmalloc(zbr->len, GFP_NOFS); + if (!lnc_node) + /* We don't have to have the cache, so no error */ + return 0; + + memcpy(lnc_node, node, zbr->len); + zbr->leaf = lnc_node; + return 0; +} + + /** + * lnc_add_directly - add a leaf node to the leaf-node-cache. + * @c: UBIFS file-system description object + * @zbr: zbranch of leaf node + * @node: leaf node + * + * This function is similar to 'lnc_add()', but it does not create a copy of + * @node but inserts @node to TNC directly. + */ +static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *node) +{ + int err; + + ubifs_assert(!zbr->leaf); + ubifs_assert(zbr->len != 0); + + err = ubifs_validate_entry(c, node); + if (err) { + dbg_dump_stack(); + dbg_dump_node(c, node); + return err; + } + + zbr->leaf = node; + return 0; +} + +/** + * lnc_free - remove a leaf node from the leaf node cache. + * @zbr: zbranch of leaf node + * @node: leaf node + */ +static void lnc_free(struct ubifs_zbranch *zbr) +{ + if (!zbr->leaf) + return; + kfree(zbr->leaf); + zbr->leaf = NULL; +} + +/** + * tnc_read_node_nm - read a "hashed" leaf node. + * @c: UBIFS file-system description object + * @zbr: key and position of the node + * @node: node is returned here + * + * This function reads a "hashed" node defined by @zbr from the leaf node cache + * (in it is there) or from the hash media, in which case the node is also + * added to LNC. Returns zero in case of success or a negative negative error + * code in case of failure. 
+ */ +static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *node) +{ + int err; + + ubifs_assert(is_hash_key(c, &zbr->key)); + + if (zbr->leaf) { + /* Read from the leaf node cache */ + ubifs_assert(zbr->len != 0); + memcpy(node, zbr->leaf, zbr->len); + return 0; + } + + err = ubifs_tnc_read_node(c, zbr, node); + if (err) + return err; + + /* Add the node to the leaf node cache */ + err = lnc_add(c, zbr, node); + return err; +} + +/** + * try_read_node - read a node if it is a node. + * @c: UBIFS file-system description object + * @buf: buffer to read to + * @type: node type + * @len: node length (not aligned) + * @lnum: LEB number of node to read + * @offs: offset of node to read + * + * This function tries to read a node of known type and length, checks it and + * stores it in @buf. This function returns %1 if a node is present and %0 if + * a node is not present. A negative error code is returned for I/O errors. + * This function performs that same function as ubifs_read_node except that + * it does not require that there is actually a node present and instead + * the return code indicates if a node was read. + * + * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc + * is true (it is controlled by corresponding mount option). However, if + * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always + * checked. 
+ */ +static int try_read_node(const struct ubifs_info *c, void *buf, int type, + int len, int lnum, int offs) +{ + int err, node_len; + struct ubifs_ch *ch = buf; + uint32_t crc, node_crc; + + dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); + + err = ubi_read(c->ubi, lnum, buf, offs, len); + if (err) { + ubifs_err("cannot read node type %d from LEB %d:%d, error %d", + type, lnum, offs, err); + return err; + } + + if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) + return 0; + + if (ch->node_type != type) + return 0; + + node_len = le32_to_cpu(ch->len); + if (node_len != len) + return 0; + + if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc) + return 1; + + crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); + node_crc = le32_to_cpu(ch->crc); + if (crc != node_crc) + return 0; + + return 1; +} + +/** + * fallible_read_node - try to read a leaf node. + * @c: UBIFS file-system description object + * @key: key of node to read + * @zbr: position of node + * @node: node returned + * + * This function tries to read a node and returns %1 if the node is read, %0 + * if the node is not present, and a negative error code in the case of error. + */ +static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_zbranch *zbr, void *node) +{ + int ret; + + dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key)); + + ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum, + zbr->offs); + if (ret == 1) { + union ubifs_key node_key; + struct ubifs_dent_node *dent = node; + + /* All nodes have key in the same place */ + key_read(c, &dent->key, &node_key); + if (keys_cmp(c, key, &node_key) != 0) + ret = 0; + } + if (ret == 0 && c->replaying) + dbg_mnt("dangling branch LEB %d:%d len %d, key %s", + zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); + return ret; +} + +/** + * matches_name - determine if a direntry or xattr entry matches a given name. 
+ * @c: UBIFS file-system description object + * @zbr: zbranch of dent + * @nm: name to match + * + * This function checks if xentry/direntry referred by zbranch @zbr matches name + * @nm. Returns %NAME_MATCHES if it does, %NAME_LESS if the name referred by + * @zbr is less than @nm, and %NAME_GREATER if it is greater than @nm. In case + * of failure, a negative error code is returned. + */ +static int matches_name(struct ubifs_info *c, struct ubifs_zbranch *zbr, + const struct qstr *nm) +{ + struct ubifs_dent_node *dent; + int nlen, err; + + /* If possible, match against the dent in the leaf node cache */ + if (!zbr->leaf) { + dent = kmalloc(zbr->len, GFP_NOFS); + if (!dent) + return -ENOMEM; + + err = ubifs_tnc_read_node(c, zbr, dent); + if (err) + goto out_free; + + /* Add the node to the leaf node cache */ + err = lnc_add_directly(c, zbr, dent); + if (err) + goto out_free; + } else + dent = zbr->leaf; + + nlen = le16_to_cpu(dent->nlen); + err = memcmp(dent->name, nm->name, min(nlen, nm->len)); + if (err == 0) { + if (nlen == nm->len) + return NAME_MATCHES; + else if (nlen < nm->len) + return NAME_LESS; + else + return NAME_GREATER; + } else if (err < 0) + return NAME_LESS; + else + return NAME_GREATER; + +out_free: + kfree(dent); + return err; +} + +/** + * get_znode - get a TNC znode that may not be loaded yet. + * @c: UBIFS file-system description object + * @znode: parent znode + * @n: znode branch slot number + * + * This function returns the znode or a negative error code. + */ +static struct ubifs_znode *get_znode(struct ubifs_info *c, + struct ubifs_znode *znode, int n) +{ + struct ubifs_zbranch *zbr; + + zbr = &znode->zbranch[n]; + if (zbr->znode) + znode = zbr->znode; + else + znode = ubifs_load_znode(c, zbr, znode, n); + return znode; +} + +/** + * tnc_next - find next TNC entry. 
+ * @c: UBIFS file-system description object + * @zn: znode is passed and returned here + * @n: znode branch slot number is passed and returned here + * + * This function returns %0 if the next TNC entry is found, %-ENOENT if there is + * no next entry, or a negative error code otherwise. + */ +static int tnc_next(struct ubifs_info *c, struct ubifs_znode **zn, int *n) +{ + struct ubifs_znode *znode = *zn; + int nn = *n; + + nn += 1; + if (nn < znode->child_cnt) { + *n = nn; + return 0; + } + while (1) { + struct ubifs_znode *zp; + + zp = znode->parent; + if (!zp) + return -ENOENT; + nn = znode->iip + 1; + znode = zp; + if (nn < znode->child_cnt) { + znode = get_znode(c, znode, nn); + if (IS_ERR(znode)) + return PTR_ERR(znode); + while (znode->level != 0) { + znode = get_znode(c, znode, 0); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + nn = 0; + break; + } + } + *zn = znode; + *n = nn; + return 0; +} + +/** + * tnc_prev - find previous TNC entry. + * @c: UBIFS file-system description object + * @zn: znode is returned here + * @n: znode branch slot number is passed and returned here + * + * This function returns %0 if the previous TNC entry is found, %-ENOENT if + * there is no next entry, or a negative error code otherwise. + */ +static int tnc_prev(struct ubifs_info *c, struct ubifs_znode **zn, int *n) +{ + struct ubifs_znode *znode = *zn; + int nn = *n; + + if (nn > 0) { + *n = nn - 1; + return 0; + } + while (1) { + struct ubifs_znode *zp; + + zp = znode->parent; + if (!zp) + return -ENOENT; + nn = znode->iip - 1; + znode = zp; + if (nn >= 0) { + znode = get_znode(c, znode, nn); + if (IS_ERR(znode)) + return PTR_ERR(znode); + while (znode->level != 0) { + nn = znode->child_cnt - 1; + znode = get_znode(c, znode, nn); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + nn = znode->child_cnt - 1; + break; + } + } + *zn = znode; + *n = nn; + return 0; +} + +/** + * resolve_collision - resolve a collision. 
+ * @c: UBIFS file-system description object + * @key: key of a directory or extended attribute entry + * @zn: znode is returned here + * @n: zbranch number is passed and returned here + * @nm: name of the entry + * + * This function is called for "hashed" keys to make sure that the found key + * really corresponds to the looked up node (directory or extended attribute + * entry). It returns %1 and sets @zn and @n if the collision is resolved. + * %0 is returned if @nm is not found and @zn and @n are set to the previous + * entry, i.e. to the entry after which @nm could follow if it were in TNC. + * This means that @n may be set to %-1 if the leftmost key in @zn is the + * previous one. A negative error code is returned on failures. + */ +static int resolve_collision(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode **zn, int *n, + const struct qstr *nm) +{ + int err; + + err = matches_name(c, &(*zn)->zbranch[*n], nm); + if (unlikely(err < 0)) + return err; + if (err == NAME_MATCHES) + return 1; + + if (err == NAME_GREATER) { + /* Look left */ + while (1) { + err = tnc_prev(c, zn, n); + if (err == -ENOENT) { + ubifs_assert(*n == 0); + *n = -1; + return 0; + } + if (err < 0) + return err; + if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) { + /* + * We have found the branch after which we would + * like to insert, but inserting in this znode + * may still be wrong. Consider the following 3 + * znodes, in the case where we are resolving a + * collision with Key2. + * + * znode zp + * ---------------------- + * level 1 | Key0 | Key1 | + * ----------------------- + * | | + * znode za | | znode zb + * ------------ ------------ + * level 0 | Key0 | | Key2 | + * ------------ ------------ + * + * The lookup finds Key2 in znode zb. Lets say + * there is no match and the name is greater so + * we look left. When we find Key0, we end up + * here. If we return now, we will insert into + * znode za at slot n = 1. 
But that is invalid + * according to the parent's keys. Key2 must + * be inserted into znode zb. + * + * Note, this problem is not relevant for the + * case when we go right, because + * 'tnc_insert()' would correct the parent key. + */ + if (*n == (*zn)->child_cnt - 1) { + err = tnc_next(c, zn, n); + if (err) { + /* Should be impossible */ + ubifs_assert(0); + if (err == -ENOENT) + err = -EINVAL; + return err; + } + ubifs_assert(*n == 0); + *n = -1; + } + return 0; + } + err = matches_name(c, &(*zn)->zbranch[*n], nm); + if (err < 0) + return err; + if (err == NAME_LESS) + return 0; + if (err == NAME_MATCHES) + return 1; + ubifs_assert(err == NAME_GREATER); + } + } else { + int nn = *n; + struct ubifs_znode *znode = *zn; + + /* Look right */ + while (1) { + err = tnc_next(c, &znode, &nn); + if (err == -ENOENT) + return 0; + if (err < 0) + return err; + if (keys_cmp(c, &znode->zbranch[nn].key, key)) + return 0; + err = matches_name(c, &znode->zbranch[nn], nm); + if (err < 0) + return err; + if (err == NAME_GREATER) + return 0; + *zn = znode; + *n = nn; + if (err == NAME_MATCHES) + return 1; + ubifs_assert(err == NAME_LESS); + } + } +} + +/** + * fallible_matches_name - determine if a dent matches a given name. + * @c: UBIFS file-system description object + * @zbr: zbranch of dent + * @nm: name to match + * + * This is a "fallible" version of 'matches_name()' function which does not + * panic if the direntry/xentry referred by @zbr does not exist on the media. + * + * This function checks if xentry/direntry referred by zbranch @zbr matches name + * @nm. Returns %NAME_MATCHES it does, %NAME_LESS if the name referred by @zbr + * is less than @nm, %NAME_GREATER if it is greater than @nm, and @NOT_ON_MEDIA + * if xentry/direntry referred by @zbr does not exist on the media. A negative + * error code is returned in case of failure. 
+ */ +static int fallible_matches_name(struct ubifs_info *c, + struct ubifs_zbranch *zbr, + const struct qstr *nm) +{ + struct ubifs_dent_node *dent; + int nlen, err; + + /* If possible, match against the dent in the leaf node cache */ + if (!zbr->leaf) { + dent = kmalloc(zbr->len, GFP_NOFS); + if (!dent) + return -ENOMEM; + + err = fallible_read_node(c, &zbr->key, zbr, dent); + if (err < 0) + goto out_free; + if (err == 0) { + /* The node was not present */ + err = NOT_ON_MEDIA; + goto out_free; + } + ubifs_assert(err == 1); + + err = lnc_add_directly(c, zbr, dent); + if (err) + goto out_free; + } else + dent = zbr->leaf; + + nlen = le16_to_cpu(dent->nlen); + err = memcmp(dent->name, nm->name, min(nlen, nm->len)); + if (err == 0) { + if (nlen == nm->len) + return NAME_MATCHES; + else if (nlen < nm->len) + return NAME_LESS; + else + return NAME_GREATER; + } else if (err < 0) + return NAME_LESS; + else + return NAME_GREATER; + +out_free: + kfree(dent); + return err; +} + +/** + * fallible_resolve_collision - resolve a collision even if nodes are missing. + * @c: UBIFS file-system description object + * @key: key + * @zn: znode is returned here + * @n: branch number is passed and returned here + * @nm: name of directory entry + * @adding: indicates caller is adding a key to the TNC + * + * This is a "fallible" version of the 'resolve_collision()' function which + * does not panic if one of the nodes referred to by TNC does not exist on the + * media. This may happen when replaying the journal if a deleted node was + * Garbage-collected and the commit was not done. A branch that refers to a node + * that is not present is called a dangling branch. 
The following are the return + * codes for this function: + * o if @nm was found, %1 is returned and @zn and @n are set to the found + * branch; + * o if we are @adding and @nm was not found, %0 is returned; + * o if we are not @adding and @nm was not found, but a dangling branch was + * found, then %1 is returned and @zn and @n are set to the dangling branch; + * o a negative error code is returned in case of failure. + */ +static int fallible_resolve_collision(struct ubifs_info *c, + const union ubifs_key *key, + struct ubifs_znode **zn, int *n, + const struct qstr *nm, int adding) +{ + struct ubifs_znode *o_znode = NULL, *znode = *zn; + int uninitialized_var(o_n), err, cmp, unsure = 0, nn = *n; + + cmp = fallible_matches_name(c, &znode->zbranch[nn], nm); + if (unlikely(cmp < 0)) + return cmp; + if (cmp == NAME_MATCHES) + return 1; + if (cmp == NOT_ON_MEDIA) { + o_znode = znode; + o_n = nn; + /* + * We are unlucky and hit a dangling branch straight away. + * Now we do not really know where to go to find the needed + * branch - to the left or to the right. Well, let's try left. 
+ */ + unsure = 1; + } else if (!adding) + unsure = 1; /* Remove a dangling branch wherever it is */ + + if (cmp == NAME_GREATER || unsure) { + /* Look left */ + while (1) { + err = tnc_prev(c, zn, n); + if (err == -ENOENT) { + ubifs_assert(*n == 0); + *n = -1; + break; + } + if (err < 0) + return err; + if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) { + /* See comments in 'resolve_collision()' */ + if (*n == (*zn)->child_cnt - 1) { + err = tnc_next(c, zn, n); + if (err) { + /* Should be impossible */ + ubifs_assert(0); + if (err == -ENOENT) + err = -EINVAL; + return err; + } + ubifs_assert(*n == 0); + *n = -1; + } + break; + } + err = fallible_matches_name(c, &(*zn)->zbranch[*n], nm); + if (err < 0) + return err; + if (err == NAME_MATCHES) + return 1; + if (err == NOT_ON_MEDIA) { + o_znode = *zn; + o_n = *n; + continue; + } + if (!adding) + continue; + if (err == NAME_LESS) + break; + else + unsure = 0; + } + } + + if (cmp == NAME_LESS || unsure) { + /* Look right */ + *zn = znode; + *n = nn; + while (1) { + err = tnc_next(c, &znode, &nn); + if (err == -ENOENT) + break; + if (err < 0) + return err; + if (keys_cmp(c, &znode->zbranch[nn].key, key)) + break; + err = fallible_matches_name(c, &znode->zbranch[nn], nm); + if (err < 0) + return err; + if (err == NAME_GREATER) + break; + *zn = znode; + *n = nn; + if (err == NAME_MATCHES) + return 1; + if (err == NOT_ON_MEDIA) { + o_znode = znode; + o_n = nn; + } + } + } + + /* Never match a dangling branch when adding */ + if (adding || !o_znode) + return 0; + + dbg_mnt("dangling match LEB %d:%d len %d %s", + o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs, + o_znode->zbranch[o_n].len, DBGKEY(key)); + *zn = o_znode; + *n = o_n; + return 1; +} + +/** + * matches_position - determine if a zbranch matches a given position. + * @zbr: zbranch of dent + * @lnum: LEB number of dent to match + * @offs: offset of dent to match + * + * This function returns %1 if @lnum:@offs matches, and %0 otherwise. 
+ */ +static int matches_position(struct ubifs_zbranch *zbr, int lnum, int offs) +{ + if (zbr->lnum == lnum && zbr->offs == offs) + return 1; + else + return 0; +} + +/** + * resolve_collision_directly - resolve a collision directly. + * @c: UBIFS file-system description object + * @key: key of directory entry + * @zn: znode is passed and returned here + * @n: zbranch number is passed and returned here + * @lnum: LEB number of dent node to match + * @offs: offset of dent node to match + * + * This function is used for "hashed" keys to make sure the found directory or + * extended attribute entry node is what was looked for. It is used when the + * flash address of the right node is known (@lnum:@offs) which makes it much + * easier to resolve collisions (no need to read entries and match full + * names). This function returns %1 and sets @zn and @n if the collision is + * resolved, %0 if @lnum:@offs is not found and @zn and @n are set to the + * previous directory entry. Otherwise a negative error code is returned. 
+ */ +static int resolve_collision_directly(struct ubifs_info *c, + const union ubifs_key *key, + struct ubifs_znode **zn, int *n, + int lnum, int offs) +{ + struct ubifs_znode *znode; + int nn, err; + + znode = *zn; + nn = *n; + if (matches_position(&znode->zbranch[nn], lnum, offs)) + return 1; + + /* Look left */ + while (1) { + err = tnc_prev(c, &znode, &nn); + if (err == -ENOENT) + break; + if (err < 0) + return err; + if (keys_cmp(c, &znode->zbranch[nn].key, key)) + break; + if (matches_position(&znode->zbranch[nn], lnum, offs)) { + *zn = znode; + *n = nn; + return 1; + } + } + + /* Look right */ + znode = *zn; + nn = *n; + while (1) { + err = tnc_next(c, &znode, &nn); + if (err == -ENOENT) + return 0; + if (err < 0) + return err; + if (keys_cmp(c, &znode->zbranch[nn].key, key)) + return 0; + *zn = znode; + *n = nn; + if (matches_position(&znode->zbranch[nn], lnum, offs)) + return 1; + } +} + +/** + * dirty_cow_bottom_up - dirty a znode and its ancestors. + * @c: UBIFS file-system description object + * @znode: znode to dirty + * + * If we do not have a unique key that resides in a znode, then we cannot + * dirty that znode from the top down (i.e. by using lookup_level0_dirty) + * This function records the path back to the last dirty ancestor, and then + * dirties the znodes on that path. 
+ */ +static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c, + struct ubifs_znode *znode) +{ + /* Returns the znode produced by the last dirty_cow_znode() call, or an ERR_PTR() value on failure */ struct ubifs_znode *zp; + /* path[] collects the parent-slot index (iip) of every znode passed on the way up; p counts the entries */ int *path = c->bottom_up_buf, p = 0; + + ubifs_assert(c->zroot.znode); + ubifs_assert(znode); + if (c->zroot.znode->level > BOTTOM_UP_HEIGHT) { + /* Tree is taller than BOTTOM_UP_HEIGHT: replace the path buffer with one holding 'level' ints */ kfree(c->bottom_up_buf); + c->bottom_up_buf = kmalloc(c->zroot.znode->level * sizeof(int), + GFP_NOFS); + if (!c->bottom_up_buf) + return ERR_PTR(-ENOMEM); + path = c->bottom_up_buf; + } + if (c->zroot.znode->level) { + /* Go up, recording slot numbers, until the root is reached or the stop condition below holds */ + while (1) { + int n; + + zp = znode->parent; + if (!zp) + break; + n = znode->iip; + ubifs_assert(p < c->zroot.znode->level); + path[p++] = n; + if (!zp->cnext && ubifs_zn_dirty(znode)) + break; + znode = zp; + } + } + + /* Come back down, dirtying as we go */ + while (1) { + struct ubifs_zbranch *zbr; + + zp = znode->parent; + if (zp) { + ubifs_assert(path[p - 1] >= 0); + ubifs_assert(path[p - 1] < zp->child_cnt); + zbr = &zp->zbranch[path[--p]]; + znode = dirty_cow_znode(c, zbr); + } else { + ubifs_assert(znode == c->zroot.znode); + znode = dirty_cow_znode(c, &c->zroot); + } + /* Stop on error or once the recorded path has been fully replayed */ if (IS_ERR(znode) || !p) + break; + ubifs_assert(path[p - 1] >= 0); + ubifs_assert(path[p - 1] < znode->child_cnt); + znode = znode->zbranch[path[p - 1]].znode; + } + + return znode; +} + +/** + * ubifs_lookup_level0 - search for zero-level znode. + * @c: UBIFS file-system description object + * @key: key to lookup + * @zn: znode is returned here + * @n: znode branch slot number is returned here + * + * This function looks up the TNC tree and searches for the zero-level znode which + * refers to key @key. The found zero-level znode is returned in @zn. There are 3 + * cases: + * o exact match, i.e. 
the found zero-level znode contains key @key, then %1 + * is returned and slot number of the matched branch is stored in @n; + * o not exact match, which means that zero-level znode does not contain + * @key, then %0 is returned and slot number of the closest branch is stored + * in @n; + * o @key is so small that it is even less than the lowest key of the + * leftmost zero-level node, then %0 is returned and %-1 is stored in @n. + * + * Note, when the TNC tree is traversed, some znodes may be absent, then this + * function reads corresponding indexing nodes and inserts them to TNC. In + * case of failure, a negative error code is returned. + */ +int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode **zn, int *n) +{ + int err, exact; + struct ubifs_znode *znode; + unsigned long time = get_seconds(); + + dbg_tnc("search key %s", DBGKEY(key)); + + znode = c->zroot.znode; + if (unlikely(!znode)) { + /* Root znode is not loaded yet: load it now */ znode = ubifs_load_znode(c, &c->zroot, NULL, 0); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + + znode->time = time; + + while (1) { + struct ubifs_zbranch *zbr; + + exact = ubifs_search_zbranch(c, znode, key, n); + + if (znode->level == 0) + break; + + /* On an internal level a negative slot is clamped to the leftmost child */ if (*n < 0) + *n = 0; + zbr = &znode->zbranch[*n]; + + if (zbr->znode) { + znode->time = time; + znode = zbr->znode; + continue; + } + + /* znode is not in TNC cache, load it from the media */ + znode = ubifs_load_znode(c, zbr, znode, *n); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + + *zn = znode; + /* Only a missed "hashed" key with *n == -1 needs the look-left handling below */ if (exact || !is_hash_key(c, key) || *n != -1) { + dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n); + return exact; + } + + /* + * Here is a tricky place. We have not found the key and this is a + * "hashed" key, which may collide. 
The rest of the code deals with + * situations like this: + * + * | 3 | 5 | + * / \ + * | 3 | 5 | | 6 | 7 | (x) + * + * Or a more complex example: + * + * | 1 | 5 | + * / \ + * | 1 | 3 | | 5 | 8 | + * \ / + * | 5 | 5 | | 6 | 7 | (x) + * + * In the examples, if we are looking for key "5", we may reach nodes + * marked with "(x)". In this case what we have to do is to look at the + * left and see if there is "5" key there. If there is, we have to + * return it. + * + * Note, this whole situation is possible because we allow to have + * elements which are equivalent to the next key in the parent in the + * children of current znode. For example, this happens if we split a + * znode like this: | 3 | 5 | 5 | 6 | 7 |, which results in something + * like this: + * | 3 | 5 | + * / \ + * | 3 | 5 | | 5 | 6 | 7 | + * ^ + * And this becomes what is at the first "picture" after key "5" marked + * with "^" is removed. What could be done is we could prohibit + * splitting in the middle of the colliding sequence. Also, when + * removing the leftmost key, we would have to correct the key of the + * parent node, which would introduce additional complications. Namely, + * if we changed the leftmost key of the parent znode, the garbage + * collector would be unable to find it (GC is doing this when GC'ing + * indexing LEBs). Although we already have an additional RB-tree where + * we save such changed znodes (see 'ins_clr_old_idx_znode()') until + * after the commit. But anyway, this does not look easy to implement + * so we did not try this. 
+ */ + err = tnc_prev(c, &znode, n); + if (err == -ENOENT) { + dbg_tnc("found 0, lvl %d, n -1", znode->level); + *n = -1; + return 0; + } + if (unlikely(err < 0)) + return err; + /* The entry on the left has a different key: report "not found" */ if (keys_cmp(c, key, &znode->zbranch[*n].key)) { + dbg_tnc("found 0, lvl %d, n -1", znode->level); + *n = -1; + return 0; + } + + dbg_tnc("found 1, lvl %d, n %d", znode->level, *n); + *zn = znode; + return 1; +} + +/** + * lookup_level0_dirty - search for zero-level znode dirtying. + * @c: UBIFS file-system description object + * @key: key to lookup + * @zn: znode is returned here + * @n: znode branch slot number is returned here + * + * This function looks up the TNC tree and searches for the zero-level znode which + * refers to key @key. The found zero-level znode is returned in @zn. There are 3 + * cases: + * o exact match, i.e. the found zero-level znode contains key @key, then %1 + * is returned and slot number of the matched branch is stored in @n; + * o not exact match, which means that zero-level znode does not contain @key + * then %0 is returned and slot number of the closest branch is stored in + * @n; + * o @key is so small that it is even less than the lowest key of the + * leftmost zero-level node, then %0 is returned and %-1 is stored in @n. + * + * Additionally all znodes in the path from the root to the located zero-level + * znode are marked as dirty. + * + * Note, when the TNC tree is traversed, some znodes may be absent, then this + * function reads corresponding indexing nodes and inserts them to TNC. In + * case of failure, a negative error code is returned. 
+ */ +static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode **zn, int *n) +{ + int err, exact; + struct ubifs_znode *znode; + unsigned long time = get_seconds(); + + dbg_tnc("search and dirty key %s", DBGKEY(key)); + + znode = c->zroot.znode; + if (unlikely(!znode)) { + /* Root znode is not loaded yet: load it now */ znode = ubifs_load_znode(c, &c->zroot, NULL, 0); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + + znode = dirty_cow_znode(c, &c->zroot); + if (IS_ERR(znode)) + return PTR_ERR(znode); + + znode->time = time; + + while (1) { + struct ubifs_zbranch *zbr; + + exact = ubifs_search_zbranch(c, znode, key, n); + + if (znode->level == 0) + break; + + /* On an internal level a negative slot is clamped to the leftmost child */ if (*n < 0) + *n = 0; + zbr = &znode->zbranch[*n]; + + if (zbr->znode) { + znode->time = time; + znode = dirty_cow_znode(c, zbr); + if (IS_ERR(znode)) + return PTR_ERR(znode); + continue; + } + + /* znode is not in TNC cache, load it from the media */ + znode = ubifs_load_znode(c, zbr, znode, *n); + if (IS_ERR(znode)) + return PTR_ERR(znode); + znode = dirty_cow_znode(c, zbr); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + + *zn = znode; + if (exact || !is_hash_key(c, key) || *n != -1) { + dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n); + return exact; + } + + /* + * See the huge comment in 'ubifs_lookup_level0()' for what the rest of + * the code does. + */ + err = tnc_prev(c, &znode, n); + if (err == -ENOENT) { + *n = -1; + dbg_tnc("found 0, lvl %d, n -1", znode->level); + return 0; + } + if (unlikely(err < 0)) + return err; + if (keys_cmp(c, key, &znode->zbranch[*n].key)) { + *n = -1; + dbg_tnc("found 0, lvl %d, n -1", znode->level); + return 0; + } + + /* tnc_prev() may have changed znode, so make sure the one we ended up on is dirty */ if (znode->cnext || !ubifs_zn_dirty(znode)) { + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + + dbg_tnc("found 1, lvl %d, n %d", znode->level, *n); + *zn = znode; + return 1; +} + +/** + * maybe_leb_gced - determine if a LEB may have been garbage collected. 
+ * @c: UBIFS file-system description object + * @lnum: LEB number + * @gc_seq1: garbage collection sequence number + * + * This function is meant to determine if @lnum may have been garbage collected + * since sequence number @gc_seq1 (%1 if it may have been, %0 otherwise). In + * this implementation the arguments are ignored and %0 is always returned. + */ +static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) +{ + /* + * No garbage collection in the read-only U-Boot implementation + */ + return 0; +} + +/** + * ubifs_tnc_locate - look up a file-system node and return it and its location. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * @lnum: LEB number is returned here + * @offs: offset is returned here + * + * This function looks up and reads the node with key @key. The caller has to make + * sure the @node buffer is large enough to fit the node. Returns zero in case + * of success, %-ENOENT if the node was not found, and a negative error code in + * case of failure. The node location can be returned in @lnum and @offs: if + * @lnum is not %NULL both are written, so @offs must then be valid as well. 
+ */ +int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, + void *node, int *lnum, int *offs) +{ + int found, n, err, safely = 0, gc_seq1; + struct ubifs_znode *znode; + struct ubifs_zbranch zbr, *zt; + +again: + mutex_lock(&c->tnc_mutex); + found = ubifs_lookup_level0(c, key, &znode, &n); + if (!found) { + err = -ENOENT; + goto out; + } else if (found < 0) { + err = found; + goto out; + } + zt = &znode->zbranch[n]; + if (lnum) { + *lnum = zt->lnum; + *offs = zt->offs; + } + if (is_hash_key(c, key)) { + /* + * In this case the leaf node cache gets used, so we pass the + * address of the zbranch and keep the mutex locked + */ + err = tnc_read_node_nm(c, zt, node); + goto out; + } + if (safely) { + err = ubifs_tnc_read_node(c, zt, node); + goto out; + } + /* Drop the TNC mutex prematurely and race with garbage collection */ + zbr = znode->zbranch[n]; + gc_seq1 = c->gc_seq; + mutex_unlock(&c->tnc_mutex); + + err = fallible_read_node(c, key, &zbr, node); + if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) { + /* + * The node may have been GC'ed out from under us so try again + * while keeping the TNC mutex locked. + */ + safely = 1; + goto again; + } + return 0; + +out: + mutex_unlock(&c->tnc_mutex); + return err; +} +#if 0 +/** + * ubifs_tnc_get_bu_keys - lookup keys for bulk-read. + * @c: UBIFS file-system description object + * @bu: bulk-read parameters and results + * + * Lookup consecutive data node keys for the same inode that reside + * consecutively in the same LEB. This function returns zero in case of success + * and a negative error code in case of failure. + * + * Note, if the bulk-read buffer length (@bu->buf_len) is known, this function + * makes sure bulk-read nodes fit the buffer. Otherwise, this function prepares + * maximum possible amount of nodes for bulk-read. 
+ */ +int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu) +{ + /* lnum stays -1 until the first node is recorded; offs then tracks where the next node must start */ int n, err = 0, lnum = -1, uninitialized_var(offs); + int uninitialized_var(len); + unsigned int block = key_block(c, &bu->key); + struct ubifs_znode *znode; + + bu->cnt = 0; + bu->blk_cnt = 0; + bu->eof = 0; + + mutex_lock(&c->tnc_mutex); + /* Find first key */ + err = ubifs_lookup_level0(c, &bu->key, &znode, &n); + if (err < 0) + goto out; + if (err) { + /* Key found */ + len = znode->zbranch[n].len; + /* The buffer must be big enough for at least 1 node */ + if (len > bu->buf_len) { + err = -EINVAL; + goto out; + } + /* Add this key */ + bu->zbranch[bu->cnt++] = znode->zbranch[n]; + bu->blk_cnt += 1; + lnum = znode->zbranch[n].lnum; + offs = ALIGN(znode->zbranch[n].offs + len, 8); + } + while (1) { + struct ubifs_zbranch *zbr; + union ubifs_key *key; + unsigned int next_block; + + /* Find next key; any tnc_next() error ends the scan (-ENOENT is turned into EOF at 'out') */ + err = tnc_next(c, &znode, &n); + if (err) + goto out; + zbr = &znode->zbranch[n]; + key = &zbr->key; + /* See if there is another data key for this file */ + if (key_inum(c, key) != key_inum(c, &bu->key) || + key_type(c, key) != UBIFS_DATA_KEY) { + err = -ENOENT; + goto out; + } + if (lnum < 0) { + /* First key found */ + lnum = zbr->lnum; + offs = ALIGN(zbr->offs + zbr->len, 8); + len = zbr->len; + if (len > bu->buf_len) { + err = -EINVAL; + goto out; + } + } else { + /* + * The data nodes must be in consecutive positions in + * the same LEB. 
+ * Otherwise stop collecting; this is not an error (err is 0 here). + */ + if (zbr->lnum != lnum || zbr->offs != offs) + goto out; + offs += ALIGN(zbr->len, 8); + len = ALIGN(len, 8) + zbr->len; + /* Must not exceed buffer length */ + if (len > bu->buf_len) + goto out; + } + /* Allow for holes */ + next_block = key_block(c, key); + bu->blk_cnt += (next_block - block - 1); + if (bu->blk_cnt >= UBIFS_MAX_BULK_READ) + goto out; + block = next_block; + /* Add this key */ + bu->zbranch[bu->cnt++] = *zbr; + bu->blk_cnt += 1; + /* See if we have room for more */ + if (bu->cnt >= UBIFS_MAX_BULK_READ) + goto out; + if (bu->blk_cnt >= UBIFS_MAX_BULK_READ) + goto out; + } +out: + /* Running out of keys for this file means end-of-file, not failure */ if (err == -ENOENT) { + bu->eof = 1; + err = 0; + } + bu->gc_seq = c->gc_seq; + mutex_unlock(&c->tnc_mutex); + if (err) + return err; + /* + * An enormous hole could cause bulk-read to encompass too many + * page cache pages, so limit the number here. + */ + if (bu->blk_cnt > UBIFS_MAX_BULK_READ) + bu->blk_cnt = UBIFS_MAX_BULK_READ; + /* + * Ensure that bulk-read covers a whole number of page cache + * pages. + */ + if (UBIFS_BLOCKS_PER_PAGE == 1 || + !(bu->blk_cnt & (UBIFS_BLOCKS_PER_PAGE - 1))) + return 0; + if (bu->eof) { + /* At the end of file we can round up */ + bu->blk_cnt += UBIFS_BLOCKS_PER_PAGE - 1; + return 0; + } + /* Exclude data nodes that do not make up a whole page cache page */ + block = key_block(c, &bu->key) + bu->blk_cnt; + block &= ~(UBIFS_BLOCKS_PER_PAGE - 1); + while (bu->cnt) { + if (key_block(c, &bu->zbranch[bu->cnt - 1].key) < block) + break; + bu->cnt -= 1; + } + return 0; +} +#endif +/** + * validate_data_node - validate data nodes for bulk-read. + * @c: UBIFS file-system description object + * @buf: buffer containing data node to validate + * @zbr: zbranch of data node to validate + * + * This function returns %0 on success or a negative error code on failure. 
+ */ +static int validate_data_node(struct ubifs_info *c, void *buf, + struct ubifs_zbranch *zbr) +{ + union ubifs_key key1; + struct ubifs_ch *ch = buf; + int err, len; + + if (ch->node_type != UBIFS_DATA_NODE) { + ubifs_err("bad node type (%d but expected %d)", + ch->node_type, UBIFS_DATA_NODE); + goto out_err; + } + + err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0); + if (err) { + ubifs_err("expected node type %d", UBIFS_DATA_NODE); + goto out; + } + + len = le32_to_cpu(ch->len); + if (len != zbr->len) { + ubifs_err("bad node length %d, expected %d", len, zbr->len); + goto out_err; + } + + /* Make sure the key of the read node is correct */ + key_read(c, buf + UBIFS_KEY_OFFSET, &key1); + if (!keys_eq(c, &zbr->key, &key1)) { + ubifs_err("bad key in node at LEB %d:%d", + zbr->lnum, zbr->offs); + dbg_tnc("looked for key %s found node's key %s", + DBGKEY(&zbr->key), DBGKEY1(&key1)); + goto out_err; + } + + return 0; + +out_err: + err = -EINVAL; +out: + ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs); + dbg_dump_node(c, buf); + dbg_dump_stack(); + return err; +} +#if 0 +/** + * ubifs_tnc_bulk_read - read a number of data nodes in one go. + * @c: UBIFS file-system description object + * @bu: bulk-read parameters and results + * + * This functions reads and validates the data nodes that were identified by the + * 'ubifs_tnc_get_bu_keys()' function. This functions returns %0 on success, + * -EAGAIN to indicate a race with GC, or another negative error code on + * failure. 
+ */ +int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) +{ + int lnum = bu->zbranch[0].lnum, offs = bu->zbranch[0].offs, len, err, i; + void *buf; + + /* Span from the first node's offset to the end of the last node; NOTE(review): assumes bu->cnt >= 1 - confirm callers guarantee this */ len = bu->zbranch[bu->cnt - 1].offs; + len += bu->zbranch[bu->cnt - 1].len - offs; + if (len > bu->buf_len) { + ubifs_err("buffer too small %d vs %d", bu->buf_len, len); + return -EINVAL; + } + + /* Do the read */ + err = ubi_read(c->ubi, lnum, bu->buf, offs, len); + + /* Check for a race with GC */ + if (maybe_leb_gced(c, lnum, bu->gc_seq)) + return -EAGAIN; + + /* -EBADMSG is tolerated here; every node is validated individually below */ if (err && err != -EBADMSG) { + ubifs_err("failed to read from LEB %d:%d, error %d", + lnum, offs, err); + dbg_dump_stack(); + dbg_tnc("key %s", DBGKEY(&bu->key)); + return err; + } + + /* Validate the nodes read */ + buf = bu->buf; + for (i = 0; i < bu->cnt; i++) { + err = validate_data_node(c, buf, &bu->zbranch[i]); + if (err) + return err; + /* Step to the next node, advancing by the 8-byte-aligned node length */ buf = buf + ALIGN(bu->zbranch[i].len, 8); + } + + return 0; +} +#endif +/** + * do_lookup_nm - look up a "hashed" node. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * @nm: node name + * + * This function looks up and reads a node which contains name hash in the key. + * Since the hash may have collisions, there may be many nodes with the same + * key, so we have to sequentially look at all of them until the needed one is + * found. This function returns zero in case of success, %-ENOENT if the node + * was not found, and a negative error code in case of failure. 
+ */ +static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, + void *node, const struct qstr *nm) +{ + int found, n, err; + struct ubifs_znode *znode; + + dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); + mutex_lock(&c->tnc_mutex); + found = ubifs_lookup_level0(c, key, &znode, &n); + if (!found) { + err = -ENOENT; + goto out_unlock; + } else if (found < 0) { + err = found; + goto out_unlock; + } + + ubifs_assert(n >= 0); + + err = resolve_collision(c, key, &znode, &n, nm); + dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); + if (unlikely(err < 0)) + goto out_unlock; + if (err == 0) { + err = -ENOENT; + goto out_unlock; + } + + err = tnc_read_node_nm(c, &znode->zbranch[n], node); + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * ubifs_tnc_lookup_nm - look up a "hashed" node. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * @nm: node name + * + * This function look up and reads a node which contains name hash in the key. + * Since the hash may have collisions, there may be many nodes with the same + * key, so we have to sequentially look to all of them until the needed one is + * found. This function returns zero in case of success, %-ENOENT if the node + * was not found, and a negative error code in case of failure. + */ +int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, + void *node, const struct qstr *nm) +{ + int err, len; + const struct ubifs_dent_node *dent = node; + + /* + * We assume that in most of the cases there are no name collisions and + * 'ubifs_tnc_lookup()' returns us the right direntry. 
+ */ + err = ubifs_tnc_lookup(c, key, node); + if (err) + return err; + + len = le16_to_cpu(dent->nlen); + if (nm->len == len && !memcmp(dent->name, nm->name, len)) + return 0; + + /* + * Unluckily, there are hash collisions and we have to iterate over + * them look at each direntry with colliding name hash sequentially. + */ + return do_lookup_nm(c, key, node, nm); +} + +/** + * correct_parent_keys - correct parent znodes' keys. + * @c: UBIFS file-system description object + * @znode: znode to correct parent znodes for + * + * This is a helper function for 'tnc_insert()'. When the key of the leftmost + * zbranch changes, keys of parent znodes have to be corrected. This helper + * function is called in such situations and corrects the keys if needed. + */ +static void correct_parent_keys(const struct ubifs_info *c, + struct ubifs_znode *znode) +{ + union ubifs_key *key, *key1; + + ubifs_assert(znode->parent); + ubifs_assert(znode->iip == 0); + + key = &znode->zbranch[0].key; + key1 = &znode->parent->zbranch[0].key; + + while (keys_cmp(c, key, key1) < 0) { + key_copy(c, key, key1); + znode = znode->parent; + znode->alt = 1; + if (!znode->parent || znode->iip) + break; + key1 = &znode->parent->zbranch[0].key; + } +} + +/** + * insert_zbranch - insert a zbranch into a znode. + * @znode: znode into which to insert + * @zbr: zbranch to insert + * @n: slot number to insert to + * + * This is a helper function for 'tnc_insert()'. UBIFS does not allow "gaps" in + * znode's array of zbranches and keeps zbranches consolidated, so when a new + * zbranch has to be inserted to the @znode->zbranches[]' array at the @n-th + * slot, zbranches starting from @n have to be moved right. 
+ */ +static void insert_zbranch(struct ubifs_znode *znode, + const struct ubifs_zbranch *zbr, int n) +{ + int i; + + ubifs_assert(ubifs_zn_dirty(znode)); + + if (znode->level) { + for (i = znode->child_cnt; i > n; i--) { + znode->zbranch[i] = znode->zbranch[i - 1]; + if (znode->zbranch[i].znode) + znode->zbranch[i].znode->iip = i; + } + if (zbr->znode) + zbr->znode->iip = n; + } else + for (i = znode->child_cnt; i > n; i--) + znode->zbranch[i] = znode->zbranch[i - 1]; + + znode->zbranch[n] = *zbr; + znode->child_cnt += 1; + + /* + * After inserting at slot zero, the lower bound of the key range of + * this znode may have changed. If this znode is subsequently split + * then the upper bound of the key range may change, and furthermore + * it could change to be lower than the original lower bound. If that + * happens, then it will no longer be possible to find this znode in the + * TNC using the key from the index node on flash. That is bad because + * if it is not found, we will assume it is obsolete and may overwrite + * it. Then if there is an unclean unmount, we will start using the + * old index which will be broken. + * + * So we first mark znodes that have insertions at slot zero, and then + * if they are split we add their lnum/offs to the old_idx tree. + */ + if (n == 0) + znode->alt = 1; +} + +/** + * tnc_insert - insert a node into TNC. + * @c: UBIFS file-system description object + * @znode: znode to insert into + * @zbr: branch to insert + * @n: slot number to insert new zbranch to + * + * This function inserts a new node described by @zbr into znode @znode. If + * znode does not have a free slot for new zbranch, it is split. Parent znodes + * are splat as well if needed. Returns zero in case of success or a negative + * error code in case of failure. 
+ */ +static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode, + struct ubifs_zbranch *zbr, int n) +{ + /* zn: new znode created by a split; zi: znode that receives @zbr; zp: parent of the znode being processed */ struct ubifs_znode *zn, *zi, *zp; + /* keep/move: how many zbranches stay in @znode / go to zn when splitting */ int i, keep, move, appending = 0; + union ubifs_key *key = &zbr->key, *key1; + + ubifs_assert(n >= 0 && n <= c->fanout); + + /* Implement naive insert for now */ +again: + zp = znode->parent; + if (znode->child_cnt < c->fanout) { + ubifs_assert(n != c->fanout); + dbg_tnc("inserted at %d level %d, key %s", n, znode->level, + DBGKEY(key)); + + insert_zbranch(znode, zbr, n); + + /* Ensure parent's key is correct */ + if (n == 0 && zp && znode->iip == 0) + correct_parent_keys(c, znode); + + return 0; + } + + /* + * Unfortunately, @znode does not have more empty slots and we have to + * split it. + */ + dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key)); + + if (znode->alt) + /* + * We can no longer be sure of finding this znode by key, so we + * record it in the old_idx tree. + * NOTE(review): the return value of this call is not checked. + */ + ins_clr_old_idx_znode(c, znode); + + zn = kzalloc(c->max_znode_sz, GFP_NOFS); + if (!zn) + return -ENOMEM; + zn->parent = zp; + zn->level = znode->level; + + /* Decide where to split */ + if (znode->level == 0 && key_type(c, key) == UBIFS_DATA_KEY) { + /* Try not to split consecutive data keys */ + if (n == c->fanout) { + key1 = &znode->zbranch[n - 1].key; + if (key_inum(c, key1) == key_inum(c, key) && + key_type(c, key1) == UBIFS_DATA_KEY) + appending = 1; + } else + goto check_split; + } else if (appending && n != c->fanout) { + /* Try not to split consecutive data keys */ + appending = 0; +check_split: + if (n >= (c->fanout + 1) / 2) { + key1 = &znode->zbranch[0].key; + if (key_inum(c, key1) == key_inum(c, key) && + key_type(c, key1) == UBIFS_DATA_KEY) { + key1 = &znode->zbranch[n].key; + if (key_inum(c, key1) != key_inum(c, key) || + key_type(c, key1) != UBIFS_DATA_KEY) { + keep = n; + move = c->fanout - keep; + zi = znode; + goto do_split; + } + } + } + } + + if (appending) { + keep = c->fanout; + move = 0; + } else { + 
keep = (c->fanout + 1) / 2; + move = c->fanout - keep; + } + + /* + * Although we don't at present, we could look at the neighbors and see + * if we can move some zbranches there. + */ + + if (n < keep) { + /* Insert into existing znode */ + zi = znode; + move += 1; + keep -= 1; + } else { + /* Insert into new znode */ + zi = zn; + n -= keep; + /* Re-parent */ + if (zn->level != 0) + zbr->znode->parent = zn; + } + +do_split: + + __set_bit(DIRTY_ZNODE, &zn->flags); + atomic_long_inc(&c->dirty_zn_cnt); + + zn->child_cnt = move; + znode->child_cnt = keep; + + dbg_tnc("moving %d, keeping %d", move, keep); + + /* Move zbranch */ + for (i = 0; i < move; i++) { + zn->zbranch[i] = znode->zbranch[keep + i]; + /* Re-parent */ + if (zn->level != 0) + if (zn->zbranch[i].znode) { + zn->zbranch[i].znode->parent = zn; + zn->zbranch[i].znode->iip = i; + } + } + + /* Insert new key and branch */ + dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key)); + + insert_zbranch(zi, zbr, n); + + /* Insert new znode (produced by splitting) into the parent */ + if (zp) { + if (n == 0 && zi == znode && znode->iip == 0) + correct_parent_keys(c, znode); + + /* Locate insertion point */ + n = znode->iip + 1; + + /* Tail recursion: reuse @zbr to describe zn and repeat one level up */ + zbr->key = zn->zbranch[0].key; + zbr->znode = zn; + zbr->lnum = 0; + zbr->offs = 0; + zbr->len = 0; + znode = zp; + + goto again; + } + + /* We have to split root znode */ + dbg_tnc("creating new zroot at level %d", znode->level + 1); + + zi = kzalloc(c->max_znode_sz, GFP_NOFS); + /* NOTE(review): zn is neither freed nor linked anywhere on this failure path - looks like a leak, confirm */ if (!zi) + return -ENOMEM; + + zi->child_cnt = 2; + zi->level = znode->level + 1; + + __set_bit(DIRTY_ZNODE, &zi->flags); + atomic_long_inc(&c->dirty_zn_cnt); + + zi->zbranch[0].key = znode->zbranch[0].key; + zi->zbranch[0].znode = znode; + zi->zbranch[0].lnum = c->zroot.lnum; + zi->zbranch[0].offs = c->zroot.offs; + zi->zbranch[0].len = c->zroot.len; + zi->zbranch[1].key = zn->zbranch[0].key; + zi->zbranch[1].znode = zn; + + c->zroot.lnum = 0; + c->zroot.offs = 0; + 
c->zroot.len = 0; + c->zroot.znode = zi; + + zn->parent = zi; + zn->iip = 1; + znode->parent = zi; + znode->iip = 0; + + return 0; +} + +/** + * ubifs_tnc_add - add a node to TNC. + * @c: UBIFS file-system description object + * @key: key to add + * @lnum: LEB number of node + * @offs: node offset + * @len: node length + * + * This function adds a node with key @key to TNC. The node may be new or it may + * obsolete some existing one. Returns %0 on success or negative error code on + * failure. + */ +int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, + int offs, int len) +{ + int found, n, err = 0; + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); + dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key)); + found = lookup_level0_dirty(c, key, &znode, &n); + if (!found) { + /* Key is not in the index yet: insert a new zbranch after slot n */ struct ubifs_zbranch zbr; + + zbr.znode = NULL; + zbr.lnum = lnum; + zbr.offs = offs; + zbr.len = len; + key_copy(c, key, &zbr.key); + err = tnc_insert(c, znode, &zbr, n + 1); + } else if (found == 1) { + /* Key exists: account the old node as dirt and point the zbranch at the new location */ struct ubifs_zbranch *zbr = &znode->zbranch[n]; + + lnc_free(zbr); + err = ubifs_add_dirt(c, zbr->lnum, zbr->len); + /* NOTE(review): the zbranch is overwritten even when ubifs_add_dirt() failed */ zbr->lnum = lnum; + zbr->offs = offs; + zbr->len = len; + } else + err = found; + if (!err) + err = dbg_check_tnc(c, 0); + mutex_unlock(&c->tnc_mutex); + + return err; +} + +/** + * ubifs_tnc_replace - replace a node in the TNC only if the old node is found. + * @c: UBIFS file-system description object + * @key: key to add + * @old_lnum: LEB number of old node + * @old_offs: old node offset + * @lnum: LEB number of node + * @offs: node offset + * @len: node length + * + * This function replaces a node with key @key in the TNC only if the old node + * is found. This function is called by garbage collection when nodes are moved. + * Returns %0 on success or negative error code on failure. 
+ */ +int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, + int old_lnum, int old_offs, int lnum, int offs, int len) +{ + int found, n, err = 0; + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); + dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum, + old_offs, lnum, offs, len, DBGKEY(key)); + found = lookup_level0_dirty(c, key, &znode, &n); + if (found < 0) { + err = found; + goto out_unlock; + } + + if (found == 1) { + struct ubifs_zbranch *zbr = &znode->zbranch[n]; + + found = 0; + if (zbr->lnum == old_lnum && zbr->offs == old_offs) { + lnc_free(zbr); + err = ubifs_add_dirt(c, zbr->lnum, zbr->len); + if (err) + goto out_unlock; + zbr->lnum = lnum; + zbr->offs = offs; + zbr->len = len; + found = 1; + } else if (is_hash_key(c, key)) { + found = resolve_collision_directly(c, key, &znode, &n, + old_lnum, old_offs); + dbg_tnc("rc returned %d, znode %p, n %d, LEB %d:%d", + found, znode, n, old_lnum, old_offs); + if (found < 0) { + err = found; + goto out_unlock; + } + + if (found) { + /* Ensure the znode is dirtied */ + if (znode->cnext || !ubifs_zn_dirty(znode)) { + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + } + zbr = &znode->zbranch[n]; + lnc_free(zbr); + err = ubifs_add_dirt(c, zbr->lnum, + zbr->len); + if (err) + goto out_unlock; + zbr->lnum = lnum; + zbr->offs = offs; + zbr->len = len; + } + } + } + + if (!found) + err = ubifs_add_dirt(c, lnum, len); + + if (!err) + err = dbg_check_tnc(c, 0); + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * ubifs_tnc_add_nm - add a "hashed" node to TNC. + * @c: UBIFS file-system description object + * @key: key to add + * @lnum: LEB number of node + * @offs: node offset + * @len: node length + * @nm: node name + * + * This is the same as 'ubifs_tnc_add()' but it should be used with keys which + * may have collisions, like directory entry keys. 
+ */ +int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, + int lnum, int offs, int len, const struct qstr *nm) +{ + int found, n, err = 0; + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); + dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name, + DBGKEY(key)); + found = lookup_level0_dirty(c, key, &znode, &n); + if (found < 0) { + err = found; + goto out_unlock; + } + + if (found == 1) { + if (c->replaying) + found = fallible_resolve_collision(c, key, &znode, &n, + nm, 1); + else + found = resolve_collision(c, key, &znode, &n, nm); + dbg_tnc("rc returned %d, znode %p, n %d", found, znode, n); + if (found < 0) { + err = found; + goto out_unlock; + } + + /* Ensure the znode is dirtied */ + if (znode->cnext || !ubifs_zn_dirty(znode)) { + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + } + + if (found == 1) { + struct ubifs_zbranch *zbr = &znode->zbranch[n]; + + lnc_free(zbr); + err = ubifs_add_dirt(c, zbr->lnum, zbr->len); + zbr->lnum = lnum; + zbr->offs = offs; + zbr->len = len; + goto out_unlock; + } + } + + if (!found) { + struct ubifs_zbranch zbr; + + zbr.znode = NULL; + zbr.lnum = lnum; + zbr.offs = offs; + zbr.len = len; + key_copy(c, key, &zbr.key); + err = tnc_insert(c, znode, &zbr, n + 1); + if (err) + goto out_unlock; + if (c->replaying) { + /* + * We did not find it in the index so there may be a + * dangling branch still in the index. So we remove it + * by passing 'ubifs_tnc_remove_nm()' the same key but + * an unmatchable name. + */ + struct qstr noname = { .len = 0, .name = "" }; + + err = dbg_check_tnc(c, 0); + mutex_unlock(&c->tnc_mutex); + if (err) + return err; + return ubifs_tnc_remove_nm(c, key, &noname); + } + } + +out_unlock: + if (!err) + err = dbg_check_tnc(c, 0); + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * tnc_delete - delete a znode form TNC. 
+ * @c: UBIFS file-system description object + * @znode: znode to delete from + * @n: zbranch slot number to delete + * + * This function deletes a leaf node from @n-th slot of @znode. Returns zero in + * case of success and a negative error code in case of failure. + */ +static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) +{ + struct ubifs_zbranch *zbr; + struct ubifs_znode *zp; + int i, err; + + /* Delete without merge for now */ + ubifs_assert(znode->level == 0); + ubifs_assert(n >= 0 && n < c->fanout); + dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key)); + + zbr = &znode->zbranch[n]; + lnc_free(zbr); + + /* The space taken by the deleted node is accounted as dirty */ + err = ubifs_add_dirt(c, zbr->lnum, zbr->len); + if (err) { + dbg_dump_znode(c, znode); + return err; + } + + /* We do not "gap" zbranch slots: shift the following slots down by one */ + for (i = n; i < znode->child_cnt - 1; i++) + znode->zbranch[i] = znode->zbranch[i + 1]; + znode->child_cnt -= 1; + + if (znode->child_cnt > 0) + return 0; + + /* + * This was the last zbranch, we have to delete this znode from the + * parent. The loop below keeps climbing while each parent would be + * left without children as well. + */ + + do { + ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); + ubifs_assert(ubifs_zn_dirty(znode)); + + zp = znode->parent; + n = znode->iip; + + atomic_long_dec(&c->dirty_zn_cnt); + + err = insert_old_idx_znode(c, znode); + if (err) + return err; + + if (znode->cnext) { + __set_bit(OBSOLETE_ZNODE, &znode->flags); + atomic_long_inc(&c->clean_zn_cnt); + atomic_long_inc(&ubifs_clean_zn_cnt); + } else + kfree(znode); + znode = zp; + } while (znode->child_cnt == 1); /* while removing last child */ + + /* + * Remove slot @n from the surviving ancestor; @n is the index in parent + * of the last znode released above. The 'iip' of every child that is + * shifted down is updated to match its new slot. + */ + znode->child_cnt -= 1; + ubifs_assert(znode->level != 0); + for (i = n; i < znode->child_cnt; i++) { + znode->zbranch[i] = znode->zbranch[i + 1]; + if (znode->zbranch[i].znode) + znode->zbranch[i].znode->iip = i; + } + + /* + * If this is the root and it has only 1 child then + * collapse the tree: the only child becomes the new root, + * repeatedly, until the root has several children or is a leaf.
+ */ + if (!znode->parent) { + while (znode->child_cnt == 1 && znode->level != 0) { + zp = znode; + zbr = &znode->zbranch[0]; + znode = get_znode(c, znode, 0); + if (IS_ERR(znode)) + return PTR_ERR(znode); + znode = dirty_cow_znode(c, zbr); + if (IS_ERR(znode)) + return PTR_ERR(znode); + znode->parent = NULL; + znode->iip = 0; + if (c->zroot.len) { + err = insert_old_idx(c, c->zroot.lnum, + c->zroot.offs); + if (err) + return err; + } + c->zroot.lnum = zbr->lnum; + c->zroot.offs = zbr->offs; + c->zroot.len = zbr->len; + c->zroot.znode = znode; + ubifs_assert(!test_bit(OBSOLETE_ZNODE, + &zp->flags)); + ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags)); + atomic_long_dec(&c->dirty_zn_cnt); + + if (zp->cnext) { + __set_bit(OBSOLETE_ZNODE, &zp->flags); + atomic_long_inc(&c->clean_zn_cnt); + atomic_long_inc(&ubifs_clean_zn_cnt); + } else + kfree(zp); + } + } + + return 0; +} + +/** + * ubifs_tnc_remove - remove an index entry of a node. + * @c: UBIFS file-system description object + * @key: key of node + * + * The TNC mutex is held for the duration of the call. A key which is not + * present in the index is not an error: nothing is deleted in that case. + * + * Returns %0 on success or negative error code on failure. + */ +int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key) +{ + int found, n, err = 0; + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); + dbg_tnc("key %s", DBGKEY(key)); + found = lookup_level0_dirty(c, key, &znode, &n); + if (found < 0) { + err = found; + goto out_unlock; + } + if (found == 1) + err = tnc_delete(c, znode, n); + if (!err) + err = dbg_check_tnc(c, 0); + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * ubifs_tnc_remove_nm - remove an index entry for a "hashed" node. + * @c: UBIFS file-system description object + * @key: key of node + * @nm: directory entry name + * + * Returns %0 on success or negative error code on failure.
+ */ +int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, + const struct qstr *nm) +{ + int n, err; + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); + dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key)); + err = lookup_level0_dirty(c, key, &znode, &n); + if (err < 0) + goto out_unlock; + + if (err) { + if (c->replaying) + err = fallible_resolve_collision(c, key, &znode, &n, + nm, 0); + else + err = resolve_collision(c, key, &znode, &n, nm); + dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); + if (err < 0) + goto out_unlock; + if (err) { + /* Ensure the znode is dirtied */ + if (znode->cnext || !ubifs_zn_dirty(znode)) { + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + } + err = tnc_delete(c, znode, n); + } + } + +out_unlock: + if (!err) + err = dbg_check_tnc(c, 0); + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * key_in_range - determine if a key falls within a range of keys. + * @c: UBIFS file-system description object + * @key: key to check + * @from_key: lowest key in range + * @to_key: highest key in range + * + * This function returns %1 if the key is in range and %0 otherwise. + */ +static int key_in_range(struct ubifs_info *c, union ubifs_key *key, + union ubifs_key *from_key, union ubifs_key *to_key) +{ + if (keys_cmp(c, key, from_key) < 0) + return 0; + if (keys_cmp(c, key, to_key) > 0) + return 0; + return 1; +} + +/** + * ubifs_tnc_remove_range - remove index entries in range. + * @c: UBIFS file-system description object + * @from_key: lowest key to remove + * @to_key: highest key to remove + * + * This function removes index entries starting at @from_key and ending at + * @to_key. This function returns zero in case of success and a negative error + * code in case of failure. 
+ */ +int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, + union ubifs_key *to_key) +{ + int i, n, k, err = 0; + struct ubifs_znode *znode; + union ubifs_key *key; + + mutex_lock(&c->tnc_mutex); + while (1) { + /* Find first level 0 znode that contains keys to remove */ + err = ubifs_lookup_level0(c, from_key, &znode, &n); + if (err < 0) + goto out_unlock; + + if (err) + key = from_key; + else { + err = tnc_next(c, &znode, &n); + if (err == -ENOENT) { + err = 0; + goto out_unlock; + } + if (err < 0) + goto out_unlock; + key = &znode->zbranch[n].key; + if (!key_in_range(c, key, from_key, to_key)) { + err = 0; + goto out_unlock; + } + } + + /* Ensure the znode is dirtied */ + if (znode->cnext || !ubifs_zn_dirty(znode)) { + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + } + + /* Remove all keys in range except the first */ + for (i = n + 1, k = 0; i < znode->child_cnt; i++, k++) { + key = &znode->zbranch[i].key; + if (!key_in_range(c, key, from_key, to_key)) + break; + lnc_free(&znode->zbranch[i]); + err = ubifs_add_dirt(c, znode->zbranch[i].lnum, + znode->zbranch[i].len); + if (err) { + dbg_dump_znode(c, znode); + goto out_unlock; + } + dbg_tnc("removing %s", DBGKEY(key)); + } + if (k) { + for (i = n + 1 + k; i < znode->child_cnt; i++) + znode->zbranch[i - k] = znode->zbranch[i]; + znode->child_cnt -= k; + } + + /* Now delete the first */ + err = tnc_delete(c, znode, n); + if (err) + goto out_unlock; + } + +out_unlock: + if (!err) + err = dbg_check_tnc(c, 0); + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * ubifs_tnc_remove_ino - remove an inode from TNC. + * @c: UBIFS file-system description object + * @inum: inode number to remove + * + * This function remove inode @inum and all the extended attributes associated + * with the anode from TNC and returns zero in case of success or a negative + * error code in case of failure. 
+ */ +int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum) +{ + union ubifs_key key1, key2; + struct ubifs_dent_node *xent, *pxent = NULL; + struct qstr nm = { .name = NULL }; + + dbg_tnc("ino %lu", (unsigned long)inum); + + /* + * Walk all extended attribute entries and remove them together with + * corresponding extended attribute inodes. + */ + lowest_xent_key(c, &key1, inum); + while (1) { + ino_t xattr_inum; + int err; + + xent = ubifs_tnc_next_ent(c, &key1, &nm); + if (IS_ERR(xent)) { + err = PTR_ERR(xent); + if (err == -ENOENT) + break; + return err; + } + + xattr_inum = le64_to_cpu(xent->inum); + dbg_tnc("xent '%s', ino %lu", xent->name, + (unsigned long)xattr_inum); + + nm.name = (char *)xent->name; + nm.len = le16_to_cpu(xent->nlen); + err = ubifs_tnc_remove_nm(c, &key1, &nm); + if (err) { + kfree(xent); + return err; + } + + lowest_ino_key(c, &key1, xattr_inum); + highest_ino_key(c, &key2, xattr_inum); + err = ubifs_tnc_remove_range(c, &key1, &key2); + if (err) { + kfree(xent); + return err; + } + + kfree(pxent); + pxent = xent; + key_read(c, &xent->key, &key1); + } + + kfree(pxent); + lowest_ino_key(c, &key1, inum); + highest_ino_key(c, &key2, inum); + + return ubifs_tnc_remove_range(c, &key1, &key2); +} + +/** + * ubifs_tnc_next_ent - walk directory or extended attribute entries. + * @c: UBIFS file-system description object + * @key: key of last entry + * @nm: name of last entry found or %NULL + * + * This function finds and reads the next directory or extended attribute entry + * after the given key (@key) if there is one. @nm is used to resolve + * collisions. + * + * If the name of the current entry is not known and only the key is known, + * @nm->name has to be %NULL. In this case the semantics of this function is a + * little bit different and it returns the entry corresponding to this key, not + * the next one. If the key was not found, the closest "right" entry is + * returned. 
+ * + * If the fist entry has to be found, @key has to contain the lowest possible + * key value for this inode and @name has to be %NULL. + * + * This function returns the found directory or extended attribute entry node + * in case of success, %-ENOENT is returned if no entry was found, and a + * negative error code is returned in case of failure. + */ +struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, + union ubifs_key *key, + const struct qstr *nm) +{ + int n, err, type = key_type(c, key); + struct ubifs_znode *znode; + struct ubifs_dent_node *dent; + struct ubifs_zbranch *zbr; + union ubifs_key *dkey; + + dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key)); + ubifs_assert(is_hash_key(c, key)); + + mutex_lock(&c->tnc_mutex); + err = ubifs_lookup_level0(c, key, &znode, &n); + if (unlikely(err < 0)) + goto out_unlock; + + if (nm->name) { + if (err) { + /* The key was found, handle collisions using the name */ + err = resolve_collision(c, key, &znode, &n, nm); + dbg_tnc("rc returned %d, znode %p, n %d", + err, znode, n); + if (unlikely(err < 0)) + goto out_unlock; + } + + /* Now find next entry */ + err = tnc_next(c, &znode, &n); + if (unlikely(err)) + goto out_unlock; + } else { + /* + * The full name of the entry was not given, in which case the + * behavior of this function is a little different and it + * returns current entry, not the next one. + */ + if (!err) { + /* + * However, the given key does not exist in the TNC + * tree and @znode/@n variables contain the closest + * "preceding" element. Switch to the next one. + */ + err = tnc_next(c, &znode, &n); + if (err) + goto out_unlock; + } + } + + /* + * The buffer is sized by the node length recorded in the zbranch. On + * success it is returned to the caller; the error paths below free it. + */ + zbr = &znode->zbranch[n]; + dent = kmalloc(zbr->len, GFP_NOFS); + if (unlikely(!dent)) { + err = -ENOMEM; + goto out_unlock; + } + + /* + * The above 'tnc_next()' call could lead us to the next inode, check + * this: an entry of another inode or of another key type means there + * are no more entries, which is reported as %-ENOENT.
+ */ + dkey = &zbr->key; + if (key_inum(c, dkey) != key_inum(c, key) || + key_type(c, dkey) != type) { + err = -ENOENT; + goto out_free; + } + + err = tnc_read_node_nm(c, zbr, dent); + if (unlikely(err)) + goto out_free; + + mutex_unlock(&c->tnc_mutex); + return dent; + +out_free: + kfree(dent); +out_unlock: + mutex_unlock(&c->tnc_mutex); + return ERR_PTR(err); +} diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c new file mode 100755 index 0000000..955219f --- /dev/null +++ b/fs/ubifs/tnc_misc.c @@ -0,0 +1,435 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file contains miscellaneous TNC-related functions shared between + * different files. This file does not form any logically separate TNC + * sub-system. The file was created because there is a lot of TNC code and + * putting it all in one file would make that file too big and unreadable. + */ + +#include "ubifs.h" + +/** + * ubifs_tnc_levelorder_next - next TNC tree element in levelorder traversal. + * @zr: root of the subtree to traverse + * @znode: previous znode + * + * This function implements levelorder TNC traversal. The LNC is ignored. + * Returns the next element or %NULL if @znode is already the last one.
+ */ +struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, + struct ubifs_znode *znode) +{ + int level, iip, level_search = 0; + struct ubifs_znode *zn; + + ubifs_assert(zr); + + /* No previous element: the traversal starts at the subtree root */ + if (unlikely(!znode)) + return zr; + + /* After the root comes its first child, unless the root is a leaf */ + if (unlikely(znode == zr)) { + if (znode->level == 0) + return NULL; + return ubifs_tnc_find_child(zr, 0); + } + + /* 'level' is the level being enumerated, 'iip' the slot just visited */ + level = znode->level; + + iip = znode->iip; + while (1) { + ubifs_assert(znode->level <= zr->level); + + /* + * First walk up until there is a znode with next branch to + * look at. + */ + while (znode->parent != zr && iip >= znode->parent->child_cnt) { + znode = znode->parent; + iip = znode->iip; + } + + if (unlikely(znode->parent == zr && + iip >= znode->parent->child_cnt)) { + /* This level is done, switch to the lower one */ + level -= 1; + if (level_search || level < 0) + /* + * We were already looking for znode at lower + * level ('level_search'). As we are here + * again, it just does not exist. Or all levels + * were finished ('level < 0'). + */ + return NULL; + + /* Restart from the first child of the root, before slot 0 */ + level_search = 1; + iip = -1; + znode = ubifs_tnc_find_child(zr, 0); + ubifs_assert(znode); + } + + /* Switch to the next index */ + zn = ubifs_tnc_find_child(znode->parent, iip + 1); + if (!zn) { + /* No more children to look at, we have to walk up */ + iip = znode->parent->child_cnt; + continue; + } + + /* Walk back down to the level we came from ('level') */ + while (zn->level != level) { + znode = zn; + zn = ubifs_tnc_find_child(zn, 0); + if (!zn) { + /* + * This path is not too deep so it does not + * reach 'level'. Try next path. + */ + iip = znode->iip; + break; + } + } + + if (zn) { + ubifs_assert(zn->level >= 0); + return zn; + } + } +} + +/** + * ubifs_search_zbranch - search znode branch. + * @c: UBIFS file-system description object + * @znode: znode to search in + * @key: key to search for + * @n: znode branch slot number is returned here + * + * This is a helper function which search branch with key @key in @znode using + * binary search.
The result of the search may be: + * o exact match, then %1 is returned, and the slot number of the branch is + * stored in @n; + * o no exact match, then %0 is returned and the slot number of the left + * closest branch is returned in @n; the slot if all keys in this znode are + * greater than @key, then %-1 is returned in @n. + */ +int ubifs_search_zbranch(const struct ubifs_info *c, + const struct ubifs_znode *znode, + const union ubifs_key *key, int *n) +{ + int beg = 0, end = znode->child_cnt, uninitialized_var(mid); + int uninitialized_var(cmp); + const struct ubifs_zbranch *zbr = &znode->zbranch[0]; + + ubifs_assert(end > beg); + + while (end > beg) { + mid = (beg + end) >> 1; + cmp = keys_cmp(c, key, &zbr[mid].key); + if (cmp > 0) + beg = mid + 1; + else if (cmp < 0) + end = mid; + else { + *n = mid; + return 1; + } + } + + *n = end - 1; + + /* The insert point is after *n */ + ubifs_assert(*n >= -1 && *n < znode->child_cnt); + if (*n == -1) + ubifs_assert(keys_cmp(c, key, &zbr[0].key) < 0); + else + ubifs_assert(keys_cmp(c, key, &zbr[*n].key) > 0); + if (*n + 1 < znode->child_cnt) + ubifs_assert(keys_cmp(c, key, &zbr[*n + 1].key) < 0); + + return 0; +} + +/** + * ubifs_tnc_postorder_first - find first znode to do postorder tree traversal. + * @znode: znode to start at (root of the sub-tree to traverse) + * + * Find the lowest leftmost znode in a subtree of the TNC tree. The LNC is + * ignored. + */ +struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode) +{ + if (unlikely(!znode)) + return NULL; + + while (znode->level > 0) { + struct ubifs_znode *child; + + child = ubifs_tnc_find_child(znode, 0); + if (!child) + return znode; + znode = child; + } + + return znode; +} + +/** + * ubifs_tnc_postorder_next - next TNC tree element in postorder traversal. + * @znode: previous znode + * + * This function implements postorder TNC traversal. The LNC is ignored. + * Returns the next element or %NULL if @znode is already the last one. 
+ */ +struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode) +{ + struct ubifs_znode *zn; + + ubifs_assert(znode); + if (unlikely(!znode->parent)) + return NULL; + + /* Switch to the next index in the parent */ + zn = ubifs_tnc_find_child(znode->parent, znode->iip + 1); + if (!zn) + /* This is in fact the last child, return parent */ + return znode->parent; + + /* Go to the first znode in this new subtree */ + return ubifs_tnc_postorder_first(zn); +} + +/** + * read_znode - read an indexing node from flash and fill znode. + * @c: UBIFS file-system description object + * @lnum: LEB of the indexing node to read + * @offs: node offset + * @len: node length + * @znode: znode to read to + * + * This function reads an indexing node from the flash media and fills znode + * with the read data. Returns zero in case of success and a negative error + * code in case of failure. The read indexing node is validated and if anything + * is wrong with it, this function prints complaint messages and returns + * %-EINVAL. 
+ */ +static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, + struct ubifs_znode *znode) +{ + int i, err, type, cmp; + struct ubifs_idx_node *idx; + + /* Temporary buffer for the raw on-flash node, freed on every exit path */ + idx = kmalloc(c->max_idx_node_sz, GFP_NOFS); + if (!idx) + return -ENOMEM; + + err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); + if (err < 0) { + kfree(idx); + return err; + } + + znode->child_cnt = le16_to_cpu(idx->child_cnt); + znode->level = le16_to_cpu(idx->level); + + dbg_tnc("LEB %d:%d, level %d, %d branch", + lnum, offs, znode->level, znode->child_cnt); + + /* + * From here on 'err' holds a small positive number identifying which + * validation check failed; it is only printed at 'out_dump'. + */ + if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) { + dbg_err("current fanout %d, branch count %d", + c->fanout, znode->child_cnt); + dbg_err("max levels %d, znode level %d", + UBIFS_MAX_LEVELS, znode->level); + err = 1; + goto out_dump; + } + + for (i = 0; i < znode->child_cnt; i++) { + const struct ubifs_branch *br = ubifs_idx_branch(c, idx, i); + struct ubifs_zbranch *zbr = &znode->zbranch[i]; + + key_read(c, &br->key, &zbr->key); + zbr->lnum = le32_to_cpu(br->lnum); + zbr->offs = le32_to_cpu(br->offs); + zbr->len = le32_to_cpu(br->len); + zbr->znode = NULL; + + /* + * Validate branch: the target has to lie in the main area, fit into + * its LEB and start at an 8-byte aligned offset. + */ + + if (zbr->lnum < c->main_first || + zbr->lnum >= c->leb_cnt || zbr->offs < 0 || + zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) { + dbg_err("bad branch %d", i); + err = 2; + goto out_dump; + } + + switch (key_type(c, &zbr->key)) { + case UBIFS_INO_KEY: + case UBIFS_DATA_KEY: + case UBIFS_DENT_KEY: + case UBIFS_XENT_KEY: + break; + default: + dbg_msg("bad key type at slot %d: %s", i, + DBGKEY(&zbr->key)); + err = 3; + goto out_dump; + } + + /* The length checks below are applied to level 0 branches only */ + if (znode->level) + continue; + + type = key_type(c, &zbr->key); + if (c->ranges[type].max_len == 0) { + if (zbr->len != c->ranges[type].len) { + dbg_err("bad target node (type %d) length (%d)", + type, zbr->len); + dbg_err("have to be %d", c->ranges[type].len); + err = 4; + goto out_dump; + } + } else if (zbr->len < c->ranges[type].min_len || + zbr->len >
c->ranges[type].max_len) { + dbg_err("bad target node (type %d) length (%d)", + type, zbr->len); + dbg_err("have to be in range of %d-%d", + c->ranges[type].min_len, + c->ranges[type].max_len); + err = 5; + goto out_dump; + } + } + + /* + * Ensure that the next key is greater or equivalent to the + * previous one. + */ + for (i = 0; i < znode->child_cnt - 1; i++) { + const union ubifs_key *key1, *key2; + + key1 = &znode->zbranch[i].key; + key2 = &znode->zbranch[i + 1].key; + + cmp = keys_cmp(c, key1, key2); + if (cmp > 0) { + dbg_err("bad key order (keys %d and %d)", i, i + 1); + err = 6; + goto out_dump; + } else if (cmp == 0 && !is_hash_key(c, key1)) { + /* These can only be keys with colliding hash */ + dbg_err("keys %d and %d are not hashed but equivalent", + i, i + 1); + err = 7; + goto out_dump; + } + } + + kfree(idx); + return 0; + +out_dump: + /* Every validation failure is reported to the caller as -EINVAL */ + ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err); + dbg_dump_node(c, idx); + kfree(idx); + return -EINVAL; +} + +/** + * ubifs_load_znode - load znode to TNC cache. + * @c: UBIFS file-system description object + * @zbr: znode branch + * @parent: znode's parent + * @iip: index in parent + * + * This function loads znode pointed to by @zbr into the TNC cache and + * returns pointer to it in case of success and an 'ERR_PTR()'-encoded + * negative error code in case of failure. On success @zbr->znode is set to + * the new znode. + */ +struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c, + struct ubifs_zbranch *zbr, + struct ubifs_znode *parent, int iip) +{ + int err; + struct ubifs_znode *znode; + + ubifs_assert(!zbr->znode); + /* + * A slab cache is not presently used for znodes because the znode size + * depends on the fanout which is stored in the superblock.
+ */ + znode = kzalloc(c->max_znode_sz, GFP_NOFS); + if (!znode) + return ERR_PTR(-ENOMEM); + + err = read_znode(c, zbr->lnum, zbr->offs, zbr->len, znode); + if (err) + goto out; + + /* Link the znode into the tree only after it has been read and validated */ + zbr->znode = znode; + znode->parent = parent; + znode->time = get_seconds(); + znode->iip = iip; + + return znode; + +out: + kfree(znode); + return ERR_PTR(err); +} + +/** + * ubifs_tnc_read_node - read a leaf node from the flash media. + * @c: UBIFS file-system description object + * @zbr: key and position of the node + * @node: node is returned here + * + * This function reads a node defined by @zbr from the flash media. Returns + * zero in case of success or a negative error code in case of failure. + * %-EINVAL is returned if the key stored in the node differs from the key in + * @zbr. + */ +int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *node) +{ + union ubifs_key key1, *key = &zbr->key; + int err, type = key_type(c, key); + + err = ubifs_read_node(c, node, type, zbr->len, zbr->lnum, zbr->offs); + + if (err) { + dbg_tnc("key %s", DBGKEY(key)); + return err; + } + + /* Make sure the key of the read node is correct */ + key_read(c, node + UBIFS_KEY_OFFSET, &key1); + if (!keys_eq(c, key, &key1)) { + ubifs_err("bad key in node at LEB %d:%d", + zbr->lnum, zbr->offs); + dbg_tnc("looked for key %s found node's key %s", + DBGKEY(key), DBGKEY1(&key1)); + dbg_dump_node(c, node); + return -EINVAL; + } + + return 0; +} diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h new file mode 100755 index 0000000..3eee07e --- /dev/null +++ b/fs/ubifs/ubifs-media.h @@ -0,0 +1,775 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation.
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file describes UBIFS on-flash format and contains definitions of all the + * relevant data structures and constants. + * + * All UBIFS on-flash objects are stored in the form of nodes. All nodes start + * with the UBIFS node magic number and have the same common header. Nodes + * always sit at 8-byte aligned positions on the media and node header sizes are + * also 8-byte aligned (except for the indexing node and the padding node). + */ + +#ifndef __UBIFS_MEDIA_H__ +#define __UBIFS_MEDIA_H__ + +/* UBIFS node magic number (must not have the padding byte first or last) */ +#define UBIFS_NODE_MAGIC 0x06101831 + +/* + * UBIFS on-flash format version. This version is increased when the on-flash + * format is changing. If this happens, UBIFS will support older versions as + * well. But older UBIFS code will not support newer formats. Format changes + * will be rare and only when absolutely necessary, e.g. to fix a bug or to add + * a new feature. + * + * UBIFS went into mainline kernel with format version 4. The older formats + * were development formats. + */ +#define UBIFS_FORMAT_VERSION 4 + +/* + * Read-only compatibility version. If the UBIFS format is changed, older UBIFS + * implementations will not be able to mount newer formats in read-write mode. + * However, depending on the change, it may be possible to mount newer formats + * in R/O mode.
This is indicated by the R/O compatibility version which is + * stored in the super-block. + * + * This is needed to support boot-loaders which only need R/O mounting. With + * this flag it is possible to do UBIFS format changes without a need to update + * boot-loaders. + */ +#define UBIFS_RO_COMPAT_VERSION 0 + +/* Minimum logical eraseblock size in bytes */ +#define UBIFS_MIN_LEB_SZ (15*1024) + +/* Initial CRC32 value used when calculating CRC checksums */ +#define UBIFS_CRC32_INIT 0xFFFFFFFFU + +/* + * UBIFS does not try to compress data if its length is less than the below + * constant. + */ +#define UBIFS_MIN_COMPR_LEN 128 + +/* + * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes + * shorter than uncompressed data length, UBIFS prefers to leave this data + * node uncompress, because it'll be read faster. + */ +#define UBIFS_MIN_COMPRESS_DIFF 64 + +/* Root inode number */ +#define UBIFS_ROOT_INO 1 + +/* Lowest inode number used for regular inodes (not UBIFS-only internal ones) */ +#define UBIFS_FIRST_INO 64 + +/* + * Maximum file name and extended attribute length (must be a multiple of 8, + * minus 1). + */ +#define UBIFS_MAX_NLEN 255 + +/* Maximum number of data journal heads */ +#define UBIFS_MAX_JHEADS 1 + +/* + * Size of UBIFS data block. Note, UBIFS is not a block oriented file-system, + * which means that it does not treat the underlying media as consisting of + * blocks like in case of hard drives. Do not be confused. UBIFS block is just + * the maximum amount of data which one data node can have or which can be + * attached to an inode node. 
+ */ +#define UBIFS_BLOCK_SIZE 4096 +#define UBIFS_BLOCK_SHIFT 12 + +/* UBIFS padding byte pattern (must not be first or last byte of node magic) */ +#define UBIFS_PADDING_BYTE 0xCE + +/* Maximum possible key length */ +#define UBIFS_MAX_KEY_LEN 16 + +/* Key length ("simple" format) */ +#define UBIFS_SK_LEN 8 + +/* Minimum index tree fanout */ +#define UBIFS_MIN_FANOUT 3 + +/* Maximum number of levels in UBIFS indexing B-tree */ +#define UBIFS_MAX_LEVELS 512 + +/* Maximum amount of data attached to an inode in bytes */ +#define UBIFS_MAX_INO_DATA UBIFS_BLOCK_SIZE + +/* LEB Properties Tree fanout (must be power of 2) and fanout shift */ +#define UBIFS_LPT_FANOUT 4 +#define UBIFS_LPT_FANOUT_SHIFT 2 + +/* LEB Properties Tree bit field sizes */ +#define UBIFS_LPT_CRC_BITS 16 +#define UBIFS_LPT_CRC_BYTES 2 +#define UBIFS_LPT_TYPE_BITS 4 + +/* The key is always at the same position in all keyed nodes */ +#define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key) + +/* + * LEB Properties Tree node types. + * + * UBIFS_LPT_PNODE: LPT leaf node (contains LEB properties) + * UBIFS_LPT_NNODE: LPT internal node + * UBIFS_LPT_LTAB: LPT's own lprops table + * UBIFS_LPT_LSAVE: LPT's save table (big model only) + * UBIFS_LPT_NODE_CNT: count of LPT node types + * UBIFS_LPT_NOT_A_NODE: all ones (15 for 4 bits) is never a valid node type + */ +enum { + UBIFS_LPT_PNODE, + UBIFS_LPT_NNODE, + UBIFS_LPT_LTAB, + UBIFS_LPT_LSAVE, + UBIFS_LPT_NODE_CNT, + UBIFS_LPT_NOT_A_NODE = (1 << UBIFS_LPT_TYPE_BITS) - 1, +}; + +/* + * UBIFS inode types. 
+ * + * UBIFS_ITYPE_REG: regular file + * UBIFS_ITYPE_DIR: directory + * UBIFS_ITYPE_LNK: soft link + * UBIFS_ITYPE_BLK: block device node + * UBIFS_ITYPE_CHR: character device node + * UBIFS_ITYPE_FIFO: fifo + * UBIFS_ITYPE_SOCK: socket + * UBIFS_ITYPES_CNT: count of supported file types + */ +enum { + UBIFS_ITYPE_REG, + UBIFS_ITYPE_DIR, + UBIFS_ITYPE_LNK, + UBIFS_ITYPE_BLK, + UBIFS_ITYPE_CHR, + UBIFS_ITYPE_FIFO, + UBIFS_ITYPE_SOCK, + UBIFS_ITYPES_CNT, +}; + +/* + * Supported key hash functions. + * + * UBIFS_KEY_HASH_R5: R5 hash + * UBIFS_KEY_HASH_TEST: test hash which just returns first 4 bytes of the name + */ +enum { + UBIFS_KEY_HASH_R5, + UBIFS_KEY_HASH_TEST, +}; + +/* + * Supported key formats. + * + * UBIFS_SIMPLE_KEY_FMT: simple key format + */ +enum { + UBIFS_SIMPLE_KEY_FMT, +}; + +/* + * The simple key format uses 29 bits for storing UBIFS block number and hash + * value. + */ +#define UBIFS_S_KEY_BLOCK_BITS 29 +#define UBIFS_S_KEY_BLOCK_MASK 0x1FFFFFFF +#define UBIFS_S_KEY_HASH_BITS UBIFS_S_KEY_BLOCK_BITS +#define UBIFS_S_KEY_HASH_MASK UBIFS_S_KEY_BLOCK_MASK + +/* + * Key types. + * + * UBIFS_INO_KEY: inode node key + * UBIFS_DATA_KEY: data node key + * UBIFS_DENT_KEY: directory entry node key + * UBIFS_XENT_KEY: extended attribute entry key + * UBIFS_KEY_TYPES_CNT: number of supported key types + */ +enum { + UBIFS_INO_KEY, + UBIFS_DATA_KEY, + UBIFS_DENT_KEY, + UBIFS_XENT_KEY, + UBIFS_KEY_TYPES_CNT, +}; + +/* Count of LEBs reserved for the superblock area */ +#define UBIFS_SB_LEBS 1 +/* Count of LEBs reserved for the master area */ +#define UBIFS_MST_LEBS 2 + +/* First LEB of the superblock area */ +#define UBIFS_SB_LNUM 0 +/* First LEB of the master area */ +#define UBIFS_MST_LNUM (UBIFS_SB_LNUM + UBIFS_SB_LEBS) +/* First LEB of the log area */ +#define UBIFS_LOG_LNUM (UBIFS_MST_LNUM + UBIFS_MST_LEBS) + +/* + * The below constants define the absolute minimum values for various UBIFS + * media areas. 
Many of them actually depend on flash geometry and the FS + * configuration (number of journal heads, orphan LEBs, etc). This means that + * the smallest volume size which can be used for UBIFS cannot be pre-defined + * by these constants. The file-system that meets the below limitation will not + * necessarily mount. UBIFS does run-time calculations and validates the FS + * size. + */ + +/* Minimum number of logical eraseblocks in the log */ +#define UBIFS_MIN_LOG_LEBS 2 +/* Minimum number of bud logical eraseblocks (one for each head) */ +#define UBIFS_MIN_BUD_LEBS 3 +/* Minimum number of journal logical eraseblocks */ +#define UBIFS_MIN_JNL_LEBS (UBIFS_MIN_LOG_LEBS + UBIFS_MIN_BUD_LEBS) +/* Minimum number of LPT area logical eraseblocks */ +#define UBIFS_MIN_LPT_LEBS 2 +/* Minimum number of orphan area logical eraseblocks */ +#define UBIFS_MIN_ORPH_LEBS 1 +/* + * Minimum number of main area logical eraseblocks (buds, 3 for the index, 1 + * for GC, 1 for deletions, and at least 1 for committed data). + */ +#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 6) + +/* Minimum number of logical eraseblocks */ +#define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \ + UBIFS_MIN_LOG_LEBS + UBIFS_MIN_LPT_LEBS + \ + UBIFS_MIN_ORPH_LEBS + UBIFS_MIN_MAIN_LEBS) + +/* Node sizes (N.B. 
these are guaranteed to be multiples of 8) */ +#define UBIFS_CH_SZ sizeof(struct ubifs_ch) +#define UBIFS_INO_NODE_SZ sizeof(struct ubifs_ino_node) +#define UBIFS_DATA_NODE_SZ sizeof(struct ubifs_data_node) +#define UBIFS_DENT_NODE_SZ sizeof(struct ubifs_dent_node) +#define UBIFS_TRUN_NODE_SZ sizeof(struct ubifs_trun_node) +#define UBIFS_PAD_NODE_SZ sizeof(struct ubifs_pad_node) +#define UBIFS_SB_NODE_SZ sizeof(struct ubifs_sb_node) +#define UBIFS_MST_NODE_SZ sizeof(struct ubifs_mst_node) +#define UBIFS_REF_NODE_SZ sizeof(struct ubifs_ref_node) +#define UBIFS_IDX_NODE_SZ sizeof(struct ubifs_idx_node) +#define UBIFS_CS_NODE_SZ sizeof(struct ubifs_cs_node) +#define UBIFS_ORPH_NODE_SZ sizeof(struct ubifs_orph_node) +/* Extended attribute entry nodes are identical to directory entry nodes */ +#define UBIFS_XENT_NODE_SZ UBIFS_DENT_NODE_SZ +/* Only this does not have to be multiple of 8 bytes */ +#define UBIFS_BRANCH_SZ sizeof(struct ubifs_branch) + +/* Maximum node sizes (N.B. these are guaranteed to be multiples of 8) */ +#define UBIFS_MAX_DATA_NODE_SZ (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE) +#define UBIFS_MAX_INO_NODE_SZ (UBIFS_INO_NODE_SZ + UBIFS_MAX_INO_DATA) +#define UBIFS_MAX_DENT_NODE_SZ (UBIFS_DENT_NODE_SZ + UBIFS_MAX_NLEN + 1) +#define UBIFS_MAX_XENT_NODE_SZ UBIFS_MAX_DENT_NODE_SZ + +/* The largest UBIFS node */ +#define UBIFS_MAX_NODE_SZ UBIFS_MAX_INO_NODE_SZ + +/* + * On-flash inode flags. + * + * UBIFS_COMPR_FL: use compression for this inode + * UBIFS_SYNC_FL: I/O on this inode has to be synchronous + * UBIFS_IMMUTABLE_FL: inode is immutable + * UBIFS_APPEND_FL: writes to the inode may only append data + * UBIFS_DIRSYNC_FL: I/O on this directory inode has to be synchronous + * UBIFS_XATTR_FL: this inode is the inode for an extended attribute value + * + * Note, these are on-flash flags which correspond to ioctl flags + * (@FS_COMPR_FL, etc). They have the same values now, but generally, do not + * have to be the same. 
+ */ +enum { + UBIFS_COMPR_FL = 0x01, + UBIFS_SYNC_FL = 0x02, + UBIFS_IMMUTABLE_FL = 0x04, + UBIFS_APPEND_FL = 0x08, + UBIFS_DIRSYNC_FL = 0x10, + UBIFS_XATTR_FL = 0x20, +}; + +/* Inode flag bits used by UBIFS */ +#define UBIFS_FL_MASK 0x0000001F + +/* + * UBIFS compression algorithms. + * + * UBIFS_COMPR_NONE: no compression + * UBIFS_COMPR_LZO: LZO compression + * UBIFS_COMPR_ZLIB: ZLIB compression + * UBIFS_COMPR_TYPES_CNT: count of supported compression types + */ +enum { + UBIFS_COMPR_NONE, + UBIFS_COMPR_LZO, + UBIFS_COMPR_ZLIB, + UBIFS_COMPR_TYPES_CNT, +}; + +/* + * UBIFS node types. + * + * UBIFS_INO_NODE: inode node + * UBIFS_DATA_NODE: data node + * UBIFS_DENT_NODE: directory entry node + * UBIFS_XENT_NODE: extended attribute node + * UBIFS_TRUN_NODE: truncation node + * UBIFS_PAD_NODE: padding node + * UBIFS_SB_NODE: superblock node + * UBIFS_MST_NODE: master node + * UBIFS_REF_NODE: LEB reference node + * UBIFS_IDX_NODE: index node + * UBIFS_CS_NODE: commit start node + * UBIFS_ORPH_NODE: orphan node + * UBIFS_NODE_TYPES_CNT: count of supported node types + * + * Note, we index arrays by these numbers, so keep them low and contiguous. + * Node type constants for inodes, direntries and so on have to be the same as + * corresponding key type constants. + */ +enum { + UBIFS_INO_NODE, + UBIFS_DATA_NODE, + UBIFS_DENT_NODE, + UBIFS_XENT_NODE, + UBIFS_TRUN_NODE, + UBIFS_PAD_NODE, + UBIFS_SB_NODE, + UBIFS_MST_NODE, + UBIFS_REF_NODE, + UBIFS_IDX_NODE, + UBIFS_CS_NODE, + UBIFS_ORPH_NODE, + UBIFS_NODE_TYPES_CNT, +}; + +/* + * Master node flags. + * + * UBIFS_MST_DIRTY: rebooted uncleanly - master node is dirty + * UBIFS_MST_NO_ORPHS: no orphan inodes present + * UBIFS_MST_RCVRY: written by recovery + */ +enum { + UBIFS_MST_DIRTY = 1, + UBIFS_MST_NO_ORPHS = 2, + UBIFS_MST_RCVRY = 4, +}; + +/* + * Node group type (used by recovery to recover whole group or none). 
+ * + * UBIFS_NO_NODE_GROUP: this node is not part of a group + * UBIFS_IN_NODE_GROUP: this node is a part of a group + * UBIFS_LAST_OF_NODE_GROUP: this node is the last in a group + */ +enum { + UBIFS_NO_NODE_GROUP = 0, + UBIFS_IN_NODE_GROUP, + UBIFS_LAST_OF_NODE_GROUP, +}; + +/* + * Superblock flags. + * + * UBIFS_FLG_BIGLPT: set if the "big" LPT model is used + */ +enum { + UBIFS_FLG_BIGLPT = 0x02, +}; + +/** + * struct ubifs_ch - common header node. + * @magic: UBIFS node magic number (%UBIFS_NODE_MAGIC) + * @crc: CRC-32 checksum of the node header + * @sqnum: sequence number + * @len: full node length + * @node_type: node type + * @group_type: node group type + * @padding: reserved for future, zeroes + * + * Every UBIFS node starts with this common part. If the node has a key, the + * key always goes next. + */ +struct ubifs_ch { + __le32 magic; + __le32 crc; + __le64 sqnum; + __le32 len; + __u8 node_type; + __u8 group_type; + __u8 padding[2]; +} __attribute__ ((packed)); + +/** + * union ubifs_dev_desc - device node descriptor. + * @new: new type device descriptor + * @huge: huge type device descriptor + * + * This data structure describes major/minor numbers of a device node. If an + * inode is a device node then its data contains an object of this type. UBIFS + * uses standard Linux "new" and "huge" device node encodings. + */ +union ubifs_dev_desc { + __le32 new; + __le64 huge; +} __attribute__ ((packed)); + +/** + * struct ubifs_ino_node - inode node. 
+ * @ch: common header + * @key: node key + * @creat_sqnum: sequence number at time of creation + * @size: inode size in bytes (amount of uncompressed data) + * @atime_sec: access time seconds + * @ctime_sec: creation time seconds + * @mtime_sec: modification time seconds + * @atime_nsec: access time nanoseconds + * @ctime_nsec: creation time nanoseconds + * @mtime_nsec: modification time nanoseconds + * @nlink: number of hard links + * @uid: owner ID + * @gid: group ID + * @mode: access flags + * @flags: per-inode flags (%UBIFS_COMPR_FL, %UBIFS_SYNC_FL, etc) + * @data_len: inode data length + * @xattr_cnt: count of extended attributes this inode has + * @xattr_size: summarized size of all extended attributes in bytes + * @padding1: reserved for future, zeroes + * @xattr_names: sum of lengths of all extended attribute names belonging to + * this inode + * @compr_type: compression type used for this inode + * @padding2: reserved for future, zeroes + * @data: data attached to the inode + * + * Note, even though inode compression type is defined by @compr_type, some + * nodes of this inode may be compressed with a different compressor - this + * happens if compression type is changed while the inode already has data + * nodes. But @compr_type will be used for further writes to the inode. + * + * Note, do not forget to amend 'zero_ino_node_unused()' function when changing + * the padding fields. + */ +struct ubifs_ino_node { + struct ubifs_ch ch; + __u8 key[UBIFS_MAX_KEY_LEN]; + __le64 creat_sqnum; + __le64 size; + __le64 atime_sec; + __le64 ctime_sec; + __le64 mtime_sec; + __le32 atime_nsec; + __le32 ctime_nsec; + __le32 mtime_nsec; + __le32 nlink; + __le32 uid; + __le32 gid; + __le32 mode; + __le32 flags; + __le32 data_len; + __le32 xattr_cnt; + __le32 xattr_size; + __u8 padding1[4]; /* Watch 'zero_ino_node_unused()' if changing! */ + __le32 xattr_names; + __le16 compr_type; + __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! 
*/ + __u8 data[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_dent_node - directory entry node. + * @ch: common header + * @key: node key + * @inum: target inode number + * @padding1: reserved for future, zeroes + * @type: type of the target inode (%UBIFS_ITYPE_REG, %UBIFS_ITYPE_DIR, etc) + * @nlen: name length + * @padding2: reserved for future, zeroes + * @name: zero-terminated name + * + * Note, do not forget to amend 'zero_dent_node_unused()' function when + * changing the padding fields. + */ +struct ubifs_dent_node { + struct ubifs_ch ch; + __u8 key[UBIFS_MAX_KEY_LEN]; + __le64 inum; + __u8 padding1; + __u8 type; + __le16 nlen; + __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ + __u8 name[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_data_node - data node. + * @ch: common header + * @key: node key + * @size: uncompressed data size in bytes + * @compr_type: compression type (%UBIFS_COMPR_NONE, %UBIFS_COMPR_LZO, etc) + * @padding: reserved for future, zeroes + * @data: data + * + * Note, do not forget to amend 'zero_data_node_unused()' function when + * changing the padding fields. + */ +struct ubifs_data_node { + struct ubifs_ch ch; + __u8 key[UBIFS_MAX_KEY_LEN]; + __le32 size; + __le16 compr_type; + __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ + __u8 data[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_trun_node - truncation node. + * @ch: common header + * @inum: truncated inode number + * @padding: reserved for future, zeroes + * @old_size: size before truncation + * @new_size: size after truncation + * + * This node exists only in the journal and never goes to the main area. Note, + * do not forget to amend 'zero_trun_node_unused()' function when changing the + * padding fields. + */ +struct ubifs_trun_node { + struct ubifs_ch ch; + __le32 inum; + __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! 
*/ + __le64 old_size; + __le64 new_size; +} __attribute__ ((packed)); + +/** + * struct ubifs_pad_node - padding node. + * @ch: common header + * @pad_len: how many bytes after this node are unused (because padded) + * @padding: reserved for future, zeroes + */ +struct ubifs_pad_node { + struct ubifs_ch ch; + __le32 pad_len; +} __attribute__ ((packed)); + +/** + * struct ubifs_sb_node - superblock node. + * @ch: common header + * @padding: reserved for future, zeroes + * @key_hash: type of hash function used in keys + * @key_fmt: format of the key + * @flags: file-system flags (%UBIFS_FLG_BIGLPT, etc) + * @min_io_size: minimal input/output unit size + * @leb_size: logical eraseblock size in bytes + * @leb_cnt: count of LEBs used by file-system + * @max_leb_cnt: maximum count of LEBs used by file-system + * @max_bud_bytes: maximum amount of data stored in buds + * @log_lebs: log size in logical eraseblocks + * @lpt_lebs: number of LEBs used for lprops table + * @orph_lebs: number of LEBs used for recording orphans + * @jhead_cnt: count of journal heads + * @fanout: tree fanout (max. 
number of links per indexing node) + * @lsave_cnt: number of LEB numbers in LPT's save table + * @fmt_version: UBIFS on-flash format version + * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) + * @padding1: reserved for future, zeroes + * @rp_uid: reserve pool UID + * @rp_gid: reserve pool GID + * @rp_size: size of the reserved pool in bytes + * @padding2: reserved for future, zeroes + * @time_gran: time granularity in nanoseconds + * @uuid: UUID generated when the file system image was created + * @ro_compat_version: UBIFS R/O compatibility version + */ +struct ubifs_sb_node { + struct ubifs_ch ch; + __u8 padding[2]; + __u8 key_hash; + __u8 key_fmt; + __le32 flags; + __le32 min_io_size; + __le32 leb_size; + __le32 leb_cnt; + __le32 max_leb_cnt; + __le64 max_bud_bytes; + __le32 log_lebs; + __le32 lpt_lebs; + __le32 orph_lebs; + __le32 jhead_cnt; + __le32 fanout; + __le32 lsave_cnt; + __le32 fmt_version; + __le16 default_compr; + __u8 padding1[2]; + __le32 rp_uid; + __le32 rp_gid; + __le64 rp_size; + __le32 time_gran; + __u8 uuid[16]; + __le32 ro_compat_version; + __u8 padding2[3968]; +} __attribute__ ((packed)); + +/** + * struct ubifs_mst_node - master node. 
+ * @ch: common header + * @highest_inum: highest inode number in the committed index + * @cmt_no: commit number + * @flags: various flags (%UBIFS_MST_DIRTY, etc) + * @log_lnum: start of the log + * @root_lnum: LEB number of the root indexing node + * @root_offs: offset within @root_lnum + * @root_len: root indexing node length + * @gc_lnum: LEB reserved for garbage collection (%-1 value means the LEB was + * not reserved and should be reserved on mount) + * @ihead_lnum: LEB number of index head + * @ihead_offs: offset of index head + * @index_size: size of index on flash + * @total_free: total free space in bytes + * @total_dirty: total dirty space in bytes + * @total_used: total used space in bytes (includes only data LEBs) + * @total_dead: total dead space in bytes (includes only data LEBs) + * @total_dark: total dark space in bytes (includes only data LEBs) + * @lpt_lnum: LEB number of LPT root nnode + * @lpt_offs: offset of LPT root nnode + * @nhead_lnum: LEB number of LPT head + * @nhead_offs: offset of LPT head + * @ltab_lnum: LEB number of LPT's own lprops table + * @ltab_offs: offset of LPT's own lprops table + * @lsave_lnum: LEB number of LPT's save table (big model only) + * @lsave_offs: offset of LPT's save table (big model only) + * @lscan_lnum: LEB number of last LPT scan + * @empty_lebs: number of empty logical eraseblocks + * @idx_lebs: number of indexing logical eraseblocks + * @leb_cnt: count of LEBs used by file-system + * @padding: reserved for future, zeroes + */ +struct ubifs_mst_node { + struct ubifs_ch ch; + __le64 highest_inum; + __le64 cmt_no; + __le32 flags; + __le32 log_lnum; + __le32 root_lnum; + __le32 root_offs; + __le32 root_len; + __le32 gc_lnum; + __le32 ihead_lnum; + __le32 ihead_offs; + __le64 index_size; + __le64 total_free; + __le64 total_dirty; + __le64 total_used; + __le64 total_dead; + __le64 total_dark; + __le32 lpt_lnum; + __le32 lpt_offs; + __le32 nhead_lnum; + __le32 nhead_offs; + __le32 ltab_lnum; + __le32 ltab_offs; + 
__le32 lsave_lnum; + __le32 lsave_offs; + __le32 lscan_lnum; + __le32 empty_lebs; + __le32 idx_lebs; + __le32 leb_cnt; + __u8 padding[344]; +} __attribute__ ((packed)); + +/** + * struct ubifs_ref_node - logical eraseblock reference node. + * @ch: common header + * @lnum: the referred logical eraseblock number + * @offs: start offset in the referred LEB + * @jhead: journal head number + * @padding: reserved for future, zeroes + */ +struct ubifs_ref_node { + struct ubifs_ch ch; + __le32 lnum; + __le32 offs; + __le32 jhead; + __u8 padding[28]; +} __attribute__ ((packed)); + +/** + * struct ubifs_branch - key/reference/length branch + * @lnum: LEB number of the target node + * @offs: offset within @lnum + * @len: target node length + * @key: key + */ +struct ubifs_branch { + __le32 lnum; + __le32 offs; + __le32 len; + __u8 key[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_idx_node - indexing node. + * @ch: common header + * @child_cnt: number of child index nodes + * @level: tree level + * @branches: LEB number / offset / length / key branches + */ +struct ubifs_idx_node { + struct ubifs_ch ch; + __le16 child_cnt; + __le16 level; + __u8 branches[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_cs_node - commit start node. + * @ch: common header + * @cmt_no: commit number + */ +struct ubifs_cs_node { + struct ubifs_ch ch; + __le64 cmt_no; +} __attribute__ ((packed)); + +/** + * struct ubifs_orph_node - orphan node. + * @ch: common header + * @cmt_no: commit number (also top bit is set on the last node of the commit) + * @inos: inode numbers of orphans + */ +struct ubifs_orph_node { + struct ubifs_ch ch; + __le64 cmt_no; + __le64 inos[]; +} __attribute__ ((packed)); + +#endif /* __UBIFS_MEDIA_H__ */ diff --git a/fs/ubifs/ubifs.c b/fs/ubifs/ubifs.c new file mode 100755 index 0000000..06abe80 --- /dev/null +++ b/fs/ubifs/ubifs.c @@ -0,0 +1,588 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. 
/* compress.c */

/**
 * ubifs_decompress - copy a data node payload into the output buffer.
 * @in_buf: payload taken from the data node
 * @in_len: payload length in bytes
 * @out_buf: destination buffer
 * @out_len: on entry the capacity of @out_buf in bytes, on successful return
 *           the number of bytes stored in @out_buf
 * @compr_type: on-flash compression type of the payload (not interpreted)
 *
 * This build carries no decompressor: the payload is copied verbatim whatever
 * @compr_type says, so only uncompressed nodes yield usable data.
 *
 * The payload length comes from flash, hence it is validated against the
 * capacity passed in @out_len before anything is written. Returns zero on
 * success and %-EINVAL if @in_len is negative or does not fit; in that case
 * neither @out_buf nor @out_len is modified.
 */
int ubifs_decompress(const void *in_buf, int in_len, void *out_buf,
		     int *out_len, int compr_type)
{
	(void)compr_type;

	/* Never write more than the caller said the buffer can hold */
	if (in_len < 0 || in_len > *out_len)
		return -EINVAL;

	memcpy(out_buf, in_buf, in_len);
	*out_len = in_len;
	return 0;
}
+ */ + +static int filldir(struct ubifs_info *c, const char *name, int namlen, + u64 ino, unsigned int d_type) +{ + struct inode *inode; + char filetime[32]; + + switch (d_type) { + case UBIFS_ITYPE_REG: + printf("\t"); + break; + case UBIFS_ITYPE_DIR: + printf("<DIR>\t"); + break; + case UBIFS_ITYPE_LNK: + printf("<LNK>\t"); + break; + default: + printf("other\t"); + break; + } + + inode = ubifs_iget(c->vfs_sb, ino); + if (IS_ERR(inode)) { + printf("%s: Error in ubifs_iget(), ino=%lld ret=%p!\n", + __func__, ino, inode); + return -1; + } + ctime_r((time_t *)&inode->i_mtime, filetime); + printf("%9lld %24.24s ", inode->i_size, filetime); + ubifs_iput(inode); + + printf("%s\n", name); + + return 0; +} + +static int ubifs_printdir(struct file *file, void *dirent) +{ + int err, over = 0; + struct qstr nm; + union ubifs_key key; + struct ubifs_dent_node *dent; + struct inode *dir = file->f_path.dentry->d_inode; + struct ubifs_info *c = dir->i_sb->s_fs_info; + + dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos); + + if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2) + /* + * The directory was seek'ed to a senseless position or there + * are no more entries. + */ + return 0; + + if (file->f_pos == 1) { + /* Find the first entry in TNC and save it */ + lowest_dent_key(c, &key, dir->i_ino); + nm.name = NULL; + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + goto out; + } + + file->f_pos = key_hash_flash(c, &dent->key); + file->private_data = dent; + } + + dent = file->private_data; + if (!dent) { + /* + * The directory was seek'ed to and is now readdir'ed. + * Find the entry corresponding to @file->f_pos or the + * closest one. 
+ */ + dent_key_init_hash(c, &key, dir->i_ino, file->f_pos); + nm.name = NULL; + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + goto out; + } + file->f_pos = key_hash_flash(c, &dent->key); + file->private_data = dent; + } + + while (1) { + dbg_gen("feed '%s', ino %llu, new f_pos %#x", + dent->name, (unsigned long long)le64_to_cpu(dent->inum), + key_hash_flash(c, &dent->key)); + ubifs_assert(le64_to_cpu(dent->ch.sqnum) > ubifs_inode(dir)->creat_sqnum); + + nm.len = le16_to_cpu(dent->nlen); + over = filldir(c, (char *)dent->name, nm.len, + le64_to_cpu(dent->inum), dent->type); + if (over) + return 0; + + /* Switch to the next entry */ + key_read(c, &dent->key, &key); + nm.name = (char *)dent->name; + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + goto out; + } + + kfree(file->private_data); + file->f_pos = key_hash_flash(c, &dent->key); + file->private_data = dent; + cond_resched(); + } + +out: + if (err != -ENOENT) { + ubifs_err("cannot find next direntry, error %d", err); + return err; + } + + kfree(file->private_data); + file->private_data = NULL; + file->f_pos = 2; + return 0; +} + +static int ubifs_finddir(struct super_block *sb, char *dirname, + unsigned long root_inum, unsigned long *inum) +{ + int err; + struct qstr nm; + union ubifs_key key; + struct ubifs_dent_node *dent; + struct ubifs_info *c; + struct file *file; + struct dentry *dentry; + struct inode *dir; + + file = kzalloc(sizeof(struct file), 0); + dentry = kzalloc(sizeof(struct dentry), 0); + dir = kzalloc(sizeof(struct inode), 0); + if (!file || !dentry || !dir) { + printf("%s: Error, no memory for malloc!\n", __func__); + err = -ENOMEM; + goto out; + } + + dir->i_sb = sb; + file->f_path.dentry = dentry; + file->f_path.dentry->d_parent = dentry; + file->f_path.dentry->d_inode = dir; + file->f_path.dentry->d_inode->i_ino = root_inum; + c = sb->s_fs_info; + + dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, 
file->f_pos); + + /* Find the first entry in TNC and save it */ + lowest_dent_key(c, &key, dir->i_ino); + nm.name = NULL; + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + goto out; + } + + file->f_pos = key_hash_flash(c, &dent->key); + file->private_data = dent; + + while (1) { + dbg_gen("feed '%s', ino %llu, new f_pos %#x", + dent->name, (unsigned long long)le64_to_cpu(dent->inum), + key_hash_flash(c, &dent->key)); + ubifs_assert(le64_to_cpu(dent->ch.sqnum) > ubifs_inode(dir)->creat_sqnum); + + nm.len = le16_to_cpu(dent->nlen); + if ((strncmp(dirname, (char *)dent->name, nm.len) == 0) && + (strlen(dirname) == nm.len)) { + *inum = le64_to_cpu(dent->inum); + return 1; + } + + /* Switch to the next entry */ + key_read(c, &dent->key, &key); + nm.name = (char *)dent->name; + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + goto out; + } + + kfree(file->private_data); + file->f_pos = key_hash_flash(c, &dent->key); + file->private_data = dent; + cond_resched(); + } + +out: + if (err != -ENOENT) { + ubifs_err("cannot find next direntry, error %d", err); + return err; + } + + if (file) + free(file); + if (dentry) + free(dentry); + if (dir) + free(dir); + + if (file->private_data) + kfree(file->private_data); + file->private_data = NULL; + file->f_pos = 2; + return 0; +} + +static unsigned long ubifs_findfile(struct super_block *sb, char *filename) +{ + int ret; + char *next; + char fpath[128]; + char symlinkpath[128]; + char *name = fpath; + unsigned long root_inum = 1; + unsigned long inum; + int symlink_count = 0; /* Don't allow symlink recursion */ + + strcpy(fpath, filename); + + /* Remove all leading slashes */ + while (*name == '/') + name++; + + /* + * Handle root-direcoty ('/') + */ + inum = root_inum; + if (!name || *name == '\0') + return inum; + + for (;;) { + struct inode *inode; + struct ubifs_inode *ui; + + /* Extract the actual part from the pathname. 
*/ + next = strchr(name, '/'); + if (next) { + /* Remove all leading slashes. */ + while (*next == '/') + *(next++) = '\0'; + } + + ret = ubifs_finddir(sb, name, root_inum, &inum); + if (!ret) + return 0; + inode = ubifs_iget(sb, inum); + + if (!inode) + return 0; + ui = ubifs_inode(inode); + + if ((inode->i_mode & S_IFMT) == S_IFLNK) { + char link_name[64]; + char buf[128]; + + /* We have some sort of symlink recursion, bail out */ + if (symlink_count++ > 8) { + printf("Symlink recursion, aborting\n"); + return 0; + } + memcpy(link_name, ui->data, ui->data_len); + link_name[ui->data_len] = '\0'; + + if (link_name[0] == '/') { + /* Absolute path, redo everything without + * the leading slash */ + next = name = link_name + 1; + root_inum = 1; + continue; + } + /* Relative to cur dir */ + sprintf(buf, "%s/%s", + link_name, next == NULL ? "" : next); + memcpy(symlinkpath, buf, sizeof(buf)); + next = name = symlinkpath; + continue; + } + + /* + * Check if directory with this name exists + */ + + /* Found the node! 
*/ + if (!next || *next == '\0') + return inum; + + root_inum = inum; + name = next; + } + + return 0; +} + +int ubifs_ls(char *filename) +{ + struct ubifs_info *c = ubifs_sb->s_fs_info; + struct file *file; + struct dentry *dentry; + struct inode *dir; + void *dirent = NULL; + unsigned long inum; + int ret = 0; + + c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READONLY); + inum = ubifs_findfile(ubifs_sb, filename); + if (!inum) { + ret = -1; + goto out; + } + + file = kzalloc(sizeof(struct file), 0); + dentry = kzalloc(sizeof(struct dentry), 0); + dir = kzalloc(sizeof(struct inode), 0); + if (!file || !dentry || !dir) { + printf("%s: Error, no memory for malloc!\n", __func__); + ret = -ENOMEM; + goto out_mem; + } + + dir->i_sb = ubifs_sb; + file->f_path.dentry = dentry; + file->f_path.dentry->d_parent = dentry; + file->f_path.dentry->d_inode = dir; + file->f_path.dentry->d_inode->i_ino = inum; + file->f_pos = 1; + file->private_data = NULL; + ubifs_printdir(file, dirent); + +out_mem: + if (file) + free(file); + if (dentry) + free(dentry); + if (dir) + free(dir); + +out: + ubi_close_volume(c->ubi); + return ret; +} + +/* + * ubifsload... 
+ */ + +/* file.c */ + +static inline void *kmap(struct page *page) +{ + return page->addr; +} + +static int read_block(struct inode *inode, void *addr, unsigned int block, + struct ubifs_data_node *dn) +{ + struct ubifs_info *c = inode->i_sb->s_fs_info; + int err, len, out_len; + union ubifs_key key; + unsigned int dlen; + + data_key_init(c, &key, inode->i_ino, block); + err = ubifs_tnc_lookup(c, &key, dn); + if (err) { + if (err == -ENOENT) + /* Not found, so it must be a hole */ + memset(addr, 0, UBIFS_BLOCK_SIZE); + return err; + } + + ubifs_assert(le64_to_cpu(dn->ch.sqnum) > ubifs_inode(inode)->creat_sqnum); + + len = le32_to_cpu(dn->size); + if (len <= 0 || len > UBIFS_BLOCK_SIZE) + goto dump; + + dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; + out_len = UBIFS_BLOCK_SIZE; + err = ubifs_decompress(&dn->data, dlen, addr, &out_len, + le16_to_cpu(dn->compr_type)); + if (err || len != out_len) + goto dump; + + /* + * Data length can be less than a full block, even for blocks that are + * not the last in the file (e.g., as a result of making a hole and + * appending data). Ensure that the remainder is zeroed out. 
+ */ + if (len < UBIFS_BLOCK_SIZE) + memset(addr + len, 0, UBIFS_BLOCK_SIZE - len); + + return 0; + +dump: + ubifs_err("bad data node (block %u, inode %lu)", + block, inode->i_ino); + dbg_dump_node(c, dn); + return -EINVAL; +} + +static int do_readpage(struct ubifs_info *c, struct inode *inode, struct page *page) +{ + void *addr; + int err = 0, i; + unsigned int block, beyond; + struct ubifs_data_node *dn; + loff_t i_size = inode->i_size; + + dbg_gen("ino %lu, pg %lu, i_size %lld", + inode->i_ino, page->index, i_size); + + addr = kmap(page); + + block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; + beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; + if (block >= beyond) { + /* Reading beyond inode */ + memset(addr, 0, PAGE_CACHE_SIZE); + goto out; + } + + dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS); + if (!dn) + return -ENOMEM; + + i = 0; + while (1) { + int ret; + + if (block >= beyond) { + /* Reading beyond inode */ + err = -ENOENT; + memset(addr, 0, UBIFS_BLOCK_SIZE); + } else { + ret = read_block(inode, addr, block, dn); + if (ret) { + err = ret; + if (err != -ENOENT) + break; + } else if (block + 1 == beyond) { + int dlen = le32_to_cpu(dn->size); + int ilen = i_size & (UBIFS_BLOCK_SIZE - 1); + + if (ilen && ilen < dlen) + memset(addr + ilen, 0, dlen - ilen); + } + } + if (++i >= UBIFS_BLOCKS_PER_PAGE) + break; + block += 1; + addr += UBIFS_BLOCK_SIZE; + } + if (err) { + if (err == -ENOENT) { + /* Not found, so it must be a hole */ + dbg_gen("hole"); + goto out_free; + } + ubifs_err("cannot read page %lu of inode %lu, error %d", + page->index, inode->i_ino, err); + goto error; + } + +out_free: + kfree(dn); +out: + return 0; + +error: + kfree(dn); + return err; +} + +int ubifs_load(char *filename, u32 addr, u32 size) +{ + struct ubifs_info *c = ubifs_sb->s_fs_info; + unsigned long inum; + struct inode *inode; + struct page page; + int err = 0; + int i; + int count; + + c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READONLY); + /* 
ubifs_findfile will resolve symlinks, so we know that we get + * the real file here */ + inum = ubifs_findfile(ubifs_sb, filename); + if (!inum) { + err = -1; + goto out; + } + + /* + * Read file inode + */ + inode = ubifs_iget(ubifs_sb, inum); + if (IS_ERR(inode)) { + printf("%s: Error reading inode %ld!\n", __func__, inum); + err = PTR_ERR(inode); + goto out; + } + + /* + * If no size was specified or if size bigger than filesize + * set size to filesize + */ + if ((size == 0) || (size > inode->i_size)) + size = inode->i_size; + + count = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; + printf("Loading file '%s' to addr 0x%08x with size %d (0x%08x)...\n", + filename, addr, size, size); + + page.addr = (void *)addr; + page.index = 0; + page.inode = inode; + for (i = 0; i < count; i++) { + err = do_readpage(c, inode, &page); + if (err) + break; + + page.addr += PAGE_SIZE; + page.index++; + } + + if (err) + printf("Error reading file '%s'\n", filename); + else + printf("Done\n"); + + ubifs_iput(inode); + +out: + ubi_close_volume(c->ubi); + return err; +} diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h new file mode 100755 index 0000000..9a03c24 --- /dev/null +++ b/fs/ubifs/ubifs.h @@ -0,0 +1,1677 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation + * + * (C) Copyright 2008-2009 + * Stefan Roese, DENX Software Engineering, sr@denx.de. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +#ifndef __UBIFS_H__ +#define __UBIFS_H__ + +#if 0 /* Enable for debugging output */ +#define CONFIG_UBIFS_FS_DEBUG +#define CONFIG_UBIFS_FS_DEBUG_MSG_LVL 3 +#endif + +#include <ubi_uboot.h> +#include <linux/ctype.h> +#include <linux/time.h> +#include <linux/math64.h> +#include "ubifs-media.h" + +struct dentry; +struct file; +struct iattr; +struct kstat; +struct vfsmount; + +extern struct super_block *ubifs_sb; + +extern unsigned int ubifs_msg_flags; +extern unsigned int ubifs_chk_flags; +extern unsigned int ubifs_tst_flags; + +#define printk printf + +#define kmalloc(size, flags) malloc(size) +#define kfree(size) free(size) +#define kzalloc(size, flags) calloc(size, 1) + +#define vmalloc(a) malloc(a) +#define vfree(ptr) do { if (ptr != NULL) free(ptr); } while(0) + +#define PAGE_SIZE 4096 + +#define pgoff_t unsigned long + +/* + * We "simulate" the Linux page struct much simpler here + */ +struct page { + pgoff_t index; + void *addr; + struct inode *inode; +}; + +void iput(struct inode *inode); + +/* + * The atomic operations are used for budgeting etc which is not + * needed for the read-only U-Boot implementation: + */ +#define atomic_long_inc(a) +#define atomic_long_dec(a) +#define atomic_long_sub(a, b) + +/* linux/include/time.h */ + +struct timespec { + time_t tv_sec; /* seconds */ + long tv_nsec; /* nanoseconds */ +}; + +/* linux/include/dcache.h */ + +/* + * "quick string" -- eases parameter passing, but more importantly + * saves "metadata" about the string (ie length and the hash). + * + * hash comes first so it snuggles against d_parent in the + * dentry. 
+ */ +struct qstr { + unsigned int hash; + unsigned int len; + const char *name; +}; + +struct inode { + struct hlist_node i_hash; + struct list_head i_list; + struct list_head i_sb_list; + struct list_head i_dentry; + unsigned long i_ino; + unsigned int i_nlink; + uid_t i_uid; + gid_t i_gid; + dev_t i_rdev; + u64 i_version; + loff_t i_size; +#ifdef __NEED_I_SIZE_ORDERED + seqcount_t i_size_seqcount; +#endif + struct timespec i_atime; + struct timespec i_mtime; + struct timespec i_ctime; + unsigned int i_blkbits; + unsigned short i_bytes; + umode_t i_mode; + spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ + struct mutex i_mutex; + struct rw_semaphore i_alloc_sem; + const struct inode_operations *i_op; + const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ + struct super_block *i_sb; + struct file_lock *i_flock; +#ifdef CONFIG_QUOTA + struct dquot *i_dquot[MAXQUOTAS]; +#endif + struct list_head i_devices; + int i_cindex; + + __u32 i_generation; + +#ifdef CONFIG_DNOTIFY + unsigned long i_dnotify_mask; /* Directory notify events */ + struct dnotify_struct *i_dnotify; /* for directory notifications */ +#endif + +#ifdef CONFIG_INOTIFY + struct list_head inotify_watches; /* watches on this inode */ + struct mutex inotify_mutex; /* protects the watches list */ +#endif + + unsigned long i_state; + unsigned long dirtied_when; /* jiffies of first dirtying */ + + unsigned int i_flags; + +#ifdef CONFIG_SECURITY + void *i_security; +#endif + void *i_private; /* fs or device private pointer */ +}; + +struct super_block { + struct list_head s_list; /* Keep this first */ + dev_t s_dev; /* search index; _not_ kdev_t */ + unsigned long s_blocksize; + unsigned char s_blocksize_bits; + unsigned char s_dirt; + unsigned long long s_maxbytes; /* Max file size */ + struct file_system_type *s_type; + const struct super_operations *s_op; + struct dquot_operations *dq_op; + struct quotactl_ops *s_qcop; + const struct export_operations *s_export_op; + unsigned 
long s_flags; + unsigned long s_magic; + struct dentry *s_root; + struct rw_semaphore s_umount; + struct mutex s_lock; + int s_count; + int s_syncing; + int s_need_sync_fs; +#ifdef CONFIG_SECURITY + void *s_security; +#endif + struct xattr_handler **s_xattr; + + struct list_head s_inodes; /* all inodes */ + struct list_head s_dirty; /* dirty inodes */ + struct list_head s_io; /* parked for writeback */ + struct list_head s_more_io; /* parked for more writeback */ + struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ + struct list_head s_files; + /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ + struct list_head s_dentry_lru; /* unused dentry lru */ + int s_nr_dentry_unused; /* # of dentry on lru */ + + struct block_device *s_bdev; + struct mtd_info *s_mtd; + struct list_head s_instances; + + int s_frozen; + wait_queue_head_t s_wait_unfrozen; + + char s_id[32]; /* Informational name */ + + void *s_fs_info; /* Filesystem private info */ + + /* + * The next field is for VFS *only*. No filesystems have any business + * even looking at it. You had been warned. + */ + struct mutex s_vfs_rename_mutex; /* Kludge */ + + /* Granularity of c/m/atime in ns. + Cannot be worse than a second */ + u32 s_time_gran; + + /* + * Filesystem subtype. 
If non-empty the filesystem type field + * in /proc/mounts will be "type.subtype" + */ + char *s_subtype; + + /* + * Saved mount options for lazy filesystems using + * generic_show_options() + */ + char *s_options; +}; + +struct file_system_type { + const char *name; + int fs_flags; + int (*get_sb) (struct file_system_type *, int, + const char *, void *, struct vfsmount *); + void (*kill_sb) (struct super_block *); + struct module *owner; + struct file_system_type * next; + struct list_head fs_supers; +}; + +struct vfsmount { + struct list_head mnt_hash; + struct vfsmount *mnt_parent; /* fs we are mounted on */ + struct dentry *mnt_mountpoint; /* dentry of mountpoint */ + struct dentry *mnt_root; /* root of the mounted tree */ + struct super_block *mnt_sb; /* pointer to superblock */ + struct list_head mnt_mounts; /* list of children, anchored here */ + struct list_head mnt_child; /* and going through their mnt_child */ + int mnt_flags; + /* 4 bytes hole on 64bits arches */ + const char *mnt_devname; /* Name of device e.g. 
/dev/dsk/hda1 */ + struct list_head mnt_list; + struct list_head mnt_expire; /* link in fs-specific expiry list */ + struct list_head mnt_share; /* circular list of shared mounts */ + struct list_head mnt_slave_list;/* list of slave mounts */ + struct list_head mnt_slave; /* slave list entry */ + struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */ + struct mnt_namespace *mnt_ns; /* containing namespace */ + int mnt_id; /* mount identifier */ + int mnt_group_id; /* peer group identifier */ + /* + * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount + * to let these frequently modified fields in a separate cache line + * (so that reads of mnt_flags won't ping-pong on SMP machines) + */ + int mnt_expiry_mark; /* true if marked for expiry */ + int mnt_pinned; + int mnt_ghosts; + /* + * This value is not stable unless all of the mnt_writers[] spinlocks + * are held, and all mnt_writer[]s on this mount have 0 as their ->count + */ +}; + +struct path { + struct vfsmount *mnt; + struct dentry *dentry; +}; + +struct file { + struct path f_path; +#define f_dentry f_path.dentry +#define f_vfsmnt f_path.mnt + const struct file_operations *f_op; + unsigned int f_flags; + loff_t f_pos; + unsigned int f_uid, f_gid; + + u64 f_version; +#ifdef CONFIG_SECURITY + void *f_security; +#endif + /* needed for tty driver, and maybe others */ + void *private_data; + +#ifdef CONFIG_EPOLL + /* Used by fs/eventpoll.c to link all the hooks to this file */ + struct list_head f_ep_links; + spinlock_t f_ep_lock; +#endif /* #ifdef CONFIG_EPOLL */ +#ifdef CONFIG_DEBUG_WRITECOUNT + unsigned long f_mnt_write_state; +#endif +}; + +/* + * get_seconds() not really needed in the read-only implementation + */ +#define get_seconds() 0 + +/* 4k page size */ +#define PAGE_CACHE_SHIFT 12 +#define PAGE_CACHE_SIZE (1 << PAGE_CACHE_SHIFT) + +/* Page cache limit. The filesystems should put that into their s_maxbytes + limits, otherwise bad things can happen in VM. 
*/ +#if BITS_PER_LONG==32 +#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) +#elif BITS_PER_LONG==64 +#define MAX_LFS_FILESIZE 0x7fffffffffffffffUL +#endif + +#define INT_MAX ((int)(~0U>>1)) +#define INT_MIN (-INT_MAX - 1) +#define LLONG_MAX ((long long)(~0ULL>>1)) + +/* + * These are the fs-independent mount-flags: up to 32 flags are supported + */ +#define MS_RDONLY 1 /* Mount read-only */ +#define MS_NOSUID 2 /* Ignore suid and sgid bits */ +#define MS_NODEV 4 /* Disallow access to device special files */ +#define MS_NOEXEC 8 /* Disallow program execution */ +#define MS_SYNCHRONOUS 16 /* Writes are synced at once */ +#define MS_REMOUNT 32 /* Alter flags of a mounted FS */ +#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ +#define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOATIME 1024 /* Do not update access times. */ +#define MS_NODIRATIME 2048 /* Do not update directory access times */ +#define MS_BIND 4096 +#define MS_MOVE 8192 +#define MS_REC 16384 +#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. + MS_VERBOSE is deprecated. */ +#define MS_SILENT 32768 +#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define MS_UNBINDABLE (1<<17) /* change to unbindable */ +#define MS_PRIVATE (1<<18) /* change to private */ +#define MS_SLAVE (1<<19) /* change to slave */ +#define MS_SHARED (1<<20) /* change to shared */ +#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. 
*/ +#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ +#define MS_I_VERSION (1<<23) /* Update inode I_version field */ +#define MS_ACTIVE (1<<30) +#define MS_NOUSER (1<<31) + +#define I_NEW 8 + +/* Inode flags - they have nothing to do with superblock flags now */ + +#define S_SYNC 1 /* Writes are synced at once */ +#define S_NOATIME 2 /* Do not update access times */ +#define S_APPEND 4 /* Append-only file */ +#define S_IMMUTABLE 8 /* Immutable file */ +#define S_DEAD 16 /* removed, but still open directory */ +#define S_NOQUOTA 32 /* Inode is not counted to quota */ +#define S_DIRSYNC 64 /* Directory modifications are synchronous */ +#define S_NOCMTIME 128 /* Do not update file c/mtime */ +#define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ +#define S_PRIVATE 512 /* Inode is fs-internal */ + +/* include/linux/stat.h */ + +#define S_IFMT 00170000 +#define S_IFSOCK 0140000 +#define S_IFLNK 0120000 +#define S_IFREG 0100000 +#define S_IFBLK 0060000 +#define S_IFDIR 0040000 +#define S_IFCHR 0020000 +#define S_IFIFO 0010000 +#define S_ISUID 0004000 +#define S_ISGID 0002000 +#define S_ISVTX 0001000 + +/* include/linux/fs.h */ + +/* + * File types + * + * NOTE! These match bits 12..15 of stat.st_mode + * (ie "(i_mode >> 12) & 15"). 
+ */ +#define DT_UNKNOWN 0 +#define DT_FIFO 1 +#define DT_CHR 2 +#define DT_DIR 4 +#define DT_BLK 6 +#define DT_REG 8 +#define DT_LNK 10 +#define DT_SOCK 12 +#define DT_WHT 14 + +#define I_DIRTY_SYNC 1 +#define I_DIRTY_DATASYNC 2 +#define I_DIRTY_PAGES 4 +#define I_NEW 8 +#define I_WILL_FREE 16 +#define I_FREEING 32 +#define I_CLEAR 64 +#define __I_LOCK 7 +#define I_LOCK (1 << __I_LOCK) +#define __I_SYNC 8 +#define I_SYNC (1 << __I_SYNC) + +#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) + +/* linux/include/dcache.h */ + +#define DNAME_INLINE_LEN_MIN 36 + +struct dentry { + unsigned int d_flags; /* protected by d_lock */ + spinlock_t d_lock; /* per dentry lock */ + struct inode *d_inode; /* Where the name belongs to - NULL is + * negative */ + /* + * The next three fields are touched by __d_lookup. Place them here + * so they all fit in a cache line. + */ + struct hlist_node d_hash; /* lookup hash list */ + struct dentry *d_parent; /* parent directory */ + struct qstr d_name; + + struct list_head d_lru; /* LRU list */ + /* + * d_child and d_rcu can share memory + */ + struct list_head d_subdirs; /* our children */ + struct list_head d_alias; /* inode alias list */ + unsigned long d_time; /* used by d_revalidate */ + struct super_block *d_sb; /* The root of the dentry tree */ + void *d_fsdata; /* fs-specific data */ +#ifdef CONFIG_PROFILING + struct dcookie_struct *d_cookie; /* cookie, if any */ +#endif + int d_mounted; + unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ +}; + +static inline ino_t parent_ino(struct dentry *dentry) +{ + ino_t res; + + spin_lock(&dentry->d_lock); + res = dentry->d_parent->d_inode->i_ino; + spin_unlock(&dentry->d_lock); + return res; +} + +/* debug.c */ + +#define DEFINE_SPINLOCK(...) +#define module_param_named(...) + +/* misc.h */ +#define mutex_lock_nested(...) +#define mutex_unlock_nested(...) +#define mutex_is_locked(...) 
0 + +/* Version of this UBIFS implementation */ +#define UBIFS_VERSION 1 + +/* Normal UBIFS messages */ +#define ubifs_msg(fmt, ...) \ + printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__) +/* UBIFS error messages */ +#define ubifs_err(fmt, ...) \ + printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", 0, \ + __func__, ##__VA_ARGS__) +/* UBIFS warning messages */ +#define ubifs_warn(fmt, ...) \ + printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \ + 0, __func__, ##__VA_ARGS__) + +/* UBIFS file system VFS magic number */ +#define UBIFS_SUPER_MAGIC 0x24051905 + +/* Number of UBIFS blocks per VFS page */ +#define UBIFS_BLOCKS_PER_PAGE (PAGE_CACHE_SIZE / UBIFS_BLOCK_SIZE) +#define UBIFS_BLOCKS_PER_PAGE_SHIFT (PAGE_CACHE_SHIFT - UBIFS_BLOCK_SHIFT) + +/* "File system end of life" sequence number watermark */ +#define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL +#define SQNUM_WATERMARK 0xFFFFFFFFFF000000ULL + +/* + * Minimum amount of LEBs reserved for the index. At present the index needs at + * least 2 LEBs: one for the index head and one for in-the-gaps method (which + * currently does not cater for the index head and so excludes it from + * consideration). + */ +#define MIN_INDEX_LEBS 2 + +/* Minimum amount of data UBIFS writes to the flash */ +#define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8) + +/* + * Currently we do not support inode number overlapping and re-using, so this + * watermark defines dangerous inode number level. This should be fixed later, + * although it is difficult to exceed current limit. Another option is to use + * 64-bit inode numbers, but this means more overhead. + */ +#define INUM_WARN_WATERMARK 0xFFF00000 +#define INUM_WATERMARK 0xFFFFFF00 + +/* Largest key size supported in this implementation */ +#define CUR_MAX_KEY_LEN UBIFS_SK_LEN + +/* Maximum number of entries in each LPT (LEB category) heap */ +#define LPT_HEAP_SZ 256 + +/* + * Background thread name pattern. The numbers are UBI device and volume + * numbers. 
+ */ +#define BGT_NAME_PATTERN "ubifs_bgt%d_%d" + +/* Default write-buffer synchronization timeout (5 secs) */ +#define DEFAULT_WBUF_TIMEOUT (5 * HZ) + +/* Maximum possible inode number (only 32-bit inodes are supported now) */ +#define MAX_INUM 0xFFFFFFFF + +/* Number of non-data journal heads */ +#define NONDATA_JHEADS_CNT 2 + +/* Garbage collector head */ +#define GCHD 0 +/* Base journal head number */ +#define BASEHD 1 +/* First "general purpose" journal head */ +#define DATAHD 2 + +/* 'No change' value for 'ubifs_change_lp()' */ +#define LPROPS_NC 0x80000001 + +/* + * There is no notion of truncation key because truncation nodes do not exist + * in TNC. However, when replaying, it is handy to introduce fake "truncation" + * keys for truncation nodes because the code becomes simpler. So we define + * %UBIFS_TRUN_KEY type. + */ +#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT + +/* + * How much a directory entry/extended attribute entry adds to the parent/host + * inode. + */ +#define CALC_DENT_SIZE(name_len) ALIGN(UBIFS_DENT_NODE_SZ + (name_len) + 1, 8) + +/* How much an extended attribute adds to the host inode */ +#define CALC_XATTR_BYTES(data_len) ALIGN(UBIFS_INO_NODE_SZ + (data_len) + 1, 8) + +/* + * Znodes which were not touched for 'OLD_ZNODE_AGE' seconds are considered + * "old", and znode which were touched last 'YOUNG_ZNODE_AGE' seconds ago are + * considered "young". This is used by shrinker when selecting znode to trim + * off. + */ +#define OLD_ZNODE_AGE 20 +#define YOUNG_ZNODE_AGE 5 + + +/* Maximum expected tree height for use by bottom_up_buf */ +#define BOTTOM_UP_HEIGHT 64 + +/* Maximum number of data nodes to bulk-read */ +#define UBIFS_MAX_BULK_READ 32 + +/* + * Lockdep classes for UBIFS inode @ui_mutex. + */ +enum { + WB_MUTEX_1 = 0, + WB_MUTEX_2 = 1, + WB_MUTEX_3 = 2, +}; + +/* + * Znode flags (actually, bit numbers which store the flags). 
+ * + * DIRTY_ZNODE: znode is dirty + * COW_ZNODE: znode is being committed and a new instance of this znode has to + * be created before changing this znode + * OBSOLETE_ZNODE: znode is obsolete, which means it was deleted, but it is + * still in the commit list and the ongoing commit operation + * will commit it, and delete this znode after it is done + */ +enum { + DIRTY_ZNODE = 0, + COW_ZNODE = 1, + OBSOLETE_ZNODE = 2, +}; +/* + * 'ubifs_scan_a_node()' return values. + * + * SCANNED_GARBAGE: scanned garbage + * SCANNED_EMPTY_SPACE: scanned empty space + * SCANNED_A_NODE: scanned a valid node + * SCANNED_A_CORRUPT_NODE: scanned a corrupted node + * SCANNED_A_BAD_PAD_NODE: scanned a padding node with invalid pad length + * + * Greater than zero means: 'scanned that number of padding bytes' + */ +enum { + SCANNED_GARBAGE = 0, + SCANNED_EMPTY_SPACE = -1, + SCANNED_A_NODE = -2, + SCANNED_A_CORRUPT_NODE = -3, + SCANNED_A_BAD_PAD_NODE = -4, +}; + + +/* + * Dirty flag bits (lpt_drty_flgs) for LPT special nodes. + * + * LTAB_DIRTY: ltab node is dirty + * LSAVE_DIRTY: lsave node is dirty + */ +enum { + LTAB_DIRTY = 1, + LSAVE_DIRTY = 2, +}; + +/* + * Return codes used by the garbage collector. + * @LEB_FREED: the logical eraseblock was freed and is ready to use + * @LEB_FREED_IDX: indexing LEB was freed and can be used only after the commit + * @LEB_RETAINED: the logical eraseblock was freed and retained for GC purposes + */ +enum { + LEB_FREED, + LEB_FREED_IDX, + LEB_RETAINED, +}; + +/** + * struct ubifs_old_idx - index node obsoleted since last commit start. 
+ * @rb: rb-tree node + * @lnum: LEB number of obsoleted index node + * @offs: offset of obsoleted index node + */ +struct ubifs_old_idx { + struct rb_node rb; + int lnum; + int offs; +}; + +/* The below union makes it easier to deal with keys */ +union ubifs_key { + uint8_t u8[CUR_MAX_KEY_LEN]; + uint32_t u32[CUR_MAX_KEY_LEN/4]; + uint64_t u64[CUR_MAX_KEY_LEN/8]; + __le32 j32[CUR_MAX_KEY_LEN/4]; +}; + +/** + * struct ubifs_scan_node - UBIFS scanned node information. + * @list: list of scanned nodes + * @key: key of node scanned (if it has one) + * @sqnum: sequence number + * @type: type of node scanned + * @offs: offset with LEB of node scanned + * @len: length of node scanned + * @node: raw node + */ +struct ubifs_scan_node { + struct list_head list; + union ubifs_key key; + unsigned long long sqnum; + int type; + int offs; + int len; + void *node; +}; + +/** + * struct ubifs_scan_leb - UBIFS scanned LEB information. + * @lnum: logical eraseblock number + * @nodes_cnt: number of nodes scanned + * @nodes: list of struct ubifs_scan_node + * @endpt: end point (and therefore the start of empty space) + * @ecc: read returned -EBADMSG + * @buf: buffer containing entire LEB scanned + */ +struct ubifs_scan_leb { + int lnum; + int nodes_cnt; + struct list_head nodes; + int endpt; + int ecc; + void *buf; +}; + +/** + * struct ubifs_gced_idx_leb - garbage-collected indexing LEB. + * @list: list + * @lnum: LEB number + * @unmap: OK to unmap this LEB + * + * This data structure is used to temporary store garbage-collected indexing + * LEBs - they are not released immediately, but only after the next commit. + * This is needed to guarantee recoverability. + */ +struct ubifs_gced_idx_leb { + struct list_head list; + int lnum; + int unmap; +}; + +/** + * struct ubifs_inode - UBIFS in-memory inode description. 
+ * @vfs_inode: VFS inode description object + * @creat_sqnum: sequence number at time of creation + * @del_cmtno: commit number corresponding to the time the inode was deleted, + * protected by @c->commit_sem; + * @xattr_size: summarized size of all extended attributes in bytes + * @xattr_cnt: count of extended attributes this inode has + * @xattr_names: sum of lengths of all extended attribute names belonging to + * this inode + * @dirty: non-zero if the inode is dirty + * @xattr: non-zero if this is an extended attribute inode + * @bulk_read: non-zero if bulk-read should be used + * @ui_mutex: serializes inode write-back with the rest of VFS operations, + * serializes "clean <-> dirty" state changes, serializes bulk-read, + * protects @dirty, @bulk_read, @ui_size, and @xattr_size + * @ui_lock: protects @synced_i_size + * @synced_i_size: synchronized size of inode, i.e. the value of inode size + * currently stored on the flash; used only for regular file + * inodes + * @ui_size: inode size used by UBIFS when writing to flash + * @flags: inode flags (@UBIFS_COMPR_FL, etc) + * @compr_type: default compression type used for this inode + * @last_page_read: page number of last page read (for bulk read) + * @read_in_a_row: number of consecutive pages read in a row (for bulk read) + * @data_len: length of the data attached to the inode + * @data: inode's data + * + * @ui_mutex exists for two main reasons. At first it prevents inodes from + * being written back while UBIFS changing them, being in the middle of an VFS + * operation. This way UBIFS makes sure the inode fields are consistent. For + * example, in 'ubifs_rename()' we change 3 inodes simultaneously, and + * write-back must not write any of them before we have finished. + * + * The second reason is budgeting - UBIFS has to budget all operations. If an + * operation is going to mark an inode dirty, it has to allocate budget for + * this. 
It cannot just mark it dirty because there is no guarantee there will + * be enough flash space to write the inode back later. This means UBIFS has + * to have full control over inode "clean <-> dirty" transitions (and pages + * actually). But unfortunately, VFS marks inodes dirty in many places, and it + * does not ask the file-system if it is allowed to do so (there is a notifier, + * but it is not enough), i.e., there is no mechanism to synchronize with this. + * So UBIFS has its own inode dirty flag and its own mutex to serialize + * "clean <-> dirty" transitions. + * + * The @synced_i_size field is used to make sure we never write pages which are + * beyond last synchronized inode size. See 'ubifs_writepage()' for more + * information. + * + * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses + * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot + * make sure @inode->i_size is always changed under @ui_mutex, because it + * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock + * with 'ubifs_writepage()' (see file.c). All the other inode fields are + * changed under @ui_mutex, so they do not need "shadow" fields. Note, one + * could consider to rework locking and base it on "shadow" fields. + */ +struct ubifs_inode { + struct inode vfs_inode; + unsigned long long creat_sqnum; + unsigned long long del_cmtno; + unsigned int xattr_size; + unsigned int xattr_cnt; + unsigned int xattr_names; + unsigned int dirty:1; + unsigned int xattr:1; + unsigned int bulk_read:1; + unsigned int compr_type:2; + struct mutex ui_mutex; + spinlock_t ui_lock; + loff_t synced_i_size; + loff_t ui_size; + int flags; + pgoff_t last_page_read; + pgoff_t read_in_a_row; + int data_len; + void *data; +}; + +/** + * struct ubifs_unclean_leb - records a LEB recovered under read-only mode. 
+ * @list: list + * @lnum: LEB number of recovered LEB + * @endpt: offset where recovery ended + * + * This structure records a LEB identified during recovery that needs to be + * cleaned but was not because UBIFS was mounted read-only. The information + * is used to clean the LEB when remounting to read-write mode. + */ +struct ubifs_unclean_leb { + struct list_head list; + int lnum; + int endpt; +}; + + +struct ubifs_nnode; + +/** + * struct ubifs_bud - bud logical eraseblock. + * @lnum: logical eraseblock number + * @start: where the (uncommitted) bud data starts + * @jhead: journal head number this bud belongs to + * @list: link in the list buds belonging to the same journal head + * @rb: link in the tree of all buds + */ +struct ubifs_bud { + int lnum; + int start; + int jhead; + struct list_head list; + struct rb_node rb; +}; + + +/** + * struct ubifs_zbranch - key/coordinate/length branch stored in znodes. + * @key: key + * @znode: znode address in memory + * @lnum: LEB number of the target node (indexing node or data node) + * @offs: target node offset within @lnum + * @len: target node length + */ +struct ubifs_zbranch { + union ubifs_key key; + union { + struct ubifs_znode *znode; + void *leaf; + }; + int lnum; + int offs; + int len; +}; + +/** + * struct ubifs_znode - in-memory representation of an indexing node. + * @parent: parent znode or NULL if it is the root + * @cnext: next znode to commit + * @flags: znode flags (%DIRTY_ZNODE, %COW_ZNODE or %OBSOLETE_ZNODE) + * @time: last access time (seconds) + * @level: level of the entry in the TNC tree + * @child_cnt: count of child znodes + * @iip: index in parent's zbranch array + * @alt: lower bound of key range has altered i.e. 
child inserted at slot 0 + * @lnum: LEB number of the corresponding indexing node + * @offs: offset of the corresponding indexing node + * @len: length of the corresponding indexing node + * @zbranch: array of znode branches (@c->fanout elements) + */ +struct ubifs_znode { + struct ubifs_znode *parent; + struct ubifs_znode *cnext; + unsigned long flags; + unsigned long time; + int level; + int child_cnt; + int iip; + int alt; +#ifdef CONFIG_UBIFS_FS_DEBUG + int lnum, offs, len; +#endif + struct ubifs_zbranch zbranch[]; +}; + + +/** + * struct ubifs_node_range - node length range description data structure. + * @len: fixed node length + * @min_len: minimum possible node length + * @max_len: maximum possible node length + * + * If @max_len is %0, the node has fixed length @len. + */ +struct ubifs_node_range { + union { + int len; + int min_len; + }; + int max_len; +}; + + +/** + * struct ubifs_budget_req - budget requirements of an operation. + * + * @fast: non-zero if the budgeting should try to acquire budget quickly and + * should not try to call write-back + * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields + * have to be re-calculated + * @new_page: non-zero if the operation adds a new page + * @dirtied_page: non-zero if the operation makes a page dirty + * @new_dent: non-zero if the operation adds a new directory entry + * @mod_dent: non-zero if the operation removes or modifies an existing + * directory entry + * @new_ino: non-zero if the operation adds a new inode + * @new_ino_d: how much data newly created inode contains + * @dirtied_ino: how many inodes the operation makes dirty + * @dirtied_ino_d: how much data dirtied inode contains + * @idx_growth: how much the index will supposedly grow + * @data_growth: how much new data the operation will supposedly add + * @dd_growth: how much data that makes other data dirty the operation will + * supposedly add + * + * @idx_growth, @data_growth and @dd_growth are not used in budget 
request. The + * budgeting subsystem caches index and data growth values there to avoid + * re-calculating them when the budget is released. However, if @idx_growth is + * %-1, it is calculated by the release function using other fields. + * + * An inode may contain 4KiB of data at max., thus the widths of @new_ino_d + * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made + * dirty by the re-name operation. + * + * Note, UBIFS aligns node lengths to 8-bytes boundary, so the requester has to + * make sure the amount of inode data which contribute to @new_ino_d and + * @dirtied_ino_d fields are aligned. + */ +struct ubifs_budget_req { + unsigned int fast:1; + unsigned int recalculate:1; +#ifndef UBIFS_DEBUG + unsigned int new_page:1; + unsigned int dirtied_page:1; + unsigned int new_dent:1; + unsigned int mod_dent:1; + unsigned int new_ino:1; + unsigned int new_ino_d:13; + unsigned int dirtied_ino:4; + unsigned int dirtied_ino_d:15; +#else + /* Not bit-fields to check for overflows */ + unsigned int new_page; + unsigned int dirtied_page; + unsigned int new_dent; + unsigned int mod_dent; + unsigned int new_ino; + unsigned int new_ino_d; + unsigned int dirtied_ino; + unsigned int dirtied_ino_d; +#endif + int idx_growth; + int data_growth; + int dd_growth; +}; + + +/** + * struct ubifs_mount_opts - UBIFS-specific mount options information. 
+ * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) + * @bulk_read: enable/disable bulk-reads (%0 default, %1 disable, %2 enable) + * @chk_data_crc: enable/disable CRC data checking when reading data nodes + * (%0 default, %1 disable, %2 enable) + * @override_compr: override default compressor (%0 - do not override and use + * superblock compressor, %1 - override and use compressor + * specified in @compr_type) + * @compr_type: compressor type to override the superblock compressor with + * (%UBIFS_COMPR_NONE, etc) + */ +struct ubifs_mount_opts { + unsigned int unmount_mode:2; + unsigned int bulk_read:2; + unsigned int chk_data_crc:2; + unsigned int override_compr:1; + unsigned int compr_type:2; +}; + +struct ubifs_debug_info; + +/** + * struct ubifs_info - UBIFS file-system description data structure + * (per-superblock). + * @vfs_sb: VFS @struct super_block object + * @bdi: backing device info object to make VFS happy and disable read-ahead + * + * @highest_inum: highest used inode number + * @max_sqnum: current global sequence number + * @cmt_no: commit number of the last successfully completed commit, protected + * by @commit_sem + * @cnt_lock: protects @highest_inum and @max_sqnum counters + * @fmt_version: UBIFS on-flash format version + * @ro_compat_version: R/O compatibility version + * @uuid: UUID from super block + * + * @lhead_lnum: log head logical eraseblock number + * @lhead_offs: log head offset + * @ltail_lnum: log tail logical eraseblock number (offset is always 0) + * @log_mutex: protects the log, @lhead_lnum, @lhead_offs, @ltail_lnum, and + * @bud_bytes + * @min_log_bytes: minimum required number of bytes in the log + * @cmt_bud_bytes: used during commit to temporarily store the amount of bytes in + * committed buds + * + * @buds: tree of all buds indexed by bud LEB number + * @bud_bytes: how many bytes of flash is used by buds + * @buds_lock: protects the @buds tree, @bud_bytes, and per-journal head bud + * lists + * @jhead_cnt: count 
of journal heads + * @jheads: journal heads (head zero is base head) + * @max_bud_bytes: maximum number of bytes allowed in buds + * @bg_bud_bytes: number of bud bytes when background commit is initiated + * @old_buds: buds to be released after commit ends + * @max_bud_cnt: maximum number of buds + * + * @commit_sem: synchronizes committer with other processes + * @cmt_state: commit state + * @cs_lock: commit state lock + * @cmt_wq: wait queue to sleep on if the log is full and a commit is running + * + * @big_lpt: flag that LPT is too big to write whole during commit + * @no_chk_data_crc: do not check CRCs when reading data nodes (except during + * recovery) + * @bulk_read: enable bulk-reads + * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) + * @rw_incompat: the media is not R/W compatible + * + * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and + * @calc_idx_sz + * @zroot: zbranch which points to the root index node and znode + * @cnext: next znode to commit + * @enext: next znode to commit to empty space + * @gap_lebs: array of LEBs used by the in-gaps commit method + * @cbuf: commit buffer + * @ileb_buf: buffer for commit in-the-gaps method + * @ileb_len: length of data in ileb_buf + * @ihead_lnum: LEB number of index head + * @ihead_offs: offset of index head + * @ilebs: pre-allocated index LEBs + * @ileb_cnt: number of pre-allocated index LEBs + * @ileb_nxt: next pre-allocated index LEBs + * @old_idx: tree of index nodes obsoleted since the last commit start + * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c + * + * @mst_node: master node + * @mst_offs: offset of valid master node + * @mst_mutex: protects the master node area, @mst_node, and @mst_offs + * + * @max_bu_buf_len: maximum bulk-read buffer length + * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu + * @bu: pre-allocated bulk-read information + * + * @log_lebs: number of logical eraseblocks in the log + 
* @log_bytes: log size in bytes + * @log_last: last LEB of the log + * @lpt_lebs: number of LEBs used for lprops table + * @lpt_first: first LEB of the lprops table area + * @lpt_last: last LEB of the lprops table area + * @orph_lebs: number of LEBs used for the orphan area + * @orph_first: first LEB of the orphan area + * @orph_last: last LEB of the orphan area + * @main_lebs: count of LEBs in the main area + * @main_first: first LEB of the main area + * @main_bytes: main area size in bytes + * + * @key_hash_type: type of the key hash + * @key_hash: direntry key hash function + * @key_fmt: key format + * @key_len: key length + * @fanout: fanout of the index tree (number of links per indexing node) + * + * @min_io_size: minimal input/output unit size + * @min_io_shift: number of bits in @min_io_size minus one + * @leb_size: logical eraseblock size in bytes + * @half_leb_size: half LEB size + * @leb_cnt: count of logical eraseblocks + * @max_leb_cnt: maximum count of logical eraseblocks + * @old_leb_cnt: count of logical eraseblocks before re-size + * @ro_media: the underlying UBI volume is read-only + * + * @dirty_pg_cnt: number of dirty pages (not used) + * @dirty_zn_cnt: number of dirty znodes + * @clean_zn_cnt: number of clean znodes + * + * @budg_idx_growth: amount of bytes budgeted for index growth + * @budg_data_growth: amount of bytes budgeted for cached data + * @budg_dd_growth: amount of bytes budgeted for cached data that will make + * other data dirty + * @budg_uncommitted_idx: amount of bytes which were budgeted for growth of the index, + * but which still have to be taken into account because + * the index has not been committed so far + * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, + * @budg_uncommitted_idx, @min_idx_lebs, @old_idx_sz, @lst, + * @nospace, and @nospace_rp; + * @min_idx_lebs: minimum number of LEBs required for the index + * @old_idx_sz: size of index on flash + * @calc_idx_sz: temporary variable which is used 
to calculate new index size + * (contains accurate new index size at end of TNC commit start) + * @lst: lprops statistics + * @nospace: non-zero if the file-system does not have flash space (used as + * optimization) + * @nospace_rp: the same as @nospace, but additionally means that even reserved + * pool is full + * + * @page_budget: budget for a page + * @inode_budget: budget for an inode + * @dent_budget: budget for a directory entry + * + * @ref_node_alsz: size of the LEB reference node aligned to the min. flash + * I/O unit + * @mst_node_alsz: master node aligned size + * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary + * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary + * @max_inode_sz: maximum possible inode size in bytes + * @max_znode_sz: size of znode in bytes + * + * @leb_overhead: how many bytes are wasted in an LEB when it is filled with + * data nodes of maximum size - used in free space reporting + * @dead_wm: LEB dead space watermark + * @dark_wm: LEB dark space watermark + * @block_cnt: count of 4KiB blocks on the FS + * + * @ranges: UBIFS node length ranges + * @ubi: UBI volume descriptor + * @di: UBI device information + * @vi: UBI volume information + * + * @orph_tree: rb-tree of orphan inode numbers + * @orph_list: list of orphan inode numbers in order added + * @orph_new: list of orphan inode numbers added since last commit + * @orph_cnext: next orphan to commit + * @orph_dnext: next orphan to delete + * @orphan_lock: lock for orph_tree and orph_new + * @orph_buf: buffer for orphan nodes + * @new_orphans: number of orphans since last commit + * @cmt_orphans: number of orphans being committed + * @tot_orphans: number of orphans in the rb_tree + * @max_orphans: maximum number of orphans allowed + * @ohead_lnum: orphan head LEB number + * @ohead_offs: orphan head offset + * @no_orphs: non-zero if there are no orphans + * + * @bgt: UBIFS background thread + * @bgt_name: background thread name + * @need_bgt: 
if background thread should run + * @need_wbuf_sync: if write-buffers have to be synchronized + * + * @gc_lnum: LEB number used for garbage collection + * @sbuf: a buffer of LEB size used by GC and replay for scanning + * @idx_gc: list of index LEBs that have been garbage collected + * @idx_gc_cnt: number of elements on the idx_gc list + * @gc_seq: incremented for every non-index LEB garbage collected + * @gced_lnum: last non-index LEB that was garbage collected + * + * @infos_list: links all 'ubifs_info' objects + * @umount_mutex: serializes shrinker and un-mount + * @shrinker_run_no: shrinker run number + * + * @space_bits: number of bits needed to record free or dirty space + * @lpt_lnum_bits: number of bits needed to record a LEB number in the LPT + * @lpt_offs_bits: number of bits needed to record an offset in the LPT + * @lpt_spc_bits: number of bits needed to record space in the LPT + * @pcnt_bits: number of bits needed to record pnode or nnode number + * @lnum_bits: number of bits needed to record LEB number + * @nnode_sz: size of on-flash nnode + * @pnode_sz: size of on-flash pnode + * @ltab_sz: size of on-flash LPT lprops table + * @lsave_sz: size of on-flash LPT save table + * @pnode_cnt: number of pnodes + * @nnode_cnt: number of nnodes + * @lpt_hght: height of the LPT + * @pnodes_have: number of pnodes in memory + * + * @lp_mutex: protects lprops table and all the other lprops-related fields + * @lpt_lnum: LEB number of the root nnode of the LPT + * @lpt_offs: offset of the root nnode of the LPT + * @nhead_lnum: LEB number of LPT head + * @nhead_offs: offset of LPT head + * @lpt_drty_flgs: dirty flags for LPT special nodes e.g.
ltab + * @dirty_nn_cnt: number of dirty nnodes + * @dirty_pn_cnt: number of dirty pnodes + * @check_lpt_free: flag that indicates LPT GC may be needed + * @lpt_sz: LPT size + * @lpt_nod_buf: buffer for an on-flash nnode or pnode + * @lpt_buf: buffer of LEB size used by LPT + * @nroot: address in memory of the root nnode of the LPT + * @lpt_cnext: next LPT node to commit + * @lpt_heap: array of heaps of categorized lprops + * @dirty_idx: a (reverse sorted) copy of the LPROPS_DIRTY_IDX heap as at + * previous commit start + * @uncat_list: list of un-categorized LEBs + * @empty_list: list of empty LEBs + * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size) + * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size) + * @freeable_cnt: number of freeable LEBs in @freeable_list + * + * @ltab_lnum: LEB number of LPT's own lprops table + * @ltab_offs: offset of LPT's own lprops table + * @ltab: LPT's own lprops table + * @ltab_cmt: LPT's own lprops table (commit copy) + * @lsave_cnt: number of LEB numbers in LPT's save table + * @lsave_lnum: LEB number of LPT's save table + * @lsave_offs: offset of LPT's save table + * @lsave: LPT's save table + * @lscan_lnum: LEB number of last LPT scan + * + * @rp_size: size of the reserved pool in bytes + * @report_rp_size: size of the reserved pool reported to user-space + * @rp_uid: reserved pool user ID + * @rp_gid: reserved pool group ID + * + * @empty: if the UBI device is empty + * @replay_tree: temporary tree used during journal replay + * @replay_list: temporary list used during journal replay + * @replay_buds: list of buds to replay + * @cs_sqnum: sequence number of first node in the log (commit start node) + * @replay_sqnum: sequence number of node currently being replayed + * @need_recovery: file-system needs recovery + * @replaying: set to %1 during journal replay + * @unclean_leb_list: LEBs to recover when mounting ro to rw + * @rcvrd_mst_node: recovered master node to write when 
mounting ro to rw + * @size_tree: inode size information for recovery + * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) + * @always_chk_crc: always check CRCs (while mounting and remounting rw) + * @mount_opts: UBIFS-specific mount options + * + * @dbg: debugging-related information + */ +struct ubifs_info { + struct super_block *vfs_sb; + + ino_t highest_inum; + unsigned long long max_sqnum; + unsigned long long cmt_no; + spinlock_t cnt_lock; + int fmt_version; + int ro_compat_version; + unsigned char uuid[16]; + + int lhead_lnum; + int lhead_offs; + int ltail_lnum; + struct mutex log_mutex; + int min_log_bytes; + long long cmt_bud_bytes; + + struct rb_root buds; + long long bud_bytes; + spinlock_t buds_lock; + int jhead_cnt; + struct ubifs_jhead *jheads; + long long max_bud_bytes; + long long bg_bud_bytes; + struct list_head old_buds; + int max_bud_cnt; + + struct rw_semaphore commit_sem; + int cmt_state; + spinlock_t cs_lock; + wait_queue_head_t cmt_wq; + + unsigned int big_lpt:1; + unsigned int no_chk_data_crc:1; + unsigned int bulk_read:1; + unsigned int default_compr:2; + unsigned int rw_incompat:1; + + struct mutex tnc_mutex; + struct ubifs_zbranch zroot; + struct ubifs_znode *cnext; + struct ubifs_znode *enext; + int *gap_lebs; + void *cbuf; + void *ileb_buf; + int ileb_len; + int ihead_lnum; + int ihead_offs; + int *ilebs; + int ileb_cnt; + int ileb_nxt; + struct rb_root old_idx; + int *bottom_up_buf; + + struct ubifs_mst_node *mst_node; + int mst_offs; + struct mutex mst_mutex; + + int max_bu_buf_len; + struct mutex bu_mutex; + + int log_lebs; + long long log_bytes; + int log_last; + int lpt_lebs; + int lpt_first; + int lpt_last; + int orph_lebs; + int orph_first; + int orph_last; + int main_lebs; + int main_first; + long long main_bytes; + + uint8_t key_hash_type; + uint32_t (*key_hash)(const char *str, int len); + int key_fmt; + int key_len; + int fanout; + + int min_io_size; + int min_io_shift; + int leb_size; + int 
half_leb_size; + int leb_cnt; + int max_leb_cnt; + int old_leb_cnt; + int ro_media; + + long long budg_idx_growth; + long long budg_data_growth; + long long budg_dd_growth; + long long budg_uncommitted_idx; + spinlock_t space_lock; + int min_idx_lebs; + unsigned long long old_idx_sz; + unsigned long long calc_idx_sz; + unsigned int nospace:1; + unsigned int nospace_rp:1; + + int page_budget; + int inode_budget; + int dent_budget; + + int ref_node_alsz; + int mst_node_alsz; + int min_idx_node_sz; + int max_idx_node_sz; + long long max_inode_sz; + int max_znode_sz; + + int leb_overhead; + int dead_wm; + int dark_wm; + int block_cnt; + + struct ubifs_node_range ranges[UBIFS_NODE_TYPES_CNT]; + struct ubi_volume_desc *ubi; + struct ubi_device_info di; + struct ubi_volume_info vi; + + struct rb_root orph_tree; + struct list_head orph_list; + struct list_head orph_new; + struct ubifs_orphan *orph_cnext; + struct ubifs_orphan *orph_dnext; + spinlock_t orphan_lock; + void *orph_buf; + int new_orphans; + int cmt_orphans; + int tot_orphans; + int max_orphans; + int ohead_lnum; + int ohead_offs; + int no_orphs; + + struct task_struct *bgt; + char bgt_name[sizeof(BGT_NAME_PATTERN) + 9]; + int need_bgt; + int need_wbuf_sync; + + int gc_lnum; + void *sbuf; + struct list_head idx_gc; + int idx_gc_cnt; + int gc_seq; + int gced_lnum; + + struct list_head infos_list; + struct mutex umount_mutex; + unsigned int shrinker_run_no; + + int space_bits; + int lpt_lnum_bits; + int lpt_offs_bits; + int lpt_spc_bits; + int pcnt_bits; + int lnum_bits; + int nnode_sz; + int pnode_sz; + int ltab_sz; + int lsave_sz; + int pnode_cnt; + int nnode_cnt; + int lpt_hght; + int pnodes_have; + + struct mutex lp_mutex; + int lpt_lnum; + int lpt_offs; + int nhead_lnum; + int nhead_offs; + int lpt_drty_flgs; + int dirty_nn_cnt; + int dirty_pn_cnt; + int check_lpt_free; + long long lpt_sz; + void *lpt_nod_buf; + void *lpt_buf; + struct list_head uncat_list; + struct list_head empty_list; + struct list_head 
freeable_list; + struct list_head frdi_idx_list; + int freeable_cnt; + + int ltab_lnum; + int ltab_offs; + struct ubifs_lpt_lprops *ltab; + struct ubifs_lpt_lprops *ltab_cmt; + int lsave_cnt; + int lsave_lnum; + int lsave_offs; + int *lsave; + int lscan_lnum; + + long long rp_size; + long long report_rp_size; + uid_t rp_uid; + gid_t rp_gid; + + /* The below fields are used only during mounting and re-mounting */ + int empty; + struct rb_root replay_tree; + struct list_head replay_list; + struct list_head replay_buds; + unsigned long long cs_sqnum; + unsigned long long replay_sqnum; + int need_recovery; + int replaying; + struct list_head unclean_leb_list; + struct ubifs_mst_node *rcvrd_mst_node; + struct rb_root size_tree; + int remounting_rw; + int always_chk_crc; + struct ubifs_mount_opts mount_opts; + +#ifdef CONFIG_UBIFS_FS_DEBUG + struct ubifs_debug_info *dbg; +#endif +}; + +extern spinlock_t ubifs_infos_lock; +extern struct kmem_cache *ubifs_inode_slab; +extern const struct super_operations ubifs_super_operations; +extern const struct address_space_operations ubifs_file_address_operations; +extern const struct file_operations ubifs_file_operations; +extern const struct inode_operations ubifs_file_inode_operations; +extern const struct file_operations ubifs_dir_operations; +extern const struct inode_operations ubifs_dir_inode_operations; +extern const struct inode_operations ubifs_symlink_inode_operations; +extern struct backing_dev_info ubifs_backing_dev_info; +extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; + +/* io.c */ +void ubifs_ro_mode(struct ubifs_info *c, int err); +int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, + int lnum, int offs); +int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, + int offs, int quiet, int must_chk_crc); +void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); +void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, 
int last); +int ubifs_io_init(struct ubifs_info *c); +void ubifs_pad(const struct ubifs_info *c, void *buf, int pad); +int ubifs_bg_wbufs_sync(struct ubifs_info *c); + +/* scan.c */ +struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, + int offs, void *sbuf); +void ubifs_scan_destroy(struct ubifs_scan_leb *sleb); +int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, + int offs, int quiet); +struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, + int offs, void *sbuf); +void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, + int lnum, int offs); +int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, + void *buf, int offs); +void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, + void *buf); + +/* log.c */ +void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud); +void ubifs_create_buds_lists(struct ubifs_info *c); +int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs); +struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum); +struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum); +int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum); +int ubifs_log_end_commit(struct ubifs_info *c, int new_ltail_lnum); +int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum); +int ubifs_consolidate_log(struct ubifs_info *c); + +/* journal.c */ +int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, + const struct qstr *nm, const struct inode *inode, + int deletion, int xent); +int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, + const union ubifs_key *key, const void *buf, int len); +int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode); +int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode); +int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, + const struct dentry 
*old_dentry, + const struct inode *new_dir, + const struct dentry *new_dentry, int sync); +int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, + loff_t old_size, loff_t new_size); +int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, + const struct inode *inode, const struct qstr *nm); +int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode1, + const struct inode *inode2); + +/* budget.c */ +int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req); +void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req); +void ubifs_release_dirty_inode_budget(struct ubifs_info *c, + struct ubifs_inode *ui); +int ubifs_budget_inode_op(struct ubifs_info *c, struct inode *inode, + struct ubifs_budget_req *req); +void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, + struct ubifs_budget_req *req); +void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, + struct ubifs_budget_req *req); +long long ubifs_get_free_space(struct ubifs_info *c); +long long ubifs_get_free_space_nolock(struct ubifs_info *c); +int ubifs_calc_min_idx_lebs(struct ubifs_info *c); +void ubifs_convert_page_budget(struct ubifs_info *c); +long long ubifs_reported_space(const struct ubifs_info *c, long long free); +long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); + +/* find.c */ +int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, + int squeeze); +int ubifs_find_free_leb_for_idx(struct ubifs_info *c); + +/* tnc.c */ +int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode **zn, int *n); +int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, + void *node, const struct qstr *nm); +int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, + void *node, int *lnum, int *offs); +int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, + int offs, int 
len); +int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, + int old_lnum, int old_offs, int lnum, int offs, int len); +int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, + int lnum, int offs, int len, const struct qstr *nm); +int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key); +int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, + const struct qstr *nm); +int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, + union ubifs_key *to_key); +int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum); +struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, + union ubifs_key *key, + const struct qstr *nm); +void ubifs_tnc_close(struct ubifs_info *c); +int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs, int is_idx); +int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs); +/* Shared by tnc.c for tnc_commit.c */ +void destroy_old_idx(struct ubifs_info *c); +int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs); +int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode); + +/* tnc_misc.c */ +struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, + struct ubifs_znode *znode); +int ubifs_search_zbranch(const struct ubifs_info *c, + const struct ubifs_znode *znode, + const union ubifs_key *key, int *n); +struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode); +struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode); +long ubifs_destroy_tnc_subtree(struct ubifs_znode *zr); +struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c, + struct ubifs_zbranch *zbr, + struct ubifs_znode *parent, int iip); +int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *node); + +/* tnc_commit.c */ +int ubifs_tnc_start_commit(struct ubifs_info *c, struct 
ubifs_zbranch *zroot); +int ubifs_tnc_end_commit(struct ubifs_info *c); + +/* shrinker.c */ +int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask); + + +/* master.c */ +int ubifs_read_master(struct ubifs_info *c); +int ubifs_write_master(struct ubifs_info *c); + +/* sb.c */ +int ubifs_read_superblock(struct ubifs_info *c); +struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); +int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); + +/* replay.c */ +int ubifs_validate_entry(struct ubifs_info *c, + const struct ubifs_dent_node *dent); +int ubifs_replay_journal(struct ubifs_info *c); + + +/* file.c */ +int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); +int ubifs_setattr(struct dentry *dentry, struct iattr *attr); + +/* dir.c */ +struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, + int mode); +int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat); + +/* xattr.c */ +int ubifs_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); +ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, + size_t size); +ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); +int ubifs_removexattr(struct dentry *dentry, const char *name); + +/* super.c */ +struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); +int ubifs_iput(struct inode *inode); + +/* recovery.c */ +int ubifs_recover_master_node(struct ubifs_info *c); +int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); +struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, + int offs, void *sbuf, int grouped); +struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, + int offs, void *sbuf); +int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); +int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); +int ubifs_rcvry_gc_commit(struct ubifs_info *c); +int 
ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, + int deletion, loff_t new_size); +int ubifs_recover_size(struct ubifs_info *c); +void ubifs_destroy_size_tree(struct ubifs_info *c); + +/* ioctl.c */ +long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +void ubifs_set_inode_flags(struct inode *inode); +#ifdef CONFIG_COMPAT +long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +#endif + +/* compressor.c */ + +#include "debug.h" +#include "misc.h" +#include "key.h" + +/* todo: Move these to a common U-Boot header */ +int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len); +#endif /* !__UBIFS_H__ */ diff --git a/fs/yaffs2/Makefile b/fs/yaffs2/Makefile new file mode 100755 index 0000000..7707cb7 --- /dev/null +++ b/fs/yaffs2/Makefile @@ -0,0 +1,46 @@ +# Makefile for YAFFS direct test +# +# +# YAFFS: Yet another Flash File System. A NAND-flash specific file system. +# +# Copyright (C) 2003 Aleph One Ltd. +# +# +# Created by Charles Manning <charles@aleph1.co.uk> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# +# NB Warning this Makefile does not include header dependencies. 
+# +# $Id: Makefile,v 1.15 2007/07/18 19:40:38 charles Exp $ + +#EXTRA_COMPILE_FLAGS = -DYAFFS_IGNORE_TAGS_ECC +include $(TOPDIR)/config.mk + +LIB = libyaffs2.a + +AOBJS = +COBJS = \ + yaffscfg.o yaffsfs.o yaffs_guts.o \ + yaffs_packedtags2.o yaffs_tagsvalidity.o \ + yaffs_nand.o yaffs_checkptrw.o \ + yaffs_mtdif2.o + +OBJS = $(AOBJS) $(COBJS) + +#CPPFLAGS += +all: $(LIB) $(AOBJS) + +$(LIB): .depend $(OBJS) + $(AR) crv $@ $(OBJS) + + +######################################################################### + +.depend: Makefile $(AOBJS:.o=.S) $(COBJS:.o=.c) + $(CC) -M $(CFLAGS) $(AOBJS:.o=.S) $(COBJS:.o=.c) > $@ + +sinclude .depend +######################################################################### diff --git a/fs/yaffs2/README-linux b/fs/yaffs2/README-linux new file mode 100755 index 0000000..3851e36 --- /dev/null +++ b/fs/yaffs2/README-linux @@ -0,0 +1,201 @@ +Welcome to YAFFS, the first file system developed specifically for NAND flash. + +It is now YAFFS2 - original YAFFS (YAFFS1) only supports 512-byte page +NAND and is now deprecated. YAFFS2 supports 512b page in 'YAFFS1 +compatibility' mode (CONFIG_YAFFS_YAFFS1) and 2K or larger page NAND +in YAFFS2 mode (CONFIG_YAFFS_YAFFS2). + + +A note on licencing +------------------- +YAFFS is available under the GPL and via alternative licensing +arrangements with Aleph One. If you're using YAFFS as a Linux kernel +file system then it will be under the GPL. For use in other situations +you should discuss licensing issues with Aleph One. + + +Terminology +----------- +Page - NAND addressable unit (normally 512b or 2Kbyte size) - can + be read, written, marked bad. Has associated OOB. +Block - Erasable unit. 64 Pages. (128K on 2K NAND, 32K on 512b NAND) +OOB - 'spare area' of each page for ECC, bad block marker and YAFFS + tags. 16 bytes per 512b - 64 bytes for 2K page size. +Chunk - Basic YAFFS addressable unit. Same size as Page. +Object - YAFFS Object: File, Directory, Link, Device etc.
+ +YAFFS design +------------ + +YAFFS is a log-structured filesystem. It is designed particularly for +NAND (as opposed to NOR) flash, to be flash-friendly, robust due to +journalling, and to have low RAM and boot time overheads. File data is +stored in 'chunks'. Chunks are the same size as NAND pages. Each page +is marked with file id and chunk number. These marking 'tags' are +stored in the OOB (or 'spare') region of the flash. The chunk number +is determined by dividing the file position by the chunk size. Each +chunk has a number of valid bytes, which equals the page size for all +except the last chunk in a file. + +File 'headers' are stored as the first page in a file, marked as a +different type to data pages. The same mechanism is used to store +directories, device files, links etc. The first page describes which +type of object it is. + +YAFFS2 never re-writes a page, because the spec of NAND chips does not +allow it. (YAFFS1 used to mark a block 'deleted' in the OOB). Deletion +is managed by moving deleted objects to the special, hidden 'unlinked' +directory. These records are preserved until all the pages containing +the object have been erased (We know when this happens by keeping a +count of chunks remaining on the system for each object - when it +reaches zero the object really is gone). + +When data in a file is overwritten, the relevant chunks are replaced +by writing new pages to flash containing the new data but the same +tags. + +Pages are also marked with a short (2 bit) serial number that +increments each time the page at this position is replaced. The +reason for this is that if power loss/crash/other act of demonic +forces happens before the replaced page is marked as discarded, it is +possible to have two pages with the same tags. The serial number is +used to arbitrate. + +A block containing only discarded pages (termed a dirty block) is an +obvious candidate for garbage collection.
Otherwise valid pages can be +copied off a block thus rendering the whole block discarded and ready +for garbage collection. + +In theory you don't need to hold the file structure in RAM... you +could just scan the whole flash looking for pages when you need them. +In practice though you'd want better file access times than that! The +mechanism proposed here is to have a list of __u16 page addresses +associated with each file. Since there are 2^18 pages in a 128MB NAND, +a __u16 is insufficient to uniquely identify a page but it does +identify a group of 4 pages - a small enough region to search +exhaustively. This mechanism is clearly expandable to larger NAND +devices - within reason. The RAM overhead with this approach is approx +2 bytes per page - 512kB of RAM for a whole 128MB NAND. + +Boot-time scanning to build the file structure lists only requires +one pass reading NAND. If proper shutdowns happen the current RAM +summary of the filesystem status is saved to flash, called +'checkpointing'. This saves re-scanning the flash on startup, and gives +huge boot/mount time savings. + +YAFFS regenerates its state by 'replaying the tape' - i.e. by +scanning the chunks in their allocation order (i.e. block sequence ID +order), which is usually different from the media block order. Each +block is still only read once - starting from the end of the media and +working back. + +YAFFS tags in YAFFS1 mode: + +18-bit Object ID (2^18 files, i.e. > 260,000 files). File id 0 is not + valid and indicates a deleted page. File id 0x3ffff is also not valid. + Synonymous with inode. +2-bit serial number +20-bit Chunk ID within file. Limit of 2^20 chunks/pages per file (i.e. + > 500MB max file size). Chunk ID 0 is the file header for the file. +10-bit counter of the number of bytes used in the page.
+12-bit ECC on tags + +YAFFS tags in YAFFS2 mode: + 4 bytes 32-bit chunk ID + 4 bytes 32-bit object ID + 2 bytes Number of data bytes in this chunk + 4 bytes Sequence number for this block + 3 bytes ECC on tags + 12 bytes ECC on data (3 bytes per 256 bytes of data) + + +Page allocation and garbage collection + +Pages are allocated sequentially from the currently selected block. +When all the pages in the block are filled, another clean block is +selected for allocation. At least two or three clean blocks are +reserved for garbage collection purposes. If there are insufficient +clean blocks available, then a dirty block (i.e. one containing only +discarded pages) is erased to free it up as a clean block. If no dirty +blocks are available, then the dirtiest block is selected for garbage +collection. + +Garbage collection is performed by copying the valid data pages into +new data pages thus rendering all the pages in this block dirty and +freeing it up for erasure. I also like the idea of selecting a block +at random some small percentage of the time - thus reducing the chance +of wear differences. + +YAFFS is single-threaded. Garbage-collection is done as a parasitic +task of writing data. So each time some data is written, a bit of +pending garbage collection is done. More pages are garbage-collected +when free space is tight. + + +Flash writing + +YAFFS only ever writes each page once, complying with the requirements +of the most restrictive NAND devices. + +Wear levelling + +This comes as a side-effect of the block-allocation strategy. Data is +always written on the next free block, so they are all used equally. +Blocks containing data that is written but never erased will not get +back into the free list, so wear is levelled over only blocks which +are free or become free, not blocks which never change. + + + +Some helpful info +----------------- + +Formatting a YAFFS device is simply done by erasing it.
+ +Making an initial filesystem can be tricky because YAFFS uses the OOB +and thus the bytes that get written depend on the YAFFS data (tags), +and the ECC bytes and bad block markers which are dictated by the +hardware and/or the MTD subsystem. The data layout also depends on the +device page size (512b or 2K). Because YAFFS is only responsible for +some of the OOB data, generating a filesystem offline requires +detailed knowledge of what the other parts (MTD and NAND +driver/hardware) are going to do. + +To make a YAFFS filesystem you have 3 options: + +1) Boot the system with an empty NAND device mounted as YAFFS and copy + stuff on. + +2) Make a filesystem image offline, then boot the system and use + MTDutils to write an image to flash. + +3) Make a filesystem image offline and use some tool like a bootloader to + write it to flash. + +Option 1 avoids a lot of issues because all the parts +(YAFFS/MTD/hardware) take care of their own bits and (if you have +put things together properly) it will 'just work'. YAFFS just needs to +know how many bytes of the OOB it can use. However sometimes it is not +practical. + +Option 2 lets MTD/hardware take care of the ECC so the filesystem +image just has to know which bytes to use for YAFFS Tags. + +Option 3 is hardest as the image creator needs to know exactly what +ECC bytes, endianness and algorithm to use as well as which bytes are +available to YAFFS. + +mkyaffs2image creates an image suitable for option 3 for the +particular case of yaffs2 on 2K page NAND with default MTD layout. + +mkyaffsimage creates an equivalent image for 512b page NAND (i.e. +yaffs1 format). + +Bootloaders +----------- + +A bootloader using YAFFS needs to know how MTD is laying out the OOB +so that it can skip bad blocks.
+ +YAFFS Tracing +------------- diff --git a/fs/yaffs2/devextras.h b/fs/yaffs2/devextras.h new file mode 100755 index 0000000..f6e5361 --- /dev/null +++ b/fs/yaffs2/devextras.h @@ -0,0 +1,275 @@ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. + */ + +/* + * This file just holds extra declarations used during development. + * Most of these are from kernel includes placed here so we can use them in + * applications. + * + */ + +#ifndef __EXTRAS_H__ +#define __EXTRAS_H__ + +#if defined WIN32 +#define __inline__ __inline +#define new newHack +#endif + +/* XXX U-BOOT XXX */ +#if 1 /* !(defined __KERNEL__) || (defined WIN32) */ + +/* User space defines */ + +/* XXX U-BOOT XXX */ +#if 0 +typedef unsigned char __u8; +typedef unsigned short __u16; +typedef unsigned __u32; +#endif + +#include <asm/types.h> + +/* + * Simple doubly linked list implementation. + * + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines.
/* List primitives: head type, initialisers, insertion, removal, traversal. */

/*
 * Stand-in for the kernel prefetch() hint. It expands to the constant 1 and
 * never evaluates its argument.
 */
#define prefetch(x) 1

/*
 * Link node embedded in any structure that is kept on a list. An empty list
 * is a head whose next and prev both point back at the head itself.
 */
struct list_head {
	struct list_head *next, *prev;
};

/* Static initialiser producing an empty list for the head called `name`. */
#define LIST_HEAD_INIT(name) { &(name), &(name) }

/* Define a list head called `name` and initialise it to the empty list. */
#define LIST_HEAD(name) \
	struct list_head name = LIST_HEAD_INIT(name)

/*
 * Run-time initialiser: make *ptr an empty list.
 * `ptr` is copied into a local first so that an argument with side effects
 * (e.g. INIT_LIST_HEAD(p++)) is evaluated exactly once.
 */
#define INIT_LIST_HEAD(ptr) do { \
	struct list_head *init_list_head_ptr_ = (ptr); \
	init_list_head_ptr_->next = init_list_head_ptr_; \
	init_list_head_ptr_->prev = init_list_head_ptr_; \
} while (0)

/*
 * Insert a new entry between two known consecutive entries.
 *
 * This is only for internal list manipulation where we know
 * the prev/next entries already!
 */
static __inline__ void __list_add(struct list_head *new,
				  struct list_head *prev,
				  struct list_head *next)
{
	next->prev = new;
	new->next = next;
	new->prev = prev;
	prev->next = new;
}

/**
 * list_add - add a new entry
 * @new: new entry to be added
 * @head: list head to add it after
 *
 * Insert a new entry after the specified head.
 * This is good for implementing stacks.
 */
static __inline__ void list_add(struct list_head *new, struct list_head *head)
{
	__list_add(new, head, head->next);
}

/**
 * list_add_tail - add a new entry
 * @new: new entry to be added
 * @head: list head to add it before
 *
 * Insert a new entry before the specified head.
 * This is useful for implementing queues.
 */
static __inline__ void list_add_tail(struct list_head *new,
				     struct list_head *head)
{
	__list_add(new, head->prev, head);
}

/*
 * Delete a list entry by making the prev/next entries
 * point to each other.
 *
 * This is only for internal list manipulation where we know
 * the prev/next entries already!
 */
static __inline__ void __list_del(struct list_head *prev,
				  struct list_head *next)
{
	next->prev = prev;
	prev->next = next;
}

/**
 * list_del - deletes entry from list.
 * @entry: the element to delete from the list.
 * Note: list_empty on entry does not return true after this, the entry is
 * in an undefined state (its own next/prev are left untouched).
 */
static __inline__ void list_del(struct list_head *entry)
{
	__list_del(entry->prev, entry->next);
}

/**
 * list_del_init - deletes entry from list and reinitialize it.
 * @entry: the element to delete from the list.
 *
 * After this call list_empty(entry) is true.
 */
static __inline__ void list_del_init(struct list_head *entry)
{
	__list_del(entry->prev, entry->next);
	INIT_LIST_HEAD(entry);
}

/**
 * list_empty - tests whether a list is empty
 * @head: the list to test.
 *
 * Returns non-zero when head->next points back at head. The list is only
 * read, hence the const qualifier.
 */
static __inline__ int list_empty(const struct list_head *head)
{
	return head->next == head;
}

/**
 * list_splice - join two lists
 * @list: the new list to add.
 * @head: the place to add it in the first list.
 *
 * The entries of @list are linked in directly after @head. Nothing happens
 * when @list is empty. @list itself is not reinitialised.
 */
static __inline__ void list_splice(struct list_head *list,
				   struct list_head *head)
{
	struct list_head *first = list->next;

	if (first != list) {
		struct list_head *last = list->prev;
		struct list_head *at = head->next;

		first->prev = head;
		head->next = first;

		last->next = at;
		at->prev = last;
	}
}

/**
 * list_entry - get the struct for this entry
 * @ptr:	the &struct list_head pointer.
 * @type:	the type of the struct this is embedded in.
 * @member:	the name of the list_struct within the struct.
 *
 * Subtracts the byte offset of @member inside @type from @ptr.
 */
#define list_entry(ptr, type, member) \
	((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))

/**
 * list_for_each	-	iterate over a list
 * @pos:	the &struct list_head to use as a loop counter.
 * @head:	the head for your list.
 *
 * The current entry must not be removed inside the loop body.
 */
#define list_for_each(pos, head) \
	for ((pos) = (head)->next, prefetch((pos)->next); (pos) != (head); \
		(pos) = (pos)->next, prefetch((pos)->next))

/**
 * list_for_each_safe	-	iterate over a list safe against removal
 *				of list entry
 * @pos:	the &struct list_head to use as a loop counter.
 * @n:		another &struct list_head to use as temporary storage
 * @head:	the head for your list.
 */
+ */ +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) + +/* + * File types + */ +#define DT_UNKNOWN 0 +#define DT_FIFO 1 +#define DT_CHR 2 +#define DT_DIR 4 +#define DT_BLK 6 +#define DT_REG 8 +#define DT_LNK 10 +#define DT_SOCK 12 +#define DT_WHT 14 + +#ifndef WIN32 +/* XXX U-BOOT XXX */ +#if 0 +#include <sys/stat.h> +#else +#include "common.h" +#endif +#endif + +/* + * Attribute flags. These should be or-ed together to figure out what + * has been changed! + */ +#define ATTR_MODE 1 +#define ATTR_UID 2 +#define ATTR_GID 4 +#define ATTR_SIZE 8 +#define ATTR_ATIME 16 +#define ATTR_MTIME 32 +#define ATTR_CTIME 64 +#define ATTR_ATIME_SET 128 +#define ATTR_MTIME_SET 256 +#define ATTR_FORCE 512 /* Not a change, but a change it */ +#define ATTR_ATTR_FLAG 1024 + +struct iattr { + unsigned int ia_valid; + unsigned ia_mode; + unsigned ia_uid; + unsigned ia_gid; + unsigned ia_size; + unsigned ia_atime; + unsigned ia_mtime; + unsigned ia_ctime; + unsigned int ia_attr_flags; +}; + +#define KERN_DEBUG + +#else + +#ifndef WIN32 +#include <linux/types.h> +#include <linux/list.h> +#include <linux/fs.h> +#include <linux/stat.h> +#endif + +#endif + +#if defined WIN32 +#undef new +#endif + +#endif diff --git a/fs/yaffs2/yaffs_checkptrw.c b/fs/yaffs2/yaffs_checkptrw.c new file mode 100755 index 0000000..fe886dc --- /dev/null +++ b/fs/yaffs2/yaffs_checkptrw.c @@ -0,0 +1,447 @@ +/* + * YAFFS: Yet Another Flash File System. A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* XXX U-BOOT XXX */ +#include <common.h> +#include <malloc.h> + +const char *yaffs_checkptrw_c_version = + "$Id: yaffs_checkptrw.c,v 1.14 2007/05/15 20:07:40 charles Exp $"; + + +#include "yaffs_checkptrw.h" + +struct yaffs_CheckpointChunkhdr { + int version; + int seq; + u32 sum; + u32 xor; +}; + +static int yaffs_ApplyChunkOffset(yaffs_Device *dev, int chunk) +{ + return chunk - dev->chunkOffset; +} + +static int yaffs_ApplyBlockOffset(yaffs_Device *dev, int block) +{ + return block - dev->blockOffset; +} +#if 0 +static void yaffs_CheckpointInitChunkhdr(yaffs_Device *dev) +{ + struct yaffs_CheckpointChunkhdr hdr; + hdr.version = YAFFS_CHECKPOINT_VERSION; + hdr.seq = dev->checkpointPageSequence; + hdr.sum = dev->checkpointSum; + hdr.xor = dev->checkpointXor; + + dev->checkpointByteOffset = sizeof(hdr); + + memcpy(dev->checkpointBuffer, &hdr, sizeof(hdr)); +} +#endif +static int yaffs_CheckpointCheckChunkhdr(yaffs_Device *dev) +{ + struct yaffs_CheckpointChunkhdr hdr; + memcpy(&hdr, dev->checkpointBuffer, sizeof(hdr)); + dev->checkpointByteOffset = sizeof(hdr); + + return hdr.version == YAFFS_CHECKPOINT_VERSION && + hdr.seq == dev->checkpointPageSequence && + hdr.sum == dev->checkpointSum && + hdr.xor == dev->checkpointXor; +} + +static int yaffs_CheckpointSpaceOk(yaffs_Device *dev) +{ + + int blocksAvailable = dev->nErasedBlocks - dev->nReservedBlocks; + + T(YAFFS_TRACE_CHECKPOINT, + (TSTR("checkpt blocks available = %d" TENDSTR), + blocksAvailable)); + + + return (blocksAvailable <= 0) ? 
0 : 1; +} + + +static int yaffs_CheckpointErase(yaffs_Device *dev) +{ + + int i; + + + if(!dev->eraseBlockInNAND) + return 0; + T(YAFFS_TRACE_CHECKPOINT,(TSTR("checking blocks %d to %d"TENDSTR), + dev->internalStartBlock,dev->internalEndBlock)); + + for(i = dev->internalStartBlock; i <= dev->internalEndBlock; i++) { + yaffs_BlockInfo *bi = yaffs_GetBlockInfo(dev,i); + if(bi->blockState == YAFFS_BLOCK_STATE_CHECKPOINT){ + T(YAFFS_TRACE_CHECKPOINT,(TSTR("erasing checkpt block %d"TENDSTR),i)); + if(dev->eraseBlockInNAND(dev,i- dev->blockOffset /* realign */)){ + bi->blockState = YAFFS_BLOCK_STATE_EMPTY; + dev->nErasedBlocks++; + dev->nFreeChunks += dev->nChunksPerBlock; + } + else { + dev->markNANDBlockBad(dev,i); + bi->blockState = YAFFS_BLOCK_STATE_DEAD; + } + } + } + + dev->blocksInCheckpoint = 0; + + return 1; +} + + +static void yaffs_CheckpointFindNextErasedBlock(yaffs_Device *dev) +{ + int i; + int blocksAvailable = dev->nErasedBlocks - dev->nReservedBlocks; + T(YAFFS_TRACE_CHECKPOINT, + (TSTR("allocating checkpt block: erased %d reserved %d avail %d next %d "TENDSTR), + dev->nErasedBlocks,dev->nReservedBlocks,blocksAvailable,dev->checkpointNextBlock)); + + if(dev->checkpointNextBlock >= 0 && + dev->checkpointNextBlock <= dev->internalEndBlock && + blocksAvailable > 0){ + + for(i = dev->checkpointNextBlock; i <= dev->internalEndBlock; i++){ + yaffs_BlockInfo *bi = yaffs_GetBlockInfo(dev,i); + if(bi->blockState == YAFFS_BLOCK_STATE_EMPTY){ + dev->checkpointNextBlock = i + 1; + dev->checkpointCurrentBlock = i; + T(YAFFS_TRACE_CHECKPOINT,(TSTR("allocating checkpt block %d"TENDSTR),i)); + return; + } + } + } + T(YAFFS_TRACE_CHECKPOINT,(TSTR("out of checkpt blocks"TENDSTR))); + + dev->checkpointNextBlock = -1; + dev->checkpointCurrentBlock = -1; +} + +static void yaffs_CheckpointFindNextCheckpointBlock(yaffs_Device *dev) +{ + int i; + yaffs_ExtendedTags tags; + + T(YAFFS_TRACE_CHECKPOINT,(TSTR("find next checkpt block: start: blocks %d next %d" TENDSTR), + 
dev->blocksInCheckpoint, dev->checkpointNextBlock)); + + if(dev->blocksInCheckpoint < dev->checkpointMaxBlocks) + for(i = dev->checkpointNextBlock; i <= dev->internalEndBlock; i++){ + int chunk = i * dev->nChunksPerBlock; + int realignedChunk = chunk - dev->chunkOffset; + + dev->readChunkWithTagsFromNAND(dev,realignedChunk,NULL,&tags); + T(YAFFS_TRACE_CHECKPOINT,(TSTR("find next checkpt block: search: block %d oid %d seq %d eccr %d" TENDSTR), + i, tags.objectId,tags.sequenceNumber,tags.eccResult)); + + if(tags.sequenceNumber == YAFFS_SEQUENCE_CHECKPOINT_DATA){ + /* Right kind of block */ + dev->checkpointNextBlock = tags.objectId; + dev->checkpointCurrentBlock = i; + dev->checkpointBlockList[dev->blocksInCheckpoint] = i; + dev->blocksInCheckpoint++; + T(YAFFS_TRACE_CHECKPOINT,(TSTR("found checkpt block %d"TENDSTR),i)); + return; + } + } + + T(YAFFS_TRACE_CHECKPOINT,(TSTR("found no more checkpt blocks"TENDSTR))); + + dev->checkpointNextBlock = -1; + dev->checkpointCurrentBlock = -1; +} + + +int yaffs_CheckpointOpen(yaffs_Device *dev, int forWriting) +{ + + /* Got the functions we need? 
*/ + if (!dev->writeChunkWithTagsToNAND || + !dev->readChunkWithTagsFromNAND || + !dev->eraseBlockInNAND || + !dev->markNANDBlockBad) + return 0; + + if(forWriting && !yaffs_CheckpointSpaceOk(dev)) + return 0; + + if(!dev->checkpointBuffer) + dev->checkpointBuffer = YMALLOC_DMA(dev->nDataBytesPerChunk); + if(!dev->checkpointBuffer) + return 0; + + + dev->checkpointPageSequence = 0; + + dev->checkpointOpenForWrite = forWriting; + + dev->checkpointByteCount = 0; + dev->checkpointSum = 0; + dev->checkpointXor = 0; + dev->checkpointCurrentBlock = -1; + dev->checkpointCurrentChunk = -1; + dev->checkpointNextBlock = dev->internalStartBlock; + + /* Erase all the blocks in the checkpoint area */ + if(forWriting){ + memset(dev->checkpointBuffer,0,dev->nDataBytesPerChunk); + //yaffs_CheckpointInitChunkhdr(dev); + return yaffs_CheckpointErase(dev); + + + } else { + int i; + /* Set to a value that will kick off a read */ + dev->checkpointByteOffset = dev->nDataBytesPerChunk; + /* A checkpoint block list of 1 checkpoint block per 16 block is (hopefully) + * going to be way more than we need */ + dev->blocksInCheckpoint = 0; + dev->checkpointMaxBlocks = (dev->internalEndBlock - dev->internalStartBlock)/16 + 2; + dev->checkpointBlockList = YMALLOC(sizeof(int) * dev->checkpointMaxBlocks); + for(i = 0; i < dev->checkpointMaxBlocks; i++) + dev->checkpointBlockList[i] = -1; + } + + return 1; +} + +int yaffs_GetCheckpointSum(yaffs_Device *dev, __u32 *sum) +{ + __u32 compositeSum; + compositeSum = (dev->checkpointSum << 8) | (dev->checkpointXor & 0xFF); + *sum = compositeSum; + return 1; +} +#if 0 +static int yaffs_CheckpointFlushBuffer(yaffs_Device *dev) +{ + + int chunk; + int realignedChunk; + + yaffs_ExtendedTags tags; + + if(dev->checkpointCurrentBlock < 0){ + yaffs_CheckpointFindNextErasedBlock(dev); + dev->checkpointCurrentChunk = 0; + } + + if(dev->checkpointCurrentBlock < 0) + return 0; + + tags.chunkDeleted = 0; + tags.objectId = dev->checkpointNextBlock; /* Hint to next 
place to look */ + tags.chunkId = dev->checkpointPageSequence + 1; + tags.sequenceNumber = YAFFS_SEQUENCE_CHECKPOINT_DATA; + tags.byteCount = dev->nDataBytesPerChunk; + if(dev->checkpointCurrentChunk == 0){ + /* First chunk we write for the block? Set block state to + checkpoint */ + yaffs_BlockInfo *bi = yaffs_GetBlockInfo(dev,dev->checkpointCurrentBlock); + bi->blockState = YAFFS_BLOCK_STATE_CHECKPOINT; + dev->blocksInCheckpoint++; + } + + chunk = dev->checkpointCurrentBlock * dev->nChunksPerBlock + dev->checkpointCurrentChunk; + + + T(YAFFS_TRACE_CHECKPOINT,(TSTR("checkpoint wite buffer nand %d(%d:%d) objid %d chId %d" TENDSTR), + chunk, dev->checkpointCurrentBlock, dev->checkpointCurrentChunk,tags.objectId,tags.chunkId)); + + realignedChunk = chunk - dev->chunkOffset; + + dev->writeChunkWithTagsToNAND(dev,realignedChunk,dev->checkpointBuffer,&tags); + dev->checkpointByteOffset = 0; + dev->checkpointPageSequence++; + dev->checkpointCurrentChunk++; + if(dev->checkpointCurrentChunk >= dev->nChunksPerBlock){ + dev->checkpointCurrentChunk = 0; + dev->checkpointCurrentBlock = -1; + } + memset(dev->checkpointBuffer,0,dev->nDataBytesPerChunk); + + yaffs_CheckpointInitChunkhdr(dev); + return 1; +} +#endif +#if 0 +int yaffs_CheckpointWrite(yaffs_Device *dev,const void *data, int nBytes) +{ + int i=0; + int ok = 1; + + + __u8 * dataBytes = (__u8 *)data; + + + + if(!dev->checkpointBuffer) + return 0; + + if(!dev->checkpointOpenForWrite) + return -1; + + while(i < nBytes && ok) { + + + + dev->checkpointBuffer[dev->checkpointByteOffset] = *dataBytes ; + dev->checkpointSum += *dataBytes; + dev->checkpointXor ^= *dataBytes; + + dev->checkpointByteOffset++; + i++; + dataBytes++; + dev->checkpointByteCount++; + + + if(dev->checkpointByteOffset < 0 || + dev->checkpointByteOffset >= dev->nDataBytesPerChunk) + ok = yaffs_CheckpointFlushBuffer(dev); + + } + + return i; +} +#endif +int yaffs_CheckpointRead(yaffs_Device *dev, void *data, int nBytes) +{ + int i=0; + int ok = 1; + 
yaffs_ExtendedTags tags; + + + int chunk; + int realignedChunk; + + __u8 *dataBytes = (__u8 *)data; + + if(!dev->checkpointBuffer) + return 0; + + if(dev->checkpointOpenForWrite) + return -1; + while(i < nBytes && ok) { + + + if(dev->checkpointByteOffset < 0 || + dev->checkpointByteOffset >= dev->nDataBytesPerChunk) { + + if(dev->checkpointCurrentBlock < 0){ + yaffs_CheckpointFindNextCheckpointBlock(dev); + dev->checkpointCurrentChunk = 0; + } + + if(dev->checkpointCurrentBlock < 0) + ok = 0; + else { + + chunk = dev->checkpointCurrentBlock * dev->nChunksPerBlock + + dev->checkpointCurrentChunk; + + realignedChunk = chunk - dev->chunkOffset; + + /* read in the next chunk */ + /* printf("read checkpoint page %d\n",dev->checkpointPage); */ + dev->readChunkWithTagsFromNAND(dev, realignedChunk, + dev->checkpointBuffer, + &tags); + + if(tags.chunkId != (dev->checkpointPageSequence + 1) || + tags.sequenceNumber != YAFFS_SEQUENCE_CHECKPOINT_DATA) + ok = 0; + if(!yaffs_CheckpointCheckChunkhdr(dev)) + ok = 0; + //dev->checkpointByteOffset = 0; + dev->checkpointPageSequence++; + dev->checkpointCurrentChunk++; + + if(dev->checkpointCurrentChunk >= dev->nChunksPerBlock) + dev->checkpointCurrentBlock = -1; + } + } + + if(ok){ + *dataBytes = dev->checkpointBuffer[dev->checkpointByteOffset]; + dev->checkpointSum += *dataBytes; + dev->checkpointXor ^= *dataBytes; + dev->checkpointByteOffset++; + i++; + dataBytes++; + dev->checkpointByteCount++; + } + } + + return i; +} + +int yaffs_CheckpointClose(yaffs_Device *dev) +{ + +// if(dev->checkpointOpenForWrite){ +// if(dev->checkpointByteOffset != 0) +// yaffs_CheckpointFlushBuffer(dev); +// } else { + int i; + for(i = 0; i < dev->blocksInCheckpoint && dev->checkpointBlockList[i] >= 0; i++){ + yaffs_BlockInfo *bi = yaffs_GetBlockInfo(dev,dev->checkpointBlockList[i]); + if(bi->blockState == YAFFS_BLOCK_STATE_EMPTY) + bi->blockState = YAFFS_BLOCK_STATE_CHECKPOINT; + else { + // Todo this looks odd... 
+ } + } + YFREE(dev->checkpointBlockList); + dev->checkpointBlockList = NULL; +// } + + dev->nFreeChunks -= dev->blocksInCheckpoint * dev->nChunksPerBlock; + dev->nErasedBlocks -= dev->blocksInCheckpoint; + + + T(YAFFS_TRACE_CHECKPOINT,(TSTR("checkpoint byte count %d" TENDSTR), + dev->checkpointByteCount)); + + if(dev->checkpointBuffer){ + /* free the buffer */ + YFREE(dev->checkpointBuffer); + dev->checkpointBuffer = NULL; + return 1; + } + else + return 0; + +} + +int yaffs_CheckpointInvalidateStream(yaffs_Device *dev) +{ + /* Erase the first checksum block */ + + T(YAFFS_TRACE_CHECKPOINT,(TSTR("checkpoint invalidate"TENDSTR))); + + if(!yaffs_CheckpointSpaceOk(dev)) + return 0; + + return yaffs_CheckpointErase(dev); +} diff --git a/fs/yaffs2/yaffs_checkptrw.h b/fs/yaffs2/yaffs_checkptrw.h new file mode 100755 index 0000000..e59d151 --- /dev/null +++ b/fs/yaffs2/yaffs_checkptrw.h @@ -0,0 +1,34 @@ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. 
+ */ + +#ifndef __YAFFS_CHECKPTRW_H__ +#define __YAFFS_CHECKPTRW_H__ + +#include "yaffs_guts.h" + +int yaffs_CheckpointOpen(yaffs_Device *dev, int forWriting); + +int yaffs_CheckpointWrite(yaffs_Device *dev,const void *data, int nBytes); + +int yaffs_CheckpointRead(yaffs_Device *dev,void *data, int nBytes); + +int yaffs_GetCheckpointSum(yaffs_Device *dev, __u32 *sum); + +int yaffs_CheckpointClose(yaffs_Device *dev); + +int yaffs_CheckpointInvalidateStream(yaffs_Device *dev); + + +#endif diff --git a/fs/yaffs2/yaffs_flashif.h b/fs/yaffs2/yaffs_flashif.h new file mode 100755 index 0000000..4e5157e --- /dev/null +++ b/fs/yaffs2/yaffs_flashif.h @@ -0,0 +1,31 @@ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. 
+ */ + +#ifndef __YAFFS_FLASH_H__ +#define __YAFFS_FLASH_H__ + + +#include "yaffs_guts.h" +int yflash_EraseBlockInNAND(yaffs_Device *dev, int blockNumber); +int yflash_WriteChunkToNAND(yaffs_Device *dev,int chunkInNAND,const __u8 *data, const yaffs_Spare *spare); +int yflash_WriteChunkWithTagsToNAND(yaffs_Device *dev,int chunkInNAND,const __u8 *data, yaffs_ExtendedTags *tags); +int yflash_ReadChunkFromNAND(yaffs_Device *dev,int chunkInNAND, __u8 *data, yaffs_Spare *spare); +int yflash_ReadChunkWithTagsFromNAND(yaffs_Device *dev,int chunkInNAND, __u8 *data, yaffs_ExtendedTags *tags); +int yflash_EraseBlockInNAND(yaffs_Device *dev, int blockNumber); +int yflash_InitialiseNAND(yaffs_Device *dev); +int yflash_MarkNANDBlockBad(struct yaffs_DeviceStruct *dev, int blockNo); +int yflash_QueryNANDBlock(struct yaffs_DeviceStruct *dev, int blockNo, yaffs_BlockState *state, int *sequenceNumber); + +#endif diff --git a/fs/yaffs2/yaffs_guts.c b/fs/yaffs2/yaffs_guts.c new file mode 100755 index 0000000..c680468 --- /dev/null +++ b/fs/yaffs2/yaffs_guts.c @@ -0,0 +1,7413 @@ +/* + * YAFFS: Yet Another Flash File System. A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* XXX U-BOOT XXX */ +#include <common.h> + +const char *yaffs_guts_c_version = + "$Id: yaffs_guts.c,v 1.52 2007/10/16 00:45:05 charles Exp $"; + +#include "yportenv.h" +#include "linux/stat.h" + +#include "yaffsinterface.h" +#include "yaffsfs.h" +#include "yaffs_guts.h" + +#include "yaffs_nand.h" + +#include "yaffs_checkptrw.h" + +#include "yaffs_nand.h" +#include "yaffs_packedtags2.h" + +#include "malloc.h" + +#ifdef CONFIG_YAFFS_WINCE +void yfsd_LockYAFFS(BOOL fsLockOnly); +void yfsd_UnlockYAFFS(BOOL fsLockOnly); +#endif + +#define YAFFS_PASSIVE_GC_CHUNKS 2 + +//#include "yaffs_ecc.h" + + +/* Robustification (if it ever comes about...) */ +static void yaffs_RetireBlock(yaffs_Device * dev, int blockInNAND); +void yaffs_HandleChunkError(yaffs_Device *dev, yaffs_BlockInfo *bi); + +/* Other local prototypes */ +static int yaffs_UnlinkObject( yaffs_Object *obj); +static int yaffs_ObjectHasCachedWriteData(yaffs_Object *obj); + +static void yaffs_HardlinkFixup(yaffs_Device *dev, yaffs_Object *hardList); + +static int yaffs_WriteNewChunkWithTagsToNAND(yaffs_Device * dev, + const __u8 * buffer, + yaffs_ExtendedTags * tags, + int useReserve); +static int yaffs_PutChunkIntoFile(yaffs_Object * in, int chunkInInode, + int chunkInNAND, int inScan); + +static yaffs_Object *yaffs_CreateNewObject(yaffs_Device * dev, int number, + yaffs_ObjectType type); +static void yaffs_AddObjectToDirectory(yaffs_Object * directory, + yaffs_Object * obj); +static int yaffs_UpdateObjectHeader(yaffs_Object * in, const YCHAR * name, + int force, int isShrink, int shadows); +static void yaffs_RemoveObjectFromDirectory(yaffs_Object * obj); +static int yaffs_CheckStructures(void); +static int yaffs_DeleteWorker(yaffs_Object * in, yaffs_Tnode * tn, __u32 level, + int chunkOffset, int *limit); +static int yaffs_DoGenericObjectDeletion(yaffs_Object * in); + +static yaffs_BlockInfo *yaffs_GetBlockInfo(yaffs_Device * dev, int blockNo); + +static __u8 *yaffs_GetTempBuffer(yaffs_Device * dev, int 
lineNo); +static void yaffs_ReleaseTempBuffer(yaffs_Device * dev, __u8 * buffer, + int lineNo); + +static int yaffs_CheckChunkErased(struct yaffs_DeviceStruct *dev, + int chunkInNAND); + +static int yaffs_UnlinkWorker(yaffs_Object * obj); +static void yaffs_DestroyObject(yaffs_Object * obj); + +static int yaffs_TagsMatch(const yaffs_ExtendedTags * tags, int objectId, + int chunkInObject); + +loff_t yaffs_GetFileSize(yaffs_Object * obj); + +static int yaffs_AllocateChunk(yaffs_Device * dev, int useReserve, yaffs_BlockInfo **blockUsedPtr); + +static void yaffs_VerifyFreeChunks(yaffs_Device * dev); + +static void yaffs_CheckObjectDetailsLoaded(yaffs_Object *in); + +#ifdef YAFFS_PARANOID +static int yaffs_CheckFileSanity(yaffs_Object * in); +#else +#define yaffs_CheckFileSanity(in) +#endif + +static void yaffs_InvalidateWholeChunkCache(yaffs_Object * in); +static void yaffs_InvalidateChunkCache(yaffs_Object * object, int chunkId); + +static void yaffs_InvalidateCheckpoint(yaffs_Device *dev); + +static int yaffs_FindChunkInFile(yaffs_Object * in, int chunkInInode, + yaffs_ExtendedTags * tags); + +static __u32 yaffs_GetChunkGroupBase(yaffs_Device *dev, yaffs_Tnode *tn, unsigned pos); +static yaffs_Tnode *yaffs_FindLevel0Tnode(yaffs_Device * dev, + yaffs_FileStructure * fStruct, + __u32 chunkId); + + +/* Function to calculate chunk and offset */ + +static void yaffs_AddrToChunk(yaffs_Device *dev, loff_t addr, __u32 *chunk, __u32 *offset) +{ + if(dev->chunkShift){ + /* Easy-peasy power of 2 case */ + *chunk = (__u32)(addr >> dev->chunkShift); + *offset = (__u32)(addr & dev->chunkMask); + } + else if(dev->crumbsPerChunk) + { + /* Case where we're using "crumbs" */ + *offset = (__u32)(addr & dev->crumbMask); + addr >>= dev->crumbShift; + *chunk = ((__u32)addr)/dev->crumbsPerChunk; + *offset += ((addr - (*chunk * dev->crumbsPerChunk)) << dev->crumbShift); + } + else + YBUG(); +} + +/* Function to return the number of shifts for a power of 2 greater than or equal + * to the 
given number + * Note we don't try to cater for all possible numbers and this does not have to + * be hellishly efficient. + */ + +static __u32 ShiftsGE(__u32 x) +{ + int extraBits; + int nShifts; + + nShifts = extraBits = 0; + + while(x>1){ + if(x & 1) extraBits++; + x>>=1; + nShifts++; + } + + if(extraBits) + nShifts++; + + return nShifts; +} + +/* Function to return the number of shifts to get a 1 in bit 0 + */ + +static __u32 ShiftDiv(__u32 x) +{ + int nShifts; + + nShifts = 0; + + if(!x) return 0; + + while( !(x&1)){ + x>>=1; + nShifts++; + } + + return nShifts; +} + + + +/* + * Temporary buffer manipulations. + */ + +static int yaffs_InitialiseTempBuffers(yaffs_Device *dev) +{ + int i; + __u8 *buf = (__u8 *)1; + + memset(dev->tempBuffer,0,sizeof(dev->tempBuffer)); + + for (i = 0; buf && i < YAFFS_N_TEMP_BUFFERS; i++) { + dev->tempBuffer[i].line = 0; /* not in use */ + dev->tempBuffer[i].buffer = buf = + YMALLOC_DMA(dev->nDataBytesPerChunk); + } + + return buf ? YAFFS_OK : YAFFS_FAIL; + +} + +static __u8 *yaffs_GetTempBuffer(yaffs_Device * dev, int lineNo) +{ + int i, j; + for (i = 0; i < YAFFS_N_TEMP_BUFFERS; i++) { + if (dev->tempBuffer[i].line == 0) { + dev->tempBuffer[i].line = lineNo; + if ((i + 1) > dev->maxTemp) { + dev->maxTemp = i + 1; + for (j = 0; j <= i; j++) + dev->tempBuffer[j].maxLine = + dev->tempBuffer[j].line; + } + + return dev->tempBuffer[i].buffer; + } + } + + T(YAFFS_TRACE_BUFFERS, + (TSTR("Out of temp buffers at line %d, other held by lines:"), + lineNo)); + for (i = 0; i < YAFFS_N_TEMP_BUFFERS; i++) { + T(YAFFS_TRACE_BUFFERS, (TSTR(" %d "), dev->tempBuffer[i].line)); + } + T(YAFFS_TRACE_BUFFERS, (TSTR(" " TENDSTR))); + + /* + * If we got here then we have to allocate an unmanaged one + * This is not good. 
+ */ + + dev->unmanagedTempAllocations++; + return YMALLOC(dev->nDataBytesPerChunk); + +} + +static void yaffs_ReleaseTempBuffer(yaffs_Device * dev, __u8 * buffer, + int lineNo) +{ + int i; + for (i = 0; i < YAFFS_N_TEMP_BUFFERS; i++) { + if (dev->tempBuffer[i].buffer == buffer) { + dev->tempBuffer[i].line = 0; + return; + } + } + + if (buffer) { + /* assume it is an unmanaged one. */ + T(YAFFS_TRACE_BUFFERS, + (TSTR("Releasing unmanaged temp buffer in line %d" TENDSTR), + lineNo)); + YFREE(buffer); + dev->unmanagedTempDeallocations++; + } + +} + +/* + * Determine if we have a managed buffer. + */ +int yaffs_IsManagedTempBuffer(yaffs_Device * dev, const __u8 * buffer) +{ + int i; + for (i = 0; i < YAFFS_N_TEMP_BUFFERS; i++) { + if (dev->tempBuffer[i].buffer == buffer) + return 1; + + } + + for (i = 0; i < dev->nShortOpCaches; i++) { + if( dev->srCache[i].data == buffer ) + return 1; + + } + + if (buffer == dev->checkpointBuffer) + return 1; + + T(YAFFS_TRACE_ALWAYS, + (TSTR("yaffs: unmaged buffer detected.\n" TENDSTR))); + return 0; +} + + + +/* + * Chunk bitmap manipulations + */ + +static Y_INLINE __u8 *yaffs_BlockBits(yaffs_Device * dev, int blk) +{ + if (blk < dev->internalStartBlock || blk > dev->internalEndBlock) { + T(YAFFS_TRACE_ERROR, + (TSTR("**>> yaffs: BlockBits block %d is not valid" TENDSTR), + blk)); + YBUG(); + } + return dev->chunkBits + + (dev->chunkBitmapStride * (blk - dev->internalStartBlock)); +} + +static Y_INLINE void yaffs_VerifyChunkBitId(yaffs_Device *dev, int blk, int chunk) +{ + if(blk < dev->internalStartBlock || blk > dev->internalEndBlock || + chunk < 0 || chunk >= dev->nChunksPerBlock) { + T(YAFFS_TRACE_ERROR, + (TSTR("**>> yaffs: Chunk Id (%d:%d) invalid"TENDSTR),blk,chunk)); + YBUG(); + } +} + +static Y_INLINE void yaffs_ClearChunkBits(yaffs_Device * dev, int blk) +{ + __u8 *blkBits = yaffs_BlockBits(dev, blk); + + memset(blkBits, 0, dev->chunkBitmapStride); +} + +static Y_INLINE void yaffs_ClearChunkBit(yaffs_Device * dev, int 
blk, int chunk) +{ + __u8 *blkBits = yaffs_BlockBits(dev, blk); + + yaffs_VerifyChunkBitId(dev,blk,chunk); + + blkBits[chunk / 8] &= ~(1 << (chunk & 7)); +} + +static Y_INLINE void yaffs_SetChunkBit(yaffs_Device * dev, int blk, int chunk) +{ + __u8 *blkBits = yaffs_BlockBits(dev, blk); + + yaffs_VerifyChunkBitId(dev,blk,chunk); + + blkBits[chunk / 8] |= (1 << (chunk & 7)); +} + +static Y_INLINE int yaffs_CheckChunkBit(yaffs_Device * dev, int blk, int chunk) +{ + __u8 *blkBits = yaffs_BlockBits(dev, blk); + yaffs_VerifyChunkBitId(dev,blk,chunk); + + return (blkBits[chunk / 8] & (1 << (chunk & 7))) ? 1 : 0; +} + +static Y_INLINE int yaffs_StillSomeChunkBits(yaffs_Device * dev, int blk) +{ + __u8 *blkBits = yaffs_BlockBits(dev, blk); + int i; + for (i = 0; i < dev->chunkBitmapStride; i++) { + if (*blkBits) + return 1; + blkBits++; + } + return 0; +} + +static int yaffs_CountChunkBits(yaffs_Device * dev, int blk) +{ + __u8 *blkBits = yaffs_BlockBits(dev, blk); + int i; + int n = 0; + for (i = 0; i < dev->chunkBitmapStride; i++) { + __u8 x = *blkBits; + while(x){ + if(x & 1) + n++; + x >>=1; + } + + blkBits++; + } + return n; +} + +/* + * Verification code + */ + +static int yaffs_SkipVerification(yaffs_Device *dev) +{ + return !(yaffs_traceMask & (YAFFS_TRACE_VERIFY | YAFFS_TRACE_VERIFY_FULL)); +} + +static int yaffs_SkipFullVerification(yaffs_Device *dev) +{ + return !(yaffs_traceMask & (YAFFS_TRACE_VERIFY_FULL)); +} + +static int yaffs_SkipNANDVerification(yaffs_Device *dev) +{ + return !(yaffs_traceMask & (YAFFS_TRACE_VERIFY_NAND)); +} + +static const char * blockStateName[] = { +"Unknown", +"Needs scanning", +"Scanning", +"Empty", +"Allocating", +"Full", +"Dirty", +"Checkpoint", +"Collecting", +"Dead" +}; + +static void yaffs_VerifyBlock(yaffs_Device *dev,yaffs_BlockInfo *bi,int n) +{ + int actuallyUsed; + int inUse; + + if(yaffs_SkipVerification(dev)) + return; + + /* Report illegal runtime states */ + if(bi->blockState <0 || bi->blockState >= 
YAFFS_NUMBER_OF_BLOCK_STATES) + T(YAFFS_TRACE_VERIFY,(TSTR("Block %d has undefined state %d"TENDSTR),n,bi->blockState)); + + switch(bi->blockState){ + case YAFFS_BLOCK_STATE_UNKNOWN: + case YAFFS_BLOCK_STATE_SCANNING: + case YAFFS_BLOCK_STATE_NEEDS_SCANNING: + T(YAFFS_TRACE_VERIFY,(TSTR("Block %d has bad run-state %s"TENDSTR), + n,blockStateName[bi->blockState])); + } + + /* Check pages in use and soft deletions are legal */ + + actuallyUsed = bi->pagesInUse - bi->softDeletions; + + if(bi->pagesInUse < 0 || bi->pagesInUse > dev->nChunksPerBlock || + bi->softDeletions < 0 || bi->softDeletions > dev->nChunksPerBlock || + actuallyUsed < 0 || actuallyUsed > dev->nChunksPerBlock) + T(YAFFS_TRACE_VERIFY,(TSTR("Block %d has illegal values pagesInUsed %d softDeletions %d"TENDSTR), + n,bi->pagesInUse,bi->softDeletions)); + + + /* Check chunk bitmap legal */ + inUse = yaffs_CountChunkBits(dev,n); + if(inUse != bi->pagesInUse) + T(YAFFS_TRACE_VERIFY,(TSTR("Block %d has inconsistent values pagesInUse %d counted chunk bits %d"TENDSTR), + n,bi->pagesInUse,inUse)); + + /* Check that the sequence number is valid. 
+ * Ten million is legal, but is very unlikely + */ + if(dev->isYaffs2 && + (bi->blockState == YAFFS_BLOCK_STATE_ALLOCATING || bi->blockState == YAFFS_BLOCK_STATE_FULL) && + (bi->sequenceNumber < YAFFS_LOWEST_SEQUENCE_NUMBER || bi->sequenceNumber > 10000000 )) + T(YAFFS_TRACE_VERIFY,(TSTR("Block %d has suspect sequence number of %d"TENDSTR), + n,bi->sequenceNumber)); + +} + +static void yaffs_VerifyCollectedBlock(yaffs_Device *dev,yaffs_BlockInfo *bi,int n) +{ + yaffs_VerifyBlock(dev,bi,n); + + /* After collection the block should be in the erased state */ + /* TODO: This will need to change if we do partial gc */ + + if(bi->blockState != YAFFS_BLOCK_STATE_EMPTY){ + T(YAFFS_TRACE_ERROR,(TSTR("Block %d is in state %d after gc, should be erased"TENDSTR), + n,bi->blockState)); + } +} + +static void yaffs_VerifyBlocks(yaffs_Device *dev) +{ + int i; + int nBlocksPerState[YAFFS_NUMBER_OF_BLOCK_STATES]; + int nIllegalBlockStates = 0; + + + if(yaffs_SkipVerification(dev)) + return; + + memset(nBlocksPerState,0,sizeof(nBlocksPerState)); + + + for(i = dev->internalStartBlock; i <= dev->internalEndBlock; i++){ + yaffs_BlockInfo *bi = yaffs_GetBlockInfo(dev,i); + yaffs_VerifyBlock(dev,bi,i); + + if(bi->blockState >=0 && bi->blockState < YAFFS_NUMBER_OF_BLOCK_STATES) + nBlocksPerState[bi->blockState]++; + else + nIllegalBlockStates++; + + } + + T(YAFFS_TRACE_VERIFY,(TSTR(""TENDSTR))); + T(YAFFS_TRACE_VERIFY,(TSTR("Block summary"TENDSTR))); + + T(YAFFS_TRACE_VERIFY,(TSTR("%d blocks have illegal states"TENDSTR),nIllegalBlockStates)); + if(nBlocksPerState[YAFFS_BLOCK_STATE_ALLOCATING] > 1) + T(YAFFS_TRACE_VERIFY,(TSTR("Too many allocating blocks"TENDSTR))); + + for(i = 0; i < YAFFS_NUMBER_OF_BLOCK_STATES; i++) + T(YAFFS_TRACE_VERIFY, + (TSTR("%s %d blocks"TENDSTR), + blockStateName[i],nBlocksPerState[i])); + + if(dev->blocksInCheckpoint != nBlocksPerState[YAFFS_BLOCK_STATE_CHECKPOINT]) + T(YAFFS_TRACE_VERIFY, + (TSTR("Checkpoint block count wrong dev %d count %d"TENDSTR), + 
dev->blocksInCheckpoint, nBlocksPerState[YAFFS_BLOCK_STATE_CHECKPOINT])); + + if(dev->nErasedBlocks != nBlocksPerState[YAFFS_BLOCK_STATE_EMPTY]) + T(YAFFS_TRACE_VERIFY, + (TSTR("Erased block count wrong dev %d count %d"TENDSTR), + dev->nErasedBlocks, nBlocksPerState[YAFFS_BLOCK_STATE_EMPTY])); + + if(nBlocksPerState[YAFFS_BLOCK_STATE_COLLECTING] > 1) + T(YAFFS_TRACE_VERIFY, + (TSTR("Too many collecting blocks %d (max is 1)"TENDSTR), + nBlocksPerState[YAFFS_BLOCK_STATE_COLLECTING])); + + T(YAFFS_TRACE_VERIFY,(TSTR(""TENDSTR))); + +} + +/* + * Verify the object header. oh must be valid, but obj and tags may be NULL in which + * case those tests will not be performed. + */ +static void yaffs_VerifyObjectHeader(yaffs_Object *obj, yaffs_ObjectHeader *oh, yaffs_ExtendedTags *tags, int parentCheck) +{ + if(yaffs_SkipVerification(obj->myDev)) + return; + + if(!(tags && obj && oh)){ + T(YAFFS_TRACE_VERIFY, + (TSTR("Verifying object header tags %x obj %x oh %x"TENDSTR), + (__u32)tags,(__u32)obj,(__u32)oh)); + return; + } + + if(oh->type <= YAFFS_OBJECT_TYPE_UNKNOWN || + oh->type > YAFFS_OBJECT_TYPE_MAX) + T(YAFFS_TRACE_VERIFY, + (TSTR("Obj %d header type is illegal value 0x%x"TENDSTR), + tags->objectId, oh->type)); + + if(tags->objectId != obj->objectId) + T(YAFFS_TRACE_VERIFY, + (TSTR("Obj %d header mismatch objectId %d"TENDSTR), + tags->objectId, obj->objectId)); + + + /* + * Check that the object's parent ids match if parentCheck requested. + * + * Tests do not apply to the root object. 
+ */ + + if(parentCheck && tags->objectId > 1 && !obj->parent) + T(YAFFS_TRACE_VERIFY, + (TSTR("Obj %d header mismatch parentId %d obj->parent is NULL"TENDSTR), + tags->objectId, oh->parentObjectId)); + + + if(parentCheck && obj->parent && + oh->parentObjectId != obj->parent->objectId && + (oh->parentObjectId != YAFFS_OBJECTID_UNLINKED || + obj->parent->objectId != YAFFS_OBJECTID_DELETED)) + T(YAFFS_TRACE_VERIFY, + (TSTR("Obj %d header mismatch parentId %d parentObjectId %d"TENDSTR), + tags->objectId, oh->parentObjectId, obj->parent->objectId)); + + + if(tags->objectId > 1 && oh->name[0] == 0) /* Null name */ + T(YAFFS_TRACE_VERIFY, + (TSTR("Obj %d header name is NULL"TENDSTR), + obj->objectId)); + + if(tags->objectId > 1 && ((__u8)(oh->name[0])) == 0xff) /* Trashed name */ + T(YAFFS_TRACE_VERIFY, + (TSTR("Obj %d header name is 0xFF"TENDSTR), + obj->objectId)); +} + + + +static int yaffs_VerifyTnodeWorker(yaffs_Object * obj, yaffs_Tnode * tn, + __u32 level, int chunkOffset) +{ + int i; + yaffs_Device *dev = obj->myDev; + int ok = 1; + + if (tn) { + if (level > 0) { + + for (i = 0; i < YAFFS_NTNODES_INTERNAL && ok; i++){ + if (tn->internal[i]) { + ok = yaffs_VerifyTnodeWorker(obj, + tn->internal[i], + level - 1, + (chunkOffset<<YAFFS_TNODES_INTERNAL_BITS) + i); + } + } + } else if (level == 0) { + int i; + yaffs_ExtendedTags tags; + __u32 objectId = obj->objectId; + + chunkOffset <<= YAFFS_TNODES_LEVEL0_BITS; + + for(i = 0; i < YAFFS_NTNODES_LEVEL0; i++){ + __u32 theChunk = yaffs_GetChunkGroupBase(dev,tn,i); + + if(theChunk > 0){ + /* T(~0,(TSTR("verifying (%d:%d) %d"TENDSTR),tags.objectId,tags.chunkId,theChunk)); */ + yaffs_ReadChunkWithTagsFromNAND(dev,theChunk,NULL, &tags); + if(tags.objectId != objectId || tags.chunkId != chunkOffset){ + T(~0,(TSTR("Object %d chunkId %d NAND mismatch chunk %d tags (%d:%d)"TENDSTR), + objectId, chunkOffset, theChunk, + tags.objectId, tags.chunkId)); + } + } + chunkOffset++; + } + } + } + + return ok; + +} + + +static void 
yaffs_VerifyFile(yaffs_Object *obj) +{ + int requiredTallness; + int actualTallness; + __u32 lastChunk; + __u32 x; + __u32 i; + yaffs_Device *dev; + yaffs_ExtendedTags tags; + yaffs_Tnode *tn; + __u32 objectId; + + if(obj && yaffs_SkipVerification(obj->myDev)) + return; + + dev = obj->myDev; + objectId = obj->objectId; + + /* Check file size is consistent with tnode depth */ + lastChunk = obj->variant.fileVariant.fileSize / dev->nDataBytesPerChunk + 1; + x = lastChunk >> YAFFS_TNODES_LEVEL0_BITS; + requiredTallness = 0; + while (x> 0) { + x >>= YAFFS_TNODES_INTERNAL_BITS; + requiredTallness++; + } + + actualTallness = obj->variant.fileVariant.topLevel; + + if(requiredTallness > actualTallness ) + T(YAFFS_TRACE_VERIFY, + (TSTR("Obj %d had tnode tallness %d, needs to be %d"TENDSTR), + obj->objectId,actualTallness, requiredTallness)); + + + /* Check that the chunks in the tnode tree are all correct. + * We do this by scanning through the tnode tree and + * checking the tags for every chunk match. 
+ */ + + if(yaffs_SkipNANDVerification(dev)) + return; + + for(i = 1; i <= lastChunk; i++){ + tn = yaffs_FindLevel0Tnode(dev, &obj->variant.fileVariant,i); + + if (tn) { + __u32 theChunk = yaffs_GetChunkGroupBase(dev,tn,i); + if(theChunk > 0){ + /* T(~0,(TSTR("verifying (%d:%d) %d"TENDSTR),objectId,i,theChunk)); */ + yaffs_ReadChunkWithTagsFromNAND(dev,theChunk,NULL, &tags); + if(tags.objectId != objectId || tags.chunkId != i){ + T(~0,(TSTR("Object %d chunkId %d NAND mismatch chunk %d tags (%d:%d)"TENDSTR), + objectId, i, theChunk, + tags.objectId, tags.chunkId)); + } + } + } + + } + +} + +static void yaffs_VerifyDirectory(yaffs_Object *obj) +{ + if(obj && yaffs_SkipVerification(obj->myDev)) + return; + +} + +static void yaffs_VerifyHardLink(yaffs_Object *obj) +{ + if(obj && yaffs_SkipVerification(obj->myDev)) + return; + + /* Verify sane equivalent object */ +} + +static void yaffs_VerifySymlink(yaffs_Object *obj) +{ + if(obj && yaffs_SkipVerification(obj->myDev)) + return; + + /* Verify symlink string */ +} + +static void yaffs_VerifySpecial(yaffs_Object *obj) +{ + if(obj && yaffs_SkipVerification(obj->myDev)) + return; +} + +static void yaffs_VerifyObject(yaffs_Object *obj) +{ + yaffs_Device *dev; + + __u32 chunkMin; + __u32 chunkMax; + + __u32 chunkIdOk; + __u32 chunkIsLive; + + if(!obj) + return; + + dev = obj->myDev; + + if(yaffs_SkipVerification(dev)) + return; + + /* Check sane object header chunk */ + + chunkMin = dev->internalStartBlock * dev->nChunksPerBlock; + chunkMax = (dev->internalEndBlock+1) * dev->nChunksPerBlock - 1; + + chunkIdOk = (obj->chunkId >= chunkMin && obj->chunkId <= chunkMax); + chunkIsLive = chunkIdOk && + yaffs_CheckChunkBit(dev, + obj->chunkId / dev->nChunksPerBlock, + obj->chunkId % dev->nChunksPerBlock); + if(!obj->fake && + (!chunkIdOk || !chunkIsLive)) { + T(YAFFS_TRACE_VERIFY, + (TSTR("Obj %d has chunkId %d %s %s"TENDSTR), + obj->objectId,obj->chunkId, + chunkIdOk ? "" : ",out of range", + chunkIsLive || !chunkIdOk ? 
"" : ",marked as deleted")); + } + + if(chunkIdOk && chunkIsLive &&!yaffs_SkipNANDVerification(dev)) { + yaffs_ExtendedTags tags; + yaffs_ObjectHeader *oh; + __u8 *buffer = yaffs_GetTempBuffer(dev,__LINE__); + + oh = (yaffs_ObjectHeader *)buffer; + + yaffs_ReadChunkWithTagsFromNAND(dev, obj->chunkId,buffer, &tags); + + yaffs_VerifyObjectHeader(obj,oh,&tags,1); + + yaffs_ReleaseTempBuffer(dev,buffer,__LINE__); + } + + /* Verify it has a parent */ + if(obj && !obj->fake && + (!obj->parent || obj->parent->myDev != dev)){ + T(YAFFS_TRACE_VERIFY, + (TSTR("Obj %d has parent pointer %p which does not look like an object"TENDSTR), + obj->objectId,obj->parent)); + } + + /* Verify parent is a directory */ + if(obj->parent && obj->parent->variantType != YAFFS_OBJECT_TYPE_DIRECTORY){ + T(YAFFS_TRACE_VERIFY, + (TSTR("Obj %d's parent is not a directory (type %d)"TENDSTR), + obj->objectId,obj->parent->variantType)); + } + + switch(obj->variantType){ + case YAFFS_OBJECT_TYPE_FILE: + yaffs_VerifyFile(obj); + break; + case YAFFS_OBJECT_TYPE_SYMLINK: + yaffs_VerifySymlink(obj); + break; + case YAFFS_OBJECT_TYPE_DIRECTORY: + yaffs_VerifyDirectory(obj); + break; + case YAFFS_OBJECT_TYPE_HARDLINK: + yaffs_VerifyHardLink(obj); + break; + case YAFFS_OBJECT_TYPE_SPECIAL: + yaffs_VerifySpecial(obj); + break; + case YAFFS_OBJECT_TYPE_UNKNOWN: + default: + T(YAFFS_TRACE_VERIFY, + (TSTR("Obj %d has illegaltype %d"TENDSTR), + obj->objectId,obj->variantType)); + break; + } + + +} + +static void yaffs_VerifyObjects(yaffs_Device *dev) +{ + yaffs_Object *obj; + int i; + struct list_head *lh; + + if(yaffs_SkipVerification(dev)) + return; + + /* Iterate through the objects in each hash entry */ + + for(i = 0; i < YAFFS_NOBJECT_BUCKETS; i++){ + list_for_each(lh, &dev->objectBucket[i].list) { + if (lh) { + obj = list_entry(lh, yaffs_Object, hashLink); + yaffs_VerifyObject(obj); + } + } + } + +} + + +/* + * Simple hash function. 
Needs to have a reasonable spread + */ + +static Y_INLINE int yaffs_HashFunction(int n) +{ +/* XXX U-BOOT XXX */ + /*n = abs(n); */ + if (n < 0) + n = -n; + return (n % YAFFS_NOBJECT_BUCKETS); +} + +/* + * Access functions to useful fake objects + */ + +yaffs_Object *yaffs_Root(yaffs_Device * dev) +{ + return dev->rootDir; +} + +yaffs_Object *yaffs_LostNFound(yaffs_Device * dev) +{ + return dev->lostNFoundDir; +} + + +/* + * Erased NAND checking functions + */ + +int yaffs_CheckFF(__u8 * buffer, int nBytes) +{ + /* Horrible, slow implementation */ + while (nBytes--) { + if (*buffer != 0xFF) + return 0; + buffer++; + } + return 1; +} + +static int yaffs_CheckChunkErased(struct yaffs_DeviceStruct *dev, + int chunkInNAND) +{ + + int retval = YAFFS_OK; + __u8 *data = yaffs_GetTempBuffer(dev, __LINE__); + yaffs_ExtendedTags tags; + int result; + + result = yaffs_ReadChunkWithTagsFromNAND(dev, chunkInNAND, data, &tags); + + if(tags.eccResult > YAFFS_ECC_RESULT_NO_ERROR) + retval = YAFFS_FAIL; + + + if (!yaffs_CheckFF(data, dev->nDataBytesPerChunk) || tags.chunkUsed) { + T(YAFFS_TRACE_NANDACCESS, + (TSTR("Chunk %d not erased" TENDSTR), chunkInNAND)); + retval = YAFFS_FAIL; + } + + yaffs_ReleaseTempBuffer(dev, data, __LINE__); + + return retval; + +} + +static int yaffs_WriteNewChunkWithTagsToNAND(struct yaffs_DeviceStruct *dev, + const __u8 * data, + yaffs_ExtendedTags * tags, + int useReserve) +{ + int attempts = 0; + int writeOk = 0; + int chunk; + + yaffs_InvalidateCheckpoint(dev); + + do { + yaffs_BlockInfo *bi = 0; + int erasedOk = 0; + + chunk = yaffs_AllocateChunk(dev, useReserve, &bi); + if (chunk < 0) { + /* no space */ + break; + } + + /* First check this chunk is erased, if it needs + * checking. The checking policy (unless forced + * always on) is as follows: + * + * Check the first page we try to write in a block. + * If the check passes then we don't need to check any + * more. If the check fails, we check again... 
+ * If the block has been erased, we don't need to check. + * + * However, if the block has been prioritised for gc, + * then we think there might be something odd about + * this block and stop using it. + * + * Rationale: We should only ever see chunks that have + * not been erased if there was a partially written + * chunk due to power loss. This checking policy should + * catch that case with very few checks and thus save a + * lot of checks that are most likely not needed. + */ + if (bi->gcPrioritise) { + yaffs_DeleteChunk(dev, chunk, 1, __LINE__); + /* try another chunk */ + continue; + } + + /* let's give it a try */ + attempts++; + +#ifdef CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED + bi->skipErasedCheck = 0; +#endif + if (!bi->skipErasedCheck) { + erasedOk = yaffs_CheckChunkErased(dev, chunk); + if (erasedOk != YAFFS_OK) { + T(YAFFS_TRACE_ERROR, + (TSTR ("**>> yaffs chunk %d was not erased" + TENDSTR), chunk)); + + /* try another chunk */ + continue; + } + bi->skipErasedCheck = 1; + } + + writeOk = yaffs_WriteChunkWithTagsToNAND(dev, chunk, + data, tags); + if (writeOk != YAFFS_OK) { + /* try another chunk */ + continue; + } + + + } while (writeOk != YAFFS_OK && + (yaffs_wr_attempts <= 0 || attempts <= yaffs_wr_attempts)); + + if(!writeOk) + chunk = -1; + + if (attempts > 1) { + T(YAFFS_TRACE_ERROR, + (TSTR("**>> yaffs write required %d attempts" TENDSTR), + attempts)); + + dev->nRetriedWrites += (attempts - 1); + } + + return chunk; +} + +/* + * Block retiring for handling a broken block. 
+ */ + +static void yaffs_RetireBlock(yaffs_Device * dev, int blockInNAND) +{ + yaffs_BlockInfo *bi = yaffs_GetBlockInfo(dev, blockInNAND); + + yaffs_InvalidateCheckpoint(dev); + + yaffs_MarkBlockBad(dev, blockInNAND); + + bi->blockState = YAFFS_BLOCK_STATE_DEAD; + bi->gcPrioritise = 0; + bi->needsRetiring = 0; + + dev->nRetiredBlocks++; +} + +void yaffs_HandleChunkError(yaffs_Device *dev, yaffs_BlockInfo *bi) +{ + if(!bi->gcPrioritise){ + bi->gcPrioritise = 1; + dev->hasPendingPrioritisedGCs = 1; + bi->chunkErrorStrikes ++; + + if(bi->chunkErrorStrikes > 3){ + bi->needsRetiring = 1; /* Too many stikes, so retire this */ + T(YAFFS_TRACE_ALWAYS, (TSTR("yaffs: Block struck out" TENDSTR))); + + } + + } +} + + + +/*---------------- Name handling functions ------------*/ + +static __u16 yaffs_CalcNameSum(const YCHAR * name) +{ + __u16 sum = 0; + __u16 i = 1; + + YUCHAR *bname = (YUCHAR *) name; + if (bname) { + while ((*bname) && (i < (YAFFS_MAX_NAME_LENGTH/2))) { + +#ifdef CONFIG_YAFFS_CASE_INSENSITIVE + sum += yaffs_toupper(*bname) * i; +#else + sum += (*bname) * i; +#endif + i++; + bname++; + } + } + return sum; +} + +static void yaffs_SetObjectName(yaffs_Object * obj, const YCHAR * name) +{ +#ifdef CONFIG_YAFFS_SHORT_NAMES_IN_RAM + if (name && yaffs_strlen(name) <= YAFFS_SHORT_NAME_LENGTH) { + yaffs_strcpy(obj->shortName, name); + } else { + obj->shortName[0] = _Y('\0'); + } +#endif + obj->sum = yaffs_CalcNameSum(name); +} + +/*-------------------- TNODES ------------------- + + * List of spare tnodes + * The list is hooked together using the first pointer + * in the tnode. + */ + +/* yaffs_CreateTnodes creates a bunch more tnodes and + * adds them to the tnode free list. 
+ * Don't use this function directly + */ + +static int yaffs_CreateTnodes(yaffs_Device * dev, int nTnodes) +{ + int i; + int tnodeSize; + yaffs_Tnode *newTnodes; + __u8 *mem; + yaffs_Tnode *curr; + yaffs_Tnode *next; + yaffs_TnodeList *tnl; + + if (nTnodes < 1) + return YAFFS_OK; + + /* Calculate the tnode size in bytes for variable width tnode support. + * Must be a multiple of 32-bits */ + tnodeSize = (dev->tnodeWidth * YAFFS_NTNODES_LEVEL0)/8; + + /* make these things */ + + newTnodes = YMALLOC(nTnodes * tnodeSize); + mem = (__u8 *)newTnodes; + + if (!newTnodes) { + T(YAFFS_TRACE_ERROR, + (TSTR("yaffs: Could not allocate Tnodes" TENDSTR))); + return YAFFS_FAIL; + } + + /* Hook them into the free list */ +#if 0 + for (i = 0; i < nTnodes - 1; i++) { + newTnodes[i].internal[0] = &newTnodes[i + 1]; +#ifdef CONFIG_YAFFS_TNODE_LIST_DEBUG + newTnodes[i].internal[YAFFS_NTNODES_INTERNAL] = (void *)1; +#endif + } + + newTnodes[nTnodes - 1].internal[0] = dev->freeTnodes; +#ifdef CONFIG_YAFFS_TNODE_LIST_DEBUG + newTnodes[nTnodes - 1].internal[YAFFS_NTNODES_INTERNAL] = (void *)1; +#endif + dev->freeTnodes = newTnodes; +#else + /* New hookup for wide tnodes */ + for(i = 0; i < nTnodes -1; i++) { + curr = (yaffs_Tnode *) &mem[i * tnodeSize]; + next = (yaffs_Tnode *) &mem[(i+1) * tnodeSize]; + curr->internal[0] = next; + } + + curr = (yaffs_Tnode *) &mem[(nTnodes - 1) * tnodeSize]; + curr->internal[0] = dev->freeTnodes; + dev->freeTnodes = (yaffs_Tnode *)mem; + +#endif + + + dev->nFreeTnodes += nTnodes; + dev->nTnodesCreated += nTnodes; + + /* Now add this bunch of tnodes to a list for freeing up. + * NB If we can't add this to the management list it isn't fatal + * but it just means we can't free this bunch of tnodes later. 
+ */ + + tnl = YMALLOC(sizeof(yaffs_TnodeList)); + if (!tnl) { + T(YAFFS_TRACE_ERROR, + (TSTR + ("yaffs: Could not add tnodes to management list" TENDSTR))); + return YAFFS_FAIL; + + } else { + tnl->tnodes = newTnodes; + tnl->next = dev->allocatedTnodeList; + dev->allocatedTnodeList = tnl; + } + + T(YAFFS_TRACE_ALLOCATE, (TSTR("yaffs: Tnodes added" TENDSTR))); + + return YAFFS_OK; +} + +/* GetTnode gets us a clean tnode. Tries to make allocate more if we run out */ + +static yaffs_Tnode *yaffs_GetTnodeRaw(yaffs_Device * dev) +{ + yaffs_Tnode *tn = NULL; + + /* If there are none left make more */ + if (!dev->freeTnodes) { + yaffs_CreateTnodes(dev, YAFFS_ALLOCATION_NTNODES); + } + + if (dev->freeTnodes) { + tn = dev->freeTnodes; +#ifdef CONFIG_YAFFS_TNODE_LIST_DEBUG + if (tn->internal[YAFFS_NTNODES_INTERNAL] != (void *)1) { + /* Hoosterman, this thing looks like it isn't in the list */ + T(YAFFS_TRACE_ALWAYS, + (TSTR("yaffs: Tnode list bug 1" TENDSTR))); + } +#endif + dev->freeTnodes = dev->freeTnodes->internal[0]; + dev->nFreeTnodes--; + } + + return tn; +} + +static yaffs_Tnode *yaffs_GetTnode(yaffs_Device * dev) +{ + yaffs_Tnode *tn = yaffs_GetTnodeRaw(dev); + + if(tn) + memset(tn, 0, (dev->tnodeWidth * YAFFS_NTNODES_LEVEL0)/8); + + return tn; +} + +/* FreeTnode frees up a tnode and puts it back on the free list */ +static void yaffs_FreeTnode(yaffs_Device * dev, yaffs_Tnode * tn) +{ + if (tn) { +#ifdef CONFIG_YAFFS_TNODE_LIST_DEBUG + if (tn->internal[YAFFS_NTNODES_INTERNAL] != 0) { + /* Hoosterman, this thing looks like it is already in the list */ + T(YAFFS_TRACE_ALWAYS, + (TSTR("yaffs: Tnode list bug 2" TENDSTR))); + } + tn->internal[YAFFS_NTNODES_INTERNAL] = (void *)1; +#endif + tn->internal[0] = dev->freeTnodes; + dev->freeTnodes = tn; + dev->nFreeTnodes++; + } +} + +static void yaffs_DeinitialiseTnodes(yaffs_Device * dev) +{ + /* Free the list of allocated tnodes */ + yaffs_TnodeList *tmp; + + while (dev->allocatedTnodeList) { + tmp = 
dev->allocatedTnodeList->next; + + YFREE(dev->allocatedTnodeList->tnodes); + YFREE(dev->allocatedTnodeList); + dev->allocatedTnodeList = tmp; + + } + + dev->freeTnodes = NULL; + dev->nFreeTnodes = 0; +} + +static void yaffs_InitialiseTnodes(yaffs_Device * dev) +{ + dev->allocatedTnodeList = NULL; + dev->freeTnodes = NULL; + dev->nFreeTnodes = 0; + dev->nTnodesCreated = 0; + +} + + +void yaffs_PutLevel0Tnode(yaffs_Device *dev, yaffs_Tnode *tn, unsigned pos, unsigned val) +{ + __u32 *map = (__u32 *)tn; + __u32 bitInMap; + __u32 bitInWord; + __u32 wordInMap; + __u32 mask; + + pos &= YAFFS_TNODES_LEVEL0_MASK; + val >>= dev->chunkGroupBits; + + bitInMap = pos * dev->tnodeWidth; + wordInMap = bitInMap /32; + bitInWord = bitInMap & (32 -1); + + mask = dev->tnodeMask << bitInWord; + + map[wordInMap] &= ~mask; + map[wordInMap] |= (mask & (val << bitInWord)); + + if(dev->tnodeWidth > (32-bitInWord)) { + bitInWord = (32 - bitInWord); + wordInMap++;; + mask = dev->tnodeMask >> (/*dev->tnodeWidth -*/ bitInWord); + map[wordInMap] &= ~mask; + map[wordInMap] |= (mask & (val >> bitInWord)); + } +} + +static __u32 yaffs_GetChunkGroupBase(yaffs_Device *dev, yaffs_Tnode *tn, unsigned pos) +{ + __u32 *map = (__u32 *)tn; + __u32 bitInMap; + __u32 bitInWord; + __u32 wordInMap; + __u32 val; + + pos &= YAFFS_TNODES_LEVEL0_MASK; + + bitInMap = pos * dev->tnodeWidth; + wordInMap = bitInMap /32; + bitInWord = bitInMap & (32 -1); + + val = map[wordInMap] >> bitInWord; + + if(dev->tnodeWidth > (32-bitInWord)) { + bitInWord = (32 - bitInWord); + wordInMap++;; + val |= (map[wordInMap] << bitInWord); + } + + val &= dev->tnodeMask; + val <<= dev->chunkGroupBits; + + return val; +} + +/* ------------------- End of individual tnode manipulation -----------------*/ + +/* ---------Functions to manipulate the look-up tree (made up of tnodes) ------ + * The look up tree is represented by the top tnode and the number of topLevel + * in the tree. 0 means only the level 0 tnode is in the tree. 
+ */ + +/* FindLevel0Tnode finds the level 0 tnode, if one exists. */ +static yaffs_Tnode *yaffs_FindLevel0Tnode(yaffs_Device * dev, + yaffs_FileStructure * fStruct, + __u32 chunkId) +{ + + yaffs_Tnode *tn = fStruct->top; + __u32 i; + int requiredTallness; + int level = fStruct->topLevel; + + /* Check sane level and chunk Id */ + if (level < 0 || level > YAFFS_TNODES_MAX_LEVEL) { + return NULL; + } + + if (chunkId > YAFFS_MAX_CHUNK_ID) { + return NULL; + } + + /* First check we're tall enough (ie enough topLevel) */ + + i = chunkId >> YAFFS_TNODES_LEVEL0_BITS; + requiredTallness = 0; + while (i) { + i >>= YAFFS_TNODES_INTERNAL_BITS; + requiredTallness++; + } + + if (requiredTallness > fStruct->topLevel) { + /* Not tall enough, so we can't find it, return NULL. */ + return NULL; + } + + /* Traverse down to level 0 */ + while (level > 0 && tn) { + tn = tn-> + internal[(chunkId >> + ( YAFFS_TNODES_LEVEL0_BITS + + (level - 1) * + YAFFS_TNODES_INTERNAL_BITS) + ) & + YAFFS_TNODES_INTERNAL_MASK]; + level--; + + } + + return tn; +} + +/* AddOrFindLevel0Tnode finds the level 0 tnode if it exists, otherwise first expands the tree. + * This happens in two steps: + * 1. If the tree isn't tall enough, then make it taller. + * 2. Scan down the tree towards the level 0 tnode adding tnodes if required. + * + * Used when modifying the tree. + * + * If the tn argument is NULL, then a fresh tnode will be added otherwise the specified tn will + * be plugged into the ttree. 
+ */ + +static yaffs_Tnode *yaffs_AddOrFindLevel0Tnode(yaffs_Device * dev, + yaffs_FileStructure * fStruct, + __u32 chunkId, + yaffs_Tnode *passedTn) +{ + + int requiredTallness; + int i; + int l; + yaffs_Tnode *tn; + + __u32 x; + + + /* Check sane level and page Id */ + if (fStruct->topLevel < 0 || fStruct->topLevel > YAFFS_TNODES_MAX_LEVEL) { + return NULL; + } + + if (chunkId > YAFFS_MAX_CHUNK_ID) { + return NULL; + } + + /* First check we're tall enough (ie enough topLevel) */ + + x = chunkId >> YAFFS_TNODES_LEVEL0_BITS; + requiredTallness = 0; + while (x) { + x >>= YAFFS_TNODES_INTERNAL_BITS; + requiredTallness++; + } + + + if (requiredTallness > fStruct->topLevel) { + /* Not tall enough,gotta make the tree taller */ + for (i = fStruct->topLevel; i < requiredTallness; i++) { + + tn = yaffs_GetTnode(dev); + + if (tn) { + tn->internal[0] = fStruct->top; + fStruct->top = tn; + } else { + T(YAFFS_TRACE_ERROR, + (TSTR("yaffs: no more tnodes" TENDSTR))); + } + } + + fStruct->topLevel = requiredTallness; + } + + /* Traverse down to level 0, adding anything we need */ + + l = fStruct->topLevel; + tn = fStruct->top; + + if(l > 0) { + while (l > 0 && tn) { + x = (chunkId >> + ( YAFFS_TNODES_LEVEL0_BITS + + (l - 1) * YAFFS_TNODES_INTERNAL_BITS)) & + YAFFS_TNODES_INTERNAL_MASK; + + + if((l>1) && !tn->internal[x]){ + /* Add missing non-level-zero tnode */ + tn->internal[x] = yaffs_GetTnode(dev); + + } else if(l == 1) { + /* Looking from level 1 at level 0 */ + if (passedTn) { + /* If we already have one, then release it.*/ + if(tn->internal[x]) + yaffs_FreeTnode(dev,tn->internal[x]); + tn->internal[x] = passedTn; + + } else if(!tn->internal[x]) { + /* Don't have one, none passed in */ + tn->internal[x] = yaffs_GetTnode(dev); + } + } + + tn = tn->internal[x]; + l--; + } + } else { + /* top is level 0 */ + if(passedTn) { + memcpy(tn,passedTn,(dev->tnodeWidth * YAFFS_NTNODES_LEVEL0)/8); + yaffs_FreeTnode(dev,passedTn); + } + } + + return tn; +} + +static int 
yaffs_FindChunkInGroup(yaffs_Device * dev, int theChunk, + yaffs_ExtendedTags * tags, int objectId, + int chunkInInode) +{ + int j; + + for (j = 0; theChunk && j < dev->chunkGroupSize; j++) { + if (yaffs_CheckChunkBit + (dev, theChunk / dev->nChunksPerBlock, + theChunk % dev->nChunksPerBlock)) { + yaffs_ReadChunkWithTagsFromNAND(dev, theChunk, NULL, + tags); + if (yaffs_TagsMatch(tags, objectId, chunkInInode)) { + /* found it; */ + return theChunk; + + } + } + theChunk++; + } + return -1; +} + + +/* DeleteWorker scans backwards through the tnode tree and deletes all the + * chunks and tnodes in the file + * Returns 1 if the tree was deleted. + * Returns 0 if it stopped early due to hitting the limit and the delete is incomplete. + */ + +static int yaffs_DeleteWorker(yaffs_Object * in, yaffs_Tnode * tn, __u32 level, + int chunkOffset, int *limit) +{ + int i; + int chunkInInode; + int theChunk; + yaffs_ExtendedTags tags; + int foundChunk; + yaffs_Device *dev = in->myDev; + + int allDone = 1; + + if (tn) { + if (level > 0) { + + for (i = YAFFS_NTNODES_INTERNAL - 1; allDone && i >= 0; + i--) { + if (tn->internal[i]) { + if (limit && (*limit) < 0) { + allDone = 0; + } else { + allDone = + yaffs_DeleteWorker(in, + tn-> + internal + [i], + level - + 1, + (chunkOffset + << + YAFFS_TNODES_INTERNAL_BITS) + + i, + limit); + } + if (allDone) { + yaffs_FreeTnode(dev, + tn-> + internal[i]); + tn->internal[i] = NULL; + } + } + + } + return (allDone) ? 
1 : 0; + } else if (level == 0) { + int hitLimit = 0; + + for (i = YAFFS_NTNODES_LEVEL0 - 1; i >= 0 && !hitLimit; + i--) { + theChunk = yaffs_GetChunkGroupBase(dev,tn,i); + if (theChunk) { + + chunkInInode = + (chunkOffset << + YAFFS_TNODES_LEVEL0_BITS) + i; + + foundChunk = + yaffs_FindChunkInGroup(dev, + theChunk, + &tags, + in->objectId, + chunkInInode); + + if (foundChunk > 0) { + yaffs_DeleteChunk(dev, + foundChunk, 1, + __LINE__); + in->nDataChunks--; + if (limit) { + *limit = *limit - 1; + if (*limit <= 0) { + hitLimit = 1; + } + } + + } + + yaffs_PutLevel0Tnode(dev,tn,i,0); + } + + } + return (i < 0) ? 1 : 0; + + } + + } + + return 1; + +} + +static void yaffs_SoftDeleteChunk(yaffs_Device * dev, int chunk) +{ + + yaffs_BlockInfo *theBlock; + + T(YAFFS_TRACE_DELETION, (TSTR("soft delete chunk %d" TENDSTR), chunk)); + + theBlock = yaffs_GetBlockInfo(dev, chunk / dev->nChunksPerBlock); + if (theBlock) { + theBlock->softDeletions++; + dev->nFreeChunks++; + } +} + +/* SoftDeleteWorker scans backwards through the tnode tree and soft deletes all the chunks in the file. + * All soft deleting does is increment the block's softdelete count and pulls the chunk out + * of the tnode. + * Thus, essentially this is the same as DeleteWorker except that the chunks are soft deleted. + */ + +static int yaffs_SoftDeleteWorker(yaffs_Object * in, yaffs_Tnode * tn, + __u32 level, int chunkOffset) +{ + int i; + int theChunk; + int allDone = 1; + yaffs_Device *dev = in->myDev; + + if (tn) { + if (level > 0) { + + for (i = YAFFS_NTNODES_INTERNAL - 1; allDone && i >= 0; + i--) { + if (tn->internal[i]) { + allDone = + yaffs_SoftDeleteWorker(in, + tn-> + internal[i], + level - 1, + (chunkOffset + << + YAFFS_TNODES_INTERNAL_BITS) + + i); + if (allDone) { + yaffs_FreeTnode(dev, + tn-> + internal[i]); + tn->internal[i] = NULL; + } else { + /* Hoosterman... how could this happen? */ + } + } + } + return (allDone) ? 
1 : 0; + } else if (level == 0) { + + for (i = YAFFS_NTNODES_LEVEL0 - 1; i >= 0; i--) { + theChunk = yaffs_GetChunkGroupBase(dev,tn,i); + if (theChunk) { + /* Note this does not find the real chunk, only the chunk group. + * We make an assumption that a chunk group is not larger than + * a block. + */ + yaffs_SoftDeleteChunk(dev, theChunk); + yaffs_PutLevel0Tnode(dev,tn,i,0); + } + + } + return 1; + + } + + } + + return 1; + +} + +static void yaffs_SoftDeleteFile(yaffs_Object * obj) +{ + if (obj->deleted && + obj->variantType == YAFFS_OBJECT_TYPE_FILE && !obj->softDeleted) { + if (obj->nDataChunks <= 0) { + /* Empty file with no duplicate object headers, just delete it immediately */ + yaffs_FreeTnode(obj->myDev, + obj->variant.fileVariant.top); + obj->variant.fileVariant.top = NULL; + T(YAFFS_TRACE_TRACING, + (TSTR("yaffs: Deleting empty file %d" TENDSTR), + obj->objectId)); + yaffs_DoGenericObjectDeletion(obj); + } else { + yaffs_SoftDeleteWorker(obj, + obj->variant.fileVariant.top, + obj->variant.fileVariant. + topLevel, 0); + obj->softDeleted = 1; + } + } +} + +/* Pruning removes any part of the file structure tree that is beyond the + * bounds of the file (ie that does not point to chunks). + * + * A file should only get pruned when its size is reduced. + * + * Before pruning, the chunks must be pulled from the tree and the + * level 0 tnode entries must be zeroed out. + * Could also use this for file deletion, but that's probably better handled + * by a special case. + */ + +static yaffs_Tnode *yaffs_PruneWorker(yaffs_Device * dev, yaffs_Tnode * tn, + __u32 level, int del0) +{ + int i; + int hasData; + + if (tn) { + hasData = 0; + + for (i = 0; i < YAFFS_NTNODES_INTERNAL; i++) { + if (tn->internal[i] && level > 0) { + tn->internal[i] = + yaffs_PruneWorker(dev, tn->internal[i], + level - 1, + (i == 0) ? 
del0 : 1); + } + + if (tn->internal[i]) { + hasData++; + } + } + + if (hasData == 0 && del0) { + /* Free and return NULL */ + + yaffs_FreeTnode(dev, tn); + tn = NULL; + } + + } + + return tn; + +} + +static int yaffs_PruneFileStructure(yaffs_Device * dev, + yaffs_FileStructure * fStruct) +{ + int i; + int hasData; + int done = 0; + yaffs_Tnode *tn; + + if (fStruct->topLevel > 0) { + fStruct->top = + yaffs_PruneWorker(dev, fStruct->top, fStruct->topLevel, 0); + + /* Now we have a tree with all the non-zero branches NULL but the height + * is the same as it was. + * Let's see if we can trim internal tnodes to shorten the tree. + * We can do this if only the 0th element in the tnode is in use + * (ie all the non-zero are NULL) + */ + + while (fStruct->topLevel && !done) { + tn = fStruct->top; + + hasData = 0; + for (i = 1; i < YAFFS_NTNODES_INTERNAL; i++) { + if (tn->internal[i]) { + hasData++; + } + } + + if (!hasData) { + fStruct->top = tn->internal[0]; + fStruct->topLevel--; + yaffs_FreeTnode(dev, tn); + } else { + done = 1; + } + } + } + + return YAFFS_OK; +} + +/*-------------------- End of File Structure functions.-------------------*/ + +/* yaffs_CreateFreeObjects creates a bunch more objects and + * adds them to the object free list. 
+ */ +static int yaffs_CreateFreeObjects(yaffs_Device * dev, int nObjects) +{ + int i; + yaffs_Object *newObjects; + yaffs_ObjectList *list; + + if (nObjects < 1) + return YAFFS_OK; + + /* make these things */ + newObjects = YMALLOC(nObjects * sizeof(yaffs_Object)); + list = YMALLOC(sizeof(yaffs_ObjectList)); + + if (!newObjects || !list) { + if(newObjects) + YFREE(newObjects); + if(list) + YFREE(list); + T(YAFFS_TRACE_ALLOCATE, + (TSTR("yaffs: Could not allocate more objects" TENDSTR))); + return YAFFS_FAIL; + } + + /* Hook them into the free list */ + for (i = 0; i < nObjects - 1; i++) { + newObjects[i].siblings.next = + (struct list_head *)(&newObjects[i + 1]); + } + + newObjects[nObjects - 1].siblings.next = (void *)dev->freeObjects; + dev->freeObjects = newObjects; + dev->nFreeObjects += nObjects; + dev->nObjectsCreated += nObjects; + + /* Now add this bunch of Objects to a list for freeing up. */ + + list->objects = newObjects; + list->next = dev->allocatedObjectList; + dev->allocatedObjectList = list; + + return YAFFS_OK; +} + + +/* AllocateEmptyObject gets us a clean Object. Tries to make allocate more if we run out */ +static yaffs_Object *yaffs_AllocateEmptyObject(yaffs_Device * dev) +{ + yaffs_Object *tn = NULL; + + /* If there are none left make more */ + if (!dev->freeObjects) { + yaffs_CreateFreeObjects(dev, YAFFS_ALLOCATION_NOBJECTS); + } + + if (dev->freeObjects) { + tn = dev->freeObjects; + dev->freeObjects = + (yaffs_Object *) (dev->freeObjects->siblings.next); + dev->nFreeObjects--; + + /* Now sweeten it up... */ + + memset(tn, 0, sizeof(yaffs_Object)); + tn->myDev = dev; + tn->chunkId = -1; + tn->variantType = YAFFS_OBJECT_TYPE_UNKNOWN; + INIT_LIST_HEAD(&(tn->hardLinks)); + INIT_LIST_HEAD(&(tn->hashLink)); + INIT_LIST_HEAD(&tn->siblings); + + /* Add it to the lost and found directory. 
+ * NB Can't put root or lostNFound in lostNFound so + * check if lostNFound exists first + */ + if (dev->lostNFoundDir) { + yaffs_AddObjectToDirectory(dev->lostNFoundDir, tn); + } + } + + return tn; +} + +static yaffs_Object *yaffs_CreateFakeDirectory(yaffs_Device * dev, int number, + __u32 mode) +{ + + yaffs_Object *obj = + yaffs_CreateNewObject(dev, number, YAFFS_OBJECT_TYPE_DIRECTORY); + if (obj) { + obj->fake = 1; /* it is fake so it has no NAND presence... */ + obj->renameAllowed = 0; /* ... and we're not allowed to rename it... */ + obj->unlinkAllowed = 0; /* ... or unlink it */ + obj->deleted = 0; + obj->unlinked = 0; + obj->yst_mode = mode; + obj->myDev = dev; + obj->chunkId = 0; /* Not a valid chunk. */ + } + + return obj; + +} + +static void yaffs_UnhashObject(yaffs_Object * tn) +{ + int bucket; + yaffs_Device *dev = tn->myDev; + + /* If it is still linked into the bucket list, free from the list */ + if (!list_empty(&tn->hashLink)) { + list_del_init(&tn->hashLink); + bucket = yaffs_HashFunction(tn->objectId); + dev->objectBucket[bucket].count--; + } + +} + +/* FreeObject frees up a Object and puts it back on the free list */ +static void yaffs_FreeObject(yaffs_Object * tn) +{ + + yaffs_Device *dev = tn->myDev; + +/* XXX U-BOOT XXX */ +#if 0 +#ifdef __KERNEL__ + if (tn->myInode) { + /* We're still hooked up to a cached inode. + * Don't delete now, but mark for later deletion + */ + tn->deferedFree = 1; + return; + } +#endif +#endif + yaffs_UnhashObject(tn); + + /* Link into the free list. 
*/ + tn->siblings.next = (struct list_head *)(dev->freeObjects); + dev->freeObjects = tn; + dev->nFreeObjects++; +} + +/* XXX U-BOOT XXX */ +#if 0 +#ifdef __KERNEL__ + +void yaffs_HandleDeferedFree(yaffs_Object * obj) +{ + if (obj->deferedFree) { + yaffs_FreeObject(obj); + } +} + +#endif +#endif + +static void yaffs_DeinitialiseObjects(yaffs_Device * dev) +{ + /* Free the list of allocated Objects */ + + yaffs_ObjectList *tmp; + + while (dev->allocatedObjectList) { + tmp = dev->allocatedObjectList->next; + YFREE(dev->allocatedObjectList->objects); + YFREE(dev->allocatedObjectList); + + dev->allocatedObjectList = tmp; + } + + dev->freeObjects = NULL; + dev->nFreeObjects = 0; +} + +static void yaffs_InitialiseObjects(yaffs_Device * dev) +{ + int i; + + dev->allocatedObjectList = NULL; + dev->freeObjects = NULL; + dev->nFreeObjects = 0; + + for (i = 0; i < YAFFS_NOBJECT_BUCKETS; i++) { + INIT_LIST_HEAD(&dev->objectBucket[i].list); + dev->objectBucket[i].count = 0; + } + +} + +static int yaffs_FindNiceObjectBucket(yaffs_Device * dev) +{ + static int x = 0; + int i; + int l = 999; + int lowest = 999999; + + /* First let's see if we can find one that's empty. */ + + for (i = 0; i < 10 && lowest > 0; i++) { + x++; + x %= YAFFS_NOBJECT_BUCKETS; + if (dev->objectBucket[x].count < lowest) { + lowest = dev->objectBucket[x].count; + l = x; + } + + } + + /* If we didn't find an empty list, then try + * looking a bit further for a short one + */ + + for (i = 0; i < 10 && lowest > 3; i++) { + x++; + x %= YAFFS_NOBJECT_BUCKETS; + if (dev->objectBucket[x].count < lowest) { + lowest = dev->objectBucket[x].count; + l = x; + } + + } + + return l; +} + +static int yaffs_CreateNewObjectNumber(yaffs_Device * dev) +{ + int bucket = yaffs_FindNiceObjectBucket(dev); + + /* Now find an object value that has not already been taken + * by scanning the list. 
+ */ + + int found = 0; + struct list_head *i; + + __u32 n = (__u32) bucket; + + /* yaffs_CheckObjectHashSanity(); */ + + while (!found) { + found = 1; + n += YAFFS_NOBJECT_BUCKETS; + if (1 || dev->objectBucket[bucket].count > 0) { + list_for_each(i, &dev->objectBucket[bucket].list) { + /* If there is already one in the list */ + if (i + && list_entry(i, yaffs_Object, + hashLink)->objectId == n) { + found = 0; + } + } + } + } + + + return n; +} + +static void yaffs_HashObject(yaffs_Object * in) +{ + int bucket = yaffs_HashFunction(in->objectId); + yaffs_Device *dev = in->myDev; + + list_add(&in->hashLink, &dev->objectBucket[bucket].list); + dev->objectBucket[bucket].count++; + +} + +yaffs_Object *yaffs_FindObjectByNumber(yaffs_Device * dev, __u32 number) +{ + int bucket = yaffs_HashFunction(number); + struct list_head *i; + yaffs_Object *in; + + list_for_each(i, &dev->objectBucket[bucket].list) { + /* Look if it is in the list */ + if (i) { + in = list_entry(i, yaffs_Object, hashLink); + if (in->objectId == number) { +/* XXX U-BOOT XXX */ +#if 0 +#ifdef __KERNEL__ + /* Don't tell the VFS about this one if it is defered free */ + if (in->deferedFree) + return NULL; +#endif +#endif + return in; + } + } + } + + return NULL; +} + +yaffs_Object *yaffs_CreateNewObject(yaffs_Device * dev, int number, + yaffs_ObjectType type) +{ + + yaffs_Object *theObject; + yaffs_Tnode *tn; + + if (number < 0) { + number = yaffs_CreateNewObjectNumber(dev); + } + + theObject = yaffs_AllocateEmptyObject(dev); + if(!theObject) + return NULL; + + if(type == YAFFS_OBJECT_TYPE_FILE){ + tn = yaffs_GetTnode(dev); + if(!tn){ + yaffs_FreeObject(theObject); + return NULL; + } + } + + + + if (theObject) { + theObject->fake = 0; + theObject->renameAllowed = 1; + theObject->unlinkAllowed = 1; + theObject->objectId = number; + yaffs_HashObject(theObject); + theObject->variantType = type; +#ifdef CONFIG_YAFFS_WINCE + yfsd_WinFileTimeNow(theObject->win_atime); + theObject->win_ctime[0] = 
theObject->win_mtime[0] = + theObject->win_atime[0]; + theObject->win_ctime[1] = theObject->win_mtime[1] = + theObject->win_atime[1]; + +#else + + theObject->yst_atime = theObject->yst_mtime = + theObject->yst_ctime = Y_CURRENT_TIME; +#endif + switch (type) { + case YAFFS_OBJECT_TYPE_FILE: + theObject->variant.fileVariant.fileSize = 0; + theObject->variant.fileVariant.scannedFileSize = 0; + theObject->variant.fileVariant.shrinkSize = 0xFFFFFFFF; /* max __u32 */ + theObject->variant.fileVariant.topLevel = 0; + theObject->variant.fileVariant.top = tn; + break; + case YAFFS_OBJECT_TYPE_DIRECTORY: + INIT_LIST_HEAD(&theObject->variant.directoryVariant. + children); + break; + case YAFFS_OBJECT_TYPE_SYMLINK: + case YAFFS_OBJECT_TYPE_HARDLINK: + case YAFFS_OBJECT_TYPE_SPECIAL: + /* No action required */ + break; + case YAFFS_OBJECT_TYPE_UNKNOWN: + /* todo this should not happen */ + break; + } + } + + return theObject; +} + +static yaffs_Object *yaffs_FindOrCreateObjectByNumber(yaffs_Device * dev, + int number, + yaffs_ObjectType type) +{ + yaffs_Object *theObject = NULL; + + if (number > 0) { + theObject = yaffs_FindObjectByNumber(dev, number); + } + + if (!theObject) { + theObject = yaffs_CreateNewObject(dev, number, type); + } + + return theObject; + +} + + +static YCHAR *yaffs_CloneString(const YCHAR * str) +{ + YCHAR *newStr = NULL; + + if (str && *str) { + newStr = YMALLOC((yaffs_strlen(str) + 1) * sizeof(YCHAR)); + if(newStr) + yaffs_strcpy(newStr, str); + } + + return newStr; + +} + +/* + * Mknod (create) a new object. + * equivalentObject only has meaning for a hard link; + * aliasString only has meaning for a sumlink. 
+ * rdev only has meaning for devices (a subset of special objects) + */ + +static yaffs_Object *yaffs_MknodObject(yaffs_ObjectType type, + yaffs_Object * parent, + const YCHAR * name, + __u32 mode, + __u32 uid, + __u32 gid, + yaffs_Object * equivalentObject, + const YCHAR * aliasString, __u32 rdev) +{ + yaffs_Object *in; + YCHAR *str; + + yaffs_Device *dev = parent->myDev; + + /* Check if the entry exists. If it does then fail the call since we don't want a dup.*/ + if (yaffs_FindObjectByName(parent, name)) { + return NULL; + } + + in = yaffs_CreateNewObject(dev, -1, type); + + if(type == YAFFS_OBJECT_TYPE_SYMLINK){ + str = yaffs_CloneString(aliasString); + if(!str){ + yaffs_FreeObject(in); + return NULL; + } + } + + + + if (in) { + in->chunkId = -1; + in->valid = 1; + in->variantType = type; + + in->yst_mode = mode; + +#ifdef CONFIG_YAFFS_WINCE + yfsd_WinFileTimeNow(in->win_atime); + in->win_ctime[0] = in->win_mtime[0] = in->win_atime[0]; + in->win_ctime[1] = in->win_mtime[1] = in->win_atime[1]; + +#else + in->yst_atime = in->yst_mtime = in->yst_ctime = Y_CURRENT_TIME; + + in->yst_rdev = rdev; + in->yst_uid = uid; + in->yst_gid = gid; +#endif + in->nDataChunks = 0; + + yaffs_SetObjectName(in, name); + in->dirty = 1; + + yaffs_AddObjectToDirectory(parent, in); + + in->myDev = parent->myDev; + + switch (type) { + case YAFFS_OBJECT_TYPE_SYMLINK: + in->variant.symLinkVariant.alias = str; + break; + case YAFFS_OBJECT_TYPE_HARDLINK: + in->variant.hardLinkVariant.equivalentObject = + equivalentObject; + in->variant.hardLinkVariant.equivalentObjectId = + equivalentObject->objectId; + list_add(&in->hardLinks, &equivalentObject->hardLinks); + break; + case YAFFS_OBJECT_TYPE_FILE: + case YAFFS_OBJECT_TYPE_DIRECTORY: + case YAFFS_OBJECT_TYPE_SPECIAL: + case YAFFS_OBJECT_TYPE_UNKNOWN: + /* do nothing */ + break; + } + + if (yaffs_UpdateObjectHeader(in, name, 0, 0, 0) < 0) { + /* Could not create the object header, fail the creation */ + yaffs_DestroyObject(in); + in = NULL; + 
} + + } + + return in; +} + +yaffs_Object *yaffs_MknodFile(yaffs_Object * parent, const YCHAR * name, + __u32 mode, __u32 uid, __u32 gid) +{ + return yaffs_MknodObject(YAFFS_OBJECT_TYPE_FILE, parent, name, mode, + uid, gid, NULL, NULL, 0); +} + +yaffs_Object *yaffs_MknodDirectory(yaffs_Object * parent, const YCHAR * name, + __u32 mode, __u32 uid, __u32 gid) +{ + return yaffs_MknodObject(YAFFS_OBJECT_TYPE_DIRECTORY, parent, name, + mode, uid, gid, NULL, NULL, 0); +} + +yaffs_Object *yaffs_MknodSpecial(yaffs_Object * parent, const YCHAR * name, + __u32 mode, __u32 uid, __u32 gid, __u32 rdev) +{ + return yaffs_MknodObject(YAFFS_OBJECT_TYPE_SPECIAL, parent, name, mode, + uid, gid, NULL, NULL, rdev); +} + +yaffs_Object *yaffs_MknodSymLink(yaffs_Object * parent, const YCHAR * name, + __u32 mode, __u32 uid, __u32 gid, + const YCHAR * alias) +{ + return yaffs_MknodObject(YAFFS_OBJECT_TYPE_SYMLINK, parent, name, mode, + uid, gid, NULL, alias, 0); +} + +/* yaffs_Link returns the object id of the equivalent object.*/ +yaffs_Object *yaffs_Link(yaffs_Object * parent, const YCHAR * name, + yaffs_Object * equivalentObject) +{ + /* Get the real object in case we were fed a hard link as an equivalent object */ + equivalentObject = yaffs_GetEquivalentObject(equivalentObject); + + if (yaffs_MknodObject + (YAFFS_OBJECT_TYPE_HARDLINK, parent, name, 0, 0, 0, + equivalentObject, NULL, 0)) { + return equivalentObject; + } else { + return NULL; + } + +} + +static int yaffs_ChangeObjectName(yaffs_Object * obj, yaffs_Object * newDir, + const YCHAR * newName, int force, int shadows) +{ + int unlinkOp; + int deleteOp; + + yaffs_Object *existingTarget; + + if (newDir == NULL) { + newDir = obj->parent; /* use the old directory */ + } + + if (newDir->variantType != YAFFS_OBJECT_TYPE_DIRECTORY) { + T(YAFFS_TRACE_ALWAYS, + (TSTR + ("tragendy: yaffs_ChangeObjectName: newDir is not a directory" + TENDSTR))); + YBUG(); + } + + /* TODO: Do we need this different handling for YAFFS2 and YAFFS1?? 
*/ + if (obj->myDev->isYaffs2) { + unlinkOp = (newDir == obj->myDev->unlinkedDir); + } else { + unlinkOp = (newDir == obj->myDev->unlinkedDir + && obj->variantType == YAFFS_OBJECT_TYPE_FILE); + } + + deleteOp = (newDir == obj->myDev->deletedDir); + + existingTarget = yaffs_FindObjectByName(newDir, newName); + + /* If the object is a file going into the unlinked directory, + * then it is OK to just stuff it in since duplicate names are allowed. + * else only proceed if the new name does not exist and if we're putting + * it into a directory. + */ + if ((unlinkOp || + deleteOp || + force || + (shadows > 0) || + !existingTarget) && + newDir->variantType == YAFFS_OBJECT_TYPE_DIRECTORY) { + yaffs_SetObjectName(obj, newName); + obj->dirty = 1; + + yaffs_AddObjectToDirectory(newDir, obj); + + if (unlinkOp) + obj->unlinked = 1; + + /* If it is a deletion then we mark it as a shrink for gc purposes. */ + if (yaffs_UpdateObjectHeader(obj, newName, 0, deleteOp, shadows)>= 0) + return YAFFS_OK; + } + + return YAFFS_FAIL; +} + +int yaffs_RenameObject(yaffs_Object * oldDir, const YCHAR * oldName, + yaffs_Object * newDir, const YCHAR * newName) +{ + yaffs_Object *obj; + yaffs_Object *existingTarget; + int force = 0; + +#ifdef CONFIG_YAFFS_CASE_INSENSITIVE + /* Special case for case insemsitive systems (eg. WinCE). + * While look-up is case insensitive, the name isn't. + * Therefore we might want to change x.txt to X.txt + */ + if (oldDir == newDir && yaffs_strcmp(oldName, newName) == 0) { + force = 1; + } +#endif + + obj = yaffs_FindObjectByName(oldDir, oldName); + /* Check new name to long. 
*/ + if (obj->variantType == YAFFS_OBJECT_TYPE_SYMLINK && + yaffs_strlen(newName) > YAFFS_MAX_ALIAS_LENGTH) + /* ENAMETOOLONG */ + return YAFFS_FAIL; + else if (obj->variantType != YAFFS_OBJECT_TYPE_SYMLINK && + yaffs_strlen(newName) > YAFFS_MAX_NAME_LENGTH) + /* ENAMETOOLONG */ + return YAFFS_FAIL; + + if (obj && obj->renameAllowed) { + + /* Now do the handling for an existing target, if there is one */ + + existingTarget = yaffs_FindObjectByName(newDir, newName); + if (existingTarget && + existingTarget->variantType == YAFFS_OBJECT_TYPE_DIRECTORY && + !list_empty(&existingTarget->variant.directoryVariant.children)) { + /* There is a target that is a non-empty directory, so we fail */ + return YAFFS_FAIL; /* EEXIST or ENOTEMPTY */ + } else if (existingTarget && existingTarget != obj) { + /* Nuke the target first, using shadowing, + * but only if it isn't the same object + */ + yaffs_ChangeObjectName(obj, newDir, newName, force, + existingTarget->objectId); + yaffs_UnlinkObject(existingTarget); + } + + return yaffs_ChangeObjectName(obj, newDir, newName, 1, 0); + } + return YAFFS_FAIL; +} + +/*------------------------- Block Management and Page Allocation ----------------*/ + +static int yaffs_InitialiseBlocks(yaffs_Device * dev) +{ + int nBlocks = dev->internalEndBlock - dev->internalStartBlock + 1; + + dev->blockInfo = NULL; + dev->chunkBits = NULL; + + dev->allocationBlock = -1; /* force it to get a new one */ + + /* If the first allocation strategy fails, thry the alternate one */ + dev->blockInfo = YMALLOC(nBlocks * sizeof(yaffs_BlockInfo)); + if(!dev->blockInfo){ + dev->blockInfo = YMALLOC_ALT(nBlocks * sizeof(yaffs_BlockInfo)); + dev->blockInfoAlt = 1; + } + else + dev->blockInfoAlt = 0; + + if(dev->blockInfo){ + + /* Set up dynamic blockinfo stuff. 
*/ + dev->chunkBitmapStride = (dev->nChunksPerBlock + 7) / 8; /* round up bytes */ + dev->chunkBits = YMALLOC(dev->chunkBitmapStride * nBlocks); + if(!dev->chunkBits){ + dev->chunkBits = YMALLOC_ALT(dev->chunkBitmapStride * nBlocks); + dev->chunkBitsAlt = 1; + } + else + dev->chunkBitsAlt = 0; + } + + if (dev->blockInfo && dev->chunkBits) { + memset(dev->blockInfo, 0, nBlocks * sizeof(yaffs_BlockInfo)); + memset(dev->chunkBits, 0, dev->chunkBitmapStride * nBlocks); + return YAFFS_OK; + } + + return YAFFS_FAIL; + +} + +static void yaffs_DeinitialiseBlocks(yaffs_Device * dev) +{ + if(dev->blockInfoAlt && dev->blockInfo) + YFREE_ALT(dev->blockInfo); + else if(dev->blockInfo) + YFREE(dev->blockInfo); + + dev->blockInfoAlt = 0; + + dev->blockInfo = NULL; + + if(dev->chunkBitsAlt && dev->chunkBits) + YFREE_ALT(dev->chunkBits); + else if(dev->chunkBits) + YFREE(dev->chunkBits); + dev->chunkBitsAlt = 0; + dev->chunkBits = NULL; +} + +static int yaffs_BlockNotDisqualifiedFromGC(yaffs_Device * dev, + yaffs_BlockInfo * bi) +{ + int i; + __u32 seq; + yaffs_BlockInfo *b; + + if (!dev->isYaffs2) + return 1; /* disqualification only applies to yaffs2. */ + + if (!bi->hasShrinkHeader) + return 1; /* can gc */ + + /* Find the oldest dirty sequence number if we don't know it and save it + * so we don't have to keep recomputing it. + */ + if (!dev->oldestDirtySequence) { + seq = dev->sequenceNumber; + + for (i = dev->internalStartBlock; i <= dev->internalEndBlock; + i++) { + b = yaffs_GetBlockInfo(dev, i); + if (b->blockState == YAFFS_BLOCK_STATE_FULL && + (b->pagesInUse - b->softDeletions) < + dev->nChunksPerBlock && b->sequenceNumber < seq) { + seq = b->sequenceNumber; + } + } + dev->oldestDirtySequence = seq; + } + + /* Can't do gc of this block if there are any blocks older than this one that have + * discarded pages. 
+ */ + return (bi->sequenceNumber <= dev->oldestDirtySequence); + +} + +/* FindDiretiestBlock is used to select the dirtiest block (or close enough) + * for garbage collection. + */ + +static int yaffs_FindBlockForGarbageCollection(yaffs_Device * dev, + int aggressive) +{ + + int b = dev->currentDirtyChecker; + + int i; + int iterations; + int dirtiest = -1; + int pagesInUse = 0; + int prioritised=0; + yaffs_BlockInfo *bi; + int pendingPrioritisedExist = 0; + + /* First let's see if we need to grab a prioritised block */ + if(dev->hasPendingPrioritisedGCs){ + for(i = dev->internalStartBlock; i < dev->internalEndBlock && !prioritised; i++){ + + bi = yaffs_GetBlockInfo(dev, i); + //yaffs_VerifyBlock(dev,bi,i); + + if(bi->gcPrioritise) { + pendingPrioritisedExist = 1; + if(bi->blockState == YAFFS_BLOCK_STATE_FULL && + yaffs_BlockNotDisqualifiedFromGC(dev, bi)){ + pagesInUse = (bi->pagesInUse - bi->softDeletions); + dirtiest = i; + prioritised = 1; + aggressive = 1; /* Fool the non-aggressive skip logiv below */ + } + } + } + + if(!pendingPrioritisedExist) /* None found, so we can clear this */ + dev->hasPendingPrioritisedGCs = 0; + } + + /* If we're doing aggressive GC then we are happy to take a less-dirty block, and + * search harder. + * else (we're doing a leasurely gc), then we only bother to do this if the + * block has only a few pages in use. + */ + + dev->nonAggressiveSkip--; + + if (!aggressive && (dev->nonAggressiveSkip > 0)) { + return -1; + } + + if(!prioritised) + pagesInUse = + (aggressive) ? 
dev->nChunksPerBlock : YAFFS_PASSIVE_GC_CHUNKS + 1; + + if (aggressive) { + iterations = + dev->internalEndBlock - dev->internalStartBlock + 1; + } else { + iterations = + dev->internalEndBlock - dev->internalStartBlock + 1; + iterations = iterations / 16; + if (iterations > 200) { + iterations = 200; + } + } + + for (i = 0; i <= iterations && pagesInUse > 0 && !prioritised; i++) { + b++; + if (b < dev->internalStartBlock || b > dev->internalEndBlock) { + b = dev->internalStartBlock; + } + + if (b < dev->internalStartBlock || b > dev->internalEndBlock) { + T(YAFFS_TRACE_ERROR, + (TSTR("**>> Block %d is not valid" TENDSTR), b)); + YBUG(); + } + + bi = yaffs_GetBlockInfo(dev, b); + +#if 0 + if (bi->blockState == YAFFS_BLOCK_STATE_CHECKPOINT) { + dirtiest = b; + pagesInUse = 0; + } + else +#endif + + if (bi->blockState == YAFFS_BLOCK_STATE_FULL && + (bi->pagesInUse - bi->softDeletions) < pagesInUse && + yaffs_BlockNotDisqualifiedFromGC(dev, bi)) { + dirtiest = b; + pagesInUse = (bi->pagesInUse - bi->softDeletions); + } + } + + dev->currentDirtyChecker = b; + + if (dirtiest > 0) { + T(YAFFS_TRACE_GC, + (TSTR("GC Selected block %d with %d free, prioritised:%d" TENDSTR), dirtiest, + dev->nChunksPerBlock - pagesInUse,prioritised)); + } + + dev->oldestDirtySequence = 0; + + if (dirtiest > 0) { + dev->nonAggressiveSkip = 4; + } + + return dirtiest; +} + +static void yaffs_BlockBecameDirty(yaffs_Device * dev, int blockNo) +{ + yaffs_BlockInfo *bi = yaffs_GetBlockInfo(dev, blockNo); + + int erasedOk = 0; + + /* If the block is still healthy erase it and mark as clean. + * If the block has had a data failure, then retire it. + */ + + T(YAFFS_TRACE_GC | YAFFS_TRACE_ERASE, + (TSTR("yaffs_BlockBecameDirty block %d state %d %s"TENDSTR), + blockNo, bi->blockState, (bi->needsRetiring) ? 
"needs retiring" : "")); + + bi->blockState = YAFFS_BLOCK_STATE_DIRTY; + + if (!bi->needsRetiring) { + yaffs_InvalidateCheckpoint(dev); + erasedOk = yaffs_EraseBlockInNAND(dev, blockNo); + if (!erasedOk) { + dev->nErasureFailures++; + T(YAFFS_TRACE_ERROR | YAFFS_TRACE_BAD_BLOCKS, + (TSTR("**>> Erasure failed %d" TENDSTR), blockNo)); + } + } + + if (erasedOk && + ((yaffs_traceMask & YAFFS_TRACE_ERASE) || !yaffs_SkipVerification(dev))) { + int i; + for (i = 0; i < dev->nChunksPerBlock; i++) { + if (!yaffs_CheckChunkErased + (dev, blockNo * dev->nChunksPerBlock + i)) { + T(YAFFS_TRACE_ERROR, + (TSTR + (">>Block %d erasure supposedly OK, but chunk %d not erased" + TENDSTR), blockNo, i)); + } + } + } + + if (erasedOk) { + /* Clean it up... */ + bi->blockState = YAFFS_BLOCK_STATE_EMPTY; + dev->nErasedBlocks++; + bi->pagesInUse = 0; + bi->softDeletions = 0; + bi->hasShrinkHeader = 0; + bi->skipErasedCheck = 1; /* This is clean, so no need to check */ + bi->gcPrioritise = 0; + yaffs_ClearChunkBits(dev, blockNo); + + T(YAFFS_TRACE_ERASE, + (TSTR("Erased block %d" TENDSTR), blockNo)); + } else { + dev->nFreeChunks -= dev->nChunksPerBlock; /* We lost a block of free space */ + + yaffs_RetireBlock(dev, blockNo); + T(YAFFS_TRACE_ERROR | YAFFS_TRACE_BAD_BLOCKS, + (TSTR("**>> Block %d retired" TENDSTR), blockNo)); + } +} + +static int yaffs_FindBlockForAllocation(yaffs_Device * dev) +{ + int i; + + yaffs_BlockInfo *bi; + + if (dev->nErasedBlocks < 1) { + /* Hoosterman we've got a problem. + * Can't get space to gc + */ + T(YAFFS_TRACE_ERROR, + (TSTR("yaffs tragedy: no more eraased blocks" TENDSTR))); + + return -1; + } + + /* Find an empty block. 
*/ + + for (i = dev->internalStartBlock; i <= dev->internalEndBlock; i++) { + dev->allocationBlockFinder++; + if (dev->allocationBlockFinder < dev->internalStartBlock + || dev->allocationBlockFinder > dev->internalEndBlock) { + dev->allocationBlockFinder = dev->internalStartBlock; + } + + bi = yaffs_GetBlockInfo(dev, dev->allocationBlockFinder); + + if (bi->blockState == YAFFS_BLOCK_STATE_EMPTY) { + bi->blockState = YAFFS_BLOCK_STATE_ALLOCATING; + dev->sequenceNumber++; + bi->sequenceNumber = dev->sequenceNumber; + dev->nErasedBlocks--; + T(YAFFS_TRACE_ALLOCATE, + (TSTR("Allocated block %d, seq %d, %d left" TENDSTR), + dev->allocationBlockFinder, dev->sequenceNumber, + dev->nErasedBlocks)); + return dev->allocationBlockFinder; + } + } + + T(YAFFS_TRACE_ALWAYS, + (TSTR + ("yaffs tragedy: no more eraased blocks, but there should have been %d" + TENDSTR), dev->nErasedBlocks)); + + return -1; +} + + +// Check if there's space to allocate... +// Thinks.... do we need top make this ths same as yaffs_GetFreeChunks()? +static int yaffs_CheckSpaceForAllocation(yaffs_Device * dev) +{ + int reservedChunks; + int reservedBlocks = dev->nReservedBlocks; + int checkpointBlocks; + + checkpointBlocks = dev->nCheckpointReservedBlocks - dev->blocksInCheckpoint; + if(checkpointBlocks < 0) + checkpointBlocks = 0; + + reservedChunks = ((reservedBlocks + checkpointBlocks) * dev->nChunksPerBlock); + + return (dev->nFreeChunks > reservedChunks); +} + +static int yaffs_AllocateChunk(yaffs_Device * dev, int useReserve, yaffs_BlockInfo **blockUsedPtr) +{ + int retVal; + yaffs_BlockInfo *bi; + + if (dev->allocationBlock < 0) { + /* Get next block to allocate off */ + dev->allocationBlock = yaffs_FindBlockForAllocation(dev); + dev->allocationPage = 0; + } + + if (!useReserve && !yaffs_CheckSpaceForAllocation(dev)) { + /* Not enough space to allocate unless we're allowed to use the reserve. 
*/ + return -1; + } + + if (dev->nErasedBlocks < dev->nReservedBlocks + && dev->allocationPage == 0) { + T(YAFFS_TRACE_ALLOCATE, (TSTR("Allocating reserve" TENDSTR))); + } + + /* Next page please.... */ + if (dev->allocationBlock >= 0) { + bi = yaffs_GetBlockInfo(dev, dev->allocationBlock); + + retVal = (dev->allocationBlock * dev->nChunksPerBlock) + + dev->allocationPage; + bi->pagesInUse++; + yaffs_SetChunkBit(dev, dev->allocationBlock, + dev->allocationPage); + + dev->allocationPage++; + + dev->nFreeChunks--; + + /* If the block is full set the state to full */ + if (dev->allocationPage >= dev->nChunksPerBlock) { + bi->blockState = YAFFS_BLOCK_STATE_FULL; + dev->allocationBlock = -1; + } + + if(blockUsedPtr) + *blockUsedPtr = bi; + + return retVal; + } + + T(YAFFS_TRACE_ERROR, + (TSTR("!!!!!!!!! Allocator out !!!!!!!!!!!!!!!!!" TENDSTR))); + + return -1; +} + +static int yaffs_GetErasedChunks(yaffs_Device * dev) +{ + int n; + + n = dev->nErasedBlocks * dev->nChunksPerBlock; + + if (dev->allocationBlock > 0) { + n += (dev->nChunksPerBlock - dev->allocationPage); + } + + return n; + +} + +static int yaffs_GarbageCollectBlock(yaffs_Device * dev, int block) +{ + int oldChunk; + int newChunk; + int chunkInBlock; + int markNAND; + int retVal = YAFFS_OK; + int cleanups = 0; + int i; + int isCheckpointBlock; + int matchingChunk; + + int chunksBefore = yaffs_GetErasedChunks(dev); + int chunksAfter; + + yaffs_ExtendedTags tags; + + yaffs_BlockInfo *bi = yaffs_GetBlockInfo(dev, block); + + yaffs_Object *object; + + isCheckpointBlock = (bi->blockState == YAFFS_BLOCK_STATE_CHECKPOINT); + + bi->blockState = YAFFS_BLOCK_STATE_COLLECTING; + + T(YAFFS_TRACE_TRACING, + (TSTR("Collecting block %d, in use %d, shrink %d, " TENDSTR), block, + bi->pagesInUse, bi->hasShrinkHeader)); + + /*yaffs_VerifyFreeChunks(dev); */ + + bi->hasShrinkHeader = 0; /* clear the flag so that the block can erase */ + + /* Take off the number of soft deleted entries because + * they're going to get really 
deleted during GC. + */ + dev->nFreeChunks -= bi->softDeletions; + + dev->isDoingGC = 1; + + if (isCheckpointBlock || + !yaffs_StillSomeChunkBits(dev, block)) { + T(YAFFS_TRACE_TRACING, + (TSTR + ("Collecting block %d that has no chunks in use" TENDSTR), + block)); + yaffs_BlockBecameDirty(dev, block); + } else { + + __u8 *buffer = yaffs_GetTempBuffer(dev, __LINE__); + + yaffs_VerifyBlock(dev,bi,block); + + for (chunkInBlock = 0, oldChunk = block * dev->nChunksPerBlock; + chunkInBlock < dev->nChunksPerBlock + && yaffs_StillSomeChunkBits(dev, block); + chunkInBlock++, oldChunk++) { + if (yaffs_CheckChunkBit(dev, block, chunkInBlock)) { + + /* This page is in use and might need to be copied off */ + + markNAND = 1; + + yaffs_InitialiseTags(&tags); + + yaffs_ReadChunkWithTagsFromNAND(dev, oldChunk, + buffer, &tags); + + object = + yaffs_FindObjectByNumber(dev, + tags.objectId); + + T(YAFFS_TRACE_GC_DETAIL, + (TSTR + ("Collecting page %d, %d %d %d " TENDSTR), + chunkInBlock, tags.objectId, tags.chunkId, + tags.byteCount)); + + if(object && !yaffs_SkipVerification(dev)){ + if(tags.chunkId == 0) + matchingChunk = object->chunkId; + else if(object->softDeleted) + matchingChunk = oldChunk; /* Defeat the test */ + else + matchingChunk = yaffs_FindChunkInFile(object,tags.chunkId,NULL); + + if(oldChunk != matchingChunk) + T(YAFFS_TRACE_ERROR, + (TSTR("gc: page in gc mismatch: %d %d %d %d"TENDSTR), + oldChunk,matchingChunk,tags.objectId, tags.chunkId)); + + } + + if (!object) { + T(YAFFS_TRACE_ERROR, + (TSTR + ("page %d in gc has no object: %d %d %d " + TENDSTR), oldChunk, + tags.objectId, tags.chunkId, tags.byteCount)); + } + + if (object && object->deleted + && tags.chunkId != 0) { + /* Data chunk in a deleted file, throw it away + * It's a soft deleted data chunk, + * No need to copy this, just forget about it and + * fix up the object. 
+ */ + + object->nDataChunks--; + + if (object->nDataChunks <= 0) { + /* remeber to clean up the object */ + dev->gcCleanupList[cleanups] = + tags.objectId; + cleanups++; + } + markNAND = 0; + } else if (0 + /* Todo object && object->deleted && object->nDataChunks == 0 */ + ) { + /* Deleted object header with no data chunks. + * Can be discarded and the file deleted. + */ + object->chunkId = 0; + yaffs_FreeTnode(object->myDev, + object->variant. + fileVariant.top); + object->variant.fileVariant.top = NULL; + yaffs_DoGenericObjectDeletion(object); + + } else if (object) { + /* It's either a data chunk in a live file or + * an ObjectHeader, so we're interested in it. + * NB Need to keep the ObjectHeaders of deleted files + * until the whole file has been deleted off + */ + tags.serialNumber++; + + dev->nGCCopies++; + + if (tags.chunkId == 0) { + /* It is an object Id, + * We need to nuke the shrinkheader flags first + * We no longer want the shrinkHeader flag since its work is done + * and if it is left in place it will mess up scanning. + * Also, clear out any shadowing stuff + */ + + yaffs_ObjectHeader *oh; + oh = (yaffs_ObjectHeader *)buffer; + oh->isShrink = 0; + oh->shadowsObject = -1; + tags.extraShadows = 0; + tags.extraIsShrinkHeader = 0; + + yaffs_VerifyObjectHeader(object,oh,&tags,1); + } + + newChunk = + yaffs_WriteNewChunkWithTagsToNAND(dev, buffer, &tags, 1); + + if (newChunk < 0) { + retVal = YAFFS_FAIL; + } else { + + /* Ok, now fix up the Tnodes etc. 
*/ + + if (tags.chunkId == 0) { + /* It's a header */ + object->chunkId = newChunk; + object->serial = tags.serialNumber; + } else { + /* It's a data chunk */ + yaffs_PutChunkIntoFile + (object, + tags.chunkId, + newChunk, 0); + } + } + } + + yaffs_DeleteChunk(dev, oldChunk, markNAND, __LINE__); + + } + } + + yaffs_ReleaseTempBuffer(dev, buffer, __LINE__); + + + /* Do any required cleanups */ + for (i = 0; i < cleanups; i++) { + /* Time to delete the file too */ + object = + yaffs_FindObjectByNumber(dev, + dev->gcCleanupList[i]); + if (object) { + yaffs_FreeTnode(dev, + object->variant.fileVariant. + top); + object->variant.fileVariant.top = NULL; + T(YAFFS_TRACE_GC, + (TSTR + ("yaffs: About to finally delete object %d" + TENDSTR), object->objectId)); + yaffs_DoGenericObjectDeletion(object); + object->myDev->nDeletedFiles--; + } + + } + + } + + yaffs_VerifyCollectedBlock(dev,bi,block); + + if (chunksBefore >= (chunksAfter = yaffs_GetErasedChunks(dev))) { + T(YAFFS_TRACE_GC, + (TSTR + ("gc did not increase free chunks before %d after %d" + TENDSTR), chunksBefore, chunksAfter)); + } + + dev->isDoingGC = 0; + + return YAFFS_OK; +} + +/* New garbage collector + * If we're very low on erased blocks then we do aggressive garbage collection + * otherwise we do "leasurely" garbage collection. + * Aggressive gc looks further (whole array) and will accept less dirty blocks. + * Passive gc only inspects smaller areas and will only accept more dirty blocks. + * + * The idea is to help clear out space in a more spread-out manner. + * Dunno if it really does anything useful. + */ +static int yaffs_CheckGarbageCollection(yaffs_Device * dev) +{ + int block; + int aggressive; + int gcOk = YAFFS_OK; + int maxTries = 0; + + int checkpointBlockAdjust; + + if (dev->isDoingGC) { + /* Bail out so we don't get recursive gc */ + return YAFFS_OK; + } + + /* This loop should pass the first time. + * We'll only see looping here if the erase of the collected block fails. 
+ */ + + do { + maxTries++; + + checkpointBlockAdjust = (dev->nCheckpointReservedBlocks - dev->blocksInCheckpoint); + if(checkpointBlockAdjust < 0) + checkpointBlockAdjust = 0; + + if (dev->nErasedBlocks < (dev->nReservedBlocks + checkpointBlockAdjust + 2)) { + /* We need a block soon...*/ + aggressive = 1; + } else { + /* We're in no hurry */ + aggressive = 0; + } + + block = yaffs_FindBlockForGarbageCollection(dev, aggressive); + + if (block > 0) { + dev->garbageCollections++; + if (!aggressive) { + dev->passiveGarbageCollections++; + } + + T(YAFFS_TRACE_GC, + (TSTR + ("yaffs: GC erasedBlocks %d aggressive %d" TENDSTR), + dev->nErasedBlocks, aggressive)); + + gcOk = yaffs_GarbageCollectBlock(dev, block); + } + + if (dev->nErasedBlocks < (dev->nReservedBlocks) && block > 0) { + T(YAFFS_TRACE_GC, + (TSTR + ("yaffs: GC !!!no reclaim!!! erasedBlocks %d after try %d block %d" + TENDSTR), dev->nErasedBlocks, maxTries, block)); + } + } while ((dev->nErasedBlocks < dev->nReservedBlocks) && (block > 0) + && (maxTries < 2)); + + return aggressive ? gcOk : YAFFS_OK; +} + +/*------------------------- TAGS --------------------------------*/ + +static int yaffs_TagsMatch(const yaffs_ExtendedTags * tags, int objectId, + int chunkInObject) +{ + return (tags->chunkId == chunkInObject && + tags->objectId == objectId && !tags->chunkDeleted) ? 
1 : 0; + +} + + +/*-------------------- Data file manipulation -----------------*/ + +static int yaffs_FindChunkInFile(yaffs_Object * in, int chunkInInode, + yaffs_ExtendedTags * tags) +{ + /*Get the Tnode, then get the level 0 offset chunk offset */ + yaffs_Tnode *tn; + int theChunk = -1; + yaffs_ExtendedTags localTags; + int retVal = -1; + + yaffs_Device *dev = in->myDev; + + if (!tags) { + /* Passed a NULL, so use our own tags space */ + tags = &localTags; + } + + tn = yaffs_FindLevel0Tnode(dev, &in->variant.fileVariant, chunkInInode); + + if (tn) { + theChunk = yaffs_GetChunkGroupBase(dev,tn,chunkInInode); + + retVal = + yaffs_FindChunkInGroup(dev, theChunk, tags, in->objectId, + chunkInInode); + } + return retVal; +} + +static int yaffs_FindAndDeleteChunkInFile(yaffs_Object * in, int chunkInInode, + yaffs_ExtendedTags * tags) +{ + /* Get the Tnode, then get the level 0 offset chunk offset */ + yaffs_Tnode *tn; + int theChunk = -1; + yaffs_ExtendedTags localTags; + + yaffs_Device *dev = in->myDev; + int retVal = -1; + + if (!tags) { + /* Passed a NULL, so use our own tags space */ + tags = &localTags; + } + + tn = yaffs_FindLevel0Tnode(dev, &in->variant.fileVariant, chunkInInode); + + if (tn) { + + theChunk = yaffs_GetChunkGroupBase(dev,tn,chunkInInode); + + retVal = + yaffs_FindChunkInGroup(dev, theChunk, tags, in->objectId, + chunkInInode); + + /* Delete the entry in the filestructure (if found) */ + if (retVal != -1) { + yaffs_PutLevel0Tnode(dev,tn,chunkInInode,0); + } + } else { + /*T(("No level 0 found for %d\n", chunkInInode)); */ + } + + if (retVal == -1) { + /* T(("Could not find %d to delete\n",chunkInInode)); */ + } + return retVal; +} + +#ifdef YAFFS_PARANOID + +static int yaffs_CheckFileSanity(yaffs_Object * in) +{ + int chunk; + int nChunks; + int fSize; + int failed = 0; + int objId; + yaffs_Tnode *tn; + yaffs_Tags localTags; + yaffs_Tags *tags = &localTags; + int theChunk; + int chunkDeleted; + + if (in->variantType != YAFFS_OBJECT_TYPE_FILE) { + 
/* T(("Object not a file\n")); */ + return YAFFS_FAIL; + } + + objId = in->objectId; + fSize = in->variant.fileVariant.fileSize; + nChunks = + (fSize + in->myDev->nDataBytesPerChunk - 1) / in->myDev->nDataBytesPerChunk; + + for (chunk = 1; chunk <= nChunks; chunk++) { + tn = yaffs_FindLevel0Tnode(in->myDev, &in->variant.fileVariant, + chunk); + + if (tn) { + + theChunk = yaffs_GetChunkGroupBase(dev,tn,chunk); + + if (yaffs_CheckChunkBits + (dev, theChunk / dev->nChunksPerBlock, + theChunk % dev->nChunksPerBlock)) { + + yaffs_ReadChunkTagsFromNAND(in->myDev, theChunk, + tags, + &chunkDeleted); + if (yaffs_TagsMatch + (tags, in->objectId, chunk, chunkDeleted)) { + /* found it; */ + + } + } else { + + failed = 1; + } + + } else { + /* T(("No level 0 found for %d\n", chunk)); */ + } + } + + return failed ? YAFFS_FAIL : YAFFS_OK; +} + +#endif + +static int yaffs_PutChunkIntoFile(yaffs_Object * in, int chunkInInode, + int chunkInNAND, int inScan) +{ + /* NB inScan is zero unless scanning. + * For forward scanning, inScan is > 0; + * for backward scanning inScan is < 0 + */ + + yaffs_Tnode *tn; + yaffs_Device *dev = in->myDev; + int existingChunk; + yaffs_ExtendedTags existingTags; + yaffs_ExtendedTags newTags; + unsigned existingSerial, newSerial; + + if (in->variantType != YAFFS_OBJECT_TYPE_FILE) { + /* Just ignore an attempt at putting a chunk into a non-file during scanning + * If it is not during Scanning then something went wrong! 
+ */ + if (!inScan) { + T(YAFFS_TRACE_ERROR, + (TSTR + ("yaffs tragedy:attempt to put data chunk into a non-file" + TENDSTR))); + YBUG(); + } + + yaffs_DeleteChunk(dev, chunkInNAND, 1, __LINE__); + return YAFFS_OK; + } + + tn = yaffs_AddOrFindLevel0Tnode(dev, + &in->variant.fileVariant, + chunkInInode, + NULL); + if (!tn) { + return YAFFS_FAIL; + } + + existingChunk = yaffs_GetChunkGroupBase(dev,tn,chunkInInode); + + if (inScan != 0) { + /* If we're scanning then we need to test for duplicates + * NB This does not need to be efficient since it should only ever + * happen when the power fails during a write, then only one + * chunk should ever be affected. + * + * Correction for YAFFS2: This could happen quite a lot and we need to think about efficiency! TODO + * Update: For backward scanning we don't need to re-read tags so this is quite cheap. + */ + + if (existingChunk != 0) { + /* NB Right now existing chunk will not be real chunkId if the device >= 32MB + * thus we have to do a FindChunkInFile to get the real chunk id. + * + * We have a duplicate now we need to decide which one to use: + * + * Backwards scanning YAFFS2: The old one is what we use, dump the new one. + * Forward scanning YAFFS2: The new one is what we use, dump the old one. + * YAFFS1: Get both sets of tags and compare serial numbers. + */ + + if (inScan > 0) { + /* Only do this for forward scanning */ + yaffs_ReadChunkWithTagsFromNAND(dev, + chunkInNAND, + NULL, &newTags); + + /* Do a proper find */ + existingChunk = + yaffs_FindChunkInFile(in, chunkInInode, + &existingTags); + } + + if (existingChunk <= 0) { + /*Hoosterman - how did this happen? 
*/ + + T(YAFFS_TRACE_ERROR, + (TSTR + ("yaffs tragedy: existing chunk < 0 in scan" + TENDSTR))); + + } + + /* NB The deleted flags should be false, otherwise the chunks will + * not be loaded during a scan + */ + + newSerial = newTags.serialNumber; + existingSerial = existingTags.serialNumber; + + if ((inScan > 0) && + (in->myDev->isYaffs2 || + existingChunk <= 0 || + ((existingSerial + 1) & 3) == newSerial)) { + /* Forward scanning. + * Use new + * Delete the old one and drop through to update the tnode + */ + yaffs_DeleteChunk(dev, existingChunk, 1, + __LINE__); + } else { + /* Backward scanning or we want to use the existing one + * Use existing. + * Delete the new one and return early so that the tnode isn't changed + */ + yaffs_DeleteChunk(dev, chunkInNAND, 1, + __LINE__); + return YAFFS_OK; + } + } + + } + + if (existingChunk == 0) { + in->nDataChunks++; + } + + yaffs_PutLevel0Tnode(dev,tn,chunkInInode,chunkInNAND); + + return YAFFS_OK; +} + +static int yaffs_ReadChunkDataFromObject(yaffs_Object * in, int chunkInInode, + __u8 * buffer) +{ + int chunkInNAND = yaffs_FindChunkInFile(in, chunkInInode, NULL); + + if (chunkInNAND >= 0) { + return yaffs_ReadChunkWithTagsFromNAND(in->myDev, chunkInNAND, + buffer,NULL); + } else { + T(YAFFS_TRACE_NANDACCESS, + (TSTR("Chunk %d not found zero instead" TENDSTR), + chunkInNAND)); + /* get sane (zero) data if you read a hole */ + memset(buffer, 0, in->myDev->nDataBytesPerChunk); + return 0; + } + +} + +void yaffs_DeleteChunk(yaffs_Device * dev, int chunkId, int markNAND, int lyn) +{ + int block; + int page; + yaffs_ExtendedTags tags; + yaffs_BlockInfo *bi; + + if (chunkId <= 0) + return; + + + dev->nDeletions++; + block = chunkId / dev->nChunksPerBlock; + page = chunkId % dev->nChunksPerBlock; + + + if(!yaffs_CheckChunkBit(dev,block,page)) + T(YAFFS_TRACE_VERIFY, + (TSTR("Deleting invalid chunk %d"TENDSTR), + chunkId)); + + bi = yaffs_GetBlockInfo(dev, block); + + T(YAFFS_TRACE_DELETION, + (TSTR("line %d delete of chunk 
%d" TENDSTR), lyn, chunkId)); + + if (markNAND && + bi->blockState != YAFFS_BLOCK_STATE_COLLECTING && !dev->isYaffs2) { + + yaffs_InitialiseTags(&tags); + + tags.chunkDeleted = 1; + + yaffs_WriteChunkWithTagsToNAND(dev, chunkId, NULL, &tags); + } else { + dev->nUnmarkedDeletions++; + } + + /* Pull out of the management area. + * If the whole block became dirty, this will kick off an erasure. + */ + if (bi->blockState == YAFFS_BLOCK_STATE_ALLOCATING || + bi->blockState == YAFFS_BLOCK_STATE_FULL || + bi->blockState == YAFFS_BLOCK_STATE_NEEDS_SCANNING || + bi->blockState == YAFFS_BLOCK_STATE_COLLECTING) { + dev->nFreeChunks++; + + yaffs_ClearChunkBit(dev, block, page); + + bi->pagesInUse--; + + if (bi->pagesInUse == 0 && + !bi->hasShrinkHeader && + bi->blockState != YAFFS_BLOCK_STATE_ALLOCATING && + bi->blockState != YAFFS_BLOCK_STATE_NEEDS_SCANNING) { + yaffs_BlockBecameDirty(dev, block); + } + + } else { + /* T(("Bad news deleting chunk %d\n",chunkId)); */ + } + +} + +static int yaffs_WriteChunkDataToObject(yaffs_Object * in, int chunkInInode, + const __u8 * buffer, int nBytes, + int useReserve) +{ + /* Find old chunk Need to do this to get serial number + * Write new one and patch into tree. + * Invalidate old tags. + */ + + int prevChunkId; + yaffs_ExtendedTags prevTags; + + int newChunkId; + yaffs_ExtendedTags newTags; + + yaffs_Device *dev = in->myDev; + + yaffs_CheckGarbageCollection(dev); + + /* Get the previous chunk at this location in the file if it exists */ + prevChunkId = yaffs_FindChunkInFile(in, chunkInInode, &prevTags); + + /* Set up new tags */ + yaffs_InitialiseTags(&newTags); + + newTags.chunkId = chunkInInode; + newTags.objectId = in->objectId; + newTags.serialNumber = + (prevChunkId >= 0) ? 
prevTags.serialNumber + 1 : 1; + newTags.byteCount = nBytes; + + newChunkId = + yaffs_WriteNewChunkWithTagsToNAND(dev, buffer, &newTags, + useReserve); + + if (newChunkId >= 0) { + yaffs_PutChunkIntoFile(in, chunkInInode, newChunkId, 0); + + if (prevChunkId >= 0) { + yaffs_DeleteChunk(dev, prevChunkId, 1, __LINE__); + + } + + yaffs_CheckFileSanity(in); + } + return newChunkId; + +} + +/* UpdateObjectHeader updates the header on NAND for an object. + * If name is not NULL, then that new name is used. + */ +int yaffs_UpdateObjectHeader(yaffs_Object * in, const YCHAR * name, int force, + int isShrink, int shadows) +{ + + yaffs_BlockInfo *bi; + + yaffs_Device *dev = in->myDev; + + int prevChunkId; + int retVal = 0; + int result = 0; + + int newChunkId; + yaffs_ExtendedTags newTags; + yaffs_ExtendedTags oldTags; + + __u8 *buffer = NULL; + YCHAR oldName[YAFFS_MAX_NAME_LENGTH + 1]; + + yaffs_ObjectHeader *oh = NULL; + + yaffs_strcpy(oldName,"silly old name"); + + if (!in->fake || force) { + + yaffs_CheckGarbageCollection(dev); + yaffs_CheckObjectDetailsLoaded(in); + + buffer = yaffs_GetTempBuffer(in->myDev, __LINE__); + oh = (yaffs_ObjectHeader *) buffer; + + prevChunkId = in->chunkId; + + if (prevChunkId >= 0) { + result = yaffs_ReadChunkWithTagsFromNAND(dev, prevChunkId, + buffer, &oldTags); + + yaffs_VerifyObjectHeader(in,oh,&oldTags,0); + + memcpy(oldName, oh->name, sizeof(oh->name)); + } + + memset(buffer, 0xFF, dev->nDataBytesPerChunk); + + oh->type = in->variantType; + oh->yst_mode = in->yst_mode; + oh->shadowsObject = shadows; + +#ifdef CONFIG_YAFFS_WINCE + oh->win_atime[0] = in->win_atime[0]; + oh->win_ctime[0] = in->win_ctime[0]; + oh->win_mtime[0] = in->win_mtime[0]; + oh->win_atime[1] = in->win_atime[1]; + oh->win_ctime[1] = in->win_ctime[1]; + oh->win_mtime[1] = in->win_mtime[1]; +#else + oh->yst_uid = in->yst_uid; + oh->yst_gid = in->yst_gid; + oh->yst_atime = in->yst_atime; + oh->yst_mtime = in->yst_mtime; + oh->yst_ctime = in->yst_ctime; + oh->yst_rdev = 
in->yst_rdev; +#endif + if (in->parent) { + oh->parentObjectId = in->parent->objectId; + } else { + oh->parentObjectId = 0; + } + + if (name && *name) { + memset(oh->name, 0, sizeof(oh->name)); + yaffs_strncpy(oh->name, name, YAFFS_MAX_NAME_LENGTH); + } else if (prevChunkId>=0) { + memcpy(oh->name, oldName, sizeof(oh->name)); + } else { + memset(oh->name, 0, sizeof(oh->name)); + } + + oh->isShrink = isShrink; + + switch (in->variantType) { + case YAFFS_OBJECT_TYPE_UNKNOWN: + /* Should not happen */ + break; + case YAFFS_OBJECT_TYPE_FILE: + oh->fileSize = + (oh->parentObjectId == YAFFS_OBJECTID_DELETED + || oh->parentObjectId == + YAFFS_OBJECTID_UNLINKED) ? 0 : in->variant. + fileVariant.fileSize; + break; + case YAFFS_OBJECT_TYPE_HARDLINK: + oh->equivalentObjectId = + in->variant.hardLinkVariant.equivalentObjectId; + break; + case YAFFS_OBJECT_TYPE_SPECIAL: + /* Do nothing */ + break; + case YAFFS_OBJECT_TYPE_DIRECTORY: + /* Do nothing */ + break; + case YAFFS_OBJECT_TYPE_SYMLINK: + yaffs_strncpy(oh->alias, + in->variant.symLinkVariant.alias, + YAFFS_MAX_ALIAS_LENGTH); + oh->alias[YAFFS_MAX_ALIAS_LENGTH] = 0; + break; + } + + /* Tags */ + yaffs_InitialiseTags(&newTags); + in->serial++; + newTags.chunkId = 0; + newTags.objectId = in->objectId; + newTags.serialNumber = in->serial; + + /* Add extra info for file header */ + + newTags.extraHeaderInfoAvailable = 1; + newTags.extraParentObjectId = oh->parentObjectId; + newTags.extraFileLength = oh->fileSize; + newTags.extraIsShrinkHeader = oh->isShrink; + newTags.extraEquivalentObjectId = oh->equivalentObjectId; + newTags.extraShadows = (oh->shadowsObject > 0) ? 1 : 0; + newTags.extraObjectType = in->variantType; + + yaffs_VerifyObjectHeader(in,oh,&newTags,1); + + /* Create new chunk in NAND */ + newChunkId = + yaffs_WriteNewChunkWithTagsToNAND(dev, buffer, &newTags, + (prevChunkId >= 0) ? 
1 : 0); + + if (newChunkId >= 0) { + + in->chunkId = newChunkId; + + if (prevChunkId >= 0) { + yaffs_DeleteChunk(dev, prevChunkId, 1, + __LINE__); + } + + if(!yaffs_ObjectHasCachedWriteData(in)) + in->dirty = 0; + + /* If this was a shrink, then mark the block that the chunk lives on */ + if (isShrink) { + bi = yaffs_GetBlockInfo(in->myDev, + newChunkId /in->myDev-> nChunksPerBlock); + bi->hasShrinkHeader = 1; + } + + } + + retVal = newChunkId; + + } + + if (buffer) + yaffs_ReleaseTempBuffer(dev, buffer, __LINE__); + + return retVal; +} + +/*------------------------ Short Operations Cache ---------------------------------------- + * In many situations where there is no high level buffering (eg WinCE) a lot of + * reads might be short sequential reads, and a lot of writes may be short + * sequential writes. eg. scanning/writing a jpeg file. + * In these cases, a short read/write cache can provide a huge perfomance benefit + * with dumb-as-a-rock code. + * In Linux, the page cache provides read buffering aand the short op cache provides write + * buffering. + * + * There are a limited number (~10) of cache chunks per device so that we don't + * need a very intelligent search. + */ + +static int yaffs_ObjectHasCachedWriteData(yaffs_Object *obj) +{ + yaffs_Device *dev = obj->myDev; + int i; + yaffs_ChunkCache *cache; + int nCaches = obj->myDev->nShortOpCaches; + + for(i = 0; i < nCaches; i++){ + cache = &dev->srCache[i]; + if (cache->object == obj && + cache->dirty) + return 1; + } + + return 0; +} + + +static void yaffs_FlushFilesChunkCache(yaffs_Object * obj) +{ + yaffs_Device *dev = obj->myDev; + int lowest = -99; /* Stop compiler whining. */ + int i; + yaffs_ChunkCache *cache; + int chunkWritten = 0; + int nCaches = obj->myDev->nShortOpCaches; + + if (nCaches > 0) { + do { + cache = NULL; + + /* Find the dirty cache for this object with the lowest chunk id. 
*/ + for (i = 0; i < nCaches; i++) { + if (dev->srCache[i].object == obj && + dev->srCache[i].dirty) { + if (!cache + || dev->srCache[i].chunkId < + lowest) { + cache = &dev->srCache[i]; + lowest = cache->chunkId; + } + } + } + + if (cache && !cache->locked) { + /* Write it out and free it up */ + + chunkWritten = + yaffs_WriteChunkDataToObject(cache->object, + cache->chunkId, + cache->data, + cache->nBytes, + 1); + cache->dirty = 0; + cache->object = NULL; + } + + } while (cache && chunkWritten > 0); + + if (cache) { + /* Hoosterman, disk full while writing cache out. */ + T(YAFFS_TRACE_ERROR, + (TSTR("yaffs tragedy: no space during cache write" TENDSTR))); + + } + } + +} + +/*yaffs_FlushEntireDeviceCache(dev) + * + * + */ + +void yaffs_FlushEntireDeviceCache(yaffs_Device *dev) +{ + yaffs_Object *obj; + int nCaches = dev->nShortOpCaches; + int i; + + /* Find a dirty object in the cache and flush it... + * until there are no further dirty objects. + */ + do { + obj = NULL; + for( i = 0; i < nCaches && !obj; i++) { + if (dev->srCache[i].object && + dev->srCache[i].dirty) + obj = dev->srCache[i].object; + + } + if(obj) + yaffs_FlushFilesChunkCache(obj); + + } while(obj); + +} + + +/* Grab us a cache chunk for use. + * First look for an empty one. + * Then look for the least recently used non-dirty one. + * Then look for the least recently used dirty one...., flush and look again. + */ +static yaffs_ChunkCache *yaffs_GrabChunkCacheWorker(yaffs_Device * dev) +{ + int i; + int usage; + int theOne; + + if (dev->nShortOpCaches > 0) { + for (i = 0; i < dev->nShortOpCaches; i++) { + if (!dev->srCache[i].object) + return &dev->srCache[i]; + } + + return NULL; + + theOne = -1; + usage = 0; /* just to stop the compiler grizzling */ + + for (i = 0; i < dev->nShortOpCaches; i++) { + if (!dev->srCache[i].dirty && + ((dev->srCache[i].lastUse < usage && theOne >= 0) || + theOne < 0)) { + usage = dev->srCache[i].lastUse; + theOne = i; + } + } + + + return theOne >= 0 ? 
&dev->srCache[theOne] : NULL; + } else { + return NULL; + } + +} + +static yaffs_ChunkCache *yaffs_GrabChunkCache(yaffs_Device * dev) +{ + yaffs_ChunkCache *cache; + yaffs_Object *theObj; + int usage; + int i; + int pushout; + + if (dev->nShortOpCaches > 0) { + /* Try find a non-dirty one... */ + + cache = yaffs_GrabChunkCacheWorker(dev); + + if (!cache) { + /* They were all dirty, find the last recently used object and flush + * its cache, then find again. + * NB what's here is not very accurate, we actually flush the object + * the last recently used page. + */ + + /* With locking we can't assume we can use entry zero */ + + theObj = NULL; + usage = -1; + cache = NULL; + pushout = -1; + + for (i = 0; i < dev->nShortOpCaches; i++) { + if (dev->srCache[i].object && + !dev->srCache[i].locked && + (dev->srCache[i].lastUse < usage || !cache)) + { + usage = dev->srCache[i].lastUse; + theObj = dev->srCache[i].object; + cache = &dev->srCache[i]; + pushout = i; + } + } + + if (!cache || cache->dirty) { + /* Flush and try again */ + yaffs_FlushFilesChunkCache(theObj); + cache = yaffs_GrabChunkCacheWorker(dev); + } + + } + return cache; + } else + return NULL; + +} + +/* Find a cached chunk */ +static yaffs_ChunkCache *yaffs_FindChunkCache(const yaffs_Object * obj, + int chunkId) +{ + yaffs_Device *dev = obj->myDev; + int i; + if (dev->nShortOpCaches > 0) { + for (i = 0; i < dev->nShortOpCaches; i++) { + if (dev->srCache[i].object == obj && + dev->srCache[i].chunkId == chunkId) { + dev->cacheHits++; + + return &dev->srCache[i]; + } + } + } + return NULL; +} + +/* Mark the chunk for the least recently used algorithym */ +static void yaffs_UseChunkCache(yaffs_Device * dev, yaffs_ChunkCache * cache, + int isAWrite) +{ + + if (dev->nShortOpCaches > 0) { + if (dev->srLastUse < 0 || dev->srLastUse > 100000000) { + /* Reset the cache usages */ + int i; + for (i = 1; i < dev->nShortOpCaches; i++) { + dev->srCache[i].lastUse = 0; + } + dev->srLastUse = 0; + } + + dev->srLastUse++; + 
+ cache->lastUse = dev->srLastUse; + + if (isAWrite) { + cache->dirty = 1; + } + } +} + +/* Invalidate a single cache page. + * Do this when a whole page gets written, + * ie the short cache for this page is no longer valid. + */ +static void yaffs_InvalidateChunkCache(yaffs_Object * object, int chunkId) +{ + if (object->myDev->nShortOpCaches > 0) { + yaffs_ChunkCache *cache = yaffs_FindChunkCache(object, chunkId); + + if (cache) { + cache->object = NULL; + } + } +} + +/* Invalidate all the cache pages associated with this object + * Do this whenever ther file is deleted or resized. + */ +static void yaffs_InvalidateWholeChunkCache(yaffs_Object * in) +{ + int i; + yaffs_Device *dev = in->myDev; + + if (dev->nShortOpCaches > 0) { + /* Invalidate it. */ + for (i = 0; i < dev->nShortOpCaches; i++) { + if (dev->srCache[i].object == in) { + dev->srCache[i].object = NULL; + } + } + } +} + +/*--------------------- Checkpointing --------------------*/ + +#if 0 +static int yaffs_WriteCheckpointValidityMarker(yaffs_Device *dev,int head) +{ + yaffs_CheckpointValidity cp; + + memset(&cp,0,sizeof(cp)); + + cp.structType = sizeof(cp); + cp.magic = YAFFS_MAGIC; + cp.version = YAFFS_CHECKPOINT_VERSION; + cp.head = (head) ? 1 : 0; + + return (yaffs_CheckpointWrite(dev,&cp,sizeof(cp)) == sizeof(cp))? + 1 : 0; +} +#endif +static int yaffs_ReadCheckpointValidityMarker(yaffs_Device *dev, int head) +{ + yaffs_CheckpointValidity cp; + int ok; + + ok = (yaffs_CheckpointRead(dev,&cp,sizeof(cp)) == sizeof(cp)); + if(ok) + ok = (cp.structType == sizeof(cp)) && + (cp.magic == YAFFS_MAGIC) && + (cp.version == YAFFS_CHECKPOINT_VERSION) && + (cp.head == ((head) ? 1 : 0)); + return ok ? 
1 : 0; +} + +static void yaffs_DeviceToCheckpointDevice(yaffs_CheckpointDevice *cp, + yaffs_Device *dev) +{ + cp->nErasedBlocks = dev->nErasedBlocks; + cp->allocationBlock = dev->allocationBlock; + cp->allocationPage = dev->allocationPage; + cp->nFreeChunks = dev->nFreeChunks; + + cp->nDeletedFiles = dev->nDeletedFiles; + cp->nUnlinkedFiles = dev->nUnlinkedFiles; + cp->nBackgroundDeletions = dev->nBackgroundDeletions; + cp->sequenceNumber = dev->sequenceNumber; + //cp->oldestDirtySequence = dev->oldestDirtySequence; + +} + +static void yaffs_CheckpointDeviceToDevice(yaffs_Device *dev, + yaffs_CheckpointDevice *cp) +{ + dev->nErasedBlocks = cp->nErasedBlocks; + dev->allocationBlock = cp->allocationBlock; + dev->allocationPage = cp->allocationPage; + dev->nFreeChunks = cp->nFreeChunks; + + dev->nDeletedFiles = cp->nDeletedFiles; + dev->nUnlinkedFiles = cp->nUnlinkedFiles; + dev->nBackgroundDeletions = cp->nBackgroundDeletions; + dev->sequenceNumber = cp->sequenceNumber; + //dev->oldestDirtySequence = cp->oldestDirtySequence; +} + +#if 0 +static int yaffs_WriteCheckpointDevice(yaffs_Device *dev) +{ + yaffs_CheckpointDevice cp; + __u32 nBytes; + __u32 nBlocks = (dev->internalEndBlock - dev->internalStartBlock + 1); + + int ok; + + /* Write device runtime values*/ + yaffs_DeviceToCheckpointDevice(&cp,dev); + cp.structType = sizeof(cp); + + ok = (yaffs_CheckpointWrite(dev,&cp,sizeof(cp)) == sizeof(cp)); + + /* Write block info */ + if(ok) { + nBytes = nBlocks * sizeof(yaffs_BlockInfo); + ok = (yaffs_CheckpointWrite(dev,dev->blockInfo,nBytes) == nBytes); + } + + /* Write chunk bits */ + if(ok) { + nBytes = nBlocks * dev->chunkBitmapStride; + ok = (yaffs_CheckpointWrite(dev,dev->chunkBits,nBytes) == nBytes); + } + return ok ? 
1 : 0; + +} +#endif +static int yaffs_ReadCheckpointDevice(yaffs_Device *dev) +{ + yaffs_CheckpointDevice cp; + __u32 nBytes; + __u32 nBlocks = (dev->internalEndBlock - dev->internalStartBlock + 1); + + int ok; + + ok = (yaffs_CheckpointRead(dev,&cp,sizeof(cp)) == sizeof(cp)); + if(!ok) + return 0; + if(cp.structType != sizeof(cp)) + return 0; + + yaffs_CheckpointDeviceToDevice(dev,&cp); + + nBytes = nBlocks * sizeof(yaffs_BlockInfo); + + ok = (yaffs_CheckpointRead(dev,dev->blockInfo,nBytes) == nBytes); + + if(!ok) + return 0; + nBytes = nBlocks * dev->chunkBitmapStride; + + ok = (yaffs_CheckpointRead(dev,dev->chunkBits,nBytes) == nBytes); + + return ok ? 1 : 0; +} + +static void yaffs_ObjectToCheckpointObject(yaffs_CheckpointObject *cp, + yaffs_Object *obj) +{ + + cp->objectId = obj->objectId; + cp->parentId = (obj->parent) ? obj->parent->objectId : 0; + cp->chunkId = obj->chunkId; + cp->variantType = obj->variantType; + cp->deleted = obj->deleted; + cp->softDeleted = obj->softDeleted; + cp->unlinked = obj->unlinked; + cp->fake = obj->fake; + cp->renameAllowed = obj->renameAllowed; + cp->unlinkAllowed = obj->unlinkAllowed; + cp->serial = obj->serial; + cp->nDataChunks = obj->nDataChunks; + + if(obj->variantType == YAFFS_OBJECT_TYPE_FILE) + cp->fileSizeOrEquivalentObjectId = obj->variant.fileVariant.fileSize; + else if(obj->variantType == YAFFS_OBJECT_TYPE_HARDLINK) + cp->fileSizeOrEquivalentObjectId = obj->variant.hardLinkVariant.equivalentObjectId; +} + +static void yaffs_CheckpointObjectToObject( yaffs_Object *obj,yaffs_CheckpointObject *cp) +{ + + yaffs_Object *parent; + + obj->objectId = cp->objectId; + + if(cp->parentId) + parent = yaffs_FindOrCreateObjectByNumber( + obj->myDev, + cp->parentId, + YAFFS_OBJECT_TYPE_DIRECTORY); + else + parent = NULL; + + if(parent) + yaffs_AddObjectToDirectory(parent, obj); + + obj->chunkId = cp->chunkId; + obj->variantType = cp->variantType; + obj->deleted = cp->deleted; + obj->softDeleted = cp->softDeleted; + obj->unlinked = 
cp->unlinked; + obj->fake = cp->fake; + obj->renameAllowed = cp->renameAllowed; + obj->unlinkAllowed = cp->unlinkAllowed; + obj->serial = cp->serial; + obj->nDataChunks = cp->nDataChunks; + + if(obj->variantType == YAFFS_OBJECT_TYPE_FILE) + obj->variant.fileVariant.fileSize = cp->fileSizeOrEquivalentObjectId; + else if(obj->variantType == YAFFS_OBJECT_TYPE_HARDLINK) + obj->variant.hardLinkVariant.equivalentObjectId = cp->fileSizeOrEquivalentObjectId; + + if(obj->objectId >= YAFFS_NOBJECT_BUCKETS) + obj->lazyLoaded = 1; +} + + +#if 0 +static int yaffs_CheckpointTnodeWorker(yaffs_Object * in, yaffs_Tnode * tn, + __u32 level, int chunkOffset) +{ + int i; + yaffs_Device *dev = in->myDev; + int ok = 1; + int nTnodeBytes = (dev->tnodeWidth * YAFFS_NTNODES_LEVEL0)/8; + + if (tn) { + if (level > 0) { + + for (i = 0; i < YAFFS_NTNODES_INTERNAL && ok; i++){ + if (tn->internal[i]) { + ok = yaffs_CheckpointTnodeWorker(in, + tn->internal[i], + level - 1, + (chunkOffset<<YAFFS_TNODES_INTERNAL_BITS) + i); + } + } + } else if (level == 0) { + __u32 baseOffset = chunkOffset << YAFFS_TNODES_LEVEL0_BITS; + /* printf("write tnode at %d\n",baseOffset); */ + ok = (yaffs_CheckpointWrite(dev,&baseOffset,sizeof(baseOffset)) == sizeof(baseOffset)); + if(ok) + ok = (yaffs_CheckpointWrite(dev,tn,nTnodeBytes) == nTnodeBytes); + } + } + + return ok; + +} +#endif + +#if 0 +static int yaffs_WriteCheckpointTnodes(yaffs_Object *obj) +{ + __u32 endMarker = ~0; + int ok = 1; + + if(obj->variantType == YAFFS_OBJECT_TYPE_FILE){ + ok = yaffs_CheckpointTnodeWorker(obj, + obj->variant.fileVariant.top, + obj->variant.fileVariant.topLevel, + 0); + if(ok) + ok = (yaffs_CheckpointWrite(obj->myDev,&endMarker,sizeof(endMarker)) == + sizeof(endMarker)); + } + + return ok ? 
1 : 0; +} +#endif +static int yaffs_ReadCheckpointTnodes(yaffs_Object *obj) +{ + __u32 baseChunk; + int ok = 1; + yaffs_Device *dev = obj->myDev; + yaffs_FileStructure *fileStructPtr = &obj->variant.fileVariant; + yaffs_Tnode *tn; + int nread = 0; + + ok = (yaffs_CheckpointRead(dev,&baseChunk,sizeof(baseChunk)) == sizeof(baseChunk)); + + while(ok && (~baseChunk)){ + nread++; + /* Read level 0 tnode */ + + + /* printf("read tnode at %d\n",baseChunk); */ + tn = yaffs_GetTnodeRaw(dev); + if(tn) + ok = (yaffs_CheckpointRead(dev,tn,(dev->tnodeWidth * YAFFS_NTNODES_LEVEL0)/8) == + (dev->tnodeWidth * YAFFS_NTNODES_LEVEL0)/8); + else + ok = 0; + + if(tn && ok){ + ok = yaffs_AddOrFindLevel0Tnode(dev, + fileStructPtr, + baseChunk, + tn) ? 1 : 0; + + } + + if(ok) + ok = (yaffs_CheckpointRead(dev,&baseChunk,sizeof(baseChunk)) == sizeof(baseChunk)); + + } + + T(YAFFS_TRACE_CHECKPOINT,( + TSTR("Checkpoint read tnodes %d records, last %d. ok %d" TENDSTR), + nread,baseChunk,ok)); + + return ok ? 1 : 0; +} + +#if 0 +static int yaffs_WriteCheckpointObjects(yaffs_Device *dev) +{ + yaffs_Object *obj; + yaffs_CheckpointObject cp; + int i; + int ok = 1; + struct list_head *lh; + + + /* Iterate through the objects in each hash entry, + * dumping them to the checkpointing stream. 
+ */ + + for(i = 0; ok && i < YAFFS_NOBJECT_BUCKETS; i++){ + list_for_each(lh, &dev->objectBucket[i].list) { + if (lh) { + obj = list_entry(lh, yaffs_Object, hashLink); + if (!obj->deferedFree) { + yaffs_ObjectToCheckpointObject(&cp,obj); + cp.structType = sizeof(cp); + + T(YAFFS_TRACE_CHECKPOINT,( + TSTR("Checkpoint write object %d parent %d type %d chunk %d obj addr %x" TENDSTR), + cp.objectId,cp.parentId,cp.variantType,cp.chunkId,(unsigned) obj)); + + ok = (yaffs_CheckpointWrite(dev,&cp,sizeof(cp)) == sizeof(cp)); + + if(ok && obj->variantType == YAFFS_OBJECT_TYPE_FILE){ + ok = yaffs_WriteCheckpointTnodes(obj); + } + } + } + } + } + + /* Dump end of list */ + memset(&cp,0xFF,sizeof(yaffs_CheckpointObject)); + cp.structType = sizeof(cp); + + if(ok) + ok = (yaffs_CheckpointWrite(dev,&cp,sizeof(cp)) == sizeof(cp)); + + return ok ? 1 : 0; +} +#endif +static int yaffs_ReadCheckpointObjects(yaffs_Device *dev) +{ + yaffs_Object *obj; + yaffs_CheckpointObject cp; + int ok = 1; + int done = 0; + yaffs_Object *hardList = NULL; + + while(ok && !done) { + ok = (yaffs_CheckpointRead(dev,&cp,sizeof(cp)) == sizeof(cp)); + if(cp.structType != sizeof(cp)) { + T(YAFFS_TRACE_CHECKPOINT,(TSTR("struct size %d instead of %d ok %d"TENDSTR), + cp.structType,sizeof(cp),ok)); + ok = 0; + } + + T(YAFFS_TRACE_CHECKPOINT,(TSTR("Checkpoint read object %d parent %d type %d chunk %d " TENDSTR), + cp.objectId,cp.parentId,cp.variantType,cp.chunkId)); + + if(ok && cp.objectId == ~0) + done = 1; + else if(ok){ + obj = yaffs_FindOrCreateObjectByNumber(dev,cp.objectId, cp.variantType); + if(obj) { + yaffs_CheckpointObjectToObject(obj,&cp); + if(obj->variantType == YAFFS_OBJECT_TYPE_FILE) { + ok = yaffs_ReadCheckpointTnodes(obj); + } else if(obj->variantType == YAFFS_OBJECT_TYPE_HARDLINK) { + obj->hardLinks.next = + (struct list_head *) + hardList; + hardList = obj; + } + + } + } + } + + if(ok) + yaffs_HardlinkFixup(dev,hardList); + + return ok ? 
1 : 0; +} +#if 0 +static int yaffs_WriteCheckpointSum(yaffs_Device *dev) +{ + __u32 checkpointSum; + int ok; + + yaffs_GetCheckpointSum(dev,&checkpointSum); + + ok = (yaffs_CheckpointWrite(dev,&checkpointSum,sizeof(checkpointSum)) == sizeof(checkpointSum)); + + if(!ok) + return 0; + + return 1; +} +#endif +static int yaffs_ReadCheckpointSum(yaffs_Device *dev) +{ + __u32 checkpointSum0; + __u32 checkpointSum1; + int ok; + + yaffs_GetCheckpointSum(dev,&checkpointSum0); + + ok = (yaffs_CheckpointRead(dev,&checkpointSum1,sizeof(checkpointSum1)) == sizeof(checkpointSum1)); + + if(!ok) + return 0; + + if(checkpointSum0 != checkpointSum1) + return 0; + + return 1; +} + +#if 0 +static int yaffs_WriteCheckpointData(yaffs_Device *dev) +{ + + int ok = 1; + + if(dev->skipCheckpointWrite || !dev->isYaffs2){ + T(YAFFS_TRACE_CHECKPOINT,(TSTR("skipping checkpoint write" TENDSTR))); + ok = 0; + } + + if(ok) + ok = yaffs_CheckpointOpen(dev,1); + + if(ok){ + T(YAFFS_TRACE_CHECKPOINT,(TSTR("write checkpoint validity" TENDSTR))); + ok = yaffs_WriteCheckpointValidityMarker(dev,1); + } + if(ok){ + T(YAFFS_TRACE_CHECKPOINT,(TSTR("write checkpoint device" TENDSTR))); + ok = yaffs_WriteCheckpointDevice(dev); + } + if(ok){ + T(YAFFS_TRACE_CHECKPOINT,(TSTR("write checkpoint objects" TENDSTR))); + ok = yaffs_WriteCheckpointObjects(dev); + } + if(ok){ + T(YAFFS_TRACE_CHECKPOINT,(TSTR("write checkpoint validity" TENDSTR))); + ok = yaffs_WriteCheckpointValidityMarker(dev,0); + } + + if(ok){ + ok = yaffs_WriteCheckpointSum(dev); + } + + + if(!yaffs_CheckpointClose(dev)) + ok = 0; + + if(ok) + dev->isCheckpointed = 1; + else + dev->isCheckpointed = 0; + + return dev->isCheckpointed; +} +#endif +static int yaffs_ReadCheckpointData(yaffs_Device *dev) +{ + int ok = 1; + + if(dev->skipCheckpointRead || !dev->isYaffs2){ + T(YAFFS_TRACE_CHECKPOINT,(TSTR("skipping checkpoint read" TENDSTR))); + ok = 0; + } + + if(ok) + ok = yaffs_CheckpointOpen(dev,0); /* open for read */ + + if(ok){ + 
T(YAFFS_TRACE_CHECKPOINT,(TSTR("read checkpoint validity" TENDSTR))); + ok = yaffs_ReadCheckpointValidityMarker(dev,1); + } + if(ok){ + T(YAFFS_TRACE_CHECKPOINT,(TSTR("read checkpoint device" TENDSTR))); + ok = yaffs_ReadCheckpointDevice(dev); + } + if(ok){ + T(YAFFS_TRACE_CHECKPOINT,(TSTR("read checkpoint objects" TENDSTR))); + ok = yaffs_ReadCheckpointObjects(dev); + } + if(ok){ + T(YAFFS_TRACE_CHECKPOINT,(TSTR("read checkpoint validity" TENDSTR))); + ok = yaffs_ReadCheckpointValidityMarker(dev,0); + } + + if(ok){ + ok = yaffs_ReadCheckpointSum(dev); + T(YAFFS_TRACE_CHECKPOINT,(TSTR("read checkpoint checksum %d" TENDSTR),ok)); + } + + if(!yaffs_CheckpointClose(dev)) + ok = 0; + + if(ok) + dev->isCheckpointed = 1; + else + dev->isCheckpointed = 0; + + return ok ? 1 : 0; + +} + +static void yaffs_InvalidateCheckpoint(yaffs_Device *dev) +{ + if(dev->isCheckpointed || + dev->blocksInCheckpoint > 0){ + dev->isCheckpointed = 0; + yaffs_CheckpointInvalidateStream(dev); + if(dev->superBlock && dev->markSuperBlockDirty) + dev->markSuperBlockDirty(dev->superBlock); + } +} + +#if 0 +int yaffs_CheckpointSave(yaffs_Device *dev) +{ + + T(YAFFS_TRACE_CHECKPOINT,(TSTR("save entry: isCheckpointed %d"TENDSTR),dev->isCheckpointed)); + + yaffs_VerifyObjects(dev); + yaffs_VerifyBlocks(dev); + yaffs_VerifyFreeChunks(dev); + + if(!dev->isCheckpointed) { + yaffs_InvalidateCheckpoint(dev); + yaffs_WriteCheckpointData(dev); + } + + T(YAFFS_TRACE_ALWAYS,(TSTR("save exit: isCheckpointed %d"TENDSTR),dev->isCheckpointed)); + + return dev->isCheckpointed; +} +#endif +int yaffs_CheckpointRestore(yaffs_Device *dev) +{ + int retval; + T(YAFFS_TRACE_CHECKPOINT,(TSTR("restore entry: isCheckpointed %d"TENDSTR),dev->isCheckpointed)); + + retval = yaffs_ReadCheckpointData(dev); + + if(dev->isCheckpointed){ + yaffs_VerifyObjects(dev); + yaffs_VerifyBlocks(dev); + yaffs_VerifyFreeChunks(dev); + } + + T(YAFFS_TRACE_CHECKPOINT,(TSTR("restore exit: isCheckpointed %d"TENDSTR),dev->isCheckpointed)); + + 
return retval; +} + +/*--------------------- File read/write ------------------------ + * Read and write have very similar structures. + * In general the read/write has three parts to it + * An incomplete chunk to start with (if the read/write is not chunk-aligned) + * Some complete chunks + * An incomplete chunk to end off with + * + * Curve-balls: the first chunk might also be the last chunk. + */ + +int yaffs_ReadDataFromFile(yaffs_Object * in, __u8 * buffer, loff_t offset, + int nBytes) +{ + + int chunk; + int start; + int nToCopy; + int n = nBytes; + int nDone = 0; + yaffs_ChunkCache *cache; + + yaffs_Device *dev; + + dev = in->myDev; + + while (n > 0) { + //chunk = offset / dev->nDataBytesPerChunk + 1; + //start = offset % dev->nDataBytesPerChunk; + yaffs_AddrToChunk(dev,offset,&chunk,&start); + chunk++; + + /* OK now check for the curveball where the start and end are in + * the same chunk. + */ + if ((start + n) < dev->nDataBytesPerChunk) { + nToCopy = n; + } else { + nToCopy = dev->nDataBytesPerChunk - start; + } + + cache = yaffs_FindChunkCache(in, chunk); + + /* If the chunk is already in the cache or it is less than a whole chunk + * then use the cache (if there is caching) + * else bypass the cache. + */ + if (cache || nToCopy != dev->nDataBytesPerChunk) { + if (dev->nShortOpCaches > 0) { + + /* If we can't find the data in the cache, then load it up. 
*/ + + if (!cache) { + cache = yaffs_GrabChunkCache(in->myDev); + cache->object = in; + cache->chunkId = chunk; + cache->dirty = 0; + cache->locked = 0; + yaffs_ReadChunkDataFromObject(in, chunk, + cache-> + data); + cache->nBytes = 0; + } + + yaffs_UseChunkCache(dev, cache, 0); + + cache->locked = 1; + +#ifdef CONFIG_YAFFS_WINCE + yfsd_UnlockYAFFS(TRUE); +#endif + memcpy(buffer, &cache->data[start], nToCopy); + +#ifdef CONFIG_YAFFS_WINCE + yfsd_LockYAFFS(TRUE); +#endif + cache->locked = 0; + } else { + /* Read into the local buffer then copy..*/ + + __u8 *localBuffer = + yaffs_GetTempBuffer(dev, __LINE__); + yaffs_ReadChunkDataFromObject(in, chunk, + localBuffer); +#ifdef CONFIG_YAFFS_WINCE + yfsd_UnlockYAFFS(TRUE); +#endif + memcpy(buffer, &localBuffer[start], nToCopy); + +#ifdef CONFIG_YAFFS_WINCE + yfsd_LockYAFFS(TRUE); +#endif + yaffs_ReleaseTempBuffer(dev, localBuffer, + __LINE__); + } + + } else { +#ifdef CONFIG_YAFFS_WINCE + __u8 *localBuffer = yaffs_GetTempBuffer(dev, __LINE__); + + /* Under WinCE can't do direct transfer. Need to use a local buffer. + * This is because we otherwise screw up WinCE's memory mapper + */ + yaffs_ReadChunkDataFromObject(in, chunk, localBuffer); + +#ifdef CONFIG_YAFFS_WINCE + yfsd_UnlockYAFFS(TRUE); +#endif + memcpy(buffer, localBuffer, dev->nDataBytesPerChunk); + +#ifdef CONFIG_YAFFS_WINCE + yfsd_LockYAFFS(TRUE); + yaffs_ReleaseTempBuffer(dev, localBuffer, __LINE__); +#endif + +#else + /* A full chunk. Read directly into the supplied buffer. 
*/ + yaffs_ReadChunkDataFromObject(in, chunk, buffer); +#endif + } + + n -= nToCopy; + offset += nToCopy; + buffer += nToCopy; + nDone += nToCopy; + + } + + return nDone; +} + +int yaffs_WriteDataToFile(yaffs_Object * in, const __u8 * buffer, loff_t offset, + int nBytes, int writeThrough) +{ + + int chunk; + int start; + int nToCopy; + int n = nBytes; + int nDone = 0; + int nToWriteBack; + int startOfWrite = offset; + int chunkWritten = 0; + int nBytesRead; + + yaffs_Device *dev; + + dev = in->myDev; + + while (n > 0 && chunkWritten >= 0) { + //chunk = offset / dev->nDataBytesPerChunk + 1; + //start = offset % dev->nDataBytesPerChunk; + yaffs_AddrToChunk(dev,offset,&chunk,&start); + chunk++; + + /* OK now check for the curveball where the start and end are in + * the same chunk. + */ + + if ((start + n) < dev->nDataBytesPerChunk) { + nToCopy = n; + + /* Now folks, to calculate how many bytes to write back.... + * If we're overwriting and not writing to then end of file then + * we need to write back as much as was there before. + */ + + nBytesRead = + in->variant.fileVariant.fileSize - + ((chunk - 1) * dev->nDataBytesPerChunk); + + if (nBytesRead > dev->nDataBytesPerChunk) { + nBytesRead = dev->nDataBytesPerChunk; + } + + nToWriteBack = + (nBytesRead > + (start + n)) ? 
nBytesRead : (start + n); + + } else { + nToCopy = dev->nDataBytesPerChunk - start; + nToWriteBack = dev->nDataBytesPerChunk; + } + + if (nToCopy != dev->nDataBytesPerChunk) { + /* An incomplete start or end chunk (or maybe both start and end chunk) */ + if (dev->nShortOpCaches > 0) { + yaffs_ChunkCache *cache; + /* If we can't find the data in the cache, then load the cache */ + cache = yaffs_FindChunkCache(in, chunk); + + if (!cache + && yaffs_CheckSpaceForAllocation(in-> + myDev)) { + cache = yaffs_GrabChunkCache(in->myDev); + cache->object = in; + cache->chunkId = chunk; + cache->dirty = 0; + cache->locked = 0; + yaffs_ReadChunkDataFromObject(in, chunk, + cache-> + data); + } + else if(cache && + !cache->dirty && + !yaffs_CheckSpaceForAllocation(in->myDev)){ + /* Drop the cache if it was a read cache item and + * no space check has been made for it. + */ + cache = NULL; + } + + if (cache) { + yaffs_UseChunkCache(dev, cache, 1); + cache->locked = 1; +#ifdef CONFIG_YAFFS_WINCE + yfsd_UnlockYAFFS(TRUE); +#endif + + memcpy(&cache->data[start], buffer, + nToCopy); + +#ifdef CONFIG_YAFFS_WINCE + yfsd_LockYAFFS(TRUE); +#endif + cache->locked = 0; + cache->nBytes = nToWriteBack; + + if (writeThrough) { + chunkWritten = + yaffs_WriteChunkDataToObject + (cache->object, + cache->chunkId, + cache->data, cache->nBytes, + 1); + cache->dirty = 0; + } + + } else { + chunkWritten = -1; /* fail the write */ + } + } else { + /* An incomplete start or end chunk (or maybe both start and end chunk) + * Read into the local buffer then copy, then copy over and write back. 
+ */ + + __u8 *localBuffer = + yaffs_GetTempBuffer(dev, __LINE__); + + yaffs_ReadChunkDataFromObject(in, chunk, + localBuffer); + +#ifdef CONFIG_YAFFS_WINCE + yfsd_UnlockYAFFS(TRUE); +#endif + + memcpy(&localBuffer[start], buffer, nToCopy); + +#ifdef CONFIG_YAFFS_WINCE + yfsd_LockYAFFS(TRUE); +#endif + chunkWritten = + yaffs_WriteChunkDataToObject(in, chunk, + localBuffer, + nToWriteBack, + 0); + + yaffs_ReleaseTempBuffer(dev, localBuffer, + __LINE__); + + } + + } else { + +#ifdef CONFIG_YAFFS_WINCE + /* Under WinCE can't do direct transfer. Need to use a local buffer. + * This is because we otherwise screw up WinCE's memory mapper + */ + __u8 *localBuffer = yaffs_GetTempBuffer(dev, __LINE__); +#ifdef CONFIG_YAFFS_WINCE + yfsd_UnlockYAFFS(TRUE); +#endif + memcpy(localBuffer, buffer, dev->nDataBytesPerChunk); +#ifdef CONFIG_YAFFS_WINCE + yfsd_LockYAFFS(TRUE); +#endif + chunkWritten = + yaffs_WriteChunkDataToObject(in, chunk, localBuffer, + dev->nDataBytesPerChunk, + 0); + yaffs_ReleaseTempBuffer(dev, localBuffer, __LINE__); +#else + /* A full chunk. Write directly from the supplied buffer. */ + chunkWritten = + yaffs_WriteChunkDataToObject(in, chunk, buffer, + dev->nDataBytesPerChunk, + 0); +#endif + /* Since we've overwritten the cached data, we better invalidate it. 
*/ + yaffs_InvalidateChunkCache(in, chunk); + } + + if (chunkWritten >= 0) { + n -= nToCopy; + offset += nToCopy; + buffer += nToCopy; + nDone += nToCopy; + } + + } + + /* Update file object */ + + if ((startOfWrite + nDone) > in->variant.fileVariant.fileSize) { + in->variant.fileVariant.fileSize = (startOfWrite + nDone); + } + + in->dirty = 1; + + return nDone; +} + + +/* ---------------------- File resizing stuff ------------------ */ + +static void yaffs_PruneResizedChunks(yaffs_Object * in, int newSize) +{ + + yaffs_Device *dev = in->myDev; + int oldFileSize = in->variant.fileVariant.fileSize; + + int lastDel = 1 + (oldFileSize - 1) / dev->nDataBytesPerChunk; + + int startDel = 1 + (newSize + dev->nDataBytesPerChunk - 1) / + dev->nDataBytesPerChunk; + int i; + int chunkId; + + /* Delete backwards so that we don't end up with holes if + * power is lost part-way through the operation. + */ + for (i = lastDel; i >= startDel; i--) { + /* NB this could be optimised somewhat, + * eg. could retrieve the tags and write them without + * using yaffs_DeleteChunk + */ + + chunkId = yaffs_FindAndDeleteChunkInFile(in, i, NULL); + if (chunkId > 0) { + if (chunkId < + (dev->internalStartBlock * dev->nChunksPerBlock) + || chunkId >= + ((dev->internalEndBlock + + 1) * dev->nChunksPerBlock)) { + T(YAFFS_TRACE_ALWAYS, + (TSTR("Found daft chunkId %d for %d" TENDSTR), + chunkId, i)); + } else { + in->nDataChunks--; + yaffs_DeleteChunk(dev, chunkId, 1, __LINE__); + } + } + } + +} + +int yaffs_ResizeFile(yaffs_Object * in, loff_t newSize) +{ + + int oldFileSize = in->variant.fileVariant.fileSize; + int newSizeOfPartialChunk; + int newFullChunks; + + yaffs_Device *dev = in->myDev; + + yaffs_AddrToChunk(dev, newSize, &newFullChunks, &newSizeOfPartialChunk); + + yaffs_FlushFilesChunkCache(in); + yaffs_InvalidateWholeChunkCache(in); + + yaffs_CheckGarbageCollection(dev); + + if (in->variantType != YAFFS_OBJECT_TYPE_FILE) { + return yaffs_GetFileSize(in); + } + + if (newSize == 
oldFileSize) { + return oldFileSize; + } + + if (newSize < oldFileSize) { + + yaffs_PruneResizedChunks(in, newSize); + + if (newSizeOfPartialChunk != 0) { + int lastChunk = 1 + newFullChunks; + + __u8 *localBuffer = yaffs_GetTempBuffer(dev, __LINE__); + + /* Got to read and rewrite the last chunk with its new size and zero pad */ + yaffs_ReadChunkDataFromObject(in, lastChunk, + localBuffer); + + memset(localBuffer + newSizeOfPartialChunk, 0, + dev->nDataBytesPerChunk - newSizeOfPartialChunk); + + yaffs_WriteChunkDataToObject(in, lastChunk, localBuffer, + newSizeOfPartialChunk, 1); + + yaffs_ReleaseTempBuffer(dev, localBuffer, __LINE__); + } + + in->variant.fileVariant.fileSize = newSize; + + yaffs_PruneFileStructure(dev, &in->variant.fileVariant); + } else { + /* newsSize > oldFileSize */ + in->variant.fileVariant.fileSize = newSize; + } + + + + /* Write a new object header. + * show we've shrunk the file, if need be + * Do this only if the file is not in the deleted directories. + */ + if (in->parent->objectId != YAFFS_OBJECTID_UNLINKED && + in->parent->objectId != YAFFS_OBJECTID_DELETED) { + yaffs_UpdateObjectHeader(in, NULL, 0, + (newSize < oldFileSize) ? 1 : 0, 0); + } + + return YAFFS_OK; +} + +loff_t yaffs_GetFileSize(yaffs_Object * obj) +{ + obj = yaffs_GetEquivalentObject(obj); + + switch (obj->variantType) { + case YAFFS_OBJECT_TYPE_FILE: + return obj->variant.fileVariant.fileSize; + case YAFFS_OBJECT_TYPE_SYMLINK: + return yaffs_strlen(obj->variant.symLinkVariant.alias); + default: + return 0; + } +} + + + +int yaffs_FlushFile(yaffs_Object * in, int updateTime) +{ + int retVal; + if (in->dirty) { + yaffs_FlushFilesChunkCache(in); + if (updateTime) { +#ifdef CONFIG_YAFFS_WINCE + yfsd_WinFileTimeNow(in->win_mtime); +#else + + in->yst_mtime = Y_CURRENT_TIME; + +#endif + } + + retVal = + (yaffs_UpdateObjectHeader(in, NULL, 0, 0, 0) >= + 0) ? 
YAFFS_OK : YAFFS_FAIL; + } else { + retVal = YAFFS_OK; + } + + return retVal; + +} + +static int yaffs_DoGenericObjectDeletion(yaffs_Object * in) +{ + + /* First off, invalidate the file's data in the cache, without flushing. */ + yaffs_InvalidateWholeChunkCache(in); + + if (in->myDev->isYaffs2 && (in->parent != in->myDev->deletedDir)) { + /* Move to the unlinked directory so we have a record that it was deleted. */ + yaffs_ChangeObjectName(in, in->myDev->deletedDir,"deleted", 0, 0); + + } + + yaffs_RemoveObjectFromDirectory(in); + yaffs_DeleteChunk(in->myDev, in->chunkId, 1, __LINE__); + in->chunkId = -1; + + yaffs_FreeObject(in); + return YAFFS_OK; + +} + +/* yaffs_DeleteFile deletes the whole file data + * and the inode associated with the file. + * It does not delete the links associated with the file. + */ +static int yaffs_UnlinkFile(yaffs_Object * in) +{ + + int retVal; + int immediateDeletion = 0; + + if (1) { +/* XXX U-BOOT XXX */ +#if 0 +#ifdef __KERNEL__ + if (!in->myInode) { + immediateDeletion = 1; + + } +#endif +#else + if (in->inUse <= 0) { + immediateDeletion = 1; + + } +#endif + if (immediateDeletion) { + retVal = + yaffs_ChangeObjectName(in, in->myDev->deletedDir, + "deleted", 0, 0); + T(YAFFS_TRACE_TRACING, + (TSTR("yaffs: immediate deletion of file %d" TENDSTR), + in->objectId)); + in->deleted = 1; + in->myDev->nDeletedFiles++; + if (0 && in->myDev->isYaffs2) { + yaffs_ResizeFile(in, 0); + } + yaffs_SoftDeleteFile(in); + } else { + retVal = + yaffs_ChangeObjectName(in, in->myDev->unlinkedDir, + "unlinked", 0, 0); + } + + } + return retVal; +} + +int yaffs_DeleteFile(yaffs_Object * in) +{ + int retVal = YAFFS_OK; + + if (in->nDataChunks > 0) { + /* Use soft deletion if there is data in the file */ + if (!in->unlinked) { + retVal = yaffs_UnlinkFile(in); + } + if (retVal == YAFFS_OK && in->unlinked && !in->deleted) { + in->deleted = 1; + in->myDev->nDeletedFiles++; + yaffs_SoftDeleteFile(in); + } + return in->deleted ? 
YAFFS_OK : YAFFS_FAIL; + } else { + /* The file has no data chunks so we toss it immediately */ + yaffs_FreeTnode(in->myDev, in->variant.fileVariant.top); + in->variant.fileVariant.top = NULL; + yaffs_DoGenericObjectDeletion(in); + + return YAFFS_OK; + } +} + +static int yaffs_DeleteDirectory(yaffs_Object * in) +{ + /* First check that the directory is empty. */ + if (list_empty(&in->variant.directoryVariant.children)) { + return yaffs_DoGenericObjectDeletion(in); + } + + return YAFFS_FAIL; + +} + +static int yaffs_DeleteSymLink(yaffs_Object * in) +{ + YFREE(in->variant.symLinkVariant.alias); + + return yaffs_DoGenericObjectDeletion(in); +} + +static int yaffs_DeleteHardLink(yaffs_Object * in) +{ + /* remove this hardlink from the list assocaited with the equivalent + * object + */ + list_del(&in->hardLinks); + return yaffs_DoGenericObjectDeletion(in); +} + +static void yaffs_DestroyObject(yaffs_Object * obj) +{ + switch (obj->variantType) { + case YAFFS_OBJECT_TYPE_FILE: + yaffs_DeleteFile(obj); + break; + case YAFFS_OBJECT_TYPE_DIRECTORY: + yaffs_DeleteDirectory(obj); + break; + case YAFFS_OBJECT_TYPE_SYMLINK: + yaffs_DeleteSymLink(obj); + break; + case YAFFS_OBJECT_TYPE_HARDLINK: + yaffs_DeleteHardLink(obj); + break; + case YAFFS_OBJECT_TYPE_SPECIAL: + yaffs_DoGenericObjectDeletion(obj); + break; + case YAFFS_OBJECT_TYPE_UNKNOWN: + break; /* should not happen. */ + } +} + +static int yaffs_UnlinkWorker(yaffs_Object * obj) +{ + + if (obj->variantType == YAFFS_OBJECT_TYPE_HARDLINK) { + return yaffs_DeleteHardLink(obj); + } else if (!list_empty(&obj->hardLinks)) { + /* Curve ball: We're unlinking an object that has a hardlink. + * + * This problem arises because we are not strictly following + * The Linux link/inode model. + * + * We can't really delete the object. + * Instead, we do the following: + * - Select a hardlink. 
+ * - Unhook it from the hard links + * - Unhook it from its parent directory (so that the rename can work) + * - Rename the object to the hardlink's name. + * - Delete the hardlink + */ + + yaffs_Object *hl; + int retVal; + YCHAR name[YAFFS_MAX_NAME_LENGTH + 1]; + + hl = list_entry(obj->hardLinks.next, yaffs_Object, hardLinks); + + list_del_init(&hl->hardLinks); + list_del_init(&hl->siblings); + + yaffs_GetObjectName(hl, name, YAFFS_MAX_NAME_LENGTH + 1); + + retVal = yaffs_ChangeObjectName(obj, hl->parent, name, 0, 0); + + if (retVal == YAFFS_OK) { + retVal = yaffs_DoGenericObjectDeletion(hl); + } + return retVal; + + } else { + switch (obj->variantType) { + case YAFFS_OBJECT_TYPE_FILE: + return yaffs_UnlinkFile(obj); + break; + case YAFFS_OBJECT_TYPE_DIRECTORY: + return yaffs_DeleteDirectory(obj); + break; + case YAFFS_OBJECT_TYPE_SYMLINK: + return yaffs_DeleteSymLink(obj); + break; + case YAFFS_OBJECT_TYPE_SPECIAL: + return yaffs_DoGenericObjectDeletion(obj); + break; + case YAFFS_OBJECT_TYPE_HARDLINK: + case YAFFS_OBJECT_TYPE_UNKNOWN: + default: + return YAFFS_FAIL; + } + } +} + + +static int yaffs_UnlinkObject( yaffs_Object *obj) +{ + + if (obj && obj->unlinkAllowed) { + return yaffs_UnlinkWorker(obj); + } + + return YAFFS_FAIL; + +} +int yaffs_Unlink(yaffs_Object * dir, const YCHAR * name) +{ + yaffs_Object *obj; + + obj = yaffs_FindObjectByName(dir, name); + return yaffs_UnlinkObject(obj); +} + +/*----------------------- Initialisation Scanning ---------------------- */ + +static void yaffs_HandleShadowedObject(yaffs_Device * dev, int objId, + int backwardScanning) +{ + yaffs_Object *obj; + + if (!backwardScanning) { + /* Handle YAFFS1 forward scanning case + * For YAFFS1 we always do the deletion + */ + + } else { + /* Handle YAFFS2 case (backward scanning) + * If the shadowed object exists then ignore. 
+ */ + if (yaffs_FindObjectByNumber(dev, objId)) { + return; + } + } + + /* Let's create it (if it does not exist) assuming it is a file so that it can do shrinking etc. + * We put it in unlinked dir to be cleaned up after the scanning + */ + obj = + yaffs_FindOrCreateObjectByNumber(dev, objId, + YAFFS_OBJECT_TYPE_FILE); + yaffs_AddObjectToDirectory(dev->unlinkedDir, obj); + obj->variant.fileVariant.shrinkSize = 0; + obj->valid = 1; /* So that we don't read any other info for this file */ + +} + +typedef struct { + int seq; + int block; +} yaffs_BlockIndex; + + +static void yaffs_HardlinkFixup(yaffs_Device *dev, yaffs_Object *hardList) +{ + yaffs_Object *hl; + yaffs_Object *in; + + while (hardList) { + hl = hardList; + hardList = (yaffs_Object *) (hardList->hardLinks.next); + + in = yaffs_FindObjectByNumber(dev, + hl->variant.hardLinkVariant. + equivalentObjectId); + + if (in) { + /* Add the hardlink pointers */ + hl->variant.hardLinkVariant.equivalentObject = in; + list_add(&hl->hardLinks, &in->hardLinks); + } else { + /* Todo Need to report/handle this better. + * Got a problem... 
hardlink to a non-existant object + */ + hl->variant.hardLinkVariant.equivalentObject = NULL; + INIT_LIST_HEAD(&hl->hardLinks); + + } + + } + +} + + + + + +static int ybicmp(const void *a, const void *b){ + register int aseq = ((yaffs_BlockIndex *)a)->seq; + register int bseq = ((yaffs_BlockIndex *)b)->seq; + register int ablock = ((yaffs_BlockIndex *)a)->block; + register int bblock = ((yaffs_BlockIndex *)b)->block; + if( aseq == bseq ) + return ablock - bblock; + else + return aseq - bseq; + +} +#if 0 +static int yaffs_Scan(yaffs_Device * dev) +{ + yaffs_ExtendedTags tags; + int blk; + int blockIterator; + int startIterator; + int endIterator; + int nBlocksToScan = 0; + int result; + + int chunk; + int c; + int deleted; + yaffs_BlockState state; + yaffs_Object *hardList = NULL; + yaffs_BlockInfo *bi; + int sequenceNumber; + yaffs_ObjectHeader *oh; + yaffs_Object *in; + yaffs_Object *parent; + int nBlocks = dev->internalEndBlock - dev->internalStartBlock + 1; + + int alloc_failed = 0; + + + __u8 *chunkData; + + yaffs_BlockIndex *blockIndex = NULL; + + if (dev->isYaffs2) { + T(YAFFS_TRACE_SCAN, + (TSTR("yaffs_Scan is not for YAFFS2!" TENDSTR))); + return YAFFS_FAIL; + } + + //TODO Throw all the yaffs2 stuuf out of yaffs_Scan since it is only for yaffs1 format. + + T(YAFFS_TRACE_SCAN, + (TSTR("yaffs_Scan starts intstartblk %d intendblk %d..." 
TENDSTR), + dev->internalStartBlock, dev->internalEndBlock)); + + chunkData = yaffs_GetTempBuffer(dev, __LINE__); + + dev->sequenceNumber = YAFFS_LOWEST_SEQUENCE_NUMBER; + + if (dev->isYaffs2) { + blockIndex = YMALLOC(nBlocks * sizeof(yaffs_BlockIndex)); + if(!blockIndex) + return YAFFS_FAIL; + } + + /* Scan all the blocks to determine their state */ + for (blk = dev->internalStartBlock; blk <= dev->internalEndBlock; blk++) { + bi = yaffs_GetBlockInfo(dev, blk); + yaffs_ClearChunkBits(dev, blk); + bi->pagesInUse = 0; + bi->softDeletions = 0; + + yaffs_QueryInitialBlockState(dev, blk, &state, &sequenceNumber); + + bi->blockState = state; + bi->sequenceNumber = sequenceNumber; + + T(YAFFS_TRACE_SCAN_DEBUG, + (TSTR("Block scanning block %d state %d seq %d" TENDSTR), blk, + state, sequenceNumber)); + + if (state == YAFFS_BLOCK_STATE_DEAD) { + T(YAFFS_TRACE_BAD_BLOCKS, + (TSTR("block %d is bad" TENDSTR), blk)); + } else if (state == YAFFS_BLOCK_STATE_EMPTY) { + T(YAFFS_TRACE_SCAN_DEBUG, + (TSTR("Block empty " TENDSTR))); + dev->nErasedBlocks++; + dev->nFreeChunks += dev->nChunksPerBlock; + } else if (state == YAFFS_BLOCK_STATE_NEEDS_SCANNING) { + + /* Determine the highest sequence number */ + if (dev->isYaffs2 && + sequenceNumber >= YAFFS_LOWEST_SEQUENCE_NUMBER && + sequenceNumber < YAFFS_HIGHEST_SEQUENCE_NUMBER) { + + blockIndex[nBlocksToScan].seq = sequenceNumber; + blockIndex[nBlocksToScan].block = blk; + + nBlocksToScan++; + + if (sequenceNumber >= dev->sequenceNumber) { + dev->sequenceNumber = sequenceNumber; + } + } else if (dev->isYaffs2) { + /* TODO: Nasty sequence number! 
*/ + T(YAFFS_TRACE_SCAN, + (TSTR + ("Block scanning block %d has bad sequence number %d" + TENDSTR), blk, sequenceNumber)); + + } + } + } + + if (dev->isYaffs2) { + yaffs_BlockIndex temp; + int i; + int j; + + for (i = 0; i < nBlocksToScan; i++) + for (j = i + 1; j < nBlocksToScan; j++) + if (blockIndex[i].seq > blockIndex[j].seq) { + temp = blockIndex[j]; + blockIndex[j] = blockIndex[i]; + blockIndex[i] = temp; + } + } + + /* Now scan the blocks looking at the data. */ + if (dev->isYaffs2) { + startIterator = 0; + endIterator = nBlocksToScan - 1; + T(YAFFS_TRACE_SCAN_DEBUG, + (TSTR("%d blocks to be scanned" TENDSTR), nBlocksToScan)); + } else { + startIterator = dev->internalStartBlock; + endIterator = dev->internalEndBlock; + } + + /* For each block.... */ + for (blockIterator = startIterator; !alloc_failed && blockIterator <= endIterator; + blockIterator++) { + + if (dev->isYaffs2) { + /* get the block to scan in the correct order */ + blk = blockIndex[blockIterator].block; + } else { + blk = blockIterator; + } + + bi = yaffs_GetBlockInfo(dev, blk); + state = bi->blockState; + + deleted = 0; + + /* For each chunk in each block that needs scanning....*/ + for (c = 0; !alloc_failed && c < dev->nChunksPerBlock && + state == YAFFS_BLOCK_STATE_NEEDS_SCANNING; c++) { + /* Read the tags and decide what to do */ + chunk = blk * dev->nChunksPerBlock + c; + + result = yaffs_ReadChunkWithTagsFromNAND(dev, chunk, NULL, + &tags); + + /* Let's have a good look at this chunk... */ + + if (!dev->isYaffs2 && tags.chunkDeleted) { + /* YAFFS1 only... 
+ * A deleted chunk + */ + deleted++; + dev->nFreeChunks++; + /*T((" %d %d deleted\n",blk,c)); */ + } else if (!tags.chunkUsed) { + /* An unassigned chunk in the block + * This means that either the block is empty or + * this is the one being allocated from + */ + + if (c == 0) { + /* We're looking at the first chunk in the block so the block is unused */ + state = YAFFS_BLOCK_STATE_EMPTY; + dev->nErasedBlocks++; + } else { + /* this is the block being allocated from */ + T(YAFFS_TRACE_SCAN, + (TSTR + (" Allocating from %d %d" TENDSTR), + blk, c)); + state = YAFFS_BLOCK_STATE_ALLOCATING; + dev->allocationBlock = blk; + dev->allocationPage = c; + dev->allocationBlockFinder = blk; + /* Set it to here to encourage the allocator to go forth from here. */ + + /* Yaffs2 sanity check: + * This should be the one with the highest sequence number + */ + if (dev->isYaffs2 + && (dev->sequenceNumber != + bi->sequenceNumber)) { + T(YAFFS_TRACE_ALWAYS, + (TSTR + ("yaffs: Allocation block %d was not highest sequence id:" + " block seq = %d, dev seq = %d" + TENDSTR), blk,bi->sequenceNumber,dev->sequenceNumber)); + } + } + + dev->nFreeChunks += (dev->nChunksPerBlock - c); + } else if (tags.chunkId > 0) { + /* chunkId > 0 so it is a data chunk... */ + unsigned int endpos; + + yaffs_SetChunkBit(dev, blk, c); + bi->pagesInUse++; + + in = yaffs_FindOrCreateObjectByNumber(dev, + tags. + objectId, + YAFFS_OBJECT_TYPE_FILE); + /* PutChunkIntoFile checks for a clash (two data chunks with + * the same chunkId). + */ + + if(!in) + alloc_failed = 1; + + if(in){ + if(!yaffs_PutChunkIntoFile(in, tags.chunkId, chunk,1)) + alloc_failed = 1; + } + + endpos = + (tags.chunkId - 1) * dev->nDataBytesPerChunk + + tags.byteCount; + if (in && + in->variantType == YAFFS_OBJECT_TYPE_FILE + && in->variant.fileVariant.scannedFileSize < + endpos) { + in->variant.fileVariant. + scannedFileSize = endpos; + if (!dev->useHeaderFileSize) { + in->variant.fileVariant. + fileSize = + in->variant.fileVariant. 
+ scannedFileSize; + } + + } + /* T((" %d %d data %d %d\n",blk,c,tags.objectId,tags.chunkId)); */ + } else { + /* chunkId == 0, so it is an ObjectHeader. + * Thus, we read in the object header and make the object + */ + yaffs_SetChunkBit(dev, blk, c); + bi->pagesInUse++; + + result = yaffs_ReadChunkWithTagsFromNAND(dev, chunk, + chunkData, + NULL); + + oh = (yaffs_ObjectHeader *) chunkData; + + in = yaffs_FindObjectByNumber(dev, + tags.objectId); + if (in && in->variantType != oh->type) { + /* This should not happen, but somehow + * Wev'e ended up with an objectId that has been reused but not yet + * deleted, and worse still it has changed type. Delete the old object. + */ + + yaffs_DestroyObject(in); + + in = 0; + } + + in = yaffs_FindOrCreateObjectByNumber(dev, + tags. + objectId, + oh->type); + + if(!in) + alloc_failed = 1; + + if (in && oh->shadowsObject > 0) { + yaffs_HandleShadowedObject(dev, + oh-> + shadowsObject, + 0); + } + + if (in && in->valid) { + /* We have already filled this one. We have a duplicate and need to resolve it. */ + + unsigned existingSerial = in->serial; + unsigned newSerial = tags.serialNumber; + + if (dev->isYaffs2 || + ((existingSerial + 1) & 3) == + newSerial) { + /* Use new one - destroy the exisiting one */ + yaffs_DeleteChunk(dev, + in->chunkId, + 1, __LINE__); + in->valid = 0; + } else { + /* Use existing - destroy this one. 
*/ + yaffs_DeleteChunk(dev, chunk, 1, + __LINE__); + } + } + + if (in && !in->valid && + (tags.objectId == YAFFS_OBJECTID_ROOT || + tags.objectId == YAFFS_OBJECTID_LOSTNFOUND)) { + /* We only load some info, don't fiddle with directory structure */ + in->valid = 1; + in->variantType = oh->type; + + in->yst_mode = oh->yst_mode; +#ifdef CONFIG_YAFFS_WINCE + in->win_atime[0] = oh->win_atime[0]; + in->win_ctime[0] = oh->win_ctime[0]; + in->win_mtime[0] = oh->win_mtime[0]; + in->win_atime[1] = oh->win_atime[1]; + in->win_ctime[1] = oh->win_ctime[1]; + in->win_mtime[1] = oh->win_mtime[1]; +#else + in->yst_uid = oh->yst_uid; + in->yst_gid = oh->yst_gid; + in->yst_atime = oh->yst_atime; + in->yst_mtime = oh->yst_mtime; + in->yst_ctime = oh->yst_ctime; + in->yst_rdev = oh->yst_rdev; +#endif + in->chunkId = chunk; + + } else if (in && !in->valid) { + /* we need to load this info */ + + in->valid = 1; + in->variantType = oh->type; + + in->yst_mode = oh->yst_mode; +#ifdef CONFIG_YAFFS_WINCE + in->win_atime[0] = oh->win_atime[0]; + in->win_ctime[0] = oh->win_ctime[0]; + in->win_mtime[0] = oh->win_mtime[0]; + in->win_atime[1] = oh->win_atime[1]; + in->win_ctime[1] = oh->win_ctime[1]; + in->win_mtime[1] = oh->win_mtime[1]; +#else + in->yst_uid = oh->yst_uid; + in->yst_gid = oh->yst_gid; + in->yst_atime = oh->yst_atime; + in->yst_mtime = oh->yst_mtime; + in->yst_ctime = oh->yst_ctime; + in->yst_rdev = oh->yst_rdev; +#endif + in->chunkId = chunk; + + yaffs_SetObjectName(in, oh->name); + in->dirty = 0; + + /* directory stuff... + * hook up to parent + */ + + parent = + yaffs_FindOrCreateObjectByNumber + (dev, oh->parentObjectId, + YAFFS_OBJECT_TYPE_DIRECTORY); + if (parent->variantType == + YAFFS_OBJECT_TYPE_UNKNOWN) { + /* Set up as a directory */ + parent->variantType = + YAFFS_OBJECT_TYPE_DIRECTORY; + INIT_LIST_HEAD(&parent->variant. + directoryVariant. + children); + } else if (parent->variantType != + YAFFS_OBJECT_TYPE_DIRECTORY) + { + /* Hoosterman, another problem.... 
+ * We're trying to use a non-directory as a directory + */ + + T(YAFFS_TRACE_ERROR, + (TSTR + ("yaffs tragedy: attempting to use non-directory as" + " a directory in scan. Put in lost+found." + TENDSTR))); + parent = dev->lostNFoundDir; + } + + yaffs_AddObjectToDirectory(parent, in); + + if (0 && (parent == dev->deletedDir || + parent == dev->unlinkedDir)) { + in->deleted = 1; /* If it is unlinked at start up then it wants deleting */ + dev->nDeletedFiles++; + } + /* Note re hardlinks. + * Since we might scan a hardlink before its equivalent object is scanned + * we put them all in a list. + * After scanning is complete, we should have all the objects, so we run through this + * list and fix up all the chains. + */ + + switch (in->variantType) { + case YAFFS_OBJECT_TYPE_UNKNOWN: + /* Todo got a problem */ + break; + case YAFFS_OBJECT_TYPE_FILE: + if (dev->isYaffs2 + && oh->isShrink) { + /* Prune back the shrunken chunks */ + yaffs_PruneResizedChunks + (in, oh->fileSize); + /* Mark the block as having a shrinkHeader */ + bi->hasShrinkHeader = 1; + } + + if (dev->useHeaderFileSize) + + in->variant.fileVariant. + fileSize = + oh->fileSize; + + break; + case YAFFS_OBJECT_TYPE_HARDLINK: + in->variant.hardLinkVariant. 
+ equivalentObjectId = + oh->equivalentObjectId; + in->hardLinks.next = + (struct list_head *) + hardList; + hardList = in; + break; + case YAFFS_OBJECT_TYPE_DIRECTORY: + /* Do nothing */ + break; + case YAFFS_OBJECT_TYPE_SPECIAL: + /* Do nothing */ + break; + case YAFFS_OBJECT_TYPE_SYMLINK: + in->variant.symLinkVariant.alias = + yaffs_CloneString(oh->alias); + if(!in->variant.symLinkVariant.alias) + alloc_failed = 1; + break; + } + + if (parent == dev->deletedDir) { + yaffs_DestroyObject(in); + bi->hasShrinkHeader = 1; + } + } + } + } + + if (state == YAFFS_BLOCK_STATE_NEEDS_SCANNING) { + /* If we got this far while scanning, then the block is fully allocated.*/ + state = YAFFS_BLOCK_STATE_FULL; + } + + bi->blockState = state; + + /* Now let's see if it was dirty */ + if (bi->pagesInUse == 0 && + !bi->hasShrinkHeader && + bi->blockState == YAFFS_BLOCK_STATE_FULL) { + yaffs_BlockBecameDirty(dev, blk); + } + + } + + if (blockIndex) { + YFREE(blockIndex); + } + + + /* Ok, we've done all the scanning. + * Fix up the hard link chains. + * We should now have scanned all the objects, now it's time to add these + * hardlinks. + */ + + yaffs_HardlinkFixup(dev,hardList); + + /* Handle the unlinked files. Since they were left in an unlinked state we should + * just delete them. + */ + { + struct list_head *i; + struct list_head *n; + + yaffs_Object *l; + /* Soft delete all the unlinked files */ + list_for_each_safe(i, n, + &dev->unlinkedDir->variant.directoryVariant. 
+ children) { + if (i) { + l = list_entry(i, yaffs_Object, siblings); + yaffs_DestroyObject(l); + } + } + } + + yaffs_ReleaseTempBuffer(dev, chunkData, __LINE__); + + if(alloc_failed){ + return YAFFS_FAIL; + } + + T(YAFFS_TRACE_SCAN, (TSTR("yaffs_Scan ends" TENDSTR))); + + + return YAFFS_OK; +} +#endif +static void yaffs_CheckObjectDetailsLoaded(yaffs_Object *in) +{ + __u8 *chunkData; + yaffs_ObjectHeader *oh; + yaffs_Device *dev = in->myDev; + yaffs_ExtendedTags tags; + int result; + int alloc_failed = 0; + + if(!in) + return; + +#if 0 + T(YAFFS_TRACE_SCAN,(TSTR("details for object %d %s loaded" TENDSTR), + in->objectId, + in->lazyLoaded ? "not yet" : "already")); +#endif + + if(in->lazyLoaded){ + in->lazyLoaded = 0; + chunkData = yaffs_GetTempBuffer(dev, __LINE__); + + result = yaffs_ReadChunkWithTagsFromNAND(dev,in->chunkId,chunkData,&tags); + oh = (yaffs_ObjectHeader *) chunkData; + + in->yst_mode = oh->yst_mode; +#ifdef CONFIG_YAFFS_WINCE + in->win_atime[0] = oh->win_atime[0]; + in->win_ctime[0] = oh->win_ctime[0]; + in->win_mtime[0] = oh->win_mtime[0]; + in->win_atime[1] = oh->win_atime[1]; + in->win_ctime[1] = oh->win_ctime[1]; + in->win_mtime[1] = oh->win_mtime[1]; +#else + in->yst_uid = oh->yst_uid; + in->yst_gid = oh->yst_gid; + in->yst_atime = oh->yst_atime; + in->yst_mtime = oh->yst_mtime; + in->yst_ctime = oh->yst_ctime; + in->yst_rdev = oh->yst_rdev; + +#endif + yaffs_SetObjectName(in, oh->name); + + if(in->variantType == YAFFS_OBJECT_TYPE_SYMLINK){ + in->variant.symLinkVariant.alias = + yaffs_CloneString(oh->alias); + if(!in->variant.symLinkVariant.alias) + alloc_failed = 1; /* Not returned to caller */ + } + + yaffs_ReleaseTempBuffer(dev,chunkData, __LINE__); + } +} + +static int yaffs_ScanBackwards(yaffs_Device * dev) +{ + yaffs_ExtendedTags tags; + int blk; + int blockIterator; + int startIterator; + int endIterator; + int nBlocksToScan = 0; + + int chunk; + int result; + int c; + int deleted; + yaffs_BlockState state; + yaffs_Object *hardList = 
NULL; + yaffs_BlockInfo *bi; + int sequenceNumber; + yaffs_ObjectHeader *oh; + yaffs_Object *in; + yaffs_Object *parent; + int nBlocks = dev->internalEndBlock - dev->internalStartBlock + 1; + int itsUnlinked; + __u8 *chunkData; + + int fileSize; + int isShrink; + int foundChunksInBlock; + int equivalentObjectId; + int alloc_failed = 0; + + + yaffs_BlockIndex *blockIndex = NULL; + int altBlockIndex = 0; + + if (!dev->isYaffs2) { + T(YAFFS_TRACE_SCAN, + (TSTR("yaffs_ScanBackwards is only for YAFFS2!" TENDSTR))); + return YAFFS_FAIL; + } + + T(YAFFS_TRACE_SCAN, + (TSTR + ("yaffs_ScanBackwards starts intstartblk %d intendblk %d..." + TENDSTR), dev->internalStartBlock, dev->internalEndBlock)); + + + dev->sequenceNumber = YAFFS_LOWEST_SEQUENCE_NUMBER; + + blockIndex = YMALLOC(nBlocks * sizeof(yaffs_BlockIndex)); + + if(!blockIndex) { + blockIndex = YMALLOC_ALT(nBlocks * sizeof(yaffs_BlockIndex)); + altBlockIndex = 1; + } + + if(!blockIndex) { + T(YAFFS_TRACE_SCAN, + (TSTR("yaffs_Scan() could not allocate block index!" 
TENDSTR))); + return YAFFS_FAIL; + } + + dev->blocksInCheckpoint = 0; + + chunkData = yaffs_GetTempBuffer(dev, __LINE__); + + /* Scan all the blocks to determine their state */ + for (blk = dev->internalStartBlock; blk <= dev->internalEndBlock; blk++) { + bi = yaffs_GetBlockInfo(dev, blk); + yaffs_ClearChunkBits(dev, blk); + bi->pagesInUse = 0; + bi->softDeletions = 0; + + yaffs_QueryInitialBlockState(dev, blk, &state, &sequenceNumber); + + bi->blockState = state; + bi->sequenceNumber = sequenceNumber; + + if(bi->sequenceNumber == YAFFS_SEQUENCE_CHECKPOINT_DATA) + bi->blockState = state = YAFFS_BLOCK_STATE_CHECKPOINT; + + T(YAFFS_TRACE_SCAN_DEBUG, + (TSTR("Block scanning block %d state %d seq %d" TENDSTR), blk, + state, sequenceNumber)); + + + if(state == YAFFS_BLOCK_STATE_CHECKPOINT){ + dev->blocksInCheckpoint++; + + } else if (state == YAFFS_BLOCK_STATE_DEAD) { + T(YAFFS_TRACE_BAD_BLOCKS, + (TSTR("block %d is bad" TENDSTR), blk)); + } else if (state == YAFFS_BLOCK_STATE_EMPTY) { + T(YAFFS_TRACE_SCAN_DEBUG, + (TSTR("Block empty " TENDSTR))); + dev->nErasedBlocks++; + dev->nFreeChunks += dev->nChunksPerBlock; + } else if (state == YAFFS_BLOCK_STATE_NEEDS_SCANNING) { + + /* Determine the highest sequence number */ + if (dev->isYaffs2 && + sequenceNumber >= YAFFS_LOWEST_SEQUENCE_NUMBER && + sequenceNumber < YAFFS_HIGHEST_SEQUENCE_NUMBER) { + + blockIndex[nBlocksToScan].seq = sequenceNumber; + blockIndex[nBlocksToScan].block = blk; + + nBlocksToScan++; + + if (sequenceNumber >= dev->sequenceNumber) { + dev->sequenceNumber = sequenceNumber; + } + } else if (dev->isYaffs2) { + /* TODO: Nasty sequence number! */ + T(YAFFS_TRACE_SCAN, + (TSTR + ("Block scanning block %d has bad sequence number %d" + TENDSTR), blk, sequenceNumber)); + + } + } + } + + T(YAFFS_TRACE_SCAN, + (TSTR("%d blocks to be sorted..." TENDSTR), nBlocksToScan)); + + + + YYIELD(); + + /* Dungy old bubble sort... 
*/ + + yaffs_BlockIndex temp; + int i; + int j; + + for (i = 0; i < nBlocksToScan; i++) + for (j = i + 1; j < nBlocksToScan; j++) + if (blockIndex[i].seq > blockIndex[j].seq) { + temp = blockIndex[j]; + blockIndex[j] = blockIndex[i]; + blockIndex[i] = temp; + } + + YYIELD(); + + T(YAFFS_TRACE_SCAN, (TSTR("...done" TENDSTR))); + + /* Now scan the blocks looking at the data. */ + startIterator = 0; + endIterator = nBlocksToScan - 1; + T(YAFFS_TRACE_SCAN_DEBUG, + (TSTR("%d blocks to be scanned" TENDSTR), nBlocksToScan)); + + /* For each block.... backwards */ + for (blockIterator = endIterator; !alloc_failed && blockIterator >= startIterator; + blockIterator--) { + /* Cooperative multitasking! This loop can run for so + long that watchdog timers expire. */ + YYIELD(); + + /* get the block to scan in the correct order */ + blk = blockIndex[blockIterator].block; + + bi = yaffs_GetBlockInfo(dev, blk); + + + state = bi->blockState; + + deleted = 0; + + /* For each chunk in each block that needs scanning.... */ + foundChunksInBlock = 0; + for (c = dev->nChunksPerBlock - 1; + !alloc_failed && c >= 0 && + (state == YAFFS_BLOCK_STATE_NEEDS_SCANNING || + state == YAFFS_BLOCK_STATE_ALLOCATING); c--) { + /* Scan backwards... + * Read the tags and decide what to do + */ + + chunk = blk * dev->nChunksPerBlock + c; + + result = yaffs_ReadChunkWithTagsFromNAND(dev, chunk, NULL, + &tags); + + /* Let's have a good look at this chunk... */ + + if (!tags.chunkUsed) { + /* An unassigned chunk in the block. + * If there are used chunks after this one, then + * it is a chunk that was skipped due to failing the erased + * check. Just skip it so that it can be deleted. 
+ * But, more typically, We get here when this is an unallocated + * chunk and his means that either the block is empty or + * this is the one being allocated from + */ + + if(foundChunksInBlock) + { + /* This is a chunk that was skipped due to failing the erased check */ + + } else if (c == 0) { + /* We're looking at the first chunk in the block so the block is unused */ + state = YAFFS_BLOCK_STATE_EMPTY; + dev->nErasedBlocks++; + } else { + if (state == YAFFS_BLOCK_STATE_NEEDS_SCANNING || + state == YAFFS_BLOCK_STATE_ALLOCATING) { + if(dev->sequenceNumber == bi->sequenceNumber) { + /* this is the block being allocated from */ + + T(YAFFS_TRACE_SCAN, + (TSTR + (" Allocating from %d %d" + TENDSTR), blk, c)); + + state = YAFFS_BLOCK_STATE_ALLOCATING; + dev->allocationBlock = blk; + dev->allocationPage = c; + dev->allocationBlockFinder = blk; + } + else { + /* This is a partially written block that is not + * the current allocation block. This block must have + * had a write failure, so set up for retirement. + */ + + bi->needsRetiring = 1; + bi->gcPrioritise = 1; + + T(YAFFS_TRACE_ALWAYS, + (TSTR("Partially written block %d being set for retirement" TENDSTR), + blk)); + } + + } + + } + + dev->nFreeChunks++; + + } else if (tags.chunkId > 0) { + /* chunkId > 0 so it is a data chunk... */ + unsigned int endpos; + __u32 chunkBase = + (tags.chunkId - 1) * dev->nDataBytesPerChunk; + + foundChunksInBlock = 1; + + + yaffs_SetChunkBit(dev, blk, c); + bi->pagesInUse++; + + in = yaffs_FindOrCreateObjectByNumber(dev, + tags. + objectId, + YAFFS_OBJECT_TYPE_FILE); + if(!in){ + /* Out of memory */ + alloc_failed = 1; + } + + if (in && + in->variantType == YAFFS_OBJECT_TYPE_FILE + && chunkBase < + in->variant.fileVariant.shrinkSize) { + /* This has not been invalidated by a resize */ + if(!yaffs_PutChunkIntoFile(in, tags.chunkId, + chunk, -1)){ + alloc_failed = 1; + } + + /* File size is calculated by looking at the data chunks if we have not + * seen an object header yet. 
Stop this practice once we find an object header. + */ + endpos = + (tags.chunkId - + 1) * dev->nDataBytesPerChunk + + tags.byteCount; + + if (!in->valid && /* have not got an object header yet */ + in->variant.fileVariant. + scannedFileSize < endpos) { + in->variant.fileVariant. + scannedFileSize = endpos; + in->variant.fileVariant. + fileSize = + in->variant.fileVariant. + scannedFileSize; + } + + } else if(in) { + /* This chunk has been invalidated by a resize, so delete */ + yaffs_DeleteChunk(dev, chunk, 1, __LINE__); + + } + } else { + /* chunkId == 0, so it is an ObjectHeader. + * Thus, we read in the object header and make the object + */ + foundChunksInBlock = 1; + + yaffs_SetChunkBit(dev, blk, c); + bi->pagesInUse++; + + oh = NULL; + in = NULL; + + if (tags.extraHeaderInfoAvailable) { + in = yaffs_FindOrCreateObjectByNumber + (dev, tags.objectId, + tags.extraObjectType); + } + + if (!in || +#ifdef CONFIG_YAFFS_DISABLE_LAZY_LOAD + !in->valid || +#endif + tags.extraShadows || + (!in->valid && + (tags.objectId == YAFFS_OBJECTID_ROOT || + tags.objectId == YAFFS_OBJECTID_LOSTNFOUND)) + ) { + + /* If we don't have valid info then we need to read the chunk + * TODO In future we can probably defer reading the chunk and + * living with invalid data until needed. + */ + + result = yaffs_ReadChunkWithTagsFromNAND(dev, + chunk, + chunkData, + NULL); + + oh = (yaffs_ObjectHeader *) chunkData; + + if (!in) + in = yaffs_FindOrCreateObjectByNumber(dev, tags.objectId, oh->type); + + } + + if (!in) { + /* TODO Hoosterman we have a problem! */ + T(YAFFS_TRACE_ERROR, + (TSTR + ("yaffs tragedy: Could not make object for object %d " + "at chunk %d during scan" + TENDSTR), tags.objectId, chunk)); + + } + + if (in->valid) { + /* We have already filled this one. + * We have a duplicate that will be discarded, but + * we first have to suck out resize info if it is a file. 
+ */ + + if ((in->variantType == YAFFS_OBJECT_TYPE_FILE) && + ((oh && + oh-> type == YAFFS_OBJECT_TYPE_FILE)|| + (tags.extraHeaderInfoAvailable && + tags.extraObjectType == YAFFS_OBJECT_TYPE_FILE)) + ) { + __u32 thisSize = + (oh) ? oh->fileSize : tags. + extraFileLength; + __u32 parentObjectId = + (oh) ? oh-> + parentObjectId : tags. + extraParentObjectId; + unsigned isShrink = + (oh) ? oh->isShrink : tags. + extraIsShrinkHeader; + + /* If it is deleted (unlinked at start also means deleted) + * we treat the file size as being zeroed at this point. + */ + if (parentObjectId == + YAFFS_OBJECTID_DELETED + || parentObjectId == + YAFFS_OBJECTID_UNLINKED) { + thisSize = 0; + isShrink = 1; + } + + if (isShrink && + in->variant.fileVariant. + shrinkSize > thisSize) { + in->variant.fileVariant. + shrinkSize = + thisSize; + } + + if (isShrink) { + bi->hasShrinkHeader = 1; + } + + } + /* Use existing - destroy this one. */ + yaffs_DeleteChunk(dev, chunk, 1, __LINE__); + + } + + if (!in->valid && + (tags.objectId == YAFFS_OBJECTID_ROOT || + tags.objectId == + YAFFS_OBJECTID_LOSTNFOUND)) { + /* We only load some info, don't fiddle with directory structure */ + in->valid = 1; + + if(oh) { + in->variantType = oh->type; + + in->yst_mode = oh->yst_mode; +#ifdef CONFIG_YAFFS_WINCE + in->win_atime[0] = oh->win_atime[0]; + in->win_ctime[0] = oh->win_ctime[0]; + in->win_mtime[0] = oh->win_mtime[0]; + in->win_atime[1] = oh->win_atime[1]; + in->win_ctime[1] = oh->win_ctime[1]; + in->win_mtime[1] = oh->win_mtime[1]; +#else + in->yst_uid = oh->yst_uid; + in->yst_gid = oh->yst_gid; + in->yst_atime = oh->yst_atime; + in->yst_mtime = oh->yst_mtime; + in->yst_ctime = oh->yst_ctime; + in->yst_rdev = oh->yst_rdev; + +#endif + } else { + in->variantType = tags.extraObjectType; + in->lazyLoaded = 1; + } + + in->chunkId = chunk; + + } else if (!in->valid) { + /* we need to load this info */ + + in->valid = 1; + in->chunkId = chunk; + + if(oh) { + in->variantType = oh->type; + + in->yst_mode = 
oh->yst_mode; +#ifdef CONFIG_YAFFS_WINCE + in->win_atime[0] = oh->win_atime[0]; + in->win_ctime[0] = oh->win_ctime[0]; + in->win_mtime[0] = oh->win_mtime[0]; + in->win_atime[1] = oh->win_atime[1]; + in->win_ctime[1] = oh->win_ctime[1]; + in->win_mtime[1] = oh->win_mtime[1]; +#else + in->yst_uid = oh->yst_uid; + in->yst_gid = oh->yst_gid; + in->yst_atime = oh->yst_atime; + in->yst_mtime = oh->yst_mtime; + in->yst_ctime = oh->yst_ctime; + in->yst_rdev = oh->yst_rdev; +#endif + + if (oh->shadowsObject > 0) + yaffs_HandleShadowedObject(dev, + oh-> + shadowsObject, + 1); + + + yaffs_SetObjectName(in, oh->name); + parent = + yaffs_FindOrCreateObjectByNumber + (dev, oh->parentObjectId, + YAFFS_OBJECT_TYPE_DIRECTORY); + + fileSize = oh->fileSize; + isShrink = oh->isShrink; + equivalentObjectId = oh->equivalentObjectId; + + } + else { + in->variantType = tags.extraObjectType; + parent = + yaffs_FindOrCreateObjectByNumber + (dev, tags.extraParentObjectId, + YAFFS_OBJECT_TYPE_DIRECTORY); + fileSize = tags.extraFileLength; + isShrink = tags.extraIsShrinkHeader; + equivalentObjectId = tags.extraEquivalentObjectId; + in->lazyLoaded = 1; + + } + in->dirty = 0; + + /* directory stuff... + * hook up to parent + */ + + if (parent->variantType == + YAFFS_OBJECT_TYPE_UNKNOWN) { + /* Set up as a directory */ + parent->variantType = + YAFFS_OBJECT_TYPE_DIRECTORY; + INIT_LIST_HEAD(&parent->variant. + directoryVariant. + children); + } else if (parent->variantType != + YAFFS_OBJECT_TYPE_DIRECTORY) + { + /* Hoosterman, another problem.... + * We're trying to use a non-directory as a directory + */ + + T(YAFFS_TRACE_ERROR, + (TSTR + ("yaffs tragedy: attempting to use non-directory as" + " a directory in scan. Put in lost+found." 
+ TENDSTR))); + parent = dev->lostNFoundDir; + } + + yaffs_AddObjectToDirectory(parent, in); + + itsUnlinked = (parent == dev->deletedDir) || + (parent == dev->unlinkedDir); + + if (isShrink) { + /* Mark the block as having a shrinkHeader */ + bi->hasShrinkHeader = 1; + } + + /* Note re hardlinks. + * Since we might scan a hardlink before its equivalent object is scanned + * we put them all in a list. + * After scanning is complete, we should have all the objects, so we run + * through this list and fix up all the chains. + */ + + switch (in->variantType) { + case YAFFS_OBJECT_TYPE_UNKNOWN: + /* Todo got a problem */ + break; + case YAFFS_OBJECT_TYPE_FILE: + + if (in->variant.fileVariant. + scannedFileSize < fileSize) { + /* This covers the case where the file size is greater + * than where the data is + * This will happen if the file is resized to be larger + * than its current data extents. + */ + in->variant.fileVariant.fileSize = fileSize; + in->variant.fileVariant.scannedFileSize = + in->variant.fileVariant.fileSize; + } + + if (isShrink && + in->variant.fileVariant.shrinkSize > fileSize) { + in->variant.fileVariant.shrinkSize = fileSize; + } + + break; + case YAFFS_OBJECT_TYPE_HARDLINK: + if(!itsUnlinked) { + in->variant.hardLinkVariant.equivalentObjectId = + equivalentObjectId; + in->hardLinks.next = + (struct list_head *) hardList; + hardList = in; + } + break; + case YAFFS_OBJECT_TYPE_DIRECTORY: + /* Do nothing */ + break; + case YAFFS_OBJECT_TYPE_SPECIAL: + /* Do nothing */ + break; + case YAFFS_OBJECT_TYPE_SYMLINK: + if(oh){ + in->variant.symLinkVariant.alias = + yaffs_CloneString(oh-> + alias); + if(!in->variant.symLinkVariant.alias) + alloc_failed = 1; + } + break; + } + + } + + } + + } /* End of scanning for each chunk */ + + if (state == YAFFS_BLOCK_STATE_NEEDS_SCANNING) { + /* If we got this far while scanning, then the block is fully allocated. 
*/ + state = YAFFS_BLOCK_STATE_FULL; + } + + bi->blockState = state; + + /* Now let's see if it was dirty */ + if (bi->pagesInUse == 0 && + !bi->hasShrinkHeader && + bi->blockState == YAFFS_BLOCK_STATE_FULL) { + yaffs_BlockBecameDirty(dev, blk); + } + + } + + if (altBlockIndex) + YFREE_ALT(blockIndex); + else + YFREE(blockIndex); + + /* Ok, we've done all the scanning. + * Fix up the hard link chains. + * We should now have scanned all the objects, now it's time to add these + * hardlinks. + */ + yaffs_HardlinkFixup(dev,hardList); + + + /* + * Sort out state of unlinked and deleted objects. + */ + { + struct list_head *i; + struct list_head *n; + + yaffs_Object *l; + + /* Soft delete all the unlinked files */ + list_for_each_safe(i, n, + &dev->unlinkedDir->variant.directoryVariant. + children) { + if (i) { + l = list_entry(i, yaffs_Object, siblings); + yaffs_DestroyObject(l); + } + } + + /* Soft delete all the deletedDir files */ + list_for_each_safe(i, n, + &dev->deletedDir->variant.directoryVariant. 
+ children) { + if (i) { + l = list_entry(i, yaffs_Object, siblings); + yaffs_DestroyObject(l); + + } + } + } + + yaffs_ReleaseTempBuffer(dev, chunkData, __LINE__); + + if(alloc_failed){ + return YAFFS_FAIL; + } + + T(YAFFS_TRACE_SCAN, (TSTR("yaffs_ScanBackwards ends" TENDSTR))); + + return YAFFS_OK; +} + +/*------------------------------ Directory Functions ----------------------------- */ + +static void yaffs_RemoveObjectFromDirectory(yaffs_Object * obj) +{ + yaffs_Device *dev = obj->myDev; + + if(dev && dev->removeObjectCallback) + dev->removeObjectCallback(obj); + + list_del_init(&obj->siblings); + obj->parent = NULL; +} + + +static void yaffs_AddObjectToDirectory(yaffs_Object * directory, + yaffs_Object * obj) +{ + + if (!directory) { + T(YAFFS_TRACE_ALWAYS, + (TSTR + ("tragedy: Trying to add an object to a null pointer directory" + TENDSTR))); + YBUG(); + } + if (directory->variantType != YAFFS_OBJECT_TYPE_DIRECTORY) { + T(YAFFS_TRACE_ALWAYS, + (TSTR + ("tragedy: Trying to add an object to a non-directory" + TENDSTR))); + YBUG(); + } + + if (obj->siblings.prev == NULL) { + /* Not initialised */ + INIT_LIST_HEAD(&obj->siblings); + + } else if (!list_empty(&obj->siblings)) { + /* If it is holed up somewhere else, un hook it */ + yaffs_RemoveObjectFromDirectory(obj); + } + /* Now add it */ + list_add(&obj->siblings, &directory->variant.directoryVariant.children); + obj->parent = directory; + + if (directory == obj->myDev->unlinkedDir + || directory == obj->myDev->deletedDir) { + obj->unlinked = 1; + obj->myDev->nUnlinkedFiles++; + obj->renameAllowed = 0; + } +} + +yaffs_Object *yaffs_FindObjectByName(yaffs_Object * directory, + const YCHAR * name) +{ + int sum; + + struct list_head *i; + YCHAR buffer[YAFFS_MAX_NAME_LENGTH + 1]; + + yaffs_Object *l; + + if (!name) { + return NULL; + } + + if (!directory) { + T(YAFFS_TRACE_ALWAYS, + (TSTR + ("tragedy: yaffs_FindObjectByName: null pointer directory" + TENDSTR))); + YBUG(); + } + if (directory->variantType != 
YAFFS_OBJECT_TYPE_DIRECTORY) { + T(YAFFS_TRACE_ALWAYS, + (TSTR + ("tragedy: yaffs_FindObjectByName: non-directory" TENDSTR))); + YBUG(); + } + + sum = yaffs_CalcNameSum(name); + + list_for_each(i, &directory->variant.directoryVariant.children) { + if (i) { + l = list_entry(i, yaffs_Object, siblings); + + yaffs_CheckObjectDetailsLoaded(l); + + /* Special case for lost-n-found */ + if (l->objectId == YAFFS_OBJECTID_LOSTNFOUND) { + if (yaffs_strcmp(name, YAFFS_LOSTNFOUND_NAME) == 0) { + return l; + } + } else if (yaffs_SumCompare(l->sum, sum) || l->chunkId <= 0) + { + /* LostnFound cunk called Objxxx + * Do a real check + */ + yaffs_GetObjectName(l, buffer, + YAFFS_MAX_NAME_LENGTH); + if (yaffs_strncmp(name, buffer,YAFFS_MAX_NAME_LENGTH) == 0) { + return l; + } + + } + } + } + + return NULL; +} + + +#if 0 +int yaffs_ApplyToDirectoryChildren(yaffs_Object * theDir, + int (*fn) (yaffs_Object *)) +{ + struct list_head *i; + yaffs_Object *l; + + if (!theDir) { + T(YAFFS_TRACE_ALWAYS, + (TSTR + ("tragedy: yaffs_FindObjectByName: null pointer directory" + TENDSTR))); + YBUG(); + } + if (theDir->variantType != YAFFS_OBJECT_TYPE_DIRECTORY) { + T(YAFFS_TRACE_ALWAYS, + (TSTR + ("tragedy: yaffs_FindObjectByName: non-directory" TENDSTR))); + YBUG(); + } + + list_for_each(i, &theDir->variant.directoryVariant.children) { + if (i) { + l = list_entry(i, yaffs_Object, siblings); + if (l && !fn(l)) { + return YAFFS_FAIL; + } + } + } + + return YAFFS_OK; + +} +#endif + +/* GetEquivalentObject dereferences any hard links to get to the + * actual object. 
+ */ + +yaffs_Object *yaffs_GetEquivalentObject(yaffs_Object * obj) +{ + if (obj && obj->variantType == YAFFS_OBJECT_TYPE_HARDLINK) { + /* We want the object id of the equivalent object, not this one */ + obj = obj->variant.hardLinkVariant.equivalentObject; + yaffs_CheckObjectDetailsLoaded(obj); + } + return obj; + +} + +int yaffs_GetObjectName(yaffs_Object * obj, YCHAR * name, int buffSize) +{ + memset(name, 0, buffSize * sizeof(YCHAR)); + + yaffs_CheckObjectDetailsLoaded(obj); + + if (obj->objectId == YAFFS_OBJECTID_LOSTNFOUND) { + yaffs_strncpy(name, YAFFS_LOSTNFOUND_NAME, buffSize - 1); + } else if (obj->chunkId <= 0) { + YCHAR locName[20]; + /* make up a name */ + yaffs_sprintf(locName, _Y("%s%d"), YAFFS_LOSTNFOUND_PREFIX, + obj->objectId); + yaffs_strncpy(name, locName, buffSize - 1); + + } +#ifdef CONFIG_YAFFS_SHORT_NAMES_IN_RAM + else if (obj->shortName[0]) { + yaffs_strcpy(name, obj->shortName); + } +#endif + else { + int result; + __u8 *buffer = yaffs_GetTempBuffer(obj->myDev, __LINE__); + + yaffs_ObjectHeader *oh = (yaffs_ObjectHeader *) buffer; + + memset(buffer, 0, obj->myDev->nDataBytesPerChunk); + + if (obj->chunkId >= 0) { + result = yaffs_ReadChunkWithTagsFromNAND(obj->myDev, + obj->chunkId, buffer, + NULL); + } + yaffs_strncpy(name, oh->name, buffSize - 1); + + yaffs_ReleaseTempBuffer(obj->myDev, buffer, __LINE__); + } + + return yaffs_strlen(name); +} + +int yaffs_GetObjectFileLength(yaffs_Object * obj) +{ + + /* Dereference any hard linking */ + obj = yaffs_GetEquivalentObject(obj); + + if (obj->variantType == YAFFS_OBJECT_TYPE_FILE) { + return obj->variant.fileVariant.fileSize; + } + if (obj->variantType == YAFFS_OBJECT_TYPE_SYMLINK) { + return yaffs_strlen(obj->variant.symLinkVariant.alias); + } else { + /* Only a directory should drop through to here */ + return obj->myDev->nDataBytesPerChunk; + } +} + +int yaffs_GetObjectLinkCount(yaffs_Object * obj) +{ + int count = 0; + struct list_head *i; + + if (!obj->unlinked) { + count++; /* the 
object itself */ + } + list_for_each(i, &obj->hardLinks) { + count++; /* add the hard links; */ + } + return count; + +} + +int yaffs_GetObjectInode(yaffs_Object * obj) +{ + obj = yaffs_GetEquivalentObject(obj); + + return obj->objectId; +} + +unsigned yaffs_GetObjectType(yaffs_Object * obj) +{ + obj = yaffs_GetEquivalentObject(obj); + + switch (obj->variantType) { + case YAFFS_OBJECT_TYPE_FILE: + return DT_REG; + break; + case YAFFS_OBJECT_TYPE_DIRECTORY: + return DT_DIR; + break; + case YAFFS_OBJECT_TYPE_SYMLINK: + return DT_LNK; + break; + case YAFFS_OBJECT_TYPE_HARDLINK: + return DT_REG; + break; + case YAFFS_OBJECT_TYPE_SPECIAL: + if (S_ISFIFO(obj->yst_mode)) + return DT_FIFO; + if (S_ISCHR(obj->yst_mode)) + return DT_CHR; + if (S_ISBLK(obj->yst_mode)) + return DT_BLK; + if (S_ISSOCK(obj->yst_mode)) + return DT_SOCK; + default: + return DT_REG; + break; + } +} + +YCHAR *yaffs_GetSymlinkAlias(yaffs_Object * obj) +{ + obj = yaffs_GetEquivalentObject(obj); + if (obj->variantType == YAFFS_OBJECT_TYPE_SYMLINK) { + return yaffs_CloneString(obj->variant.symLinkVariant.alias); + } else { + return yaffs_CloneString(_Y("")); + } +} + +#ifndef CONFIG_YAFFS_WINCE + +int yaffs_SetAttributes(yaffs_Object * obj, struct iattr *attr) +{ + unsigned int valid = attr->ia_valid; + + if (valid & ATTR_MODE) + obj->yst_mode = attr->ia_mode; + if (valid & ATTR_UID) + obj->yst_uid = attr->ia_uid; + if (valid & ATTR_GID) + obj->yst_gid = attr->ia_gid; + + if (valid & ATTR_ATIME) + obj->yst_atime = Y_TIME_CONVERT(attr->ia_atime); + if (valid & ATTR_CTIME) + obj->yst_ctime = Y_TIME_CONVERT(attr->ia_ctime); + if (valid & ATTR_MTIME) + obj->yst_mtime = Y_TIME_CONVERT(attr->ia_mtime); + + if (valid & ATTR_SIZE) + yaffs_ResizeFile(obj, attr->ia_size); + + yaffs_UpdateObjectHeader(obj, NULL, 1, 0, 0); + + return YAFFS_OK; + +} +int yaffs_GetAttributes(yaffs_Object * obj, struct iattr *attr) +{ + unsigned int valid = 0; + + attr->ia_mode = obj->yst_mode; + valid |= ATTR_MODE; + attr->ia_uid = 
obj->yst_uid; + valid |= ATTR_UID; + attr->ia_gid = obj->yst_gid; + valid |= ATTR_GID; + + Y_TIME_CONVERT(attr->ia_atime) = obj->yst_atime; + valid |= ATTR_ATIME; + Y_TIME_CONVERT(attr->ia_ctime) = obj->yst_ctime; + valid |= ATTR_CTIME; + Y_TIME_CONVERT(attr->ia_mtime) = obj->yst_mtime; + valid |= ATTR_MTIME; + + attr->ia_size = yaffs_GetFileSize(obj); + valid |= ATTR_SIZE; + + attr->ia_valid = valid; + + return YAFFS_OK; + +} + +#endif + +#if 0 +int yaffs_DumpObject(yaffs_Object * obj) +{ + YCHAR name[257]; + + yaffs_GetObjectName(obj, name, 256); + + T(YAFFS_TRACE_ALWAYS, + (TSTR + ("Object %d, inode %d \"%s\"\n dirty %d valid %d serial %d sum %d" + " chunk %d type %d size %d\n" + TENDSTR), obj->objectId, yaffs_GetObjectInode(obj), name, + obj->dirty, obj->valid, obj->serial, obj->sum, obj->chunkId, + yaffs_GetObjectType(obj), yaffs_GetObjectFileLength(obj))); + + return YAFFS_OK; +} +#endif + +/*---------------------------- Initialisation code -------------------------------------- */ + +static int yaffs_CheckDevFunctions(const yaffs_Device * dev) +{ + + /* Common functions, gotta have */ + if (!dev->eraseBlockInNAND || !dev->initialiseNAND) + return 0; + + + /* Can use the "with tags" style interface for yaffs1 or yaffs2 */ + if (dev->writeChunkWithTagsToNAND && + dev->readChunkWithTagsFromNAND && + !dev->writeChunkToNAND && + !dev->readChunkFromNAND && + dev->markNANDBlockBad && dev->queryNANDBlock) + return 1; + + return 0; /* bad */ +} + + +static int yaffs_CreateInitialDirectories(yaffs_Device *dev) +{ + /* Initialise the unlinked, deleted, root and lost and found directories */ + + dev->lostNFoundDir = dev->rootDir = NULL; + dev->unlinkedDir = dev->deletedDir = NULL; + + dev->unlinkedDir = + yaffs_CreateFakeDirectory(dev, YAFFS_OBJECTID_UNLINKED, S_IFDIR); + + dev->deletedDir = + yaffs_CreateFakeDirectory(dev, YAFFS_OBJECTID_DELETED, S_IFDIR); + + dev->rootDir = + yaffs_CreateFakeDirectory(dev, YAFFS_OBJECTID_ROOT, + YAFFS_ROOT_MODE | S_IFDIR); + 
dev->lostNFoundDir = + yaffs_CreateFakeDirectory(dev, YAFFS_OBJECTID_LOSTNFOUND, + YAFFS_LOSTNFOUND_MODE | S_IFDIR); + + if(dev->lostNFoundDir && dev->rootDir && dev->unlinkedDir && dev->deletedDir){ + yaffs_AddObjectToDirectory(dev->rootDir, dev->lostNFoundDir); + return YAFFS_OK; + } + + return YAFFS_FAIL; +} + +int yaffs_GutsInitialise(yaffs_Device * dev) +{ + int init_failed = 0; + unsigned x; + int bits; + + T(YAFFS_TRACE_TRACING, (TSTR("yaffs: yaffs_GutsInitialise()" TENDSTR))); + + /* Check stuff that must be set */ + + if (!dev) { + T(YAFFS_TRACE_ALWAYS, (TSTR("yaffs: Need a device" TENDSTR))); + return YAFFS_FAIL; + } + + dev->internalStartBlock = dev->startBlock; + dev->internalEndBlock = dev->endBlock; + dev->blockOffset = 0; + dev->chunkOffset = 0; + dev->nFreeChunks = 0; + + if (dev->startBlock == 0) { + dev->internalStartBlock = dev->startBlock + 1; + dev->internalEndBlock = dev->endBlock + 1; + dev->blockOffset = 1; + dev->chunkOffset = dev->nChunksPerBlock; + } + + /* Check geometry parameters. */ + + if ((dev->isYaffs2 && dev->nDataBytesPerChunk < 1024) || + (!dev->isYaffs2 && dev->nDataBytesPerChunk != 512) || + dev->nChunksPerBlock < 2 || + dev->nReservedBlocks < 2 || + dev->internalStartBlock <= 0 || + dev->internalEndBlock <= 0 || + dev->internalEndBlock <= (dev->internalStartBlock + dev->nReservedBlocks + 2) // otherwise it is too small + ) { + T(YAFFS_TRACE_ALWAYS, + (TSTR + ("yaffs: NAND geometry problems: chunk size %d, type is yaffs%s " + TENDSTR), dev->nDataBytesPerChunk, dev->isYaffs2 ? "2" : "")); + return YAFFS_FAIL; + } + + if (yaffs_InitialiseNAND(dev) != YAFFS_OK) { + T(YAFFS_TRACE_ALWAYS, + (TSTR("yaffs: InitialiseNAND failed" TENDSTR))); + return YAFFS_FAIL; + } + + /* Got the right mix of functions? */ + if (!yaffs_CheckDevFunctions(dev)) { + /* Function missing */ + T(YAFFS_TRACE_ALWAYS, + (TSTR + ("yaffs: device function(s) missing or wrong\n" TENDSTR))); + + return YAFFS_FAIL; + } + + /* This is really a compilation check. 
*/ + if (!yaffs_CheckStructures()) { + T(YAFFS_TRACE_ALWAYS, + (TSTR("yaffs_CheckStructures failed\n" TENDSTR))); + return YAFFS_FAIL; + } + + if (dev->isMounted) { + T(YAFFS_TRACE_ALWAYS, + (TSTR("yaffs: device already mounted\n" TENDSTR))); + return YAFFS_FAIL; + } + + /* Finished with most checks. One or two more checks happen later on too. */ + + dev->isMounted = 1; + + + + /* OK now calculate a few things for the device */ + + /* + * Calculate all the chunk size manipulation numbers: + */ + /* Start off assuming it is a power of 2 */ + dev->chunkShift = ShiftDiv(dev->nDataBytesPerChunk); + dev->chunkMask = (1<<dev->chunkShift) - 1; + + if(dev->nDataBytesPerChunk == (dev->chunkMask + 1)){ + /* Yes it is a power of 2, disable crumbs */ + dev->crumbMask = 0; + dev->crumbShift = 0; + dev->crumbsPerChunk = 0; + } else { + /* Not a power of 2, use crumbs instead */ + dev->crumbShift = ShiftDiv(sizeof(yaffs_PackedTags2TagsPart)); + dev->crumbMask = (1<<dev->crumbShift)-1; + dev->crumbsPerChunk = dev->nDataBytesPerChunk/(1 << dev->crumbShift); + dev->chunkShift = 0; + dev->chunkMask = 0; + } + + + /* + * Calculate chunkGroupBits. + * We need to find the next power of 2 > than internalEndBlock + */ + + x = dev->nChunksPerBlock * (dev->internalEndBlock + 1); + + bits = ShiftsGE(x); + + /* Set up tnode width if wide tnodes are enabled. 
*/ + if(!dev->wideTnodesDisabled){ + /* bits must be even so that we end up with 32-bit words */ + if(bits & 1) + bits++; + if(bits < 16) + dev->tnodeWidth = 16; + else + dev->tnodeWidth = bits; + } + else + dev->tnodeWidth = 16; + + dev->tnodeMask = (1<<dev->tnodeWidth)-1; + + /* Level0 Tnodes are 16 bits or wider (if wide tnodes are enabled), + * so if the bitwidth of the + * chunk range we're using is greater than 16 we need + * to figure out chunk shift and chunkGroupSize + */ + + if (bits <= dev->tnodeWidth) + dev->chunkGroupBits = 0; + else + dev->chunkGroupBits = bits - dev->tnodeWidth; + + + dev->chunkGroupSize = 1 << dev->chunkGroupBits; + + if (dev->nChunksPerBlock < dev->chunkGroupSize) { + /* We have a problem because the soft delete won't work if + * the chunk group size > chunks per block. + * This can be remedied by using larger "virtual blocks". + */ + T(YAFFS_TRACE_ALWAYS, + (TSTR("yaffs: chunk group too large\n" TENDSTR))); + + return YAFFS_FAIL; + } + + /* OK, we've finished verifying the device, lets continue with initialisation */ + + /* More device initialisation */ + dev->garbageCollections = 0; + dev->passiveGarbageCollections = 0; + dev->currentDirtyChecker = 0; + dev->bufferedBlock = -1; + dev->doingBufferedBlockRewrite = 0; + dev->nDeletedFiles = 0; + dev->nBackgroundDeletions = 0; + dev->nUnlinkedFiles = 0; + dev->eccFixed = 0; + dev->eccUnfixed = 0; + dev->tagsEccFixed = 0; + dev->tagsEccUnfixed = 0; + dev->nErasureFailures = 0; + dev->nErasedBlocks = 0; + dev->isDoingGC = 0; + dev->hasPendingPrioritisedGCs = 1; /* Assume the worst for now, will get fixed on first GC */ + + /* Initialise temporary buffers and caches. 
*/ + if(!yaffs_InitialiseTempBuffers(dev)) + init_failed = 1; + + dev->srCache = NULL; + dev->gcCleanupList = NULL; + + + if (!init_failed && + dev->nShortOpCaches > 0) { + int i; + __u8 *buf; + int srCacheBytes = dev->nShortOpCaches * sizeof(yaffs_ChunkCache); + + if (dev->nShortOpCaches > YAFFS_MAX_SHORT_OP_CACHES) { + dev->nShortOpCaches = YAFFS_MAX_SHORT_OP_CACHES; + } + + buf = dev->srCache = YMALLOC(srCacheBytes); + + if(dev->srCache) + memset(dev->srCache,0,srCacheBytes); + + for (i = 0; i < dev->nShortOpCaches && buf; i++) { + dev->srCache[i].object = NULL; + dev->srCache[i].lastUse = 0; + dev->srCache[i].dirty = 0; + dev->srCache[i].data = buf = YMALLOC_DMA(dev->nDataBytesPerChunk); + } + if(!buf) + init_failed = 1; + + dev->srLastUse = 0; + } + + dev->cacheHits = 0; + + if(!init_failed){ + dev->gcCleanupList = YMALLOC(dev->nChunksPerBlock * sizeof(__u32)); + if(!dev->gcCleanupList) + init_failed = 1; + } + + if (dev->isYaffs2) { + dev->useHeaderFileSize = 1; + } + if(!init_failed && !yaffs_InitialiseBlocks(dev)) + init_failed = 1; + + yaffs_InitialiseTnodes(dev); + yaffs_InitialiseObjects(dev); + + if(!init_failed && !yaffs_CreateInitialDirectories(dev)) + init_failed = 1; + + + if(!init_failed){ + /* Now scan the flash. */ + if (dev->isYaffs2) { + if (yaffs_CheckpointRestore(dev)) { + T(YAFFS_TRACE_ALWAYS, + (TSTR("yaffs: restored from checkpoint" TENDSTR))); + } else { + + /* Clean up the mess caused by an aborted checkpoint load + * and scan backwards. 
+ */ + yaffs_DeinitialiseBlocks(dev); + yaffs_DeinitialiseTnodes(dev); + yaffs_DeinitialiseObjects(dev); + + + dev->nErasedBlocks = 0; + dev->nFreeChunks = 0; + dev->allocationBlock = -1; + dev->allocationPage = -1; + dev->nDeletedFiles = 0; + dev->nUnlinkedFiles = 0; + dev->nBackgroundDeletions = 0; + dev->oldestDirtySequence = 0; + + if(!init_failed && !yaffs_InitialiseBlocks(dev)) + init_failed = 1; + + yaffs_InitialiseTnodes(dev); + yaffs_InitialiseObjects(dev); + + if(!init_failed && !yaffs_CreateInitialDirectories(dev)) + init_failed = 1; + + if(!init_failed && !yaffs_ScanBackwards(dev)) + init_failed = 1; + } + } + } + + if(init_failed){ + /* Clean up the mess */ + T(YAFFS_TRACE_TRACING, + (TSTR("yaffs: yaffs_GutsInitialise() aborted.\n" TENDSTR))); + + yaffs_Deinitialise(dev); + return YAFFS_FAIL; + } + + /* Zero out stats */ + dev->nPageReads = 0; + dev->nPageWrites = 0; + dev->nBlockErasures = 0; + dev->nGCCopies = 0; + dev->nRetriedWrites = 0; + + dev->nRetiredBlocks = 0; + + yaffs_VerifyFreeChunks(dev); + yaffs_VerifyBlocks(dev); + + + T(YAFFS_TRACE_TRACING, + (TSTR("yaffs: yaffs_GutsInitialise() done.\n" TENDSTR))); + return YAFFS_OK; + +} + +void yaffs_Deinitialise(yaffs_Device * dev) +{ + if (dev->isMounted) { + int i; + + yaffs_DeinitialiseBlocks(dev); + yaffs_DeinitialiseTnodes(dev); + yaffs_DeinitialiseObjects(dev); + if (dev->nShortOpCaches > 0 && + dev->srCache) { + + for (i = 0; i < dev->nShortOpCaches; i++) { + if(dev->srCache[i].data) + YFREE(dev->srCache[i].data); + dev->srCache[i].data = NULL; + } + + YFREE(dev->srCache); + dev->srCache = NULL; + } + + YFREE(dev->gcCleanupList); + + for (i = 0; i < YAFFS_N_TEMP_BUFFERS; i++) { + YFREE(dev->tempBuffer[i].buffer); + } + + dev->isMounted = 0; + } + +} + +static int yaffs_CountFreeChunks(yaffs_Device * dev) +{ + int nFree; + int b; + + yaffs_BlockInfo *blk; + + for (nFree = 0, b = dev->internalStartBlock; b <= dev->internalEndBlock; + b++) { + blk = yaffs_GetBlockInfo(dev, b); + + switch 
(blk->blockState) { + case YAFFS_BLOCK_STATE_EMPTY: + case YAFFS_BLOCK_STATE_ALLOCATING: + case YAFFS_BLOCK_STATE_COLLECTING: + case YAFFS_BLOCK_STATE_FULL: + nFree += + (dev->nChunksPerBlock - blk->pagesInUse + + blk->softDeletions); + break; + default: + break; + } + + } + + return nFree; +} + +int yaffs_GetNumberOfFreeChunks(yaffs_Device * dev) +{ + /* This is what we report to the outside world */ + + int nFree; + int nDirtyCacheChunks; + int blocksForCheckpoint; + +#if 1 + nFree = dev->nFreeChunks; +#else + nFree = yaffs_CountFreeChunks(dev); +#endif + + nFree += dev->nDeletedFiles; + + /* Now count the number of dirty chunks in the cache and subtract those */ + + { + int i; + for (nDirtyCacheChunks = 0, i = 0; i < dev->nShortOpCaches; i++) { + if (dev->srCache[i].dirty) + nDirtyCacheChunks++; + } + } + + nFree -= nDirtyCacheChunks; + + nFree -= ((dev->nReservedBlocks + 1) * dev->nChunksPerBlock); + + /* Now we figure out how much to reserve for the checkpoint and report that... */ + blocksForCheckpoint = dev->nCheckpointReservedBlocks - dev->blocksInCheckpoint; + if(blocksForCheckpoint < 0) + blocksForCheckpoint = 0; + + nFree -= (blocksForCheckpoint * dev->nChunksPerBlock); + + if (nFree < 0) + nFree = 0; + + return nFree; + +} + +static int yaffs_freeVerificationFailures; + +static void yaffs_VerifyFreeChunks(yaffs_Device * dev) +{ + int counted; + int difference; + + if(yaffs_SkipVerification(dev)) + return; + + counted = yaffs_CountFreeChunks(dev); + + difference = dev->nFreeChunks - counted; + + if (difference) { + T(YAFFS_TRACE_ALWAYS, + (TSTR("Freechunks verification failure %d %d %d" TENDSTR), + dev->nFreeChunks, counted, difference)); + yaffs_freeVerificationFailures++; + } +} + +/*---------------------------------------- YAFFS test code ----------------------*/ + +#define yaffs_CheckStruct(structure,syze, name) \ + if(sizeof(structure) != syze) \ + { \ + T(YAFFS_TRACE_ALWAYS,(TSTR("%s should be %d but is %d\n" TENDSTR),\ + 
name,syze,sizeof(structure))); \ + return YAFFS_FAIL; \ + } + +static int yaffs_CheckStructures(void) +{ +/* yaffs_CheckStruct(yaffs_Tags,8,"yaffs_Tags") */ +/* yaffs_CheckStruct(yaffs_TagsUnion,8,"yaffs_TagsUnion") */ +/* yaffs_CheckStruct(yaffs_Spare,16,"yaffs_Spare") */ +#ifndef CONFIG_YAFFS_TNODE_LIST_DEBUG + yaffs_CheckStruct(yaffs_Tnode, 2 * YAFFS_NTNODES_LEVEL0, "yaffs_Tnode") +#endif + yaffs_CheckStruct(yaffs_ObjectHeader, 512, "yaffs_ObjectHeader") + + return YAFFS_OK; +} diff --git a/fs/yaffs2/yaffs_guts.h b/fs/yaffs2/yaffs_guts.h new file mode 100755 index 0000000..0162b5d --- /dev/null +++ b/fs/yaffs2/yaffs_guts.h @@ -0,0 +1,900 @@ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. + */ + +#ifndef __YAFFS_GUTS_H__ +#define __YAFFS_GUTS_H__ + +#include "devextras.h" +#include "yportenv.h" + +#define YAFFS_OK 1 +#define YAFFS_FAIL 0 + +/* Give us a Y=0x59, + * Give us an A=0x41, + * Give us an FF=0xFF + * Give us an S=0x53 + * And what have we got... 
+ */ +#define YAFFS_MAGIC 0x5941FF53 + +#define YAFFS_NTNODES_LEVEL0 16 +#define YAFFS_TNODES_LEVEL0_BITS 4 +#define YAFFS_TNODES_LEVEL0_MASK 0xf + +#define YAFFS_NTNODES_INTERNAL (YAFFS_NTNODES_LEVEL0 / 2) +#define YAFFS_TNODES_INTERNAL_BITS (YAFFS_TNODES_LEVEL0_BITS - 1) +#define YAFFS_TNODES_INTERNAL_MASK 0x7 +#define YAFFS_TNODES_MAX_LEVEL 6 + +#ifndef CONFIG_YAFFS_NO_YAFFS1 +#define YAFFS_BYTES_PER_SPARE 16 +#define YAFFS_BYTES_PER_CHUNK 512 +#define YAFFS_CHUNK_SIZE_SHIFT 9 +#define YAFFS_CHUNKS_PER_BLOCK 32 +#define YAFFS_BYTES_PER_BLOCK (YAFFS_CHUNKS_PER_BLOCK*YAFFS_BYTES_PER_CHUNK) +#endif + +#define YAFFS_MIN_YAFFS2_CHUNK_SIZE 1024 +#define YAFFS_MIN_YAFFS2_SPARE_SIZE 32 + +#define YAFFS_MAX_CHUNK_ID 0x000FFFFF + +#define YAFFS_UNUSED_OBJECT_ID 0x0003FFFF + +#define YAFFS_ALLOCATION_NOBJECTS 100 +#define YAFFS_ALLOCATION_NTNODES 100 +#define YAFFS_ALLOCATION_NLINKS 100 + +#define YAFFS_NOBJECT_BUCKETS 256 + + +#define YAFFS_OBJECT_SPACE 0x40000 + +#define YAFFS_CHECKPOINT_VERSION 7//3 + +#ifdef CONFIG_YAFFS_UNICODE +#define YAFFS_MAX_NAME_LENGTH 127 +#define YAFFS_MAX_ALIAS_LENGTH 79 +#else +#define YAFFS_MAX_NAME_LENGTH 255 +#define YAFFS_MAX_ALIAS_LENGTH 159 +#endif + +#define YAFFS_SHORT_NAME_LENGTH 15 + +/* Some special object ids for pseudo objects */ +#define YAFFS_OBJECTID_ROOT 1 +#define YAFFS_OBJECTID_LOSTNFOUND 2 +#define YAFFS_OBJECTID_UNLINKED 3 +#define YAFFS_OBJECTID_DELETED 4 + +/* Pseudo object ids for checkpointing */ +#define YAFFS_OBJECTID_SB_HEADER 0x10 +#define YAFFS_OBJECTID_CHECKPOINT_DATA 0x20 +#define YAFFS_SEQUENCE_CHECKPOINT_DATA 0x21 + +/* */ + +#define YAFFS_MAX_SHORT_OP_CACHES 20 + +#define YAFFS_N_TEMP_BUFFERS 4 + +/* We limit the number of attempts at successfully saving a chunk of data. + * Small-page devices have 32 pages per block; large-page devices have 64. + * Default to something in the order of 5 to 10 blocks worth of chunks.
+ */ +#define YAFFS_WR_ATTEMPTS (5*64) + +/* Sequence numbers are used in YAFFS2 to determine block allocation order. + * The range is limited slightly to help distinguish bad numbers from good. + * This also allows us to perhaps in the future use special numbers for + * special purposes. + * EFFFFF00 allows the allocation of 8 blocks per second (~1Mbytes) for 15 years, + * and is a larger number than the lifetime of a 2GB device. + */ +#define YAFFS_LOWEST_SEQUENCE_NUMBER 0x00001000 +#define YAFFS_HIGHEST_SEQUENCE_NUMBER 0xEFFFFF00 + +/* ChunkCache is used for short read/write operations.*/ +typedef struct { + struct yaffs_ObjectStruct *object; + int chunkId; + int lastUse; + int dirty; + int nBytes; /* Only valid if the cache is dirty */ + int locked; /* Can't push out or flush while locked. */ + __u8 *data; +} yaffs_ChunkCache; + + + +/* Tags structures in RAM + * NB This uses bitfield. Bitfields should not straddle a u32 boundary otherwise + * the structure size will get blown out. + */ + +#ifndef CONFIG_YAFFS_NO_YAFFS1 +typedef struct { + unsigned chunkId:20; + unsigned serialNumber:2; + unsigned byteCount:10; + unsigned objectId:18; + unsigned ecc:12; + unsigned unusedStuff:2; + +} yaffs_Tags; + +typedef union { + yaffs_Tags asTags; + __u8 asBytes[8]; +} yaffs_TagsUnion; + +#endif + +/* Stuff used for extended tags in YAFFS2 */ + +typedef enum { + YAFFS_ECC_RESULT_UNKNOWN, + YAFFS_ECC_RESULT_NO_ERROR, + YAFFS_ECC_RESULT_FIXED, + YAFFS_ECC_RESULT_UNFIXED +} yaffs_ECCResult; + +typedef enum { + YAFFS_OBJECT_TYPE_UNKNOWN, + YAFFS_OBJECT_TYPE_FILE, + YAFFS_OBJECT_TYPE_SYMLINK, + YAFFS_OBJECT_TYPE_DIRECTORY, + YAFFS_OBJECT_TYPE_HARDLINK, + YAFFS_OBJECT_TYPE_SPECIAL +} yaffs_ObjectType; + +#define YAFFS_OBJECT_TYPE_MAX YAFFS_OBJECT_TYPE_SPECIAL + +typedef struct { + + unsigned validMarker0; + unsigned chunkUsed; /* Status of the chunk: used or unused */ + unsigned objectId; /* If 0 then this is not part of an object (unused) */ + unsigned chunkId; /* If 0 then this 
is a header, else a data chunk */ + unsigned byteCount; /* Only valid for data chunks */ + + /* The following stuff only has meaning when we read */ + yaffs_ECCResult eccResult; + unsigned blockBad; + + /* YAFFS 1 stuff */ + unsigned chunkDeleted; /* The chunk is marked deleted */ + unsigned serialNumber; /* Yaffs1 2-bit serial number */ + + /* YAFFS2 stuff */ + unsigned sequenceNumber; /* The sequence number of this block */ + + /* Extra info if this is an object header (YAFFS2 only) */ + + unsigned extraHeaderInfoAvailable; /* There is extra info available if this is not zero */ + unsigned extraParentObjectId; /* The parent object */ + unsigned extraIsShrinkHeader; /* Is it a shrink header? */ + unsigned extraShadows; /* Does this shadow another object? */ + + yaffs_ObjectType extraObjectType; /* What object type? */ + + unsigned extraFileLength; /* Length if it is a file */ + unsigned extraEquivalentObjectId; /* Equivalent object Id if it is a hard link */ + + unsigned validMarker1; + +} yaffs_ExtendedTags; + +/* Spare structure for YAFFS1 */ +typedef struct { + __u8 tagByte0; + __u8 tagByte1; + __u8 tagByte2; + __u8 tagByte3; + __u8 pageStatus; /* set to 0 to delete the chunk */ + __u8 blockStatus; + __u8 tagByte4; + __u8 tagByte5; + __u8 ecc1[3]; + __u8 tagByte6; + __u8 tagByte7; + __u8 ecc2[3]; +} yaffs_Spare; + +/*Special structure for passing through to mtd */ +struct yaffs_NANDSpare { + yaffs_Spare spare; + int eccres1; + int eccres2; +}; + +/* Block data in RAM */ + +typedef enum { + YAFFS_BLOCK_STATE_UNKNOWN = 0, + + YAFFS_BLOCK_STATE_SCANNING, + YAFFS_BLOCK_STATE_NEEDS_SCANNING, + /* The block might have something on it (ie it is allocating or full, perhaps empty) + * but it needs to be scanned to determine its true state. + * This state is only valid during yaffs_Scan. 
+ * NB We tolerate empty because the pre-scanner might be incapable of deciding + * However, if this state is returned on a YAFFS2 device, then we expect a sequence number + */ + + YAFFS_BLOCK_STATE_EMPTY, + /* This block is empty */ + + YAFFS_BLOCK_STATE_ALLOCATING, + /* This block is partially allocated. + * At least one page holds valid data. + * This is the one currently being used for page + * allocation. Should never be more than one of these + */ + + YAFFS_BLOCK_STATE_FULL, + /* All the pages in this block have been allocated. + */ + + YAFFS_BLOCK_STATE_DIRTY, + /* All pages have been allocated and deleted. + * Erase me, reuse me. + */ + + YAFFS_BLOCK_STATE_CHECKPOINT, + /* This block is assigned to holding checkpoint data. + */ + + YAFFS_BLOCK_STATE_COLLECTING, + /* This block is being garbage collected */ + + YAFFS_BLOCK_STATE_DEAD + /* This block has failed and is not in use */ +} yaffs_BlockState; + +#define YAFFS_NUMBER_OF_BLOCK_STATES (YAFFS_BLOCK_STATE_DEAD + 1) + + +typedef struct { + + int softDeletions:10; /* number of soft deleted pages */ + int pagesInUse:10; /* number of pages in use */ + unsigned blockState:4; /* One of the above block states. NB use unsigned because enum is sometimes an int */ + __u32 needsRetiring:1; /* Data has failed on this block, need to get valid data off */ + /* and retire the block. */ + __u32 skipErasedCheck: 1; /* If this is set we can skip the erased check on this block */ + __u32 gcPrioritise: 1; /* An ECC check or blank check has failed on this block. 
+ It should be prioritised for GC */ + __u32 chunkErrorStrikes:3; /* How many times we've had ecc etc failures on this block and tried to reuse it */ + + __u32 hasShrinkHeader:1; /* This block has at least one shrink object header */ + __u32 sequenceNumber; /* block sequence number for yaffs2 */ + +} yaffs_BlockInfo; + +/* -------------------------- Object structure -------------------------------*/ +/* This is the object structure as stored on NAND */ + +typedef struct { + yaffs_ObjectType type; + + /* Apply to everything */ + int parentObjectId; + __u16 sum__NoLongerUsed; /* checksum of name. No longer used */ + YCHAR name[YAFFS_MAX_NAME_LENGTH + 1]; + + /* Thes following apply to directories, files, symlinks - not hard links */ + __u32 yst_mode; /* protection */ + +#ifdef CONFIG_YAFFS_WINCE + __u32 notForWinCE[5]; +#else + __u32 yst_uid; + __u32 yst_gid; + __u32 yst_atime; + __u32 yst_mtime; + __u32 yst_ctime; +#endif + + /* File size applies to files only */ + int fileSize; + + /* Equivalent object id applies to hard links only. */ + int equivalentObjectId; + + /* Alias is for symlinks only. 
*/ + YCHAR alias[YAFFS_MAX_ALIAS_LENGTH + 1]; + + __u32 yst_rdev; /* device stuff for block and char devices (major/min) */ + +#ifdef CONFIG_YAFFS_WINCE + __u32 win_ctime[2]; + __u32 win_atime[2]; + __u32 win_mtime[2]; + __u32 roomToGrow[4]; +#else + __u32 roomToGrow[10]; +#endif + + int shadowsObject; /* This object header shadows the specified object if > 0 */ + + /* isShrink applies to object headers written when we shrink the file (ie resize) */ + __u32 isShrink; + +} yaffs_ObjectHeader; + +/*--------------------------- Tnode -------------------------- */ + +union yaffs_Tnode_union { +#ifdef CONFIG_YAFFS_TNODE_LIST_DEBUG + union yaffs_Tnode_union *internal[YAFFS_NTNODES_INTERNAL + 1]; +#else + union yaffs_Tnode_union *internal[YAFFS_NTNODES_INTERNAL]; +#endif +/* __u16 level0[YAFFS_NTNODES_LEVEL0]; */ + +}; + +typedef union yaffs_Tnode_union yaffs_Tnode; + +struct yaffs_TnodeList_struct { + struct yaffs_TnodeList_struct *next; + yaffs_Tnode *tnodes; +}; + +typedef struct yaffs_TnodeList_struct yaffs_TnodeList; + +/*------------------------ Object -----------------------------*/ +/* An object can be one of: + * - a directory (no data, has children links + * - a regular file (data.... not prunes :->). + * - a symlink [symbolic link] (the alias). 
+ * - a hard link + */ + +typedef struct { + __u32 fileSize; + __u32 scannedFileSize; + __u32 shrinkSize; + int topLevel; + yaffs_Tnode *top; +} yaffs_FileStructure; + +typedef struct { + struct list_head children; /* list of child links */ +} yaffs_DirectoryStructure; + +typedef struct { + YCHAR *alias; +} yaffs_SymLinkStructure; + +typedef struct { + struct yaffs_ObjectStruct *equivalentObject; + __u32 equivalentObjectId; +} yaffs_HardLinkStructure; + +typedef union { + yaffs_FileStructure fileVariant; + yaffs_DirectoryStructure directoryVariant; + yaffs_SymLinkStructure symLinkVariant; + yaffs_HardLinkStructure hardLinkVariant; +} yaffs_ObjectVariant; + +struct yaffs_ObjectStruct { + __u8 deleted:1; /* This should only apply to unlinked files. */ + __u8 softDeleted:1; /* it has also been soft deleted */ + __u8 unlinked:1; /* An unlinked file. The file should be in the unlinked directory.*/ + __u8 fake:1; /* A fake object has no presence on NAND. */ + __u8 renameAllowed:1; /* Some objects are not allowed to be renamed. */ + __u8 unlinkAllowed:1; + __u8 dirty:1; /* the object needs to be written to flash */ + __u8 valid:1; /* When the file system is being loaded up, this + * object might be created before the data + * is available (ie. file data records appear before the header). + */ + __u8 lazyLoaded:1; /* This object has been lazy loaded and is missing some detail */ + + __u8 deferedFree:1; /* For Linux kernel. Object is removed from NAND, but is + * still in the inode cache. Free of object is defered. + * until the inode is released. + */ + + __u8 serial; /* serial number of chunk in NAND. 
Cached here */ + __u16 sum; /* sum of the name to speed searching */ + + struct yaffs_DeviceStruct *myDev; /* The device I'm on */ + + struct list_head hashLink; /* list of objects in this hash bucket */ + + struct list_head hardLinks; /* all the equivalent hard linked objects */ + + /* directory structure stuff */ + /* also used for linking up the free list */ + struct yaffs_ObjectStruct *parent; + struct list_head siblings; + + /* Where's my object header in NAND? */ + int chunkId; + + int nDataChunks; /* Number of data chunks attached to the file. */ + + __u32 objectId; /* the object id value */ + + __u32 yst_mode; + +#ifdef CONFIG_YAFFS_SHORT_NAMES_IN_RAM + YCHAR shortName[YAFFS_SHORT_NAME_LENGTH + 1]; +#endif + +/* XXX U-BOOT XXX */ +/* #ifndef __KERNEL__ */ + __u32 inUse; +/* #endif */ + +#ifdef CONFIG_YAFFS_WINCE + __u32 win_ctime[2]; + __u32 win_mtime[2]; + __u32 win_atime[2]; +#else + __u32 yst_uid; + __u32 yst_gid; + __u32 yst_atime; + __u32 yst_mtime; + __u32 yst_ctime; +#endif + + __u32 yst_rdev; + +/* XXX U-BOOT XXX */ +/* #ifndef __KERNEL__ */ + struct inode *myInode; +/* #endif */ + + yaffs_ObjectType variantType; + + yaffs_ObjectVariant variant; + +}; + +typedef struct yaffs_ObjectStruct yaffs_Object; + +struct yaffs_ObjectList_struct { + yaffs_Object *objects; + struct yaffs_ObjectList_struct *next; +}; + +typedef struct yaffs_ObjectList_struct yaffs_ObjectList; + +typedef struct { + struct list_head list; + int count; +} yaffs_ObjectBucket; + + +/* yaffs_CheckpointObject holds the definition of an object as dumped + * by checkpointing. 
+ */ + +typedef struct { + int structType; + __u32 objectId; + __u32 parentId; + int chunkId; + + yaffs_ObjectType variantType:3; + __u8 deleted:1; + __u8 softDeleted:1; + __u8 unlinked:1; + __u8 fake:1; + __u8 renameAllowed:1; + __u8 unlinkAllowed:1; + __u8 serial; + + int nDataChunks; + loff_t fileSizeOrEquivalentObjectId; + +}yaffs_CheckpointObject; + +/*--------------------- Temporary buffers ---------------- + * + * These are chunk-sized working buffers. Each device has a few + */ + +typedef struct { + __u8 *buffer; + int line; /* track from whence this buffer was allocated */ + int maxLine; +} yaffs_TempBuffer; + +/*----------------- Device ---------------------------------*/ + +struct yaffs_DeviceStruct { + struct list_head devList; + const char *name; + + /* Entry parameters set up way early. Yaffs sets up the rest.*/ + int nDataBytesPerChunk; /* Should be a power of 2 >= 512 */ + int nChunksPerBlock; /* does not need to be a power of 2 */ + int nBytesPerSpare; /* spare area size */ + int startBlock; /* Start block we're allowed to use */ + int endBlock; /* End block we're allowed to use */ + int nReservedBlocks; /* We want this tuneable so that we can reduce */ + /* reserved blocks on NOR and RAM. */ + + + /* Stuff used by the shared space checkpointing mechanism */ + /* If this value is zero, then this mechanism is disabled */ + + int nCheckpointReservedBlocks; /* Blocks to reserve for checkpoint data */ + + + + + int nShortOpCaches; /* If <= 0, then short op caching is disabled, else + * the number of short op caches (don't use too many) + */ + + int useHeaderFileSize; /* Flag to determine if we should use file sizes from the header */ + + int useNANDECC; /* Flag to decide whether or not to use NANDECC */ + + void *genericDevice; /* Pointer to device context + * On an mtd this holds the mtd pointer. 
+ */ + void *superBlock; + + /* NAND access functions (Must be set before calling YAFFS)*/ + + int (*writeChunkToNAND) (struct yaffs_DeviceStruct * dev, + int chunkInNAND, const __u8 * data, + const yaffs_Spare * spare); + int (*readChunkFromNAND) (struct yaffs_DeviceStruct * dev, + int chunkInNAND, __u8 * data, + yaffs_Spare * spare); + int (*eraseBlockInNAND) (struct yaffs_DeviceStruct * dev, + int blockInNAND); + int (*initialiseNAND) (struct yaffs_DeviceStruct * dev); + + int (*writeChunkWithTagsToNAND) (struct yaffs_DeviceStruct * dev, + int chunkInNAND, const __u8 * data, + const yaffs_ExtendedTags * tags); + int (*readChunkWithTagsFromNAND) (struct yaffs_DeviceStruct * dev, + int chunkInNAND, __u8 * data, + yaffs_ExtendedTags * tags); + int (*markNANDBlockBad) (struct yaffs_DeviceStruct * dev, int blockNo); + int (*queryNANDBlock) (struct yaffs_DeviceStruct * dev, int blockNo, + yaffs_BlockState * state, int *sequenceNumber); + + int isYaffs2; + + /* The removeObjectCallback function must be supplied by OS flavours that + * need it. The Linux kernel does not use this, but yaffs direct does use + * it to implement the faster readdir + */ + void (*removeObjectCallback)(struct yaffs_ObjectStruct *obj); + + /* Callback to mark the superblock dirsty */ + void (*markSuperBlockDirty)(void * superblock); + + int wideTnodesDisabled; /* Set to disable wide tnodes */ + + + /* End of stuff that must be set before initialisation. */ + + /* Checkpoint control. Can be set before or after initialisation */ + __u8 skipCheckpointRead; + __u8 skipCheckpointWrite; + + /* Runtime parameters. Set up by YAFFS. */ + + __u16 chunkGroupBits; /* 0 for devices <= 32MB. 
else log2(nchunks) - 16 */ + __u16 chunkGroupSize; /* == 2^^chunkGroupBits */ + + /* Stuff to support wide tnodes */ + __u32 tnodeWidth; + __u32 tnodeMask; + + /* Stuff to support various file offses to chunk/offset translations */ + /* "Crumbs" for nDataBytesPerChunk not being a power of 2 */ + __u32 crumbMask; + __u32 crumbShift; + __u32 crumbsPerChunk; + + /* Straight shifting for nDataBytesPerChunk being a power of 2 */ + __u32 chunkShift; + __u32 chunkMask; + + +/* XXX U-BOOT XXX */ +#if 0 +#ifndef __KERNEL__ + + struct semaphore sem; /* Semaphore for waiting on erasure.*/ + struct semaphore grossLock; /* Gross locking semaphore */ + void (*putSuperFunc) (struct super_block * sb); +#endif +#endif + __u8 *spareBuffer; /* For mtdif2 use. Don't know the size of the buffer + * at compile time so we have to allocate it. + */ + + int isMounted; + + int isCheckpointed; + + + /* Stuff to support block offsetting to support start block zero */ + int internalStartBlock; + int internalEndBlock; + int blockOffset; + int chunkOffset; + + + /* Runtime checkpointing stuff */ + int checkpointPageSequence; /* running sequence number of checkpoint pages */ + int checkpointByteCount; + int checkpointByteOffset; + __u8 *checkpointBuffer; + int checkpointOpenForWrite; + int blocksInCheckpoint; + int checkpointCurrentChunk; + int checkpointCurrentBlock; + int checkpointNextBlock; + int *checkpointBlockList; + int checkpointMaxBlocks; + __u32 checkpointSum; + __u32 checkpointXor; + + /* Block Info */ + yaffs_BlockInfo *blockInfo; + __u8 *chunkBits; /* bitmap of chunks in use */ + unsigned blockInfoAlt:1; /* was allocated using alternative strategy */ + unsigned chunkBitsAlt:1; /* was allocated using alternative strategy */ + int chunkBitmapStride; /* Number of bytes of chunkBits per block. + * Must be consistent with nChunksPerBlock. 
+ */ + + int nErasedBlocks; + int allocationBlock; /* Current block being allocated off */ + __u32 allocationPage; + int allocationBlockFinder; /* Used to search for next allocation block */ + + /* Runtime state */ + int nTnodesCreated; + yaffs_Tnode *freeTnodes; + int nFreeTnodes; + yaffs_TnodeList *allocatedTnodeList; + + int isDoingGC; + + int nObjectsCreated; + yaffs_Object *freeObjects; + int nFreeObjects; + + yaffs_ObjectList *allocatedObjectList; + + yaffs_ObjectBucket objectBucket[YAFFS_NOBJECT_BUCKETS]; + + int nFreeChunks; + + int currentDirtyChecker; /* Used to find current dirtiest block */ + + __u32 *gcCleanupList; /* objects to delete at the end of a GC. */ + int nonAggressiveSkip; /* GC state/mode */ + + /* Statistcs */ + int nPageWrites; + int nPageReads; + int nBlockErasures; + int nErasureFailures; + int nGCCopies; + int garbageCollections; + int passiveGarbageCollections; + int nRetriedWrites; + int nRetiredBlocks; + int eccFixed; + int eccUnfixed; + int tagsEccFixed; + int tagsEccUnfixed; + int nDeletions; + int nUnmarkedDeletions; + + int hasPendingPrioritisedGCs; /* We think this device might have pending prioritised gcs */ + + /* Special directories */ + yaffs_Object *rootDir; + yaffs_Object *lostNFoundDir; + + /* Buffer areas for storing data to recover from write failures TODO + * __u8 bufferedData[YAFFS_CHUNKS_PER_BLOCK][YAFFS_BYTES_PER_CHUNK]; + * yaffs_Spare bufferedSpare[YAFFS_CHUNKS_PER_BLOCK]; + */ + + int bufferedBlock; /* Which block is buffered here? */ + int doingBufferedBlockRewrite; + + yaffs_ChunkCache *srCache; + int srLastUse; + + int cacheHits; + + /* Stuff for background deletion and unlinked files.*/ + yaffs_Object *unlinkedDir; /* Directory where unlinked and deleted files live. */ + yaffs_Object *deletedDir; /* Directory where deleted objects are sent to disappear. 
*/ + yaffs_Object *unlinkedDeletion; /* Current file being background deleted.*/ + int nDeletedFiles; /* Count of files awaiting deletion;*/ + int nUnlinkedFiles; /* Count of unlinked files. */ + int nBackgroundDeletions; /* Count of background deletions. */ + + + yaffs_TempBuffer tempBuffer[YAFFS_N_TEMP_BUFFERS]; + int maxTemp; + int unmanagedTempAllocations; + int unmanagedTempDeallocations; + + /* yaffs2 runtime stuff */ + unsigned sequenceNumber; /* Sequence number of currently allocating block */ + unsigned oldestDirtySequence; + +}; + +typedef struct yaffs_DeviceStruct yaffs_Device; + +/* The static layout of block usage etc is stored in the super block header */ +typedef struct { + int StructType; + int version; + int checkpointStartBlock; + int checkpointEndBlock; + int startBlock; + int endBlock; + int rfu[100]; +} yaffs_SuperBlockHeader; + +/* The CheckpointDevice structure holds the device information that changes at runtime and + * must be preserved over unmount/mount cycles. + */ +typedef struct { + int structType; + int nErasedBlocks; + int allocationBlock; /* Current block being allocated off */ + __u32 allocationPage; + int nFreeChunks; + + int nDeletedFiles; /* Count of files awaiting deletion;*/ + int nUnlinkedFiles; /* Count of unlinked files. */ + int nBackgroundDeletions; /* Count of background deletions.
*/ + + /* yaffs2 runtime stuff */ + unsigned sequenceNumber; /* Sequence number of currently allocating block */ +// unsigned oldestDirtySequence; + +} yaffs_CheckpointDevice; + + +typedef struct { + int structType; + __u32 magic; + __u32 version; + __u32 head; +} yaffs_CheckpointValidity; + +/* Function to manipulate block info */ +static Y_INLINE yaffs_BlockInfo *yaffs_GetBlockInfo(yaffs_Device * dev, int blk) +{ + if (blk < dev->internalStartBlock || blk > dev->internalEndBlock) { + T(YAFFS_TRACE_ERROR, + (TSTR + ("**>> yaffs: getBlockInfo block %d is not valid" TENDSTR), + blk)); + YBUG(); + } + return &dev->blockInfo[blk - dev->internalStartBlock]; +} + +/*----------------------- YAFFS Functions -----------------------*/ + +int yaffs_GutsInitialise(yaffs_Device * dev); +void yaffs_Deinitialise(yaffs_Device * dev); + +int yaffs_GetNumberOfFreeChunks(yaffs_Device * dev); + +int yaffs_RenameObject(yaffs_Object * oldDir, const YCHAR * oldName, + yaffs_Object * newDir, const YCHAR * newName); + +int yaffs_Unlink(yaffs_Object * dir, const YCHAR * name); +int yaffs_DeleteFile(yaffs_Object * obj); + +int yaffs_GetObjectName(yaffs_Object * obj, YCHAR * name, int buffSize); +int yaffs_GetObjectFileLength(yaffs_Object * obj); +int yaffs_GetObjectInode(yaffs_Object * obj); +unsigned yaffs_GetObjectType(yaffs_Object * obj); +int yaffs_GetObjectLinkCount(yaffs_Object * obj); + +int yaffs_SetAttributes(yaffs_Object * obj, struct iattr *attr); +int yaffs_GetAttributes(yaffs_Object * obj, struct iattr *attr); + +/* File operations */ +int yaffs_ReadDataFromFile(yaffs_Object * obj, __u8 * buffer, loff_t offset, + int nBytes); +int yaffs_WriteDataToFile(yaffs_Object * obj, const __u8 * buffer, loff_t offset, + int nBytes, int writeThrough); +int yaffs_ResizeFile(yaffs_Object * obj, loff_t newSize); + +yaffs_Object *yaffs_MknodFile(yaffs_Object * parent, const YCHAR * name, + __u32 mode, __u32 uid, __u32 gid); +int yaffs_FlushFile(yaffs_Object * obj, int updateTime); + +/* 
Flushing and checkpointing */ +void yaffs_FlushEntireDeviceCache(yaffs_Device *dev); + +int yaffs_CheckpointSave(yaffs_Device *dev); +int yaffs_CheckpointRestore(yaffs_Device *dev); + +/* Directory operations */ +yaffs_Object *yaffs_MknodDirectory(yaffs_Object * parent, const YCHAR * name, + __u32 mode, __u32 uid, __u32 gid); +yaffs_Object *yaffs_FindObjectByName(yaffs_Object * theDir, const YCHAR * name); +int yaffs_ApplyToDirectoryChildren(yaffs_Object * theDir, + int (*fn) (yaffs_Object *)); + +yaffs_Object *yaffs_FindObjectByNumber(yaffs_Device * dev, __u32 number); + +/* Link operations */ +yaffs_Object *yaffs_Link(yaffs_Object * parent, const YCHAR * name, + yaffs_Object * equivalentObject); + +yaffs_Object *yaffs_GetEquivalentObject(yaffs_Object * obj); + +/* Symlink operations */ +yaffs_Object *yaffs_MknodSymLink(yaffs_Object * parent, const YCHAR * name, + __u32 mode, __u32 uid, __u32 gid, + const YCHAR * alias); +YCHAR *yaffs_GetSymlinkAlias(yaffs_Object * obj); + +/* Special inodes (fifos, sockets and devices) */ +yaffs_Object *yaffs_MknodSpecial(yaffs_Object * parent, const YCHAR * name, + __u32 mode, __u32 uid, __u32 gid, __u32 rdev); + +/* Special directories */ +yaffs_Object *yaffs_Root(yaffs_Device * dev); +yaffs_Object *yaffs_LostNFound(yaffs_Device * dev); + +#ifdef CONFIG_YAFFS_WINCE +/* CONFIG_YAFFS_WINCE special stuff */ +void yfsd_WinFileTimeNow(__u32 target[2]); +#endif + +/* XXX U-BOOT XXX */ +#if 0 +#ifndef __KERNEL__ +void yaffs_HandleDeferedFree(yaffs_Object * obj); +#endif +#endif + +/* Debug dump */ +int yaffs_DumpObject(yaffs_Object * obj); + +void yaffs_GutsTest(yaffs_Device * dev); + +/* A few useful functions */ +void yaffs_InitialiseTags(yaffs_ExtendedTags * tags); +void yaffs_DeleteChunk(yaffs_Device * dev, int chunkId, int markNAND, int lyn); +int yaffs_CheckFF(__u8 * buffer, int nBytes); +void yaffs_HandleChunkError(yaffs_Device *dev, yaffs_BlockInfo *bi); + +#endif diff --git a/fs/yaffs2/yaffs_malloc.h 
b/fs/yaffs2/yaffs_malloc.h new file mode 100755 index 0000000..3ed6175 --- /dev/null +++ b/fs/yaffs2/yaffs_malloc.h @@ -0,0 +1,25 @@ +#ifndef __YAFFS_MALLOC_H__ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. + */ + +/* XXX U-BOOT XXX */ +#if 0 +#include <stdlib.h> +#endif + +void *yaffs_malloc(size_t size); +void yaffs_free(void *ptr); + +#endif diff --git a/fs/yaffs2/yaffs_mtdif.c b/fs/yaffs2/yaffs_mtdif.c new file mode 100755 index 0000000..c31b1f2 --- /dev/null +++ b/fs/yaffs2/yaffs_mtdif.c @@ -0,0 +1,81 @@ +/* + * YAFFS: Yet Another Flash File System. A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* XXX U-BOOT XXX */ +#include <common.h> + +const char *yaffs_mtdif_c_version = + "$Id: yaffs_mtdif.c,v 1.19 2007/02/14 01:09:06 wookey Exp $"; + +#include "yportenv.h" + + +#include "yaffs_mtdif.h" + +#include "linux/types.h" +#include "linux/time.h" +#include "linux/mtd/nand.h" + + +extern struct nand_chip nand_dev_desc[]; + +int erase_yaffs2_nand(unsigned int block, int nanddev); + +static inline void translate_spare2oob(const yaffs_Spare *spare, __u8 *oob) +{ + oob[0] = spare->tagByte0; + oob[1] = spare->tagByte1; + oob[2] = spare->tagByte2; + oob[3] = spare->tagByte3; + oob[4] = spare->tagByte4; + oob[5] = spare->tagByte5 & 0x3f; + oob[5] |= spare->blockStatus == 'Y' ? 0: 0x80; + oob[5] |= spare->pageStatus == 0 ? 0: 0x40; + oob[6] = spare->tagByte6; + oob[7] = spare->tagByte7; +} + +static inline void translate_oob2spare(yaffs_Spare *spare, __u8 *oob) +{ + struct yaffs_NANDSpare *nspare = (struct yaffs_NANDSpare *)spare; + spare->tagByte0 = oob[0]; + spare->tagByte1 = oob[1]; + spare->tagByte2 = oob[2]; + spare->tagByte3 = oob[3]; + spare->tagByte4 = oob[4]; + spare->tagByte5 = oob[5] == 0xff ? 0xff : oob[5] & 0x3f; + spare->blockStatus = oob[5] & 0x80 ? 0xff : 'Y'; + spare->pageStatus = oob[5] & 0x40 ? 0xff : 0; + spare->ecc1[0] = spare->ecc1[1] = spare->ecc1[2] = 0xff; + spare->tagByte6 = oob[6]; + spare->tagByte7 = oob[7]; + spare->ecc2[0] = spare->ecc2[1] = spare->ecc2[2] = 0xff; + + nspare->eccres1 = nspare->eccres2 = 0; /* FIXME */ +} + + +int nandmtd_EraseBlockInNAND(yaffs_Device * dev, int blockNumber) +{ + int retval = erase_yaffs2_nand(blockNumber, 0); + + if (retval == 0) + return YAFFS_OK; + else + return YAFFS_FAIL; +} + +int nandmtd_InitialiseNAND(yaffs_Device * dev) +{ + return YAFFS_OK; +} diff --git a/fs/yaffs2/yaffs_mtdif.h b/fs/yaffs2/yaffs_mtdif.h new file mode 100755 index 0000000..317600c --- /dev/null +++ b/fs/yaffs2/yaffs_mtdif.h @@ -0,0 +1,27 @@ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. 
+ * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. + */ + +#ifndef __YAFFS_MTDIF_H__ +#define __YAFFS_MTDIF_H__ + +#include "yaffs_guts.h" + +int nandmtd_WriteChunkToNAND(yaffs_Device * dev, int chunkInNAND, + const __u8 * data, const yaffs_Spare * spare); +int nandmtd_ReadChunkFromNAND(yaffs_Device * dev, int chunkInNAND, __u8 * data, + yaffs_Spare * spare); +int nandmtd_EraseBlockInNAND(yaffs_Device * dev, int blockNumber); +int nandmtd_InitialiseNAND(yaffs_Device * dev); +#endif diff --git a/fs/yaffs2/yaffs_mtdif2.c b/fs/yaffs2/yaffs_mtdif2.c new file mode 100755 index 0000000..fc20d3c --- /dev/null +++ b/fs/yaffs2/yaffs_mtdif2.c @@ -0,0 +1,168 @@ +/* + * YAFFS: Yet Another Flash File System. A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* mtd interface for YAFFS2 */ + +/* XXX U-BOOT XXX */ +#include <common.h> +#include "asm/errno.h" + +const char *yaffs_mtdif2_c_version = + "$Id: yaffs_mtdif2.c,v 1.17 2007/02/14 01:09:06 wookey Exp $"; + +#include "yportenv.h" + + +#include "yaffs_mtdif2.h" + +#include "linux/types.h" +#include "linux/time.h" + +#include "yaffs_packedtags2.h" + +extern int write_yaffs2_nand(unsigned int addr, const __u8 * data); +extern int write_oob_yaffs2_nand(unsigned int addr, const __u8 * data, const __u8 * oob, unsigned int ooblen); +extern int read_yaffs2_nand(unsigned int addr, const __u8 * data); +extern int read_oob_yaffs2_nand(unsigned int addr, const __u8 * data, const __u8 * oob, unsigned int ooblen); +extern int nand_block_isbad(unsigned int block); +extern int nand_block_markbad(unsigned int block); +extern void print_nand_buf(unsigned char * rvalue, int length); + +int nandmtd2_WriteChunkWithTagsToNAND(yaffs_Device * dev, int chunkInNAND, + const __u8 * data, + const yaffs_ExtendedTags * tags) +{ + int retval = 0; + + unsigned int addr = chunkInNAND * dev->nDataBytesPerChunk; + yaffs_PackedTags2 pt; + T(YAFFS_TRACE_MTD, + (TSTR + ("nandmtd2_WriteChunkWithTagsToNAND chunk %d data %p tags %p" + TENDSTR), chunkInNAND, data, tags)); + + if (tags) + yaffs_PackTags2(&pt, tags); + if (data) { + retval = write_oob_yaffs2_nand(addr, data, (__u8 *) &pt, sizeof(pt)); + } + if (retval == 0) + return YAFFS_OK; + else + return YAFFS_FAIL; +} + +int nandmtd2_ReadChunkWithTagsFromNAND(yaffs_Device * dev, int chunkInNAND, + __u8 * data, yaffs_ExtendedTags * tags) +{ + int retval = 0; + + int addr = chunkInNAND * dev->nDataBytesPerChunk; + + yaffs_PackedTags2 pt; + + T(YAFFS_TRACE_MTD, + (TSTR + ("nandmtd2_ReadChunkWithTagsFromNAND chunk %d data %p tags %p" + TENDSTR), chunkInNAND, data, tags)); + + if (data && !tags) + retval = read_yaffs2_nand(addr, data); + else if (tags) { + retval = read_oob_yaffs2_nand(addr, data, dev->spareBuffer, sizeof(pt)); + } + + memcpy(&pt, 
dev->spareBuffer, sizeof(pt)); + + if (tags) + yaffs_UnpackTags2(tags, &pt); + + if(tags && retval == -EBADMSG && tags->eccResult == YAFFS_ECC_RESULT_NO_ERROR) + tags->eccResult = YAFFS_ECC_RESULT_UNFIXED; + + if (retval == 0) + return YAFFS_OK; + else + return YAFFS_FAIL; +} + +int nandmtd2_MarkNANDBlockBad(struct yaffs_DeviceStruct *dev, int blockNo) +{ + int retval; + T(YAFFS_TRACE_MTD, + (TSTR("nandmtd2_MarkNANDBlockBad %d" TENDSTR), blockNo)); + + retval = + nand_block_markbad(blockNo); + + if (retval == 0) + return YAFFS_OK; + else + return YAFFS_FAIL; + +} + +int nandmtd2_QueryNANDBlock(struct yaffs_DeviceStruct *dev, int blockNo, + yaffs_BlockState * state, int *sequenceNumber) +{ + int retval; + + T(YAFFS_TRACE_MTD, + (TSTR("nandmtd2_QueryNANDBlock %d" TENDSTR), blockNo)); + retval = + nand_block_isbad(blockNo); + + if (retval) { + T(YAFFS_TRACE_MTD, (TSTR("block is bad"TENDSTR))); + *state = YAFFS_BLOCK_STATE_DEAD; + *sequenceNumber = 0; + } else { + yaffs_ExtendedTags t; + nandmtd2_ReadChunkWithTagsFromNAND(dev, + blockNo * + dev->nChunksPerBlock, NULL, + &t); + + if (t.chunkUsed) { + *sequenceNumber = t.sequenceNumber; + *state = YAFFS_BLOCK_STATE_NEEDS_SCANNING; + } else { + *sequenceNumber = 0; + *state = YAFFS_BLOCK_STATE_EMPTY; + } + } +#if 0 + T(YAFFS_TRACE_MTD, + (TSTR("block is bad seq %d state %d" TENDSTR), *sequenceNumber, + *state)); +#endif + if (retval == 0) + return YAFFS_OK; + else + return YAFFS_FAIL; +} +int nandmtd_EraseBlockInNAND(yaffs_Device * dev, int blockNumber) +{ + int retval = erase_yaffs2_nand(blockNumber, 0); + + if (retval == 0) + return YAFFS_OK; + else + return YAFFS_FAIL; +} + +int nandmtd_InitialiseNAND(yaffs_Device * dev) +{ + return YAFFS_OK; +} + diff --git a/fs/yaffs2/yaffs_mtdif2.h b/fs/yaffs2/yaffs_mtdif2.h new file mode 100755 index 0000000..5e0cea5 --- /dev/null +++ b/fs/yaffs2/yaffs_mtdif2.h @@ -0,0 +1,31 @@ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. 
+ * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. + */ + +#ifndef __YAFFS_MTDIF2_H__ +#define __YAFFS_MTDIF2_H__ + +#include "yaffs_guts.h" +int nandmtd2_WriteChunkWithTagsToNAND(yaffs_Device * dev, int chunkInNAND, + const __u8 * data, + const yaffs_ExtendedTags * tags); +int nandmtd2_ReadChunkWithTagsFromNAND(yaffs_Device * dev, int chunkInNAND, + __u8 * data, yaffs_ExtendedTags * tags); +int nandmtd2_MarkNANDBlockBad(struct yaffs_DeviceStruct *dev, int blockNo); +int nandmtd2_QueryNANDBlock(struct yaffs_DeviceStruct *dev, int blockNo, + yaffs_BlockState * state, int *sequenceNumber); +int nandmtd_EraseBlockInNAND(yaffs_Device * dev, int blockNumber); +int nandmtd_InitialiseNAND(yaffs_Device * dev); + +#endif diff --git a/fs/yaffs2/yaffs_nand.c b/fs/yaffs2/yaffs_nand.c new file mode 100755 index 0000000..5f952a9 --- /dev/null +++ b/fs/yaffs2/yaffs_nand.c @@ -0,0 +1,114 @@ +/* + * YAFFS: Yet Another Flash File System. A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* XXX U-BOOT XXX */ +#include <common.h> + +const char *yaffs_nand_c_version = + "$Id: yaffs_nand.c,v 1.7 2007/02/14 01:09:06 wookey Exp $"; + +#include "yaffs_nand.h" + + +int yaffs_ReadChunkWithTagsFromNAND(yaffs_Device * dev, int chunkInNAND, + __u8 * buffer, + yaffs_ExtendedTags * tags) +{ + int result; + yaffs_ExtendedTags localTags; + + int realignedChunkInNAND = chunkInNAND - dev->chunkOffset; + + /* If there are no tags provided, use local tags to get prioritised gc working */ + if(!tags) + tags = &localTags; + + if (dev->readChunkWithTagsFromNAND) + result = dev->readChunkWithTagsFromNAND(dev, realignedChunkInNAND, buffer, + tags); + if(tags && + tags->eccResult > YAFFS_ECC_RESULT_NO_ERROR){ + + yaffs_BlockInfo *bi = yaffs_GetBlockInfo(dev, chunkInNAND/dev->nChunksPerBlock); + yaffs_HandleChunkError(dev,bi); + } + + return result; +} + +int yaffs_WriteChunkWithTagsToNAND(yaffs_Device * dev, + int chunkInNAND, + const __u8 * buffer, + yaffs_ExtendedTags * tags) +{ + chunkInNAND -= dev->chunkOffset; + + if (tags) { + tags->sequenceNumber = dev->sequenceNumber; + tags->chunkUsed = 1; + if (!yaffs_ValidateTags(tags)) { + T(YAFFS_TRACE_ERROR, + (TSTR("Writing uninitialised tags" TENDSTR))); + YBUG(); + } + T(YAFFS_TRACE_WRITE, + (TSTR("Writing chunk %d tags %d %d" TENDSTR), chunkInNAND, + tags->objectId, tags->chunkId)); + } else { + T(YAFFS_TRACE_ERROR, (TSTR("Writing with no tags" TENDSTR))); + YBUG(); + } + + if (dev->writeChunkWithTagsToNAND) + return dev->writeChunkWithTagsToNAND(dev, chunkInNAND, buffer, + tags); +} + +int yaffs_MarkBlockBad(yaffs_Device * dev, int blockNo) +{ + blockNo -= dev->blockOffset; + +; + if (dev->markNANDBlockBad) + return dev->markNANDBlockBad(dev, blockNo); +} + +int yaffs_QueryInitialBlockState(yaffs_Device * dev, + int blockNo, + yaffs_BlockState * state, + unsigned *sequenceNumber) +{ + blockNo -= dev->blockOffset; + + if (dev->queryNANDBlock) + return dev->queryNANDBlock(dev, blockNo, state, sequenceNumber); +} + 
+ +int yaffs_EraseBlockInNAND(struct yaffs_DeviceStruct *dev, + int blockInNAND) +{ + int result; + + blockInNAND -= dev->blockOffset; + + dev->nBlockErasures++; + result = dev->eraseBlockInNAND(dev, blockInNAND); + + return result; +} + +int yaffs_InitialiseNAND(struct yaffs_DeviceStruct *dev) +{ + return dev->initialiseNAND(dev); +} diff --git a/fs/yaffs2/yaffs_nand.h b/fs/yaffs2/yaffs_nand.h new file mode 100755 index 0000000..48e3f7e --- /dev/null +++ b/fs/yaffs2/yaffs_nand.h @@ -0,0 +1,43 @@ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. + */ + +#ifndef __YAFFS_NAND_H__ +#define __YAFFS_NAND_H__ +#include "yaffs_guts.h" + + + +int yaffs_ReadChunkWithTagsFromNAND(yaffs_Device * dev, int chunkInNAND, + __u8 * buffer, + yaffs_ExtendedTags * tags); + +int yaffs_WriteChunkWithTagsToNAND(yaffs_Device * dev, + int chunkInNAND, + const __u8 * buffer, + yaffs_ExtendedTags * tags); + +int yaffs_MarkBlockBad(yaffs_Device * dev, int blockNo); + +int yaffs_QueryInitialBlockState(yaffs_Device * dev, + int blockNo, + yaffs_BlockState * state, + unsigned *sequenceNumber); + +int yaffs_EraseBlockInNAND(struct yaffs_DeviceStruct *dev, + int blockInNAND); + +int yaffs_InitialiseNAND(struct yaffs_DeviceStruct *dev); + +#endif diff --git a/fs/yaffs2/yaffs_nandemul2k.h b/fs/yaffs2/yaffs_nandemul2k.h new file mode 100755 index 0000000..cd2e96f --- /dev/null +++ b/fs/yaffs2/yaffs_nandemul2k.h @@ -0,0 +1,39 @@ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. 
+ * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. + */ + +/* Interface to emulated NAND functions (2k page size) */ + +#ifndef __YAFFS_NANDEMUL2K_H__ +#define __YAFFS_NANDEMUL2K_H__ + +#include "yaffs_guts.h" + +int nandemul2k_WriteChunkWithTagsToNAND(struct yaffs_DeviceStruct *dev, + int chunkInNAND, const __u8 * data, + yaffs_ExtendedTags * tags); +int nandemul2k_ReadChunkWithTagsFromNAND(struct yaffs_DeviceStruct *dev, + int chunkInNAND, __u8 * data, + yaffs_ExtendedTags * tags); +int nandemul2k_MarkNANDBlockBad(struct yaffs_DeviceStruct *dev, int blockNo); +int nandemul2k_QueryNANDBlock(struct yaffs_DeviceStruct *dev, int blockNo, + yaffs_BlockState * state, int *sequenceNumber); +int nandemul2k_EraseBlockInNAND(struct yaffs_DeviceStruct *dev, + int blockInNAND); +int nandemul2k_InitialiseNAND(struct yaffs_DeviceStruct *dev); +int nandemul2k_GetBytesPerChunk(void); +int nandemul2k_GetChunksPerBlock(void); +int nandemul2k_GetNumberOfBlocks(void); + +#endif diff --git a/fs/yaffs2/yaffs_packedtags1.c b/fs/yaffs2/yaffs_packedtags1.c new file mode 100755 index 0000000..a149431 --- /dev/null +++ b/fs/yaffs2/yaffs_packedtags1.c @@ -0,0 +1,55 @@ +/* + * YAFFS: Yet Another Flash File System. A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* XXX U-BOOT XXX */ +#include <common.h> + +#include "yaffs_packedtags1.h" +#include "yportenv.h" + +void yaffs_PackTags1(yaffs_PackedTags1 * pt, const yaffs_ExtendedTags * t) +{ + pt->chunkId = t->chunkId; + pt->serialNumber = t->serialNumber; + pt->byteCount = t->byteCount; + pt->objectId = t->objectId; + pt->ecc = 0; + pt->deleted = (t->chunkDeleted) ? 0 : 1; + pt->unusedStuff = 0; + pt->shouldBeFF = 0xFFFFFFFF; + +} + +void yaffs_UnpackTags1(yaffs_ExtendedTags * t, const yaffs_PackedTags1 * pt) +{ + static const __u8 allFF[] = + { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +0xff }; + + if (memcmp(allFF, pt, sizeof(yaffs_PackedTags1))) { + t->blockBad = 0; + if (pt->shouldBeFF != 0xFFFFFFFF) { + t->blockBad = 1; + } + t->chunkUsed = 1; + t->objectId = pt->objectId; + t->chunkId = pt->chunkId; + t->byteCount = pt->byteCount; + t->eccResult = YAFFS_ECC_RESULT_NO_ERROR; + t->chunkDeleted = (pt->deleted) ? 0 : 1; + t->serialNumber = pt->serialNumber; + } else { + memset(t, 0, sizeof(yaffs_ExtendedTags)); + + } +} diff --git a/fs/yaffs2/yaffs_packedtags1.h b/fs/yaffs2/yaffs_packedtags1.h new file mode 100755 index 0000000..776c5c2 --- /dev/null +++ b/fs/yaffs2/yaffs_packedtags1.h @@ -0,0 +1,37 @@ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. + */ + +/* This is used to pack YAFFS1 tags, not YAFFS2 tags. 
*/ + +#ifndef __YAFFS_PACKEDTAGS1_H__ +#define __YAFFS_PACKEDTAGS1_H__ + +#include "yaffs_guts.h" + +typedef struct { + unsigned chunkId:20; + unsigned serialNumber:2; + unsigned byteCount:10; + unsigned objectId:18; + unsigned ecc:12; + unsigned deleted:1; + unsigned unusedStuff:1; + unsigned shouldBeFF; + +} yaffs_PackedTags1; + +void yaffs_PackTags1(yaffs_PackedTags1 * pt, const yaffs_ExtendedTags * t); +void yaffs_UnpackTags1(yaffs_ExtendedTags * t, const yaffs_PackedTags1 * pt); +#endif diff --git a/fs/yaffs2/yaffs_packedtags2.c b/fs/yaffs2/yaffs_packedtags2.c new file mode 100755 index 0000000..335374a --- /dev/null +++ b/fs/yaffs2/yaffs_packedtags2.c @@ -0,0 +1,148 @@ +/* + * YAFFS: Yet Another Flash File System. A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* XXX U-BOOT XXX */ +#include <common.h> + +#include "yaffs_packedtags2.h" +#include "yportenv.h" + +/* This code packs a set of extended tags into a binary structure for + * NAND storage + */ + +/* Some of the information is "extra" struff which can be packed in to + * speed scanning + * This is defined by having the EXTRA_HEADER_INFO_FLAG set. + */ + +/* Extra flags applied to chunkId */ + +#define EXTRA_HEADER_INFO_FLAG 0x80000000 +#define EXTRA_SHRINK_FLAG 0x40000000 +#define EXTRA_SHADOWS_FLAG 0x20000000 +#define EXTRA_SPARE_FLAGS 0x10000000 + +#define ALL_EXTRA_FLAGS 0xF0000000 + +/* Also, the top 4 bits of the object Id are set to the object type. 
*/ +#define EXTRA_OBJECT_TYPE_SHIFT (28) +#define EXTRA_OBJECT_TYPE_MASK ((0x0F) << EXTRA_OBJECT_TYPE_SHIFT) + +static void yaffs_DumpPackedTags2(const yaffs_PackedTags2 * pt) +{ + T(YAFFS_TRACE_MTD, + (TSTR("packed tags obj %d chunk %d byte %d seq %d" TENDSTR), + pt->t.objectId, pt->t.chunkId, pt->t.byteCount, + pt->t.sequenceNumber)); +} + +static void yaffs_DumpTags2(const yaffs_ExtendedTags * t) +{ + T(YAFFS_TRACE_MTD, + (TSTR + ("ext.tags eccres %d blkbad %d chused %d obj %d chunk%d byte " + "%d del %d ser %d seq %d" + TENDSTR), t->eccResult, t->blockBad, t->chunkUsed, t->objectId, + t->chunkId, t->byteCount, t->chunkDeleted, t->serialNumber, + t->sequenceNumber)); + +} + +void yaffs_PackTags2(yaffs_PackedTags2 * pt, const yaffs_ExtendedTags * t) +{ + pt->t.chunkId = t->chunkId; + pt->t.sequenceNumber = t->sequenceNumber; + pt->t.byteCount = t->byteCount; + pt->t.objectId = t->objectId; + + if (t->chunkId == 0 && t->extraHeaderInfoAvailable) { + /* Store the extra header info instead */ + /* We save the parent object in the chunkId */ + pt->t.chunkId = EXTRA_HEADER_INFO_FLAG + | t->extraParentObjectId; + if (t->extraIsShrinkHeader) { + pt->t.chunkId |= EXTRA_SHRINK_FLAG; + } + if (t->extraShadows) { + pt->t.chunkId |= EXTRA_SHADOWS_FLAG; + } + + pt->t.objectId &= ~EXTRA_OBJECT_TYPE_MASK; + pt->t.objectId |= + (t->extraObjectType << EXTRA_OBJECT_TYPE_SHIFT); + + if (t->extraObjectType == YAFFS_OBJECT_TYPE_HARDLINK) { + pt->t.byteCount = t->extraEquivalentObjectId; + } else if (t->extraObjectType == YAFFS_OBJECT_TYPE_FILE) { + pt->t.byteCount = t->extraFileLength; + } else { + pt->t.byteCount = 0; + } + } + + yaffs_DumpPackedTags2(pt); + yaffs_DumpTags2(t); + +} + +void yaffs_UnpackTags2(yaffs_ExtendedTags * t, yaffs_PackedTags2 * pt) +{ + + memset(t, 0, sizeof(yaffs_ExtendedTags)); + + yaffs_InitialiseTags(t); + + if (pt->t.sequenceNumber != 0xFFFFFFFF) { + /* Page is in use */ + { + t->eccResult = YAFFS_ECC_RESULT_NO_ERROR; + } + t->blockBad = 0; + 
t->chunkUsed = 1; + t->objectId = pt->t.objectId; + t->chunkId = pt->t.chunkId; + t->byteCount = pt->t.byteCount; + t->chunkDeleted = 0; + t->serialNumber = 0; + t->sequenceNumber = pt->t.sequenceNumber; + + /* Do extra header info stuff */ + + if (pt->t.chunkId & EXTRA_HEADER_INFO_FLAG) { + t->chunkId = 0; + t->byteCount = 0; + + t->extraHeaderInfoAvailable = 1; + t->extraParentObjectId = + pt->t.chunkId & (~(ALL_EXTRA_FLAGS)); + t->extraIsShrinkHeader = + (pt->t.chunkId & EXTRA_SHRINK_FLAG) ? 1 : 0; + t->extraShadows = + (pt->t.chunkId & EXTRA_SHADOWS_FLAG) ? 1 : 0; + t->extraObjectType = + pt->t.objectId >> EXTRA_OBJECT_TYPE_SHIFT; + t->objectId &= ~EXTRA_OBJECT_TYPE_MASK; + + if (t->extraObjectType == YAFFS_OBJECT_TYPE_HARDLINK) { + t->extraEquivalentObjectId = pt->t.byteCount; + } else { + t->extraFileLength = pt->t.byteCount; + } + } + } + + yaffs_DumpPackedTags2(pt); + yaffs_DumpTags2(t); + +} diff --git a/fs/yaffs2/yaffs_packedtags2.h b/fs/yaffs2/yaffs_packedtags2.h new file mode 100755 index 0000000..386a058 --- /dev/null +++ b/fs/yaffs2/yaffs_packedtags2.h @@ -0,0 +1,38 @@ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. + */ + +/* This is used to pack YAFFS2 tags, not YAFFS1tags. 
*/ + +#ifndef __YAFFS_PACKEDTAGS2_H__ +#define __YAFFS_PACKEDTAGS2_H__ + +#include "yaffs_guts.h" +//#include "yaffs_ecc.h" + +typedef struct { + unsigned sequenceNumber; + unsigned objectId; + unsigned chunkId; + unsigned byteCount; +} yaffs_PackedTags2TagsPart; + +typedef struct { + yaffs_PackedTags2TagsPart t; + //yaffs_ECCOther ecc; +} yaffs_PackedTags2; + +void yaffs_PackTags2(yaffs_PackedTags2 * pt, const yaffs_ExtendedTags * t); +void yaffs_UnpackTags2(yaffs_ExtendedTags * t, yaffs_PackedTags2 * pt); +#endif diff --git a/fs/yaffs2/yaffs_qsort.c b/fs/yaffs2/yaffs_qsort.c new file mode 100755 index 0000000..4d56f96 --- /dev/null +++ b/fs/yaffs2/yaffs_qsort.c @@ -0,0 +1,163 @@ +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* XXX U-BOOT XXX */ +#include <common.h> + +#include "yportenv.h" +//#include <linux/string.h> + +/* + * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function". + */ +#define swapcode(TYPE, parmi, parmj, n) { \ + long i = (n) / sizeof (TYPE); \ + register TYPE *pi = (TYPE *) (parmi); \ + register TYPE *pj = (TYPE *) (parmj); \ + do { \ + register TYPE t = *pi; \ + *pi++ = *pj; \ + *pj++ = t; \ + } while (--i > 0); \ +} + +#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \ + es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1; + +static __inline void +swapfunc(char *a, char *b, int n, int swaptype) +{ + if (swaptype <= 1) + swapcode(long, a, b, n) + else + swapcode(char, a, b, n) +} + +#define swap(a, b) \ + if (swaptype == 0) { \ + long t = *(long *)(a); \ + *(long *)(a) = *(long *)(b); \ + *(long *)(b) = t; \ + } else \ + swapfunc(a, b, es, swaptype) + +#define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n, swaptype) + +static __inline char * +med3(char *a, char *b, char *c, int (*cmp)(const void *, const void *)) +{ + return cmp(a, b) < 0 ? + (cmp(b, c) < 0 ? b : (cmp(a, c) < 0 ? c : a )) + :(cmp(b, c) > 0 ? b : (cmp(a, c) < 0 ? a : c )); +} + +#ifndef min +#define min(a,b) (((a) < (b)) ? 
(a) : (b)) +#endif + +void +yaffs_qsort(void *aa, size_t n, size_t es, + int (*cmp)(const void *, const void *)) +{ + char *pa, *pb, *pc, *pd, *pl, *pm, *pn; + int d, r, swaptype, swap_cnt; + register char *a = aa; + +loop: SWAPINIT(a, es); + swap_cnt = 0; + if (n < 7) { + for (pm = (char *)a + es; pm < (char *) a + n * es; pm += es) + for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0; + pl -= es) + swap(pl, pl - es); + return; + } + pm = (char *)a + (n / 2) * es; + if (n > 7) { + pl = (char *)a; + pn = (char *)a + (n - 1) * es; + if (n > 40) { + d = (n / 8) * es; + pl = med3(pl, pl + d, pl + 2 * d, cmp); + pm = med3(pm - d, pm, pm + d, cmp); + pn = med3(pn - 2 * d, pn - d, pn, cmp); + } + pm = med3(pl, pm, pn, cmp); + } + swap(a, pm); + pa = pb = (char *)a + es; + + pc = pd = (char *)a + (n - 1) * es; + for (;;) { + while (pb <= pc && (r = cmp(pb, a)) <= 0) { + if (r == 0) { + swap_cnt = 1; + swap(pa, pb); + pa += es; + } + pb += es; + } + while (pb <= pc && (r = cmp(pc, a)) >= 0) { + if (r == 0) { + swap_cnt = 1; + swap(pc, pd); + pd -= es; + } + pc -= es; + } + if (pb > pc) + break; + swap(pb, pc); + swap_cnt = 1; + pb += es; + pc -= es; + } + if (swap_cnt == 0) { /* Switch to insertion sort */ + for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es) + for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0; + pl -= es) + swap(pl, pl - es); + return; + } + + pn = (char *)a + n * es; + r = min(pa - (char *)a, pb - pa); + vecswap(a, pb - r, r); + r = min((long)(pd - pc), (long)(pn - pd - es)); + vecswap(pb, pn - r, r); + if ((r = pb - pa) > es) + yaffs_qsort(a, r / es, es, cmp); + if ((r = pd - pc) > es) { + /* Iterate rather than recurse to save stack space */ + a = pn - r; + n = r / es; + goto loop; + } +/* yaffs_qsort(pn - r, r / es, es, cmp);*/ +} diff --git a/fs/yaffs2/yaffs_qsort.h b/fs/yaffs2/yaffs_qsort.h new file mode 100755 index 0000000..19083da --- /dev/null +++ b/fs/yaffs2/yaffs_qsort.h @@ -0,0 +1,23 @@ +/* + * YAFFS: Yet another Flash File 
System . A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. + */ + + +#ifndef __YAFFS_QSORT_H__ +#define __YAFFS_QSORT_H__ + +extern void yaffs_qsort (void *const base, size_t total_elems, size_t size, + int (*cmp)(const void *, const void *)); + +#endif diff --git a/fs/yaffs2/yaffs_ramdisk.h b/fs/yaffs2/yaffs_ramdisk.h new file mode 100755 index 0000000..3cff8be --- /dev/null +++ b/fs/yaffs2/yaffs_ramdisk.h @@ -0,0 +1,32 @@ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. 
+ */ + +/* + * yaffs_ramdisk.h: yaffs ram disk component + */ + +#ifndef __YAFFS_RAMDISK_H__ +#define __YAFFS_RAMDISK_H__ + + +#include "yaffs_guts.h" +int yramdisk_EraseBlockInNAND(yaffs_Device *dev, int blockNumber); +int yramdisk_WriteChunkWithTagsToNAND(yaffs_Device *dev,int chunkInNAND,const __u8 *data, yaffs_ExtendedTags *tags); +int yramdisk_ReadChunkWithTagsFromNAND(yaffs_Device *dev,int chunkInNAND, __u8 *data, yaffs_ExtendedTags *tags); +int yramdisk_EraseBlockInNAND(yaffs_Device *dev, int blockNumber); +int yramdisk_InitialiseNAND(yaffs_Device *dev); +int yramdisk_MarkNANDBlockBad(yaffs_Device *dev,int blockNumber); +int yramdisk_QueryNANDBlock(yaffs_Device *dev, int blockNo, yaffs_BlockState *state, int *sequenceNumber); +#endif diff --git a/fs/yaffs2/yaffs_tagscompat.c b/fs/yaffs2/yaffs_tagscompat.c new file mode 100755 index 0000000..9617041 --- /dev/null +++ b/fs/yaffs2/yaffs_tagscompat.c @@ -0,0 +1,468 @@ +/* + * YAFFS: Yet Another Flash File System. A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* XXX U-BOOT XXX */ +#include <common.h> + +#include "yaffs_guts.h" +#include "yaffs_tagscompat.h" +//#include "yaffs_ecc.h" + +static void yaffs_HandleReadDataError(yaffs_Device * dev, int chunkInNAND); +#ifdef NOTYET +static void yaffs_CheckWrittenBlock(yaffs_Device * dev, int chunkInNAND); +static void yaffs_HandleWriteChunkOk(yaffs_Device * dev, int chunkInNAND, + const __u8 * data, + const yaffs_Spare * spare); +static void yaffs_HandleUpdateChunk(yaffs_Device * dev, int chunkInNAND, + const yaffs_Spare * spare); +static void yaffs_HandleWriteChunkError(yaffs_Device * dev, int chunkInNAND); +#endif + +static const char yaffs_countBitsTable[256] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 +}; + +int yaffs_CountBits(__u8 x) +{ + int retVal; + retVal = yaffs_countBitsTable[x]; + return retVal; +} + +/********** Tags ECC calculations *********/ + + +void yaffs_CalcTagsECC(yaffs_Tags * tags) +{ + /* Calculate an ecc */ + + unsigned char *b = ((yaffs_TagsUnion *) tags)->asBytes; + unsigned i, j; + unsigned ecc = 0; + unsigned bit = 0; + + tags->ecc = 0; + + for (i = 0; i < 8; i++) { + for (j = 1; j & 0xff; j <<= 1) { + bit++; + if (b[i] & j) { + ecc ^= bit; + } + } + } + + tags->ecc = ecc; + +} + +int 
yaffs_CheckECCOnTags(yaffs_Tags * tags) +{ + unsigned ecc = tags->ecc; + + yaffs_CalcTagsECC(tags); + + ecc ^= tags->ecc; + + if (ecc && ecc <= 64) { + /* TODO: Handle the failure better. Retire? */ + unsigned char *b = ((yaffs_TagsUnion *) tags)->asBytes; + + ecc--; + + b[ecc / 8] ^= (1 << (ecc & 7)); + + /* Now recvalc the ecc */ + yaffs_CalcTagsECC(tags); + + return 1; /* recovered error */ + } else if (ecc) { + /* Wierd ecc failure value */ + /* TODO Need to do somethiong here */ + return -1; /* unrecovered error */ + } + + return 0; +} + +/********** Tags **********/ + +static void yaffs_LoadTagsIntoSpare(yaffs_Spare * sparePtr, + yaffs_Tags * tagsPtr) +{ + yaffs_TagsUnion *tu = (yaffs_TagsUnion *) tagsPtr; + + yaffs_CalcTagsECC(tagsPtr); + + sparePtr->tagByte0 = tu->asBytes[0]; + sparePtr->tagByte1 = tu->asBytes[1]; + sparePtr->tagByte2 = tu->asBytes[2]; + sparePtr->tagByte3 = tu->asBytes[3]; + sparePtr->tagByte4 = tu->asBytes[4]; + sparePtr->tagByte5 = tu->asBytes[5]; + sparePtr->tagByte6 = tu->asBytes[6]; + sparePtr->tagByte7 = tu->asBytes[7]; +} + +static void yaffs_GetTagsFromSpare(yaffs_Device * dev, yaffs_Spare * sparePtr, + yaffs_Tags * tagsPtr) +{ + yaffs_TagsUnion *tu = (yaffs_TagsUnion *) tagsPtr; + int result; + + tu->asBytes[0] = sparePtr->tagByte0; + tu->asBytes[1] = sparePtr->tagByte1; + tu->asBytes[2] = sparePtr->tagByte2; + tu->asBytes[3] = sparePtr->tagByte3; + tu->asBytes[4] = sparePtr->tagByte4; + tu->asBytes[5] = sparePtr->tagByte5; + tu->asBytes[6] = sparePtr->tagByte6; + tu->asBytes[7] = sparePtr->tagByte7; + + result = yaffs_CheckECCOnTags(tagsPtr); + if (result > 0) { + dev->tagsEccFixed++; + } else if (result < 0) { + dev->tagsEccUnfixed++; + } +} + +static void yaffs_SpareInitialise(yaffs_Spare * spare) +{ + memset(spare, 0xFF, sizeof(yaffs_Spare)); +} + +static int yaffs_WriteChunkToNAND(struct yaffs_DeviceStruct *dev, + int chunkInNAND, const __u8 * data, + yaffs_Spare * spare) +{ + if (chunkInNAND < dev->startBlock * 
dev->nChunksPerBlock) { + T(YAFFS_TRACE_ERROR, + (TSTR("**>> yaffs chunk %d is not valid" TENDSTR), + chunkInNAND)); + return YAFFS_FAIL; + } + + dev->nPageWrites++; + return dev->writeChunkToNAND(dev, chunkInNAND, data, spare); +} + +static int yaffs_ReadChunkFromNAND(struct yaffs_DeviceStruct *dev, + int chunkInNAND, + __u8 * data, + yaffs_Spare * spare, + yaffs_ECCResult * eccResult, + int doErrorCorrection) +{ + int retVal; + yaffs_Spare localSpare; + + dev->nPageReads++; + + if (!spare && data) { + /* If we don't have a real spare, then we use a local one. */ + /* Need this for the calculation of the ecc */ + spare = &localSpare; + } + + /* for ecc results from device. */ + struct yaffs_NANDSpare nspare; + retVal = + dev->readChunkFromNAND(dev, chunkInNAND, data, + (yaffs_Spare *) & nspare); + memcpy(spare, &nspare, sizeof(yaffs_Spare)); + if (data && doErrorCorrection) { + if (nspare.eccres1 > 0) { + T(YAFFS_TRACE_ERROR, + (TSTR + ("**>>mtd ecc error fix performed on chunk %d:0" + TENDSTR), chunkInNAND)); + } else if (nspare.eccres1 < 0) { + T(YAFFS_TRACE_ERROR, + (TSTR + ("**>>mtd ecc error unfixed on chunk %d:0" + TENDSTR), chunkInNAND)); + } + + if (nspare.eccres2 > 0) { + T(YAFFS_TRACE_ERROR, + (TSTR + ("**>>mtd ecc error fix performed on chunk %d:1" + TENDSTR), chunkInNAND)); + } else if (nspare.eccres2 < 0) { + T(YAFFS_TRACE_ERROR, + (TSTR + ("**>>mtd ecc error unfixed on chunk %d:1" + TENDSTR), chunkInNAND)); + } + + if (nspare.eccres1 || nspare.eccres2) { + /* We had a data problem on this page */ + yaffs_HandleReadDataError(dev, chunkInNAND); + } + + if (nspare.eccres1 < 0 || nspare.eccres2 < 0) + *eccResult = YAFFS_ECC_RESULT_UNFIXED; + else if (nspare.eccres1 > 0 || nspare.eccres2 > 0) + *eccResult = YAFFS_ECC_RESULT_FIXED; + else + *eccResult = YAFFS_ECC_RESULT_NO_ERROR; + + + } + return retVal; +} + +#ifdef NOTYET +static int yaffs_CheckChunkErased(struct yaffs_DeviceStruct *dev, + int chunkInNAND) +{ + + static int init = 0; + static __u8 
cmpbuf[YAFFS_BYTES_PER_CHUNK]; + static __u8 data[YAFFS_BYTES_PER_CHUNK]; + /* Might as well always allocate the larger size for */ + /* dev->useNANDECC == true; */ + static __u8 spare[sizeof(struct yaffs_NANDSpare)]; + + dev->readChunkFromNAND(dev, chunkInNAND, data, (yaffs_Spare *) spare); + + if (!init) { + memset(cmpbuf, 0xff, YAFFS_BYTES_PER_CHUNK); + init = 1; + } + + if (memcmp(cmpbuf, data, YAFFS_BYTES_PER_CHUNK)) + return YAFFS_FAIL; + if (memcmp(cmpbuf, spare, 16)) + return YAFFS_FAIL; + + return YAFFS_OK; + +} +#endif + +/* + * Functions for robustisizing + */ + +static void yaffs_HandleReadDataError(yaffs_Device * dev, int chunkInNAND) +{ + int blockInNAND = chunkInNAND / dev->nChunksPerBlock; + + /* Mark the block for retirement */ + yaffs_GetBlockInfo(dev, blockInNAND)->needsRetiring = 1; + T(YAFFS_TRACE_ERROR | YAFFS_TRACE_BAD_BLOCKS, + (TSTR("**>>Block %d marked for retirement" TENDSTR), blockInNAND)); + + /* TODO: + * Just do a garbage collection on the affected block + * then retire the block + * NB recursion + */ +} + +#ifdef NOTYET +static void yaffs_CheckWrittenBlock(yaffs_Device * dev, int chunkInNAND) +{ +} + +static void yaffs_HandleWriteChunkOk(yaffs_Device * dev, int chunkInNAND, + const __u8 * data, + const yaffs_Spare * spare) +{ +} + +static void yaffs_HandleUpdateChunk(yaffs_Device * dev, int chunkInNAND, + const yaffs_Spare * spare) +{ +} + +static void yaffs_HandleWriteChunkError(yaffs_Device * dev, int chunkInNAND) +{ + int blockInNAND = chunkInNAND / dev->nChunksPerBlock; + + /* Mark the block for retirement */ + yaffs_GetBlockInfo(dev, blockInNAND)->needsRetiring = 1; + /* Delete the chunk */ + yaffs_DeleteChunk(dev, chunkInNAND, 1, __LINE__); +} + +static int yaffs_VerifyCompare(const __u8 * d0, const __u8 * d1, + const yaffs_Spare * s0, const yaffs_Spare * s1) +{ + + if (memcmp(d0, d1, YAFFS_BYTES_PER_CHUNK) != 0 || + s0->tagByte0 != s1->tagByte0 || + s0->tagByte1 != s1->tagByte1 || + s0->tagByte2 != s1->tagByte2 || + 
s0->tagByte3 != s1->tagByte3 || + s0->tagByte4 != s1->tagByte4 || + s0->tagByte5 != s1->tagByte5 || + s0->tagByte6 != s1->tagByte6 || + s0->tagByte7 != s1->tagByte7 || + s0->ecc1[0] != s1->ecc1[0] || + s0->ecc1[1] != s1->ecc1[1] || + s0->ecc1[2] != s1->ecc1[2] || + s0->ecc2[0] != s1->ecc2[0] || + s0->ecc2[1] != s1->ecc2[1] || s0->ecc2[2] != s1->ecc2[2]) { + return 0; + } + + return 1; +} +#endif /* NOTYET */ + +int yaffs_TagsCompatabilityWriteChunkWithTagsToNAND(yaffs_Device * dev, + int chunkInNAND, + const __u8 * data, + const yaffs_ExtendedTags * + eTags) +{ + yaffs_Spare spare; + yaffs_Tags tags; + + yaffs_SpareInitialise(&spare); + + if (eTags->chunkDeleted) { + spare.pageStatus = 0; + } else { + tags.objectId = eTags->objectId; + tags.chunkId = eTags->chunkId; + tags.byteCount = eTags->byteCount; + tags.serialNumber = eTags->serialNumber; + + yaffs_LoadTagsIntoSpare(&spare, &tags); + + } + + return yaffs_WriteChunkToNAND(dev, chunkInNAND, data, &spare); +} + +int yaffs_TagsCompatabilityReadChunkWithTagsFromNAND(yaffs_Device * dev, + int chunkInNAND, + __u8 * data, + yaffs_ExtendedTags * eTags) +{ + + yaffs_Spare spare; + yaffs_Tags tags; + yaffs_ECCResult eccResult; + + static yaffs_Spare spareFF; + static int init; + + if (!init) { + memset(&spareFF, 0xFF, sizeof(spareFF)); + init = 1; + } + + if (yaffs_ReadChunkFromNAND + (dev, chunkInNAND, data, &spare, &eccResult, 1)) { + /* eTags may be NULL */ + if (eTags) { + + int deleted = + (yaffs_CountBits(spare.pageStatus) < 7) ? 1 : 0; + + eTags->chunkDeleted = deleted; + eTags->eccResult = eccResult; + eTags->blockBad = 0; /* We're reading it */ + /* therefore it is not a bad block */ + eTags->chunkUsed = + (memcmp(&spareFF, &spare, sizeof(spareFF)) != + 0) ? 
1 : 0; + + if (eTags->chunkUsed) { + yaffs_GetTagsFromSpare(dev, &spare, &tags); + + eTags->objectId = tags.objectId; + eTags->chunkId = tags.chunkId; + eTags->byteCount = tags.byteCount; + eTags->serialNumber = tags.serialNumber; + } + } + + return YAFFS_OK; + } else { + return YAFFS_FAIL; + } +} + +int yaffs_TagsCompatabilityMarkNANDBlockBad(struct yaffs_DeviceStruct *dev, + int blockInNAND) +{ + + yaffs_Spare spare; + + memset(&spare, 0xff, sizeof(yaffs_Spare)); + + spare.blockStatus = 'Y'; + + yaffs_WriteChunkToNAND(dev, blockInNAND * dev->nChunksPerBlock, NULL, + &spare); + yaffs_WriteChunkToNAND(dev, blockInNAND * dev->nChunksPerBlock + 1, + NULL, &spare); + + return YAFFS_OK; + +} + +int yaffs_TagsCompatabilityQueryNANDBlock(struct yaffs_DeviceStruct *dev, + int blockNo, yaffs_BlockState * + state, + int *sequenceNumber) +{ + + yaffs_Spare spare0, spare1; + static yaffs_Spare spareFF; + static int init; + yaffs_ECCResult dummy; + + if (!init) { + memset(&spareFF, 0xFF, sizeof(spareFF)); + init = 1; + } + + *sequenceNumber = 0; + + yaffs_ReadChunkFromNAND(dev, blockNo * dev->nChunksPerBlock, NULL, + &spare0, &dummy, 1); + yaffs_ReadChunkFromNAND(dev, blockNo * dev->nChunksPerBlock + 1, NULL, + &spare1, &dummy, 1); + + if (yaffs_CountBits(spare0.blockStatus & spare1.blockStatus) < 7) + *state = YAFFS_BLOCK_STATE_DEAD; + else if (memcmp(&spareFF, &spare0, sizeof(spareFF)) == 0) + *state = YAFFS_BLOCK_STATE_EMPTY; + else + *state = YAFFS_BLOCK_STATE_NEEDS_SCANNING; + + return YAFFS_OK; +} diff --git a/fs/yaffs2/yaffs_tagscompat.h b/fs/yaffs2/yaffs_tagscompat.h new file mode 100755 index 0000000..a61e3ba --- /dev/null +++ b/fs/yaffs2/yaffs_tagscompat.h @@ -0,0 +1,40 @@ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. 
+ * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. + */ + +#ifndef __YAFFS_TAGSCOMPAT_H__ +#define __YAFFS_TAGSCOMPAT_H__ + +#include "yaffs_guts.h" +int yaffs_TagsCompatabilityWriteChunkWithTagsToNAND(yaffs_Device * dev, + int chunkInNAND, + const __u8 * data, + const yaffs_ExtendedTags * + tags); +int yaffs_TagsCompatabilityReadChunkWithTagsFromNAND(yaffs_Device * dev, + int chunkInNAND, + __u8 * data, + yaffs_ExtendedTags * + tags); +int yaffs_TagsCompatabilityMarkNANDBlockBad(struct yaffs_DeviceStruct *dev, + int blockNo); +int yaffs_TagsCompatabilityQueryNANDBlock(struct yaffs_DeviceStruct *dev, + int blockNo, yaffs_BlockState * + state, int *sequenceNumber); + +void yaffs_CalcTagsECC(yaffs_Tags * tags); +int yaffs_CheckECCOnTags(yaffs_Tags * tags); +int yaffs_CountBits(__u8 byte); + +#endif diff --git a/fs/yaffs2/yaffs_tagsvalidity.c b/fs/yaffs2/yaffs_tagsvalidity.c new file mode 100755 index 0000000..f588d3a --- /dev/null +++ b/fs/yaffs2/yaffs_tagsvalidity.c @@ -0,0 +1,31 @@ +/* + * YAFFS: Yet Another Flash File System. A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* XXX U-BOOT XXX */ +#include <common.h> + +#include "yaffs_tagsvalidity.h" + +void yaffs_InitialiseTags(yaffs_ExtendedTags * tags) +{ + memset(tags, 0, sizeof(yaffs_ExtendedTags)); + tags->validMarker0 = 0xAAAAAAAA; + tags->validMarker1 = 0x55555555; +} + +int yaffs_ValidateTags(yaffs_ExtendedTags * tags) +{ + return (tags->validMarker0 == 0xAAAAAAAA && + tags->validMarker1 == 0x55555555); + +} diff --git a/fs/yaffs2/yaffs_tagsvalidity.h b/fs/yaffs2/yaffs_tagsvalidity.h new file mode 100755 index 0000000..2fd0c24 --- /dev/null +++ b/fs/yaffs2/yaffs_tagsvalidity.h @@ -0,0 +1,24 @@ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. + */ + + +#ifndef __YAFFS_TAGS_VALIDITY_H__ +#define __YAFFS_TAGS_VALIDITY_H__ + +#include "yaffs_guts.h" + +void yaffs_InitialiseTags(yaffs_ExtendedTags * tags); +int yaffs_ValidateTags(yaffs_ExtendedTags * tags); +#endif diff --git a/fs/yaffs2/yaffscfg.c b/fs/yaffs2/yaffscfg.c new file mode 100755 index 0000000..b071e2b --- /dev/null +++ b/fs/yaffs2/yaffscfg.c @@ -0,0 +1,375 @@ +/* + * YAFFS: Yet Another Flash File System. A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
// Allocation hook used by yaffs: forwards the request straight to malloc().
void *yaffs_malloc(size_t size)
{
	void *block = malloc(size);

	return block;
}
+// + +#include "yaffs_flashif.h" +#include <linux/mtd/nand.h> +static int isMounted = 0; +#define MOUNT_POINT "/flash" +extern struct nand_chip nand_dev_desc[]; +/* XXX U-BOOT XXX */ +#if 0 +static yaffs_Device ramDev; +static yaffs_Device bootDev; +static yaffs_Device flashDev; +#endif + +static yaffsfs_DeviceConfiguration yaffsfs_config[] = { +/* XXX U-BOOT XXX */ +#if 0 + { "/ram", &ramDev}, + { "/boot", &bootDev}, + { "/flash", &flashDev}, +#else + { MOUNT_POINT, 0}, +#endif + {(void *)0,(void *)0} +}; + + +int yaffs_StartUp(void) +{ + int yaffsVersion = 2; + int nBlocks; + + yaffs_Device *flashDev = calloc(1, sizeof(yaffs_Device)); + yaffsfs_config[0].dev = flashDev; + + /* store the mtd device for later use */ + + // Stuff to configure YAFFS + // Stuff to initialise anything special (eg lock semaphore). + yaffsfs_LocalInitialisation(); + + // /flash + flashDev->nReservedBlocks = 5; + flashDev->nShortOpCaches = 10; // Use caches + + if (yaffsVersion == 2) + { + flashDev->writeChunkWithTagsToNAND = nandmtd2_WriteChunkWithTagsToNAND; + flashDev->readChunkWithTagsFromNAND = nandmtd2_ReadChunkWithTagsFromNAND; + flashDev->markNANDBlockBad = nandmtd2_MarkNANDBlockBad; + flashDev->queryNANDBlock = nandmtd2_QueryNANDBlock; + + flashDev->spareBuffer = YMALLOC(nand_dev_desc[0].oobsize); + flashDev->isYaffs2 = 1; + flashDev->nDataBytesPerChunk = nand_dev_desc[0].oobblock; + flashDev->nChunksPerBlock = nand_dev_desc[0].erasesize / nand_dev_desc[0].oobblock; + nBlocks = yaffs_blockNumber; + flashDev->nCheckpointReservedBlocks = 10; + flashDev->startBlock = yaffs_startBlock; + flashDev->endBlock = nBlocks - 1 + flashDev->startBlock; + } + + /* ... 
and common functions */ + flashDev->eraseBlockInNAND = nandmtd_EraseBlockInNAND; + flashDev->initialiseNAND = nandmtd_InitialiseNAND; + + yaffs_initialise(yaffsfs_config); + + return 0; +} + + +void make_a_file(char *yaffsName,char bval,int sizeOfFile) +{ + int outh; + int i; + unsigned char buffer[nand_dev_desc[0].oobblock]; + + outh = yaffs_open(yaffsName, O_CREAT | O_RDWR | O_TRUNC, S_IREAD | S_IWRITE); + if (outh < 0) + { + printf("Error opening file: %d\n", outh); + return; + } + + memset(buffer,bval,nand_dev_desc[0].oobblock); + + do{ + i = sizeOfFile; + if(i > nand_dev_desc[0].oobblock) i = nand_dev_desc[0].oobblock; + sizeOfFile -= i; + + yaffs_write(outh,buffer,i); + + } while (sizeOfFile > 0); + + + yaffs_close(outh); +} + +void read_a_file(char *fn) +{ + int h; + int i = 0; + unsigned char b; + + h = yaffs_open(fn, O_RDWR,0); + if(h<0) + { + printf("File not found\n"); + return; + } + + while(yaffs_read(h,&b,1)> 0) + { + printf("%02x ",b); + i++; + if(i > 32) + { + printf("\n"); + i = 0;; + } + } + printf("\n"); + yaffs_close(h); +} +void cmd_yaffs_start(int start) +{ + yaffs_startBlock = start; +} + +void cmd_yaffs_size(int size) +{ + yaffs_blockNumber = size; +} +void cmd_yaffs_mount(char *mp) +{ + yaffs_StartUp(); + int retval = yaffs_mount(mp); + if( retval != -1) + isMounted = 1; + else + printf("Error mounting %s, return value: %d\n", mp, yaffsfs_GetError()); +} + +static void checkMount(void) +{ + if( !isMounted ) + { + cmd_yaffs_mount(MOUNT_POINT); + } +} + +void cmd_yaffs_umount(char *mp) +{ + checkMount(); + if( yaffs_unmount(mp) == -1) + printf("Error umounting %s, return value: %d\n", mp, yaffsfs_GetError()); +} + +void cmd_yaffs_write_file(char *yaffsName,char bval,int sizeOfFile) +{ + checkMount(); + make_a_file(yaffsName,bval,sizeOfFile); +} + + +void cmd_yaffs_read_file(char *fn) +{ + checkMount(); + read_a_file(fn); +} + + +void cmd_yaffs_mread_file(char *fn, char *addr) +{ + int h; + struct yaffs_stat s; + + checkMount(); + + 
yaffs_stat(fn,&s); + + printf ("Copy %s to 0x%08x... ", fn, addr); + h = yaffs_open(fn, O_RDWR,0); + if(h<0) + { + printf("File not found\n"); + return; + } + + yaffs_read(h,addr,(int)s.st_size); + printf("\t[DONE]\n"); + + yaffs_close(h); +} + + +void cmd_yaffs_mwrite_file(char *fn, char *addr, int size) +{ + int outh; + + checkMount(); + outh = yaffs_open(fn, O_CREAT | O_RDWR | O_TRUNC, S_IREAD | S_IWRITE); + if (outh < 0) + { + printf("Error opening file: %d\n", outh); + } + yaffs_write(outh,addr,size); + + yaffs_close(outh); +} + + +void cmd_yaffs_ls(const char *mountpt, int longlist) +{ + int i; + yaffs_DIR *d; + yaffs_dirent *de; + struct yaffs_stat stat; + char tempstr[255]; + + checkMount(); + d = yaffs_opendir(mountpt); + + if(!d) + { + printf("opendir failed\n"); + } + else + { + for(i = 0; (de = yaffs_readdir(d)) != NULL; i++) + { + if (longlist) + { + sprintf(tempstr, "%s/%s", mountpt, de->d_name); + yaffs_stat(tempstr, &stat); + printf("%-25s\t%7d\n",de->d_name, stat.st_size); + } + else + { + printf("%s\n",de->d_name); + } + } + } +} + + +void cmd_yaffs_mkdir(const char *dir) +{ + checkMount(); + + int retval = yaffs_mkdir(dir, 0); + + if ( retval < 0) + printf("yaffs_mkdir returning error: %d\n", retval); +} + +void cmd_yaffs_rmdir(const char *dir) +{ + checkMount(); + + int retval = yaffs_rmdir(dir); + + if ( retval < 0) + printf("yaffs_rmdir returning error: %d\n", retval); +} + +void cmd_yaffs_rm(const char *path) +{ + checkMount(); + + int retval = yaffs_unlink(path); + + if ( retval < 0) + printf("yaffs_unlink returning error: %d\n", retval); +} + +void cmd_yaffs_mv(const char *oldPath, const char *newPath) +{ + checkMount(); + + int retval = yaffs_rename(newPath, oldPath); + + if ( retval < 0) + printf("yaffs_unlink returning error: %d\n", retval); +} diff --git a/fs/yaffs2/yaffscfg.h b/fs/yaffs2/yaffscfg.h new file mode 100755 index 0000000..3503dc8 --- /dev/null +++ b/fs/yaffs2/yaffscfg.h @@ -0,0 +1,45 @@ +/* + * YAFFS: Yet another Flash File 
System . A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. + */ + +/* + * Header file for using yaffs in an application via + * a direct interface. + */ + + +#ifndef __YAFFSCFG_H__ +#define __YAFFSCFG_H__ + + +#include "devextras.h" + +#define YAFFSFS_N_HANDLES 200 + + +typedef struct { + const char *prefix; + struct yaffs_DeviceStruct *dev; +} yaffsfs_DeviceConfiguration; + + +void yaffsfs_Lock(void); +void yaffsfs_Unlock(void); + +__u32 yaffsfs_CurrentTime(void); + +void yaffsfs_SetError(int err); +int yaffsfs_GetError(void); + +#endif diff --git a/fs/yaffs2/yaffsfs.c b/fs/yaffs2/yaffsfs.c new file mode 100755 index 0000000..98b13d0 --- /dev/null +++ b/fs/yaffs2/yaffsfs.c @@ -0,0 +1,1510 @@ +/* + * YAFFS: Yet Another Flash File System. A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
// One slot of the open-file table (yaffsfs_handle[]). The index of a slot in
// that table is the file descriptor handed out by yaffs_open().
typedef struct
{
	__u8  inUse:1;		// this handle is in use
	__u8  readOnly:1;	// this handle is read only (opened without O_WRONLY/O_RDWR)
	__u8  append:1;		// append only (opened with O_APPEND; writes go to end of file)
	__u8  exclusive:1;	// exclusive (opened with O_EXCL)
	__u32 position;		// current position in file
	yaffs_Object *obj;	// the object
}yaffsfs_Handle;
+// +static int yaffsfs_InitHandles(void) +{ + int i; + for(i = 0; i < YAFFSFS_N_HANDLES; i++) + { + yaffsfs_handle[i].inUse = 0; + yaffsfs_handle[i].obj = NULL; + } + return 0; +} + +yaffsfs_Handle *yaffsfs_GetHandlePointer(int h) +{ + if(h < 0 || h >= YAFFSFS_N_HANDLES) + { + return NULL; + } + + return &yaffsfs_handle[h]; +} + +yaffs_Object *yaffsfs_GetHandleObject(int handle) +{ + yaffsfs_Handle *h = yaffsfs_GetHandlePointer(handle); + + if(h && h->inUse) + { + return h->obj; + } + + return NULL; +} + + +//yaffsfs_GetHandle +// Grab a handle (when opening a file) +// + +static int yaffsfs_GetHandle(void) +{ + int i; + yaffsfs_Handle *h; + + for(i = 0; i < YAFFSFS_N_HANDLES; i++) + { + h = yaffsfs_GetHandlePointer(i); + if(!h) + { + // todo bug: should never happen + } + if(!h->inUse) + { + memset(h,0,sizeof(yaffsfs_Handle)); + h->inUse=1; + return i; + } + } + return -1; +} + +// yaffs_PutHandle +// Let go of a handle (when closing a file) +// +static int yaffsfs_PutHandle(int handle) +{ + yaffsfs_Handle *h = yaffsfs_GetHandlePointer(handle); + + if(h) + { + h->inUse = 0; + h->obj = NULL; + } + return 0; +} + + + +// Stuff to search for a directory from a path + + +int yaffsfs_Match(char a, char b) +{ + // case sensitive + return (a == b); +} + +// yaffsfs_FindDevice +// yaffsfs_FindRoot +// Scan the configuration list to find the root. +// Curveballs: Should match paths that end in '/' too +// Curveball2 Might have "/x/ and "/x/y". Need to return the longest match +static yaffs_Device *yaffsfs_FindDevice(const char *path, char **restOfPath) +{ + yaffsfs_DeviceConfiguration *cfg = yaffsfs_configurationList; + const char *leftOver; + const char *p; + yaffs_Device *retval = NULL; + int thisMatchLength; + int longestMatch = -1; + + // Check all configs, choose the one that: + // 1) Actually matches a prefix (ie /a amd /abc will not match + // 2) Matches the longest. 
+ while(cfg && cfg->prefix && cfg->dev) + { + leftOver = path; + p = cfg->prefix; + thisMatchLength = 0; + + while(*p && //unmatched part of prefix + strcmp(p,"/") && // the rest of the prefix is not / (to catch / at end) + *leftOver && + yaffsfs_Match(*p,*leftOver)) + { + p++; + leftOver++; + thisMatchLength++; + } + if((!*p || strcmp(p,"/") == 0) && // end of prefix + (!*leftOver || *leftOver == '/') && // no more in this path name part + (thisMatchLength > longestMatch)) + { + // Matched prefix + *restOfPath = (char *)leftOver; + retval = cfg->dev; + longestMatch = thisMatchLength; + } + cfg++; + } + return retval; +} + +static yaffs_Object *yaffsfs_FindRoot(const char *path, char **restOfPath) +{ + + yaffs_Device *dev; + + dev= yaffsfs_FindDevice(path,restOfPath); + if(dev && dev->isMounted) + { + return dev->rootDir; + } + return NULL; +} + +static yaffs_Object *yaffsfs_FollowLink(yaffs_Object *obj,int symDepth) +{ + + while(obj && obj->variantType == YAFFS_OBJECT_TYPE_SYMLINK) + { + char *alias = obj->variant.symLinkVariant.alias; + + if(*alias == '/') + { + // Starts with a /, need to scan from root up + obj = yaffsfs_FindObject(NULL,alias,symDepth++); + } + else + { + // Relative to here, so use the parent of the symlink as a start + obj = yaffsfs_FindObject(obj->parent,alias,symDepth++); + } + } + return obj; +} + + +// yaffsfs_FindDirectory +// Parse a path to determine the directory and the name within the directory. +// +// eg. "/data/xx/ff" --> puts name="ff" and returns the directory "/data/xx" +static yaffs_Object *yaffsfs_DoFindDirectory(yaffs_Object *startDir,const char *path,char **name,int symDepth) +{ + yaffs_Object *dir; + char *restOfPath; + char str[YAFFS_MAX_NAME_LENGTH+1]; + int i; + + if(symDepth > YAFFSFS_MAX_SYMLINK_DEREFERENCES) + { + return NULL; + } + + if(startDir) + { + dir = startDir; + restOfPath = (char *)path; + } + else + { + dir = yaffsfs_FindRoot(path,&restOfPath); + } + + while(dir) + { + // parse off /. 
+ // curve ball: also throw away surplus '/' + // eg. "/ram/x////ff" gets treated the same as "/ram/x/ff" + while(*restOfPath == '/') + { + restOfPath++; // get rid of '/' + } + + *name = restOfPath; + i = 0; + + while(*restOfPath && *restOfPath != '/') + { + if (i < YAFFS_MAX_NAME_LENGTH) + { + str[i] = *restOfPath; + str[i+1] = '\0'; + i++; + } + restOfPath++; + } + + if(!*restOfPath) + { + // got to the end of the string + return dir; + } + else + { + if(strcmp(str,".") == 0) + { + // Do nothing + } + else if(strcmp(str,"..") == 0) + { + dir = dir->parent; + } + else + { + dir = yaffs_FindObjectByName(dir,str); + + while(dir && dir->variantType == YAFFS_OBJECT_TYPE_SYMLINK) + { + + dir = yaffsfs_FollowLink(dir,symDepth); + + } + + if(dir && dir->variantType != YAFFS_OBJECT_TYPE_DIRECTORY) + { + dir = NULL; + } + } + } + } + // directory did not exist. + return NULL; +} + +static yaffs_Object *yaffsfs_FindDirectory(yaffs_Object *relativeDirectory,const char *path,char **name,int symDepth) +{ + return yaffsfs_DoFindDirectory(relativeDirectory,path,name,symDepth); +} + +// yaffsfs_FindObject turns a path for an existing object into the object +// +static yaffs_Object *yaffsfs_FindObject(yaffs_Object *relativeDirectory, const char *path,int symDepth) +{ + yaffs_Object *dir; + char *name; + + dir = yaffsfs_FindDirectory(relativeDirectory,path,&name,symDepth); + + if(dir && *name) + { + return yaffs_FindObjectByName(dir,name); + } + + return dir; +} + + + +int yaffs_open(const char *path, int oflag, int mode) +{ + yaffs_Object *obj = NULL; + yaffs_Object *dir = NULL; + char *name; + int handle = -1; + yaffsfs_Handle *h = NULL; + int alreadyOpen = 0; + int alreadyExclusive = 0; + int openDenied = 0; + int symDepth = 0; + int errorReported = 0; + + int i; + + + // todo sanity check oflag (eg. 
can't have O_TRUNC without WRONLY or RDWR + + + yaffsfs_Lock(); + + handle = yaffsfs_GetHandle(); + + if(handle >= 0) + { + + h = yaffsfs_GetHandlePointer(handle); + + + // try to find the exisiting object + obj = yaffsfs_FindObject(NULL,path,0); + + if(obj && obj->variantType == YAFFS_OBJECT_TYPE_SYMLINK) + { + + obj = yaffsfs_FollowLink(obj,symDepth++); + } + + if(obj) + { + // Check if the object is already in use + alreadyOpen = alreadyExclusive = 0; + + for(i = 0; i <= YAFFSFS_N_HANDLES; i++) + { + + if(i != handle && + yaffsfs_handle[i].inUse && + obj == yaffsfs_handle[i].obj) + { + alreadyOpen = 1; + if(yaffsfs_handle[i].exclusive) + { + alreadyExclusive = 1; + } + } + } + + if(((oflag & O_EXCL) && alreadyOpen) || alreadyExclusive) + { + openDenied = 1; + } + + // Open should fail if O_CREAT and O_EXCL are specified + if((oflag & O_EXCL) && (oflag & O_CREAT)) + { + openDenied = 1; + yaffsfs_SetError(-EEXIST); + errorReported = 1; + } + + // Check file permissions + if( (oflag & (O_RDWR | O_WRONLY)) == 0 && // ie O_RDONLY + !(obj->yst_mode & S_IREAD)) + { + openDenied = 1; + } + + if( (oflag & O_RDWR) && + !(obj->yst_mode & S_IREAD)) + { + openDenied = 1; + } + + if( (oflag & (O_RDWR | O_WRONLY)) && + !(obj->yst_mode & S_IWRITE)) + { + openDenied = 1; + } + + } + + else if((oflag & O_CREAT)) + { + // Let's see if we can create this file + dir = yaffsfs_FindDirectory(NULL,path,&name,0); + if(dir) + { + obj = yaffs_MknodFile(dir,name,mode,0,0); + } + else + { + yaffsfs_SetError(-ENOTDIR); + } + } + + if(obj && !openDenied) + { + h->obj = obj; + h->inUse = 1; + h->readOnly = (oflag & (O_WRONLY | O_RDWR)) ? 0 : 1; + h->append = (oflag & O_APPEND) ? 1 : 0; + h->exclusive = (oflag & O_EXCL) ? 
1 : 0; + h->position = 0; + + obj->inUse++; + if((oflag & O_TRUNC) && !h->readOnly) + { + //todo truncate + yaffs_ResizeFile(obj,0); + } + + } + else + { + yaffsfs_PutHandle(handle); + if(!errorReported) + { + yaffsfs_SetError(-EACCESS); + errorReported = 1; + } + handle = -1; + } + + } + + yaffsfs_Unlock(); + + return handle; +} + +int yaffs_close(int fd) +{ + yaffsfs_Handle *h = NULL; + int retVal = 0; + + yaffsfs_Lock(); + + h = yaffsfs_GetHandlePointer(fd); + + if(h && h->inUse) + { + // clean up + yaffs_FlushFile(h->obj,1); + h->obj->inUse--; + if(h->obj->inUse <= 0 && h->obj->unlinked) + { + yaffs_DeleteFile(h->obj); + } + yaffsfs_PutHandle(fd); + retVal = 0; + } + else + { + // bad handle + yaffsfs_SetError(-EBADF); + retVal = -1; + } + + yaffsfs_Unlock(); + + return retVal; +} + +int yaffs_read(int fd, void *buf, unsigned int nbyte) +{ + yaffsfs_Handle *h = NULL; + yaffs_Object *obj = NULL; + int pos = 0; + int nRead = -1; + int maxRead; + + yaffsfs_Lock(); + h = yaffsfs_GetHandlePointer(fd); + obj = yaffsfs_GetHandleObject(fd); + + if(!h || !obj) + { + // bad handle + yaffsfs_SetError(-EBADF); + } + else if( h && obj) + { + pos= h->position; + if(yaffs_GetObjectFileLength(obj) > pos) + { + maxRead = yaffs_GetObjectFileLength(obj) - pos; + } + else + { + maxRead = 0; + } + + if(nbyte > maxRead) + { + nbyte = maxRead; + } + + + if(nbyte > 0) + { + nRead = yaffs_ReadDataFromFile(obj,buf,pos,nbyte); + if(nRead >= 0) + { + h->position = pos + nRead; + } + else + { + //todo error + } + } + else + { + nRead = 0; + } + + } + + yaffsfs_Unlock(); + + + return (nRead >= 0) ? 
nRead : -1; + +} + +int yaffs_write(int fd, const void *buf, unsigned int nbyte) +{ + yaffsfs_Handle *h = NULL; + yaffs_Object *obj = NULL; + int pos = 0; + int nWritten = -1; + int writeThrough = 0; + + yaffsfs_Lock(); + h = yaffsfs_GetHandlePointer(fd); + obj = yaffsfs_GetHandleObject(fd); + + if(!h || !obj) + { + // bad handle + yaffsfs_SetError(-EBADF); + } + else if( h && obj && h->readOnly) + { + // todo error + } + else if( h && obj) + { + if(h->append) + { + pos = yaffs_GetObjectFileLength(obj); + } + else + { + pos = h->position; + } + + nWritten = yaffs_WriteDataToFile(obj,buf,pos,nbyte,writeThrough); + + if(nWritten >= 0) + { + h->position = pos + nWritten; + } + else + { + //todo error + } + + } + + yaffsfs_Unlock(); + + + return (nWritten >= 0) ? nWritten : -1; + +} + +int yaffs_truncate(int fd, off_t newSize) +{ + yaffsfs_Handle *h = NULL; + yaffs_Object *obj = NULL; + int result = 0; + + yaffsfs_Lock(); + h = yaffsfs_GetHandlePointer(fd); + obj = yaffsfs_GetHandleObject(fd); + + if(!h || !obj) + { + // bad handle + yaffsfs_SetError(-EBADF); + } + else + { + // resize the file + result = yaffs_ResizeFile(obj,newSize); + } + yaffsfs_Unlock(); + + + return (result) ? 
0 : -1; + +} + +off_t yaffs_lseek(int fd, off_t offset, int whence) +{ + yaffsfs_Handle *h = NULL; + yaffs_Object *obj = NULL; + int pos = -1; + int fSize = -1; + + yaffsfs_Lock(); + h = yaffsfs_GetHandlePointer(fd); + obj = yaffsfs_GetHandleObject(fd); + + if(!h || !obj) + { + // bad handle + yaffsfs_SetError(-EBADF); + } + else if(whence == SEEK_SET) + { + if(offset >= 0) + { + pos = offset; + } + } + else if(whence == SEEK_CUR) + { + if( (h->position + offset) >= 0) + { + pos = (h->position + offset); + } + } + else if(whence == SEEK_END) + { + fSize = yaffs_GetObjectFileLength(obj); + if(fSize >= 0 && (fSize + offset) >= 0) + { + pos = fSize + offset; + } + } + + if(pos >= 0) + { + h->position = pos; + } + else + { + // todo error + } + + + yaffsfs_Unlock(); + + return pos; +} + + +int yaffsfs_DoUnlink(const char *path,int isDirectory) +{ + yaffs_Object *dir = NULL; + yaffs_Object *obj = NULL; + char *name; + int result = YAFFS_FAIL; + + yaffsfs_Lock(); + + obj = yaffsfs_FindObject(NULL,path,0); + dir = yaffsfs_FindDirectory(NULL,path,&name,0); + if(!dir) + { + yaffsfs_SetError(-ENOTDIR); + } + else if(!obj) + { + yaffsfs_SetError(-ENOENT); + } + else if(!isDirectory && obj->variantType == YAFFS_OBJECT_TYPE_DIRECTORY) + { + yaffsfs_SetError(-EISDIR); + } + else if(isDirectory && obj->variantType != YAFFS_OBJECT_TYPE_DIRECTORY) + { + yaffsfs_SetError(-ENOTDIR); + } + else + { + result = yaffs_Unlink(dir,name); + + if(result == YAFFS_FAIL && isDirectory) + { + yaffsfs_SetError(-ENOTEMPTY); + } + } + + yaffsfs_Unlock(); + + // todo error + + return (result == YAFFS_FAIL) ? 
-1 : 0; +} +int yaffs_rmdir(const char *path) +{ + return yaffsfs_DoUnlink(path,1); +} + +int yaffs_unlink(const char *path) +{ + return yaffsfs_DoUnlink(path,0); +} + +int yaffs_rename(const char *oldPath, const char *newPath) +{ + yaffs_Object *olddir = NULL; + yaffs_Object *newdir = NULL; + yaffs_Object *obj = NULL; + char *oldname; + char *newname; + int result= YAFFS_FAIL; + int renameAllowed = 1; + + yaffsfs_Lock(); + + olddir = yaffsfs_FindDirectory(NULL,oldPath,&oldname,0); + newdir = yaffsfs_FindDirectory(NULL,newPath,&newname,0); + obj = yaffsfs_FindObject(NULL,oldPath,0); + + if(!olddir || !newdir || !obj) + { + // bad file + yaffsfs_SetError(-EBADF); + renameAllowed = 0; + } + else if(olddir->myDev != newdir->myDev) + { + // oops must be on same device + // todo error + yaffsfs_SetError(-EXDEV); + renameAllowed = 0; + } + else if(obj && obj->variantType == YAFFS_OBJECT_TYPE_DIRECTORY) + { + // It is a directory, check that it is not being renamed to + // being its own decendent. + // Do this by tracing from the new directory back to the root, checking for obj + + yaffs_Object *xx = newdir; + + while( renameAllowed && xx) + { + if(xx == obj) + { + renameAllowed = 0; + } + xx = xx->parent; + } + if(!renameAllowed) yaffsfs_SetError(-EACCESS); + } + + if(renameAllowed) + { + result = yaffs_RenameObject(olddir,oldname,newdir,newname); + } + + yaffsfs_Unlock(); + + return (result == YAFFS_FAIL) ? 
-1 : 0; +} + + +static int yaffsfs_DoStat(yaffs_Object *obj,struct yaffs_stat *buf) +{ + int retVal = -1; + + if(obj) + { + obj = yaffs_GetEquivalentObject(obj); + } + + if(obj && buf) + { + buf->st_dev = (int)obj->myDev->genericDevice; + buf->st_ino = obj->objectId; + buf->st_mode = obj->yst_mode & ~S_IFMT; // clear out file type bits + + if(obj->variantType == YAFFS_OBJECT_TYPE_DIRECTORY) + { + buf->st_mode |= S_IFDIR; + } + else if(obj->variantType == YAFFS_OBJECT_TYPE_SYMLINK) + { + buf->st_mode |= S_IFLNK; + } + else if(obj->variantType == YAFFS_OBJECT_TYPE_FILE) + { + buf->st_mode |= S_IFREG; + } + + buf->st_nlink = yaffs_GetObjectLinkCount(obj); + buf->st_uid = 0; + buf->st_gid = 0;; + buf->st_rdev = obj->yst_rdev; + buf->st_size = yaffs_GetObjectFileLength(obj); + buf->st_blksize = obj->myDev->nDataBytesPerChunk; + buf->st_blocks = (buf->st_size + buf->st_blksize -1)/buf->st_blksize; + buf->yst_atime = obj->yst_atime; + buf->yst_ctime = obj->yst_ctime; + buf->yst_mtime = obj->yst_mtime; + retVal = 0; + } + return retVal; +} + +static int yaffsfs_DoStatOrLStat(const char *path, struct yaffs_stat *buf,int doLStat) +{ + yaffs_Object *obj; + + int retVal = -1; + + yaffsfs_Lock(); + obj = yaffsfs_FindObject(NULL,path,0); + + if(!doLStat && obj) + { + obj = yaffsfs_FollowLink(obj,0); + } + + if(obj) + { + retVal = yaffsfs_DoStat(obj,buf); + } + else + { + // todo error not found + yaffsfs_SetError(-ENOENT); + } + + yaffsfs_Unlock(); + + return retVal; + +} + +int yaffs_stat(const char *path, struct yaffs_stat *buf) +{ + return yaffsfs_DoStatOrLStat(path,buf,0); +} + +int yaffs_lstat(const char *path, struct yaffs_stat *buf) +{ + return yaffsfs_DoStatOrLStat(path,buf,1); +} + +int yaffs_fstat(int fd, struct yaffs_stat *buf) +{ + yaffs_Object *obj; + + int retVal = -1; + + yaffsfs_Lock(); + obj = yaffsfs_GetHandleObject(fd); + + if(obj) + { + retVal = yaffsfs_DoStat(obj,buf); + } + else + { + // bad handle + yaffsfs_SetError(-EBADF); + } + + yaffsfs_Unlock(); + + 
return retVal; +} + +static int yaffsfs_DoChMod(yaffs_Object *obj,mode_t mode) +{ + int result = YAFFS_FAIL; + + if(obj) + { + obj = yaffs_GetEquivalentObject(obj); + } + + if(obj) + { + obj->yst_mode = mode; + obj->dirty = 1; + result = yaffs_FlushFile(obj,0); + } + + return result == YAFFS_OK ? 0 : -1; +} + + +int yaffs_chmod(const char *path, mode_t mode) +{ + yaffs_Object *obj; + + int retVal = -1; + + yaffsfs_Lock(); + obj = yaffsfs_FindObject(NULL,path,0); + + if(obj) + { + retVal = yaffsfs_DoChMod(obj,mode); + } + else + { + // todo error not found + yaffsfs_SetError(-ENOENT); + } + + yaffsfs_Unlock(); + + return retVal; + +} + + +int yaffs_fchmod(int fd, mode_t mode) +{ + yaffs_Object *obj; + + int retVal = -1; + + yaffsfs_Lock(); + obj = yaffsfs_GetHandleObject(fd); + + if(obj) + { + retVal = yaffsfs_DoChMod(obj,mode); + } + else + { + // bad handle + yaffsfs_SetError(-EBADF); + } + + yaffsfs_Unlock(); + + return retVal; +} + + +int yaffs_mkdir(const char *path, mode_t mode) +{ + yaffs_Object *parent = NULL; + yaffs_Object *dir = NULL; + char *name; + int retVal= -1; + + yaffsfs_Lock(); + parent = yaffsfs_FindDirectory(NULL,path,&name,0); + if(parent) + dir = yaffs_MknodDirectory(parent,name,mode,0,0); + if(dir) + { + retVal = 0; + } + else + { + yaffsfs_SetError(-ENOSPC); // just assume no space for now + retVal = -1; + } + + yaffsfs_Unlock(); + + return retVal; +} + +int yaffs_mount(const char *path) +{ + int retVal=-1; + int result=YAFFS_FAIL; + yaffs_Device *dev=NULL; + char *dummy; + + T(YAFFS_TRACE_ALWAYS,("yaffs: Mounting %s\n",path)); + + yaffsfs_Lock(); + dev = yaffsfs_FindDevice(path,&dummy); + if(dev) + { + if(!dev->isMounted) + { + result = yaffs_GutsInitialise(dev); + if(result == YAFFS_FAIL) + { + // todo error - mount failed + yaffsfs_SetError(-ENOMEM); + } + retVal = result ? 0 : -1; + + } + else + { + //todo error - already mounted. 
+ yaffsfs_SetError(-EBUSY); + } + } + else + { + // todo error - no device + yaffsfs_SetError(-ENODEV); + } + yaffsfs_Unlock(); + return retVal; + +} + +int yaffs_unmount(const char *path) +{ + int retVal=-1; + yaffs_Device *dev=NULL; + char *dummy; + + yaffsfs_Lock(); + dev = yaffsfs_FindDevice(path,&dummy); + if(dev) + { + if(dev->isMounted) + { + int i; + int inUse; + + yaffs_FlushEntireDeviceCache(dev); + //yaffs_CheckpointSave(dev); + + for(i = inUse = 0; i < YAFFSFS_N_HANDLES && !inUse; i++) + { + if(yaffsfs_handle[i].inUse && yaffsfs_handle[i].obj->myDev == dev) + { + inUse = 1; // the device is in use, can't unmount + } + } + + if(!inUse) + { + yaffs_Deinitialise(dev); + + retVal = 0; + } + else + { + // todo error can't unmount as files are open + yaffsfs_SetError(-EBUSY); + } + + } + else + { + //todo error - not mounted. + yaffsfs_SetError(-EINVAL); + + } + } + else + { + // todo error - no device + yaffsfs_SetError(-ENODEV); + } + yaffsfs_Unlock(); + return retVal; + +} + +loff_t yaffs_freespace(const char *path) +{ + loff_t retVal=-1; + yaffs_Device *dev=NULL; + char *dummy; + + yaffsfs_Lock(); + dev = yaffsfs_FindDevice(path,&dummy); + if(dev && dev->isMounted) + { + retVal = yaffs_GetNumberOfFreeChunks(dev); + retVal *= dev->nDataBytesPerChunk; + + } + else + { + yaffsfs_SetError(-EINVAL); + } + + yaffsfs_Unlock(); + return retVal; +} + + + +void yaffs_initialise(yaffsfs_DeviceConfiguration *cfgList) +{ + + yaffsfs_DeviceConfiguration *cfg; + + yaffsfs_configurationList = cfgList; + + yaffsfs_InitHandles(); + + cfg = yaffsfs_configurationList; + + while(cfg && cfg->prefix && cfg->dev) + { + cfg->dev->isMounted = 0; + cfg->dev->removeObjectCallback = yaffsfs_RemoveObjectCallback; + cfg++; + } +} + + +// +// Directory search stuff. + +// +// Directory search context +// +// NB this is an opaque structure. 
+ + +typedef struct +{ + __u32 magic; + yaffs_dirent de; /* directory entry being used by this dsc */ + char name[NAME_MAX+1]; /* name of directory being searched */ + yaffs_Object *dirObj; /* ptr to directory being searched */ + yaffs_Object *nextReturn; /* obj to be returned by next readddir */ + int offset; + struct list_head others; +} yaffsfs_DirectorySearchContext; + + + +static struct list_head search_contexts; + + +static void yaffsfs_SetDirRewound(yaffsfs_DirectorySearchContext *dsc) +{ + if(dsc && + dsc->dirObj && + dsc->dirObj->variantType == YAFFS_OBJECT_TYPE_DIRECTORY){ + + dsc->offset = 0; + + if( list_empty(&dsc->dirObj->variant.directoryVariant.children)){ + dsc->nextReturn = NULL; + } else { + dsc->nextReturn = list_entry(dsc->dirObj->variant.directoryVariant.children.next, + yaffs_Object,siblings); + } + } else { + /* Hey someone isn't playing nice! */ + } +} + +static void yaffsfs_DirAdvance(yaffsfs_DirectorySearchContext *dsc) +{ + if(dsc && + dsc->dirObj && + dsc->dirObj->variantType == YAFFS_OBJECT_TYPE_DIRECTORY){ + + if( dsc->nextReturn == NULL || + list_empty(&dsc->dirObj->variant.directoryVariant.children)){ + dsc->nextReturn = NULL; + } else { + struct list_head *next = dsc->nextReturn->siblings.next; + + if( next == &dsc->dirObj->variant.directoryVariant.children) + dsc->nextReturn = NULL; /* end of list */ + else + dsc->nextReturn = list_entry(next,yaffs_Object,siblings); + } + } else { + /* Hey someone isn't playing nice! */ + } +} + +static void yaffsfs_RemoveObjectCallback(yaffs_Object *obj) +{ + + struct list_head *i; + yaffsfs_DirectorySearchContext *dsc; + + /* if search contexts not initilised then skip */ + if(!search_contexts.next) + return; + + /* Iteratethrough the directory search contexts. + * If any are the one being removed, then advance the dsc to + * the next one to prevent a hanging ptr. 
+ */ + list_for_each(i, &search_contexts) { + if (i) { + dsc = list_entry(i, yaffsfs_DirectorySearchContext,others); + if(dsc->nextReturn == obj) + yaffsfs_DirAdvance(dsc); + } + } + +} + +yaffs_DIR *yaffs_opendir(const char *dirname) +{ + yaffs_DIR *dir = NULL; + yaffs_Object *obj = NULL; + yaffsfs_DirectorySearchContext *dsc = NULL; + + yaffsfs_Lock(); + + obj = yaffsfs_FindObject(NULL,dirname,0); + + if(obj && obj->variantType == YAFFS_OBJECT_TYPE_DIRECTORY) + { + + dsc = YMALLOC(sizeof(yaffsfs_DirectorySearchContext)); + dir = (yaffs_DIR *)dsc; + if(dsc) + { + memset(dsc,0,sizeof(yaffsfs_DirectorySearchContext)); + dsc->magic = YAFFS_MAGIC; + dsc->dirObj = obj; + strncpy(dsc->name,dirname,NAME_MAX); + INIT_LIST_HEAD(&dsc->others); + + if(!search_contexts.next) + INIT_LIST_HEAD(&search_contexts); + + list_add(&dsc->others,&search_contexts); + yaffsfs_SetDirRewound(dsc); } + + } + + yaffsfs_Unlock(); + + return dir; +} + +struct yaffs_dirent *yaffs_readdir(yaffs_DIR *dirp) +{ + yaffsfs_DirectorySearchContext *dsc = (yaffsfs_DirectorySearchContext *)dirp; + struct yaffs_dirent *retVal = NULL; + + yaffsfs_Lock(); + + if(dsc && dsc->magic == YAFFS_MAGIC){ + yaffsfs_SetError(0); + if(dsc->nextReturn){ + dsc->de.d_ino = yaffs_GetEquivalentObject(dsc->nextReturn)->objectId; + dsc->de.d_dont_use = (unsigned)dsc->nextReturn; + dsc->de.d_off = dsc->offset++; + yaffs_GetObjectName(dsc->nextReturn,dsc->de.d_name,NAME_MAX); + if(strlen(dsc->de.d_name) == 0) + { + // this should not happen! 
+ strcpy(dsc->de.d_name,"zz"); + } + dsc->de.d_reclen = sizeof(struct yaffs_dirent); + retVal = &dsc->de; + yaffsfs_DirAdvance(dsc); + } else + retVal = NULL; + } + else + { + yaffsfs_SetError(-EBADF); + } + + yaffsfs_Unlock(); + + return retVal; + +} + + +void yaffs_rewinddir(yaffs_DIR *dirp) +{ + yaffsfs_DirectorySearchContext *dsc = (yaffsfs_DirectorySearchContext *)dirp; + + yaffsfs_Lock(); + + yaffsfs_SetDirRewound(dsc); + + yaffsfs_Unlock(); +} + + +int yaffs_closedir(yaffs_DIR *dirp) +{ + yaffsfs_DirectorySearchContext *dsc = (yaffsfs_DirectorySearchContext *)dirp; + + yaffsfs_Lock(); + dsc->magic = 0; + list_del(&dsc->others); /* unhook from list */ + YFREE(dsc); + yaffsfs_Unlock(); + return 0; +} + +// end of directory stuff + + +int yaffs_symlink(const char *oldpath, const char *newpath) +{ + yaffs_Object *parent = NULL; + yaffs_Object *obj; + char *name; + int retVal= -1; + int mode = 0; // ignore for now + + yaffsfs_Lock(); + parent = yaffsfs_FindDirectory(NULL,newpath,&name,0); + obj = yaffs_MknodSymLink(parent,name,mode,0,0,oldpath); + if(obj) + { + retVal = 0; + } + else + { + yaffsfs_SetError(-ENOSPC); // just assume no space for now + retVal = -1; + } + + yaffsfs_Unlock(); + + return retVal; + +} + +int yaffs_readlink(const char *path, char *buf, int bufsiz) +{ + yaffs_Object *obj = NULL; + int retVal; + + + yaffsfs_Lock(); + + obj = yaffsfs_FindObject(NULL,path,0); + + if(!obj) + { + yaffsfs_SetError(-ENOENT); + retVal = -1; + } + else if(obj->variantType != YAFFS_OBJECT_TYPE_SYMLINK) + { + yaffsfs_SetError(-EINVAL); + retVal = -1; + } + else + { + char *alias = obj->variant.symLinkVariant.alias; + memset(buf,0,bufsiz); + strncpy(buf,alias,bufsiz - 1); + retVal = 0; + } + yaffsfs_Unlock(); + return retVal; +} + +int yaffs_link(const char *oldpath, const char *newpath) +{ + // Creates a link called newpath to existing oldpath + yaffs_Object *obj = NULL; + yaffs_Object *target = NULL; + int retVal = 0; + + + yaffsfs_Lock(); + + obj = 
yaffsfs_FindObject(NULL,oldpath,0); + target = yaffsfs_FindObject(NULL,newpath,0); + + if(!obj) + { + yaffsfs_SetError(-ENOENT); + retVal = -1; + } + else if(target) + { + yaffsfs_SetError(-EEXIST); + retVal = -1; + } + else + { + yaffs_Object *newdir = NULL; + yaffs_Object *link = NULL; + + char *newname; + + newdir = yaffsfs_FindDirectory(NULL,newpath,&newname,0); + + if(!newdir) + { + yaffsfs_SetError(-ENOTDIR); + retVal = -1; + } + else if(newdir->myDev != obj->myDev) + { + yaffsfs_SetError(-EXDEV); + retVal = -1; + } + if(newdir && strlen(newname) > 0) + { + link = yaffs_Link(newdir,newname,obj); + if(link) + retVal = 0; + else + { + yaffsfs_SetError(-ENOSPC); + retVal = -1; + } + + } + } + yaffsfs_Unlock(); + + return retVal; +} + +int yaffs_mknod(const char *pathname, mode_t mode, dev_t dev); + +int yaffs_DumpDevStruct(const char *path) +{ +/* char *rest; + + yaffs_Object *obj = yaffsfs_FindRoot(path,&rest); + + if(obj) + { + yaffs_Device *dev = obj->myDev; + + printf("\n" + "nPageWrites.......... %d\n" + "nPageReads........... %d\n" + "nBlockErasures....... %d\n" + "nGCCopies............ %d\n" + "garbageCollections... %d\n" + "passiveGarbageColl'ns %d\n" + "\n", + dev->nPageWrites, + dev->nPageReads, + dev->nBlockErasures, + dev->nGCCopies, + dev->garbageCollections, + dev->passiveGarbageCollections + ); + + }*/ + return 0; +} diff --git a/fs/yaffs2/yaffsfs.h b/fs/yaffs2/yaffsfs.h new file mode 100755 index 0000000..95e7a91 --- /dev/null +++ b/fs/yaffs2/yaffsfs.h @@ -0,0 +1,231 @@ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. 
+ * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. + */ + +/* + * Header file for using yaffs in an application via + * a direct interface. + */ + + +#ifndef __YAFFSFS_H__ +#define __YAFFSFS_H__ + +#include "yaffscfg.h" +#include "yportenv.h" + + +//typedef long off_t; +//typedef long dev_t; +//typedef unsigned long mode_t; + + +#ifndef NAME_MAX +#define NAME_MAX 256 +#endif + +#ifndef O_RDONLY +#define O_RDONLY 00 +#endif + +#ifndef O_WRONLY +#define O_WRONLY 01 +#endif + +#ifndef O_RDWR +#define O_RDWR 02 +#endif + +#ifndef O_CREAT +#define O_CREAT 0100 +#endif + +#ifndef O_EXCL +#define O_EXCL 0200 +#endif + +#ifndef O_TRUNC +#define O_TRUNC 01000 +#endif + +#ifndef O_APPEND +#define O_APPEND 02000 +#endif + +#ifndef SEEK_SET +#define SEEK_SET 0 +#endif + +#ifndef SEEK_CUR +#define SEEK_CUR 1 +#endif + +#ifndef SEEK_END +#define SEEK_END 2 +#endif + +#ifndef EBUSY +#define EBUSY 16 +#endif + +#ifndef ENODEV +#define ENODEV 19 +#endif + +#ifndef EINVAL +#define EINVAL 22 +#endif + +#ifndef EBADF +#define EBADF 9 +#endif + +#ifndef EACCESS +#define EACCESS 13 +#endif + +#ifndef EXDEV +#define EXDEV 18 +#endif + +#ifndef ENOENT +#define ENOENT 2 +#endif + +#ifndef ENOSPC +#define ENOSPC 28 +#endif + +#ifndef ENOTEMPTY +#define ENOTEMPTY 39 +#endif + +#ifndef ENOMEM +#define ENOMEM 12 +#endif + +#ifndef EEXIST +#define EEXIST 17 +#endif + +#ifndef ENOTDIR +#define ENOTDIR 20 +#endif + +#ifndef EISDIR +#define EISDIR 21 +#endif + + +// Mode flags + +#ifndef S_IFMT +#define S_IFMT 0170000 +#endif + +#ifndef S_IFLNK +#define S_IFLNK 0120000 +#endif + +#ifndef S_IFDIR +#define S_IFDIR 0040000 +#endif + +#ifndef S_IFREG +#define S_IFREG 0100000 +#endif + +#ifndef S_IREAD +#define S_IREAD 0000400 +#endif + +#ifndef S_IWRITE +#define S_IWRITE 0000200 +#endif + + + + +struct yaffs_dirent{ + long d_ino; /* inode number */ + off_t d_off; /* offset to this dirent */ + unsigned short d_reclen; /* length of this d_name */ + char d_name [NAME_MAX+1]; /* 
file name (null-terminated) */ + unsigned d_dont_use; /* debug pointer, not for public consumption */ +}; + +typedef struct yaffs_dirent yaffs_dirent; + + +typedef struct __opaque yaffs_DIR; + + + +struct yaffs_stat{ + int st_dev; /* device */ + int st_ino; /* inode */ + mode_t st_mode; /* protection */ + int st_nlink; /* number of hard links */ + int st_uid; /* user ID of owner */ + int st_gid; /* group ID of owner */ + unsigned st_rdev; /* device type (if inode device) */ + off_t st_size; /* total size, in bytes */ + unsigned long st_blksize; /* blocksize for filesystem I/O */ + unsigned long st_blocks; /* number of blocks allocated */ + unsigned long yst_atime; /* time of last access */ + unsigned long yst_mtime; /* time of last modification */ + unsigned long yst_ctime; /* time of last change */ +}; + +int yaffs_open(const char *path, int oflag, int mode) ; +int yaffs_read(int fd, void *buf, unsigned int nbyte) ; +int yaffs_write(int fd, const void *buf, unsigned int nbyte) ; +int yaffs_close(int fd) ; +off_t yaffs_lseek(int fd, off_t offset, int whence) ; +int yaffs_truncate(int fd, off_t newSize); + +int yaffs_unlink(const char *path) ; +int yaffs_rename(const char *oldPath, const char *newPath) ; + +int yaffs_stat(const char *path, struct yaffs_stat *buf) ; +int yaffs_lstat(const char *path, struct yaffs_stat *buf) ; +int yaffs_fstat(int fd, struct yaffs_stat *buf) ; + +int yaffs_chmod(const char *path, mode_t mode); +int yaffs_fchmod(int fd, mode_t mode); + +int yaffs_mkdir(const char *path, mode_t mode) ; +int yaffs_rmdir(const char *path) ; + +yaffs_DIR *yaffs_opendir(const char *dirname) ; +struct yaffs_dirent *yaffs_readdir(yaffs_DIR *dirp) ; +void yaffs_rewinddir(yaffs_DIR *dirp) ; +int yaffs_closedir(yaffs_DIR *dirp) ; + +int yaffs_mount(const char *path) ; +int yaffs_unmount(const char *path) ; + +int yaffs_symlink(const char *oldpath, const char *newpath); +int yaffs_readlink(const char *path, char *buf, int bufsiz); + +int yaffs_link(const char 
*oldpath, const char *newpath); +int yaffs_mknod(const char *pathname, mode_t mode, dev_t dev); + +loff_t yaffs_freespace(const char *path); + +void yaffs_initialise(yaffsfs_DeviceConfiguration *configList); + +int yaffs_StartUp(void); + +#endif diff --git a/fs/yaffs2/yaffsinterface.h b/fs/yaffs2/yaffsinterface.h new file mode 100755 index 0000000..810837a --- /dev/null +++ b/fs/yaffs2/yaffsinterface.h @@ -0,0 +1,21 @@ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. + */ + +#ifndef __YAFFSINTERFACE_H__ +#define __YAFFSINTERFACE_H__ + +int yaffs_Initialise(unsigned nBlocks); + +#endif diff --git a/fs/yaffs2/ydirectenv.h b/fs/yaffs2/ydirectenv.h new file mode 100755 index 0000000..b555810 --- /dev/null +++ b/fs/yaffs2/ydirectenv.h @@ -0,0 +1,92 @@ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. + * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. + */ + +/* + * ydirectenv.h: Environment wrappers for YAFFS direct. 
+ */ + +#ifndef __YDIRECTENV_H__ +#define __YDIRECTENV_H__ + +/* Direct interface */ + +#include "devextras.h" + +/* XXX U-BOOT XXX */ +#if 0 +#include "stdlib.h" +#include "stdio.h" +#include "string.h" +#include "assert.h" +#endif +#include "yaffs_malloc.h" + +/* XXX U-BOOT XXX */ +#if 0 +#define YBUG() assert(1) +#endif + +#define YCHAR char +#define YUCHAR unsigned char +#define _Y(x) x +#define yaffs_strcpy(a,b) strcpy(a,b) +#define yaffs_strncpy(a,b,c) strncpy(a,b,c) +#define yaffs_strncmp(a,b,c) strncmp(a,b,c) +#define yaffs_strlen(s) strlen(s) +#define yaffs_sprintf sprintf +#define yaffs_toupper(a) toupper(a) + +#ifdef NO_Y_INLINE +#define Y_INLINE +#else +#define Y_INLINE inline +#endif + +#define YMALLOC(x) yaffs_malloc(x) +#define YFREE(x) free(x) +#define YMALLOC_ALT(x) yaffs_malloc(x) +#define YFREE_ALT(x) free(x) + +#define YMALLOC_DMA(x) yaffs_malloc(x) + +#define YYIELD() do {} while(0) + + + +//#define YINFO(s) YPRINTF(( __FILE__ " %d %s\n",__LINE__,s)) +//#define YALERT(s) YINFO(s) + + +#define TENDSTR "\n" +#define TSTR(x) x +#define TOUT(p) printf p + + +#define YAFFS_LOSTNFOUND_NAME "lost+found" +#define YAFFS_LOSTNFOUND_PREFIX "obj" +//#define YPRINTF(x) printf x + +#include "yaffscfg.h" + +#define Y_CURRENT_TIME yaffsfs_CurrentTime() +#define Y_TIME_CONVERT(x) x + +#define YAFFS_ROOT_MODE 0666 +#define YAFFS_LOSTNFOUND_MODE 0666 + +#define yaffs_SumCompare(x,y) ((x) == (y)) +#define yaffs_strcmp(a,b) strcmp(a,b) + +#endif diff --git a/fs/yaffs2/yportenv.h b/fs/yaffs2/yportenv.h new file mode 100755 index 0000000..62a50c3 --- /dev/null +++ b/fs/yaffs2/yportenv.h @@ -0,0 +1,210 @@ +/* + * YAFFS: Yet another Flash File System . A NAND-flash specific file system. + * + * Copyright (C) 2002-2007 Aleph One Ltd. 
+ * for Toby Churchill Ltd and Brightstar Engineering + * + * Created by Charles Manning <charles@aleph1.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL. + */ + + +#ifndef __YPORTENV_H__ +#define __YPORTENV_H__ + +/* XXX U-BOOT XXX */ +#ifndef CONFIG_YAFFS_DIRECT +#define CONFIG_YAFFS_DIRECT +#endif + +#if defined CONFIG_YAFFS_WINCE + +#include "ywinceenv.h" + +/* XXX U-BOOT XXX */ +#elif 0 /* defined __KERNEL__ */ + +#include "moduleconfig.h" + +/* Linux kernel */ +#include <linux/version.h> +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)) +#include <linux/config.h> +#endif +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#define YCHAR char +#define YUCHAR unsigned char +#define _Y(x) x +#define yaffs_strcpy(a,b) strcpy(a,b) +#define yaffs_strncpy(a,b,c) strncpy(a,b,c) +#define yaffs_strncmp(a,b,c) strncmp(a,b,c) +#define yaffs_strlen(s) strlen(s) +#define yaffs_sprintf sprintf +#define yaffs_toupper(a) toupper(a) + +#define Y_INLINE inline + +#define YAFFS_LOSTNFOUND_NAME "lost+found" +#define YAFFS_LOSTNFOUND_PREFIX "obj" + +/* #define YPRINTF(x) printk x */ +#define YMALLOC(x) kmalloc(x,GFP_KERNEL) +#define YFREE(x) kfree(x) +#define YMALLOC_ALT(x) vmalloc(x) +#define YFREE_ALT(x) vfree(x) +#define YMALLOC_DMA(x) YMALLOC(x) + +// KR - added for use in scan so processes aren't blocked indefinitely. 
+#define YYIELD() schedule() + +#define YAFFS_ROOT_MODE 0666 +#define YAFFS_LOSTNFOUND_MODE 0666 + +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +#define Y_CURRENT_TIME CURRENT_TIME.tv_sec +#define Y_TIME_CONVERT(x) (x).tv_sec +#else +#define Y_CURRENT_TIME CURRENT_TIME +#define Y_TIME_CONVERT(x) (x) +#endif + +#define yaffs_SumCompare(x,y) ((x) == (y)) +#define yaffs_strcmp(a,b) strcmp(a,b) + +#define TENDSTR "\n" +#define TSTR(x) KERN_WARNING x +#define TOUT(p) printk p + +//#define yaffs_trace(mask, fmt, args...) \ + do { if ((mask) & (yaffs_traceMask|YAFFS_TRACE_ERROR)) \ + printk(KERN_WARNING "yaffs: " fmt, ## args); \ + } while (0) + +#define compile_time_assertion(assertion) \ + ({ int x = __builtin_choose_expr(assertion, 0, (void)0); (void) x; }) + +#elif defined CONFIG_YAFFS_DIRECT + +/* Direct interface */ +#include "ydirectenv.h" + +#elif defined CONFIG_YAFFS_UTIL + +/* Stuff for YAFFS utilities */ + +#include "stdlib.h" +#include "stdio.h" +#include "string.h" + +#include "devextras.h" + +#define YMALLOC(x) malloc(x) +#define YFREE(x) free(x) +#define YMALLOC_ALT(x) malloc(x) +#define YFREE_ALT(x) free(x) + +#define YCHAR char +#define YUCHAR unsigned char +#define _Y(x) x +#define yaffs_strcpy(a,b) strcpy(a,b) +#define yaffs_strncpy(a,b,c) strncpy(a,b,c) +#define yaffs_strlen(s) strlen(s) +#define yaffs_sprintf sprintf +#define yaffs_toupper(a) toupper(a) + +#define Y_INLINE inline + +/* #define YINFO(s) YPRINTF(( __FILE__ " %d %s\n",__LINE__,s)) */ +/* #define YALERT(s) YINFO(s) */ + +#define TENDSTR "\n" +#define TSTR(x) x +#define TOUT(p) printf p + +#define YAFFS_LOSTNFOUND_NAME "lost+found" +#define YAFFS_LOSTNFOUND_PREFIX "obj" +/* #define YPRINTF(x) printf x */ + +#define YAFFS_ROOT_MODE 0666 +#define YAFFS_LOSTNFOUND_MODE 0666 + +#define yaffs_SumCompare(x,y) ((x) == (y)) +#define yaffs_strcmp(a,b) strcmp(a,b) + +#else +/* Should have specified a configuration type */ +#error Unknown configuration + +#endif + +/* see yaffs_fs.c */ +extern 
unsigned int yaffs_traceMask; +extern unsigned int yaffs_wr_attempts; + +/* + * Tracing flags. + * The flags masked in YAFFS_TRACE_ALWAYS are always traced. + */ + +#define YAFFS_TRACE_OS 0x00000002 +#define YAFFS_TRACE_ALLOCATE 0x00000004 + +#define YAFFS_TRACE_SCAN 0x00000008 +//#define YAFFS_TRACE_SCAN 0xF0000000 + +#define YAFFS_TRACE_BAD_BLOCKS 0x00000010 +//#define YAFFS_TRACE_BAD_BLOCKS 0xF0000000 + +#define YAFFS_TRACE_ERASE 0x00000020 +#define YAFFS_TRACE_GC 0x00000040 + +#define YAFFS_TRACE_WRITE 0x00000080 +//#define YAFFS_TRACE_WRITE 0xF0000000 + +#define YAFFS_TRACE_TRACING 0x00000100 +#define YAFFS_TRACE_DELETION 0x00000200 +#define YAFFS_TRACE_BUFFERS 0x00000400 + +//#define YAFFS_TRACE_NANDACCESS 0xF0000000 +#define YAFFS_TRACE_NANDACCESS 0x00000800 + +#define YAFFS_TRACE_GC_DETAIL 0x00001000 + +#define YAFFS_TRACE_SCAN_DEBUG 0x00002000 +//#define YAFFS_TRACE_SCAN_DEBUG 0xF0000000 + +#define YAFFS_TRACE_MTD 0x00004000 +//#define YAFFS_TRACE_MTD 0xF0000000 + +#define YAFFS_TRACE_CHECKPOINT 0x00008000 +//#define YAFFS_TRACE_CHECKPOINT 0xF0000000 + +#define YAFFS_TRACE_VERIFY 0x00010000 +#define YAFFS_TRACE_VERIFY_NAND 0x00020000 +#define YAFFS_TRACE_VERIFY_FULL 0x00040000 +#define YAFFS_TRACE_VERIFY_ALL 0x000F0000 + + +#define YAFFS_TRACE_ERROR 0x40000000 +#define YAFFS_TRACE_BUG 0x80000000 +#define YAFFS_TRACE_ALWAYS 0xF0000000 + + +#define T(mask,p) do{ if((mask) & (yaffs_traceMask | YAFFS_TRACE_ALWAYS)) TOUT(p);} while(0) + +#ifndef CONFIG_YAFFS_WINCE +#define YBUG() T(YAFFS_TRACE_BUG,(TSTR("==>> yaffs bug: " __FILE__ " %d" TENDSTR),__LINE__)) +#endif + +#endif |