summary refs log tree commit diff
path: root/fs/jfs/jfs_logmgr.h
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jfs/jfs_logmgr.h')
-rw-r--r-- fs/jfs/jfs_logmgr.h 513
1 files changed, 513 insertions, 0 deletions
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
new file mode 100644
index 00000000..e38c2159
--- /dev/null
+++ b/fs/jfs/jfs_logmgr.h
@@ -0,0 +1,513 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2000-2004
+ * Portions Copyright (C) Christoph Hellwig, 2001-2002
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef _H_JFS_LOGMGR
+#define _H_JFS_LOGMGR
+
+#include "jfs_filsys.h"
+#include "jfs_lock.h"
+
+/*
+ * log manager configuration parameters
+ */
+
+/* log page size in bytes; L2LOGPSIZE is its base-2 logarithm (1 << 12 == 4096) */
+#define LOGPSIZE 4096
+#define L2LOGPSIZE 12
+
+#define LOGPAGES 16 /* Log pages per mounted file system */
+
+/*
+ * log logical volume
+ *
+ * a log is used to make the commit operation on journalled
+ * files within the same logical volume group atomic.
+ * a log is implemented with a logical volume.
+ * there is one log per logical volume group.
+ *
+ * block 0 of the log logical volume is not used (ipl etc).
+ * block 1 contains a log "superblock" and is used by logFormat(),
+ * lmLogInit(), lmLogShutdown(), and logRedo() to record status
+ * of the log but is not otherwise used during normal processing.
+ * blocks 2 - (N-1) are used to contain log records.
+ *
+ * when a volume group is varied-on-line, logRedo() must have
+ * been executed before the file systems (logical volumes) in
+ * the volume group can be mounted.
+ */
+/*
+ * log superblock (block 1 of logical volume)
+ */
+#define LOGSUPER_B 1 /* block number of the log superblock */
+#define LOGSTART_B 2 /* first block used to contain log records */
+
+#define LOGMAGIC 0x87654321 /* NOTE(review): presumably the expected logsuper.magic -- confirm */
+#define LOGVERSION 1 /* NOTE(review): presumably the expected logsuper.version -- confirm */
+
+#define MAX_ACTIVE 128 /* Max active file systems sharing log */
+
+struct logsuper { /* log superblock layout; integer fields are little-endian */
+ __le32 magic; /* 4: log lv identifier */
+ __le32 version; /* 4: version number */
+ __le32 serial; /* 4: log open/mount counter */
+ __le32 size; /* 4: size in number of LOGPSIZE blocks */
+ __le32 bsize; /* 4: logical block size in bytes */
+ __le32 l2bsize; /* 4: log2 of bsize */
+
+ __le32 flag; /* 4: commit option (see jfs_filsys.h) */
+ __le32 state; /* 4: log state - see LOGMOUNT etc. below */
+
+ __le32 end; /* 4: addr of last log record set by logredo */
+ char uuid[16]; /* 16: 128-bit journal uuid */
+ char label[16]; /* 16: journal label */
+ struct {
+ char uuid[16]; /* 16: one entry of the active list */
+ } active[MAX_ACTIVE]; /* 2048: active file systems list (128 * 16) */
+};
+
+#define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" /* 16 explicit NUL bytes: all-zero uuid */
+
+/* log flag: commit option (see jfs_filsys.h) */
+
+/* log state (NOTE(review): presumably the values of logsuper.state -- confirm) */
+#define LOGMOUNT 0 /* log mounted by lmLogInit() */
+#define LOGREDONE 1 /* log shutdown by lmLogShutdown().
+ * log redo completed by logredo().
+ */
+#define LOGWRAP 2 /* log wrapped */
+#define LOGREADERR 3 /* log read error detected in logredo() */
+
+
+/*
+ * log logical page
+ *
+ * (this comment should be rewritten !)
+ * the header and trailer structures (h,t) will normally have
+ * the same page and eor value.
+ * An exception to this occurs when a complete page write is not
+ * accomplished on a power failure. Since the hardware may "split write"
+ * sectors in the page, any out of order sequence may occur during powerfail
+ * and needs to be recognized during log replay. The xor value is
+ * an "exclusive or" of all log words in the page up to eor. This
+ * 32 bit xor is stored with the top 16 bits in the header and the
+ * bottom 16 bits in the trailer. logredo can easily recognize pages
+ * that were not completed by reconstructing this xor and checking
+ * the log page.
+ *
+ * Previous versions of the operating system did not allow split
+ * writes and detected partially written records in logredo by
+ * ordering the updates to the header, trailer, and the move of data
+ * into the logdata area. The order: (1) data is moved (2) header
+ * is updated (3) trailer is updated. In logredo, when the header
+ * differed from the trailer, the header and trailer were reconciled
+ * as follows: if h.page != t.page they were set to the smaller of
+ * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
+ * h.eor != t.eor they were set to the smaller of their two values.
+ */
+struct logpage { /* one LOGPSIZE-byte log page: header, record area, trailer */
+ struct { /* header */
+ __le32 page; /* 4: log sequence page number */
+ __le16 rsrvd; /* 2: */
+ __le16 eor; /* 2: end-of-log offset of last record write */
+ } h;
+
+ __le32 data[LOGPSIZE / 4 - 4]; /* log record area: page words minus 4 words of header + trailer */
+
+ struct { /* trailer */
+ __le32 page; /* 4: normally the same as h.page */
+ __le16 rsrvd; /* 2: */
+ __le16 eor; /* 2: normally the same as h.eor */
+ } t;
+};
+
+#define LOGPHDRSIZE 8 /* log page header size in bytes (page + rsrvd + eor) */
+#define LOGPTLRSIZE 8 /* log page trailer size in bytes (page + rsrvd + eor) */
+
+
+/*
+ * log record
+ *
+ * (this comment should be rewritten !)
+ * jfs uses only "after" log records (only a single writer is allowed
+ * in a page, pages are written to temporary paging space
+ * if they must be written to disk before commit, and i/o is
+ * scheduled for modified pages to their home location after
+ * the log records containing the after values and the commit
+ * record is written to the log on disk, undo discards the copy
+ * in main-memory.)
+ *
+ * a log record consists of a data area of variable length followed by
+ * a descriptor of fixed size LOGRDSIZE bytes.
+ * the data area is rounded up to an integral number of 4-bytes and
+ * must be no longer than LOGPSIZE.
+ * the descriptor is of size of multiple of 4-bytes and aligned on a
+ * 4-byte boundary.
+ * records are packed one after the other in the data area of log pages.
+ * (sometimes a DUMMY record is inserted so that at least one record ends
+ * on every page or the longest record is placed on at most two pages).
+ * the field eor in page header/trailer points to the byte following
+ * the last record on a page.
+ */
+
+/* log record types */
+#define LOG_COMMIT 0x8000 /* transaction commit */
+#define LOG_SYNCPT 0x4000 /* log sync point */
+#define LOG_MOUNT 0x2000 /* file system mount */
+#define LOG_REDOPAGE 0x0800 /* after-image to apply */
+#define LOG_NOREDOPAGE 0x0080 /* the page is freed */
+#define LOG_NOREDOINOEXT 0x0040 /* the inode extent is freed */
+#define LOG_UPDATEMAP 0x0008 /* update block allocation map */
+#define LOG_NOREDOFILE 0x0001 /* this file is freed */
+
+/* REDOPAGE/NOREDOPAGE log record data type (one bit each; 0x0008 is not assigned here) */
+#define LOG_INODE 0x0001
+#define LOG_XTREE 0x0002
+#define LOG_DTREE 0x0004
+#define LOG_BTROOT 0x0010
+#define LOG_EA 0x0020
+#define LOG_ACL 0x0040
+#define LOG_DATA 0x0080
+#define LOG_NEW 0x0100
+#define LOG_EXTEND 0x0200
+#define LOG_RELOCATE 0x0400
+#define LOG_DIR_XTREE 0x0800 /* Xtree is in directory inode */
+
+/* UPDATEMAP log record descriptor type (one distinct bit per value) */
+#define LOG_ALLOCXADLIST 0x0080
+#define LOG_ALLOCPXDLIST 0x0040
+#define LOG_ALLOCXAD 0x0020
+#define LOG_ALLOCPXD 0x0010
+#define LOG_FREEXADLIST 0x0008
+#define LOG_FREEPXDLIST 0x0004
+#define LOG_FREEXAD 0x0002
+#define LOG_FREEPXD 0x0001
+
+
+struct lrd { /* log record descriptor: 16-byte common area + 20-byte type-dependent union */
+ /*
+ * type independent area
+ */
+ __le32 logtid; /* 4: log transaction identifier */
+ __le32 backchain; /* 4: ptr to prev record of same transaction */
+ __le16 type; /* 2: record type (LOG_COMMIT, LOG_REDOPAGE, ...) */
+ __le16 length; /* 2: length of data in record (in bytes) */
+ __le32 aggregate; /* 4: file system lv/aggregate */
+ /* (16) */
+
+ /*
+ * type dependent area (20)
+ */
+ union {
+
+ /*
+ * COMMIT: commit
+ *
+ * transaction commit: no type-dependent information;
+ */
+
+ /*
+ * REDOPAGE: after-image
+ *
+ * apply after-image;
+ *
+ * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
+ */
+ struct {
+ __le32 fileset; /* 4: fileset number */
+ __le32 inode; /* 4: inode number */
+ __le16 type; /* 2: REDOPAGE record type */
+ __le16 l2linesize; /* 2: log2 of line size */
+ pxd_t pxd; /* 8: on-disk page pxd */
+ } redopage; /* (20) */
+
+ /*
+ * NOREDOPAGE: the page is freed
+ *
+ * do not apply after-image records which precede this record
+ * in the log with the same page block number to this page.
+ *
+ * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
+ */
+ struct {
+ __le32 fileset; /* 4: fileset number */
+ __le32 inode; /* 4: inode number */
+ __le16 type; /* 2: NOREDOPAGE record type */
+ __le16 rsrvd; /* 2: reserved */
+ pxd_t pxd; /* 8: on-disk page pxd */
+ } noredopage; /* (20) */
+
+ /*
+ * UPDATEMAP: update block allocation map
+ *
+ * either in-line PXD,
+ * or out-of-line XADLIST;
+ *
+ * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
+ */
+ struct {
+ __le32 fileset; /* 4: fileset number */
+ __le32 inode; /* 4: inode number */
+ __le16 type; /* 2: UPDATEMAP record type */
+ __le16 nxd; /* 2: number of extents */
+ pxd_t pxd; /* 8: pxd */
+ } updatemap; /* (20) */
+
+ /*
+ * NOREDOINOEXT: the inode extent is freed
+ *
+ * do not apply after-image records which precede this
+ * record in the log with any of the 4 page block
+ * numbers in this inode extent.
+ *
+ * NOTE: The fileset and pxd fields MUST remain at the same
+ * offsets as in the REDOPAGE record format.
+ *
+ */
+ struct {
+ __le32 fileset; /* 4: fileset number */
+ __le32 iagnum; /* 4: IAG number */
+ __le32 inoext_idx; /* 4: inode extent index */
+ pxd_t pxd; /* 8: on-disk page pxd */
+ } noredoinoext; /* (20) */
+
+ /*
+ * SYNCPT: log sync point
+ *
+ * replay log up to syncpt address specified;
+ */
+ struct {
+ __le32 sync; /* 4: syncpt address (0 = here) */
+ } syncpt; /* (4) */
+
+ /*
+ * MOUNT: file system mount
+ *
+ * file system mount: no type-dependent information;
+ */
+
+ /*
+ * ? FREEXTENT: free specified extent(s)
+ *
+ * free specified extent(s) from block allocation map
+ * N.B.: nextent should be length of data/sizeof(xad_t)
+ */
+ struct {
+ __le32 type; /* 4: FREEXTENT record type */
+ __le32 nextent; /* 4: number of extents */
+
+ /* data: PXD or XAD list */
+ } freextent; /* (8) */
+
+ /*
+ * ? NOREDOFILE: this file is freed
+ *
+ * do not apply records which precede this record in the log
+ * with the same inode number.
+ *
+ * NOREDOFILE must be the first to be written at commit
+ * (last to be read in logredo()) - it prevents
+ * replay of preceding updates of all preceding generations
+ * of the inumber esp. the on-disk inode itself.
+ */
+ struct {
+ __le32 fileset; /* 4: fileset number */
+ __le32 inode; /* 4: inode number */
+ } noredofile; /* (8) */
+
+ /*
+ * ? NEWPAGE:
+ *
+ * metadata type dependent
+ */
+ struct {
+ __le32 fileset; /* 4: fileset number */
+ __le32 inode; /* 4: inode number */
+ __le32 type; /* 4: NEWPAGE record type */
+ pxd_t pxd; /* 8: on-disk page pxd */
+ } newpage; /* (20) */
+
+ /*
+ * ? DUMMY: filler
+ *
+ * no type-dependent information
+ */
+ } log;
+}; /* (36) */
+
+#define LOGRDSIZE (sizeof(struct lrd)) /* size in bytes of the fixed-size log record descriptor */
+
+/*
+ * line vector descriptor: a pair of little-endian 16-bit fields (offset, length)
+ */
+struct lvd {
+ __le16 offset;
+ __le16 length;
+};
+
+
+/*
+ * log logical volume
+ */
+struct jfs_log {
+ /* NOTE(review): the "4:"/"8:" size tags below look like 32-bit-era annotations; treat as approximate */
+ struct list_head sb_list; /* This is used to sync metadata
+ * before writing syncpt.
+ */
+ struct list_head journal_list; /* Global list */
+ struct block_device *bdev; /* 4: log lv pointer */
+ int serial; /* 4: log mount serial number */
+
+ s64 base; /* @8: log extent address (inline log) */
+ int size; /* 4: log size in log pages */
+ int l2bsize; /* 4: log2 of bsize */
+
+ unsigned long flag; /* 4: flag (NOTE(review): presumably holds the log_* values -- confirm) */
+
+ struct lbuf *lbuf_free; /* 4: free lbufs */
+ wait_queue_head_t free_wait; /* 4: */
+
+ /* log write */
+ int logtid; /* 4: log tid */
+ int page; /* 4: page number of eol page */
+ int eor; /* 4: eor of last record in eol page */
+ struct lbuf *bp; /* 4: current log page buffer */
+
+ struct mutex loglock; /* 4: log write serialization lock */
+
+ /* syncpt */
+ int nextsync; /* 4: bytes to write before next syncpt */
+ int active; /* 4: */
+ wait_queue_head_t syncwait; /* 4: */
+
+ /* commit */
+ uint cflag; /* 4: (NOTE(review): presumably the logGC_* group commit flag -- confirm) */
+ struct list_head cqueue; /* FIFO commit queue */
+ struct tblock *flush_tblk; /* tblk we're waiting on for flush */
+ int gcrtc; /* 4: GC_READY transaction count */
+ struct tblock *gclrt; /* 4: latest GC_READY transaction */
+ spinlock_t gclock; /* 4: group commit lock */
+ int logsize; /* 4: log data area size in bytes */
+ int lsn; /* 4: end-of-log */
+ int clsn; /* 4: clsn */
+ int syncpt; /* 4: addr of last syncpt record */
+ int sync; /* 4: addr from last logsync() */
+ struct list_head synclist; /* 8: logsynclist anchor */
+ spinlock_t synclock; /* 4: synclist lock */
+ struct lbuf *wqueue; /* 4: log pageout queue */
+ int count; /* 4: count */
+ char uuid[16]; /* 16: 128-bit uuid of log device */
+
+ int no_integrity; /* flag to disable journaling to disk */
+};
+
+/* Log flag: consecutive integers, not masks (3 would alias 1|2);
+ * NOTE(review): presumably bit numbers within jfs_log.flag -- confirm.
+ */
+#define log_INLINELOG 1
+#define log_SYNCBARRIER 2
+#define log_QUIESCE 3
+#define log_FLUSH 4
+
+/*
+ * group commit flag
+ */
+/* jfs_log (NOTE(review): presumably stored in jfs_log.cflag -- confirm) */
+#define logGC_PAGEOUT 0x00000001
+
+/* tblock/lbuf: one distinct bit per flag */
+#define tblkGC_QUEUE 0x0001
+#define tblkGC_READY 0x0002
+#define tblkGC_COMMIT 0x0004
+#define tblkGC_COMMITTED 0x0008
+#define tblkGC_EOP 0x0010
+#define tblkGC_FREE 0x0020
+#define tblkGC_LEADER 0x0040
+#define tblkGC_ERROR 0x0080
+#define tblkGC_LAZY 0x0100 /* D230860 */
+#define tblkGC_UNLOCKED 0x0200 /* D230860 */
+
+/*
+ * log cache buffer header
+ */
+struct lbuf { /* log cache buffer header; size tags in comments are historical */
+ struct jfs_log *l_log; /* 4: log associated with buffer */
+
+ /*
+ * data buffer base area
+ */
+ uint l_flag; /* 4: pageout control flags */
+
+ struct lbuf *l_wqnext; /* 4: write queue link */
+ struct lbuf *l_freelist; /* 4: free list link (also aliased as l_redrive_next) */
+
+ int l_pn; /* 4: log page number */
+ int l_eor; /* 4: log record eor */
+ int l_ceor; /* 4: committed log record eor */
+
+ s64 l_blkno; /* 8: log page block number */
+ caddr_t l_ldata; /* 4: data page */
+ struct page *l_page; /* The page itself */
+ uint l_offset; /* Offset of l_ldata within the page */
+
+ wait_queue_head_t l_ioevent; /* 4: i/o done event */
+};
+
+/* Reuse l_freelist for redrive list: l_redrive_next is an alias, not a separate field */
+#define l_redrive_next l_freelist
+
+/*
+ * logsynclist block
+ *
+ * common logsyncblk prefix for jbuf_t and tblock
+ */
+struct logsyncblk { /* common logsyncblk prefix for jbuf_t and tblock */
+ u16 xflag; /* flags */
+ u16 flag; /* only meaningful in tblock */
+ lid_t lid; /* lock id */
+ s32 lsn; /* log sequence number */
+ struct list_head synclist; /* log sync list link */
+};
+
+/*
+ * logsynclist serialization (per log) via the (log)->synclock spinlock;
+ * `flags` is handed to spin_lock_irqsave()/spin_unlock_irqrestore().
+ */
+#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
+#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
+#define LOGSYNC_UNLOCK(log, flags) \
+ spin_unlock_irqrestore(&(log)->synclock, flags)
+
+/* compute the difference in bytes of lsn from sync point */
+#define logdiff(diff, lsn, log)\
+{\
+ diff = (lsn) - (log)->syncpt;\
+ if (diff < 0)\
+ diff += (log)->logsize;\
+}
+
+int lmLogOpen(struct super_block *sb);
+int lmLogClose(struct super_block *sb);
+int lmLogShutdown(struct jfs_log *log);
+int lmLogInit(struct jfs_log *log);
+int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
+int lmGroupCommit(struct jfs_log *log, struct tblock *tblk);
+int jfsIOWait(void *arg);
+void jfs_flush_journal(struct jfs_log *log, int wait);
+void jfs_syncpt(struct jfs_log *log, int hard_sync);
+
+#endif /* _H_JFS_LOGMGR */