author		Srikant Patnaik		2015-01-11 12:28:04 +0530
committer	Srikant Patnaik		2015-01-11 12:28:04 +0530
commit		871480933a1c28f8a9fed4c4d34d06c439a7a422 (patch)
tree		8718f573808810c2a1e8cb8fb6ac469093ca2784 /ANDROID_3.4.5/fs/ocfs2/dlm
parent		9d40ac5867b9aefe0722bc1f110b965ff294d30d (diff)
Moved, renamed, and deleted files
The original directory structure was scattered and disorganized.
These changes reorganize the tree to match the standard kernel source layout.
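For illustration, the reorganization maps paths such as ANDROID_3.4.5/fs/ocfs2/dlm/dlmlock.c onto the upstream kernel layout (e.g. fs/ocfs2/dlm/dlmlock.c). The destination paths are an assumption here: this diffstat is limited to 'ANDROID_3.4.5/fs/ocfs2/dlm' and only shows the deleted side of the move.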
Diffstat (limited to 'ANDROID_3.4.5/fs/ocfs2/dlm')
-rw-r--r--	ANDROID_3.4.5/fs/ocfs2/dlm/Makefile	7
-rw-r--r--	ANDROID_3.4.5/fs/ocfs2/dlm/dlmapi.h	220
-rw-r--r--	ANDROID_3.4.5/fs/ocfs2/dlm/dlmast.c	502
-rw-r--r--	ANDROID_3.4.5/fs/ocfs2/dlm/dlmcommon.h	1149
-rw-r--r--	ANDROID_3.4.5/fs/ocfs2/dlm/dlmconvert.c	548
-rw-r--r--	ANDROID_3.4.5/fs/ocfs2/dlm/dlmconvert.h	35
-rw-r--r--	ANDROID_3.4.5/fs/ocfs2/dlm/dlmdebug.c	1018
-rw-r--r--	ANDROID_3.4.5/fs/ocfs2/dlm/dlmdebug.h	81
-rw-r--r--	ANDROID_3.4.5/fs/ocfs2/dlm/dlmdomain.c	2397
-rw-r--r--	ANDROID_3.4.5/fs/ocfs2/dlm/dlmdomain.h	36
-rw-r--r--	ANDROID_3.4.5/fs/ocfs2/dlm/dlmlock.c	765
-rw-r--r--	ANDROID_3.4.5/fs/ocfs2/dlm/dlmmaster.c	3414
-rw-r--r--	ANDROID_3.4.5/fs/ocfs2/dlm/dlmrecovery.c	2886
-rw-r--r--	ANDROID_3.4.5/fs/ocfs2/dlm/dlmthread.c	762
-rw-r--r--	ANDROID_3.4.5/fs/ocfs2/dlm/dlmunlock.c	692
-rw-r--r--	ANDROID_3.4.5/fs/ocfs2/dlm/dlmver.c	42
-rw-r--r--	ANDROID_3.4.5/fs/ocfs2/dlm/dlmver.h	31
17 files changed, 0 insertions, 14585 deletions
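For context on the interface being removed, here is a minimal caller sketch against the dlmapi.h declarations that appear in the diff below. The callback bodies, the lock name, and the errno mapping are illustrative assumptions, not code from this tree; only the dlmlock() signature, the LKM_* flags, and the dlm_status values come from the deleted header.

/* Hypothetical caller of the (deleted) o2cb dlm API.
 * Assumes a dlm_ctxt obtained earlier via dlm_register_domain(). */
#include <linux/errno.h>
#include "dlmapi.h"

static void example_ast(void *astdata)
{
	/* Runs when the lock is granted; astdata is our cookie. */
}

static void example_bast(void *astdata, int blocked_type)
{
	/* Runs when another node blocks on our lock; downconvert soon. */
}

static int example_take_ex_lock(struct dlm_ctxt *dlm,
				struct dlm_lockstatus *lksb)
{
	enum dlm_status st;

	/* LKM_NOQUEUE: fail immediately rather than wait in queue. */
	st = dlmlock(dlm, LKM_EXMODE, lksb, LKM_NOQUEUE,
		     "my_lock", 7, example_ast, lksb, example_bast);
	if (st == DLM_NOTQUEUED)
		return -EAGAIN;		/* request would have had to queue */
	if (st != DLM_NORMAL) {
		dlm_error(st);		/* pretty-print the dlm status */
		return -EIO;
	}
	/* DLM_NORMAL means "request in progress": the actual grant is
	 * delivered asynchronously through example_ast(). */
	return 0;
}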
diff --git a/ANDROID_3.4.5/fs/ocfs2/dlm/Makefile b/ANDROID_3.4.5/fs/ocfs2/dlm/Makefile deleted file mode 100644 index c8a044ef..00000000 --- a/ANDROID_3.4.5/fs/ocfs2/dlm/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -ccflags-y := -Ifs/ocfs2 - -obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o - -ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \ - dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o - diff --git a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmapi.h b/ANDROID_3.4.5/fs/ocfs2/dlm/dlmapi.h deleted file mode 100644 index 3cfa114a..00000000 --- a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmapi.h +++ /dev/null @@ -1,220 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmapi.h - * - * externally exported dlm interfaces - * - * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * - */ - -#ifndef DLMAPI_H -#define DLMAPI_H - -struct dlm_lock; -struct dlm_ctxt; - -/* NOTE: changes made to this enum should be reflected in dlmdebug.c */ -enum dlm_status { - DLM_NORMAL = 0, /* 0: request in progress */ - DLM_GRANTED, /* 1: request granted */ - DLM_DENIED, /* 2: request denied */ - DLM_DENIED_NOLOCKS, /* 3: request denied, out of system resources */ - DLM_WORKING, /* 4: async request in progress */ - DLM_BLOCKED, /* 5: lock request blocked */ - DLM_BLOCKED_ORPHAN, /* 6: lock request blocked by a orphan lock*/ - DLM_DENIED_GRACE_PERIOD, /* 7: topological change in progress */ - DLM_SYSERR, /* 8: system error */ - DLM_NOSUPPORT, /* 9: unsupported */ - DLM_CANCELGRANT, /* 10: can't cancel convert: already granted */ - DLM_IVLOCKID, /* 11: bad lockid */ - DLM_SYNC, /* 12: synchronous request granted */ - DLM_BADTYPE, /* 13: bad resource type */ - DLM_BADRESOURCE, /* 14: bad resource handle */ - DLM_MAXHANDLES, /* 15: no more resource handles */ - DLM_NOCLINFO, /* 16: can't contact cluster manager */ - DLM_NOLOCKMGR, /* 17: can't contact lock manager */ - DLM_NOPURGED, /* 18: can't contact purge daemon */ - DLM_BADARGS, /* 19: bad api args */ - DLM_VOID, /* 20: no status */ - DLM_NOTQUEUED, /* 21: NOQUEUE was specified and request failed */ - DLM_IVBUFLEN, /* 22: invalid resource name length */ - DLM_CVTUNGRANT, /* 23: attempted to convert ungranted lock */ - DLM_BADPARAM, /* 24: invalid lock mode specified */ - DLM_VALNOTVALID, /* 25: value block has been invalidated */ - DLM_REJECTED, /* 26: request rejected, unrecognized client */ - DLM_ABORT, /* 27: blocked lock request cancelled */ - DLM_CANCEL, /* 28: conversion request cancelled */ - DLM_IVRESHANDLE, /* 29: invalid resource handle */ - DLM_DEADLOCK, /* 30: deadlock recovery refused this request */ - DLM_DENIED_NOASTS, /* 31: failed to allocate AST */ - DLM_FORWARD, /* 32: request must wait for primary's response */ - DLM_TIMEOUT, /* 33: timeout value for lock has expired */ - 
DLM_IVGROUPID, /* 34: invalid group specification */ - DLM_VERS_CONFLICT, /* 35: version conflicts prevent request handling */ - DLM_BAD_DEVICE_PATH, /* 36: Locks device does not exist or path wrong */ - DLM_NO_DEVICE_PERMISSION, /* 37: Client has insufficient pers for device */ - DLM_NO_CONTROL_DEVICE, /* 38: Cannot set options on opened device */ - - DLM_RECOVERING, /* 39: extension, allows caller to fail a lock - request if it is being recovered */ - DLM_MIGRATING, /* 40: extension, allows caller to fail a lock - request if it is being migrated */ - DLM_MAXSTATS, /* 41: upper limit for return code validation */ -}; - -/* for pretty-printing dlm_status error messages */ -const char *dlm_errmsg(enum dlm_status err); -/* for pretty-printing dlm_status error names */ -const char *dlm_errname(enum dlm_status err); - -/* Eventually the DLM will use standard errno values, but in the - * meantime this lets us track dlm errors as they bubble up. When we - * bring its error reporting into line with the rest of the stack, - * these can just be replaced with calls to mlog_errno. */ -#define dlm_error(st) do { \ - if ((st) != DLM_RECOVERING && \ - (st) != DLM_MIGRATING && \ - (st) != DLM_FORWARD) \ - mlog(ML_ERROR, "dlm status = %s\n", dlm_errname((st))); \ -} while (0) - -#define DLM_LKSB_UNUSED1 0x01 -#define DLM_LKSB_PUT_LVB 0x02 -#define DLM_LKSB_GET_LVB 0x04 -#define DLM_LKSB_UNUSED2 0x08 -#define DLM_LKSB_UNUSED3 0x10 -#define DLM_LKSB_UNUSED4 0x20 -#define DLM_LKSB_UNUSED5 0x40 -#define DLM_LKSB_UNUSED6 0x80 - -#define DLM_LVB_LEN 64 - -/* Callers are only allowed access to the lvb and status members of - * this struct. */ -struct dlm_lockstatus { - enum dlm_status status; - u32 flags; - struct dlm_lock *lockid; - char lvb[DLM_LVB_LEN]; -}; - -/* Valid lock modes. 
*/ -#define LKM_IVMODE (-1) /* invalid mode */ -#define LKM_NLMODE 0 /* null lock */ -#define LKM_CRMODE 1 /* concurrent read unsupported */ -#define LKM_CWMODE 2 /* concurrent write unsupported */ -#define LKM_PRMODE 3 /* protected read */ -#define LKM_PWMODE 4 /* protected write unsupported */ -#define LKM_EXMODE 5 /* exclusive */ -#define LKM_MAXMODE 5 -#define LKM_MODEMASK 0xff - -/* Flags passed to dlmlock and dlmunlock: - * reserved: flags used by the "real" dlm - * only a few are supported by this dlm - * (U) = unsupported by ocfs2 dlm */ -#define LKM_ORPHAN 0x00000010 /* this lock is orphanable (U) */ -#define LKM_PARENTABLE 0x00000020 /* this lock was orphaned (U) */ -#define LKM_BLOCK 0x00000040 /* blocking lock request (U) */ -#define LKM_LOCAL 0x00000080 /* local lock request */ -#define LKM_VALBLK 0x00000100 /* lock value block request */ -#define LKM_NOQUEUE 0x00000200 /* non blocking request */ -#define LKM_CONVERT 0x00000400 /* conversion request */ -#define LKM_NODLCKWT 0x00000800 /* this lock wont deadlock (U) */ -#define LKM_UNLOCK 0x00001000 /* deallocate this lock */ -#define LKM_CANCEL 0x00002000 /* cancel conversion request */ -#define LKM_DEQALL 0x00004000 /* remove all locks held by proc (U) */ -#define LKM_INVVALBLK 0x00008000 /* invalidate lock value block */ -#define LKM_SYNCSTS 0x00010000 /* return synchronous status if poss (U) */ -#define LKM_TIMEOUT 0x00020000 /* lock request contains timeout (U) */ -#define LKM_SNGLDLCK 0x00040000 /* request can self-deadlock (U) */ -#define LKM_FINDLOCAL 0x00080000 /* find local lock request (U) */ -#define LKM_PROC_OWNED 0x00100000 /* owned by process, not group (U) */ -#define LKM_XID 0x00200000 /* use transaction id for deadlock (U) */ -#define LKM_XID_CONFLICT 0x00400000 /* do not allow lock inheritance (U) */ -#define LKM_FORCE 0x00800000 /* force unlock flag */ -#define LKM_REVVALBLK 0x01000000 /* temporary solution: re-validate - lock value block (U) */ -/* unused */ -#define LKM_UNUSED1 0x00000001 /* unused */ -#define LKM_UNUSED2 0x00000002 /* unused */ -#define LKM_UNUSED3 0x00000004 /* unused */ -#define LKM_UNUSED4 0x00000008 /* unused */ -#define LKM_UNUSED5 0x02000000 /* unused */ -#define LKM_UNUSED6 0x04000000 /* unused */ -#define LKM_UNUSED7 0x08000000 /* unused */ - -/* ocfs2 extensions: internal only - * should never be used by caller */ -#define LKM_MIGRATION 0x10000000 /* extension: lockres is to be migrated - to another node */ -#define LKM_PUT_LVB 0x20000000 /* extension: lvb is being passed - should be applied to lockres */ -#define LKM_GET_LVB 0x40000000 /* extension: lvb should be copied - from lockres when lock is granted */ -#define LKM_RECOVERY 0x80000000 /* extension: flag for recovery lock - used to avoid recovery rwsem */ - - -typedef void (dlm_astlockfunc_t)(void *); -typedef void (dlm_bastlockfunc_t)(void *, int); -typedef void (dlm_astunlockfunc_t)(void *, enum dlm_status); - -enum dlm_status dlmlock(struct dlm_ctxt *dlm, - int mode, - struct dlm_lockstatus *lksb, - int flags, - const char *name, - int namelen, - dlm_astlockfunc_t *ast, - void *data, - dlm_bastlockfunc_t *bast); - -enum dlm_status dlmunlock(struct dlm_ctxt *dlm, - struct dlm_lockstatus *lksb, - int flags, - dlm_astunlockfunc_t *unlockast, - void *data); - -struct dlm_protocol_version { - u8 pv_major; - u8 pv_minor; -}; -struct dlm_ctxt * dlm_register_domain(const char *domain, u32 key, - struct dlm_protocol_version *fs_proto); - -void dlm_unregister_domain(struct dlm_ctxt *dlm); - -void dlm_print_one_lock(struct dlm_lock 
*lockid); - -typedef void (dlm_eviction_func)(int, void *); -struct dlm_eviction_cb { - struct list_head ec_item; - dlm_eviction_func *ec_func; - void *ec_data; -}; -void dlm_setup_eviction_cb(struct dlm_eviction_cb *cb, - dlm_eviction_func *f, - void *data); -void dlm_register_eviction_cb(struct dlm_ctxt *dlm, - struct dlm_eviction_cb *cb); -void dlm_unregister_eviction_cb(struct dlm_eviction_cb *cb); - -#endif /* DLMAPI_H */ diff --git a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmast.c b/ANDROID_3.4.5/fs/ocfs2/dlm/dlmast.c deleted file mode 100644 index 3a3ed4bb..00000000 --- a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmast.c +++ /dev/null @@ -1,502 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmast.c - * - * AST and BAST functionality for local and remote nodes - * - * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * - */ - - -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/types.h> -#include <linux/highmem.h> -#include <linux/init.h> -#include <linux/sysctl.h> -#include <linux/random.h> -#include <linux/blkdev.h> -#include <linux/socket.h> -#include <linux/inet.h> -#include <linux/spinlock.h> - - -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" - -#include "dlmapi.h" -#include "dlmcommon.h" - -#define MLOG_MASK_PREFIX ML_DLM -#include "cluster/masklog.h" - -static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, - struct dlm_lock *lock); -static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); - -/* Should be called as an ast gets queued to see if the new - * lock level will obsolete a pending bast. - * For example, if dlm_thread queued a bast for an EX lock that - * was blocking another EX, but before sending the bast the - * lock owner downconverted to NL, the bast is now obsolete. - * Only the ast should be sent. - * This is needed because the lock and convert paths can queue - * asts out-of-band (not waiting for dlm_thread) in order to - * allow for LKM_NOQUEUE to get immediate responses. 
*/ -static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock) -{ - assert_spin_locked(&dlm->ast_lock); - assert_spin_locked(&lock->spinlock); - - if (lock->ml.highest_blocked == LKM_IVMODE) - return 0; - BUG_ON(lock->ml.highest_blocked == LKM_NLMODE); - - if (lock->bast_pending && - list_empty(&lock->bast_list)) - /* old bast already sent, ok */ - return 0; - - if (lock->ml.type == LKM_EXMODE) - /* EX blocks anything left, any bast still valid */ - return 0; - else if (lock->ml.type == LKM_NLMODE) - /* NL blocks nothing, no reason to send any bast, cancel it */ - return 1; - else if (lock->ml.highest_blocked != LKM_EXMODE) - /* PR only blocks EX */ - return 1; - - return 0; -} - -void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) -{ - struct dlm_lock_resource *res; - - BUG_ON(!dlm); - BUG_ON(!lock); - - res = lock->lockres; - - assert_spin_locked(&dlm->ast_lock); - - if (!list_empty(&lock->ast_list)) { - mlog(ML_ERROR, "%s: res %.*s, lock %u:%llu, " - "AST list not empty, pending %d, newlevel %d\n", - dlm->name, res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - lock->ast_pending, lock->ml.type); - BUG(); - } - if (lock->ast_pending) - mlog(0, "%s: res %.*s, lock %u:%llu, AST getting flushed\n", - dlm->name, res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie))); - - /* putting lock on list, add a ref */ - dlm_lock_get(lock); - spin_lock(&lock->spinlock); - - /* check to see if this ast obsoletes the bast */ - if (dlm_should_cancel_bast(dlm, lock)) { - mlog(0, "%s: res %.*s, lock %u:%llu, Cancelling BAST\n", - dlm->name, res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie))); - lock->bast_pending = 0; - list_del_init(&lock->bast_list); - lock->ml.highest_blocked = LKM_IVMODE; - /* removing lock from list, remove a ref. guaranteed - * this won't be the last ref because of the get above, - * so res->spinlock will not be taken here */ - dlm_lock_put(lock); - /* free up the reserved bast that we are cancelling. - * guaranteed that this will not be the last reserved - * ast because *both* an ast and a bast were reserved - * to get to this point. 
the res->spinlock will not be - * taken here */ - dlm_lockres_release_ast(dlm, res); - } - list_add_tail(&lock->ast_list, &dlm->pending_asts); - lock->ast_pending = 1; - spin_unlock(&lock->spinlock); -} - -void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) -{ - BUG_ON(!dlm); - BUG_ON(!lock); - - spin_lock(&dlm->ast_lock); - __dlm_queue_ast(dlm, lock); - spin_unlock(&dlm->ast_lock); -} - - -void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock) -{ - struct dlm_lock_resource *res; - - BUG_ON(!dlm); - BUG_ON(!lock); - - assert_spin_locked(&dlm->ast_lock); - - res = lock->lockres; - - BUG_ON(!list_empty(&lock->bast_list)); - if (lock->bast_pending) - mlog(0, "%s: res %.*s, lock %u:%llu, BAST getting flushed\n", - dlm->name, res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie))); - - /* putting lock on list, add a ref */ - dlm_lock_get(lock); - spin_lock(&lock->spinlock); - list_add_tail(&lock->bast_list, &dlm->pending_basts); - lock->bast_pending = 1; - spin_unlock(&lock->spinlock); -} - -void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock) -{ - BUG_ON(!dlm); - BUG_ON(!lock); - - spin_lock(&dlm->ast_lock); - __dlm_queue_bast(dlm, lock); - spin_unlock(&dlm->ast_lock); -} - -static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, - struct dlm_lock *lock) -{ - struct dlm_lockstatus *lksb = lock->lksb; - BUG_ON(!lksb); - - /* only updates if this node masters the lockres */ - spin_lock(&res->spinlock); - if (res->owner == dlm->node_num) { - /* check the lksb flags for the direction */ - if (lksb->flags & DLM_LKSB_GET_LVB) { - mlog(0, "getting lvb from lockres for %s node\n", - lock->ml.node == dlm->node_num ? "master" : - "remote"); - memcpy(lksb->lvb, res->lvb, DLM_LVB_LEN); - } - /* Do nothing for lvb put requests - they should be done in - * place when the lock is downconverted - otherwise we risk - * racing gets and puts which could result in old lvb data - * being propagated. We leave the put flag set and clear it - * here. In the future we might want to clear it at the time - * the put is actually done. 
- */ - } - spin_unlock(&res->spinlock); - - /* reset any lvb flags on the lksb */ - lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB); -} - -void dlm_do_local_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, - struct dlm_lock *lock) -{ - dlm_astlockfunc_t *fn; - struct dlm_lockstatus *lksb; - - mlog(0, "%s: res %.*s, lock %u:%llu, Local AST\n", dlm->name, - res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie))); - - lksb = lock->lksb; - fn = lock->ast; - BUG_ON(lock->ml.node != dlm->node_num); - - dlm_update_lvb(dlm, res, lock); - (*fn)(lock->astdata); -} - - -int dlm_do_remote_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, - struct dlm_lock *lock) -{ - int ret; - struct dlm_lockstatus *lksb; - int lksbflags; - - mlog(0, "%s: res %.*s, lock %u:%llu, Remote AST\n", dlm->name, - res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie))); - - lksb = lock->lksb; - BUG_ON(lock->ml.node == dlm->node_num); - - lksbflags = lksb->flags; - dlm_update_lvb(dlm, res, lock); - - /* lock request came from another node - * go do the ast over there */ - ret = dlm_send_proxy_ast(dlm, res, lock, lksbflags); - return ret; -} - -void dlm_do_local_bast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, - struct dlm_lock *lock, int blocked_type) -{ - dlm_bastlockfunc_t *fn = lock->bast; - - BUG_ON(lock->ml.node != dlm->node_num); - - mlog(0, "%s: res %.*s, lock %u:%llu, Local BAST, blocked %d\n", - dlm->name, res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - blocked_type); - - (*fn)(lock->astdata, blocked_type); -} - - - -int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - int ret; - unsigned int locklen; - struct dlm_ctxt *dlm = data; - struct dlm_lock_resource *res = NULL; - struct dlm_lock *lock = NULL; - struct dlm_proxy_ast *past = (struct dlm_proxy_ast *) msg->buf; - char *name; - struct list_head *iter, *head=NULL; - u64 cookie; - u32 flags; - u8 node; - - if (!dlm_grab(dlm)) { - dlm_error(DLM_REJECTED); - return DLM_REJECTED; - } - - mlog_bug_on_msg(!dlm_domain_fully_joined(dlm), - "Domain %s not fully joined!\n", dlm->name); - - name = past->name; - locklen = past->namelen; - cookie = past->cookie; - flags = be32_to_cpu(past->flags); - node = past->node_idx; - - if (locklen > DLM_LOCKID_NAME_MAX) { - ret = DLM_IVBUFLEN; - mlog(ML_ERROR, "Invalid name length (%d) in proxy ast " - "handler!\n", locklen); - goto leave; - } - - if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) == - (LKM_PUT_LVB|LKM_GET_LVB)) { - mlog(ML_ERROR, "Both PUT and GET lvb specified, (0x%x)\n", - flags); - ret = DLM_BADARGS; - goto leave; - } - - mlog(0, "lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" : - (flags & LKM_GET_LVB ? "get lvb" : "none")); - - mlog(0, "type=%d, blocked_type=%d\n", past->type, past->blocked_type); - - if (past->type != DLM_AST && - past->type != DLM_BAST) { - mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu" - "name=%.*s, node=%u\n", past->type, - dlm_get_lock_cookie_node(be64_to_cpu(cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), - locklen, name, node); - ret = DLM_IVLOCKID; - goto leave; - } - - res = dlm_lookup_lockres(dlm, name, locklen); - if (!res) { - mlog(0, "Got %sast for unknown lockres! 
cookie=%u:%llu, " - "name=%.*s, node=%u\n", (past->type == DLM_AST ? "" : "b"), - dlm_get_lock_cookie_node(be64_to_cpu(cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), - locklen, name, node); - ret = DLM_IVLOCKID; - goto leave; - } - - /* cannot get a proxy ast message if this node owns it */ - BUG_ON(res->owner == dlm->node_num); - - mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len, - res->lockname.name); - - spin_lock(&res->spinlock); - if (res->state & DLM_LOCK_RES_RECOVERING) { - mlog(0, "Responding with DLM_RECOVERING!\n"); - ret = DLM_RECOVERING; - goto unlock_out; - } - if (res->state & DLM_LOCK_RES_MIGRATING) { - mlog(0, "Responding with DLM_MIGRATING!\n"); - ret = DLM_MIGRATING; - goto unlock_out; - } - /* try convert queue for both ast/bast */ - head = &res->converting; - lock = NULL; - list_for_each(iter, head) { - lock = list_entry (iter, struct dlm_lock, list); - if (lock->ml.cookie == cookie) - goto do_ast; - } - - /* if not on convert, try blocked for ast, granted for bast */ - if (past->type == DLM_AST) - head = &res->blocked; - else - head = &res->granted; - - list_for_each(iter, head) { - lock = list_entry (iter, struct dlm_lock, list); - if (lock->ml.cookie == cookie) - goto do_ast; - } - - mlog(0, "Got %sast for unknown lock! cookie=%u:%llu, name=%.*s, " - "node=%u\n", past->type == DLM_AST ? "" : "b", - dlm_get_lock_cookie_node(be64_to_cpu(cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), - locklen, name, node); - - ret = DLM_NORMAL; -unlock_out: - spin_unlock(&res->spinlock); - goto leave; - -do_ast: - ret = DLM_NORMAL; - if (past->type == DLM_AST) { - /* do not alter lock refcount. switching lists. */ - list_move_tail(&lock->list, &res->granted); - mlog(0, "%s: res %.*s, lock %u:%llu, Granted type %d => %d\n", - dlm->name, res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), - lock->ml.type, lock->ml.convert_type); - - if (lock->ml.convert_type != LKM_IVMODE) { - lock->ml.type = lock->ml.convert_type; - lock->ml.convert_type = LKM_IVMODE; - } else { - // should already be there.... 
- } - - lock->lksb->status = DLM_NORMAL; - - /* if we requested the lvb, fetch it into our lksb now */ - if (flags & LKM_GET_LVB) { - BUG_ON(!(lock->lksb->flags & DLM_LKSB_GET_LVB)); - memcpy(lock->lksb->lvb, past->lvb, DLM_LVB_LEN); - } - } - spin_unlock(&res->spinlock); - - if (past->type == DLM_AST) - dlm_do_local_ast(dlm, res, lock); - else - dlm_do_local_bast(dlm, res, lock, past->blocked_type); - -leave: - if (res) - dlm_lockres_put(res); - - dlm_put(dlm); - return ret; -} - - - -int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, - struct dlm_lock *lock, int msg_type, - int blocked_type, int flags) -{ - int ret = 0; - struct dlm_proxy_ast past; - struct kvec vec[2]; - size_t veclen = 1; - int status; - - mlog(0, "%s: res %.*s, to %u, type %d, blocked_type %d\n", dlm->name, - res->lockname.len, res->lockname.name, lock->ml.node, msg_type, - blocked_type); - - memset(&past, 0, sizeof(struct dlm_proxy_ast)); - past.node_idx = dlm->node_num; - past.type = msg_type; - past.blocked_type = blocked_type; - past.namelen = res->lockname.len; - memcpy(past.name, res->lockname.name, past.namelen); - past.cookie = lock->ml.cookie; - - vec[0].iov_len = sizeof(struct dlm_proxy_ast); - vec[0].iov_base = &past; - if (flags & DLM_LKSB_GET_LVB) { - be32_add_cpu(&past.flags, LKM_GET_LVB); - vec[1].iov_len = DLM_LVB_LEN; - vec[1].iov_base = lock->lksb->lvb; - veclen++; - } - - ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen, - lock->ml.node, &status); - if (ret < 0) - mlog(ML_ERROR, "%s: res %.*s, error %d send AST to node %u\n", - dlm->name, res->lockname.len, res->lockname.name, ret, - lock->ml.node); - else { - if (status == DLM_RECOVERING) { - mlog(ML_ERROR, "sent AST to node %u, it thinks this " - "node is dead!\n", lock->ml.node); - BUG(); - } else if (status == DLM_MIGRATING) { - mlog(ML_ERROR, "sent AST to node %u, it returned " - "DLM_MIGRATING!\n", lock->ml.node); - BUG(); - } else if (status != DLM_NORMAL && status != DLM_IVLOCKID) { - mlog(ML_ERROR, "AST to node %u returned %d!\n", - lock->ml.node, status); - /* ignore it */ - } - ret = 0; - } - return ret; -} diff --git a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmcommon.h b/ANDROID_3.4.5/fs/ocfs2/dlm/dlmcommon.h deleted file mode 100644 index a5952cee..00000000 --- a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmcommon.h +++ /dev/null @@ -1,1149 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmcommon.h - * - * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. 
- * - */ - -#ifndef DLMCOMMON_H -#define DLMCOMMON_H - -#include <linux/kref.h> - -#define DLM_HB_NODE_DOWN_PRI (0xf000000) -#define DLM_HB_NODE_UP_PRI (0x8000000) - -#define DLM_LOCKID_NAME_MAX 32 - -#define DLM_DOMAIN_NAME_MAX_LEN 255 -#define DLM_LOCK_RES_OWNER_UNKNOWN O2NM_MAX_NODES -#define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes -#define DLM_THREAD_MS 200 // flush at least every 200 ms - -#define DLM_HASH_SIZE_DEFAULT (1 << 17) -#if DLM_HASH_SIZE_DEFAULT < PAGE_SIZE -# define DLM_HASH_PAGES 1 -#else -# define DLM_HASH_PAGES (DLM_HASH_SIZE_DEFAULT / PAGE_SIZE) -#endif -#define DLM_BUCKETS_PER_PAGE (PAGE_SIZE / sizeof(struct hlist_head)) -#define DLM_HASH_BUCKETS (DLM_HASH_PAGES * DLM_BUCKETS_PER_PAGE) - -/* Intended to make it easier for us to switch out hash functions */ -#define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l) - -enum dlm_mle_type { - DLM_MLE_BLOCK = 0, - DLM_MLE_MASTER = 1, - DLM_MLE_MIGRATION = 2, - DLM_MLE_NUM_TYPES = 3, -}; - -struct dlm_master_list_entry { - struct hlist_node master_hash_node; - struct list_head hb_events; - struct dlm_ctxt *dlm; - spinlock_t spinlock; - wait_queue_head_t wq; - atomic_t woken; - struct kref mle_refs; - int inuse; - unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; - unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; - unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; - unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; - u8 master; - u8 new_master; - enum dlm_mle_type type; - struct o2hb_callback_func mle_hb_up; - struct o2hb_callback_func mle_hb_down; - struct dlm_lock_resource *mleres; - unsigned char mname[DLM_LOCKID_NAME_MAX]; - unsigned int mnamelen; - unsigned int mnamehash; -}; - -enum dlm_ast_type { - DLM_AST = 0, - DLM_BAST = 1, - DLM_ASTUNLOCK = 2, -}; - - -#define LKM_VALID_FLAGS (LKM_VALBLK | LKM_CONVERT | LKM_UNLOCK | \ - LKM_CANCEL | LKM_INVVALBLK | LKM_FORCE | \ - LKM_RECOVERY | LKM_LOCAL | LKM_NOQUEUE) - -#define DLM_RECOVERY_LOCK_NAME "$RECOVERY" -#define DLM_RECOVERY_LOCK_NAME_LEN 9 - -static inline int dlm_is_recovery_lock(const char *lock_name, int name_len) -{ - if (name_len == DLM_RECOVERY_LOCK_NAME_LEN && - memcmp(lock_name, DLM_RECOVERY_LOCK_NAME, name_len)==0) - return 1; - return 0; -} - -#define DLM_RECO_STATE_ACTIVE 0x0001 -#define DLM_RECO_STATE_FINALIZE 0x0002 - -struct dlm_recovery_ctxt -{ - struct list_head resources; - struct list_head received; - struct list_head node_data; - u8 new_master; - u8 dead_node; - u16 state; - unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; - wait_queue_head_t event; -}; - -enum dlm_ctxt_state { - DLM_CTXT_NEW = 0, - DLM_CTXT_JOINED = 1, - DLM_CTXT_IN_SHUTDOWN = 2, - DLM_CTXT_LEAVING = 3, -}; - -struct dlm_ctxt -{ - struct list_head list; - struct hlist_head **lockres_hash; - struct list_head dirty_list; - struct list_head purge_list; - struct list_head pending_asts; - struct list_head pending_basts; - struct list_head tracking_list; - unsigned int purge_count; - spinlock_t spinlock; - spinlock_t ast_lock; - spinlock_t track_lock; - char *name; - u8 node_num; - u32 key; - u8 joining_node; - wait_queue_head_t dlm_join_events; - unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; - unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; - unsigned long exit_domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; - unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; - struct dlm_recovery_ctxt reco; - spinlock_t master_lock; - struct hlist_head **master_hash; - struct list_head mle_hb_events; - - /* these give a 
really vague idea of the system load */ - atomic_t mle_tot_count[DLM_MLE_NUM_TYPES]; - atomic_t mle_cur_count[DLM_MLE_NUM_TYPES]; - atomic_t res_tot_count; - atomic_t res_cur_count; - - struct dlm_debug_ctxt *dlm_debug_ctxt; - struct dentry *dlm_debugfs_subroot; - - /* NOTE: Next three are protected by dlm_domain_lock */ - struct kref dlm_refs; - enum dlm_ctxt_state dlm_state; - unsigned int num_joins; - - struct o2hb_callback_func dlm_hb_up; - struct o2hb_callback_func dlm_hb_down; - struct task_struct *dlm_thread_task; - struct task_struct *dlm_reco_thread_task; - struct workqueue_struct *dlm_worker; - wait_queue_head_t dlm_thread_wq; - wait_queue_head_t dlm_reco_thread_wq; - wait_queue_head_t ast_wq; - wait_queue_head_t migration_wq; - - struct work_struct dispatched_work; - struct list_head work_list; - spinlock_t work_lock; - struct list_head dlm_domain_handlers; - struct list_head dlm_eviction_callbacks; - - /* The filesystem specifies this at domain registration. We - * cache it here to know what to tell other nodes. */ - struct dlm_protocol_version fs_locking_proto; - /* This is the inter-dlm communication version */ - struct dlm_protocol_version dlm_locking_proto; -}; - -static inline struct hlist_head *dlm_lockres_hash(struct dlm_ctxt *dlm, unsigned i) -{ - return dlm->lockres_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + (i % DLM_BUCKETS_PER_PAGE); -} - -static inline struct hlist_head *dlm_master_hash(struct dlm_ctxt *dlm, - unsigned i) -{ - return dlm->master_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + - (i % DLM_BUCKETS_PER_PAGE); -} - -/* these keventd work queue items are for less-frequently - * called functions that cannot be directly called from the - * net message handlers for some reason, usually because - * they need to send net messages of their own. */ -void dlm_dispatch_work(struct work_struct *work); - -struct dlm_lock_resource; -struct dlm_work_item; - -typedef void (dlm_workfunc_t)(struct dlm_work_item *, void *); - -struct dlm_request_all_locks_priv -{ - u8 reco_master; - u8 dead_node; -}; - -struct dlm_mig_lockres_priv -{ - struct dlm_lock_resource *lockres; - u8 real_master; - u8 extra_ref; -}; - -struct dlm_assert_master_priv -{ - struct dlm_lock_resource *lockres; - u8 request_from; - u32 flags; - unsigned ignore_higher:1; -}; - -struct dlm_deref_lockres_priv -{ - struct dlm_lock_resource *deref_res; - u8 deref_node; -}; - -struct dlm_work_item -{ - struct list_head list; - dlm_workfunc_t *func; - struct dlm_ctxt *dlm; - void *data; - union { - struct dlm_request_all_locks_priv ral; - struct dlm_mig_lockres_priv ml; - struct dlm_assert_master_priv am; - struct dlm_deref_lockres_priv dl; - } u; -}; - -static inline void dlm_init_work_item(struct dlm_ctxt *dlm, - struct dlm_work_item *i, - dlm_workfunc_t *f, void *data) -{ - memset(i, 0, sizeof(*i)); - i->func = f; - INIT_LIST_HEAD(&i->list); - i->data = data; - i->dlm = dlm; /* must have already done a dlm_grab on this! 
*/ -} - - - -static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm, - u8 node) -{ - assert_spin_locked(&dlm->spinlock); - - dlm->joining_node = node; - wake_up(&dlm->dlm_join_events); -} - -#define DLM_LOCK_RES_UNINITED 0x00000001 -#define DLM_LOCK_RES_RECOVERING 0x00000002 -#define DLM_LOCK_RES_READY 0x00000004 -#define DLM_LOCK_RES_DIRTY 0x00000008 -#define DLM_LOCK_RES_IN_PROGRESS 0x00000010 -#define DLM_LOCK_RES_MIGRATING 0x00000020 -#define DLM_LOCK_RES_DROPPING_REF 0x00000040 -#define DLM_LOCK_RES_BLOCK_DIRTY 0x00001000 -#define DLM_LOCK_RES_SETREF_INPROG 0x00002000 - -/* max milliseconds to wait to sync up a network failure with a node death */ -#define DLM_NODE_DEATH_WAIT_MAX (5 * 1000) - -#define DLM_PURGE_INTERVAL_MS (8 * 1000) - -struct dlm_lock_resource -{ - /* WARNING: Please see the comment in dlm_init_lockres before - * adding fields here. */ - struct hlist_node hash_node; - struct qstr lockname; - struct kref refs; - - /* - * Please keep granted, converting, and blocked in this order, - * as some funcs want to iterate over all lists. - * - * All four lists are protected by the hash's reference. - */ - struct list_head granted; - struct list_head converting; - struct list_head blocked; - struct list_head purge; - - /* - * These two lists require you to hold an additional reference - * while they are on the list. - */ - struct list_head dirty; - struct list_head recovering; // dlm_recovery_ctxt.resources list - - /* Added during init and removed during release */ - struct list_head tracking; /* dlm->tracking_list */ - - /* unused lock resources have their last_used stamped and are - * put on a list for the dlm thread to run. */ - unsigned long last_used; - - struct dlm_ctxt *dlm; - - unsigned migration_pending:1; - atomic_t asts_reserved; - spinlock_t spinlock; - wait_queue_head_t wq; - u8 owner; //node which owns the lock resource, or unknown - u16 state; - char lvb[DLM_LVB_LEN]; - unsigned int inflight_locks; - unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; -}; - -struct dlm_migratable_lock -{ - __be64 cookie; - - /* these 3 are just padding for the in-memory structure, but - * list and flags are actually used when sent over the wire */ - __be16 pad1; - u8 list; // 0=granted, 1=converting, 2=blocked - u8 flags; - - s8 type; - s8 convert_type; - s8 highest_blocked; - u8 node; -}; // 16 bytes - -struct dlm_lock -{ - struct dlm_migratable_lock ml; - - struct list_head list; - struct list_head ast_list; - struct list_head bast_list; - struct dlm_lock_resource *lockres; - spinlock_t spinlock; - struct kref lock_refs; - - // ast and bast must be callable while holding a spinlock! 
- dlm_astlockfunc_t *ast; - dlm_bastlockfunc_t *bast; - void *astdata; - struct dlm_lockstatus *lksb; - unsigned ast_pending:1, - bast_pending:1, - convert_pending:1, - lock_pending:1, - cancel_pending:1, - unlock_pending:1, - lksb_kernel_allocated:1; -}; - - -#define DLM_LKSB_UNUSED1 0x01 -#define DLM_LKSB_PUT_LVB 0x02 -#define DLM_LKSB_GET_LVB 0x04 -#define DLM_LKSB_UNUSED2 0x08 -#define DLM_LKSB_UNUSED3 0x10 -#define DLM_LKSB_UNUSED4 0x20 -#define DLM_LKSB_UNUSED5 0x40 -#define DLM_LKSB_UNUSED6 0x80 - - -enum dlm_lockres_list { - DLM_GRANTED_LIST = 0, - DLM_CONVERTING_LIST = 1, - DLM_BLOCKED_LIST = 2, -}; - -static inline int dlm_lvb_is_empty(char *lvb) -{ - int i; - for (i=0; i<DLM_LVB_LEN; i++) - if (lvb[i]) - return 0; - return 1; -} - -static inline char *dlm_list_in_text(enum dlm_lockres_list idx) -{ - if (idx == DLM_GRANTED_LIST) - return "granted"; - else if (idx == DLM_CONVERTING_LIST) - return "converting"; - else if (idx == DLM_BLOCKED_LIST) - return "blocked"; - else - return "unknown"; -} - -static inline struct list_head * -dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx) -{ - struct list_head *ret = NULL; - if (idx == DLM_GRANTED_LIST) - ret = &res->granted; - else if (idx == DLM_CONVERTING_LIST) - ret = &res->converting; - else if (idx == DLM_BLOCKED_LIST) - ret = &res->blocked; - else - BUG(); - return ret; -} - - - - -struct dlm_node_iter -{ - unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; - int curnode; -}; - - -enum { - DLM_MASTER_REQUEST_MSG = 500, - DLM_UNUSED_MSG1 = 501, - DLM_ASSERT_MASTER_MSG = 502, - DLM_CREATE_LOCK_MSG = 503, - DLM_CONVERT_LOCK_MSG = 504, - DLM_PROXY_AST_MSG = 505, - DLM_UNLOCK_LOCK_MSG = 506, - DLM_DEREF_LOCKRES_MSG = 507, - DLM_MIGRATE_REQUEST_MSG = 508, - DLM_MIG_LOCKRES_MSG = 509, - DLM_QUERY_JOIN_MSG = 510, - DLM_ASSERT_JOINED_MSG = 511, - DLM_CANCEL_JOIN_MSG = 512, - DLM_EXIT_DOMAIN_MSG = 513, - DLM_MASTER_REQUERY_MSG = 514, - DLM_LOCK_REQUEST_MSG = 515, - DLM_RECO_DATA_DONE_MSG = 516, - DLM_BEGIN_RECO_MSG = 517, - DLM_FINALIZE_RECO_MSG = 518, - DLM_QUERY_REGION = 519, - DLM_QUERY_NODEINFO = 520, - DLM_BEGIN_EXIT_DOMAIN_MSG = 521, -}; - -struct dlm_reco_node_data -{ - int state; - u8 node_num; - struct list_head list; -}; - -enum { - DLM_RECO_NODE_DATA_DEAD = -1, - DLM_RECO_NODE_DATA_INIT = 0, - DLM_RECO_NODE_DATA_REQUESTING = 1, - DLM_RECO_NODE_DATA_REQUESTED = 2, - DLM_RECO_NODE_DATA_RECEIVING = 3, - DLM_RECO_NODE_DATA_DONE = 4, - DLM_RECO_NODE_DATA_FINALIZE_SENT = 5, -}; - - -enum { - DLM_MASTER_RESP_NO = 0, - DLM_MASTER_RESP_YES = 1, - DLM_MASTER_RESP_MAYBE = 2, - DLM_MASTER_RESP_ERROR = 3, -}; - - -struct dlm_master_request -{ - u8 node_idx; - u8 namelen; - __be16 pad1; - __be32 flags; - - u8 name[O2NM_MAX_NAME_LEN]; -}; - -#define DLM_ASSERT_RESPONSE_REASSERT 0x00000001 -#define DLM_ASSERT_RESPONSE_MASTERY_REF 0x00000002 - -#define DLM_ASSERT_MASTER_MLE_CLEANUP 0x00000001 -#define DLM_ASSERT_MASTER_REQUERY 0x00000002 -#define DLM_ASSERT_MASTER_FINISH_MIGRATION 0x00000004 -struct dlm_assert_master -{ - u8 node_idx; - u8 namelen; - __be16 pad1; - __be32 flags; - - u8 name[O2NM_MAX_NAME_LEN]; -}; - -#define DLM_MIGRATE_RESPONSE_MASTERY_REF 0x00000001 - -struct dlm_migrate_request -{ - u8 master; - u8 new_master; - u8 namelen; - u8 pad1; - __be32 pad2; - u8 name[O2NM_MAX_NAME_LEN]; -}; - -struct dlm_master_requery -{ - u8 pad1; - u8 pad2; - u8 node_idx; - u8 namelen; - __be32 pad3; - u8 name[O2NM_MAX_NAME_LEN]; -}; - -#define DLM_MRES_RECOVERY 0x01 -#define DLM_MRES_MIGRATION 0x02 -#define 
DLM_MRES_ALL_DONE 0x04 - -/* - * We would like to get one whole lockres into a single network - * message whenever possible. Generally speaking, there will be - * at most one dlm_lock on a lockres for each node in the cluster, - * plus (infrequently) any additional locks coming in from userdlm. - * - * struct _dlm_lockres_page - * { - * dlm_migratable_lockres mres; - * dlm_migratable_lock ml[DLM_MAX_MIGRATABLE_LOCKS]; - * u8 pad[DLM_MIG_LOCKRES_RESERVED]; - * }; - * - * from ../cluster/tcp.h - * NET_MAX_PAYLOAD_BYTES (4096 - sizeof(net_msg)) - * (roughly 4080 bytes) - * and sizeof(dlm_migratable_lockres) = 112 bytes - * and sizeof(dlm_migratable_lock) = 16 bytes - * - * Choosing DLM_MAX_MIGRATABLE_LOCKS=240 and - * DLM_MIG_LOCKRES_RESERVED=128 means we have this: - * - * (DLM_MAX_MIGRATABLE_LOCKS * sizeof(dlm_migratable_lock)) + - * sizeof(dlm_migratable_lockres) + DLM_MIG_LOCKRES_RESERVED = - * NET_MAX_PAYLOAD_BYTES - * (240 * 16) + 112 + 128 = 4080 - * - * So a lockres would need more than 240 locks before it would - * use more than one network packet to recover. Not too bad. - */ -#define DLM_MAX_MIGRATABLE_LOCKS 240 - -struct dlm_migratable_lockres -{ - u8 master; - u8 lockname_len; - u8 num_locks; // locks sent in this structure - u8 flags; - __be32 total_locks; // locks to be sent for this migration cookie - __be64 mig_cookie; // cookie for this lockres migration - // or zero if not needed - // 16 bytes - u8 lockname[DLM_LOCKID_NAME_MAX]; - // 48 bytes - u8 lvb[DLM_LVB_LEN]; - // 112 bytes - struct dlm_migratable_lock ml[0]; // 16 bytes each, begins at byte 112 -}; -#define DLM_MIG_LOCKRES_MAX_LEN \ - (sizeof(struct dlm_migratable_lockres) + \ - (sizeof(struct dlm_migratable_lock) * \ - DLM_MAX_MIGRATABLE_LOCKS) ) - -/* from above, 128 bytes - * for some undetermined future use */ -#define DLM_MIG_LOCKRES_RESERVED (NET_MAX_PAYLOAD_BYTES - \ - DLM_MIG_LOCKRES_MAX_LEN) - -struct dlm_create_lock -{ - __be64 cookie; - - __be32 flags; - u8 pad1; - u8 node_idx; - s8 requested_type; - u8 namelen; - - u8 name[O2NM_MAX_NAME_LEN]; -}; - -struct dlm_convert_lock -{ - __be64 cookie; - - __be32 flags; - u8 pad1; - u8 node_idx; - s8 requested_type; - u8 namelen; - - u8 name[O2NM_MAX_NAME_LEN]; - - s8 lvb[0]; -}; -#define DLM_CONVERT_LOCK_MAX_LEN (sizeof(struct dlm_convert_lock)+DLM_LVB_LEN) - -struct dlm_unlock_lock -{ - __be64 cookie; - - __be32 flags; - __be16 pad1; - u8 node_idx; - u8 namelen; - - u8 name[O2NM_MAX_NAME_LEN]; - - s8 lvb[0]; -}; -#define DLM_UNLOCK_LOCK_MAX_LEN (sizeof(struct dlm_unlock_lock)+DLM_LVB_LEN) - -struct dlm_proxy_ast -{ - __be64 cookie; - - __be32 flags; - u8 node_idx; - u8 type; - u8 blocked_type; - u8 namelen; - - u8 name[O2NM_MAX_NAME_LEN]; - - s8 lvb[0]; -}; -#define DLM_PROXY_AST_MAX_LEN (sizeof(struct dlm_proxy_ast)+DLM_LVB_LEN) - -#define DLM_MOD_KEY (0x666c6172) -enum dlm_query_join_response_code { - JOIN_DISALLOW = 0, - JOIN_OK = 1, - JOIN_OK_NO_MAP = 2, - JOIN_PROTOCOL_MISMATCH = 3, -}; - -struct dlm_query_join_packet { - u8 code; /* Response code. dlm_minor and fs_minor - are only valid if this is JOIN_OK */ - u8 dlm_minor; /* The minor version of the protocol the - dlm is speaking. */ - u8 fs_minor; /* The minor version of the protocol the - filesystem is speaking. 
*/ - u8 reserved; -}; - -union dlm_query_join_response { - u32 intval; - struct dlm_query_join_packet packet; -}; - -struct dlm_lock_request -{ - u8 node_idx; - u8 dead_node; - __be16 pad1; - __be32 pad2; -}; - -struct dlm_reco_data_done -{ - u8 node_idx; - u8 dead_node; - __be16 pad1; - __be32 pad2; - - /* unused for now */ - /* eventually we can use this to attempt - * lvb recovery based on each node's info */ - u8 reco_lvb[DLM_LVB_LEN]; -}; - -struct dlm_begin_reco -{ - u8 node_idx; - u8 dead_node; - __be16 pad1; - __be32 pad2; -}; - - -#define BITS_PER_BYTE 8 -#define BITS_TO_BYTES(bits) (((bits)+BITS_PER_BYTE-1)/BITS_PER_BYTE) - -struct dlm_query_join_request -{ - u8 node_idx; - u8 pad1[2]; - u8 name_len; - struct dlm_protocol_version dlm_proto; - struct dlm_protocol_version fs_proto; - u8 domain[O2NM_MAX_NAME_LEN]; - u8 node_map[BITS_TO_BYTES(O2NM_MAX_NODES)]; -}; - -struct dlm_assert_joined -{ - u8 node_idx; - u8 pad1[2]; - u8 name_len; - u8 domain[O2NM_MAX_NAME_LEN]; -}; - -struct dlm_cancel_join -{ - u8 node_idx; - u8 pad1[2]; - u8 name_len; - u8 domain[O2NM_MAX_NAME_LEN]; -}; - -struct dlm_query_region { - u8 qr_node; - u8 qr_numregions; - u8 qr_namelen; - u8 pad1; - u8 qr_domain[O2NM_MAX_NAME_LEN]; - u8 qr_regions[O2HB_MAX_REGION_NAME_LEN * O2NM_MAX_REGIONS]; -}; - -struct dlm_node_info { - u8 ni_nodenum; - u8 pad1; - u16 ni_ipv4_port; - u32 ni_ipv4_address; -}; - -struct dlm_query_nodeinfo { - u8 qn_nodenum; - u8 qn_numnodes; - u8 qn_namelen; - u8 pad1; - u8 qn_domain[O2NM_MAX_NAME_LEN]; - struct dlm_node_info qn_nodes[O2NM_MAX_NODES]; -}; - -struct dlm_exit_domain -{ - u8 node_idx; - u8 pad1[3]; -}; - -struct dlm_finalize_reco -{ - u8 node_idx; - u8 dead_node; - u8 flags; - u8 pad1; - __be32 pad2; -}; - -struct dlm_deref_lockres -{ - u32 pad1; - u16 pad2; - u8 node_idx; - u8 namelen; - - u8 name[O2NM_MAX_NAME_LEN]; -}; - -static inline enum dlm_status -__dlm_lockres_state_to_status(struct dlm_lock_resource *res) -{ - enum dlm_status status = DLM_NORMAL; - - assert_spin_locked(&res->spinlock); - - if (res->state & DLM_LOCK_RES_RECOVERING) - status = DLM_RECOVERING; - else if (res->state & DLM_LOCK_RES_MIGRATING) - status = DLM_MIGRATING; - else if (res->state & DLM_LOCK_RES_IN_PROGRESS) - status = DLM_FORWARD; - - return status; -} - -static inline u8 dlm_get_lock_cookie_node(u64 cookie) -{ - u8 ret; - cookie >>= 56; - ret = (u8)(cookie & 0xffULL); - return ret; -} - -static inline unsigned long long dlm_get_lock_cookie_seq(u64 cookie) -{ - unsigned long long ret; - ret = ((unsigned long long)cookie) & 0x00ffffffffffffffULL; - return ret; -} - -struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie, - struct dlm_lockstatus *lksb); -void dlm_lock_get(struct dlm_lock *lock); -void dlm_lock_put(struct dlm_lock *lock); - -void dlm_lock_attach_lockres(struct dlm_lock *lock, - struct dlm_lock_resource *res); - -int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); -int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); -int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); - -void dlm_revert_pending_convert(struct dlm_lock_resource *res, - struct dlm_lock *lock); -void dlm_revert_pending_lock(struct dlm_lock_resource *res, - struct dlm_lock *lock); - -int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); -void dlm_commit_pending_cancel(struct dlm_lock_resource *res, - struct dlm_lock *lock); -void dlm_commit_pending_unlock(struct 
dlm_lock_resource *res, - struct dlm_lock *lock); - -int dlm_launch_thread(struct dlm_ctxt *dlm); -void dlm_complete_thread(struct dlm_ctxt *dlm); -int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); -void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); -void dlm_wait_for_recovery(struct dlm_ctxt *dlm); -void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); -int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node); -void dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout); -void dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout); - -void dlm_put(struct dlm_ctxt *dlm); -struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm); -int dlm_domain_fully_joined(struct dlm_ctxt *dlm); - -void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res); -void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res); -static inline void dlm_lockres_get(struct dlm_lock_resource *res) -{ - /* This is called on every lookup, so it might be worth - * inlining. */ - kref_get(&res->refs); -} -void dlm_lockres_put(struct dlm_lock_resource *res); -void __dlm_unhash_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); -void __dlm_insert_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); -struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, - const char *name, - unsigned int len, - unsigned int hash); -struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, - const char *name, - unsigned int len, - unsigned int hash); -struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, - const char *name, - unsigned int len); - -int dlm_is_host_down(int errno); - -struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, - const char *lockid, - int namelen, - int flags); -struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, - const char *name, - unsigned int namelen); - -void dlm_lockres_set_refmap_bit(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, int bit); -void dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, int bit); - -void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res); -void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res); - -void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); -void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); -void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); -void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); -void dlm_do_local_ast(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock); -int dlm_do_remote_ast(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock); -void dlm_do_local_bast(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, - int blocked_type); -int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, - int msg_type, - int blocked_type, int flags); -static inline int dlm_send_proxy_bast(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, - int blocked_type) -{ - return dlm_send_proxy_ast_msg(dlm, res, lock, DLM_BAST, - blocked_type, 0); -} - -static inline int dlm_send_proxy_ast(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, - int flags) -{ - return dlm_send_proxy_ast_msg(dlm, res, lock, DLM_AST, - 0, flags); -} - -void dlm_print_one_lock_resource(struct 
dlm_lock_resource *res); -void __dlm_print_one_lock_resource(struct dlm_lock_resource *res); - -u8 dlm_nm_this_node(struct dlm_ctxt *dlm); -void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); -void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); - - -int dlm_nm_init(struct dlm_ctxt *dlm); -int dlm_heartbeat_init(struct dlm_ctxt *dlm); -void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data); -void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data); - -int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); -int dlm_finish_migration(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 old_master); -void dlm_lockres_release_ast(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res); -void __dlm_lockres_reserve_ast(struct dlm_lock_resource *res); - -int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); -int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); -void dlm_assert_master_post_handler(int status, void *data, void *ret_data); -int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); -int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); -int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); -int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); -int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); -int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); -int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); -int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); -int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, - u8 nodenum, u8 *real_master); - - -int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - int ignore_higher, - u8 request_from, - u32 flags); - - -int dlm_send_one_lockres(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_migratable_lockres *mres, - u8 send_to, - u8 flags); -void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res); - -/* will exit holding res->spinlock, but may drop in function */ -void __dlm_wait_on_lockres_flags(struct dlm_lock_resource *res, int flags); -void __dlm_wait_on_lockres_flags_set(struct dlm_lock_resource *res, int flags); - -/* will exit holding res->spinlock, but may drop in function */ -static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res) -{ - __dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_IN_PROGRESS| - DLM_LOCK_RES_RECOVERING| - DLM_LOCK_RES_MIGRATING)); -} - -void __dlm_unlink_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle); -void __dlm_insert_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle); - -/* create/destroy slab caches */ -int dlm_init_master_caches(void); -void dlm_destroy_master_caches(void); - -int dlm_init_lock_cache(void); -void dlm_destroy_lock_cache(void); - -int dlm_init_mle_cache(void); -void dlm_destroy_mle_cache(void); - -void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up); -int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res); -void dlm_clean_master_list(struct dlm_ctxt *dlm, - u8 dead_node); -void 
dlm_force_free_mles(struct dlm_ctxt *dlm); -int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); -int __dlm_lockres_has_locks(struct dlm_lock_resource *res); -int __dlm_lockres_unused(struct dlm_lock_resource *res); - -static inline const char * dlm_lock_mode_name(int mode) -{ - switch (mode) { - case LKM_EXMODE: - return "EX"; - case LKM_PRMODE: - return "PR"; - case LKM_NLMODE: - return "NL"; - } - return "UNKNOWN"; -} - - -static inline int dlm_lock_compatible(int existing, int request) -{ - /* NO_LOCK compatible with all */ - if (request == LKM_NLMODE || - existing == LKM_NLMODE) - return 1; - - /* EX incompatible with all non-NO_LOCK */ - if (request == LKM_EXMODE) - return 0; - - /* request must be PR, which is compatible with PR */ - if (existing == LKM_PRMODE) - return 1; - - return 0; -} - -static inline int dlm_lock_on_list(struct list_head *head, - struct dlm_lock *lock) -{ - struct list_head *iter; - struct dlm_lock *tmplock; - - list_for_each(iter, head) { - tmplock = list_entry(iter, struct dlm_lock, list); - if (tmplock == lock) - return 1; - } - return 0; -} - - -static inline enum dlm_status dlm_err_to_dlm_status(int err) -{ - enum dlm_status ret; - if (err == -ENOMEM) - ret = DLM_SYSERR; - else if (err == -ETIMEDOUT || o2net_link_down(err, NULL)) - ret = DLM_NOLOCKMGR; - else if (err == -EINVAL) - ret = DLM_BADPARAM; - else if (err == -ENAMETOOLONG) - ret = DLM_IVBUFLEN; - else - ret = DLM_BADARGS; - return ret; -} - - -static inline void dlm_node_iter_init(unsigned long *map, - struct dlm_node_iter *iter) -{ - memcpy(iter->node_map, map, sizeof(iter->node_map)); - iter->curnode = -1; -} - -static inline int dlm_node_iter_next(struct dlm_node_iter *iter) -{ - int bit; - bit = find_next_bit(iter->node_map, O2NM_MAX_NODES, iter->curnode+1); - if (bit >= O2NM_MAX_NODES) { - iter->curnode = O2NM_MAX_NODES; - return -ENOENT; - } - iter->curnode = bit; - return bit; -} - -static inline void dlm_set_lockres_owner(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 owner) -{ - assert_spin_locked(&res->spinlock); - - res->owner = owner; -} - -static inline void dlm_change_lockres_owner(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 owner) -{ - assert_spin_locked(&res->spinlock); - - if (owner != res->owner) - dlm_set_lockres_owner(dlm, res, owner); -} - -#endif /* DLMCOMMON_H */ diff --git a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmconvert.c b/ANDROID_3.4.5/fs/ocfs2/dlm/dlmconvert.c deleted file mode 100644 index 29a886d1..00000000 --- a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmconvert.c +++ /dev/null @@ -1,548 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmconvert.c - * - * underlying calls for lock conversion - * - * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. 
- * - */ - - -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/types.h> -#include <linux/highmem.h> -#include <linux/init.h> -#include <linux/sysctl.h> -#include <linux/random.h> -#include <linux/blkdev.h> -#include <linux/socket.h> -#include <linux/inet.h> -#include <linux/spinlock.h> - - -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" - -#include "dlmapi.h" -#include "dlmcommon.h" - -#include "dlmconvert.h" - -#define MLOG_MASK_PREFIX ML_DLM -#include "cluster/masklog.h" - -/* NOTE: __dlmconvert_master is the only function in here that - * needs a spinlock held on entry (res->spinlock) and it is the - * only one that holds a lock on exit (res->spinlock). - * All other functions in here need no locks and drop all of - * the locks that they acquire. */ -static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, int flags, - int type, int *call_ast, - int *kick_thread); -static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, int flags, int type); - -/* - * this is only called directly by dlmlock(), and only when the - * local node is the owner of the lockres - * locking: - * caller needs: none - * taken: takes and drops res->spinlock - * held on exit: none - * returns: see __dlmconvert_master - */ -enum dlm_status dlmconvert_master(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, int flags, int type) -{ - int call_ast = 0, kick_thread = 0; - enum dlm_status status; - - spin_lock(&res->spinlock); - /* we are not in a network handler, this is fine */ - __dlm_wait_on_lockres(res); - __dlm_lockres_reserve_ast(res); - res->state |= DLM_LOCK_RES_IN_PROGRESS; - - status = __dlmconvert_master(dlm, res, lock, flags, type, - &call_ast, &kick_thread); - - res->state &= ~DLM_LOCK_RES_IN_PROGRESS; - spin_unlock(&res->spinlock); - wake_up(&res->wq); - if (status != DLM_NORMAL && status != DLM_NOTQUEUED) - dlm_error(status); - - /* either queue the ast or release it */ - if (call_ast) - dlm_queue_ast(dlm, lock); - else - dlm_lockres_release_ast(dlm, res); - - if (kick_thread) - dlm_kick_thread(dlm, res); - - return status; -} - -/* performs lock conversion at the lockres master site - * locking: - * caller needs: res->spinlock - * taken: takes and drops lock->spinlock - * held on exit: res->spinlock - * returns: DLM_NORMAL, DLM_NOTQUEUED, DLM_DENIED - * call_ast: whether ast should be called for this lock - * kick_thread: whether dlm_kick_thread should be called - */ -static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, int flags, - int type, int *call_ast, - int *kick_thread) -{ - enum dlm_status status = DLM_NORMAL; - struct list_head *iter; - struct dlm_lock *tmplock=NULL; - - assert_spin_locked(&res->spinlock); - - mlog(0, "type=%d, convert_type=%d, new convert_type=%d\n", - lock->ml.type, lock->ml.convert_type, type); - - spin_lock(&lock->spinlock); - - /* already converting? 
*/ - if (lock->ml.convert_type != LKM_IVMODE) { - mlog(ML_ERROR, "attempted to convert a lock with a lock " - "conversion pending\n"); - status = DLM_DENIED; - goto unlock_exit; - } - - /* must be on grant queue to convert */ - if (!dlm_lock_on_list(&res->granted, lock)) { - mlog(ML_ERROR, "attempted to convert a lock not on grant " - "queue\n"); - status = DLM_DENIED; - goto unlock_exit; - } - - if (flags & LKM_VALBLK) { - switch (lock->ml.type) { - case LKM_EXMODE: - /* EX + LKM_VALBLK + convert == set lvb */ - mlog(0, "will set lvb: converting %s->%s\n", - dlm_lock_mode_name(lock->ml.type), - dlm_lock_mode_name(type)); - lock->lksb->flags |= DLM_LKSB_PUT_LVB; - break; - case LKM_PRMODE: - case LKM_NLMODE: - /* refetch if new level is not NL */ - if (type > LKM_NLMODE) { - mlog(0, "will fetch new value into " - "lvb: converting %s->%s\n", - dlm_lock_mode_name(lock->ml.type), - dlm_lock_mode_name(type)); - lock->lksb->flags |= DLM_LKSB_GET_LVB; - } else { - mlog(0, "will NOT fetch new value " - "into lvb: converting %s->%s\n", - dlm_lock_mode_name(lock->ml.type), - dlm_lock_mode_name(type)); - flags &= ~(LKM_VALBLK); - } - break; - } - } - - - /* in-place downconvert? */ - if (type <= lock->ml.type) - goto grant; - - /* upconvert from here on */ - status = DLM_NORMAL; - list_for_each(iter, &res->granted) { - tmplock = list_entry(iter, struct dlm_lock, list); - if (tmplock == lock) - continue; - if (!dlm_lock_compatible(tmplock->ml.type, type)) - goto switch_queues; - } - - list_for_each(iter, &res->converting) { - tmplock = list_entry(iter, struct dlm_lock, list); - if (!dlm_lock_compatible(tmplock->ml.type, type)) - goto switch_queues; - /* existing conversion requests take precedence */ - if (!dlm_lock_compatible(tmplock->ml.convert_type, type)) - goto switch_queues; - } - - /* fall thru to grant */ - -grant: - mlog(0, "res %.*s, granting %s lock\n", res->lockname.len, - res->lockname.name, dlm_lock_mode_name(type)); - /* immediately grant the new lock type */ - lock->lksb->status = DLM_NORMAL; - if (lock->ml.node == dlm->node_num) - mlog(0, "doing in-place convert for nonlocal lock\n"); - lock->ml.type = type; - if (lock->lksb->flags & DLM_LKSB_PUT_LVB) - memcpy(res->lvb, lock->lksb->lvb, DLM_LVB_LEN); - - status = DLM_NORMAL; - *call_ast = 1; - goto unlock_exit; - -switch_queues: - if (flags & LKM_NOQUEUE) { - mlog(0, "failed to convert NOQUEUE lock %.*s from " - "%d to %d...\n", res->lockname.len, res->lockname.name, - lock->ml.type, type); - status = DLM_NOTQUEUED; - goto unlock_exit; - } - mlog(0, "res %.*s, queueing...\n", res->lockname.len, - res->lockname.name); - - lock->ml.convert_type = type; - /* do not alter lock refcount. switching lists. */ - list_move_tail(&lock->list, &res->converting); - -unlock_exit: - spin_unlock(&lock->spinlock); - if (status == DLM_DENIED) { - __dlm_print_one_lock_resource(res); - } - if (status == DLM_NORMAL) - *kick_thread = 1; - return status; -} - -void dlm_revert_pending_convert(struct dlm_lock_resource *res, - struct dlm_lock *lock) -{ - /* do not alter lock refcount. switching lists. 
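__dlmconvert_master() above grants any downconvert in place, but an upconvert succeeds immediately only if the requested mode is compatible with every other granted lock and with both the held and requested modes of every lock already converting; otherwise the lock moves to the converting queue (or fails with DLM_NOTQUEUED under LKM_NOQUEUE). A simplified, self-contained model of that decision, ignoring the locking and LVB handling; simple_lock and the mode values are illustrative:

enum { LKM_NLMODE = 0, LKM_PRMODE = 3, LKM_EXMODE = 5 }; /* stand-ins */

static int compatible(int existing, int request)
{
	if (request == LKM_NLMODE || existing == LKM_NLMODE)
		return 1;
	if (request == LKM_EXMODE)
		return 0;
	return existing == LKM_PRMODE;
}

struct simple_lock {
	int type;          /* granted mode */
	int convert_type;  /* pending target mode; valid on the converting list */
};

/* 1 = grant the convert in place, 0 = park it on the converting queue */
static int convert_grantable(const struct simple_lock *me, int new_type,
			     const struct simple_lock *granted, int ngranted,
			     const struct simple_lock *converting, int nconv)
{
	if (new_type <= me->type)
		return 1;                       /* in-place downconvert */

	for (int i = 0; i < ngranted; i++)
		if (&granted[i] != me &&
		    !compatible(granted[i].type, new_type))
			return 0;

	for (int i = 0; i < nconv; i++) {
		/* earlier conversion requests take precedence */
		if (!compatible(converting[i].type, new_type) ||
		    !compatible(converting[i].convert_type, new_type))
			return 0;
	}
	return 1;
}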
*/ - list_move_tail(&lock->list, &res->granted); - lock->ml.convert_type = LKM_IVMODE; - lock->lksb->flags &= ~(DLM_LKSB_GET_LVB|DLM_LKSB_PUT_LVB); -} - -/* messages the master site to do lock conversion - * locking: - * caller needs: none - * taken: takes and drops res->spinlock, uses DLM_LOCK_RES_IN_PROGRESS - * held on exit: none - * returns: DLM_NORMAL, DLM_RECOVERING, status from remote node - */ -enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, int flags, int type) -{ - enum dlm_status status; - - mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type, - lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS); - - spin_lock(&res->spinlock); - if (res->state & DLM_LOCK_RES_RECOVERING) { - mlog(0, "bailing out early since res is RECOVERING " - "on secondary queue\n"); - /* __dlm_print_one_lock_resource(res); */ - status = DLM_RECOVERING; - goto bail; - } - /* will exit this call with spinlock held */ - __dlm_wait_on_lockres(res); - - if (lock->ml.convert_type != LKM_IVMODE) { - __dlm_print_one_lock_resource(res); - mlog(ML_ERROR, "converting a remote lock that is already " - "converting! (cookie=%u:%llu, conv=%d)\n", - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - lock->ml.convert_type); - status = DLM_DENIED; - goto bail; - } - res->state |= DLM_LOCK_RES_IN_PROGRESS; - /* move lock to local convert queue */ - /* do not alter lock refcount. switching lists. */ - list_move_tail(&lock->list, &res->converting); - lock->convert_pending = 1; - lock->ml.convert_type = type; - - if (flags & LKM_VALBLK) { - if (lock->ml.type == LKM_EXMODE) { - flags |= LKM_PUT_LVB; - lock->lksb->flags |= DLM_LKSB_PUT_LVB; - } else { - if (lock->ml.convert_type == LKM_NLMODE) - flags &= ~LKM_VALBLK; - else { - flags |= LKM_GET_LVB; - lock->lksb->flags |= DLM_LKSB_GET_LVB; - } - } - } - spin_unlock(&res->spinlock); - - /* no locks held here. - * need to wait for a reply as to whether it got queued or not. */ - status = dlm_send_remote_convert_request(dlm, res, lock, flags, type); - - spin_lock(&res->spinlock); - res->state &= ~DLM_LOCK_RES_IN_PROGRESS; - lock->convert_pending = 0; - /* if it failed, move it back to granted queue */ - if (status != DLM_NORMAL) { - if (status != DLM_NOTQUEUED) - dlm_error(status); - dlm_revert_pending_convert(res, lock); - } -bail: - spin_unlock(&res->spinlock); - - /* TODO: should this be a wake_one? 
*/ - /* wake up any IN_PROGRESS waiters */ - wake_up(&res->wq); - - return status; -} - -/* sends DLM_CONVERT_LOCK_MSG to master site - * locking: - * caller needs: none - * taken: none - * held on exit: none - * returns: DLM_NOLOCKMGR, status from remote node - */ -static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, int flags, int type) -{ - struct dlm_convert_lock convert; - int tmpret; - enum dlm_status ret; - int status = 0; - struct kvec vec[2]; - size_t veclen = 1; - - mlog(0, "%.*s\n", res->lockname.len, res->lockname.name); - - memset(&convert, 0, sizeof(struct dlm_convert_lock)); - convert.node_idx = dlm->node_num; - convert.requested_type = type; - convert.cookie = lock->ml.cookie; - convert.namelen = res->lockname.len; - convert.flags = cpu_to_be32(flags); - memcpy(convert.name, res->lockname.name, convert.namelen); - - vec[0].iov_len = sizeof(struct dlm_convert_lock); - vec[0].iov_base = &convert; - - if (flags & LKM_PUT_LVB) { - /* extra data to send if we are updating lvb */ - vec[1].iov_len = DLM_LVB_LEN; - vec[1].iov_base = lock->lksb->lvb; - veclen++; - } - - tmpret = o2net_send_message_vec(DLM_CONVERT_LOCK_MSG, dlm->key, - vec, veclen, res->owner, &status); - if (tmpret >= 0) { - // successfully sent and received - ret = status; // this is already a dlm_status - if (ret == DLM_RECOVERING) { - mlog(0, "node %u returned DLM_RECOVERING from convert " - "message!\n", res->owner); - } else if (ret == DLM_MIGRATING) { - mlog(0, "node %u returned DLM_MIGRATING from convert " - "message!\n", res->owner); - } else if (ret == DLM_FORWARD) { - mlog(0, "node %u returned DLM_FORWARD from convert " - "message!\n", res->owner); - } else if (ret != DLM_NORMAL && ret != DLM_NOTQUEUED) - dlm_error(ret); - } else { - mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " - "node %u\n", tmpret, DLM_CONVERT_LOCK_MSG, dlm->key, - res->owner); - if (dlm_is_host_down(tmpret)) { - /* instead of logging the same network error over - * and over, sleep here and wait for the heartbeat - * to notice the node is dead. times out after 5s. 
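dlm_send_remote_convert_request() above builds a two-element kvec so the value block crosses the wire only when the convert actually updates it (LKM_PUT_LVB). A userspace analogue of the same optional-payload pattern using writev(); the message layout here is illustrative, not the o2net wire format:

#include <sys/uio.h>
#include <unistd.h>

#define LVB_LEN 64   /* stand-in for DLM_LVB_LEN */

struct convert_msg {
	unsigned char node_idx;
	unsigned char requested_type;
	unsigned char namelen;
	char name[32];
};

/* Send the fixed header, and append the LVB payload only when put_lvb
 * is set: the same pattern the code above uses with
 * o2net_send_message_vec(). */
static ssize_t send_convert(int fd, const struct convert_msg *msg,
			    const char *lvb, int put_lvb)
{
	struct iovec vec[2] = {
		{ .iov_base = (void *)msg, .iov_len = sizeof(*msg) },
	};
	int veclen = 1;

	if (put_lvb) {
		vec[1].iov_base = (void *)lvb;
		vec[1].iov_len = LVB_LEN;
		veclen++;
	}
	return writev(fd, vec, veclen);
}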
*/ - dlm_wait_for_node_death(dlm, res->owner, - DLM_NODE_DEATH_WAIT_MAX); - ret = DLM_RECOVERING; - mlog(0, "node %u died so returning DLM_RECOVERING " - "from convert message!\n", res->owner); - } else { - ret = dlm_err_to_dlm_status(tmpret); - } - } - - return ret; -} - -/* handler for DLM_CONVERT_LOCK_MSG on master site - * locking: - * caller needs: none - * taken: takes and drop res->spinlock - * held on exit: none - * returns: DLM_NORMAL, DLM_IVLOCKID, DLM_BADARGS, - * status from __dlmconvert_master - */ -int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - struct dlm_ctxt *dlm = data; - struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf; - struct dlm_lock_resource *res = NULL; - struct list_head *iter; - struct dlm_lock *lock = NULL; - struct dlm_lockstatus *lksb; - enum dlm_status status = DLM_NORMAL; - u32 flags; - int call_ast = 0, kick_thread = 0, ast_reserved = 0, wake = 0; - - if (!dlm_grab(dlm)) { - dlm_error(DLM_REJECTED); - return DLM_REJECTED; - } - - mlog_bug_on_msg(!dlm_domain_fully_joined(dlm), - "Domain %s not fully joined!\n", dlm->name); - - if (cnv->namelen > DLM_LOCKID_NAME_MAX) { - status = DLM_IVBUFLEN; - dlm_error(status); - goto leave; - } - - flags = be32_to_cpu(cnv->flags); - - if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) == - (LKM_PUT_LVB|LKM_GET_LVB)) { - mlog(ML_ERROR, "both PUT and GET lvb specified\n"); - status = DLM_BADARGS; - goto leave; - } - - mlog(0, "lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" : - (flags & LKM_GET_LVB ? "get lvb" : "none")); - - status = DLM_IVLOCKID; - res = dlm_lookup_lockres(dlm, cnv->name, cnv->namelen); - if (!res) { - dlm_error(status); - goto leave; - } - - spin_lock(&res->spinlock); - status = __dlm_lockres_state_to_status(res); - if (status != DLM_NORMAL) { - spin_unlock(&res->spinlock); - dlm_error(status); - goto leave; - } - list_for_each(iter, &res->granted) { - lock = list_entry(iter, struct dlm_lock, list); - if (lock->ml.cookie == cnv->cookie && - lock->ml.node == cnv->node_idx) { - dlm_lock_get(lock); - break; - } - lock = NULL; - } - spin_unlock(&res->spinlock); - if (!lock) { - status = DLM_IVLOCKID; - mlog(ML_ERROR, "did not find lock to convert on grant queue! 
" - "cookie=%u:%llu\n", - dlm_get_lock_cookie_node(be64_to_cpu(cnv->cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(cnv->cookie))); - dlm_print_one_lock_resource(res); - goto leave; - } - - /* found the lock */ - lksb = lock->lksb; - - /* see if caller needed to get/put lvb */ - if (flags & LKM_PUT_LVB) { - BUG_ON(lksb->flags & (DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB)); - lksb->flags |= DLM_LKSB_PUT_LVB; - memcpy(&lksb->lvb[0], &cnv->lvb[0], DLM_LVB_LEN); - } else if (flags & LKM_GET_LVB) { - BUG_ON(lksb->flags & (DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB)); - lksb->flags |= DLM_LKSB_GET_LVB; - } - - spin_lock(&res->spinlock); - status = __dlm_lockres_state_to_status(res); - if (status == DLM_NORMAL) { - __dlm_lockres_reserve_ast(res); - ast_reserved = 1; - res->state |= DLM_LOCK_RES_IN_PROGRESS; - status = __dlmconvert_master(dlm, res, lock, flags, - cnv->requested_type, - &call_ast, &kick_thread); - res->state &= ~DLM_LOCK_RES_IN_PROGRESS; - wake = 1; - } - spin_unlock(&res->spinlock); - if (wake) - wake_up(&res->wq); - - if (status != DLM_NORMAL) { - if (status != DLM_NOTQUEUED) - dlm_error(status); - lksb->flags &= ~(DLM_LKSB_GET_LVB|DLM_LKSB_PUT_LVB); - } - -leave: - if (lock) - dlm_lock_put(lock); - - /* either queue the ast or release it, if reserved */ - if (call_ast) - dlm_queue_ast(dlm, lock); - else if (ast_reserved) - dlm_lockres_release_ast(dlm, res); - - if (kick_thread) - dlm_kick_thread(dlm, res); - - if (res) - dlm_lockres_put(res); - - dlm_put(dlm); - - return status; -} diff --git a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmconvert.h b/ANDROID_3.4.5/fs/ocfs2/dlm/dlmconvert.h deleted file mode 100644 index b2e3677d..00000000 --- a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmconvert.h +++ /dev/null @@ -1,35 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmconvert.h - * - * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * - */ - -#ifndef DLMCONVERT_H -#define DLMCONVERT_H - -enum dlm_status dlmconvert_master(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, int flags, int type); -enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, int flags, int type); - -#endif diff --git a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmdebug.c b/ANDROID_3.4.5/fs/ocfs2/dlm/dlmdebug.c deleted file mode 100644 index 0e28e242..00000000 --- a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmdebug.c +++ /dev/null @@ -1,1018 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmdebug.c - * - * debug functionality for the dlm - * - * Copyright (C) 2004, 2008 Oracle. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * - */ - -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/highmem.h> -#include <linux/sysctl.h> -#include <linux/spinlock.h> -#include <linux/debugfs.h> -#include <linux/export.h> - -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" - -#include "dlmapi.h" -#include "dlmcommon.h" -#include "dlmdomain.h" -#include "dlmdebug.h" - -#define MLOG_MASK_PREFIX ML_DLM -#include "cluster/masklog.h" - -static int stringify_lockname(const char *lockname, int locklen, char *buf, - int len); - -void dlm_print_one_lock_resource(struct dlm_lock_resource *res) -{ - spin_lock(&res->spinlock); - __dlm_print_one_lock_resource(res); - spin_unlock(&res->spinlock); -} - -static void dlm_print_lockres_refmap(struct dlm_lock_resource *res) -{ - int bit; - assert_spin_locked(&res->spinlock); - - printk(" refmap nodes: [ "); - bit = 0; - while (1) { - bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit); - if (bit >= O2NM_MAX_NODES) - break; - printk("%u ", bit); - bit++; - } - printk("], inflight=%u\n", res->inflight_locks); -} - -static void __dlm_print_lock(struct dlm_lock *lock) -{ - spin_lock(&lock->spinlock); - - printk(" type=%d, conv=%d, node=%u, cookie=%u:%llu, " - "ref=%u, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c), " - "pending=(conv=%c,lock=%c,cancel=%c,unlock=%c)\n", - lock->ml.type, lock->ml.convert_type, lock->ml.node, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - atomic_read(&lock->lock_refs.refcount), - (list_empty(&lock->ast_list) ? 'y' : 'n'), - (lock->ast_pending ? 'y' : 'n'), - (list_empty(&lock->bast_list) ? 'y' : 'n'), - (lock->bast_pending ? 'y' : 'n'), - (lock->convert_pending ? 'y' : 'n'), - (lock->lock_pending ? 'y' : 'n'), - (lock->cancel_pending ? 'y' : 'n'), - (lock->unlock_pending ? 'y' : 'n')); - - spin_unlock(&lock->spinlock); -} - -void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) -{ - struct list_head *iter2; - struct dlm_lock *lock; - char buf[DLM_LOCKID_NAME_MAX]; - - assert_spin_locked(&res->spinlock); - - stringify_lockname(res->lockname.name, res->lockname.len, - buf, sizeof(buf)); - printk("lockres: %s, owner=%u, state=%u\n", - buf, res->owner, res->state); - printk(" last used: %lu, refcnt: %u, on purge list: %s\n", - res->last_used, atomic_read(&res->refs.refcount), - list_empty(&res->purge) ? "no" : "yes"); - printk(" on dirty list: %s, on reco list: %s, " - "migrating pending: %s\n", - list_empty(&res->dirty) ? "no" : "yes", - list_empty(&res->recovering) ? "no" : "yes", - res->migration_pending ? 
"yes" : "no"); - printk(" inflight locks: %d, asts reserved: %d\n", - res->inflight_locks, atomic_read(&res->asts_reserved)); - dlm_print_lockres_refmap(res); - printk(" granted queue:\n"); - list_for_each(iter2, &res->granted) { - lock = list_entry(iter2, struct dlm_lock, list); - __dlm_print_lock(lock); - } - printk(" converting queue:\n"); - list_for_each(iter2, &res->converting) { - lock = list_entry(iter2, struct dlm_lock, list); - __dlm_print_lock(lock); - } - printk(" blocked queue:\n"); - list_for_each(iter2, &res->blocked) { - lock = list_entry(iter2, struct dlm_lock, list); - __dlm_print_lock(lock); - } -} - -void dlm_print_one_lock(struct dlm_lock *lockid) -{ - dlm_print_one_lock_resource(lockid->lockres); -} -EXPORT_SYMBOL_GPL(dlm_print_one_lock); - -static const char *dlm_errnames[] = { - [DLM_NORMAL] = "DLM_NORMAL", - [DLM_GRANTED] = "DLM_GRANTED", - [DLM_DENIED] = "DLM_DENIED", - [DLM_DENIED_NOLOCKS] = "DLM_DENIED_NOLOCKS", - [DLM_WORKING] = "DLM_WORKING", - [DLM_BLOCKED] = "DLM_BLOCKED", - [DLM_BLOCKED_ORPHAN] = "DLM_BLOCKED_ORPHAN", - [DLM_DENIED_GRACE_PERIOD] = "DLM_DENIED_GRACE_PERIOD", - [DLM_SYSERR] = "DLM_SYSERR", - [DLM_NOSUPPORT] = "DLM_NOSUPPORT", - [DLM_CANCELGRANT] = "DLM_CANCELGRANT", - [DLM_IVLOCKID] = "DLM_IVLOCKID", - [DLM_SYNC] = "DLM_SYNC", - [DLM_BADTYPE] = "DLM_BADTYPE", - [DLM_BADRESOURCE] = "DLM_BADRESOURCE", - [DLM_MAXHANDLES] = "DLM_MAXHANDLES", - [DLM_NOCLINFO] = "DLM_NOCLINFO", - [DLM_NOLOCKMGR] = "DLM_NOLOCKMGR", - [DLM_NOPURGED] = "DLM_NOPURGED", - [DLM_BADARGS] = "DLM_BADARGS", - [DLM_VOID] = "DLM_VOID", - [DLM_NOTQUEUED] = "DLM_NOTQUEUED", - [DLM_IVBUFLEN] = "DLM_IVBUFLEN", - [DLM_CVTUNGRANT] = "DLM_CVTUNGRANT", - [DLM_BADPARAM] = "DLM_BADPARAM", - [DLM_VALNOTVALID] = "DLM_VALNOTVALID", - [DLM_REJECTED] = "DLM_REJECTED", - [DLM_ABORT] = "DLM_ABORT", - [DLM_CANCEL] = "DLM_CANCEL", - [DLM_IVRESHANDLE] = "DLM_IVRESHANDLE", - [DLM_DEADLOCK] = "DLM_DEADLOCK", - [DLM_DENIED_NOASTS] = "DLM_DENIED_NOASTS", - [DLM_FORWARD] = "DLM_FORWARD", - [DLM_TIMEOUT] = "DLM_TIMEOUT", - [DLM_IVGROUPID] = "DLM_IVGROUPID", - [DLM_VERS_CONFLICT] = "DLM_VERS_CONFLICT", - [DLM_BAD_DEVICE_PATH] = "DLM_BAD_DEVICE_PATH", - [DLM_NO_DEVICE_PERMISSION] = "DLM_NO_DEVICE_PERMISSION", - [DLM_NO_CONTROL_DEVICE ] = "DLM_NO_CONTROL_DEVICE ", - [DLM_RECOVERING] = "DLM_RECOVERING", - [DLM_MIGRATING] = "DLM_MIGRATING", - [DLM_MAXSTATS] = "DLM_MAXSTATS", -}; - -static const char *dlm_errmsgs[] = { - [DLM_NORMAL] = "request in progress", - [DLM_GRANTED] = "request granted", - [DLM_DENIED] = "request denied", - [DLM_DENIED_NOLOCKS] = "request denied, out of system resources", - [DLM_WORKING] = "async request in progress", - [DLM_BLOCKED] = "lock request blocked", - [DLM_BLOCKED_ORPHAN] = "lock request blocked by a orphan lock", - [DLM_DENIED_GRACE_PERIOD] = "topological change in progress", - [DLM_SYSERR] = "system error", - [DLM_NOSUPPORT] = "unsupported", - [DLM_CANCELGRANT] = "can't cancel convert: already granted", - [DLM_IVLOCKID] = "bad lockid", - [DLM_SYNC] = "synchronous request granted", - [DLM_BADTYPE] = "bad resource type", - [DLM_BADRESOURCE] = "bad resource handle", - [DLM_MAXHANDLES] = "no more resource handles", - [DLM_NOCLINFO] = "can't contact cluster manager", - [DLM_NOLOCKMGR] = "can't contact lock manager", - [DLM_NOPURGED] = "can't contact purge daemon", - [DLM_BADARGS] = "bad api args", - [DLM_VOID] = "no status", - [DLM_NOTQUEUED] = "NOQUEUE was specified and request failed", - [DLM_IVBUFLEN] = "invalid resource name length", - [DLM_CVTUNGRANT] = "attempted to 
convert ungranted lock", - [DLM_BADPARAM] = "invalid lock mode specified", - [DLM_VALNOTVALID] = "value block has been invalidated", - [DLM_REJECTED] = "request rejected, unrecognized client", - [DLM_ABORT] = "blocked lock request cancelled", - [DLM_CANCEL] = "conversion request cancelled", - [DLM_IVRESHANDLE] = "invalid resource handle", - [DLM_DEADLOCK] = "deadlock recovery refused this request", - [DLM_DENIED_NOASTS] = "failed to allocate AST", - [DLM_FORWARD] = "request must wait for primary's response", - [DLM_TIMEOUT] = "timeout value for lock has expired", - [DLM_IVGROUPID] = "invalid group specification", - [DLM_VERS_CONFLICT] = "version conflicts prevent request handling", - [DLM_BAD_DEVICE_PATH] = "Locks device does not exist or path wrong", - [DLM_NO_DEVICE_PERMISSION] = "Client has insufficient perms for device", - [DLM_NO_CONTROL_DEVICE] = "Cannot set options on opened device ", - [DLM_RECOVERING] = "lock resource being recovered", - [DLM_MIGRATING] = "lock resource being migrated", - [DLM_MAXSTATS] = "invalid error number", -}; - -const char *dlm_errmsg(enum dlm_status err) -{ - if (err >= DLM_MAXSTATS || err < 0) - return dlm_errmsgs[DLM_MAXSTATS]; - return dlm_errmsgs[err]; -} -EXPORT_SYMBOL_GPL(dlm_errmsg); - -const char *dlm_errname(enum dlm_status err) -{ - if (err >= DLM_MAXSTATS || err < 0) - return dlm_errnames[DLM_MAXSTATS]; - return dlm_errnames[err]; -} -EXPORT_SYMBOL_GPL(dlm_errname); - -/* NOTE: This function converts a lockname into a string. It uses knowledge - * of the format of the lockname that should be outside the purview of the dlm. - * We are adding only to make dlm debugging slightly easier. - * - * For more on lockname formats, please refer to dlmglue.c and ocfs2_lockid.h. - */ -static int stringify_lockname(const char *lockname, int locklen, char *buf, - int len) -{ - int out = 0; - __be64 inode_blkno_be; - -#define OCFS2_DENTRY_LOCK_INO_START 18 - if (*lockname == 'N') { - memcpy((__be64 *)&inode_blkno_be, - (char *)&lockname[OCFS2_DENTRY_LOCK_INO_START], - sizeof(__be64)); - out += snprintf(buf + out, len - out, "%.*s%08x", - OCFS2_DENTRY_LOCK_INO_START - 1, lockname, - (unsigned int)be64_to_cpu(inode_blkno_be)); - } else - out += snprintf(buf + out, len - out, "%.*s", - locklen, lockname); - return out; -} - -static int stringify_nodemap(unsigned long *nodemap, int maxnodes, - char *buf, int len) -{ - int out = 0; - int i = -1; - - while ((i = find_next_bit(nodemap, maxnodes, i + 1)) < maxnodes) - out += snprintf(buf + out, len - out, "%d ", i); - - return out; -} - -static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len) -{ - int out = 0; - char *mle_type; - - if (mle->type == DLM_MLE_BLOCK) - mle_type = "BLK"; - else if (mle->type == DLM_MLE_MASTER) - mle_type = "MAS"; - else - mle_type = "MIG"; - - out += stringify_lockname(mle->mname, mle->mnamelen, buf + out, len - out); - out += snprintf(buf + out, len - out, - "\t%3s\tmas=%3u\tnew=%3u\tevt=%1d\tuse=%1d\tref=%3d\n", - mle_type, mle->master, mle->new_master, - !list_empty(&mle->hb_events), - !!mle->inuse, - atomic_read(&mle->mle_refs.refcount)); - - out += snprintf(buf + out, len - out, "Maybe="); - out += stringify_nodemap(mle->maybe_map, O2NM_MAX_NODES, - buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); - - out += snprintf(buf + out, len - out, "Vote="); - out += stringify_nodemap(mle->vote_map, O2NM_MAX_NODES, - buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); - - out += snprintf(buf + out, len - out, "Response="); - out += 
stringify_nodemap(mle->response_map, O2NM_MAX_NODES, - buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); - - out += snprintf(buf + out, len - out, "Node="); - out += stringify_nodemap(mle->node_map, O2NM_MAX_NODES, - buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); - - out += snprintf(buf + out, len - out, "\n"); - - return out; -} - -void dlm_print_one_mle(struct dlm_master_list_entry *mle) -{ - char *buf; - - buf = (char *) get_zeroed_page(GFP_NOFS); - if (buf) { - dump_mle(mle, buf, PAGE_SIZE - 1); - free_page((unsigned long)buf); - } -} - -#ifdef CONFIG_DEBUG_FS - -static struct dentry *dlm_debugfs_root = NULL; - -#define DLM_DEBUGFS_DIR "o2dlm" -#define DLM_DEBUGFS_DLM_STATE "dlm_state" -#define DLM_DEBUGFS_LOCKING_STATE "locking_state" -#define DLM_DEBUGFS_MLE_STATE "mle_state" -#define DLM_DEBUGFS_PURGE_LIST "purge_list" - -/* begin - utils funcs */ -static void dlm_debug_free(struct kref *kref) -{ - struct dlm_debug_ctxt *dc; - - dc = container_of(kref, struct dlm_debug_ctxt, debug_refcnt); - - kfree(dc); -} - -static void dlm_debug_put(struct dlm_debug_ctxt *dc) -{ - if (dc) - kref_put(&dc->debug_refcnt, dlm_debug_free); -} - -static void dlm_debug_get(struct dlm_debug_ctxt *dc) -{ - kref_get(&dc->debug_refcnt); -} - -static int debug_release(struct inode *inode, struct file *file) -{ - free_page((unsigned long)file->private_data); - return 0; -} - -static ssize_t debug_read(struct file *file, char __user *buf, - size_t nbytes, loff_t *ppos) -{ - return simple_read_from_buffer(buf, nbytes, ppos, file->private_data, - i_size_read(file->f_mapping->host)); -} -/* end - util funcs */ - -/* begin - purge list funcs */ -static int debug_purgelist_print(struct dlm_ctxt *dlm, char *buf, int len) -{ - struct dlm_lock_resource *res; - int out = 0; - unsigned long total = 0; - - out += snprintf(buf + out, len - out, - "Dumping Purgelist for Domain: %s\n", dlm->name); - - spin_lock(&dlm->spinlock); - list_for_each_entry(res, &dlm->purge_list, purge) { - ++total; - if (len - out < 100) - continue; - spin_lock(&res->spinlock); - out += stringify_lockname(res->lockname.name, - res->lockname.len, - buf + out, len - out); - out += snprintf(buf + out, len - out, "\t%ld\n", - (jiffies - res->last_used)/HZ); - spin_unlock(&res->spinlock); - } - spin_unlock(&dlm->spinlock); - - out += snprintf(buf + out, len - out, "Total on list: %ld\n", total); - - return out; -} - -static int debug_purgelist_open(struct inode *inode, struct file *file) -{ - struct dlm_ctxt *dlm = inode->i_private; - char *buf = NULL; - - buf = (char *) get_zeroed_page(GFP_NOFS); - if (!buf) - goto bail; - - i_size_write(inode, debug_purgelist_print(dlm, buf, PAGE_SIZE - 1)); - - file->private_data = buf; - - return 0; -bail: - return -ENOMEM; -} - -static const struct file_operations debug_purgelist_fops = { - .open = debug_purgelist_open, - .release = debug_release, - .read = debug_read, - .llseek = generic_file_llseek, -}; -/* end - purge list funcs */ - -/* begin - debug mle funcs */ -static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len) -{ - struct dlm_master_list_entry *mle; - struct hlist_head *bucket; - struct hlist_node *list; - int i, out = 0; - unsigned long total = 0, longest = 0, bucket_count = 0; - - out += snprintf(buf + out, len - out, - "Dumping MLEs for Domain: %s\n", dlm->name); - - spin_lock(&dlm->master_lock); - for (i = 0; i < DLM_HASH_BUCKETS; i++) { - bucket = dlm_master_hash(dlm, i); - hlist_for_each(list, bucket) { - mle = hlist_entry(list, struct 
dlm_master_list_entry, - master_hash_node); - ++total; - ++bucket_count; - if (len - out < 200) - continue; - out += dump_mle(mle, buf + out, len - out); - } - longest = max(longest, bucket_count); - bucket_count = 0; - } - spin_unlock(&dlm->master_lock); - - out += snprintf(buf + out, len - out, - "Total: %ld, Longest: %ld\n", total, longest); - return out; -} - -static int debug_mle_open(struct inode *inode, struct file *file) -{ - struct dlm_ctxt *dlm = inode->i_private; - char *buf = NULL; - - buf = (char *) get_zeroed_page(GFP_NOFS); - if (!buf) - goto bail; - - i_size_write(inode, debug_mle_print(dlm, buf, PAGE_SIZE - 1)); - - file->private_data = buf; - - return 0; -bail: - return -ENOMEM; -} - -static const struct file_operations debug_mle_fops = { - .open = debug_mle_open, - .release = debug_release, - .read = debug_read, - .llseek = generic_file_llseek, -}; - -/* end - debug mle funcs */ - -/* begin - debug lockres funcs */ -static int dump_lock(struct dlm_lock *lock, int list_type, char *buf, int len) -{ - int out; - -#define DEBUG_LOCK_VERSION 1 - spin_lock(&lock->spinlock); - out = snprintf(buf, len, "LOCK:%d,%d,%d,%d,%d,%d:%lld,%d,%d,%d,%d,%d," - "%d,%d,%d,%d\n", - DEBUG_LOCK_VERSION, - list_type, lock->ml.type, lock->ml.convert_type, - lock->ml.node, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - !list_empty(&lock->ast_list), - !list_empty(&lock->bast_list), - lock->ast_pending, lock->bast_pending, - lock->convert_pending, lock->lock_pending, - lock->cancel_pending, lock->unlock_pending, - atomic_read(&lock->lock_refs.refcount)); - spin_unlock(&lock->spinlock); - - return out; -} - -static int dump_lockres(struct dlm_lock_resource *res, char *buf, int len) -{ - struct dlm_lock *lock; - int i; - int out = 0; - - out += snprintf(buf + out, len - out, "NAME:"); - out += stringify_lockname(res->lockname.name, res->lockname.len, - buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); - -#define DEBUG_LRES_VERSION 1 - out += snprintf(buf + out, len - out, - "LRES:%d,%d,%d,%ld,%d,%d,%d,%d,%d,%d,%d\n", - DEBUG_LRES_VERSION, - res->owner, res->state, res->last_used, - !list_empty(&res->purge), - !list_empty(&res->dirty), - !list_empty(&res->recovering), - res->inflight_locks, res->migration_pending, - atomic_read(&res->asts_reserved), - atomic_read(&res->refs.refcount)); - - /* refmap */ - out += snprintf(buf + out, len - out, "RMAP:"); - out += stringify_nodemap(res->refmap, O2NM_MAX_NODES, - buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); - - /* lvb */ - out += snprintf(buf + out, len - out, "LVBX:"); - for (i = 0; i < DLM_LVB_LEN; i++) - out += snprintf(buf + out, len - out, - "%02x", (unsigned char)res->lvb[i]); - out += snprintf(buf + out, len - out, "\n"); - - /* granted */ - list_for_each_entry(lock, &res->granted, list) - out += dump_lock(lock, 0, buf + out, len - out); - - /* converting */ - list_for_each_entry(lock, &res->converting, list) - out += dump_lock(lock, 1, buf + out, len - out); - - /* blocked */ - list_for_each_entry(lock, &res->blocked, list) - out += dump_lock(lock, 2, buf + out, len - out); - - out += snprintf(buf + out, len - out, "\n"); - - return out; -} - -static void *lockres_seq_start(struct seq_file *m, loff_t *pos) -{ - struct debug_lockres *dl = m->private; - struct dlm_ctxt *dlm = dl->dl_ctxt; - struct dlm_lock_resource *oldres = dl->dl_res; - struct dlm_lock_resource *res = NULL; - struct list_head *track_list; - - 
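/* The versioned "LOCK:"/"LRES:" records emitted by dump_lock() and
 * dump_lockres() above are comma-separated lines meant to be machine-
 * parsed from the locking_state debugfs file (userspace ocfs2 tooling
 * reads them). A hedged sketch of parsing the fixed prefix of a
 * version-1 LOCK record with sscanf(), following the format string in
 * dump_lock(); this is an illustration, not a tool's actual parser. */

#include <stdio.h>

struct lock_rec {
	int version, list_type, type, convert_type, node;
	int cookie_node;
	long long cookie_seq;
};

static int parse_lock_rec(const char *line, struct lock_rec *r)
{
	return sscanf(line, "LOCK:%d,%d,%d,%d,%d,%d:%lld",
		      &r->version, &r->list_type, &r->type,
		      &r->convert_type, &r->node,
		      &r->cookie_node, &r->cookie_seq) == 7;
}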
spin_lock(&dlm->track_lock); - if (oldres) - track_list = &oldres->tracking; - else { - track_list = &dlm->tracking_list; - if (list_empty(track_list)) { - dl = NULL; - spin_unlock(&dlm->track_lock); - goto bail; - } - } - - list_for_each_entry(res, track_list, tracking) { - if (&res->tracking == &dlm->tracking_list) - res = NULL; - else - dlm_lockres_get(res); - break; - } - spin_unlock(&dlm->track_lock); - - if (oldres) - dlm_lockres_put(oldres); - - dl->dl_res = res; - - if (res) { - spin_lock(&res->spinlock); - dump_lockres(res, dl->dl_buf, dl->dl_len - 1); - spin_unlock(&res->spinlock); - } else - dl = NULL; - -bail: - /* passed to seq_show */ - return dl; -} - -static void lockres_seq_stop(struct seq_file *m, void *v) -{ -} - -static void *lockres_seq_next(struct seq_file *m, void *v, loff_t *pos) -{ - return NULL; -} - -static int lockres_seq_show(struct seq_file *s, void *v) -{ - struct debug_lockres *dl = (struct debug_lockres *)v; - - seq_printf(s, "%s", dl->dl_buf); - - return 0; -} - -static const struct seq_operations debug_lockres_ops = { - .start = lockres_seq_start, - .stop = lockres_seq_stop, - .next = lockres_seq_next, - .show = lockres_seq_show, -}; - -static int debug_lockres_open(struct inode *inode, struct file *file) -{ - struct dlm_ctxt *dlm = inode->i_private; - int ret = -ENOMEM; - struct seq_file *seq; - struct debug_lockres *dl = NULL; - - dl = kzalloc(sizeof(struct debug_lockres), GFP_KERNEL); - if (!dl) { - mlog_errno(ret); - goto bail; - } - - dl->dl_len = PAGE_SIZE; - dl->dl_buf = kmalloc(dl->dl_len, GFP_KERNEL); - if (!dl->dl_buf) { - mlog_errno(ret); - goto bail; - } - - ret = seq_open(file, &debug_lockres_ops); - if (ret) { - mlog_errno(ret); - goto bail; - } - - seq = file->private_data; - seq->private = dl; - - dlm_grab(dlm); - dl->dl_ctxt = dlm; - - return 0; -bail: - if (dl) - kfree(dl->dl_buf); - kfree(dl); - return ret; -} - -static int debug_lockres_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = file->private_data; - struct debug_lockres *dl = (struct debug_lockres *)seq->private; - - if (dl->dl_res) - dlm_lockres_put(dl->dl_res); - dlm_put(dl->dl_ctxt); - kfree(dl->dl_buf); - return seq_release_private(inode, file); -} - -static const struct file_operations debug_lockres_fops = { - .open = debug_lockres_open, - .release = debug_lockres_release, - .read = seq_read, - .llseek = seq_lseek, -}; -/* end - debug lockres funcs */ - -/* begin - debug state funcs */ -static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) -{ - int out = 0; - struct dlm_reco_node_data *node; - char *state; - int cur_mles = 0, tot_mles = 0; - int i; - - spin_lock(&dlm->spinlock); - - switch (dlm->dlm_state) { - case DLM_CTXT_NEW: - state = "NEW"; break; - case DLM_CTXT_JOINED: - state = "JOINED"; break; - case DLM_CTXT_IN_SHUTDOWN: - state = "SHUTDOWN"; break; - case DLM_CTXT_LEAVING: - state = "LEAVING"; break; - default: - state = "UNKNOWN"; break; - } - - /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ - out += snprintf(buf + out, len - out, - "Domain: %s Key: 0x%08x Protocol: %d.%d\n", - dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major, - dlm->dlm_locking_proto.pv_minor); - - /* Thread Pid: xxx Node: xxx State: xxxxx */ - out += snprintf(buf + out, len - out, - "Thread Pid: %d Node: %d State: %s\n", - task_pid_nr(dlm->dlm_thread_task), dlm->node_num, state); - - /* Number of Joins: xxx Joining Node: xxx */ - out += snprintf(buf + out, len - out, - "Number of Joins: %d Joining Node: %d\n", - dlm->num_joins, dlm->joining_node); - - 
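/* stringify_nodemap(), used for the map dumps below, walks the bitmap
 * with find_next_bit(). A standalone userspace equivalent of the same
 * scan, testing one bit at a time instead of skipping zero words;
 * MAX_NODES stands in for O2NM_MAX_NODES. */

#include <stdio.h>

#define MAX_NODES 255
#define ULONG_BITS (8 * (int)sizeof(unsigned long))

static void print_nodemap(const unsigned long *map)
{
	for (int bit = 0; bit < MAX_NODES; bit++)
		if (map[bit / ULONG_BITS] & (1UL << (bit % ULONG_BITS)))
			printf("%d ", bit);
	printf("\n");
}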
/* Domain Map: xx xx xx */ - out += snprintf(buf + out, len - out, "Domain Map: "); - out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES, - buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); - - /* Exit Domain Map: xx xx xx */ - out += snprintf(buf + out, len - out, "Exit Domain Map: "); - out += stringify_nodemap(dlm->exit_domain_map, O2NM_MAX_NODES, - buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); - - /* Live Map: xx xx xx */ - out += snprintf(buf + out, len - out, "Live Map: "); - out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES, - buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); - - /* Lock Resources: xxx (xxx) */ - out += snprintf(buf + out, len - out, - "Lock Resources: %d (%d)\n", - atomic_read(&dlm->res_cur_count), - atomic_read(&dlm->res_tot_count)); - - for (i = 0; i < DLM_MLE_NUM_TYPES; ++i) - tot_mles += atomic_read(&dlm->mle_tot_count[i]); - - for (i = 0; i < DLM_MLE_NUM_TYPES; ++i) - cur_mles += atomic_read(&dlm->mle_cur_count[i]); - - /* MLEs: xxx (xxx) */ - out += snprintf(buf + out, len - out, - "MLEs: %d (%d)\n", cur_mles, tot_mles); - - /* Blocking: xxx (xxx) */ - out += snprintf(buf + out, len - out, - " Blocking: %d (%d)\n", - atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]), - atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK])); - - /* Mastery: xxx (xxx) */ - out += snprintf(buf + out, len - out, - " Mastery: %d (%d)\n", - atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]), - atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER])); - - /* Migration: xxx (xxx) */ - out += snprintf(buf + out, len - out, - " Migration: %d (%d)\n", - atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]), - atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION])); - - /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */ - out += snprintf(buf + out, len - out, - "Lists: Dirty=%s Purge=%s PendingASTs=%s " - "PendingBASTs=%s\n", - (list_empty(&dlm->dirty_list) ? "Empty" : "InUse"), - (list_empty(&dlm->purge_list) ? "Empty" : "InUse"), - (list_empty(&dlm->pending_asts) ? "Empty" : "InUse"), - (list_empty(&dlm->pending_basts) ? "Empty" : "InUse")); - - /* Purge Count: xxx Refs: xxx */ - out += snprintf(buf + out, len - out, - "Purge Count: %d Refs: %d\n", dlm->purge_count, - atomic_read(&dlm->dlm_refs.refcount)); - - /* Dead Node: xxx */ - out += snprintf(buf + out, len - out, - "Dead Node: %d\n", dlm->reco.dead_node); - - /* What about DLM_RECO_STATE_FINALIZE? 
*/ - if (dlm->reco.state == DLM_RECO_STATE_ACTIVE) - state = "ACTIVE"; - else - state = "INACTIVE"; - - /* Recovery Pid: xxxx Master: xxx State: xxxx */ - out += snprintf(buf + out, len - out, - "Recovery Pid: %d Master: %d State: %s\n", - task_pid_nr(dlm->dlm_reco_thread_task), - dlm->reco.new_master, state); - - /* Recovery Map: xx xx */ - out += snprintf(buf + out, len - out, "Recovery Map: "); - out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES, - buf + out, len - out); - out += snprintf(buf + out, len - out, "\n"); - - /* Recovery Node State: */ - out += snprintf(buf + out, len - out, "Recovery Node State:\n"); - list_for_each_entry(node, &dlm->reco.node_data, list) { - switch (node->state) { - case DLM_RECO_NODE_DATA_INIT: - state = "INIT"; - break; - case DLM_RECO_NODE_DATA_REQUESTING: - state = "REQUESTING"; - break; - case DLM_RECO_NODE_DATA_DEAD: - state = "DEAD"; - break; - case DLM_RECO_NODE_DATA_RECEIVING: - state = "RECEIVING"; - break; - case DLM_RECO_NODE_DATA_REQUESTED: - state = "REQUESTED"; - break; - case DLM_RECO_NODE_DATA_DONE: - state = "DONE"; - break; - case DLM_RECO_NODE_DATA_FINALIZE_SENT: - state = "FINALIZE-SENT"; - break; - default: - state = "BAD"; - break; - } - out += snprintf(buf + out, len - out, "\t%u - %s\n", - node->node_num, state); - } - - spin_unlock(&dlm->spinlock); - - return out; -} - -static int debug_state_open(struct inode *inode, struct file *file) -{ - struct dlm_ctxt *dlm = inode->i_private; - char *buf = NULL; - - buf = (char *) get_zeroed_page(GFP_NOFS); - if (!buf) - goto bail; - - i_size_write(inode, debug_state_print(dlm, buf, PAGE_SIZE - 1)); - - file->private_data = buf; - - return 0; -bail: - return -ENOMEM; -} - -static const struct file_operations debug_state_fops = { - .open = debug_state_open, - .release = debug_release, - .read = debug_read, - .llseek = generic_file_llseek, -}; -/* end - debug state funcs */ - -/* files in subroot */ -int dlm_debug_init(struct dlm_ctxt *dlm) -{ - struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt; - - /* for dumping dlm_ctxt */ - dc->debug_state_dentry = debugfs_create_file(DLM_DEBUGFS_DLM_STATE, - S_IFREG|S_IRUSR, - dlm->dlm_debugfs_subroot, - dlm, &debug_state_fops); - if (!dc->debug_state_dentry) { - mlog_errno(-ENOMEM); - goto bail; - } - - /* for dumping lockres */ - dc->debug_lockres_dentry = - debugfs_create_file(DLM_DEBUGFS_LOCKING_STATE, - S_IFREG|S_IRUSR, - dlm->dlm_debugfs_subroot, - dlm, &debug_lockres_fops); - if (!dc->debug_lockres_dentry) { - mlog_errno(-ENOMEM); - goto bail; - } - - /* for dumping mles */ - dc->debug_mle_dentry = debugfs_create_file(DLM_DEBUGFS_MLE_STATE, - S_IFREG|S_IRUSR, - dlm->dlm_debugfs_subroot, - dlm, &debug_mle_fops); - if (!dc->debug_mle_dentry) { - mlog_errno(-ENOMEM); - goto bail; - } - - /* for dumping lockres on the purge list */ - dc->debug_purgelist_dentry = - debugfs_create_file(DLM_DEBUGFS_PURGE_LIST, - S_IFREG|S_IRUSR, - dlm->dlm_debugfs_subroot, - dlm, &debug_purgelist_fops); - if (!dc->debug_purgelist_dentry) { - mlog_errno(-ENOMEM); - goto bail; - } - - dlm_debug_get(dc); - return 0; - -bail: - dlm_debug_shutdown(dlm); - return -ENOMEM; -} - -void dlm_debug_shutdown(struct dlm_ctxt *dlm) -{ - struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt; - - if (dc) { - debugfs_remove(dc->debug_purgelist_dentry); - debugfs_remove(dc->debug_mle_dentry); - debugfs_remove(dc->debug_lockres_dentry); - debugfs_remove(dc->debug_state_dentry); - dlm_debug_put(dc); - } -} - -/* subroot - domain dir */ -int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) -{ 
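/* The debugfs wiring in this file is a strict create/teardown pairing:
 * one directory per domain under "o2dlm", four read-only files inside,
 * and every failure path funnels through dlm_debug_shutdown() or
 * dlm_destroy_debugfs_subroot() so partial creation is undone. A
 * kernel-style sketch of the same pattern with illustrative names (the
 * file_operations hooks are elided): */

#include <linux/debugfs.h>
#include <linux/errno.h>
#include <linux/fs.h>

static struct dentry *mydrv_dir;
static struct dentry *mydrv_state;
static const struct file_operations mydrv_state_fops; /* hooks elided */

static int mydrv_debugfs_init(void)
{
	mydrv_dir = debugfs_create_dir("mydrv", NULL);
	if (!mydrv_dir)
		return -ENOMEM;

	mydrv_state = debugfs_create_file("state", S_IFREG | S_IRUSR,
					  mydrv_dir, NULL, &mydrv_state_fops);
	if (!mydrv_state) {
		debugfs_remove(mydrv_dir);   /* undo partial creation */
		return -ENOMEM;
	}
	return 0;
}

static void mydrv_debugfs_exit(void)
{
	debugfs_remove(mydrv_state);
	debugfs_remove(mydrv_dir);
}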
- dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name, - dlm_debugfs_root); - if (!dlm->dlm_debugfs_subroot) { - mlog_errno(-ENOMEM); - goto bail; - } - - dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt), - GFP_KERNEL); - if (!dlm->dlm_debug_ctxt) { - mlog_errno(-ENOMEM); - goto bail; - } - kref_init(&dlm->dlm_debug_ctxt->debug_refcnt); - - return 0; -bail: - dlm_destroy_debugfs_subroot(dlm); - return -ENOMEM; -} - -void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) -{ - debugfs_remove(dlm->dlm_debugfs_subroot); -} - -/* debugfs root */ -int dlm_create_debugfs_root(void) -{ - dlm_debugfs_root = debugfs_create_dir(DLM_DEBUGFS_DIR, NULL); - if (!dlm_debugfs_root) { - mlog_errno(-ENOMEM); - return -ENOMEM; - } - return 0; -} - -void dlm_destroy_debugfs_root(void) -{ - debugfs_remove(dlm_debugfs_root); -} -#endif /* CONFIG_DEBUG_FS */ diff --git a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmdebug.h b/ANDROID_3.4.5/fs/ocfs2/dlm/dlmdebug.h deleted file mode 100644 index 1f27c481..00000000 --- a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmdebug.h +++ /dev/null @@ -1,81 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmdebug.h - * - * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. 
- * - */ - -#ifndef DLMDEBUG_H -#define DLMDEBUG_H - -void dlm_print_one_mle(struct dlm_master_list_entry *mle); - -#ifdef CONFIG_DEBUG_FS - -struct dlm_debug_ctxt { - struct kref debug_refcnt; - struct dentry *debug_state_dentry; - struct dentry *debug_lockres_dentry; - struct dentry *debug_mle_dentry; - struct dentry *debug_purgelist_dentry; -}; - -struct debug_lockres { - int dl_len; - char *dl_buf; - struct dlm_ctxt *dl_ctxt; - struct dlm_lock_resource *dl_res; -}; - -int dlm_debug_init(struct dlm_ctxt *dlm); -void dlm_debug_shutdown(struct dlm_ctxt *dlm); - -int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm); -void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm); - -int dlm_create_debugfs_root(void); -void dlm_destroy_debugfs_root(void); - -#else - -static inline int dlm_debug_init(struct dlm_ctxt *dlm) -{ - return 0; -} -static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm) -{ -} -static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) -{ - return 0; -} -static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) -{ -} -static inline int dlm_create_debugfs_root(void) -{ - return 0; -} -static inline void dlm_destroy_debugfs_root(void) -{ -} - -#endif /* CONFIG_DEBUG_FS */ -#endif /* DLMDEBUG_H */ diff --git a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmdomain.c b/ANDROID_3.4.5/fs/ocfs2/dlm/dlmdomain.c deleted file mode 100644 index 92f2ead0..00000000 --- a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmdomain.c +++ /dev/null @@ -1,2397 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmdomain.c - * - * defines domain join / leave apis - * - * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/highmem.h> -#include <linux/init.h> -#include <linux/spinlock.h> -#include <linux/delay.h> -#include <linux/err.h> -#include <linux/debugfs.h> - -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" - -#include "dlmapi.h" -#include "dlmcommon.h" -#include "dlmdomain.h" -#include "dlmdebug.h" - -#include "dlmver.h" - -#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) -#include "cluster/masklog.h" - -/* - * ocfs2 node maps are array of long int, which limits to send them freely - * across the wire due to endianness issues. To workaround this, we convert - * long ints to byte arrays. 
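A standalone sketch of that byte-array marshaling: bit N of the native unsigned-long bitmap becomes bit (N & 7) of byte (N >> 3), which reads back identically on little- and big-endian nodes. It mirrors the byte_set_bit() and byte_copymap() helpers defined just below:

#include <string.h>

static void byte_set(unsigned char map[], unsigned nr)
{
	map[nr >> 3] |= 1u << (nr & 7);
}

static void ulong_map_to_bytes(unsigned char *dst, const unsigned long *src,
			       unsigned nbits)
{
	const unsigned bpl = 8 * sizeof(unsigned long);

	memset(dst, 0, (nbits + 7) >> 3);
	for (unsigned n = 0; n < nbits; n++)
		if (src[n / bpl] & (1UL << (n % bpl)))
			byte_set(dst, n);
}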
Following 3 routines are helper functions to - * set/test/copy bits within those array of bytes - */ -static inline void byte_set_bit(u8 nr, u8 map[]) -{ - map[nr >> 3] |= (1UL << (nr & 7)); -} - -static inline int byte_test_bit(u8 nr, u8 map[]) -{ - return ((1UL << (nr & 7)) & (map[nr >> 3])) != 0; -} - -static inline void byte_copymap(u8 dmap[], unsigned long smap[], - unsigned int sz) -{ - unsigned int nn; - - if (!sz) - return; - - memset(dmap, 0, ((sz + 7) >> 3)); - for (nn = 0 ; nn < sz; nn++) - if (test_bit(nn, smap)) - byte_set_bit(nn, dmap); -} - -static void dlm_free_pagevec(void **vec, int pages) -{ - while (pages--) - free_page((unsigned long)vec[pages]); - kfree(vec); -} - -static void **dlm_alloc_pagevec(int pages) -{ - void **vec = kmalloc(pages * sizeof(void *), GFP_KERNEL); - int i; - - if (!vec) - return NULL; - - for (i = 0; i < pages; i++) - if (!(vec[i] = (void *)__get_free_page(GFP_KERNEL))) - goto out_free; - - mlog(0, "Allocated DLM hash pagevec; %d pages (%lu expected), %lu buckets per page\n", - pages, (unsigned long)DLM_HASH_PAGES, - (unsigned long)DLM_BUCKETS_PER_PAGE); - return vec; -out_free: - dlm_free_pagevec(vec, i); - return NULL; -} - -/* - * - * spinlock lock ordering: if multiple locks are needed, obey this ordering: - * dlm_domain_lock - * struct dlm_ctxt->spinlock - * struct dlm_lock_resource->spinlock - * struct dlm_ctxt->master_lock - * struct dlm_ctxt->ast_lock - * dlm_master_list_entry->spinlock - * dlm_lock->spinlock - * - */ - -DEFINE_SPINLOCK(dlm_domain_lock); -LIST_HEAD(dlm_domains); -static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); - -/* - * The supported protocol version for DLM communication. Running domains - * will have a negotiated version with the same major number and a minor - * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should - * be used to determine what a running domain is actually using. 
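A sketch of the negotiation rule this comment describes: majors must match exactly, and the running minor settles on the smaller of the two sides. The real dlm_protocol_compare() is declared further down; this is an illustrative model of the rule, not its verbatim body:

struct proto_version { unsigned char major, minor; };

/* Returns 0 and lowers running->minor if compatible, -1 otherwise. */
static int proto_negotiate(struct proto_version *running,
			   const struct proto_version *request)
{
	if (running->major != request->major)
		return -1;                        /* incompatible */
	if (request->minor < running->minor)
		running->minor = request->minor;  /* settle on the smaller */
	return 0;
}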
- * - * New in version 1.1: - * - Message DLM_QUERY_REGION added to support global heartbeat - * - Message DLM_QUERY_NODEINFO added to allow online node removes - * New in version 1.2: - * - Message DLM_BEGIN_EXIT_DOMAIN_MSG added to mark start of exit domain - */ -static const struct dlm_protocol_version dlm_protocol = { - .pv_major = 1, - .pv_minor = 2, -}; - -#define DLM_DOMAIN_BACKOFF_MS 200 - -static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); -static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); -static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); -static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, - void *data, void **ret_data); -static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data); -static int dlm_protocol_compare(struct dlm_protocol_version *existing, - struct dlm_protocol_version *request); - -static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm); - -void __dlm_unhash_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) -{ - if (hlist_unhashed(&res->hash_node)) - return; - - mlog(0, "%s: Unhash res %.*s\n", dlm->name, res->lockname.len, - res->lockname.name); - hlist_del_init(&res->hash_node); - dlm_lockres_put(res); -} - -void __dlm_insert_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) -{ - struct hlist_head *bucket; - struct qstr *q; - - assert_spin_locked(&dlm->spinlock); - - q = &res->lockname; - bucket = dlm_lockres_hash(dlm, q->hash); - - /* get a reference for our hashtable */ - dlm_lockres_get(res); - - hlist_add_head(&res->hash_node, bucket); - - mlog(0, "%s: Hash res %.*s\n", dlm->name, res->lockname.len, - res->lockname.name); -} - -struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, - const char *name, - unsigned int len, - unsigned int hash) -{ - struct hlist_head *bucket; - struct hlist_node *list; - - mlog(0, "%.*s\n", len, name); - - assert_spin_locked(&dlm->spinlock); - - bucket = dlm_lockres_hash(dlm, hash); - - hlist_for_each(list, bucket) { - struct dlm_lock_resource *res = hlist_entry(list, - struct dlm_lock_resource, hash_node); - if (res->lockname.name[0] != name[0]) - continue; - if (unlikely(res->lockname.len != len)) - continue; - if (memcmp(res->lockname.name + 1, name + 1, len - 1)) - continue; - dlm_lockres_get(res); - return res; - } - return NULL; -} - -/* intended to be called by functions which do not care about lock - * resources which are being purged (most net _handler functions). - * this will return NULL for any lock resource which is found but - * currently in the process of dropping its mastery reference. - * use __dlm_lookup_lockres_full when you need the lock resource - * regardless (e.g. 
dlm_get_lock_resource) */ -struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, - const char *name, - unsigned int len, - unsigned int hash) -{ - struct dlm_lock_resource *res = NULL; - - mlog(0, "%.*s\n", len, name); - - assert_spin_locked(&dlm->spinlock); - - res = __dlm_lookup_lockres_full(dlm, name, len, hash); - if (res) { - spin_lock(&res->spinlock); - if (res->state & DLM_LOCK_RES_DROPPING_REF) { - spin_unlock(&res->spinlock); - dlm_lockres_put(res); - return NULL; - } - spin_unlock(&res->spinlock); - } - - return res; -} - -struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, - const char *name, - unsigned int len) -{ - struct dlm_lock_resource *res; - unsigned int hash = dlm_lockid_hash(name, len); - - spin_lock(&dlm->spinlock); - res = __dlm_lookup_lockres(dlm, name, len, hash); - spin_unlock(&dlm->spinlock); - return res; -} - -static struct dlm_ctxt * __dlm_lookup_domain_full(const char *domain, int len) -{ - struct dlm_ctxt *tmp = NULL; - struct list_head *iter; - - assert_spin_locked(&dlm_domain_lock); - - /* tmp->name here is always NULL terminated, - * but domain may not be! */ - list_for_each(iter, &dlm_domains) { - tmp = list_entry (iter, struct dlm_ctxt, list); - if (strlen(tmp->name) == len && - memcmp(tmp->name, domain, len)==0) - break; - tmp = NULL; - } - - return tmp; -} - -/* For null terminated domain strings ONLY */ -static struct dlm_ctxt * __dlm_lookup_domain(const char *domain) -{ - assert_spin_locked(&dlm_domain_lock); - - return __dlm_lookup_domain_full(domain, strlen(domain)); -} - - -/* returns true on one of two conditions: - * 1) the domain does not exist - * 2) the domain exists and it's state is "joined" */ -static int dlm_wait_on_domain_helper(const char *domain) -{ - int ret = 0; - struct dlm_ctxt *tmp = NULL; - - spin_lock(&dlm_domain_lock); - - tmp = __dlm_lookup_domain(domain); - if (!tmp) - ret = 1; - else if (tmp->dlm_state == DLM_CTXT_JOINED) - ret = 1; - - spin_unlock(&dlm_domain_lock); - return ret; -} - -static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) -{ - dlm_destroy_debugfs_subroot(dlm); - - if (dlm->lockres_hash) - dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); - - if (dlm->master_hash) - dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES); - - if (dlm->name) - kfree(dlm->name); - - kfree(dlm); -} - -/* A little strange - this function will be called while holding - * dlm_domain_lock and is expected to be holding it on the way out. We - * will however drop and reacquire it multiple times */ -static void dlm_ctxt_release(struct kref *kref) -{ - struct dlm_ctxt *dlm; - - dlm = container_of(kref, struct dlm_ctxt, dlm_refs); - - BUG_ON(dlm->num_joins); - BUG_ON(dlm->dlm_state == DLM_CTXT_JOINED); - - /* we may still be in the list if we hit an error during join. */ - list_del_init(&dlm->list); - - spin_unlock(&dlm_domain_lock); - - mlog(0, "freeing memory from domain %s\n", dlm->name); - - wake_up(&dlm_domain_events); - - dlm_free_ctxt_mem(dlm); - - spin_lock(&dlm_domain_lock); -} - -void dlm_put(struct dlm_ctxt *dlm) -{ - spin_lock(&dlm_domain_lock); - kref_put(&dlm->dlm_refs, dlm_ctxt_release); - spin_unlock(&dlm_domain_lock); -} - -static void __dlm_get(struct dlm_ctxt *dlm) -{ - kref_get(&dlm->dlm_refs); -} - -/* given a questionable reference to a dlm object, gets a reference if - * it can find it in the list, otherwise returns NULL in which case - * you shouldn't trust your pointer. 
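The dlm_grab()/dlm_put() pair around here implements a classic weak-pointer revalidation: the global dlm_domains list plus dlm_domain_lock is the source of truth, and a reference is taken only if the object is still on the list. A minimal kernel-style sketch of the pattern with a simplified object type:

#include <linux/kref.h>
#include <linux/list.h>
#include <linux/spinlock.h>

struct obj {
	struct list_head list;
	struct kref refs;
};

static LIST_HEAD(obj_list);
static DEFINE_SPINLOCK(obj_list_lock);

/* Returns the object with a reference held, or NULL if it is gone. */
static struct obj *obj_grab(struct obj *weak)
{
	struct obj *o, *found = NULL;

	spin_lock(&obj_list_lock);
	list_for_each_entry(o, &obj_list, list) {
		if (o == weak) {
			kref_get(&o->refs);
			found = o;
			break;
		}
	}
	spin_unlock(&obj_list_lock);
	return found;
}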
*/ -struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm) -{ - struct list_head *iter; - struct dlm_ctxt *target = NULL; - - spin_lock(&dlm_domain_lock); - - list_for_each(iter, &dlm_domains) { - target = list_entry (iter, struct dlm_ctxt, list); - - if (target == dlm) { - __dlm_get(target); - break; - } - - target = NULL; - } - - spin_unlock(&dlm_domain_lock); - - return target; -} - -int dlm_domain_fully_joined(struct dlm_ctxt *dlm) -{ - int ret; - - spin_lock(&dlm_domain_lock); - ret = (dlm->dlm_state == DLM_CTXT_JOINED) || - (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN); - spin_unlock(&dlm_domain_lock); - - return ret; -} - -static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm) -{ - if (dlm->dlm_worker) { - flush_workqueue(dlm->dlm_worker); - destroy_workqueue(dlm->dlm_worker); - dlm->dlm_worker = NULL; - } -} - -static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm) -{ - dlm_unregister_domain_handlers(dlm); - dlm_debug_shutdown(dlm); - dlm_complete_thread(dlm); - dlm_complete_recovery_thread(dlm); - dlm_destroy_dlm_worker(dlm); - - /* We've left the domain. Now we can take ourselves out of the - * list and allow the kref stuff to help us free the - * memory. */ - spin_lock(&dlm_domain_lock); - list_del_init(&dlm->list); - spin_unlock(&dlm_domain_lock); - - /* Wake up anyone waiting for us to remove this domain */ - wake_up(&dlm_domain_events); -} - -static int dlm_migrate_all_locks(struct dlm_ctxt *dlm) -{ - int i, num, n, ret = 0; - struct dlm_lock_resource *res; - struct hlist_node *iter; - struct hlist_head *bucket; - int dropped; - - mlog(0, "Migrating locks from domain %s\n", dlm->name); - - num = 0; - spin_lock(&dlm->spinlock); - for (i = 0; i < DLM_HASH_BUCKETS; i++) { -redo_bucket: - n = 0; - bucket = dlm_lockres_hash(dlm, i); - iter = bucket->first; - while (iter) { - n++; - res = hlist_entry(iter, struct dlm_lock_resource, - hash_node); - dlm_lockres_get(res); - /* migrate, if necessary. this will drop the dlm - * spinlock and retake it if it does migration. */ - dropped = dlm_empty_lockres(dlm, res); - - spin_lock(&res->spinlock); - if (dropped) - __dlm_lockres_calc_usage(dlm, res); - else - iter = res->hash_node.next; - spin_unlock(&res->spinlock); - - dlm_lockres_put(res); - - if (dropped) { - cond_resched_lock(&dlm->spinlock); - goto redo_bucket; - } - } - cond_resched_lock(&dlm->spinlock); - num += n; - } - spin_unlock(&dlm->spinlock); - wake_up(&dlm->dlm_thread_wq); - - /* let the dlm thread take care of purging, keep scanning until - * nothing remains in the hash */ - if (num) { - mlog(0, "%s: %d lock resources in hash last pass\n", - dlm->name, num); - ret = -EAGAIN; - } - mlog(0, "DONE Migrating locks from domain %s\n", dlm->name); - return ret; -} - -static int dlm_no_joining_node(struct dlm_ctxt *dlm) -{ - int ret; - - spin_lock(&dlm->spinlock); - ret = dlm->joining_node == DLM_LOCK_RES_OWNER_UNKNOWN; - spin_unlock(&dlm->spinlock); - - return ret; -} - -static int dlm_begin_exit_domain_handler(struct o2net_msg *msg, u32 len, - void *data, void **ret_data) -{ - struct dlm_ctxt *dlm = data; - unsigned int node; - struct dlm_exit_domain *exit_msg = (struct dlm_exit_domain *) msg->buf; - - if (!dlm_grab(dlm)) - return 0; - - node = exit_msg->node_idx; - mlog(0, "%s: Node %u sent a begin exit domain message\n", dlm->name, node); - - spin_lock(&dlm->spinlock); - set_bit(node, dlm->exit_domain_map); - spin_unlock(&dlm->spinlock); - - dlm_put(dlm); - - return 0; -} - -static void dlm_mark_domain_leaving(struct dlm_ctxt *dlm) -{ - /* Yikes, a double spinlock! 
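 * The nesting used below (and by dlm_query_join_handler() further
 * down) is always the same way around, which is what keeps it
 * deadlock-free:
 *
 *	spin_lock(&dlm_domain_lock);	global lock, taken first
 *	spin_lock(&dlm->spinlock);	per-domain lock, taken second
 *	...
 *	spin_unlock(&dlm->spinlock);
 *	spin_unlock(&dlm_domain_lock);
 *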
I need domain_lock for the dlm - * state and the dlm spinlock for join state... Sorry! */ -again: - spin_lock(&dlm_domain_lock); - spin_lock(&dlm->spinlock); - - if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) { - mlog(0, "Node %d is joining, we wait on it.\n", - dlm->joining_node); - spin_unlock(&dlm->spinlock); - spin_unlock(&dlm_domain_lock); - - wait_event(dlm->dlm_join_events, dlm_no_joining_node(dlm)); - goto again; - } - - dlm->dlm_state = DLM_CTXT_LEAVING; - spin_unlock(&dlm->spinlock); - spin_unlock(&dlm_domain_lock); -} - -static void __dlm_print_nodes(struct dlm_ctxt *dlm) -{ - int node = -1, num = 0; - - assert_spin_locked(&dlm->spinlock); - - printk("( "); - while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, - node + 1)) < O2NM_MAX_NODES) { - printk("%d ", node); - ++num; - } - printk(") %u nodes\n", num); -} - -static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - struct dlm_ctxt *dlm = data; - unsigned int node; - struct dlm_exit_domain *exit_msg = (struct dlm_exit_domain *) msg->buf; - - mlog(0, "%p %u %p", msg, len, data); - - if (!dlm_grab(dlm)) - return 0; - - node = exit_msg->node_idx; - - spin_lock(&dlm->spinlock); - clear_bit(node, dlm->domain_map); - clear_bit(node, dlm->exit_domain_map); - printk(KERN_NOTICE "o2dlm: Node %u leaves domain %s ", node, dlm->name); - __dlm_print_nodes(dlm); - - /* notify anything attached to the heartbeat events */ - dlm_hb_event_notify_attached(dlm, node, 0); - - spin_unlock(&dlm->spinlock); - - dlm_put(dlm); - - return 0; -} - -static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, u32 msg_type, - unsigned int node) -{ - int status; - struct dlm_exit_domain leave_msg; - - mlog(0, "%s: Sending domain exit message %u to node %u\n", dlm->name, - msg_type, node); - - memset(&leave_msg, 0, sizeof(leave_msg)); - leave_msg.node_idx = dlm->node_num; - - status = o2net_send_message(msg_type, dlm->key, &leave_msg, - sizeof(leave_msg), node, NULL); - if (status < 0) - mlog(ML_ERROR, "Error %d sending domain exit message %u " - "to node %u on domain %s\n", status, msg_type, node, - dlm->name); - - return status; -} - -static void dlm_begin_exit_domain(struct dlm_ctxt *dlm) -{ - int node = -1; - - /* Support for begin exit domain was added in 1.2 */ - if (dlm->dlm_locking_proto.pv_major == 1 && - dlm->dlm_locking_proto.pv_minor < 2) - return; - - /* - * Unlike DLM_EXIT_DOMAIN_MSG, DLM_BEGIN_EXIT_DOMAIN_MSG is purely - * informational. Meaning if a node does not receive the message, - * so be it. - */ - spin_lock(&dlm->spinlock); - while (1) { - node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, node + 1); - if (node >= O2NM_MAX_NODES) - break; - if (node == dlm->node_num) - continue; - - spin_unlock(&dlm->spinlock); - dlm_send_one_domain_exit(dlm, DLM_BEGIN_EXIT_DOMAIN_MSG, node); - spin_lock(&dlm->spinlock); - } - spin_unlock(&dlm->spinlock); -} - -static void dlm_leave_domain(struct dlm_ctxt *dlm) -{ - int node, clear_node, status; - - /* At this point we've migrated away all our locks and won't - * accept mastership of new ones. The dlm is responsible for - * almost nothing now. We make sure not to confuse any joining - * nodes and then commence shutdown procedure. */ - - spin_lock(&dlm->spinlock); - /* Clear ourselves from the domain map */ - clear_bit(dlm->node_num, dlm->domain_map); - while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, - 0)) < O2NM_MAX_NODES) { - /* Drop the dlm spinlock. This is safe wrt the domain_map. 
- * -nodes cannot be added now as the - * query_join_handlers knows to respond with OK_NO_MAP - * -we catch the right network errors if a node is - * removed from the map while we're sending him the - * exit message. */ - spin_unlock(&dlm->spinlock); - - clear_node = 1; - - status = dlm_send_one_domain_exit(dlm, DLM_EXIT_DOMAIN_MSG, - node); - if (status < 0 && - status != -ENOPROTOOPT && - status != -ENOTCONN) { - mlog(ML_NOTICE, "Error %d sending domain exit message " - "to node %d\n", status, node); - - /* Not sure what to do here but lets sleep for - * a bit in case this was a transient - * error... */ - msleep(DLM_DOMAIN_BACKOFF_MS); - clear_node = 0; - } - - spin_lock(&dlm->spinlock); - /* If we're not clearing the node bit then we intend - * to loop back around to try again. */ - if (clear_node) - clear_bit(node, dlm->domain_map); - } - spin_unlock(&dlm->spinlock); -} - -int dlm_joined(struct dlm_ctxt *dlm) -{ - int ret = 0; - - spin_lock(&dlm_domain_lock); - - if (dlm->dlm_state == DLM_CTXT_JOINED) - ret = 1; - - spin_unlock(&dlm_domain_lock); - - return ret; -} - -int dlm_shutting_down(struct dlm_ctxt *dlm) -{ - int ret = 0; - - spin_lock(&dlm_domain_lock); - - if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN) - ret = 1; - - spin_unlock(&dlm_domain_lock); - - return ret; -} - -void dlm_unregister_domain(struct dlm_ctxt *dlm) -{ - int leave = 0; - struct dlm_lock_resource *res; - - spin_lock(&dlm_domain_lock); - BUG_ON(dlm->dlm_state != DLM_CTXT_JOINED); - BUG_ON(!dlm->num_joins); - - dlm->num_joins--; - if (!dlm->num_joins) { - /* We mark it "in shutdown" now so new register - * requests wait until we've completely left the - * domain. Don't use DLM_CTXT_LEAVING yet as we still - * want new domain joins to communicate with us at - * least until we've completed migration of our - * resources. */ - dlm->dlm_state = DLM_CTXT_IN_SHUTDOWN; - leave = 1; - } - spin_unlock(&dlm_domain_lock); - - if (leave) { - mlog(0, "shutting down domain %s\n", dlm->name); - dlm_begin_exit_domain(dlm); - - /* We changed dlm state, notify the thread */ - dlm_kick_thread(dlm, NULL); - - while (dlm_migrate_all_locks(dlm)) { - /* Give dlm_thread time to purge the lockres' */ - msleep(500); - mlog(0, "%s: more migration to do\n", dlm->name); - } - - /* This list should be empty. 
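 * by now: dlm_migrate_all_locks() above only returns 0 once nothing
 * is left in the lockres hash, and a purged lockres drops off the
 * tracking list when it is finally freed.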
If not, print remaining lockres */ - if (!list_empty(&dlm->tracking_list)) { - mlog(ML_ERROR, "Following lockres' are still on the " - "tracking list:\n"); - list_for_each_entry(res, &dlm->tracking_list, tracking) - dlm_print_one_lock_resource(res); - } - - dlm_mark_domain_leaving(dlm); - dlm_leave_domain(dlm); - printk(KERN_NOTICE "o2dlm: Leaving domain %s\n", dlm->name); - dlm_force_free_mles(dlm); - dlm_complete_dlm_shutdown(dlm); - } - dlm_put(dlm); -} -EXPORT_SYMBOL_GPL(dlm_unregister_domain); - -static int dlm_query_join_proto_check(char *proto_type, int node, - struct dlm_protocol_version *ours, - struct dlm_protocol_version *request) -{ - int rc; - struct dlm_protocol_version proto = *request; - - if (!dlm_protocol_compare(ours, &proto)) { - mlog(0, - "node %u wanted to join with %s locking protocol " - "%u.%u, we respond with %u.%u\n", - node, proto_type, - request->pv_major, - request->pv_minor, - proto.pv_major, proto.pv_minor); - request->pv_minor = proto.pv_minor; - rc = 0; - } else { - mlog(ML_NOTICE, - "Node %u wanted to join with %s locking " - "protocol %u.%u, but we have %u.%u, disallowing\n", - node, proto_type, - request->pv_major, - request->pv_minor, - ours->pv_major, - ours->pv_minor); - rc = 1; - } - - return rc; -} - -/* - * struct dlm_query_join_packet is made up of four one-byte fields. They - * are effectively in big-endian order already. However, little-endian - * machines swap them before putting the packet on the wire (because - * query_join's response is a status, and that status is treated as a u32 - * on the wire). Thus, a big-endian and little-endian machines will treat - * this structure differently. - * - * The solution is to have little-endian machines swap the structure when - * converting from the structure to the u32 representation. This will - * result in the structure having the correct format on the wire no matter - * the host endian format. - */ -static void dlm_query_join_packet_to_wire(struct dlm_query_join_packet *packet, - u32 *wire) -{ - union dlm_query_join_response response; - - response.packet = *packet; - *wire = cpu_to_be32(response.intval); -} - -static void dlm_query_join_wire_to_packet(u32 wire, - struct dlm_query_join_packet *packet) -{ - union dlm_query_join_response response; - - response.intval = cpu_to_be32(wire); - *packet = response.packet; -} - -static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - struct dlm_query_join_request *query; - struct dlm_query_join_packet packet = { - .code = JOIN_DISALLOW, - }; - struct dlm_ctxt *dlm = NULL; - u32 response; - u8 nodenum; - - query = (struct dlm_query_join_request *) msg->buf; - - mlog(0, "node %u wants to join domain %s\n", query->node_idx, - query->domain); - - /* - * If heartbeat doesn't consider the node live, tell it - * to back off and try again. This gives heartbeat a chance - * to catch up. - */ - if (!o2hb_check_node_heartbeating(query->node_idx)) { - mlog(0, "node %u is not in our live map yet\n", - query->node_idx); - - packet.code = JOIN_DISALLOW; - goto respond; - } - - packet.code = JOIN_OK_NO_MAP; - - spin_lock(&dlm_domain_lock); - dlm = __dlm_lookup_domain_full(query->domain, query->name_len); - if (!dlm) - goto unlock_respond; - - /* - * There is a small window where the joining node may not see the - * node(s) that just left but still part of the cluster. DISALLOW - * join request if joining node has different node map. 
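 *
 * Concretely (hypothetical node numbers): nodes 1 and 2 are in the
 * domain and node 2 exits; if node 3 asks to join after it has seen
 * node 2 go but before we have, node 2 is missing from node 3's map
 * while still set in our domain_map, so we answer JOIN_DISALLOW and
 * node 3 backs off and retries.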
- */ - nodenum=0; - while (nodenum < O2NM_MAX_NODES) { - if (test_bit(nodenum, dlm->domain_map)) { - if (!byte_test_bit(nodenum, query->node_map)) { - mlog(0, "disallow join as node %u does not " - "have node %u in its nodemap\n", - query->node_idx, nodenum); - packet.code = JOIN_DISALLOW; - goto unlock_respond; - } - } - nodenum++; - } - - /* Once the dlm ctxt is marked as leaving then we don't want - * to be put in someone's domain map. - * Also, explicitly disallow joining at certain troublesome - * times (ie. during recovery). */ - if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) { - int bit = query->node_idx; - spin_lock(&dlm->spinlock); - - if (dlm->dlm_state == DLM_CTXT_NEW && - dlm->joining_node == DLM_LOCK_RES_OWNER_UNKNOWN) { - /*If this is a brand new context and we - * haven't started our join process yet, then - * the other node won the race. */ - packet.code = JOIN_OK_NO_MAP; - } else if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) { - /* Disallow parallel joins. */ - packet.code = JOIN_DISALLOW; - } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { - mlog(0, "node %u trying to join, but recovery " - "is ongoing.\n", bit); - packet.code = JOIN_DISALLOW; - } else if (test_bit(bit, dlm->recovery_map)) { - mlog(0, "node %u trying to join, but it " - "still needs recovery.\n", bit); - packet.code = JOIN_DISALLOW; - } else if (test_bit(bit, dlm->domain_map)) { - mlog(0, "node %u trying to join, but it " - "is still in the domain! needs recovery?\n", - bit); - packet.code = JOIN_DISALLOW; - } else { - /* Alright we're fully a part of this domain - * so we keep some state as to who's joining - * and indicate to him that needs to be fixed - * up. */ - - /* Make sure we speak compatible locking protocols. */ - if (dlm_query_join_proto_check("DLM", bit, - &dlm->dlm_locking_proto, - &query->dlm_proto)) { - packet.code = JOIN_PROTOCOL_MISMATCH; - } else if (dlm_query_join_proto_check("fs", bit, - &dlm->fs_locking_proto, - &query->fs_proto)) { - packet.code = JOIN_PROTOCOL_MISMATCH; - } else { - packet.dlm_minor = query->dlm_proto.pv_minor; - packet.fs_minor = query->fs_proto.pv_minor; - packet.code = JOIN_OK; - __dlm_set_joining_node(dlm, query->node_idx); - } - } - - spin_unlock(&dlm->spinlock); - } -unlock_respond: - spin_unlock(&dlm_domain_lock); - -respond: - mlog(0, "We respond with %u\n", packet.code); - - dlm_query_join_packet_to_wire(&packet, &response); - return response; -} - -static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - struct dlm_assert_joined *assert; - struct dlm_ctxt *dlm = NULL; - - assert = (struct dlm_assert_joined *) msg->buf; - - mlog(0, "node %u asserts join on domain %s\n", assert->node_idx, - assert->domain); - - spin_lock(&dlm_domain_lock); - dlm = __dlm_lookup_domain_full(assert->domain, assert->name_len); - /* XXX should we consider no dlm ctxt an error? */ - if (dlm) { - spin_lock(&dlm->spinlock); - - /* Alright, this node has officially joined our - * domain. Set him in the map and clean up our - * leftover join state. 
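 *
 * For reference, this handler completes the join handshake that
 * dlm_try_to_join_domain() drives from the joiner's side, in this
 * order:
 *
 *	DLM_QUERY_JOIN_MSG	-> every live node, responses collected
 *	DLM_QUERY_NODEINFO	-> every yes-voter (protocol >= 1.1)
 *	DLM_QUERY_REGION	-> every yes-voter (protocol >= 1.1)
 *	DLM_ASSERT_JOINED_MSG	-> every yes-voter (handled here)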
*/ - BUG_ON(dlm->joining_node != assert->node_idx); - set_bit(assert->node_idx, dlm->domain_map); - clear_bit(assert->node_idx, dlm->exit_domain_map); - __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); - - printk(KERN_NOTICE "o2dlm: Node %u joins domain %s ", - assert->node_idx, dlm->name); - __dlm_print_nodes(dlm); - - /* notify anything attached to the heartbeat events */ - dlm_hb_event_notify_attached(dlm, assert->node_idx, 1); - - spin_unlock(&dlm->spinlock); - } - spin_unlock(&dlm_domain_lock); - - return 0; -} - -static int dlm_match_regions(struct dlm_ctxt *dlm, - struct dlm_query_region *qr, - char *local, int locallen) -{ - char *remote = qr->qr_regions; - char *l, *r; - int localnr, i, j, foundit; - int status = 0; - - if (!o2hb_global_heartbeat_active()) { - if (qr->qr_numregions) { - mlog(ML_ERROR, "Domain %s: Joining node %d has global " - "heartbeat enabled but local node %d does not\n", - qr->qr_domain, qr->qr_node, dlm->node_num); - status = -EINVAL; - } - goto bail; - } - - if (o2hb_global_heartbeat_active() && !qr->qr_numregions) { - mlog(ML_ERROR, "Domain %s: Local node %d has global " - "heartbeat enabled but joining node %d does not\n", - qr->qr_domain, dlm->node_num, qr->qr_node); - status = -EINVAL; - goto bail; - } - - r = remote; - for (i = 0; i < qr->qr_numregions; ++i) { - mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, r); - r += O2HB_MAX_REGION_NAME_LEN; - } - - localnr = min(O2NM_MAX_REGIONS, locallen/O2HB_MAX_REGION_NAME_LEN); - localnr = o2hb_get_all_regions(local, (u8)localnr); - - /* compare local regions with remote */ - l = local; - for (i = 0; i < localnr; ++i) { - foundit = 0; - r = remote; - for (j = 0; j <= qr->qr_numregions; ++j) { - if (!memcmp(l, r, O2HB_MAX_REGION_NAME_LEN)) { - foundit = 1; - break; - } - r += O2HB_MAX_REGION_NAME_LEN; - } - if (!foundit) { - status = -EINVAL; - mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " - "in local node %d but not in joining node %d\n", - qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, l, - dlm->node_num, qr->qr_node); - goto bail; - } - l += O2HB_MAX_REGION_NAME_LEN; - } - - /* compare remote with local regions */ - r = remote; - for (i = 0; i < qr->qr_numregions; ++i) { - foundit = 0; - l = local; - for (j = 0; j < localnr; ++j) { - if (!memcmp(r, l, O2HB_MAX_REGION_NAME_LEN)) { - foundit = 1; - break; - } - l += O2HB_MAX_REGION_NAME_LEN; - } - if (!foundit) { - status = -EINVAL; - mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " - "in joining node %d but not in local node %d\n", - qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, r, - qr->qr_node, dlm->node_num); - goto bail; - } - r += O2HB_MAX_REGION_NAME_LEN; - } - -bail: - return status; -} - -static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map) -{ - struct dlm_query_region *qr = NULL; - int status, ret = 0, i; - char *p; - - if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) - goto bail; - - qr = kzalloc(sizeof(struct dlm_query_region), GFP_KERNEL); - if (!qr) { - ret = -ENOMEM; - mlog_errno(ret); - goto bail; - } - - qr->qr_node = dlm->node_num; - qr->qr_namelen = strlen(dlm->name); - memcpy(qr->qr_domain, dlm->name, qr->qr_namelen); - /* if local hb, the numregions will be zero */ - if (o2hb_global_heartbeat_active()) - qr->qr_numregions = o2hb_get_all_regions(qr->qr_regions, - O2NM_MAX_REGIONS); - - p = qr->qr_regions; - for (i = 0; i < qr->qr_numregions; ++i, p += O2HB_MAX_REGION_NAME_LEN) - mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, p); - - i = -1; - while ((i = find_next_bit(node_map, 
O2NM_MAX_NODES, - i + 1)) < O2NM_MAX_NODES) { - if (i == dlm->node_num) - continue; - - mlog(0, "Sending regions to node %d\n", i); - - ret = o2net_send_message(DLM_QUERY_REGION, DLM_MOD_KEY, qr, - sizeof(struct dlm_query_region), - i, &status); - if (ret >= 0) - ret = status; - if (ret) { - mlog(ML_ERROR, "Region mismatch %d, node %d\n", - ret, i); - break; - } - } - -bail: - kfree(qr); - return ret; -} - -static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, - void *data, void **ret_data) -{ - struct dlm_query_region *qr; - struct dlm_ctxt *dlm = NULL; - char *local = NULL; - int status = 0; - int locked = 0; - - qr = (struct dlm_query_region *) msg->buf; - - mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node, - qr->qr_domain); - - /* buffer used in dlm_mast_regions() */ - local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL); - if (!local) { - status = -ENOMEM; - goto bail; - } - - status = -EINVAL; - - spin_lock(&dlm_domain_lock); - dlm = __dlm_lookup_domain_full(qr->qr_domain, qr->qr_namelen); - if (!dlm) { - mlog(ML_ERROR, "Node %d queried hb regions on domain %s " - "before join domain\n", qr->qr_node, qr->qr_domain); - goto bail; - } - - spin_lock(&dlm->spinlock); - locked = 1; - if (dlm->joining_node != qr->qr_node) { - mlog(ML_ERROR, "Node %d queried hb regions on domain %s " - "but joining node is %d\n", qr->qr_node, qr->qr_domain, - dlm->joining_node); - goto bail; - } - - /* Support for global heartbeat was added in 1.1 */ - if (dlm->dlm_locking_proto.pv_major == 1 && - dlm->dlm_locking_proto.pv_minor == 0) { - mlog(ML_ERROR, "Node %d queried hb regions on domain %s " - "but active dlm protocol is %d.%d\n", qr->qr_node, - qr->qr_domain, dlm->dlm_locking_proto.pv_major, - dlm->dlm_locking_proto.pv_minor); - goto bail; - } - - status = dlm_match_regions(dlm, qr, local, sizeof(qr->qr_regions)); - -bail: - if (locked) - spin_unlock(&dlm->spinlock); - spin_unlock(&dlm_domain_lock); - - kfree(local); - - return status; -} - -static int dlm_match_nodes(struct dlm_ctxt *dlm, struct dlm_query_nodeinfo *qn) -{ - struct o2nm_node *local; - struct dlm_node_info *remote; - int i, j; - int status = 0; - - for (j = 0; j < qn->qn_numnodes; ++j) - mlog(0, "Node %3d, %pI4:%u\n", qn->qn_nodes[j].ni_nodenum, - &(qn->qn_nodes[j].ni_ipv4_address), - ntohs(qn->qn_nodes[j].ni_ipv4_port)); - - for (i = 0; i < O2NM_MAX_NODES && !status; ++i) { - local = o2nm_get_node_by_num(i); - remote = NULL; - for (j = 0; j < qn->qn_numnodes; ++j) { - if (qn->qn_nodes[j].ni_nodenum == i) { - remote = &(qn->qn_nodes[j]); - break; - } - } - - if (!local && !remote) - continue; - - if ((local && !remote) || (!local && remote)) - status = -EINVAL; - - if (!status && - ((remote->ni_nodenum != local->nd_num) || - (remote->ni_ipv4_port != local->nd_ipv4_port) || - (remote->ni_ipv4_address != local->nd_ipv4_address))) - status = -EINVAL; - - if (status) { - if (remote && !local) - mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " - "registered in joining node %d but not in " - "local node %d\n", qn->qn_domain, - remote->ni_nodenum, - &(remote->ni_ipv4_address), - ntohs(remote->ni_ipv4_port), - qn->qn_nodenum, dlm->node_num); - if (local && !remote) - mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " - "registered in local node %d but not in " - "joining node %d\n", qn->qn_domain, - local->nd_num, &(local->nd_ipv4_address), - ntohs(local->nd_ipv4_port), - dlm->node_num, qn->qn_nodenum); - BUG_ON((!local && !remote)); - } - - if (local) - o2nm_node_put(local); - } - - return status; -} - -static int 
dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map) -{ - struct dlm_query_nodeinfo *qn = NULL; - struct o2nm_node *node; - int ret = 0, status, count, i; - - if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) - goto bail; - - qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL); - if (!qn) { - ret = -ENOMEM; - mlog_errno(ret); - goto bail; - } - - for (i = 0, count = 0; i < O2NM_MAX_NODES; ++i) { - node = o2nm_get_node_by_num(i); - if (!node) - continue; - qn->qn_nodes[count].ni_nodenum = node->nd_num; - qn->qn_nodes[count].ni_ipv4_port = node->nd_ipv4_port; - qn->qn_nodes[count].ni_ipv4_address = node->nd_ipv4_address; - mlog(0, "Node %3d, %pI4:%u\n", node->nd_num, - &(node->nd_ipv4_address), ntohs(node->nd_ipv4_port)); - ++count; - o2nm_node_put(node); - } - - qn->qn_nodenum = dlm->node_num; - qn->qn_numnodes = count; - qn->qn_namelen = strlen(dlm->name); - memcpy(qn->qn_domain, dlm->name, qn->qn_namelen); - - i = -1; - while ((i = find_next_bit(node_map, O2NM_MAX_NODES, - i + 1)) < O2NM_MAX_NODES) { - if (i == dlm->node_num) - continue; - - mlog(0, "Sending nodeinfo to node %d\n", i); - - ret = o2net_send_message(DLM_QUERY_NODEINFO, DLM_MOD_KEY, - qn, sizeof(struct dlm_query_nodeinfo), - i, &status); - if (ret >= 0) - ret = status; - if (ret) { - mlog(ML_ERROR, "node mismatch %d, node %d\n", ret, i); - break; - } - } - -bail: - kfree(qn); - return ret; -} - -static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len, - void *data, void **ret_data) -{ - struct dlm_query_nodeinfo *qn; - struct dlm_ctxt *dlm = NULL; - int locked = 0, status = -EINVAL; - - qn = (struct dlm_query_nodeinfo *) msg->buf; - - mlog(0, "Node %u queries nodes on domain %s\n", qn->qn_nodenum, - qn->qn_domain); - - spin_lock(&dlm_domain_lock); - dlm = __dlm_lookup_domain_full(qn->qn_domain, qn->qn_namelen); - if (!dlm) { - mlog(ML_ERROR, "Node %d queried nodes on domain %s before " - "join domain\n", qn->qn_nodenum, qn->qn_domain); - goto bail; - } - - spin_lock(&dlm->spinlock); - locked = 1; - if (dlm->joining_node != qn->qn_nodenum) { - mlog(ML_ERROR, "Node %d queried nodes on domain %s but " - "joining node is %d\n", qn->qn_nodenum, qn->qn_domain, - dlm->joining_node); - goto bail; - } - - /* Support for node query was added in 1.1 */ - if (dlm->dlm_locking_proto.pv_major == 1 && - dlm->dlm_locking_proto.pv_minor == 0) { - mlog(ML_ERROR, "Node %d queried nodes on domain %s " - "but active dlm protocol is %d.%d\n", qn->qn_nodenum, - qn->qn_domain, dlm->dlm_locking_proto.pv_major, - dlm->dlm_locking_proto.pv_minor); - goto bail; - } - - status = dlm_match_nodes(dlm, qn); - -bail: - if (locked) - spin_unlock(&dlm->spinlock); - spin_unlock(&dlm_domain_lock); - - return status; -} - -static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - struct dlm_cancel_join *cancel; - struct dlm_ctxt *dlm = NULL; - - cancel = (struct dlm_cancel_join *) msg->buf; - - mlog(0, "node %u cancels join on domain %s\n", cancel->node_idx, - cancel->domain); - - spin_lock(&dlm_domain_lock); - dlm = __dlm_lookup_domain_full(cancel->domain, cancel->name_len); - - if (dlm) { - spin_lock(&dlm->spinlock); - - /* Yikes, this guy wants to cancel his join. No - * problem, we simply cleanup our join state. 
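 *
 * These cancels come from dlm_send_join_cancels(): a joiner that has
 * already collected JOIN_OK responses but then hits an error or sees
 * the node map change must restart, and without the cancel we would
 * keep it recorded as dlm->joining_node and disallow all other
 * joins.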
*/ - BUG_ON(dlm->joining_node != cancel->node_idx); - __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); - - spin_unlock(&dlm->spinlock); - } - spin_unlock(&dlm_domain_lock); - - return 0; -} - -static int dlm_send_one_join_cancel(struct dlm_ctxt *dlm, - unsigned int node) -{ - int status; - struct dlm_cancel_join cancel_msg; - - memset(&cancel_msg, 0, sizeof(cancel_msg)); - cancel_msg.node_idx = dlm->node_num; - cancel_msg.name_len = strlen(dlm->name); - memcpy(cancel_msg.domain, dlm->name, cancel_msg.name_len); - - status = o2net_send_message(DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY, - &cancel_msg, sizeof(cancel_msg), node, - NULL); - if (status < 0) { - mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " - "node %u\n", status, DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY, - node); - goto bail; - } - -bail: - return status; -} - -/* map_size should be in bytes. */ -static int dlm_send_join_cancels(struct dlm_ctxt *dlm, - unsigned long *node_map, - unsigned int map_size) -{ - int status, tmpstat; - unsigned int node; - - if (map_size != (BITS_TO_LONGS(O2NM_MAX_NODES) * - sizeof(unsigned long))) { - mlog(ML_ERROR, - "map_size %u != BITS_TO_LONGS(O2NM_MAX_NODES) %u\n", - map_size, (unsigned)BITS_TO_LONGS(O2NM_MAX_NODES)); - return -EINVAL; - } - - status = 0; - node = -1; - while ((node = find_next_bit(node_map, O2NM_MAX_NODES, - node + 1)) < O2NM_MAX_NODES) { - if (node == dlm->node_num) - continue; - - tmpstat = dlm_send_one_join_cancel(dlm, node); - if (tmpstat) { - mlog(ML_ERROR, "Error return %d cancelling join on " - "node %d\n", tmpstat, node); - if (!status) - status = tmpstat; - } - } - - if (status) - mlog_errno(status); - return status; -} - -static int dlm_request_join(struct dlm_ctxt *dlm, - int node, - enum dlm_query_join_response_code *response) -{ - int status; - struct dlm_query_join_request join_msg; - struct dlm_query_join_packet packet; - u32 join_resp; - - mlog(0, "querying node %d\n", node); - - memset(&join_msg, 0, sizeof(join_msg)); - join_msg.node_idx = dlm->node_num; - join_msg.name_len = strlen(dlm->name); - memcpy(join_msg.domain, dlm->name, join_msg.name_len); - join_msg.dlm_proto = dlm->dlm_locking_proto; - join_msg.fs_proto = dlm->fs_locking_proto; - - /* copy live node map to join message */ - byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES); - - status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, - sizeof(join_msg), node, &join_resp); - if (status < 0 && status != -ENOPROTOOPT) { - mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " - "node %u\n", status, DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, - node); - goto bail; - } - dlm_query_join_wire_to_packet(join_resp, &packet); - - /* -ENOPROTOOPT from the net code means the other side isn't - listening for our message type -- that's fine, it means - his dlm isn't up, so we can consider him a 'yes' but not - joined into the domain. */ - if (status == -ENOPROTOOPT) { - status = 0; - *response = JOIN_OK_NO_MAP; - } else if (packet.code == JOIN_DISALLOW || - packet.code == JOIN_OK_NO_MAP) { - *response = packet.code; - } else if (packet.code == JOIN_PROTOCOL_MISMATCH) { - mlog(ML_NOTICE, - "This node requested DLM locking protocol %u.%u and " - "filesystem locking protocol %u.%u. 
At least one of " - "the protocol versions on node %d is not compatible, " - "disconnecting\n", - dlm->dlm_locking_proto.pv_major, - dlm->dlm_locking_proto.pv_minor, - dlm->fs_locking_proto.pv_major, - dlm->fs_locking_proto.pv_minor, - node); - status = -EPROTO; - *response = packet.code; - } else if (packet.code == JOIN_OK) { - *response = packet.code; - /* Use the same locking protocol as the remote node */ - dlm->dlm_locking_proto.pv_minor = packet.dlm_minor; - dlm->fs_locking_proto.pv_minor = packet.fs_minor; - mlog(0, - "Node %d responds JOIN_OK with DLM locking protocol " - "%u.%u and fs locking protocol %u.%u\n", - node, - dlm->dlm_locking_proto.pv_major, - dlm->dlm_locking_proto.pv_minor, - dlm->fs_locking_proto.pv_major, - dlm->fs_locking_proto.pv_minor); - } else { - status = -EINVAL; - mlog(ML_ERROR, "invalid response %d from node %u\n", - packet.code, node); - } - - mlog(0, "status %d, node %d response is %d\n", status, node, - *response); - -bail: - return status; -} - -static int dlm_send_one_join_assert(struct dlm_ctxt *dlm, - unsigned int node) -{ - int status; - struct dlm_assert_joined assert_msg; - - mlog(0, "Sending join assert to node %u\n", node); - - memset(&assert_msg, 0, sizeof(assert_msg)); - assert_msg.node_idx = dlm->node_num; - assert_msg.name_len = strlen(dlm->name); - memcpy(assert_msg.domain, dlm->name, assert_msg.name_len); - - status = o2net_send_message(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, - &assert_msg, sizeof(assert_msg), node, - NULL); - if (status < 0) - mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " - "node %u\n", status, DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, - node); - - return status; -} - -static void dlm_send_join_asserts(struct dlm_ctxt *dlm, - unsigned long *node_map) -{ - int status, node, live; - - status = 0; - node = -1; - while ((node = find_next_bit(node_map, O2NM_MAX_NODES, - node + 1)) < O2NM_MAX_NODES) { - if (node == dlm->node_num) - continue; - - do { - /* It is very important that this message be - * received so we spin until either the node - * has died or it gets the message. */ - status = dlm_send_one_join_assert(dlm, node); - - spin_lock(&dlm->spinlock); - live = test_bit(node, dlm->live_nodes_map); - spin_unlock(&dlm->spinlock); - - if (status) { - mlog(ML_ERROR, "Error return %d asserting " - "join on node %d\n", status, node); - - /* give us some time between errors... 
*/ - if (live) - msleep(DLM_DOMAIN_BACKOFF_MS); - } - } while (status && live); - } -} - -struct domain_join_ctxt { - unsigned long live_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; - unsigned long yes_resp_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; -}; - -static int dlm_should_restart_join(struct dlm_ctxt *dlm, - struct domain_join_ctxt *ctxt, - enum dlm_query_join_response_code response) -{ - int ret; - - if (response == JOIN_DISALLOW) { - mlog(0, "Latest response of disallow -- should restart\n"); - return 1; - } - - spin_lock(&dlm->spinlock); - /* For now, we restart the process if the node maps have - * changed at all */ - ret = memcmp(ctxt->live_map, dlm->live_nodes_map, - sizeof(dlm->live_nodes_map)); - spin_unlock(&dlm->spinlock); - - if (ret) - mlog(0, "Node maps changed -- should restart\n"); - - return ret; -} - -static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) -{ - int status = 0, tmpstat, node; - struct domain_join_ctxt *ctxt; - enum dlm_query_join_response_code response = JOIN_DISALLOW; - - mlog(0, "%p", dlm); - - ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); - if (!ctxt) { - status = -ENOMEM; - mlog_errno(status); - goto bail; - } - - /* group sem locking should work for us here -- we're already - * registered for heartbeat events so filling this should be - * atomic wrt getting those handlers called. */ - o2hb_fill_node_map(dlm->live_nodes_map, sizeof(dlm->live_nodes_map)); - - spin_lock(&dlm->spinlock); - memcpy(ctxt->live_map, dlm->live_nodes_map, sizeof(ctxt->live_map)); - - __dlm_set_joining_node(dlm, dlm->node_num); - - spin_unlock(&dlm->spinlock); - - node = -1; - while ((node = find_next_bit(ctxt->live_map, O2NM_MAX_NODES, - node + 1)) < O2NM_MAX_NODES) { - if (node == dlm->node_num) - continue; - - status = dlm_request_join(dlm, node, &response); - if (status < 0) { - mlog_errno(status); - goto bail; - } - - /* Ok, either we got a response or the node doesn't have a - * dlm up. */ - if (response == JOIN_OK) - set_bit(node, ctxt->yes_resp_map); - - if (dlm_should_restart_join(dlm, ctxt, response)) { - status = -EAGAIN; - goto bail; - } - } - - mlog(0, "Yay, done querying nodes!\n"); - - /* Yay, everyone agree's we can join the domain. My domain is - * comprised of all nodes who were put in the - * yes_resp_map. Copy that into our domain map and send a join - * assert message to clean up everyone elses state. */ - spin_lock(&dlm->spinlock); - memcpy(dlm->domain_map, ctxt->yes_resp_map, - sizeof(ctxt->yes_resp_map)); - set_bit(dlm->node_num, dlm->domain_map); - spin_unlock(&dlm->spinlock); - - /* Support for global heartbeat and node info was added in 1.1 */ - if (dlm->dlm_locking_proto.pv_major > 1 || - dlm->dlm_locking_proto.pv_minor > 0) { - status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map); - if (status) { - mlog_errno(status); - goto bail; - } - status = dlm_send_regions(dlm, ctxt->yes_resp_map); - if (status) { - mlog_errno(status); - goto bail; - } - } - - dlm_send_join_asserts(dlm, ctxt->yes_resp_map); - - /* Joined state *must* be set before the joining node - * information, otherwise the query_join handler may read no - * current joiner but a state of NEW and tell joining nodes - * we're not in the domain. 
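 *
 * That is exactly the window dlm_query_join_handler() tests for: on
 * seeing dlm_state == DLM_CTXT_NEW with no joining node set, it
 * answers JOIN_OK_NO_MAP, which the joiner takes to mean we are not
 * in the domain at all.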
*/ - spin_lock(&dlm_domain_lock); - dlm->dlm_state = DLM_CTXT_JOINED; - dlm->num_joins++; - spin_unlock(&dlm_domain_lock); - -bail: - spin_lock(&dlm->spinlock); - __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); - if (!status) { - printk(KERN_NOTICE "o2dlm: Joining domain %s ", dlm->name); - __dlm_print_nodes(dlm); - } - spin_unlock(&dlm->spinlock); - - if (ctxt) { - /* Do we need to send a cancel message to any nodes? */ - if (status < 0) { - tmpstat = dlm_send_join_cancels(dlm, - ctxt->yes_resp_map, - sizeof(ctxt->yes_resp_map)); - if (tmpstat < 0) - mlog_errno(tmpstat); - } - kfree(ctxt); - } - - mlog(0, "returning %d\n", status); - return status; -} - -static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm) -{ - o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_up); - o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_down); - o2net_unregister_handler_list(&dlm->dlm_domain_handlers); -} - -static int dlm_register_domain_handlers(struct dlm_ctxt *dlm) -{ - int status; - - mlog(0, "registering handlers.\n"); - - o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB, - dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI); - status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_down); - if (status) - goto bail; - - o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB, - dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI); - status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_up); - if (status) - goto bail; - - status = o2net_register_handler(DLM_MASTER_REQUEST_MSG, dlm->key, - sizeof(struct dlm_master_request), - dlm_master_request_handler, - dlm, NULL, &dlm->dlm_domain_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_ASSERT_MASTER_MSG, dlm->key, - sizeof(struct dlm_assert_master), - dlm_assert_master_handler, - dlm, dlm_assert_master_post_handler, - &dlm->dlm_domain_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_CREATE_LOCK_MSG, dlm->key, - sizeof(struct dlm_create_lock), - dlm_create_lock_handler, - dlm, NULL, &dlm->dlm_domain_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_CONVERT_LOCK_MSG, dlm->key, - DLM_CONVERT_LOCK_MAX_LEN, - dlm_convert_lock_handler, - dlm, NULL, &dlm->dlm_domain_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_UNLOCK_LOCK_MSG, dlm->key, - DLM_UNLOCK_LOCK_MAX_LEN, - dlm_unlock_lock_handler, - dlm, NULL, &dlm->dlm_domain_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_PROXY_AST_MSG, dlm->key, - DLM_PROXY_AST_MAX_LEN, - dlm_proxy_ast_handler, - dlm, NULL, &dlm->dlm_domain_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_EXIT_DOMAIN_MSG, dlm->key, - sizeof(struct dlm_exit_domain), - dlm_exit_domain_handler, - dlm, NULL, &dlm->dlm_domain_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_DEREF_LOCKRES_MSG, dlm->key, - sizeof(struct dlm_deref_lockres), - dlm_deref_lockres_handler, - dlm, NULL, &dlm->dlm_domain_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_MIGRATE_REQUEST_MSG, dlm->key, - sizeof(struct dlm_migrate_request), - dlm_migrate_request_handler, - dlm, NULL, &dlm->dlm_domain_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_MIG_LOCKRES_MSG, dlm->key, - DLM_MIG_LOCKRES_MAX_LEN, - dlm_mig_lockres_handler, - dlm, NULL, &dlm->dlm_domain_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_MASTER_REQUERY_MSG, dlm->key, - sizeof(struct dlm_master_requery), - 
dlm_master_requery_handler, - dlm, NULL, &dlm->dlm_domain_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_LOCK_REQUEST_MSG, dlm->key, - sizeof(struct dlm_lock_request), - dlm_request_all_locks_handler, - dlm, NULL, &dlm->dlm_domain_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_RECO_DATA_DONE_MSG, dlm->key, - sizeof(struct dlm_reco_data_done), - dlm_reco_data_done_handler, - dlm, NULL, &dlm->dlm_domain_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_BEGIN_RECO_MSG, dlm->key, - sizeof(struct dlm_begin_reco), - dlm_begin_reco_handler, - dlm, NULL, &dlm->dlm_domain_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_FINALIZE_RECO_MSG, dlm->key, - sizeof(struct dlm_finalize_reco), - dlm_finalize_reco_handler, - dlm, NULL, &dlm->dlm_domain_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_BEGIN_EXIT_DOMAIN_MSG, dlm->key, - sizeof(struct dlm_exit_domain), - dlm_begin_exit_domain_handler, - dlm, NULL, &dlm->dlm_domain_handlers); - if (status) - goto bail; - -bail: - if (status) - dlm_unregister_domain_handlers(dlm); - - return status; -} - -static int dlm_join_domain(struct dlm_ctxt *dlm) -{ - int status; - unsigned int backoff; - unsigned int total_backoff = 0; - - BUG_ON(!dlm); - - mlog(0, "Join domain %s\n", dlm->name); - - status = dlm_register_domain_handlers(dlm); - if (status) { - mlog_errno(status); - goto bail; - } - - status = dlm_debug_init(dlm); - if (status < 0) { - mlog_errno(status); - goto bail; - } - - status = dlm_launch_thread(dlm); - if (status < 0) { - mlog_errno(status); - goto bail; - } - - status = dlm_launch_recovery_thread(dlm); - if (status < 0) { - mlog_errno(status); - goto bail; - } - - dlm->dlm_worker = create_singlethread_workqueue("dlm_wq"); - if (!dlm->dlm_worker) { - status = -ENOMEM; - mlog_errno(status); - goto bail; - } - - do { - status = dlm_try_to_join_domain(dlm); - - /* If we're racing another node to the join, then we - * need to back off temporarily and let them - * complete. */ -#define DLM_JOIN_TIMEOUT_MSECS 90000 - if (status == -EAGAIN) { - if (signal_pending(current)) { - status = -ERESTARTSYS; - goto bail; - } - - if (total_backoff > - msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) { - status = -ERESTARTSYS; - mlog(ML_NOTICE, "Timed out joining dlm domain " - "%s after %u msecs\n", dlm->name, - jiffies_to_msecs(total_backoff)); - goto bail; - } - - /* - * <chip> After you! - * <dale> No, after you! - * <chip> I insist! - * <dale> But you first! - * ... 
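 *
 * The backoff below is jittered off the clock: (jiffies & 0x3)
 * yields 0-3, multiplied by DLM_DOMAIN_BACKOFF_MS, so competing
 * joiners sleep 0/200/400/600 ms and one of them gets to finish
 * first.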
- */ - backoff = (unsigned int)(jiffies & 0x3); - backoff *= DLM_DOMAIN_BACKOFF_MS; - total_backoff += backoff; - mlog(0, "backoff %d\n", backoff); - msleep(backoff); - } - } while (status == -EAGAIN); - - if (status < 0) { - mlog_errno(status); - goto bail; - } - - status = 0; -bail: - wake_up(&dlm_domain_events); - - if (status) { - dlm_unregister_domain_handlers(dlm); - dlm_debug_shutdown(dlm); - dlm_complete_thread(dlm); - dlm_complete_recovery_thread(dlm); - dlm_destroy_dlm_worker(dlm); - } - - return status; -} - -static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, - u32 key) -{ - int i; - int ret; - struct dlm_ctxt *dlm = NULL; - - dlm = kzalloc(sizeof(*dlm), GFP_KERNEL); - if (!dlm) { - mlog_errno(-ENOMEM); - goto leave; - } - - dlm->name = kstrdup(domain, GFP_KERNEL); - if (dlm->name == NULL) { - mlog_errno(-ENOMEM); - kfree(dlm); - dlm = NULL; - goto leave; - } - - dlm->lockres_hash = (struct hlist_head **)dlm_alloc_pagevec(DLM_HASH_PAGES); - if (!dlm->lockres_hash) { - mlog_errno(-ENOMEM); - kfree(dlm->name); - kfree(dlm); - dlm = NULL; - goto leave; - } - - for (i = 0; i < DLM_HASH_BUCKETS; i++) - INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i)); - - dlm->master_hash = (struct hlist_head **) - dlm_alloc_pagevec(DLM_HASH_PAGES); - if (!dlm->master_hash) { - mlog_errno(-ENOMEM); - dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); - kfree(dlm->name); - kfree(dlm); - dlm = NULL; - goto leave; - } - - for (i = 0; i < DLM_HASH_BUCKETS; i++) - INIT_HLIST_HEAD(dlm_master_hash(dlm, i)); - - dlm->key = key; - dlm->node_num = o2nm_this_node(); - - ret = dlm_create_debugfs_subroot(dlm); - if (ret < 0) { - dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES); - dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); - kfree(dlm->name); - kfree(dlm); - dlm = NULL; - goto leave; - } - - spin_lock_init(&dlm->spinlock); - spin_lock_init(&dlm->master_lock); - spin_lock_init(&dlm->ast_lock); - spin_lock_init(&dlm->track_lock); - INIT_LIST_HEAD(&dlm->list); - INIT_LIST_HEAD(&dlm->dirty_list); - INIT_LIST_HEAD(&dlm->reco.resources); - INIT_LIST_HEAD(&dlm->reco.received); - INIT_LIST_HEAD(&dlm->reco.node_data); - INIT_LIST_HEAD(&dlm->purge_list); - INIT_LIST_HEAD(&dlm->dlm_domain_handlers); - INIT_LIST_HEAD(&dlm->tracking_list); - dlm->reco.state = 0; - - INIT_LIST_HEAD(&dlm->pending_asts); - INIT_LIST_HEAD(&dlm->pending_basts); - - mlog(0, "dlm->recovery_map=%p, &(dlm->recovery_map[0])=%p\n", - dlm->recovery_map, &(dlm->recovery_map[0])); - - memset(dlm->recovery_map, 0, sizeof(dlm->recovery_map)); - memset(dlm->live_nodes_map, 0, sizeof(dlm->live_nodes_map)); - memset(dlm->domain_map, 0, sizeof(dlm->domain_map)); - - dlm->dlm_thread_task = NULL; - dlm->dlm_reco_thread_task = NULL; - dlm->dlm_worker = NULL; - init_waitqueue_head(&dlm->dlm_thread_wq); - init_waitqueue_head(&dlm->dlm_reco_thread_wq); - init_waitqueue_head(&dlm->reco.event); - init_waitqueue_head(&dlm->ast_wq); - init_waitqueue_head(&dlm->migration_wq); - INIT_LIST_HEAD(&dlm->mle_hb_events); - - dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN; - init_waitqueue_head(&dlm->dlm_join_events); - - dlm->reco.new_master = O2NM_INVALID_NODE_NUM; - dlm->reco.dead_node = O2NM_INVALID_NODE_NUM; - - atomic_set(&dlm->res_tot_count, 0); - atomic_set(&dlm->res_cur_count, 0); - for (i = 0; i < DLM_MLE_NUM_TYPES; ++i) { - atomic_set(&dlm->mle_tot_count[i], 0); - atomic_set(&dlm->mle_cur_count[i], 0); - } - - spin_lock_init(&dlm->work_lock); - INIT_LIST_HEAD(&dlm->work_list); - INIT_WORK(&dlm->dispatched_work, 
dlm_dispatch_work); - - kref_init(&dlm->dlm_refs); - dlm->dlm_state = DLM_CTXT_NEW; - - INIT_LIST_HEAD(&dlm->dlm_eviction_callbacks); - - mlog(0, "context init: refcount %u\n", - atomic_read(&dlm->dlm_refs.refcount)); - -leave: - return dlm; -} - -/* - * Compare a requested locking protocol version against the current one. - * - * If the major numbers are different, they are incompatible. - * If the current minor is greater than the request, they are incompatible. - * If the current minor is less than or equal to the request, they are - * compatible, and the requester should run at the current minor version. - */ -static int dlm_protocol_compare(struct dlm_protocol_version *existing, - struct dlm_protocol_version *request) -{ - if (existing->pv_major != request->pv_major) - return 1; - - if (existing->pv_minor > request->pv_minor) - return 1; - - if (existing->pv_minor < request->pv_minor) - request->pv_minor = existing->pv_minor; - - return 0; -} - -/* - * dlm_register_domain: one-time setup per "domain". - * - * The filesystem passes in the requested locking version via proto. - * If registration was successful, proto will contain the negotiated - * locking protocol. - */ -struct dlm_ctxt * dlm_register_domain(const char *domain, - u32 key, - struct dlm_protocol_version *fs_proto) -{ - int ret; - struct dlm_ctxt *dlm = NULL; - struct dlm_ctxt *new_ctxt = NULL; - - if (strlen(domain) >= O2NM_MAX_NAME_LEN) { - ret = -ENAMETOOLONG; - mlog(ML_ERROR, "domain name length too long\n"); - goto leave; - } - - mlog(0, "register called for domain \"%s\"\n", domain); - -retry: - dlm = NULL; - if (signal_pending(current)) { - ret = -ERESTARTSYS; - mlog_errno(ret); - goto leave; - } - - spin_lock(&dlm_domain_lock); - - dlm = __dlm_lookup_domain(domain); - if (dlm) { - if (dlm->dlm_state != DLM_CTXT_JOINED) { - spin_unlock(&dlm_domain_lock); - - mlog(0, "This ctxt is not joined yet!\n"); - wait_event_interruptible(dlm_domain_events, - dlm_wait_on_domain_helper( - domain)); - goto retry; - } - - if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) { - spin_unlock(&dlm_domain_lock); - mlog(ML_ERROR, - "Requested locking protocol version is not " - "compatible with already registered domain " - "\"%s\"\n", domain); - ret = -EPROTO; - goto leave; - } - - __dlm_get(dlm); - dlm->num_joins++; - - spin_unlock(&dlm_domain_lock); - - ret = 0; - goto leave; - } - - /* doesn't exist */ - if (!new_ctxt) { - spin_unlock(&dlm_domain_lock); - - new_ctxt = dlm_alloc_ctxt(domain, key); - if (new_ctxt) - goto retry; - - ret = -ENOMEM; - mlog_errno(ret); - goto leave; - } - - /* a little variable switch-a-roo here... */ - dlm = new_ctxt; - new_ctxt = NULL; - - /* add the new domain */ - list_add_tail(&dlm->list, &dlm_domains); - spin_unlock(&dlm_domain_lock); - - /* - * Pass the locking protocol version into the join. If the join - * succeeds, it will have the negotiated protocol set. 
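 *
 * Negotiation can only lower the minor version: e.g. if this node
 * offers fs protocol 1.2 while the domain already runs 1.0, *fs_proto
 * comes back as 1.0. A major mismatch, or a domain running a higher
 * minor than we offer, fails with -EPROTO (see
 * dlm_protocol_compare() above).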
- */ - dlm->dlm_locking_proto = dlm_protocol; - dlm->fs_locking_proto = *fs_proto; - - ret = dlm_join_domain(dlm); - if (ret) { - mlog_errno(ret); - dlm_put(dlm); - goto leave; - } - - /* Tell the caller what locking protocol we negotiated */ - *fs_proto = dlm->fs_locking_proto; - - ret = 0; -leave: - if (new_ctxt) - dlm_free_ctxt_mem(new_ctxt); - - if (ret < 0) - dlm = ERR_PTR(ret); - - return dlm; -} -EXPORT_SYMBOL_GPL(dlm_register_domain); - -static LIST_HEAD(dlm_join_handlers); - -static void dlm_unregister_net_handlers(void) -{ - o2net_unregister_handler_list(&dlm_join_handlers); -} - -static int dlm_register_net_handlers(void) -{ - int status = 0; - - status = o2net_register_handler(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, - sizeof(struct dlm_query_join_request), - dlm_query_join_handler, - NULL, NULL, &dlm_join_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, - sizeof(struct dlm_assert_joined), - dlm_assert_joined_handler, - NULL, NULL, &dlm_join_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY, - sizeof(struct dlm_cancel_join), - dlm_cancel_join_handler, - NULL, NULL, &dlm_join_handlers); - if (status) - goto bail; - - status = o2net_register_handler(DLM_QUERY_REGION, DLM_MOD_KEY, - sizeof(struct dlm_query_region), - dlm_query_region_handler, - NULL, NULL, &dlm_join_handlers); - - if (status) - goto bail; - - status = o2net_register_handler(DLM_QUERY_NODEINFO, DLM_MOD_KEY, - sizeof(struct dlm_query_nodeinfo), - dlm_query_nodeinfo_handler, - NULL, NULL, &dlm_join_handlers); -bail: - if (status < 0) - dlm_unregister_net_handlers(); - - return status; -} - -/* Domain eviction callback handling. - * - * The file system requires notification of node death *before* the - * dlm completes it's recovery work, otherwise it may be able to - * acquire locks on resources requiring recovery. Since the dlm can - * evict a node from it's domain *before* heartbeat fires, a similar - * mechanism is required. */ - -/* Eviction is not expected to happen often, so a per-domain lock is - * not necessary. Eviction callbacks are allowed to sleep for short - * periods of time. 
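 *
 * Sketch of the intended use by a hypothetical fs-side caller (the
 * names are illustrative, the three calls are the real API):
 *
 *	static void my_evict(int node_num, void *data) { ... }
 *	static struct dlm_eviction_cb my_cb;
 *
 *	dlm_setup_eviction_cb(&my_cb, my_evict, my_data);
 *	dlm_register_eviction_cb(dlm, &my_cb);
 *	...
 *	dlm_unregister_eviction_cb(&my_cb);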
*/ -static DECLARE_RWSEM(dlm_callback_sem); - -void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, - int node_num) -{ - struct list_head *iter; - struct dlm_eviction_cb *cb; - - down_read(&dlm_callback_sem); - list_for_each(iter, &dlm->dlm_eviction_callbacks) { - cb = list_entry(iter, struct dlm_eviction_cb, ec_item); - - cb->ec_func(node_num, cb->ec_data); - } - up_read(&dlm_callback_sem); -} - -void dlm_setup_eviction_cb(struct dlm_eviction_cb *cb, - dlm_eviction_func *f, - void *data) -{ - INIT_LIST_HEAD(&cb->ec_item); - cb->ec_func = f; - cb->ec_data = data; -} -EXPORT_SYMBOL_GPL(dlm_setup_eviction_cb); - -void dlm_register_eviction_cb(struct dlm_ctxt *dlm, - struct dlm_eviction_cb *cb) -{ - down_write(&dlm_callback_sem); - list_add_tail(&cb->ec_item, &dlm->dlm_eviction_callbacks); - up_write(&dlm_callback_sem); -} -EXPORT_SYMBOL_GPL(dlm_register_eviction_cb); - -void dlm_unregister_eviction_cb(struct dlm_eviction_cb *cb) -{ - down_write(&dlm_callback_sem); - list_del_init(&cb->ec_item); - up_write(&dlm_callback_sem); -} -EXPORT_SYMBOL_GPL(dlm_unregister_eviction_cb); - -static int __init dlm_init(void) -{ - int status; - - dlm_print_version(); - - status = dlm_init_mle_cache(); - if (status) { - mlog(ML_ERROR, "Could not create o2dlm_mle slabcache\n"); - goto error; - } - - status = dlm_init_master_caches(); - if (status) { - mlog(ML_ERROR, "Could not create o2dlm_lockres and " - "o2dlm_lockname slabcaches\n"); - goto error; - } - - status = dlm_init_lock_cache(); - if (status) { - mlog(ML_ERROR, "Count not create o2dlm_lock slabcache\n"); - goto error; - } - - status = dlm_register_net_handlers(); - if (status) { - mlog(ML_ERROR, "Unable to register network handlers\n"); - goto error; - } - - status = dlm_create_debugfs_root(); - if (status) - goto error; - - return 0; -error: - dlm_unregister_net_handlers(); - dlm_destroy_lock_cache(); - dlm_destroy_master_caches(); - dlm_destroy_mle_cache(); - return -1; -} - -static void __exit dlm_exit (void) -{ - dlm_destroy_debugfs_root(); - dlm_unregister_net_handlers(); - dlm_destroy_lock_cache(); - dlm_destroy_master_caches(); - dlm_destroy_mle_cache(); -} - -MODULE_AUTHOR("Oracle"); -MODULE_LICENSE("GPL"); - -module_init(dlm_init); -module_exit(dlm_exit); diff --git a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmdomain.h b/ANDROID_3.4.5/fs/ocfs2/dlm/dlmdomain.h deleted file mode 100644 index 2f7f60bf..00000000 --- a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmdomain.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmdomain.h - * - * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. 
- * - */ - -#ifndef DLMDOMAIN_H -#define DLMDOMAIN_H - -extern spinlock_t dlm_domain_lock; -extern struct list_head dlm_domains; - -int dlm_joined(struct dlm_ctxt *dlm); -int dlm_shutting_down(struct dlm_ctxt *dlm); -void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, - int node_num); - -#endif diff --git a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmlock.c b/ANDROID_3.4.5/fs/ocfs2/dlm/dlmlock.c deleted file mode 100644 index 975810b9..00000000 --- a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmlock.c +++ /dev/null @@ -1,765 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmlock.c - * - * underlying calls for lock creation - * - * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * - */ - - -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/highmem.h> -#include <linux/init.h> -#include <linux/sysctl.h> -#include <linux/random.h> -#include <linux/blkdev.h> -#include <linux/socket.h> -#include <linux/inet.h> -#include <linux/spinlock.h> -#include <linux/delay.h> - - -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" - -#include "dlmapi.h" -#include "dlmcommon.h" - -#include "dlmconvert.h" - -#define MLOG_MASK_PREFIX ML_DLM -#include "cluster/masklog.h" - -static struct kmem_cache *dlm_lock_cache = NULL; - -static DEFINE_SPINLOCK(dlm_cookie_lock); -static u64 dlm_next_cookie = 1; - -static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, int flags); -static void dlm_init_lock(struct dlm_lock *newlock, int type, - u8 node, u64 cookie); -static void dlm_lock_release(struct kref *kref); -static void dlm_lock_detach_lockres(struct dlm_lock *lock); - -int dlm_init_lock_cache(void) -{ - dlm_lock_cache = kmem_cache_create("o2dlm_lock", - sizeof(struct dlm_lock), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (dlm_lock_cache == NULL) - return -ENOMEM; - return 0; -} - -void dlm_destroy_lock_cache(void) -{ - if (dlm_lock_cache) - kmem_cache_destroy(dlm_lock_cache); -} - -/* Tell us whether we can grant a new lock request. - * locking: - * caller needs: res->spinlock - * taken: none - * held on exit: none - * returns: 1 if the lock can be granted, 0 otherwise. 
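 *
 * Mode compatibility, for reference (restricted to the NL/PR/EX
 * modes ocfs2 actually uses; 1 = a new request of the column mode
 * can coexist with an existing lock of the row mode):
 *
 *		NL	PR	EX
 *	NL	1	1	1
 *	PR	1	1	0
 *	EX	1	0	0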
- */ -static int dlm_can_grant_new_lock(struct dlm_lock_resource *res, - struct dlm_lock *lock) -{ - struct list_head *iter; - struct dlm_lock *tmplock; - - list_for_each(iter, &res->granted) { - tmplock = list_entry(iter, struct dlm_lock, list); - - if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type)) - return 0; - } - - list_for_each(iter, &res->converting) { - tmplock = list_entry(iter, struct dlm_lock, list); - - if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type)) - return 0; - if (!dlm_lock_compatible(tmplock->ml.convert_type, - lock->ml.type)) - return 0; - } - - return 1; -} - -/* performs lock creation at the lockres master site - * locking: - * caller needs: none - * taken: takes and drops res->spinlock - * held on exit: none - * returns: DLM_NORMAL, DLM_NOTQUEUED - */ -static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, int flags) -{ - int call_ast = 0, kick_thread = 0; - enum dlm_status status = DLM_NORMAL; - - mlog(0, "type=%d\n", lock->ml.type); - - spin_lock(&res->spinlock); - /* if called from dlm_create_lock_handler, need to - * ensure it will not sleep in dlm_wait_on_lockres */ - status = __dlm_lockres_state_to_status(res); - if (status != DLM_NORMAL && - lock->ml.node != dlm->node_num) { - /* erf. state changed after lock was dropped. */ - spin_unlock(&res->spinlock); - dlm_error(status); - return status; - } - __dlm_wait_on_lockres(res); - __dlm_lockres_reserve_ast(res); - - if (dlm_can_grant_new_lock(res, lock)) { - mlog(0, "I can grant this lock right away\n"); - /* got it right away */ - lock->lksb->status = DLM_NORMAL; - status = DLM_NORMAL; - dlm_lock_get(lock); - list_add_tail(&lock->list, &res->granted); - - /* for the recovery lock, we can't allow the ast - * to be queued since the dlmthread is already - * frozen. 
but the recovery lock is always locked - * with LKM_NOQUEUE so we do not need the ast in - * this special case */ - if (!dlm_is_recovery_lock(res->lockname.name, - res->lockname.len)) { - kick_thread = 1; - call_ast = 1; - } else { - mlog(0, "%s: returning DLM_NORMAL to " - "node %u for reco lock\n", dlm->name, - lock->ml.node); - } - } else { - /* for NOQUEUE request, unless we get the - * lock right away, return DLM_NOTQUEUED */ - if (flags & LKM_NOQUEUE) { - status = DLM_NOTQUEUED; - if (dlm_is_recovery_lock(res->lockname.name, - res->lockname.len)) { - mlog(0, "%s: returning NOTQUEUED to " - "node %u for reco lock\n", dlm->name, - lock->ml.node); - } - } else { - dlm_lock_get(lock); - list_add_tail(&lock->list, &res->blocked); - kick_thread = 1; - } - } - - spin_unlock(&res->spinlock); - wake_up(&res->wq); - - /* either queue the ast or release it */ - if (call_ast) - dlm_queue_ast(dlm, lock); - else - dlm_lockres_release_ast(dlm, res); - - dlm_lockres_calc_usage(dlm, res); - if (kick_thread) - dlm_kick_thread(dlm, res); - - return status; -} - -void dlm_revert_pending_lock(struct dlm_lock_resource *res, - struct dlm_lock *lock) -{ - /* remove from local queue if it failed */ - list_del_init(&lock->list); - lock->lksb->flags &= ~DLM_LKSB_GET_LVB; -} - - -/* - * locking: - * caller needs: none - * taken: takes and drops res->spinlock - * held on exit: none - * returns: DLM_DENIED, DLM_RECOVERING, or net status - */ -static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, int flags) -{ - enum dlm_status status = DLM_DENIED; - int lockres_changed = 1; - - mlog(0, "type=%d, lockres %.*s, flags = 0x%x\n", - lock->ml.type, res->lockname.len, - res->lockname.name, flags); - - /* - * Wait if resource is getting recovered, remastered, etc. - * If the resource was remastered and new owner is self, then exit. - */ - spin_lock(&res->spinlock); - __dlm_wait_on_lockres(res); - if (res->owner == dlm->node_num) { - spin_unlock(&res->spinlock); - return DLM_RECOVERING; - } - res->state |= DLM_LOCK_RES_IN_PROGRESS; - - /* add lock to local (secondary) queue */ - dlm_lock_get(lock); - list_add_tail(&lock->list, &res->blocked); - lock->lock_pending = 1; - spin_unlock(&res->spinlock); - - /* spec seems to say that you will get DLM_NORMAL when the lock - * has been queued, meaning we need to wait for a reply here. */ - status = dlm_send_remote_lock_request(dlm, res, lock, flags); - - spin_lock(&res->spinlock); - res->state &= ~DLM_LOCK_RES_IN_PROGRESS; - lock->lock_pending = 0; - if (status != DLM_NORMAL) { - if (status == DLM_RECOVERING && - dlm_is_recovery_lock(res->lockname.name, - res->lockname.len)) { - /* recovery lock was mastered by dead node. - * we need to have calc_usage shoot down this - * lockres and completely remaster it. */ - mlog(0, "%s: recovery lock was owned by " - "dead node %u, remaster it now.\n", - dlm->name, res->owner); - } else if (status != DLM_NOTQUEUED) { - /* - * DO NOT call calc_usage, as this would unhash - * the remote lockres before we ever get to use - * it. treat as if we never made any change to - * the lockres. - */ - lockres_changed = 0; - dlm_error(status); - } - dlm_revert_pending_lock(res, lock); - dlm_lock_put(lock); - } else if (dlm_is_recovery_lock(res->lockname.name, - res->lockname.len)) { - /* special case for the $RECOVERY lock. - * there will never be an AST delivered to put - * this lock on the proper secondary queue - * (granted), so do it manually. 
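 * (for reference, dlm_is_recovery_lock() is just a name
 * comparison against the literal "$RECOVERY" lock id defined
 * in dlmcommon.h, matched on both name and length.)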
*/ - mlog(0, "%s: $RECOVERY lock for this node (%u) is " - "mastered by %u; got lock, manually granting (no ast)\n", - dlm->name, dlm->node_num, res->owner); - list_move_tail(&lock->list, &res->granted); - } - spin_unlock(&res->spinlock); - - if (lockres_changed) - dlm_lockres_calc_usage(dlm, res); - - wake_up(&res->wq); - return status; -} - - -/* for remote lock creation. - * locking: - * caller needs: none, but need res->state & DLM_LOCK_RES_IN_PROGRESS - * taken: none - * held on exit: none - * returns: DLM_NOLOCKMGR, or net status - */ -static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, int flags) -{ - struct dlm_create_lock create; - int tmpret, status = 0; - enum dlm_status ret; - - memset(&create, 0, sizeof(create)); - create.node_idx = dlm->node_num; - create.requested_type = lock->ml.type; - create.cookie = lock->ml.cookie; - create.namelen = res->lockname.len; - create.flags = cpu_to_be32(flags); - memcpy(create.name, res->lockname.name, create.namelen); - - tmpret = o2net_send_message(DLM_CREATE_LOCK_MSG, dlm->key, &create, - sizeof(create), res->owner, &status); - if (tmpret >= 0) { - ret = status; - if (ret == DLM_REJECTED) { - mlog(ML_ERROR, "%s: res %.*s, Stale lockres no longer " - "owned by node %u. That node is coming back up " - "currently.\n", dlm->name, create.namelen, - create.name, res->owner); - dlm_print_one_lock_resource(res); - BUG(); - } - } else { - mlog(ML_ERROR, "%s: res %.*s, Error %d send CREATE LOCK to " - "node %u\n", dlm->name, create.namelen, create.name, - tmpret, res->owner); - if (dlm_is_host_down(tmpret)) - ret = DLM_RECOVERING; - else - ret = dlm_err_to_dlm_status(tmpret); - } - - return ret; -} - -void dlm_lock_get(struct dlm_lock *lock) -{ - kref_get(&lock->lock_refs); -} - -void dlm_lock_put(struct dlm_lock *lock) -{ - kref_put(&lock->lock_refs, dlm_lock_release); -} - -static void dlm_lock_release(struct kref *kref) -{ - struct dlm_lock *lock; - - lock = container_of(kref, struct dlm_lock, lock_refs); - - BUG_ON(!list_empty(&lock->list)); - BUG_ON(!list_empty(&lock->ast_list)); - BUG_ON(!list_empty(&lock->bast_list)); - BUG_ON(lock->ast_pending); - BUG_ON(lock->bast_pending); - - dlm_lock_detach_lockres(lock); - - if (lock->lksb_kernel_allocated) { - mlog(0, "freeing kernel-allocated lksb\n"); - kfree(lock->lksb); - } - kmem_cache_free(dlm_lock_cache, lock); -} - -/* associate a lock with it's lockres, getting a ref on the lockres */ -void dlm_lock_attach_lockres(struct dlm_lock *lock, - struct dlm_lock_resource *res) -{ - dlm_lockres_get(res); - lock->lockres = res; -} - -/* drop ref on lockres, if there is still one associated with lock */ -static void dlm_lock_detach_lockres(struct dlm_lock *lock) -{ - struct dlm_lock_resource *res; - - res = lock->lockres; - if (res) { - lock->lockres = NULL; - mlog(0, "removing lock's lockres reference\n"); - dlm_lockres_put(res); - } -} - -static void dlm_init_lock(struct dlm_lock *newlock, int type, - u8 node, u64 cookie) -{ - INIT_LIST_HEAD(&newlock->list); - INIT_LIST_HEAD(&newlock->ast_list); - INIT_LIST_HEAD(&newlock->bast_list); - spin_lock_init(&newlock->spinlock); - newlock->ml.type = type; - newlock->ml.convert_type = LKM_IVMODE; - newlock->ml.highest_blocked = LKM_IVMODE; - newlock->ml.node = node; - newlock->ml.pad1 = 0; - newlock->ml.list = 0; - newlock->ml.flags = 0; - newlock->ast = NULL; - newlock->bast = NULL; - newlock->astdata = NULL; - newlock->ml.cookie = cpu_to_be64(cookie); - newlock->ast_pending = 0; - 
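	/* note: the *_pending flags below track asts/basts and lock,
	 * convert, unlock and cancel requests that have been issued
	 * for this lock but have not yet completed, so that recovery
	 * can tell an in-flight request from a settled one */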
newlock->bast_pending = 0; - newlock->convert_pending = 0; - newlock->lock_pending = 0; - newlock->unlock_pending = 0; - newlock->cancel_pending = 0; - newlock->lksb_kernel_allocated = 0; - - kref_init(&newlock->lock_refs); -} - -struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie, - struct dlm_lockstatus *lksb) -{ - struct dlm_lock *lock; - int kernel_allocated = 0; - - lock = kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS); - if (!lock) - return NULL; - - if (!lksb) { - /* zero memory only if kernel-allocated */ - lksb = kzalloc(sizeof(*lksb), GFP_NOFS); - if (!lksb) { - kmem_cache_free(dlm_lock_cache, lock); - return NULL; - } - kernel_allocated = 1; - } - - dlm_init_lock(lock, type, node, cookie); - if (kernel_allocated) - lock->lksb_kernel_allocated = 1; - lock->lksb = lksb; - lksb->lockid = lock; - return lock; -} - -/* handler for lock creation net message - * locking: - * caller needs: none - * taken: takes and drops res->spinlock - * held on exit: none - * returns: DLM_NORMAL, DLM_SYSERR, DLM_IVLOCKID, DLM_NOTQUEUED - */ -int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - struct dlm_ctxt *dlm = data; - struct dlm_create_lock *create = (struct dlm_create_lock *)msg->buf; - struct dlm_lock_resource *res = NULL; - struct dlm_lock *newlock = NULL; - struct dlm_lockstatus *lksb = NULL; - enum dlm_status status = DLM_NORMAL; - char *name; - unsigned int namelen; - - BUG_ON(!dlm); - - if (!dlm_grab(dlm)) - return DLM_REJECTED; - - name = create->name; - namelen = create->namelen; - status = DLM_REJECTED; - if (!dlm_domain_fully_joined(dlm)) { - mlog(ML_ERROR, "Domain %s not fully joined, but node %u is " - "sending a create_lock message for lock %.*s!\n", - dlm->name, create->node_idx, namelen, name); - dlm_error(status); - goto leave; - } - - status = DLM_IVBUFLEN; - if (namelen > DLM_LOCKID_NAME_MAX) { - dlm_error(status); - goto leave; - } - - status = DLM_SYSERR; - newlock = dlm_new_lock(create->requested_type, - create->node_idx, - be64_to_cpu(create->cookie), NULL); - if (!newlock) { - dlm_error(status); - goto leave; - } - - lksb = newlock->lksb; - - if (be32_to_cpu(create->flags) & LKM_GET_LVB) { - lksb->flags |= DLM_LKSB_GET_LVB; - mlog(0, "set DLM_LKSB_GET_LVB flag\n"); - } - - status = DLM_IVLOCKID; - res = dlm_lookup_lockres(dlm, name, namelen); - if (!res) { - dlm_error(status); - goto leave; - } - - spin_lock(&res->spinlock); - status = __dlm_lockres_state_to_status(res); - spin_unlock(&res->spinlock); - - if (status != DLM_NORMAL) { - mlog(0, "lockres recovering/migrating/in-progress\n"); - goto leave; - } - - dlm_lock_attach_lockres(newlock, res); - - status = dlmlock_master(dlm, res, newlock, be32_to_cpu(create->flags)); -leave: - if (status != DLM_NORMAL) - if (newlock) - dlm_lock_put(newlock); - - if (res) - dlm_lockres_put(res); - - dlm_put(dlm); - - return status; -} - - -/* fetch next node-local (u8 nodenum + u56 cookie) into u64 */ -static inline void dlm_get_next_cookie(u8 node_num, u64 *cookie) -{ - u64 tmpnode = node_num; - - /* shift single byte of node num into top 8 bits */ - tmpnode <<= 56; - - spin_lock(&dlm_cookie_lock); - *cookie = (dlm_next_cookie | tmpnode); - if (++dlm_next_cookie & 0xff00000000000000ull) { - mlog(0, "This node's cookie will now wrap!\n"); - dlm_next_cookie = 1; - } - spin_unlock(&dlm_cookie_lock); -} - -enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode, - struct dlm_lockstatus *lksb, int flags, - const char *name, int namelen, dlm_astlockfunc_t *ast, - void *data, 
dlm_bastlockfunc_t *bast) -{ - enum dlm_status status; - struct dlm_lock_resource *res = NULL; - struct dlm_lock *lock = NULL; - int convert = 0, recovery = 0; - - /* yes this function is a mess. - * TODO: clean this up. lots of common code in the - * lock and convert paths, especially in the retry blocks */ - if (!lksb) { - dlm_error(DLM_BADARGS); - return DLM_BADARGS; - } - - status = DLM_BADPARAM; - if (mode != LKM_EXMODE && mode != LKM_PRMODE && mode != LKM_NLMODE) { - dlm_error(status); - goto error; - } - - if (flags & ~LKM_VALID_FLAGS) { - dlm_error(status); - goto error; - } - - convert = (flags & LKM_CONVERT); - recovery = (flags & LKM_RECOVERY); - - if (recovery && - (!dlm_is_recovery_lock(name, namelen) || convert) ) { - dlm_error(status); - goto error; - } - if (convert && (flags & LKM_LOCAL)) { - mlog(ML_ERROR, "strange LOCAL convert request!\n"); - goto error; - } - - if (convert) { - /* CONVERT request */ - - /* if converting, must pass in a valid dlm_lock */ - lock = lksb->lockid; - if (!lock) { - mlog(ML_ERROR, "NULL lock pointer in convert " - "request\n"); - goto error; - } - - res = lock->lockres; - if (!res) { - mlog(ML_ERROR, "NULL lockres pointer in convert " - "request\n"); - goto error; - } - dlm_lockres_get(res); - - /* XXX: for ocfs2 purposes, the ast/bast/astdata/lksb are - * static after the original lock call. convert requests will - * ensure that everything is the same, or return DLM_BADARGS. - * this means that DLM_DENIED_NOASTS will never be returned. - */ - if (lock->lksb != lksb || lock->ast != ast || - lock->bast != bast || lock->astdata != data) { - status = DLM_BADARGS; - mlog(ML_ERROR, "new args: lksb=%p, ast=%p, bast=%p, " - "astdata=%p\n", lksb, ast, bast, data); - mlog(ML_ERROR, "orig args: lksb=%p, ast=%p, bast=%p, " - "astdata=%p\n", lock->lksb, lock->ast, - lock->bast, lock->astdata); - goto error; - } -retry_convert: - dlm_wait_for_recovery(dlm); - - if (res->owner == dlm->node_num) - status = dlmconvert_master(dlm, res, lock, flags, mode); - else - status = dlmconvert_remote(dlm, res, lock, flags, mode); - if (status == DLM_RECOVERING || status == DLM_MIGRATING || - status == DLM_FORWARD) { - /* for now, see how this works without sleeping - * and just retry right away. I suspect the reco - * or migration will complete fast enough that - * no waiting will be necessary */ - mlog(0, "retrying convert with migration/recovery/" - "in-progress\n"); - msleep(100); - goto retry_convert; - } - } else { - u64 tmpcookie; - - /* LOCK request */ - status = DLM_BADARGS; - if (!name) { - dlm_error(status); - goto error; - } - - status = DLM_IVBUFLEN; - if (namelen > DLM_LOCKID_NAME_MAX || namelen < 1) { - dlm_error(status); - goto error; - } - - dlm_get_next_cookie(dlm->node_num, &tmpcookie); - lock = dlm_new_lock(mode, dlm->node_num, tmpcookie, lksb); - if (!lock) { - dlm_error(status); - goto error; - } - - if (!recovery) - dlm_wait_for_recovery(dlm); - - /* find or create the lock resource */ - res = dlm_get_lock_resource(dlm, name, namelen, flags); - if (!res) { - status = DLM_IVLOCKID; - dlm_error(status); - goto error; - } - - mlog(0, "type=%d, flags = 0x%x\n", mode, flags); - mlog(0, "creating lock: lock=%p res=%p\n", lock, res); - - dlm_lock_attach_lockres(lock, res); - lock->ast = ast; - lock->bast = bast; - lock->astdata = data; - -retry_lock: - if (flags & LKM_VALBLK) { - mlog(0, "LKM_VALBLK passed by caller\n"); - - /* LVB requests for non PR, PW or EX locks are - * ignored. 
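 * (the lvb, or lock value block, is the small DLM_LVB_LEN-byte
 * payload carried with each lock resource; only a holder at PR
 * strength or better may read it, which is why weaker requests
 * have LKM_VALBLK quietly dropped just below.)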
*/ - if (mode < LKM_PRMODE) - flags &= ~LKM_VALBLK; - else { - flags |= LKM_GET_LVB; - lock->lksb->flags |= DLM_LKSB_GET_LVB; - } - } - - if (res->owner == dlm->node_num) - status = dlmlock_master(dlm, res, lock, flags); - else - status = dlmlock_remote(dlm, res, lock, flags); - - if (status == DLM_RECOVERING || status == DLM_MIGRATING || - status == DLM_FORWARD) { - msleep(100); - if (recovery) { - if (status != DLM_RECOVERING) - goto retry_lock; - /* wait to see the node go down, then - * drop down and allow the lockres to - * get cleaned up. need to remaster. */ - dlm_wait_for_node_death(dlm, res->owner, - DLM_NODE_DEATH_WAIT_MAX); - } else { - dlm_wait_for_recovery(dlm); - goto retry_lock; - } - } - - /* Inflight taken in dlm_get_lock_resource() is dropped here */ - spin_lock(&res->spinlock); - dlm_lockres_drop_inflight_ref(dlm, res); - spin_unlock(&res->spinlock); - - dlm_lockres_calc_usage(dlm, res); - dlm_kick_thread(dlm, res); - - if (status != DLM_NORMAL) { - lock->lksb->flags &= ~DLM_LKSB_GET_LVB; - if (status != DLM_NOTQUEUED) - dlm_error(status); - goto error; - } - } - -error: - if (status != DLM_NORMAL) { - if (lock && !convert) - dlm_lock_put(lock); - // this is kind of unnecessary - lksb->status = status; - } - - /* put lockres ref from the convert path - * or from dlm_get_lock_resource */ - if (res) - dlm_lockres_put(res); - - return status; -} -EXPORT_SYMBOL_GPL(dlmlock); diff --git a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmmaster.c b/ANDROID_3.4.5/fs/ocfs2/dlm/dlmmaster.c deleted file mode 100644 index 005261c3..00000000 --- a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmmaster.c +++ /dev/null @@ -1,3414 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmmod.c - * - * standalone DLM module - * - * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. 
- * - */ - - -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/highmem.h> -#include <linux/init.h> -#include <linux/sysctl.h> -#include <linux/random.h> -#include <linux/blkdev.h> -#include <linux/socket.h> -#include <linux/inet.h> -#include <linux/spinlock.h> -#include <linux/delay.h> - - -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" - -#include "dlmapi.h" -#include "dlmcommon.h" -#include "dlmdomain.h" -#include "dlmdebug.h" - -#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER) -#include "cluster/masklog.h" - -static void dlm_mle_node_down(struct dlm_ctxt *dlm, - struct dlm_master_list_entry *mle, - struct o2nm_node *node, - int idx); -static void dlm_mle_node_up(struct dlm_ctxt *dlm, - struct dlm_master_list_entry *mle, - struct o2nm_node *node, - int idx); - -static void dlm_assert_master_worker(struct dlm_work_item *item, void *data); -static int dlm_do_assert_master(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - void *nodemap, u32 flags); -static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data); - -static inline int dlm_mle_equal(struct dlm_ctxt *dlm, - struct dlm_master_list_entry *mle, - const char *name, - unsigned int namelen) -{ - if (dlm != mle->dlm) - return 0; - - if (namelen != mle->mnamelen || - memcmp(name, mle->mname, namelen) != 0) - return 0; - - return 1; -} - -static struct kmem_cache *dlm_lockres_cache = NULL; -static struct kmem_cache *dlm_lockname_cache = NULL; -static struct kmem_cache *dlm_mle_cache = NULL; - -static void dlm_mle_release(struct kref *kref); -static void dlm_init_mle(struct dlm_master_list_entry *mle, - enum dlm_mle_type type, - struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - const char *name, - unsigned int namelen); -static void dlm_put_mle(struct dlm_master_list_entry *mle); -static void __dlm_put_mle(struct dlm_master_list_entry *mle); -static int dlm_find_mle(struct dlm_ctxt *dlm, - struct dlm_master_list_entry **mle, - char *name, unsigned int namelen); - -static int dlm_do_master_request(struct dlm_lock_resource *res, - struct dlm_master_list_entry *mle, int to); - - -static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_master_list_entry *mle, - int *blocked); -static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_master_list_entry *mle, - int blocked); -static int dlm_add_migration_mle(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_master_list_entry *mle, - struct dlm_master_list_entry **oldmle, - const char *name, unsigned int namelen, - u8 new_master, u8 master); - -static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res); -static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res); -static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 target); -static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res); - - -int dlm_is_host_down(int errno) -{ - switch (errno) { - case -EBADF: - case -ECONNREFUSED: - case -ENOTCONN: - case -ECONNRESET: - case -EPIPE: - case -EHOSTDOWN: - case -EHOSTUNREACH: - case -ETIMEDOUT: - case -ECONNABORTED: - case -ENETDOWN: - case -ENETUNREACH: - case -ENETRESET: - case -ESHUTDOWN: - case -ENOPROTOOPT: - case -EINVAL: /* if returned from our tcp code, - this means there is no socket */ - return 1; - 
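	/* any errno not listed above (e.g. -ENOMEM, -EAGAIN) is
	 * treated as a local, possibly transient failure rather
	 * than a dead peer; callers decide whether to retry */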
} - return 0; -} - - -/* - * MASTER LIST FUNCTIONS - */ - - -/* - * regarding master list entries and heartbeat callbacks: - * - * in order to avoid sleeping and allocation that occurs in - * heartbeat, master list entries are simply attached to the - * dlm's established heartbeat callbacks. the mle is attached - * when it is created, and since the dlm->spinlock is held at - * that time, any heartbeat event will be properly discovered - * by the mle. the mle needs to be detached from the - * dlm->mle_hb_events list as soon as heartbeat events are no - * longer useful to the mle, and before the mle is freed. - * - * as a general rule, heartbeat events are no longer needed by - * the mle once an "answer" regarding the lock master has been - * received. - */ -static inline void __dlm_mle_attach_hb_events(struct dlm_ctxt *dlm, - struct dlm_master_list_entry *mle) -{ - assert_spin_locked(&dlm->spinlock); - - list_add_tail(&mle->hb_events, &dlm->mle_hb_events); -} - - -static inline void __dlm_mle_detach_hb_events(struct dlm_ctxt *dlm, - struct dlm_master_list_entry *mle) -{ - if (!list_empty(&mle->hb_events)) - list_del_init(&mle->hb_events); -} - - -static inline void dlm_mle_detach_hb_events(struct dlm_ctxt *dlm, - struct dlm_master_list_entry *mle) -{ - spin_lock(&dlm->spinlock); - __dlm_mle_detach_hb_events(dlm, mle); - spin_unlock(&dlm->spinlock); -} - -static void dlm_get_mle_inuse(struct dlm_master_list_entry *mle) -{ - struct dlm_ctxt *dlm; - dlm = mle->dlm; - - assert_spin_locked(&dlm->spinlock); - assert_spin_locked(&dlm->master_lock); - mle->inuse++; - kref_get(&mle->mle_refs); -} - -static void dlm_put_mle_inuse(struct dlm_master_list_entry *mle) -{ - struct dlm_ctxt *dlm; - dlm = mle->dlm; - - spin_lock(&dlm->spinlock); - spin_lock(&dlm->master_lock); - mle->inuse--; - __dlm_put_mle(mle); - spin_unlock(&dlm->master_lock); - spin_unlock(&dlm->spinlock); - -} - -/* remove from list and free */ -static void __dlm_put_mle(struct dlm_master_list_entry *mle) -{ - struct dlm_ctxt *dlm; - dlm = mle->dlm; - - assert_spin_locked(&dlm->spinlock); - assert_spin_locked(&dlm->master_lock); - if (!atomic_read(&mle->mle_refs.refcount)) { - /* this may or may not crash, but who cares. - * it's a BUG. 
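 * (a zero refcount here most likely means a reference was
 * dropped twice; dying loudly with the mle state printed
 * beats silently corrupting the master list.)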
*/ - mlog(ML_ERROR, "bad mle: %p\n", mle); - dlm_print_one_mle(mle); - BUG(); - } else - kref_put(&mle->mle_refs, dlm_mle_release); -} - - -/* must not have any spinlocks coming in */ -static void dlm_put_mle(struct dlm_master_list_entry *mle) -{ - struct dlm_ctxt *dlm; - dlm = mle->dlm; - - spin_lock(&dlm->spinlock); - spin_lock(&dlm->master_lock); - __dlm_put_mle(mle); - spin_unlock(&dlm->master_lock); - spin_unlock(&dlm->spinlock); -} - -static inline void dlm_get_mle(struct dlm_master_list_entry *mle) -{ - kref_get(&mle->mle_refs); -} - -static void dlm_init_mle(struct dlm_master_list_entry *mle, - enum dlm_mle_type type, - struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - const char *name, - unsigned int namelen) -{ - assert_spin_locked(&dlm->spinlock); - - mle->dlm = dlm; - mle->type = type; - INIT_HLIST_NODE(&mle->master_hash_node); - INIT_LIST_HEAD(&mle->hb_events); - memset(mle->maybe_map, 0, sizeof(mle->maybe_map)); - spin_lock_init(&mle->spinlock); - init_waitqueue_head(&mle->wq); - atomic_set(&mle->woken, 0); - kref_init(&mle->mle_refs); - memset(mle->response_map, 0, sizeof(mle->response_map)); - mle->master = O2NM_MAX_NODES; - mle->new_master = O2NM_MAX_NODES; - mle->inuse = 0; - - BUG_ON(mle->type != DLM_MLE_BLOCK && - mle->type != DLM_MLE_MASTER && - mle->type != DLM_MLE_MIGRATION); - - if (mle->type == DLM_MLE_MASTER) { - BUG_ON(!res); - mle->mleres = res; - memcpy(mle->mname, res->lockname.name, res->lockname.len); - mle->mnamelen = res->lockname.len; - mle->mnamehash = res->lockname.hash; - } else { - BUG_ON(!name); - mle->mleres = NULL; - memcpy(mle->mname, name, namelen); - mle->mnamelen = namelen; - mle->mnamehash = dlm_lockid_hash(name, namelen); - } - - atomic_inc(&dlm->mle_tot_count[mle->type]); - atomic_inc(&dlm->mle_cur_count[mle->type]); - - /* copy off the node_map and register hb callbacks on our copy */ - memcpy(mle->node_map, dlm->domain_map, sizeof(mle->node_map)); - memcpy(mle->vote_map, dlm->domain_map, sizeof(mle->vote_map)); - clear_bit(dlm->node_num, mle->vote_map); - clear_bit(dlm->node_num, mle->node_map); - - /* attach the mle to the domain node up/down events */ - __dlm_mle_attach_hb_events(dlm, mle); -} - -void __dlm_unlink_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle) -{ - assert_spin_locked(&dlm->spinlock); - assert_spin_locked(&dlm->master_lock); - - if (!hlist_unhashed(&mle->master_hash_node)) - hlist_del_init(&mle->master_hash_node); -} - -void __dlm_insert_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle) -{ - struct hlist_head *bucket; - - assert_spin_locked(&dlm->master_lock); - - bucket = dlm_master_hash(dlm, mle->mnamehash); - hlist_add_head(&mle->master_hash_node, bucket); -} - -/* returns 1 if found, 0 if not */ -static int dlm_find_mle(struct dlm_ctxt *dlm, - struct dlm_master_list_entry **mle, - char *name, unsigned int namelen) -{ - struct dlm_master_list_entry *tmpmle; - struct hlist_head *bucket; - struct hlist_node *list; - unsigned int hash; - - assert_spin_locked(&dlm->master_lock); - - hash = dlm_lockid_hash(name, namelen); - bucket = dlm_master_hash(dlm, hash); - hlist_for_each(list, bucket) { - tmpmle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); - if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) - continue; - dlm_get_mle(tmpmle); - *mle = tmpmle; - return 1; - } - return 0; -} - -void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up) -{ - struct dlm_master_list_entry *mle; - - assert_spin_locked(&dlm->spinlock); - - list_for_each_entry(mle, 
&dlm->mle_hb_events, hb_events) { - if (node_up) - dlm_mle_node_up(dlm, mle, NULL, idx); - else - dlm_mle_node_down(dlm, mle, NULL, idx); - } -} - -static void dlm_mle_node_down(struct dlm_ctxt *dlm, - struct dlm_master_list_entry *mle, - struct o2nm_node *node, int idx) -{ - spin_lock(&mle->spinlock); - - if (!test_bit(idx, mle->node_map)) - mlog(0, "node %u already removed from nodemap!\n", idx); - else - clear_bit(idx, mle->node_map); - - spin_unlock(&mle->spinlock); -} - -static void dlm_mle_node_up(struct dlm_ctxt *dlm, - struct dlm_master_list_entry *mle, - struct o2nm_node *node, int idx) -{ - spin_lock(&mle->spinlock); - - if (test_bit(idx, mle->node_map)) - mlog(0, "node %u already in node map!\n", idx); - else - set_bit(idx, mle->node_map); - - spin_unlock(&mle->spinlock); -} - - -int dlm_init_mle_cache(void) -{ - dlm_mle_cache = kmem_cache_create("o2dlm_mle", - sizeof(struct dlm_master_list_entry), - 0, SLAB_HWCACHE_ALIGN, - NULL); - if (dlm_mle_cache == NULL) - return -ENOMEM; - return 0; -} - -void dlm_destroy_mle_cache(void) -{ - if (dlm_mle_cache) - kmem_cache_destroy(dlm_mle_cache); -} - -static void dlm_mle_release(struct kref *kref) -{ - struct dlm_master_list_entry *mle; - struct dlm_ctxt *dlm; - - mle = container_of(kref, struct dlm_master_list_entry, mle_refs); - dlm = mle->dlm; - - assert_spin_locked(&dlm->spinlock); - assert_spin_locked(&dlm->master_lock); - - mlog(0, "Releasing mle for %.*s, type %d\n", mle->mnamelen, mle->mname, - mle->type); - - /* remove from list if not already */ - __dlm_unlink_mle(dlm, mle); - - /* detach the mle from the domain node up/down events */ - __dlm_mle_detach_hb_events(dlm, mle); - - atomic_dec(&dlm->mle_cur_count[mle->type]); - - /* NOTE: kfree under spinlock here. - * if this is bad, we can move this to a freelist. */ - kmem_cache_free(dlm_mle_cache, mle); -} - - -/* - * LOCK RESOURCE FUNCTIONS - */ - -int dlm_init_master_caches(void) -{ - dlm_lockres_cache = kmem_cache_create("o2dlm_lockres", - sizeof(struct dlm_lock_resource), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!dlm_lockres_cache) - goto bail; - - dlm_lockname_cache = kmem_cache_create("o2dlm_lockname", - DLM_LOCKID_NAME_MAX, 0, - SLAB_HWCACHE_ALIGN, NULL); - if (!dlm_lockname_cache) - goto bail; - - return 0; -bail: - dlm_destroy_master_caches(); - return -ENOMEM; -} - -void dlm_destroy_master_caches(void) -{ - if (dlm_lockname_cache) - kmem_cache_destroy(dlm_lockname_cache); - - if (dlm_lockres_cache) - kmem_cache_destroy(dlm_lockres_cache); -} - -static void dlm_lockres_release(struct kref *kref) -{ - struct dlm_lock_resource *res; - struct dlm_ctxt *dlm; - - res = container_of(kref, struct dlm_lock_resource, refs); - dlm = res->dlm; - - /* This should not happen -- all lockres' have a name - * associated with them at init time. */ - BUG_ON(!res->lockname.name); - - mlog(0, "destroying lockres %.*s\n", res->lockname.len, - res->lockname.name); - - spin_lock(&dlm->track_lock); - if (!list_empty(&res->tracking)) - list_del_init(&res->tracking); - else { - mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n", - res->lockname.len, res->lockname.name); - dlm_print_one_lock_resource(res); - } - spin_unlock(&dlm->track_lock); - - atomic_dec(&dlm->res_cur_count); - - if (!hlist_unhashed(&res->hash_node) || - !list_empty(&res->granted) || - !list_empty(&res->converting) || - !list_empty(&res->blocked) || - !list_empty(&res->dirty) || - !list_empty(&res->recovering) || - !list_empty(&res->purge)) { - mlog(ML_ERROR, - "Going to BUG for resource %.*s." - " We're on a list! 
[%c%c%c%c%c%c%c]\n", - res->lockname.len, res->lockname.name, - !hlist_unhashed(&res->hash_node) ? 'H' : ' ', - !list_empty(&res->granted) ? 'G' : ' ', - !list_empty(&res->converting) ? 'C' : ' ', - !list_empty(&res->blocked) ? 'B' : ' ', - !list_empty(&res->dirty) ? 'D' : ' ', - !list_empty(&res->recovering) ? 'R' : ' ', - !list_empty(&res->purge) ? 'P' : ' '); - - dlm_print_one_lock_resource(res); - } - - /* By the time we're ready to blow this guy away, we shouldn't - * be on any lists. */ - BUG_ON(!hlist_unhashed(&res->hash_node)); - BUG_ON(!list_empty(&res->granted)); - BUG_ON(!list_empty(&res->converting)); - BUG_ON(!list_empty(&res->blocked)); - BUG_ON(!list_empty(&res->dirty)); - BUG_ON(!list_empty(&res->recovering)); - BUG_ON(!list_empty(&res->purge)); - - kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name); - - kmem_cache_free(dlm_lockres_cache, res); -} - -void dlm_lockres_put(struct dlm_lock_resource *res) -{ - kref_put(&res->refs, dlm_lockres_release); -} - -static void dlm_init_lockres(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - const char *name, unsigned int namelen) -{ - char *qname; - - /* If we memset here, we lose our reference to the kmalloc'd - * res->lockname.name, so be sure to init every field - * correctly! */ - - qname = (char *) res->lockname.name; - memcpy(qname, name, namelen); - - res->lockname.len = namelen; - res->lockname.hash = dlm_lockid_hash(name, namelen); - - init_waitqueue_head(&res->wq); - spin_lock_init(&res->spinlock); - INIT_HLIST_NODE(&res->hash_node); - INIT_LIST_HEAD(&res->granted); - INIT_LIST_HEAD(&res->converting); - INIT_LIST_HEAD(&res->blocked); - INIT_LIST_HEAD(&res->dirty); - INIT_LIST_HEAD(&res->recovering); - INIT_LIST_HEAD(&res->purge); - INIT_LIST_HEAD(&res->tracking); - atomic_set(&res->asts_reserved, 0); - res->migration_pending = 0; - res->inflight_locks = 0; - - res->dlm = dlm; - - kref_init(&res->refs); - - atomic_inc(&dlm->res_tot_count); - atomic_inc(&dlm->res_cur_count); - - /* just for consistency */ - spin_lock(&res->spinlock); - dlm_set_lockres_owner(dlm, res, DLM_LOCK_RES_OWNER_UNKNOWN); - spin_unlock(&res->spinlock); - - res->state = DLM_LOCK_RES_IN_PROGRESS; - - res->last_used = 0; - - spin_lock(&dlm->spinlock); - list_add_tail(&res->tracking, &dlm->tracking_list); - spin_unlock(&dlm->spinlock); - - memset(res->lvb, 0, DLM_LVB_LEN); - memset(res->refmap, 0, sizeof(res->refmap)); -} - -struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, - const char *name, - unsigned int namelen) -{ - struct dlm_lock_resource *res = NULL; - - res = kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS); - if (!res) - goto error; - - res->lockname.name = kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS); - if (!res->lockname.name) - goto error; - - dlm_init_lockres(dlm, res, name, namelen); - return res; - -error: - if (res && res->lockname.name) - kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name); - - if (res) - kmem_cache_free(dlm_lockres_cache, res); - return NULL; -} - -void dlm_lockres_set_refmap_bit(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, int bit) -{ - assert_spin_locked(&res->spinlock); - - mlog(0, "res %.*s, set node %u, %ps()\n", res->lockname.len, - res->lockname.name, bit, __builtin_return_address(0)); - - set_bit(bit, res->refmap); -} - -void dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, int bit) -{ - assert_spin_locked(&res->spinlock); - - mlog(0, "res %.*s, clr node %u, %ps()\n", res->lockname.len, - res->lockname.name, bit, 
__builtin_return_address(0)); - - clear_bit(bit, res->refmap); -} - - -void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res) -{ - assert_spin_locked(&res->spinlock); - - res->inflight_locks++; - - mlog(0, "%s: res %.*s, inflight++: now %u, %ps()\n", dlm->name, - res->lockname.len, res->lockname.name, res->inflight_locks, - __builtin_return_address(0)); -} - -void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res) -{ - assert_spin_locked(&res->spinlock); - - BUG_ON(res->inflight_locks == 0); - - res->inflight_locks--; - - mlog(0, "%s: res %.*s, inflight--: now %u, %ps()\n", dlm->name, - res->lockname.len, res->lockname.name, res->inflight_locks, - __builtin_return_address(0)); - - wake_up(&res->wq); -} - -/* - * lookup a lock resource by name. - * may already exist in the hashtable. - * lockid is null terminated - * - * if not, allocate enough for the lockres and for - * the temporary structure used in doing the mastering. - * - * also, do a lookup in the dlm->master_list to see - * if another node has begun mastering the same lock. - * if so, there should be a block entry in there - * for this name, and we should *not* attempt to master - * the lock here. need to wait around for that node - * to assert_master (or die). - * - */ -struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, - const char *lockid, - int namelen, - int flags) -{ - struct dlm_lock_resource *tmpres=NULL, *res=NULL; - struct dlm_master_list_entry *mle = NULL; - struct dlm_master_list_entry *alloc_mle = NULL; - int blocked = 0; - int ret, nodenum; - struct dlm_node_iter iter; - unsigned int hash; - int tries = 0; - int bit, wait_on_recovery = 0; - - BUG_ON(!lockid); - - hash = dlm_lockid_hash(lockid, namelen); - - mlog(0, "get lockres %s (len %d)\n", lockid, namelen); - -lookup: - spin_lock(&dlm->spinlock); - tmpres = __dlm_lookup_lockres_full(dlm, lockid, namelen, hash); - if (tmpres) { - spin_unlock(&dlm->spinlock); - spin_lock(&tmpres->spinlock); - /* Wait on the thread that is mastering the resource */ - if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { - __dlm_wait_on_lockres(tmpres); - BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN); - spin_unlock(&tmpres->spinlock); - dlm_lockres_put(tmpres); - tmpres = NULL; - goto lookup; - } - - /* Wait on the resource purge to complete before continuing */ - if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) { - BUG_ON(tmpres->owner == dlm->node_num); - __dlm_wait_on_lockres_flags(tmpres, - DLM_LOCK_RES_DROPPING_REF); - spin_unlock(&tmpres->spinlock); - dlm_lockres_put(tmpres); - tmpres = NULL; - goto lookup; - } - - /* Grab inflight ref to pin the resource */ - dlm_lockres_grab_inflight_ref(dlm, tmpres); - - spin_unlock(&tmpres->spinlock); - if (res) - dlm_lockres_put(res); - res = tmpres; - goto leave; - } - - if (!res) { - spin_unlock(&dlm->spinlock); - mlog(0, "allocating a new resource\n"); - /* nothing found and we need to allocate one. */ - alloc_mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS); - if (!alloc_mle) - goto leave; - res = dlm_new_lockres(dlm, lockid, namelen); - if (!res) - goto leave; - goto lookup; - } - - mlog(0, "no lockres found, allocated our own: %p\n", res); - - if (flags & LKM_LOCAL) { - /* caller knows it's safe to assume it's not mastered elsewhere - * DONE! 
return right away */ - spin_lock(&res->spinlock); - dlm_change_lockres_owner(dlm, res, dlm->node_num); - __dlm_insert_lockres(dlm, res); - dlm_lockres_grab_inflight_ref(dlm, res); - spin_unlock(&res->spinlock); - spin_unlock(&dlm->spinlock); - /* lockres still marked IN_PROGRESS */ - goto wake_waiters; - } - - /* check master list to see if another node has started mastering it */ - spin_lock(&dlm->master_lock); - - /* if we found a block, wait for lock to be mastered by another node */ - blocked = dlm_find_mle(dlm, &mle, (char *)lockid, namelen); - if (blocked) { - int mig; - if (mle->type == DLM_MLE_MASTER) { - mlog(ML_ERROR, "master entry for nonexistent lock!\n"); - BUG(); - } - mig = (mle->type == DLM_MLE_MIGRATION); - /* if there is a migration in progress, let the migration - * finish before continuing. we can wait for the absence - * of the MIGRATION mle: either the migrate finished or - * one of the nodes died and the mle was cleaned up. - * if there is a BLOCK here, but it already has a master - * set, we are too late. the master does not have a ref - * for us in the refmap. detach the mle and drop it. - * either way, go back to the top and start over. */ - if (mig || mle->master != O2NM_MAX_NODES) { - BUG_ON(mig && mle->master == dlm->node_num); - /* we arrived too late. the master does not - * have a ref for us. retry. */ - mlog(0, "%s:%.*s: late on %s\n", - dlm->name, namelen, lockid, - mig ? "MIGRATION" : "BLOCK"); - spin_unlock(&dlm->master_lock); - spin_unlock(&dlm->spinlock); - - /* master is known, detach */ - if (!mig) - dlm_mle_detach_hb_events(dlm, mle); - dlm_put_mle(mle); - mle = NULL; - /* this is lame, but we can't wait on either - * the mle or lockres waitqueue here */ - if (mig) - msleep(100); - goto lookup; - } - } else { - /* go ahead and try to master lock on this node */ - mle = alloc_mle; - /* make sure this does not get freed below */ - alloc_mle = NULL; - dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0); - set_bit(dlm->node_num, mle->maybe_map); - __dlm_insert_mle(dlm, mle); - - /* still holding the dlm spinlock, check the recovery map - * to see if there are any nodes that still need to be - * considered. these will not appear in the mle nodemap - * but they might own this lockres. wait on them. */ - bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); - if (bit < O2NM_MAX_NODES) { - mlog(0, "%s: res %.*s, At least one node (%d) " - "to recover before lock mastery can begin\n", - dlm->name, namelen, (char *)lockid, bit); - wait_on_recovery = 1; - } - } - - /* at this point there is either a DLM_MLE_BLOCK or a - * DLM_MLE_MASTER on the master list, so it's safe to add the - * lockres to the hashtable. anyone who finds the lock will - * still have to wait on the IN_PROGRESS. */ - - /* finally add the lockres to its hash bucket */ - __dlm_insert_lockres(dlm, res); - - /* Grab inflight ref to pin the resource */ - spin_lock(&res->spinlock); - dlm_lockres_grab_inflight_ref(dlm, res); - spin_unlock(&res->spinlock); - - /* get an extra ref on the mle in case this is a BLOCK - * if so, the creator of the BLOCK may try to put the last - * ref at this time in the assert master handler, so we - * need an extra one to keep from a bad ptr deref. */ - dlm_get_mle_inuse(mle); - spin_unlock(&dlm->master_lock); - spin_unlock(&dlm->spinlock); - -redo_request: - while (wait_on_recovery) { - /* any cluster changes that occurred after dropping the - * dlm spinlock would be detectable be a change on the mle, - * so we only need to clear out the recovery map once. 
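 * (in other words, once the recovery map has been observed
 * empty, any later node death will show up through the mle's
 * heartbeat callbacks, so the scan does not need to start
 * over from scratch.)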
*/ - if (dlm_is_recovery_lock(lockid, namelen)) { - mlog(0, "%s: Recovery map is not empty, but must " - "master $RECOVERY lock now\n", dlm->name); - if (!dlm_pre_master_reco_lockres(dlm, res)) - wait_on_recovery = 0; - else { - mlog(0, "%s: waiting 500ms for heartbeat state " - "change\n", dlm->name); - msleep(500); - } - continue; - } - - dlm_kick_recovery_thread(dlm); - msleep(1000); - dlm_wait_for_recovery(dlm); - - spin_lock(&dlm->spinlock); - bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); - if (bit < O2NM_MAX_NODES) { - mlog(0, "%s: res %.*s, At least one node (%d) " - "to recover before lock mastery can begin\n", - dlm->name, namelen, (char *)lockid, bit); - wait_on_recovery = 1; - } else - wait_on_recovery = 0; - spin_unlock(&dlm->spinlock); - - if (wait_on_recovery) - dlm_wait_for_node_recovery(dlm, bit, 10000); - } - - /* must wait for lock to be mastered elsewhere */ - if (blocked) - goto wait; - - ret = -EINVAL; - dlm_node_iter_init(mle->vote_map, &iter); - while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { - ret = dlm_do_master_request(res, mle, nodenum); - if (ret < 0) - mlog_errno(ret); - if (mle->master != O2NM_MAX_NODES) { - /* found a master ! */ - if (mle->master <= nodenum) - break; - /* if our master request has not reached the master - * yet, keep going until it does. this is how the - * master will know that asserts are needed back to - * the lower nodes. */ - mlog(0, "%s: res %.*s, Requests only up to %u but " - "master is %u, keep going\n", dlm->name, namelen, - lockid, nodenum, mle->master); - } - } - -wait: - /* keep going until the response map includes all nodes */ - ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked); - if (ret < 0) { - wait_on_recovery = 1; - mlog(0, "%s: res %.*s, Node map changed, redo the master " - "request now, blocked=%d\n", dlm->name, res->lockname.len, - res->lockname.name, blocked); - if (++tries > 20) { - mlog(ML_ERROR, "%s: res %.*s, Spinning on " - "dlm_wait_for_lock_mastery, blocked = %d\n", - dlm->name, res->lockname.len, - res->lockname.name, blocked); - dlm_print_one_lock_resource(res); - dlm_print_one_mle(mle); - tries = 0; - } - goto redo_request; - } - - mlog(0, "%s: res %.*s, Mastered by %u\n", dlm->name, res->lockname.len, - res->lockname.name, res->owner); - /* make sure we never continue without this */ - BUG_ON(res->owner == O2NM_MAX_NODES); - - /* master is known, detach if not already detached */ - dlm_mle_detach_hb_events(dlm, mle); - dlm_put_mle(mle); - /* put the extra ref */ - dlm_put_mle_inuse(mle); - -wake_waiters: - spin_lock(&res->spinlock); - res->state &= ~DLM_LOCK_RES_IN_PROGRESS; - spin_unlock(&res->spinlock); - wake_up(&res->wq); - -leave: - /* need to free the unused mle */ - if (alloc_mle) - kmem_cache_free(dlm_mle_cache, alloc_mle); - - return res; -} - - -#define DLM_MASTERY_TIMEOUT_MS 5000 - -static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_master_list_entry *mle, - int *blocked) -{ - u8 m; - int ret, bit; - int map_changed, voting_done; - int assert, sleep; - -recheck: - ret = 0; - assert = 0; - - /* check if another node has already become the owner */ - spin_lock(&res->spinlock); - if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) { - mlog(0, "%s:%.*s: owner is suddenly %u\n", dlm->name, - res->lockname.len, res->lockname.name, res->owner); - spin_unlock(&res->spinlock); - /* this will cause the master to re-assert across - * the whole cluster, freeing up mles */ - if (res->owner != dlm->node_num) { - ret = 
dlm_do_master_request(res, mle, res->owner); - if (ret < 0) { - /* give recovery a chance to run */ - mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret); - msleep(500); - goto recheck; - } - } - ret = 0; - goto leave; - } - spin_unlock(&res->spinlock); - - spin_lock(&mle->spinlock); - m = mle->master; - map_changed = (memcmp(mle->vote_map, mle->node_map, - sizeof(mle->vote_map)) != 0); - voting_done = (memcmp(mle->vote_map, mle->response_map, - sizeof(mle->vote_map)) == 0); - - /* restart if we hit any errors */ - if (map_changed) { - int b; - mlog(0, "%s: %.*s: node map changed, restarting\n", - dlm->name, res->lockname.len, res->lockname.name); - ret = dlm_restart_lock_mastery(dlm, res, mle, *blocked); - b = (mle->type == DLM_MLE_BLOCK); - if ((*blocked && !b) || (!*blocked && b)) { - mlog(0, "%s:%.*s: status change: old=%d new=%d\n", - dlm->name, res->lockname.len, res->lockname.name, - *blocked, b); - *blocked = b; - } - spin_unlock(&mle->spinlock); - if (ret < 0) { - mlog_errno(ret); - goto leave; - } - mlog(0, "%s:%.*s: restart lock mastery succeeded, " - "rechecking now\n", dlm->name, res->lockname.len, - res->lockname.name); - goto recheck; - } else { - if (!voting_done) { - mlog(0, "map not changed and voting not done " - "for %s:%.*s\n", dlm->name, res->lockname.len, - res->lockname.name); - } - } - - if (m != O2NM_MAX_NODES) { - /* another node has done an assert! - * all done! */ - sleep = 0; - } else { - sleep = 1; - /* have all nodes responded? */ - if (voting_done && !*blocked) { - bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0); - if (dlm->node_num <= bit) { - /* my node number is lowest. - * now tell other nodes that I am - * mastering this. */ - mle->master = dlm->node_num; - /* ref was grabbed in get_lock_resource - * will be dropped in dlmlock_master */ - assert = 1; - sleep = 0; - } - /* if voting is done, but we have not received - * an assert master yet, we must sleep */ - } - } - - spin_unlock(&mle->spinlock); - - /* sleep if we haven't finished voting yet */ - if (sleep) { - unsigned long timeo = msecs_to_jiffies(DLM_MASTERY_TIMEOUT_MS); - - /* - if (atomic_read(&mle->mle_refs.refcount) < 2) - mlog(ML_ERROR, "mle (%p) refs=%d, name=%.*s\n", mle, - atomic_read(&mle->mle_refs.refcount), - res->lockname.len, res->lockname.name); - */ - atomic_set(&mle->woken, 0); - (void)wait_event_timeout(mle->wq, - (atomic_read(&mle->woken) == 1), - timeo); - if (res->owner == O2NM_MAX_NODES) { - mlog(0, "%s:%.*s: waiting again\n", dlm->name, - res->lockname.len, res->lockname.name); - goto recheck; - } - mlog(0, "done waiting, master is %u\n", res->owner); - ret = 0; - goto leave; - } - - ret = 0; /* done */ - if (assert) { - m = dlm->node_num; - mlog(0, "about to master %.*s here, this=%u\n", - res->lockname.len, res->lockname.name, m); - ret = dlm_do_assert_master(dlm, res, mle->vote_map, 0); - if (ret) { - /* This is a failure in the network path, - * not in the response to the assert_master - * (any nonzero response is a BUG on this node). - * Most likely a socket just got disconnected - * due to node death. */ - mlog_errno(ret); - } - /* no longer need to restart lock mastery. - * all living nodes have been contacted. 
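 * (a network error on the assert is survivable: the
 * unreachable node will be handled by recovery, and every
 * node that did get the message now agrees on the master.)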
*/ - ret = 0; - } - - /* set the lockres owner */ - spin_lock(&res->spinlock); - /* mastery reference obtained either during - * assert_master_handler or in get_lock_resource */ - dlm_change_lockres_owner(dlm, res, m); - spin_unlock(&res->spinlock); - -leave: - return ret; -} - -struct dlm_bitmap_diff_iter -{ - int curnode; - unsigned long *orig_bm; - unsigned long *cur_bm; - unsigned long diff_bm[BITS_TO_LONGS(O2NM_MAX_NODES)]; -}; - -enum dlm_node_state_change -{ - NODE_DOWN = -1, - NODE_NO_CHANGE = 0, - NODE_UP -}; - -static void dlm_bitmap_diff_iter_init(struct dlm_bitmap_diff_iter *iter, - unsigned long *orig_bm, - unsigned long *cur_bm) -{ - unsigned long p1, p2; - int i; - - iter->curnode = -1; - iter->orig_bm = orig_bm; - iter->cur_bm = cur_bm; - - for (i = 0; i < BITS_TO_LONGS(O2NM_MAX_NODES); i++) { - p1 = *(iter->orig_bm + i); - p2 = *(iter->cur_bm + i); - iter->diff_bm[i] = (p1 & ~p2) | (p2 & ~p1); - } -} - -static int dlm_bitmap_diff_iter_next(struct dlm_bitmap_diff_iter *iter, - enum dlm_node_state_change *state) -{ - int bit; - - if (iter->curnode >= O2NM_MAX_NODES) - return -ENOENT; - - bit = find_next_bit(iter->diff_bm, O2NM_MAX_NODES, - iter->curnode+1); - if (bit >= O2NM_MAX_NODES) { - iter->curnode = O2NM_MAX_NODES; - return -ENOENT; - } - - /* if it was there in the original then this node died */ - if (test_bit(bit, iter->orig_bm)) - *state = NODE_DOWN; - else - *state = NODE_UP; - - iter->curnode = bit; - return bit; -} - - -static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_master_list_entry *mle, - int blocked) -{ - struct dlm_bitmap_diff_iter bdi; - enum dlm_node_state_change sc; - int node; - int ret = 0; - - mlog(0, "something happened such that the " - "master process may need to be restarted!\n"); - - assert_spin_locked(&mle->spinlock); - - dlm_bitmap_diff_iter_init(&bdi, mle->vote_map, mle->node_map); - node = dlm_bitmap_diff_iter_next(&bdi, &sc); - while (node >= 0) { - if (sc == NODE_UP) { - /* a node came up. clear any old vote from - * the response map and set it in the vote map - * then restart the mastery. */ - mlog(ML_NOTICE, "node %d up while restarting\n", node); - - /* redo the master request, but only for the new node */ - mlog(0, "sending request to new node\n"); - clear_bit(node, mle->response_map); - set_bit(node, mle->vote_map); - } else { - mlog(ML_ERROR, "node down! %d\n", node); - if (blocked) { - int lowest = find_next_bit(mle->maybe_map, - O2NM_MAX_NODES, 0); - - /* act like it was never there */ - clear_bit(node, mle->maybe_map); - - if (node == lowest) { - mlog(0, "expected master %u died" - " while this node was blocked " - "waiting on it!\n", node); - lowest = find_next_bit(mle->maybe_map, - O2NM_MAX_NODES, - lowest+1); - if (lowest < O2NM_MAX_NODES) { - mlog(0, "%s:%.*s:still " - "blocked. waiting on %u " - "now\n", dlm->name, - res->lockname.len, - res->lockname.name, - lowest); - } else { - /* mle is an MLE_BLOCK, but - * there is now nothing left to - * block on. we need to return - * all the way back out and try - * again with an MLE_MASTER. - * dlm_do_local_recovery_cleanup - * has already run, so the mle - * refcount is ok */ - mlog(0, "%s:%.*s: no " - "longer blocking. 
try to " - "master this here\n", - dlm->name, - res->lockname.len, - res->lockname.name); - mle->type = DLM_MLE_MASTER; - mle->mleres = res; - } - } - } - - /* now blank out everything, as if we had never - * contacted anyone */ - memset(mle->maybe_map, 0, sizeof(mle->maybe_map)); - memset(mle->response_map, 0, sizeof(mle->response_map)); - /* reset the vote_map to the current node_map */ - memcpy(mle->vote_map, mle->node_map, - sizeof(mle->node_map)); - /* put myself into the maybe map */ - if (mle->type != DLM_MLE_BLOCK) - set_bit(dlm->node_num, mle->maybe_map); - } - ret = -EAGAIN; - node = dlm_bitmap_diff_iter_next(&bdi, &sc); - } - return ret; -} - - -/* - * DLM_MASTER_REQUEST_MSG - * - * returns: 0 on success, - * -errno on a network error - * - * on error, the caller should assume the target node is "dead" - * - */ - -static int dlm_do_master_request(struct dlm_lock_resource *res, - struct dlm_master_list_entry *mle, int to) -{ - struct dlm_ctxt *dlm = mle->dlm; - struct dlm_master_request request; - int ret, response=0, resend; - - memset(&request, 0, sizeof(request)); - request.node_idx = dlm->node_num; - - BUG_ON(mle->type == DLM_MLE_MIGRATION); - - request.namelen = (u8)mle->mnamelen; - memcpy(request.name, mle->mname, request.namelen); - -again: - ret = o2net_send_message(DLM_MASTER_REQUEST_MSG, dlm->key, &request, - sizeof(request), to, &response); - if (ret < 0) { - if (ret == -ESRCH) { - /* should never happen */ - mlog(ML_ERROR, "TCP stack not ready!\n"); - BUG(); - } else if (ret == -EINVAL) { - mlog(ML_ERROR, "bad args passed to o2net!\n"); - BUG(); - } else if (ret == -ENOMEM) { - mlog(ML_ERROR, "out of memory while trying to send " - "network message! retrying\n"); - /* this is totally crude */ - msleep(50); - goto again; - } else if (!dlm_is_host_down(ret)) { - /* not a network error. bad. */ - mlog_errno(ret); - mlog(ML_ERROR, "unhandled error!"); - BUG(); - } - /* all other errors should be network errors, - * and likely indicate node death */ - mlog(ML_ERROR, "link to %d went down!\n", to); - goto out; - } - - ret = 0; - resend = 0; - spin_lock(&mle->spinlock); - switch (response) { - case DLM_MASTER_RESP_YES: - set_bit(to, mle->response_map); - mlog(0, "node %u is the master, response=YES\n", to); - mlog(0, "%s:%.*s: master node %u now knows I have a " - "reference\n", dlm->name, res->lockname.len, - res->lockname.name, to); - mle->master = to; - break; - case DLM_MASTER_RESP_NO: - mlog(0, "node %u not master, response=NO\n", to); - set_bit(to, mle->response_map); - break; - case DLM_MASTER_RESP_MAYBE: - mlog(0, "node %u not master, response=MAYBE\n", to); - set_bit(to, mle->response_map); - set_bit(to, mle->maybe_map); - break; - case DLM_MASTER_RESP_ERROR: - mlog(0, "node %u hit an error, resending\n", to); - resend = 1; - response = 0; - break; - default: - mlog(ML_ERROR, "bad response! %u\n", response); - BUG(); - } - spin_unlock(&mle->spinlock); - if (resend) { - /* this is also totally crude */ - msleep(50); - goto again; - } - -out: - return ret; -} - -/* - * locks that can be taken here: - * dlm->spinlock - * res->spinlock - * mle->spinlock - * dlm->master_list - * - * if possible, TRIM THIS DOWN!!! 
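 *
 * (in practice the handler works from the domain down: it
 * takes dlm->spinlock to find the lockres, then res->spinlock,
 * then dlm->master_lock and the mle spinlock while inspecting
 * the master list entry.)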
- */ -int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - u8 response = DLM_MASTER_RESP_MAYBE; - struct dlm_ctxt *dlm = data; - struct dlm_lock_resource *res = NULL; - struct dlm_master_request *request = (struct dlm_master_request *) msg->buf; - struct dlm_master_list_entry *mle = NULL, *tmpmle = NULL; - char *name; - unsigned int namelen, hash; - int found, ret; - int set_maybe; - int dispatch_assert = 0; - - if (!dlm_grab(dlm)) - return DLM_MASTER_RESP_NO; - - if (!dlm_domain_fully_joined(dlm)) { - response = DLM_MASTER_RESP_NO; - goto send_response; - } - - name = request->name; - namelen = request->namelen; - hash = dlm_lockid_hash(name, namelen); - - if (namelen > DLM_LOCKID_NAME_MAX) { - response = DLM_IVBUFLEN; - goto send_response; - } - -way_up_top: - spin_lock(&dlm->spinlock); - res = __dlm_lookup_lockres(dlm, name, namelen, hash); - if (res) { - spin_unlock(&dlm->spinlock); - - /* take care of the easy cases up front */ - spin_lock(&res->spinlock); - if (res->state & (DLM_LOCK_RES_RECOVERING| - DLM_LOCK_RES_MIGRATING)) { - spin_unlock(&res->spinlock); - mlog(0, "returning DLM_MASTER_RESP_ERROR since res is " - "being recovered/migrated\n"); - response = DLM_MASTER_RESP_ERROR; - if (mle) - kmem_cache_free(dlm_mle_cache, mle); - goto send_response; - } - - if (res->owner == dlm->node_num) { - dlm_lockres_set_refmap_bit(dlm, res, request->node_idx); - spin_unlock(&res->spinlock); - response = DLM_MASTER_RESP_YES; - if (mle) - kmem_cache_free(dlm_mle_cache, mle); - - /* this node is the owner. - * there is some extra work that needs to - * happen now. the requesting node has - * caused all nodes up to this one to - * create mles. this node now needs to - * go back and clean those up. */ - dispatch_assert = 1; - goto send_response; - } else if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) { - spin_unlock(&res->spinlock); - // mlog(0, "node %u is the master\n", res->owner); - response = DLM_MASTER_RESP_NO; - if (mle) - kmem_cache_free(dlm_mle_cache, mle); - goto send_response; - } - - /* ok, there is no owner. either this node is - * being blocked, or it is actively trying to - * master this lock. */ - if (!(res->state & DLM_LOCK_RES_IN_PROGRESS)) { - mlog(ML_ERROR, "lock with no owner should be " - "in-progress!\n"); - BUG(); - } - - // mlog(0, "lockres is in progress...\n"); - spin_lock(&dlm->master_lock); - found = dlm_find_mle(dlm, &tmpmle, name, namelen); - if (!found) { - mlog(ML_ERROR, "no mle found for this lock!\n"); - BUG(); - } - set_maybe = 1; - spin_lock(&tmpmle->spinlock); - if (tmpmle->type == DLM_MLE_BLOCK) { - // mlog(0, "this node is waiting for " - // "lockres to be mastered\n"); - response = DLM_MASTER_RESP_NO; - } else if (tmpmle->type == DLM_MLE_MIGRATION) { - mlog(0, "node %u is master, but trying to migrate to " - "node %u.\n", tmpmle->master, tmpmle->new_master); - if (tmpmle->master == dlm->node_num) { - mlog(ML_ERROR, "no owner on lockres, but this " - "node is trying to migrate it to %u?!\n", - tmpmle->new_master); - BUG(); - } else { - /* the real master can respond on its own */ - response = DLM_MASTER_RESP_NO; - } - } else if (tmpmle->master != DLM_LOCK_RES_OWNER_UNKNOWN) { - set_maybe = 0; - if (tmpmle->master == dlm->node_num) { - response = DLM_MASTER_RESP_YES; - /* this node will be the owner. 
- * go back and clean the mles on any - * other nodes */ - dispatch_assert = 1; - dlm_lockres_set_refmap_bit(dlm, res, - request->node_idx); - } else - response = DLM_MASTER_RESP_NO; - } else { - // mlog(0, "this node is attempting to " - // "master lockres\n"); - response = DLM_MASTER_RESP_MAYBE; - } - if (set_maybe) - set_bit(request->node_idx, tmpmle->maybe_map); - spin_unlock(&tmpmle->spinlock); - - spin_unlock(&dlm->master_lock); - spin_unlock(&res->spinlock); - - /* keep the mle attached to heartbeat events */ - dlm_put_mle(tmpmle); - if (mle) - kmem_cache_free(dlm_mle_cache, mle); - goto send_response; - } - - /* - * lockres doesn't exist on this node - * if there is an MLE_BLOCK, return NO - * if there is an MLE_MASTER, return MAYBE - * otherwise, add an MLE_BLOCK, return NO - */ - spin_lock(&dlm->master_lock); - found = dlm_find_mle(dlm, &tmpmle, name, namelen); - if (!found) { - /* this lockid has never been seen on this node yet */ - // mlog(0, "no mle found\n"); - if (!mle) { - spin_unlock(&dlm->master_lock); - spin_unlock(&dlm->spinlock); - - mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS); - if (!mle) { - response = DLM_MASTER_RESP_ERROR; - mlog_errno(-ENOMEM); - goto send_response; - } - goto way_up_top; - } - - // mlog(0, "this is second time thru, already allocated, " - // "add the block.\n"); - dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL, name, namelen); - set_bit(request->node_idx, mle->maybe_map); - __dlm_insert_mle(dlm, mle); - response = DLM_MASTER_RESP_NO; - } else { - // mlog(0, "mle was found\n"); - set_maybe = 1; - spin_lock(&tmpmle->spinlock); - if (tmpmle->master == dlm->node_num) { - mlog(ML_ERROR, "no lockres, but an mle with this node as master!\n"); - BUG(); - } - if (tmpmle->type == DLM_MLE_BLOCK) - response = DLM_MASTER_RESP_NO; - else if (tmpmle->type == DLM_MLE_MIGRATION) { - mlog(0, "migration mle was found (%u->%u)\n", - tmpmle->master, tmpmle->new_master); - /* real master can respond on its own */ - response = DLM_MASTER_RESP_NO; - } else - response = DLM_MASTER_RESP_MAYBE; - if (set_maybe) - set_bit(request->node_idx, tmpmle->maybe_map); - spin_unlock(&tmpmle->spinlock); - } - spin_unlock(&dlm->master_lock); - spin_unlock(&dlm->spinlock); - - if (found) { - /* keep the mle attached to heartbeat events */ - dlm_put_mle(tmpmle); - } -send_response: - /* - * __dlm_lookup_lockres() grabbed a reference to this lockres. - * The reference is released by dlm_assert_master_worker() under - * the call to dlm_dispatch_assert_master(). If - * dlm_assert_master_worker() isn't called, we drop it here. - */ - if (dispatch_assert) { - if (response != DLM_MASTER_RESP_YES) - mlog(ML_ERROR, "invalid response %d\n", response); - if (!res) { - mlog(ML_ERROR, "bad lockres while trying to assert!\n"); - BUG(); - } - mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", - dlm->node_num, res->lockname.len, res->lockname.name); - ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, - DLM_ASSERT_MASTER_MLE_CLEANUP); - if (ret < 0) { - mlog(ML_ERROR, "failed to dispatch assert master work\n"); - response = DLM_MASTER_RESP_ERROR; - dlm_lockres_put(res); - } - } else { - if (res) - dlm_lockres_put(res); - } - - dlm_put(dlm); - return response; -} - -/* - * DLM_ASSERT_MASTER_MSG - */ - - -/* - * NOTE: this can be used for debugging - * can periodically run all locks owned by this node - * and re-assert across the cluster... 
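 * (normal callers pass the mle's vote_map or the domain map
 * as the nodemap; each response may carry REASSERT or
 * MASTERY_REF bits, which are handled in the send loop below.)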
- */ -static int dlm_do_assert_master(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - void *nodemap, u32 flags) -{ - struct dlm_assert_master assert; - int to, tmpret; - struct dlm_node_iter iter; - int ret = 0; - int reassert; - const char *lockname = res->lockname.name; - unsigned int namelen = res->lockname.len; - - BUG_ON(namelen > O2NM_MAX_NAME_LEN); - - spin_lock(&res->spinlock); - res->state |= DLM_LOCK_RES_SETREF_INPROG; - spin_unlock(&res->spinlock); - -again: - reassert = 0; - - /* note that if this nodemap is empty, it returns 0 */ - dlm_node_iter_init(nodemap, &iter); - while ((to = dlm_node_iter_next(&iter)) >= 0) { - int r = 0; - struct dlm_master_list_entry *mle = NULL; - - mlog(0, "sending assert master to %d (%.*s)\n", to, - namelen, lockname); - memset(&assert, 0, sizeof(assert)); - assert.node_idx = dlm->node_num; - assert.namelen = namelen; - memcpy(assert.name, lockname, namelen); - assert.flags = cpu_to_be32(flags); - - tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key, - &assert, sizeof(assert), to, &r); - if (tmpret < 0) { - mlog(ML_ERROR, "Error %d when sending message %u (key " - "0x%x) to node %u\n", tmpret, - DLM_ASSERT_MASTER_MSG, dlm->key, to); - if (!dlm_is_host_down(tmpret)) { - mlog(ML_ERROR, "unhandled error=%d!\n", tmpret); - BUG(); - } - /* a node died. finish out the rest of the nodes. */ - mlog(0, "link to %d went down!\n", to); - /* any nonzero status return will do */ - ret = tmpret; - r = 0; - } else if (r < 0) { - /* ok, something horribly messed. kill thyself. */ - mlog(ML_ERROR,"during assert master of %.*s to %u, " - "got %d.\n", namelen, lockname, to, r); - spin_lock(&dlm->spinlock); - spin_lock(&dlm->master_lock); - if (dlm_find_mle(dlm, &mle, (char *)lockname, - namelen)) { - dlm_print_one_mle(mle); - __dlm_put_mle(mle); - } - spin_unlock(&dlm->master_lock); - spin_unlock(&dlm->spinlock); - BUG(); - } - - if (r & DLM_ASSERT_RESPONSE_REASSERT && - !(r & DLM_ASSERT_RESPONSE_MASTERY_REF)) { - mlog(ML_ERROR, "%.*s: very strange, " - "master MLE but no lockres on %u\n", - namelen, lockname, to); - } - - if (r & DLM_ASSERT_RESPONSE_REASSERT) { - mlog(0, "%.*s: node %u create mles on other " - "nodes and requests a re-assert\n", - namelen, lockname, to); - reassert = 1; - } - if (r & DLM_ASSERT_RESPONSE_MASTERY_REF) { - mlog(0, "%.*s: node %u has a reference to this " - "lockres, set the bit in the refmap\n", - namelen, lockname, to); - spin_lock(&res->spinlock); - dlm_lockres_set_refmap_bit(dlm, res, to); - spin_unlock(&res->spinlock); - } - } - - if (reassert) - goto again; - - spin_lock(&res->spinlock); - res->state &= ~DLM_LOCK_RES_SETREF_INPROG; - spin_unlock(&res->spinlock); - wake_up(&res->wq); - - return ret; -} - -/* - * locks that can be taken here: - * dlm->spinlock - * res->spinlock - * mle->spinlock - * dlm->master_list - * - * if possible, TRIM THIS DOWN!!! 
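dlm_do_assert_master() above drives everything through the dlm_node_iter helpers. A self-contained userspace equivalent of that iteration pattern, simplified to a single word of bitmap (types and names here are stand-ins, not the o2nm definitions):

#include <stdio.h>

#define MAX_NODES 64

struct node_iter {
        unsigned long map;
        int cur;
};

static void node_iter_init(unsigned long map, struct node_iter *it)
{
        it->map = map;
        it->cur = -1;
}

/* next set bit at or above cur+1, or -1 when exhausted (the kernel
 * helper signals exhaustion with a negative value the same way) */
static int node_iter_next(struct node_iter *it)
{
        for (int b = it->cur + 1; b < MAX_NODES; b++) {
                if (it->map & (1UL << b)) {
                        it->cur = b;
                        return b;
                }
        }
        return -1;
}

int main(void)
{
        struct node_iter it;
        int to;

        node_iter_init((1UL << 2) | (1UL << 5) | (1UL << 9), &it);
        while ((to = node_iter_next(&it)) >= 0)
                printf("would send assert_master to node %d\n", to);
        return 0;
}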
- */ -int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - struct dlm_ctxt *dlm = data; - struct dlm_master_list_entry *mle = NULL; - struct dlm_assert_master *assert = (struct dlm_assert_master *)msg->buf; - struct dlm_lock_resource *res = NULL; - char *name; - unsigned int namelen, hash; - u32 flags; - int master_request = 0, have_lockres_ref = 0; - int ret = 0; - - if (!dlm_grab(dlm)) - return 0; - - name = assert->name; - namelen = assert->namelen; - hash = dlm_lockid_hash(name, namelen); - flags = be32_to_cpu(assert->flags); - - if (namelen > DLM_LOCKID_NAME_MAX) { - mlog(ML_ERROR, "Invalid name length!"); - goto done; - } - - spin_lock(&dlm->spinlock); - - if (flags) - mlog(0, "assert_master with flags: %u\n", flags); - - /* find the MLE */ - spin_lock(&dlm->master_lock); - if (!dlm_find_mle(dlm, &mle, name, namelen)) { - /* not an error, could be master just re-asserting */ - mlog(0, "just got an assert_master from %u, but no " - "MLE for it! (%.*s)\n", assert->node_idx, - namelen, name); - } else { - int bit = find_next_bit (mle->maybe_map, O2NM_MAX_NODES, 0); - if (bit >= O2NM_MAX_NODES) { - /* not necessarily an error, though less likely. - * could be master just re-asserting. */ - mlog(0, "no bits set in the maybe_map, but %u " - "is asserting! (%.*s)\n", assert->node_idx, - namelen, name); - } else if (bit != assert->node_idx) { - if (flags & DLM_ASSERT_MASTER_MLE_CLEANUP) { - mlog(0, "master %u was found, %u should " - "back off\n", assert->node_idx, bit); - } else { - /* with the fix for bug 569, a higher node - * number winning the mastery will respond - * YES to mastery requests, but this node - * had no way of knowing. let it pass. */ - mlog(0, "%u is the lowest node, " - "%u is asserting. (%.*s) %u must " - "have begun after %u won.\n", bit, - assert->node_idx, namelen, name, bit, - assert->node_idx); - } - } - if (mle->type == DLM_MLE_MIGRATION) { - if (flags & DLM_ASSERT_MASTER_MLE_CLEANUP) { - mlog(0, "%s:%.*s: got cleanup assert" - " from %u for migration\n", - dlm->name, namelen, name, - assert->node_idx); - } else if (!(flags & DLM_ASSERT_MASTER_FINISH_MIGRATION)) { - mlog(0, "%s:%.*s: got unrelated assert" - " from %u for migration, ignoring\n", - dlm->name, namelen, name, - assert->node_idx); - __dlm_put_mle(mle); - spin_unlock(&dlm->master_lock); - spin_unlock(&dlm->spinlock); - goto done; - } - } - } - spin_unlock(&dlm->master_lock); - - /* ok everything checks out with the MLE - * now check to see if there is a lockres */ - res = __dlm_lookup_lockres(dlm, name, namelen, hash); - if (res) { - spin_lock(&res->spinlock); - if (res->state & DLM_LOCK_RES_RECOVERING) { - mlog(ML_ERROR, "%u asserting but %.*s is " - "RECOVERING!\n", assert->node_idx, namelen, name); - goto kill; - } - if (!mle) { - if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN && - res->owner != assert->node_idx) { - mlog(ML_ERROR, "DIE! Mastery assert from %u, " - "but current owner is %u! (%.*s)\n", - assert->node_idx, res->owner, namelen, - name); - __dlm_print_one_lock_resource(res); - BUG(); - } - } else if (mle->type != DLM_MLE_MIGRATION) { - if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) { - /* owner is just re-asserting */ - if (res->owner == assert->node_idx) { - mlog(0, "owner %u re-asserting on " - "lock %.*s\n", assert->node_idx, - namelen, name); - goto ok; - } - mlog(ML_ERROR, "got assert_master from " - "node %u, but %u is the owner! 
" - "(%.*s)\n", assert->node_idx, - res->owner, namelen, name); - goto kill; - } - if (!(res->state & DLM_LOCK_RES_IN_PROGRESS)) { - mlog(ML_ERROR, "got assert from %u, but lock " - "with no owner should be " - "in-progress! (%.*s)\n", - assert->node_idx, - namelen, name); - goto kill; - } - } else /* mle->type == DLM_MLE_MIGRATION */ { - /* should only be getting an assert from new master */ - if (assert->node_idx != mle->new_master) { - mlog(ML_ERROR, "got assert from %u, but " - "new master is %u, and old master " - "was %u (%.*s)\n", - assert->node_idx, mle->new_master, - mle->master, namelen, name); - goto kill; - } - - } -ok: - spin_unlock(&res->spinlock); - } - - // mlog(0, "woo! got an assert_master from node %u!\n", - // assert->node_idx); - if (mle) { - int extra_ref = 0; - int nn = -1; - int rr, err = 0; - - spin_lock(&mle->spinlock); - if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION) - extra_ref = 1; - else { - /* MASTER mle: if any bits set in the response map - * then the calling node needs to re-assert to clear - * up nodes that this node contacted */ - while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES, - nn+1)) < O2NM_MAX_NODES) { - if (nn != dlm->node_num && nn != assert->node_idx) - master_request = 1; - } - } - mle->master = assert->node_idx; - atomic_set(&mle->woken, 1); - wake_up(&mle->wq); - spin_unlock(&mle->spinlock); - - if (res) { - int wake = 0; - spin_lock(&res->spinlock); - if (mle->type == DLM_MLE_MIGRATION) { - mlog(0, "finishing off migration of lockres %.*s, " - "from %u to %u\n", - res->lockname.len, res->lockname.name, - dlm->node_num, mle->new_master); - res->state &= ~DLM_LOCK_RES_MIGRATING; - wake = 1; - dlm_change_lockres_owner(dlm, res, mle->new_master); - BUG_ON(res->state & DLM_LOCK_RES_DIRTY); - } else { - dlm_change_lockres_owner(dlm, res, mle->master); - } - spin_unlock(&res->spinlock); - have_lockres_ref = 1; - if (wake) - wake_up(&res->wq); - } - - /* master is known, detach if not already detached. - * ensures that only one assert_master call will happen - * on this mle. */ - spin_lock(&dlm->master_lock); - - rr = atomic_read(&mle->mle_refs.refcount); - if (mle->inuse > 0) { - if (extra_ref && rr < 3) - err = 1; - else if (!extra_ref && rr < 2) - err = 1; - } else { - if (extra_ref && rr < 2) - err = 1; - else if (!extra_ref && rr < 1) - err = 1; - } - if (err) { - mlog(ML_ERROR, "%s:%.*s: got assert master from %u " - "that will mess up this node, refs=%d, extra=%d, " - "inuse=%d\n", dlm->name, namelen, name, - assert->node_idx, rr, extra_ref, mle->inuse); - dlm_print_one_mle(mle); - } - __dlm_unlink_mle(dlm, mle); - __dlm_mle_detach_hb_events(dlm, mle); - __dlm_put_mle(mle); - if (extra_ref) { - /* the assert master message now balances the extra - * ref given by the master / migration request message. - * if this is the last put, it will be removed - * from the list. */ - __dlm_put_mle(mle); - } - spin_unlock(&dlm->master_lock); - } else if (res) { - if (res->owner != assert->node_idx) { - mlog(0, "assert_master from %u, but current " - "owner is %u (%.*s), no mle\n", assert->node_idx, - res->owner, namelen, name); - } - } - spin_unlock(&dlm->spinlock); - -done: - ret = 0; - if (res) { - spin_lock(&res->spinlock); - res->state |= DLM_LOCK_RES_SETREF_INPROG; - spin_unlock(&res->spinlock); - *ret_data = (void *)res; - } - dlm_put(dlm); - if (master_request) { - mlog(0, "need to tell master to reassert\n"); - /* positive. negative would shoot down the node. 
*/ - ret |= DLM_ASSERT_RESPONSE_REASSERT; - if (!have_lockres_ref) { - mlog(ML_ERROR, "strange, got assert from %u, MASTER " - "mle present here for %s:%.*s, but no lockres!\n", - assert->node_idx, dlm->name, namelen, name); - } - } - if (have_lockres_ref) { - /* let the master know we have a reference to the lockres */ - ret |= DLM_ASSERT_RESPONSE_MASTERY_REF; - mlog(0, "%s:%.*s: got assert from %u, need a ref\n", - dlm->name, namelen, name, assert->node_idx); - } - return ret; - -kill: - /* kill the caller! */ - mlog(ML_ERROR, "Bad message received from another node. Dumping state " - "and killing the other node now! This node is OK and can continue.\n"); - __dlm_print_one_lock_resource(res); - spin_unlock(&res->spinlock); - spin_unlock(&dlm->spinlock); - *ret_data = (void *)res; - dlm_put(dlm); - return -EINVAL; -} - -void dlm_assert_master_post_handler(int status, void *data, void *ret_data) -{ - struct dlm_lock_resource *res = (struct dlm_lock_resource *)ret_data; - - if (ret_data) { - spin_lock(&res->spinlock); - res->state &= ~DLM_LOCK_RES_SETREF_INPROG; - spin_unlock(&res->spinlock); - wake_up(&res->wq); - dlm_lockres_put(res); - } - return; -} - -int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - int ignore_higher, u8 request_from, u32 flags) -{ - struct dlm_work_item *item; - item = kzalloc(sizeof(*item), GFP_NOFS); - if (!item) - return -ENOMEM; - - - /* queue up work for dlm_assert_master_worker */ - dlm_grab(dlm); /* get an extra ref for the work item */ - dlm_init_work_item(dlm, item, dlm_assert_master_worker, NULL); - item->u.am.lockres = res; /* already have a ref */ - /* can optionally ignore node numbers higher than this node */ - item->u.am.ignore_higher = ignore_higher; - item->u.am.request_from = request_from; - item->u.am.flags = flags; - - if (ignore_higher) - mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len, - res->lockname.name); - - spin_lock(&dlm->work_lock); - list_add_tail(&item->list, &dlm->work_list); - spin_unlock(&dlm->work_lock); - - queue_work(dlm->dlm_worker, &dlm->dispatched_work); - return 0; -} - -static void dlm_assert_master_worker(struct dlm_work_item *item, void *data) -{ - struct dlm_ctxt *dlm = data; - int ret = 0; - struct dlm_lock_resource *res; - unsigned long nodemap[BITS_TO_LONGS(O2NM_MAX_NODES)]; - int ignore_higher; - int bit; - u8 request_from; - u32 flags; - - dlm = item->dlm; - res = item->u.am.lockres; - ignore_higher = item->u.am.ignore_higher; - request_from = item->u.am.request_from; - flags = item->u.am.flags; - - spin_lock(&dlm->spinlock); - memcpy(nodemap, dlm->domain_map, sizeof(nodemap)); - spin_unlock(&dlm->spinlock); - - clear_bit(dlm->node_num, nodemap); - if (ignore_higher) { - /* if is this just to clear up mles for nodes below - * this node, do not send the message to the original - * caller or any node number higher than this */ - clear_bit(request_from, nodemap); - bit = dlm->node_num; - while (1) { - bit = find_next_bit(nodemap, O2NM_MAX_NODES, - bit+1); - if (bit >= O2NM_MAX_NODES) - break; - clear_bit(bit, nodemap); - } - } - - /* - * If we're migrating this lock to someone else, we are no - * longer allowed to assert out own mastery. OTOH, we need to - * prevent migration from starting while we're still asserting - * our dominance. The reserved ast delays migration. - */ - spin_lock(&res->spinlock); - if (res->state & DLM_LOCK_RES_MIGRATING) { - mlog(0, "Someone asked us to assert mastery, but we're " - "in the middle of migration. 
Skipping assert, " - "the new master will handle that.\n"); - spin_unlock(&res->spinlock); - goto put; - } else - __dlm_lockres_reserve_ast(res); - spin_unlock(&res->spinlock); - - /* this call now finishes out the nodemap - * even if one or more nodes die */ - mlog(0, "worker about to master %.*s here, this=%u\n", - res->lockname.len, res->lockname.name, dlm->node_num); - ret = dlm_do_assert_master(dlm, res, nodemap, flags); - if (ret < 0) { - /* no need to restart, we are done */ - if (!dlm_is_host_down(ret)) - mlog_errno(ret); - } - - /* Ok, we've asserted ourselves. Let's let migration start. */ - dlm_lockres_release_ast(dlm, res); - -put: - dlm_lockres_put(res); - - mlog(0, "finished with dlm_assert_master_worker\n"); -} - -/* SPECIAL CASE for the $RECOVERY lock used by the recovery thread. - * We cannot wait for node recovery to complete to begin mastering this - * lockres because this lockres is used to kick off recovery! ;-) - * So, do a pre-check on all living nodes to see if any of those nodes - * think that $RECOVERY is currently mastered by a dead node. If so, - * we wait a short time to allow that node to get notified by its own - * heartbeat stack, then check again. All $RECOVERY lock resources - * mastered by dead nodes are purged when the hearbeat callback is - * fired, so we can know for sure that it is safe to continue once - * the node returns a live node or no node. */ -static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res) -{ - struct dlm_node_iter iter; - int nodenum; - int ret = 0; - u8 master = DLM_LOCK_RES_OWNER_UNKNOWN; - - spin_lock(&dlm->spinlock); - dlm_node_iter_init(dlm->domain_map, &iter); - spin_unlock(&dlm->spinlock); - - while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { - /* do not send to self */ - if (nodenum == dlm->node_num) - continue; - ret = dlm_do_master_requery(dlm, res, nodenum, &master); - if (ret < 0) { - mlog_errno(ret); - if (!dlm_is_host_down(ret)) - BUG(); - /* host is down, so answer for that node would be - * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */ - ret = 0; - } - - if (master != DLM_LOCK_RES_OWNER_UNKNOWN) { - /* check to see if this master is in the recovery map */ - spin_lock(&dlm->spinlock); - if (test_bit(master, dlm->recovery_map)) { - mlog(ML_NOTICE, "%s: node %u has not seen " - "node %u go down yet, and thinks the " - "dead node is mastering the recovery " - "lock. must wait.\n", dlm->name, - nodenum, master); - ret = -EAGAIN; - } - spin_unlock(&dlm->spinlock); - mlog(0, "%s: reco lock master is %u\n", dlm->name, - master); - break; - } - } - return ret; -} - -/* - * DLM_DEREF_LOCKRES_MSG - */ - -int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) -{ - struct dlm_deref_lockres deref; - int ret = 0, r; - const char *lockname; - unsigned int namelen; - - lockname = res->lockname.name; - namelen = res->lockname.len; - BUG_ON(namelen > O2NM_MAX_NAME_LEN); - - memset(&deref, 0, sizeof(deref)); - deref.node_idx = dlm->node_num; - deref.namelen = namelen; - memcpy(deref.name, lockname, namelen); - - ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key, - &deref, sizeof(deref), res->owner, &r); - if (ret < 0) - mlog(ML_ERROR, "%s: res %.*s, error %d send DEREF to node %u\n", - dlm->name, namelen, lockname, ret, res->owner); - else if (r < 0) { - /* BAD. other node says I did not have a ref. 
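The $RECOVERY pre-check earlier in this hunk (dlm_pre_master_reco_lockres) is a poll-and-retry loop: keep asking the live nodes until none of them still reports the dead node as master of the recovery lock. A toy model of that control flow, with the remote query stubbed out and all names local:

#include <stdio.h>
#include <unistd.h>

#define OWNER_UNKNOWN 255

/* pretend remote query: each node's view of the reco lock master */
static unsigned char query_master(int node, int round)
{
        /* node 3 only "notices" the death on the second round */
        return (node == 3 && round == 0) ? 7 /* dead node */ : OWNER_UNKNOWN;
}

static int dead_node_still_seen(int nodes, int dead, int round)
{
        for (int n = 0; n < nodes; n++)
                if (query_master(n, round) == dead)
                        return 1;
        return 0;
}

int main(void)
{
        int round = 0;

        while (dead_node_still_seen(5, 7, round)) {
                printf("round %d: someone still thinks node 7 masters "
                       "$RECOVERY, waiting\n", round);
                usleep(1000);   /* the kernel waits for a heartbeat tick */
                round++;
        }
        printf("safe to master the recovery lock\n");
        return 0;
}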
*/ - mlog(ML_ERROR, "%s: res %.*s, DEREF to node %u got %d\n", - dlm->name, namelen, lockname, res->owner, r); - dlm_print_one_lock_resource(res); - BUG(); - } - return ret; -} - -int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - struct dlm_ctxt *dlm = data; - struct dlm_deref_lockres *deref = (struct dlm_deref_lockres *)msg->buf; - struct dlm_lock_resource *res = NULL; - char *name; - unsigned int namelen; - int ret = -EINVAL; - u8 node; - unsigned int hash; - struct dlm_work_item *item; - int cleared = 0; - int dispatch = 0; - - if (!dlm_grab(dlm)) - return 0; - - name = deref->name; - namelen = deref->namelen; - node = deref->node_idx; - - if (namelen > DLM_LOCKID_NAME_MAX) { - mlog(ML_ERROR, "Invalid name length!"); - goto done; - } - if (deref->node_idx >= O2NM_MAX_NODES) { - mlog(ML_ERROR, "Invalid node number: %u\n", node); - goto done; - } - - hash = dlm_lockid_hash(name, namelen); - - spin_lock(&dlm->spinlock); - res = __dlm_lookup_lockres_full(dlm, name, namelen, hash); - if (!res) { - spin_unlock(&dlm->spinlock); - mlog(ML_ERROR, "%s:%.*s: bad lockres name\n", - dlm->name, namelen, name); - goto done; - } - spin_unlock(&dlm->spinlock); - - spin_lock(&res->spinlock); - if (res->state & DLM_LOCK_RES_SETREF_INPROG) - dispatch = 1; - else { - BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); - if (test_bit(node, res->refmap)) { - dlm_lockres_clear_refmap_bit(dlm, res, node); - cleared = 1; - } - } - spin_unlock(&res->spinlock); - - if (!dispatch) { - if (cleared) - dlm_lockres_calc_usage(dlm, res); - else { - mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref " - "but it is already dropped!\n", dlm->name, - res->lockname.len, res->lockname.name, node); - dlm_print_one_lock_resource(res); - } - ret = 0; - goto done; - } - - item = kzalloc(sizeof(*item), GFP_NOFS); - if (!item) { - ret = -ENOMEM; - mlog_errno(ret); - goto done; - } - - dlm_init_work_item(dlm, item, dlm_deref_lockres_worker, NULL); - item->u.dl.deref_res = res; - item->u.dl.deref_node = node; - - spin_lock(&dlm->work_lock); - list_add_tail(&item->list, &dlm->work_list); - spin_unlock(&dlm->work_lock); - - queue_work(dlm->dlm_worker, &dlm->dispatched_work); - return 0; - -done: - if (res) - dlm_lockres_put(res); - dlm_put(dlm); - - return ret; -} - -static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) -{ - struct dlm_ctxt *dlm; - struct dlm_lock_resource *res; - u8 node; - u8 cleared = 0; - - dlm = item->dlm; - res = item->u.dl.deref_res; - node = item->u.dl.deref_node; - - spin_lock(&res->spinlock); - BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); - if (test_bit(node, res->refmap)) { - __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); - dlm_lockres_clear_refmap_bit(dlm, res, node); - cleared = 1; - } - spin_unlock(&res->spinlock); - - if (cleared) { - mlog(0, "%s:%.*s node %u ref dropped in dispatch\n", - dlm->name, res->lockname.len, res->lockname.name, node); - dlm_lockres_calc_usage(dlm, res); - } else { - mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref " - "but it is already dropped!\n", dlm->name, - res->lockname.len, res->lockname.name, node); - dlm_print_one_lock_resource(res); - } - - dlm_lockres_put(res); -} - -/* - * A migrateable resource is one that is : - * 1. locally mastered, and, - * 2. zero local locks, and, - * 3. one or more non-local locks, or, one or more references - * Returns 1 if yes, 0 if not. 
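The three criteria just listed are easy to restate. Below is a compressed userspace rendering of the check that dlm_is_lockres_migrateable() performs next, with plain arrays and one bitmap word standing in for the kernel's queues and refmap (whether the local node's own refmap bit should count is glossed over here by masking it out):

#include <stdio.h>

struct fake_res {
        int owner, me;
        int nlocks;
        int lock_node[8];       /* node holding each lock */
        unsigned long refmap;   /* one bit per node with a reference */
};

static int is_migrateable(const struct fake_res *r)
{
        int nonlocal = 0;

        if (r->owner != r->me)
                return 0;                       /* rule 1: must master it */
        for (int i = 0; i < r->nlocks; i++) {
                if (r->lock_node[i] == r->me)
                        return 0;               /* rule 2: no local locks */
                nonlocal++;
        }
        if (!nonlocal && !(r->refmap & ~(1UL << r->me)))
                return 0;                       /* rule 3: someone remote */
        return 1;
}

int main(void)
{
        struct fake_res r = { .owner = 0, .me = 0, .nlocks = 1,
                              .lock_node = { 4 }, .refmap = 1UL << 4 };

        printf("migrateable: %d\n", is_migrateable(&r));        /* 1 */
        return 0;
}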
- */ -static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res) -{ - enum dlm_lockres_list idx; - int nonlocal = 0, node_ref; - struct list_head *queue; - struct dlm_lock *lock; - u64 cookie; - - assert_spin_locked(&res->spinlock); - - if (res->owner != dlm->node_num) - return 0; - - for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) { - queue = dlm_list_idx_to_ptr(res, idx); - list_for_each_entry(lock, queue, list) { - if (lock->ml.node != dlm->node_num) { - nonlocal++; - continue; - } - cookie = be64_to_cpu(lock->ml.cookie); - mlog(0, "%s: Not migrateable res %.*s, lock %u:%llu on " - "%s list\n", dlm->name, res->lockname.len, - res->lockname.name, - dlm_get_lock_cookie_node(cookie), - dlm_get_lock_cookie_seq(cookie), - dlm_list_in_text(idx)); - return 0; - } - } - - if (!nonlocal) { - node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); - if (node_ref >= O2NM_MAX_NODES) - return 0; - } - - mlog(0, "%s: res %.*s, Migrateable\n", dlm->name, res->lockname.len, - res->lockname.name); - - return 1; -} - -/* - * DLM_MIGRATE_LOCKRES - */ - - -static int dlm_migrate_lockres(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, u8 target) -{ - struct dlm_master_list_entry *mle = NULL; - struct dlm_master_list_entry *oldmle = NULL; - struct dlm_migratable_lockres *mres = NULL; - int ret = 0; - const char *name; - unsigned int namelen; - int mle_added = 0; - int wake = 0; - - if (!dlm_grab(dlm)) - return -EINVAL; - - BUG_ON(target == O2NM_MAX_NODES); - - name = res->lockname.name; - namelen = res->lockname.len; - - mlog(0, "%s: Migrating %.*s to node %u\n", dlm->name, namelen, name, - target); - - /* preallocate up front. if this fails, abort */ - ret = -ENOMEM; - mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS); - if (!mres) { - mlog_errno(ret); - goto leave; - } - - mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS); - if (!mle) { - mlog_errno(ret); - goto leave; - } - ret = 0; - - /* - * clear any existing master requests and - * add the migration mle to the list - */ - spin_lock(&dlm->spinlock); - spin_lock(&dlm->master_lock); - ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, - namelen, target, dlm->node_num); - spin_unlock(&dlm->master_lock); - spin_unlock(&dlm->spinlock); - - if (ret == -EEXIST) { - mlog(0, "another process is already migrating it\n"); - goto fail; - } - mle_added = 1; - - /* - * set the MIGRATING flag and flush asts - * if we fail after this we need to re-dirty the lockres - */ - if (dlm_mark_lockres_migrating(dlm, res, target) < 0) { - mlog(ML_ERROR, "tried to migrate %.*s to %u, but " - "the target went down.\n", res->lockname.len, - res->lockname.name, target); - spin_lock(&res->spinlock); - res->state &= ~DLM_LOCK_RES_MIGRATING; - wake = 1; - spin_unlock(&res->spinlock); - ret = -EINVAL; - } - -fail: - if (oldmle) { - /* master is known, detach if not already detached */ - dlm_mle_detach_hb_events(dlm, oldmle); - dlm_put_mle(oldmle); - } - - if (ret < 0) { - if (mle_added) { - dlm_mle_detach_hb_events(dlm, mle); - dlm_put_mle(mle); - } else if (mle) { - kmem_cache_free(dlm_mle_cache, mle); - mle = NULL; - } - goto leave; - } - - /* - * at this point, we have a migration target, an mle - * in the master list, and the MIGRATING flag set on - * the lockres - */ - - /* now that remote nodes are spinning on the MIGRATING flag, - * ensure that all assert_master work is flushed. */ - flush_workqueue(dlm->dlm_worker); - - /* get an extra reference on the mle. 
- * otherwise the assert_master from the new - * master will destroy this. - * also, make sure that all callers of dlm_get_mle - * take both dlm->spinlock and dlm->master_lock */ - spin_lock(&dlm->spinlock); - spin_lock(&dlm->master_lock); - dlm_get_mle_inuse(mle); - spin_unlock(&dlm->master_lock); - spin_unlock(&dlm->spinlock); - - /* notify new node and send all lock state */ - /* call send_one_lockres with migration flag. - * this serves as notice to the target node that a - * migration is starting. */ - ret = dlm_send_one_lockres(dlm, res, mres, target, - DLM_MRES_MIGRATION); - - if (ret < 0) { - mlog(0, "migration to node %u failed with %d\n", - target, ret); - /* migration failed, detach and clean up mle */ - dlm_mle_detach_hb_events(dlm, mle); - dlm_put_mle(mle); - dlm_put_mle_inuse(mle); - spin_lock(&res->spinlock); - res->state &= ~DLM_LOCK_RES_MIGRATING; - wake = 1; - spin_unlock(&res->spinlock); - if (dlm_is_host_down(ret)) - dlm_wait_for_node_death(dlm, target, - DLM_NODE_DEATH_WAIT_MAX); - goto leave; - } - - /* at this point, the target sends a message to all nodes, - * (using dlm_do_migrate_request). this node is skipped since - * we had to put an mle in the list to begin the process. this - * node now waits for target to do an assert master. this node - * will be the last one notified, ensuring that the migration - * is complete everywhere. if the target dies while this is - * going on, some nodes could potentially see the target as the - * master, so it is important that my recovery finds the migration - * mle and sets the master to UNKNOWN. */ - - - /* wait for new node to assert master */ - while (1) { - ret = wait_event_interruptible_timeout(mle->wq, - (atomic_read(&mle->woken) == 1), - msecs_to_jiffies(5000)); - - if (ret >= 0) { - if (atomic_read(&mle->woken) == 1 || - res->owner == target) - break; - - mlog(0, "%s:%.*s: timed out during migration\n", - dlm->name, res->lockname.len, res->lockname.name); - /* avoid hang during shutdown when migrating lockres - * to a node which also goes down */ - if (dlm_is_node_dead(dlm, target)) { - mlog(0, "%s:%.*s: expected migration " - "target %u is no longer up, restarting\n", - dlm->name, res->lockname.len, - res->lockname.name, target); - ret = -EINVAL; - /* migration failed, detach and clean up mle */ - dlm_mle_detach_hb_events(dlm, mle); - dlm_put_mle(mle); - dlm_put_mle_inuse(mle); - spin_lock(&res->spinlock); - res->state &= ~DLM_LOCK_RES_MIGRATING; - wake = 1; - spin_unlock(&res->spinlock); - goto leave; - } - } else - mlog(0, "%s:%.*s: caught signal during migration\n", - dlm->name, res->lockname.len, res->lockname.name); - } - - /* all done, set the owner, clear the flag */ - spin_lock(&res->spinlock); - dlm_set_lockres_owner(dlm, res, target); - res->state &= ~DLM_LOCK_RES_MIGRATING; - dlm_remove_nonlocal_locks(dlm, res); - spin_unlock(&res->spinlock); - wake_up(&res->wq); - - /* master is known, detach if not already detached */ - dlm_mle_detach_hb_events(dlm, mle); - dlm_put_mle_inuse(mle); - ret = 0; - - dlm_lockres_calc_usage(dlm, res); - -leave: - /* re-dirty the lockres if we failed */ - if (ret < 0) - dlm_kick_thread(dlm, res); - - /* wake up waiters if the MIGRATING flag got set - * but migration failed */ - if (wake) - wake_up(&res->wq); - - if (mres) - free_page((unsigned long)mres); - - dlm_put(dlm); - - mlog(0, "%s: Migrating %.*s to %u, returns %d\n", dlm->name, namelen, - name, target, ret); - return ret; -} - -#define DLM_MIGRATION_RETRY_MS 100 - -/* - * Should be called only after beginning the domain 
leave process. - * There should not be any remaining locks on nonlocal lock resources, - * and there should be no local locks left on locally mastered resources. - * - * Called with the dlm spinlock held, may drop it to do migration, but - * will re-acquire before exit. - * - * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped - */ -int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) -{ - int ret; - int lock_dropped = 0; - u8 target = O2NM_MAX_NODES; - - assert_spin_locked(&dlm->spinlock); - - spin_lock(&res->spinlock); - if (dlm_is_lockres_migrateable(dlm, res)) - target = dlm_pick_migration_target(dlm, res); - spin_unlock(&res->spinlock); - - if (target == O2NM_MAX_NODES) - goto leave; - - /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ - spin_unlock(&dlm->spinlock); - lock_dropped = 1; - ret = dlm_migrate_lockres(dlm, res, target); - if (ret) - mlog(0, "%s: res %.*s, Migrate to node %u failed with %d\n", - dlm->name, res->lockname.len, res->lockname.name, - target, ret); - spin_lock(&dlm->spinlock); -leave: - return lock_dropped; -} - -int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock) -{ - int ret; - spin_lock(&dlm->ast_lock); - spin_lock(&lock->spinlock); - ret = (list_empty(&lock->bast_list) && !lock->bast_pending); - spin_unlock(&lock->spinlock); - spin_unlock(&dlm->ast_lock); - return ret; -} - -static int dlm_migration_can_proceed(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 mig_target) -{ - int can_proceed; - spin_lock(&res->spinlock); - can_proceed = !!(res->state & DLM_LOCK_RES_MIGRATING); - spin_unlock(&res->spinlock); - - /* target has died, so make the caller break out of the - * wait_event, but caller must recheck the domain_map */ - spin_lock(&dlm->spinlock); - if (!test_bit(mig_target, dlm->domain_map)) - can_proceed = 1; - spin_unlock(&dlm->spinlock); - return can_proceed; -} - -static int dlm_lockres_is_dirty(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res) -{ - int ret; - spin_lock(&res->spinlock); - ret = !!(res->state & DLM_LOCK_RES_DIRTY); - spin_unlock(&res->spinlock); - return ret; -} - - -static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 target) -{ - int ret = 0; - - mlog(0, "dlm_mark_lockres_migrating: %.*s, from %u to %u\n", - res->lockname.len, res->lockname.name, dlm->node_num, - target); - /* need to set MIGRATING flag on lockres. this is done by - * ensuring that all asts have been flushed for this lockres. */ - spin_lock(&res->spinlock); - BUG_ON(res->migration_pending); - res->migration_pending = 1; - /* strategy is to reserve an extra ast then release - * it below, letting the release do all of the work */ - __dlm_lockres_reserve_ast(res); - spin_unlock(&res->spinlock); - - /* now flush all the pending asts */ - dlm_kick_thread(dlm, res); - /* before waiting on DIRTY, block processes which may - * try to dirty the lockres before MIGRATING is set */ - spin_lock(&res->spinlock); - BUG_ON(res->state & DLM_LOCK_RES_BLOCK_DIRTY); - res->state |= DLM_LOCK_RES_BLOCK_DIRTY; - spin_unlock(&res->spinlock); - /* now wait on any pending asts and the DIRTY state */ - wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res)); - dlm_lockres_release_ast(dlm, res); - - mlog(0, "about to wait on migration_wq, dirty=%s\n", - res->state & DLM_LOCK_RES_DIRTY ? "yes" : "no"); - /* if the extra ref we just put was the final one, this - * will pass thru immediately. otherwise, we need to wait - * for the last ast to finish. 
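The wait that follows amounts to: block with a roughly one-second timeout on "MIGRATING is set, or the target died", log the state either way, and loop until the predicate holds. A polling stand-in for that wait_event_interruptible_timeout() shape (names local, not the kernel's):

#include <stdio.h>
#include <unistd.h>

static int migrating, target_alive = 1;

static int migration_can_proceed(void)
{
        return migrating || !target_alive;
}

int main(void)
{
        int rounds = 0;

        while (!migration_can_proceed()) {
                usleep(1000);           /* ~ the 1000 ms timeout below */
                printf("trying again...\n");
                if (++rounds == 3)
                        migrating = 1;  /* last reserved ast was released */
        }
        printf("proceed; target %s\n", target_alive ? "alive" : "down");
        return 0;
}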
*/ -again: - ret = wait_event_interruptible_timeout(dlm->migration_wq, - dlm_migration_can_proceed(dlm, res, target), - msecs_to_jiffies(1000)); - if (ret < 0) { - mlog(0, "woken again: migrating? %s, dead? %s\n", - res->state & DLM_LOCK_RES_MIGRATING ? "yes":"no", - test_bit(target, dlm->domain_map) ? "no":"yes"); - } else { - mlog(0, "all is well: migrating? %s, dead? %s\n", - res->state & DLM_LOCK_RES_MIGRATING ? "yes":"no", - test_bit(target, dlm->domain_map) ? "no":"yes"); - } - if (!dlm_migration_can_proceed(dlm, res, target)) { - mlog(0, "trying again...\n"); - goto again; - } - - ret = 0; - /* did the target go down or die? */ - spin_lock(&dlm->spinlock); - if (!test_bit(target, dlm->domain_map)) { - mlog(ML_ERROR, "aha. migration target %u just went down\n", - target); - ret = -EHOSTDOWN; - } - spin_unlock(&dlm->spinlock); - - /* - * if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for - * another try; otherwise, we are sure the MIGRATING state is there, - * drop the unneded state which blocked threads trying to DIRTY - */ - spin_lock(&res->spinlock); - BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY)); - res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY; - if (!ret) - BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING)); - spin_unlock(&res->spinlock); - - /* - * at this point: - * - * o the DLM_LOCK_RES_MIGRATING flag is set if target not down - * o there are no pending asts on this lockres - * o all processes trying to reserve an ast on this - * lockres must wait for the MIGRATING flag to clear - */ - return ret; -} - -/* last step in the migration process. - * original master calls this to free all of the dlm_lock - * structures that used to be for other nodes. */ -static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res) -{ - struct list_head *queue = &res->granted; - int i, bit; - struct dlm_lock *lock, *next; - - assert_spin_locked(&res->spinlock); - - BUG_ON(res->owner == dlm->node_num); - - for (i=0; i<3; i++) { - list_for_each_entry_safe(lock, next, queue, list) { - if (lock->ml.node != dlm->node_num) { - mlog(0, "putting lock for node %u\n", - lock->ml.node); - /* be extra careful */ - BUG_ON(!list_empty(&lock->ast_list)); - BUG_ON(!list_empty(&lock->bast_list)); - BUG_ON(lock->ast_pending); - BUG_ON(lock->bast_pending); - dlm_lockres_clear_refmap_bit(dlm, res, - lock->ml.node); - list_del_init(&lock->list); - dlm_lock_put(lock); - /* In a normal unlock, we would have added a - * DLM_UNLOCK_FREE_LOCK action. Force it. */ - dlm_lock_put(lock); - } - } - queue++; - } - bit = 0; - while (1) { - bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit); - if (bit >= O2NM_MAX_NODES) - break; - /* do not clear the local node reference, if there is a - * process holding this, let it drop the ref itself */ - if (bit != dlm->node_num) { - mlog(0, "%s:%.*s: node %u had a ref to this " - "migrating lockres, clearing\n", dlm->name, - res->lockname.len, res->lockname.name, bit); - dlm_lockres_clear_refmap_bit(dlm, res, bit); - } - bit++; - } -} - -/* - * Pick a node to migrate the lock resource to. This function selects a - * potential target based first on the locks and then on refmap. It skips - * nodes that are in the process of exiting the domain. 
- */ -static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res) -{ - enum dlm_lockres_list idx; - struct list_head *queue = &res->granted; - struct dlm_lock *lock; - int noderef; - u8 nodenum = O2NM_MAX_NODES; - - assert_spin_locked(&dlm->spinlock); - assert_spin_locked(&res->spinlock); - - /* Go through all the locks */ - for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) { - queue = dlm_list_idx_to_ptr(res, idx); - list_for_each_entry(lock, queue, list) { - if (lock->ml.node == dlm->node_num) - continue; - if (test_bit(lock->ml.node, dlm->exit_domain_map)) - continue; - nodenum = lock->ml.node; - goto bail; - } - } - - /* Go thru the refmap */ - noderef = -1; - while (1) { - noderef = find_next_bit(res->refmap, O2NM_MAX_NODES, - noderef + 1); - if (noderef >= O2NM_MAX_NODES) - break; - if (noderef == dlm->node_num) - continue; - if (test_bit(noderef, dlm->exit_domain_map)) - continue; - nodenum = noderef; - goto bail; - } - -bail: - return nodenum; -} - -/* this is called by the new master once all lockres - * data has been received */ -static int dlm_do_migrate_request(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 master, u8 new_master, - struct dlm_node_iter *iter) -{ - struct dlm_migrate_request migrate; - int ret, skip, status = 0; - int nodenum; - - memset(&migrate, 0, sizeof(migrate)); - migrate.namelen = res->lockname.len; - memcpy(migrate.name, res->lockname.name, migrate.namelen); - migrate.new_master = new_master; - migrate.master = master; - - ret = 0; - - /* send message to all nodes, except the master and myself */ - while ((nodenum = dlm_node_iter_next(iter)) >= 0) { - if (nodenum == master || - nodenum == new_master) - continue; - - /* We could race exit domain. If exited, skip. */ - spin_lock(&dlm->spinlock); - skip = (!test_bit(nodenum, dlm->domain_map)); - spin_unlock(&dlm->spinlock); - if (skip) { - clear_bit(nodenum, iter->node_map); - continue; - } - - ret = o2net_send_message(DLM_MIGRATE_REQUEST_MSG, dlm->key, - &migrate, sizeof(migrate), nodenum, - &status); - if (ret < 0) { - mlog(ML_ERROR, "%s: res %.*s, Error %d send " - "MIGRATE_REQUEST to node %u\n", dlm->name, - migrate.namelen, migrate.name, ret, nodenum); - if (!dlm_is_host_down(ret)) { - mlog(ML_ERROR, "unhandled error=%d!\n", ret); - BUG(); - } - clear_bit(nodenum, iter->node_map); - ret = 0; - } else if (status < 0) { - mlog(0, "migrate request (node %u) returned %d!\n", - nodenum, status); - ret = status; - } else if (status == DLM_MIGRATE_RESPONSE_MASTERY_REF) { - /* during the migration request we short-circuited - * the mastery of the lockres. make sure we have - * a mastery ref for nodenum */ - mlog(0, "%s:%.*s: need ref for node %u\n", - dlm->name, res->lockname.len, res->lockname.name, - nodenum); - spin_lock(&res->spinlock); - dlm_lockres_set_refmap_bit(dlm, res, nodenum); - spin_unlock(&res->spinlock); - } - } - - if (ret < 0) - mlog_errno(ret); - - mlog(0, "returning ret=%d\n", ret); - return ret; -} - - -/* if there is an existing mle for this lockres, we now know who the master is. - * (the one who sent us *this* message) we can clear it up right away. - * since the process that put the mle on the list still has a reference to it, - * we can unhash it now, set the master and wake the process. as a result, - * we will have no mle in the list to start with. now we can add an mle for - * the migration and this should be the only one found for those scanning the - * list. 
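dlm_pick_migration_target() at the top of this hunk has a simple shape: the first node holding a lock wins, otherwise the first node with a refmap bit, always skipping ourselves and anyone in exit_domain_map. A condensed userspace model under stand-in names:

#include <stdio.h>

#define MAX_NODES 32
#define NO_NODE   MAX_NODES

static int pick_target(int me, unsigned long exiting,
                       const int *lock_nodes, int nlocks,
                       unsigned long refmap)
{
        for (int i = 0; i < nlocks; i++) {
                int n = lock_nodes[i];

                if (n != me && !(exiting & (1UL << n)))
                        return n;       /* prefer an actual lock holder */
        }
        for (int n = 0; n < MAX_NODES; n++)
                if ((refmap & (1UL << n)) && n != me &&
                    !(exiting & (1UL << n)))
                        return n;       /* fall back to a reference holder */
        return NO_NODE;                 /* nobody suitable: do not migrate */
}

int main(void)
{
        int locks[] = { 0, 6 };

        /* node 6 is exiting, so the refmap bit on node 4 wins */
        printf("target: %d\n",
               pick_target(0, 1UL << 6, locks, 2, (1UL << 4) | 1UL));
        return 0;
}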
*/ -int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - struct dlm_ctxt *dlm = data; - struct dlm_lock_resource *res = NULL; - struct dlm_migrate_request *migrate = (struct dlm_migrate_request *) msg->buf; - struct dlm_master_list_entry *mle = NULL, *oldmle = NULL; - const char *name; - unsigned int namelen, hash; - int ret = 0; - - if (!dlm_grab(dlm)) - return -EINVAL; - - name = migrate->name; - namelen = migrate->namelen; - hash = dlm_lockid_hash(name, namelen); - - /* preallocate.. if this fails, abort */ - mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS); - - if (!mle) { - ret = -ENOMEM; - goto leave; - } - - /* check for pre-existing lock */ - spin_lock(&dlm->spinlock); - res = __dlm_lookup_lockres(dlm, name, namelen, hash); - if (res) { - spin_lock(&res->spinlock); - if (res->state & DLM_LOCK_RES_RECOVERING) { - /* if all is working ok, this can only mean that we got - * a migrate request from a node that we now see as - * dead. what can we do here? drop it to the floor? */ - spin_unlock(&res->spinlock); - mlog(ML_ERROR, "Got a migrate request, but the " - "lockres is marked as recovering!"); - kmem_cache_free(dlm_mle_cache, mle); - ret = -EINVAL; /* need a better solution */ - goto unlock; - } - res->state |= DLM_LOCK_RES_MIGRATING; - spin_unlock(&res->spinlock); - } - - spin_lock(&dlm->master_lock); - /* ignore status. only nonzero status would BUG. */ - ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, - name, namelen, - migrate->new_master, - migrate->master); - - spin_unlock(&dlm->master_lock); -unlock: - spin_unlock(&dlm->spinlock); - - if (oldmle) { - /* master is known, detach if not already detached */ - dlm_mle_detach_hb_events(dlm, oldmle); - dlm_put_mle(oldmle); - } - - if (res) - dlm_lockres_put(res); -leave: - dlm_put(dlm); - return ret; -} - -/* must be holding dlm->spinlock and dlm->master_lock - * when adding a migration mle, we can clear any other mles - * in the master list because we know with certainty that - * the master is "master". so we remove any old mle from - * the list after setting it's master field, and then add - * the new migration mle. this way we can hold with the rule - * of having only one mle for a given lock name at all times. */ -static int dlm_add_migration_mle(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_master_list_entry *mle, - struct dlm_master_list_entry **oldmle, - const char *name, unsigned int namelen, - u8 new_master, u8 master) -{ - int found; - int ret = 0; - - *oldmle = NULL; - - assert_spin_locked(&dlm->spinlock); - assert_spin_locked(&dlm->master_lock); - - /* caller is responsible for any ref taken here on oldmle */ - found = dlm_find_mle(dlm, oldmle, (char *)name, namelen); - if (found) { - struct dlm_master_list_entry *tmp = *oldmle; - spin_lock(&tmp->spinlock); - if (tmp->type == DLM_MLE_MIGRATION) { - if (master == dlm->node_num) { - /* ah another process raced me to it */ - mlog(0, "tried to migrate %.*s, but some " - "process beat me to it\n", - namelen, name); - ret = -EEXIST; - } else { - /* bad. 2 NODES are trying to migrate! 
*/ - mlog(ML_ERROR, "migration error mle: " - "master=%u new_master=%u // request: " - "master=%u new_master=%u // " - "lockres=%.*s\n", - tmp->master, tmp->new_master, - master, new_master, - namelen, name); - BUG(); - } - } else { - /* this is essentially what assert_master does */ - tmp->master = master; - atomic_set(&tmp->woken, 1); - wake_up(&tmp->wq); - /* remove it so that only one mle will be found */ - __dlm_unlink_mle(dlm, tmp); - __dlm_mle_detach_hb_events(dlm, tmp); - ret = DLM_MIGRATE_RESPONSE_MASTERY_REF; - mlog(0, "%s:%.*s: master=%u, newmaster=%u, " - "telling master to get ref for cleared out mle " - "during migration\n", dlm->name, namelen, name, - master, new_master); - } - spin_unlock(&tmp->spinlock); - } - - /* now add a migration mle to the tail of the list */ - dlm_init_mle(mle, DLM_MLE_MIGRATION, dlm, res, name, namelen); - mle->new_master = new_master; - /* the new master will be sending an assert master for this. - * at that point we will get the refmap reference */ - mle->master = master; - /* do this for consistency with other mle types */ - set_bit(new_master, mle->maybe_map); - __dlm_insert_mle(dlm, mle); - - return ret; -} - -/* - * Sets the owner of the lockres, associated to the mle, to UNKNOWN - */ -static struct dlm_lock_resource *dlm_reset_mleres_owner(struct dlm_ctxt *dlm, - struct dlm_master_list_entry *mle) -{ - struct dlm_lock_resource *res; - - /* Find the lockres associated to the mle and set its owner to UNK */ - res = __dlm_lookup_lockres(dlm, mle->mname, mle->mnamelen, - mle->mnamehash); - if (res) { - spin_unlock(&dlm->master_lock); - - /* move lockres onto recovery list */ - spin_lock(&res->spinlock); - dlm_set_lockres_owner(dlm, res, DLM_LOCK_RES_OWNER_UNKNOWN); - dlm_move_lockres_to_recovery_list(dlm, res); - spin_unlock(&res->spinlock); - dlm_lockres_put(res); - - /* about to get rid of mle, detach from heartbeat */ - __dlm_mle_detach_hb_events(dlm, mle); - - /* dump the mle */ - spin_lock(&dlm->master_lock); - __dlm_put_mle(mle); - spin_unlock(&dlm->master_lock); - } - - return res; -} - -static void dlm_clean_migration_mle(struct dlm_ctxt *dlm, - struct dlm_master_list_entry *mle) -{ - __dlm_mle_detach_hb_events(dlm, mle); - - spin_lock(&mle->spinlock); - __dlm_unlink_mle(dlm, mle); - atomic_set(&mle->woken, 1); - spin_unlock(&mle->spinlock); - - wake_up(&mle->wq); -} - -static void dlm_clean_block_mle(struct dlm_ctxt *dlm, - struct dlm_master_list_entry *mle, u8 dead_node) -{ - int bit; - - BUG_ON(mle->type != DLM_MLE_BLOCK); - - spin_lock(&mle->spinlock); - bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0); - if (bit != dead_node) { - mlog(0, "mle found, but dead node %u would not have been " - "master\n", dead_node); - spin_unlock(&mle->spinlock); - } else { - /* Must drop the refcount by one since the assert_master will - * never arrive. This may result in the mle being unlinked and - * freed, but there may still be a process waiting in the - * dlmlock path which is fine. 
*/ - mlog(0, "node %u was expected master\n", dead_node); - atomic_set(&mle->woken, 1); - spin_unlock(&mle->spinlock); - wake_up(&mle->wq); - - /* Do not need events any longer, so detach from heartbeat */ - __dlm_mle_detach_hb_events(dlm, mle); - __dlm_put_mle(mle); - } -} - -void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) -{ - struct dlm_master_list_entry *mle; - struct dlm_lock_resource *res; - struct hlist_head *bucket; - struct hlist_node *list; - unsigned int i; - - mlog(0, "dlm=%s, dead node=%u\n", dlm->name, dead_node); -top: - assert_spin_locked(&dlm->spinlock); - - /* clean the master list */ - spin_lock(&dlm->master_lock); - for (i = 0; i < DLM_HASH_BUCKETS; i++) { - bucket = dlm_master_hash(dlm, i); - hlist_for_each(list, bucket) { - mle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); - - BUG_ON(mle->type != DLM_MLE_BLOCK && - mle->type != DLM_MLE_MASTER && - mle->type != DLM_MLE_MIGRATION); - - /* MASTER mles are initiated locally. The waiting - * process will notice the node map change shortly. - * Let that happen as normal. */ - if (mle->type == DLM_MLE_MASTER) - continue; - - /* BLOCK mles are initiated by other nodes. Need to - * clean up if the dead node would have been the - * master. */ - if (mle->type == DLM_MLE_BLOCK) { - dlm_clean_block_mle(dlm, mle, dead_node); - continue; - } - - /* Everything else is a MIGRATION mle */ - - /* The rule for MIGRATION mles is that the master - * becomes UNKNOWN if *either* the original or the new - * master dies. All UNKNOWN lockres' are sent to - * whichever node becomes the recovery master. The new - * master is responsible for determining if there is - * still a master for this lockres, or if he needs to - * take over mastery. Either way, this node should - * expect another message to resolve this. */ - - if (mle->master != dead_node && - mle->new_master != dead_node) - continue; - - /* If we have reached this point, this mle needs to be - * removed from the list and freed. */ - dlm_clean_migration_mle(dlm, mle); - - mlog(0, "%s: node %u died during migration from " - "%u to %u!\n", dlm->name, dead_node, mle->master, - mle->new_master); - - /* If we find a lockres associated with the mle, we've - * hit this rare case that messes up our lock ordering. - * If so, we need to drop the master lock so that we can - * take the lockres lock, meaning that we will have to - * restart from the head of list. */ - res = dlm_reset_mleres_owner(dlm, mle); - if (res) - /* restart */ - goto top; - - /* This may be the last reference */ - __dlm_put_mle(mle); - } - } - spin_unlock(&dlm->master_lock); -} - -int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, - u8 old_master) -{ - struct dlm_node_iter iter; - int ret = 0; - - spin_lock(&dlm->spinlock); - dlm_node_iter_init(dlm->domain_map, &iter); - clear_bit(old_master, iter.node_map); - clear_bit(dlm->node_num, iter.node_map); - spin_unlock(&dlm->spinlock); - - /* ownership of the lockres is changing. 
account for the - * mastery reference here since old_master will briefly have - * a reference after the migration completes */ - spin_lock(&res->spinlock); - dlm_lockres_set_refmap_bit(dlm, res, old_master); - spin_unlock(&res->spinlock); - - mlog(0, "now time to do a migrate request to other nodes\n"); - ret = dlm_do_migrate_request(dlm, res, old_master, - dlm->node_num, &iter); - if (ret < 0) { - mlog_errno(ret); - goto leave; - } - - mlog(0, "doing assert master of %.*s to all except the original node\n", - res->lockname.len, res->lockname.name); - /* this call now finishes out the nodemap - * even if one or more nodes die */ - ret = dlm_do_assert_master(dlm, res, iter.node_map, - DLM_ASSERT_MASTER_FINISH_MIGRATION); - if (ret < 0) { - /* no longer need to retry. all living nodes contacted. */ - mlog_errno(ret); - ret = 0; - } - - memset(iter.node_map, 0, sizeof(iter.node_map)); - set_bit(old_master, iter.node_map); - mlog(0, "doing assert master of %.*s back to %u\n", - res->lockname.len, res->lockname.name, old_master); - ret = dlm_do_assert_master(dlm, res, iter.node_map, - DLM_ASSERT_MASTER_FINISH_MIGRATION); - if (ret < 0) { - mlog(0, "assert master to original master failed " - "with %d.\n", ret); - /* the only nonzero status here would be because of - * a dead original node. we're done. */ - ret = 0; - } - - /* all done, set the owner, clear the flag */ - spin_lock(&res->spinlock); - dlm_set_lockres_owner(dlm, res, dlm->node_num); - res->state &= ~DLM_LOCK_RES_MIGRATING; - spin_unlock(&res->spinlock); - /* re-dirty it on the new master */ - dlm_kick_thread(dlm, res); - wake_up(&res->wq); -leave: - return ret; -} - -/* - * LOCKRES AST REFCOUNT - * this is integral to migration - */ - -/* for future intent to call an ast, reserve one ahead of time. - * this should be called only after waiting on the lockres - * with dlm_wait_on_lockres, and while still holding the - * spinlock after the call. */ -void __dlm_lockres_reserve_ast(struct dlm_lock_resource *res) -{ - assert_spin_locked(&res->spinlock); - if (res->state & DLM_LOCK_RES_MIGRATING) { - __dlm_print_one_lock_resource(res); - } - BUG_ON(res->state & DLM_LOCK_RES_MIGRATING); - - atomic_inc(&res->asts_reserved); -} - -/* - * used to drop the reserved ast, either because it went unused, - * or because the ast/bast was actually called. - * - * also, if there is a pending migration on this lockres, - * and this was the last pending ast on the lockres, - * atomically set the MIGRATING flag before we drop the lock. - * this is how we ensure that migration can proceed with no - * asts in progress. note that it is ok if the state of the - * queues is such that a lock should be granted in the future - * or that a bast should be fired, because the new master will - * shuffle the lists on this lockres as soon as it is migrated. 
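The reserve/release discipline described above can be modeled with a bare counter: reservations pin the resource against migration, and only the release that drops the count to zero may flip migration_pending into MIGRATING. Locking and wakeups are elided and every name here is a local stand-in; dlm_lockres_release_ast() below is the real version.

#include <stdio.h>

struct fake_res {
        int asts_reserved;
        int migration_pending;
        int migrating;
};

static void reserve_ast(struct fake_res *r)
{
        r->asts_reserved++;     /* kernel: atomic_inc(&res->asts_reserved) */
}

static void release_ast(struct fake_res *r)
{
        if (--r->asts_reserved)         /* kernel: atomic_dec_and_lock() */
                return;
        if (!r->migration_pending)
                return;
        r->migration_pending = 0;
        r->migrating = 1;               /* last ast gone: migration may run */
}

int main(void)
{
        struct fake_res r = { .migration_pending = 1 };

        reserve_ast(&r);
        reserve_ast(&r);
        release_ast(&r);
        printf("migrating=%d\n", r.migrating);  /* 0: one ast still out */
        release_ast(&r);
        printf("migrating=%d\n", r.migrating);  /* 1 */
        return 0;
}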
- */ -void dlm_lockres_release_ast(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res) -{ - if (!atomic_dec_and_lock(&res->asts_reserved, &res->spinlock)) - return; - - if (!res->migration_pending) { - spin_unlock(&res->spinlock); - return; - } - - BUG_ON(res->state & DLM_LOCK_RES_MIGRATING); - res->migration_pending = 0; - res->state |= DLM_LOCK_RES_MIGRATING; - spin_unlock(&res->spinlock); - wake_up(&res->wq); - wake_up(&dlm->migration_wq); -} - -void dlm_force_free_mles(struct dlm_ctxt *dlm) -{ - int i; - struct hlist_head *bucket; - struct dlm_master_list_entry *mle; - struct hlist_node *tmp, *list; - - /* - * We notified all other nodes that we are exiting the domain and - * marked the dlm state to DLM_CTXT_LEAVING. If any mles are still - * around we force free them and wake any processes that are waiting - * on the mles - */ - spin_lock(&dlm->spinlock); - spin_lock(&dlm->master_lock); - - BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING); - BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES)); - - for (i = 0; i < DLM_HASH_BUCKETS; i++) { - bucket = dlm_master_hash(dlm, i); - hlist_for_each_safe(list, tmp, bucket) { - mle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); - if (mle->type != DLM_MLE_BLOCK) { - mlog(ML_ERROR, "bad mle: %p\n", mle); - dlm_print_one_mle(mle); - } - atomic_set(&mle->woken, 1); - wake_up(&mle->wq); - - __dlm_unlink_mle(dlm, mle); - __dlm_mle_detach_hb_events(dlm, mle); - __dlm_put_mle(mle); - } - } - spin_unlock(&dlm->master_lock); - spin_unlock(&dlm->spinlock); -} diff --git a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmrecovery.c b/ANDROID_3.4.5/fs/ocfs2/dlm/dlmrecovery.c deleted file mode 100644 index 01ebfd0b..00000000 --- a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmrecovery.c +++ /dev/null @@ -1,2886 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmrecovery.c - * - * recovery stuff - * - * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. 
- * - */ - - -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/highmem.h> -#include <linux/init.h> -#include <linux/sysctl.h> -#include <linux/random.h> -#include <linux/blkdev.h> -#include <linux/socket.h> -#include <linux/inet.h> -#include <linux/timer.h> -#include <linux/kthread.h> -#include <linux/delay.h> - - -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" - -#include "dlmapi.h" -#include "dlmcommon.h" -#include "dlmdomain.h" - -#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_RECOVERY) -#include "cluster/masklog.h" - -static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node); - -static int dlm_recovery_thread(void *data); -void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); -int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); -void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); -static int dlm_do_recovery(struct dlm_ctxt *dlm); - -static int dlm_pick_recovery_master(struct dlm_ctxt *dlm); -static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node); -static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node); -static int dlm_request_all_locks(struct dlm_ctxt *dlm, - u8 request_from, u8 dead_node); -static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node); - -static inline int dlm_num_locks_in_lockres(struct dlm_lock_resource *res); -static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres, - const char *lockname, int namelen, - int total_locks, u64 cookie, - u8 flags, u8 master); -static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, - struct dlm_migratable_lockres *mres, - u8 send_to, - struct dlm_lock_resource *res, - int total_locks); -static int dlm_process_recovery_data(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_migratable_lockres *mres); -static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm); -static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, - u8 dead_node, u8 send_to); -static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node); -static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm, - struct list_head *list, u8 dead_node); -static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, - u8 dead_node, u8 new_master); -static void dlm_reco_ast(void *astdata); -static void dlm_reco_bast(void *astdata, int blocked_type); -static void dlm_reco_unlock_ast(void *astdata, enum dlm_status st); -static void dlm_request_all_locks_worker(struct dlm_work_item *item, - void *data); -static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data); -static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 *real_master); - -static u64 dlm_get_next_mig_cookie(void); - -static DEFINE_SPINLOCK(dlm_reco_state_lock); -static DEFINE_SPINLOCK(dlm_mig_cookie_lock); -static u64 dlm_mig_cookie = 1; - -static u64 dlm_get_next_mig_cookie(void) -{ - u64 c; - spin_lock(&dlm_mig_cookie_lock); - c = dlm_mig_cookie; - if (dlm_mig_cookie == (~0ULL)) - dlm_mig_cookie = 1; - else - dlm_mig_cookie++; - spin_unlock(&dlm_mig_cookie_lock); - return c; -} - -static inline void dlm_set_reco_dead_node(struct dlm_ctxt *dlm, - u8 dead_node) -{ - assert_spin_locked(&dlm->spinlock); - if (dlm->reco.dead_node != dead_node) - mlog(0, "%s: changing dead_node from %u to %u\n", - dlm->name, dlm->reco.dead_node, dead_node); - dlm->reco.dead_node = dead_node; -} - -static inline void dlm_set_reco_master(struct dlm_ctxt *dlm, 
- u8 master) -{ - assert_spin_locked(&dlm->spinlock); - mlog(0, "%s: changing new_master from %u to %u\n", - dlm->name, dlm->reco.new_master, master); - dlm->reco.new_master = master; -} - -static inline void __dlm_reset_recovery(struct dlm_ctxt *dlm) -{ - assert_spin_locked(&dlm->spinlock); - clear_bit(dlm->reco.dead_node, dlm->recovery_map); - dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); - dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM); -} - -static inline void dlm_reset_recovery(struct dlm_ctxt *dlm) -{ - spin_lock(&dlm->spinlock); - __dlm_reset_recovery(dlm); - spin_unlock(&dlm->spinlock); -} - -/* Worker function used during recovery. */ -void dlm_dispatch_work(struct work_struct *work) -{ - struct dlm_ctxt *dlm = - container_of(work, struct dlm_ctxt, dispatched_work); - LIST_HEAD(tmp_list); - struct dlm_work_item *item, *next; - dlm_workfunc_t *workfunc; - int tot=0; - - spin_lock(&dlm->work_lock); - list_splice_init(&dlm->work_list, &tmp_list); - spin_unlock(&dlm->work_lock); - - list_for_each_entry(item, &tmp_list, list) { - tot++; - } - mlog(0, "%s: work thread has %d work items\n", dlm->name, tot); - - list_for_each_entry_safe(item, next, &tmp_list, list) { - workfunc = item->func; - list_del_init(&item->list); - - /* already have ref on dlm to avoid having - * it disappear. just double-check. */ - BUG_ON(item->dlm != dlm); - - /* this is allowed to sleep and - * call network stuff */ - workfunc(item, item->data); - - dlm_put(dlm); - kfree(item); - } -} - -/* - * RECOVERY THREAD - */ - -void dlm_kick_recovery_thread(struct dlm_ctxt *dlm) -{ - /* wake the recovery thread - * this will wake the reco thread in one of three places - * 1) sleeping with no recovery happening - * 2) sleeping with recovery mastered elsewhere - * 3) recovery mastered here, waiting on reco data */ - - wake_up(&dlm->dlm_reco_thread_wq); -} - -/* Launch the recovery thread */ -int dlm_launch_recovery_thread(struct dlm_ctxt *dlm) -{ - mlog(0, "starting dlm recovery thread...\n"); - - dlm->dlm_reco_thread_task = kthread_run(dlm_recovery_thread, dlm, - "dlm_reco_thread"); - if (IS_ERR(dlm->dlm_reco_thread_task)) { - mlog_errno(PTR_ERR(dlm->dlm_reco_thread_task)); - dlm->dlm_reco_thread_task = NULL; - return -EINVAL; - } - - return 0; -} - -void dlm_complete_recovery_thread(struct dlm_ctxt *dlm) -{ - if (dlm->dlm_reco_thread_task) { - mlog(0, "waiting for dlm recovery thread to exit\n"); - kthread_stop(dlm->dlm_reco_thread_task); - dlm->dlm_reco_thread_task = NULL; - } -} - - - -/* - * this is lame, but here's how recovery works... - * 1) all recovery threads cluster wide will work on recovering - * ONE node at a time - * 2) negotiate who will take over all the locks for the dead node. - * thats right... ALL the locks. - * 3) once a new master is chosen, everyone scans all locks - * and moves aside those mastered by the dead guy - * 4) each of these locks should be locked until recovery is done - * 5) the new master collects up all of secondary lock queue info - * one lock at a time, forcing each node to communicate back - * before continuing - * 6) each secondary lock queue responds with the full known lock info - * 7) once the new master has run all its locks, it sends a ALLDONE! 
- * message to everyone - * 8) upon receiving this message, the secondary queue node unlocks - * and responds to the ALLDONE - * 9) once the new master gets responses from everyone, he unlocks - * everything and recovery for this dead node is done - *10) go back to 2) while there are still dead nodes - * - */ - -static void dlm_print_reco_node_status(struct dlm_ctxt *dlm) -{ - struct dlm_reco_node_data *ndata; - struct dlm_lock_resource *res; - - mlog(ML_NOTICE, "%s(%d): recovery info, state=%s, dead=%u, master=%u\n", - dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), - dlm->reco.state & DLM_RECO_STATE_ACTIVE ? "ACTIVE" : "inactive", - dlm->reco.dead_node, dlm->reco.new_master); - - list_for_each_entry(ndata, &dlm->reco.node_data, list) { - char *st = "unknown"; - switch (ndata->state) { - case DLM_RECO_NODE_DATA_INIT: - st = "init"; - break; - case DLM_RECO_NODE_DATA_REQUESTING: - st = "requesting"; - break; - case DLM_RECO_NODE_DATA_DEAD: - st = "dead"; - break; - case DLM_RECO_NODE_DATA_RECEIVING: - st = "receiving"; - break; - case DLM_RECO_NODE_DATA_REQUESTED: - st = "requested"; - break; - case DLM_RECO_NODE_DATA_DONE: - st = "done"; - break; - case DLM_RECO_NODE_DATA_FINALIZE_SENT: - st = "finalize-sent"; - break; - default: - st = "bad"; - break; - } - mlog(ML_NOTICE, "%s: reco state, node %u, state=%s\n", - dlm->name, ndata->node_num, st); - } - list_for_each_entry(res, &dlm->reco.resources, recovering) { - mlog(ML_NOTICE, "%s: lockres %.*s on recovering list\n", - dlm->name, res->lockname.len, res->lockname.name); - } -} - -#define DLM_RECO_THREAD_TIMEOUT_MS (5 * 1000) - -static int dlm_recovery_thread(void *data) -{ - int status; - struct dlm_ctxt *dlm = data; - unsigned long timeout = msecs_to_jiffies(DLM_RECO_THREAD_TIMEOUT_MS); - - mlog(0, "dlm thread running for %s...\n", dlm->name); - - while (!kthread_should_stop()) { - if (dlm_domain_fully_joined(dlm)) { - status = dlm_do_recovery(dlm); - if (status == -EAGAIN) { - /* do not sleep, recheck immediately. 
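-				 * dlm_do_recovery() returns -EAGAIN once this
-				 * node has finished mastering recovery for a
-				 * single dead node; other bits may still be
-				 * set in the recovery_map, so loop straight
-				 * back around instead of sleeping.  a minimal
-				 * sketch of the return contract this loop
-				 * assumes:
-				 *
-				 *	status = dlm_do_recovery(dlm);
-				 *	 0       nothing left to recover right
-				 *	         now, ok to sleep
-				 *	 -EAGAIN one dead node was remastered
-				 *	         here, recheck the map at once
-				 *	 <0      transient error, log it and
-				 *	         retry after the timeout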
-				 */
-				continue;
-			}
-			if (status < 0)
-				mlog_errno(status);
-		}
-
-		wait_event_interruptible_timeout(dlm->dlm_reco_thread_wq,
-						 kthread_should_stop(),
-						 timeout);
-	}
-
-	mlog(0, "quitting DLM recovery thread\n");
-	return 0;
-}
-
-/* returns true when the recovery master has contacted us */
-static int dlm_reco_master_ready(struct dlm_ctxt *dlm)
-{
-	int ready;
-	spin_lock(&dlm->spinlock);
-	ready = (dlm->reco.new_master != O2NM_INVALID_NODE_NUM);
-	spin_unlock(&dlm->spinlock);
-	return ready;
-}
-
-/* returns true if node is no longer in the domain
- * could be dead or just not joined */
-int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node)
-{
-	int dead;
-	spin_lock(&dlm->spinlock);
-	dead = !test_bit(node, dlm->domain_map);
-	spin_unlock(&dlm->spinlock);
-	return dead;
-}
-
-/* returns true if node has already been recovered,
- * i.e. its bit has been cleared from the recovery map */
-static int dlm_is_node_recovered(struct dlm_ctxt *dlm, u8 node)
-{
-	int recovered;
-	spin_lock(&dlm->spinlock);
-	recovered = !test_bit(node, dlm->recovery_map);
-	spin_unlock(&dlm->spinlock);
-	return recovered;
-}
-
-
-void dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
-{
-	if (dlm_is_node_dead(dlm, node))
-		return;
-
-	printk(KERN_NOTICE "o2dlm: Waiting on the death of node %u in "
-	       "domain %s\n", node, dlm->name);
-
-	if (timeout)
-		wait_event_timeout(dlm->dlm_reco_thread_wq,
-				   dlm_is_node_dead(dlm, node),
-				   msecs_to_jiffies(timeout));
-	else
-		wait_event(dlm->dlm_reco_thread_wq,
-			   dlm_is_node_dead(dlm, node));
-}
-
-void dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout)
-{
-	if (dlm_is_node_recovered(dlm, node))
-		return;
-
-	printk(KERN_NOTICE "o2dlm: Waiting on the recovery of node %u in "
-	       "domain %s\n", node, dlm->name);
-
-	if (timeout)
-		wait_event_timeout(dlm->dlm_reco_thread_wq,
-				   dlm_is_node_recovered(dlm, node),
-				   msecs_to_jiffies(timeout));
-	else
-		wait_event(dlm->dlm_reco_thread_wq,
-			   dlm_is_node_recovered(dlm, node));
-}
-
-/* callers of the top-level api calls (dlmlock/dlmunlock) should
- * block on the dlm->reco.event when recovery is in progress.
- * the dlm recovery thread will set this state when it begins - * recovering a dead node (as the new master or not) and clear - * the state and wake as soon as all affected lock resources have - * been marked with the RECOVERY flag */ -static int dlm_in_recovery(struct dlm_ctxt *dlm) -{ - int in_recovery; - spin_lock(&dlm->spinlock); - in_recovery = !!(dlm->reco.state & DLM_RECO_STATE_ACTIVE); - spin_unlock(&dlm->spinlock); - return in_recovery; -} - - -void dlm_wait_for_recovery(struct dlm_ctxt *dlm) -{ - if (dlm_in_recovery(dlm)) { - mlog(0, "%s: reco thread %d in recovery: " - "state=%d, master=%u, dead=%u\n", - dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), - dlm->reco.state, dlm->reco.new_master, - dlm->reco.dead_node); - } - wait_event(dlm->reco.event, !dlm_in_recovery(dlm)); -} - -static void dlm_begin_recovery(struct dlm_ctxt *dlm) -{ - spin_lock(&dlm->spinlock); - BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE); - printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n", - dlm->name, dlm->reco.dead_node); - dlm->reco.state |= DLM_RECO_STATE_ACTIVE; - spin_unlock(&dlm->spinlock); -} - -static void dlm_end_recovery(struct dlm_ctxt *dlm) -{ - spin_lock(&dlm->spinlock); - BUG_ON(!(dlm->reco.state & DLM_RECO_STATE_ACTIVE)); - dlm->reco.state &= ~DLM_RECO_STATE_ACTIVE; - spin_unlock(&dlm->spinlock); - printk(KERN_NOTICE "o2dlm: End recovery on domain %s\n", dlm->name); - wake_up(&dlm->reco.event); -} - -static void dlm_print_recovery_master(struct dlm_ctxt *dlm) -{ - printk(KERN_NOTICE "o2dlm: Node %u (%s) is the Recovery Master for the " - "dead node %u in domain %s\n", dlm->reco.new_master, - (dlm->node_num == dlm->reco.new_master ? "me" : "he"), - dlm->reco.dead_node, dlm->name); -} - -static int dlm_do_recovery(struct dlm_ctxt *dlm) -{ - int status = 0; - int ret; - - spin_lock(&dlm->spinlock); - - /* check to see if the new master has died */ - if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM && - test_bit(dlm->reco.new_master, dlm->recovery_map)) { - mlog(0, "new master %u died while recovering %u!\n", - dlm->reco.new_master, dlm->reco.dead_node); - /* unset the new_master, leave dead_node */ - dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM); - } - - /* select a target to recover */ - if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { - int bit; - - bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0); - if (bit >= O2NM_MAX_NODES || bit < 0) - dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); - else - dlm_set_reco_dead_node(dlm, bit); - } else if (!test_bit(dlm->reco.dead_node, dlm->recovery_map)) { - /* BUG? */ - mlog(ML_ERROR, "dead_node %u no longer in recovery map!\n", - dlm->reco.dead_node); - dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); - } - - if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { - // mlog(0, "nothing to recover! sleeping now!\n"); - spin_unlock(&dlm->spinlock); - /* return to main thread loop and sleep. */ - return 0; - } - mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n", - dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), - dlm->reco.dead_node); - spin_unlock(&dlm->spinlock); - - /* take write barrier */ - /* (stops the list reshuffling thread, proxy ast handling) */ - dlm_begin_recovery(dlm); - - if (dlm->reco.new_master == dlm->node_num) - goto master_here; - - if (dlm->reco.new_master == O2NM_INVALID_NODE_NUM) { - /* choose a new master, returns 0 if this node - * is the master, -EEXIST if it's another node. - * this does not return until a new master is chosen - * or recovery completes entirely. 
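-		 * every node racing here blocks in dlmlock() on the
-		 * special $RECOVERY lockres, so this is effectively a
-		 * cluster-wide sync point.  a sketch of how the result
-		 * is consumed below:
-		 *
-		 *	ret = dlm_pick_recovery_master(dlm);
-		 *	 0       -> this node won the EX; goto master_here
-		 *	 -EEXIST -> another node won; it drives recovery
-		 *	            and this node just ends its local
-		 *	            recovery state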
*/ - ret = dlm_pick_recovery_master(dlm); - if (!ret) { - /* already notified everyone. go. */ - goto master_here; - } - mlog(0, "another node will master this recovery session.\n"); - } - - dlm_print_recovery_master(dlm); - - /* it is safe to start everything back up here - * because all of the dead node's lock resources - * have been marked as in-recovery */ - dlm_end_recovery(dlm); - - /* sleep out in main dlm_recovery_thread loop. */ - return 0; - -master_here: - dlm_print_recovery_master(dlm); - - status = dlm_remaster_locks(dlm, dlm->reco.dead_node); - if (status < 0) { - /* we should never hit this anymore */ - mlog(ML_ERROR, "%s: Error %d remastering locks for node %u, " - "retrying.\n", dlm->name, status, dlm->reco.dead_node); - /* yield a bit to allow any final network messages - * to get handled on remaining nodes */ - msleep(100); - } else { - /* success! see if any other nodes need recovery */ - mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n", - dlm->name, dlm->reco.dead_node, dlm->node_num); - dlm_reset_recovery(dlm); - } - dlm_end_recovery(dlm); - - /* continue and look for another dead node */ - return -EAGAIN; -} - -static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) -{ - int status = 0; - struct dlm_reco_node_data *ndata; - int all_nodes_done; - int destroy = 0; - int pass = 0; - - do { - /* we have become recovery master. there is no escaping - * this, so just keep trying until we get it. */ - status = dlm_init_recovery_area(dlm, dead_node); - if (status < 0) { - mlog(ML_ERROR, "%s: failed to alloc recovery area, " - "retrying\n", dlm->name); - msleep(1000); - } - } while (status != 0); - - /* safe to access the node data list without a lock, since this - * process is the only one to change the list */ - list_for_each_entry(ndata, &dlm->reco.node_data, list) { - BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT); - ndata->state = DLM_RECO_NODE_DATA_REQUESTING; - - mlog(0, "%s: Requesting lock info from node %u\n", dlm->name, - ndata->node_num); - - if (ndata->node_num == dlm->node_num) { - ndata->state = DLM_RECO_NODE_DATA_DONE; - continue; - } - - do { - status = dlm_request_all_locks(dlm, ndata->node_num, - dead_node); - if (status < 0) { - mlog_errno(status); - if (dlm_is_host_down(status)) { - /* node died, ignore it for recovery */ - status = 0; - ndata->state = DLM_RECO_NODE_DATA_DEAD; - /* wait for the domain map to catch up - * with the network state. */ - wait_event_timeout(dlm->dlm_reco_thread_wq, - dlm_is_node_dead(dlm, - ndata->node_num), - msecs_to_jiffies(1000)); - mlog(0, "waited 1 sec for %u, " - "dead? %s\n", ndata->node_num, - dlm_is_node_dead(dlm, ndata->node_num) ? - "yes" : "no"); - } else { - /* -ENOMEM on the other node */ - mlog(0, "%s: node %u returned " - "%d during recovery, retrying " - "after a short wait\n", - dlm->name, ndata->node_num, - status); - msleep(100); - } - } - } while (status != 0); - - spin_lock(&dlm_reco_state_lock); - switch (ndata->state) { - case DLM_RECO_NODE_DATA_INIT: - case DLM_RECO_NODE_DATA_FINALIZE_SENT: - case DLM_RECO_NODE_DATA_REQUESTED: - BUG(); - break; - case DLM_RECO_NODE_DATA_DEAD: - mlog(0, "node %u died after requesting " - "recovery info for node %u\n", - ndata->node_num, dead_node); - /* fine. don't need this node's info. - * continue without it. 
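-			 * for reference, the ndata->state progression this
-			 * loop is driving (roughly; DEAD can be entered at
-			 * any point if the node drops out of the cluster):
-			 *
-			 *	INIT -> REQUESTING -> REQUESTED -> RECEIVING
-			 *	     -> DONE -> FINALIZE_SENT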
*/ - break; - case DLM_RECO_NODE_DATA_REQUESTING: - ndata->state = DLM_RECO_NODE_DATA_REQUESTED; - mlog(0, "now receiving recovery data from " - "node %u for dead node %u\n", - ndata->node_num, dead_node); - break; - case DLM_RECO_NODE_DATA_RECEIVING: - mlog(0, "already receiving recovery data from " - "node %u for dead node %u\n", - ndata->node_num, dead_node); - break; - case DLM_RECO_NODE_DATA_DONE: - mlog(0, "already DONE receiving recovery data " - "from node %u for dead node %u\n", - ndata->node_num, dead_node); - break; - } - spin_unlock(&dlm_reco_state_lock); - } - - mlog(0, "%s: Done requesting all lock info\n", dlm->name); - - /* nodes should be sending reco data now - * just need to wait */ - - while (1) { - /* check all the nodes now to see if we are - * done, or if anyone died */ - all_nodes_done = 1; - spin_lock(&dlm_reco_state_lock); - list_for_each_entry(ndata, &dlm->reco.node_data, list) { - mlog(0, "checking recovery state of node %u\n", - ndata->node_num); - switch (ndata->state) { - case DLM_RECO_NODE_DATA_INIT: - case DLM_RECO_NODE_DATA_REQUESTING: - mlog(ML_ERROR, "bad ndata state for " - "node %u: state=%d\n", - ndata->node_num, ndata->state); - BUG(); - break; - case DLM_RECO_NODE_DATA_DEAD: - mlog(0, "node %u died after " - "requesting recovery info for " - "node %u\n", ndata->node_num, - dead_node); - break; - case DLM_RECO_NODE_DATA_RECEIVING: - case DLM_RECO_NODE_DATA_REQUESTED: - mlog(0, "%s: node %u still in state %s\n", - dlm->name, ndata->node_num, - ndata->state==DLM_RECO_NODE_DATA_RECEIVING ? - "receiving" : "requested"); - all_nodes_done = 0; - break; - case DLM_RECO_NODE_DATA_DONE: - mlog(0, "%s: node %u state is done\n", - dlm->name, ndata->node_num); - break; - case DLM_RECO_NODE_DATA_FINALIZE_SENT: - mlog(0, "%s: node %u state is finalize\n", - dlm->name, ndata->node_num); - break; - } - } - spin_unlock(&dlm_reco_state_lock); - - mlog(0, "pass #%d, all_nodes_done?: %s\n", ++pass, - all_nodes_done?"yes":"no"); - if (all_nodes_done) { - int ret; - - /* all nodes are now in DLM_RECO_NODE_DATA_DONE state - * just send a finalize message to everyone and - * clean up */ - mlog(0, "all nodes are done! 
send finalize\n"); - ret = dlm_send_finalize_reco_message(dlm); - if (ret < 0) - mlog_errno(ret); - - spin_lock(&dlm->spinlock); - dlm_finish_local_lockres_recovery(dlm, dead_node, - dlm->node_num); - spin_unlock(&dlm->spinlock); - mlog(0, "should be done with recovery!\n"); - - mlog(0, "finishing recovery of %s at %lu, " - "dead=%u, this=%u, new=%u\n", dlm->name, - jiffies, dlm->reco.dead_node, - dlm->node_num, dlm->reco.new_master); - destroy = 1; - status = 0; - /* rescan everything marked dirty along the way */ - dlm_kick_thread(dlm, NULL); - break; - } - /* wait to be signalled, with periodic timeout - * to check for node death */ - wait_event_interruptible_timeout(dlm->dlm_reco_thread_wq, - kthread_should_stop(), - msecs_to_jiffies(DLM_RECO_THREAD_TIMEOUT_MS)); - - } - - if (destroy) - dlm_destroy_recovery_area(dlm, dead_node); - - return status; -} - -static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) -{ - int num=0; - struct dlm_reco_node_data *ndata; - - spin_lock(&dlm->spinlock); - memcpy(dlm->reco.node_map, dlm->domain_map, sizeof(dlm->domain_map)); - /* nodes can only be removed (by dying) after dropping - * this lock, and death will be trapped later, so this should do */ - spin_unlock(&dlm->spinlock); - - while (1) { - num = find_next_bit (dlm->reco.node_map, O2NM_MAX_NODES, num); - if (num >= O2NM_MAX_NODES) { - break; - } - BUG_ON(num == dead_node); - - ndata = kzalloc(sizeof(*ndata), GFP_NOFS); - if (!ndata) { - dlm_destroy_recovery_area(dlm, dead_node); - return -ENOMEM; - } - ndata->node_num = num; - ndata->state = DLM_RECO_NODE_DATA_INIT; - spin_lock(&dlm_reco_state_lock); - list_add_tail(&ndata->list, &dlm->reco.node_data); - spin_unlock(&dlm_reco_state_lock); - num++; - } - - return 0; -} - -static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) -{ - struct dlm_reco_node_data *ndata, *next; - LIST_HEAD(tmplist); - - spin_lock(&dlm_reco_state_lock); - list_splice_init(&dlm->reco.node_data, &tmplist); - spin_unlock(&dlm_reco_state_lock); - - list_for_each_entry_safe(ndata, next, &tmplist, list) { - list_del_init(&ndata->list); - kfree(ndata); - } -} - -static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, - u8 dead_node) -{ - struct dlm_lock_request lr; - enum dlm_status ret; - - mlog(0, "\n"); - - - mlog(0, "dlm_request_all_locks: dead node is %u, sending request " - "to %u\n", dead_node, request_from); - - memset(&lr, 0, sizeof(lr)); - lr.node_idx = dlm->node_num; - lr.dead_node = dead_node; - - // send message - ret = DLM_NOLOCKMGR; - ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key, - &lr, sizeof(lr), request_from, NULL); - - /* negative status is handled by caller */ - if (ret < 0) - mlog(ML_ERROR, "%s: Error %d send LOCK_REQUEST to node %u " - "to recover dead node %u\n", dlm->name, ret, - request_from, dead_node); - // return from here, then - // sleep until all received or error - return ret; - -} - -int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - struct dlm_ctxt *dlm = data; - struct dlm_lock_request *lr = (struct dlm_lock_request *)msg->buf; - char *buf = NULL; - struct dlm_work_item *item = NULL; - - if (!dlm_grab(dlm)) - return -EINVAL; - - if (lr->dead_node != dlm->reco.dead_node) { - mlog(ML_ERROR, "%s: node %u sent dead_node=%u, but local " - "dead_node is %u\n", dlm->name, lr->node_idx, - lr->dead_node, dlm->reco.dead_node); - dlm_print_reco_node_status(dlm); - /* this is a hack */ - dlm_put(dlm); - return -ENOMEM; - } - 
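-	/* a minimal sketch of the dispatch pattern used below: do not
-	 * do the slow, network-bound work in o2net handler context;
-	 * copy what is needed, queue a work item, and let
-	 * dlm->dlm_worker handle it:
-	 *
-	 *	item = kzalloc(sizeof(*item), GFP_NOFS);
-	 *	buf = (char *) __get_free_page(GFP_NOFS);
-	 *	dlm_init_work_item(dlm, item, dlm_request_all_locks_worker, buf);
-	 *	list_add_tail(&item->list, &dlm->work_list);
-	 *	queue_work(dlm->dlm_worker, &dlm->dispatched_work);
-	 */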
BUG_ON(lr->dead_node != dlm->reco.dead_node); - - item = kzalloc(sizeof(*item), GFP_NOFS); - if (!item) { - dlm_put(dlm); - return -ENOMEM; - } - - /* this will get freed by dlm_request_all_locks_worker */ - buf = (char *) __get_free_page(GFP_NOFS); - if (!buf) { - kfree(item); - dlm_put(dlm); - return -ENOMEM; - } - - /* queue up work for dlm_request_all_locks_worker */ - dlm_grab(dlm); /* get an extra ref for the work item */ - dlm_init_work_item(dlm, item, dlm_request_all_locks_worker, buf); - item->u.ral.reco_master = lr->node_idx; - item->u.ral.dead_node = lr->dead_node; - spin_lock(&dlm->work_lock); - list_add_tail(&item->list, &dlm->work_list); - spin_unlock(&dlm->work_lock); - queue_work(dlm->dlm_worker, &dlm->dispatched_work); - - dlm_put(dlm); - return 0; -} - -static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data) -{ - struct dlm_migratable_lockres *mres; - struct dlm_lock_resource *res; - struct dlm_ctxt *dlm; - LIST_HEAD(resources); - int ret; - u8 dead_node, reco_master; - int skip_all_done = 0; - - dlm = item->dlm; - dead_node = item->u.ral.dead_node; - reco_master = item->u.ral.reco_master; - mres = (struct dlm_migratable_lockres *)data; - - mlog(0, "%s: recovery worker started, dead=%u, master=%u\n", - dlm->name, dead_node, reco_master); - - if (dead_node != dlm->reco.dead_node || - reco_master != dlm->reco.new_master) { - /* worker could have been created before the recovery master - * died. if so, do not continue, but do not error. */ - if (dlm->reco.new_master == O2NM_INVALID_NODE_NUM) { - mlog(ML_NOTICE, "%s: will not send recovery state, " - "recovery master %u died, thread=(dead=%u,mas=%u)" - " current=(dead=%u,mas=%u)\n", dlm->name, - reco_master, dead_node, reco_master, - dlm->reco.dead_node, dlm->reco.new_master); - } else { - mlog(ML_NOTICE, "%s: reco state invalid: reco(dead=%u, " - "master=%u), request(dead=%u, master=%u)\n", - dlm->name, dlm->reco.dead_node, - dlm->reco.new_master, dead_node, reco_master); - } - goto leave; - } - - /* lock resources should have already been moved to the - * dlm->reco.resources list. now move items from that list - * to a temp list if the dead owner matches. note that the - * whole cluster recovers only one node at a time, so we - * can safely move UNKNOWN lock resources for each recovery - * session. 
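-	 * in other words, dlm_move_reco_locks_to_list() applies this
-	 * selection rule under dlm->spinlock (sketch):
-	 *
-	 *	if (res->owner == dead_node ||
-	 *	    res->owner == DLM_LOCK_RES_OWNER_UNKNOWN)
-	 *		list_move_tail(&res->recovering, list);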
*/ - dlm_move_reco_locks_to_list(dlm, &resources, dead_node); - - /* now we can begin blasting lockreses without the dlm lock */ - - /* any errors returned will be due to the new_master dying, - * the dlm_reco_thread should detect this */ - list_for_each_entry(res, &resources, recovering) { - ret = dlm_send_one_lockres(dlm, res, mres, reco_master, - DLM_MRES_RECOVERY); - if (ret < 0) { - mlog(ML_ERROR, "%s: node %u went down while sending " - "recovery state for dead node %u, ret=%d\n", dlm->name, - reco_master, dead_node, ret); - skip_all_done = 1; - break; - } - } - - /* move the resources back to the list */ - spin_lock(&dlm->spinlock); - list_splice_init(&resources, &dlm->reco.resources); - spin_unlock(&dlm->spinlock); - - if (!skip_all_done) { - ret = dlm_send_all_done_msg(dlm, dead_node, reco_master); - if (ret < 0) { - mlog(ML_ERROR, "%s: node %u went down while sending " - "recovery all-done for dead node %u, ret=%d\n", - dlm->name, reco_master, dead_node, ret); - } - } -leave: - free_page((unsigned long)data); -} - - -static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to) -{ - int ret, tmpret; - struct dlm_reco_data_done done_msg; - - memset(&done_msg, 0, sizeof(done_msg)); - done_msg.node_idx = dlm->node_num; - done_msg.dead_node = dead_node; - mlog(0, "sending DATA DONE message to %u, " - "my node=%u, dead node=%u\n", send_to, done_msg.node_idx, - done_msg.dead_node); - - ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg, - sizeof(done_msg), send_to, &tmpret); - if (ret < 0) { - mlog(ML_ERROR, "%s: Error %d send RECO_DATA_DONE to node %u " - "to recover dead node %u\n", dlm->name, ret, send_to, - dead_node); - if (!dlm_is_host_down(ret)) { - BUG(); - } - } else - ret = tmpret; - return ret; -} - - -int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - struct dlm_ctxt *dlm = data; - struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf; - struct dlm_reco_node_data *ndata = NULL; - int ret = -EINVAL; - - if (!dlm_grab(dlm)) - return -EINVAL; - - mlog(0, "got DATA DONE: dead_node=%u, reco.dead_node=%u, " - "node_idx=%u, this node=%u\n", done->dead_node, - dlm->reco.dead_node, done->node_idx, dlm->node_num); - - mlog_bug_on_msg((done->dead_node != dlm->reco.dead_node), - "Got DATA DONE: dead_node=%u, reco.dead_node=%u, " - "node_idx=%u, this node=%u\n", done->dead_node, - dlm->reco.dead_node, done->node_idx, dlm->node_num); - - spin_lock(&dlm_reco_state_lock); - list_for_each_entry(ndata, &dlm->reco.node_data, list) { - if (ndata->node_num != done->node_idx) - continue; - - switch (ndata->state) { - /* should have moved beyond INIT but not to FINALIZE yet */ - case DLM_RECO_NODE_DATA_INIT: - case DLM_RECO_NODE_DATA_DEAD: - case DLM_RECO_NODE_DATA_FINALIZE_SENT: - mlog(ML_ERROR, "bad ndata state for node %u:" - " state=%d\n", ndata->node_num, - ndata->state); - BUG(); - break; - /* these states are possible at this point, anywhere along - * the line of recovery */ - case DLM_RECO_NODE_DATA_DONE: - case DLM_RECO_NODE_DATA_RECEIVING: - case DLM_RECO_NODE_DATA_REQUESTED: - case DLM_RECO_NODE_DATA_REQUESTING: - mlog(0, "node %u is DONE sending " - "recovery data!\n", - ndata->node_num); - - ndata->state = DLM_RECO_NODE_DATA_DONE; - ret = 0; - break; - } - } - spin_unlock(&dlm_reco_state_lock); - - /* wake the recovery thread, some node is done */ - if (!ret) - dlm_kick_recovery_thread(dlm); - - if (ret < 0) - mlog(ML_ERROR, "failed to find recovery node data for node " - "%u\n", 
done->node_idx); - dlm_put(dlm); - - mlog(0, "leaving reco data done handler, ret=%d\n", ret); - return ret; -} - -static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm, - struct list_head *list, - u8 dead_node) -{ - struct dlm_lock_resource *res, *next; - struct dlm_lock *lock; - - spin_lock(&dlm->spinlock); - list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) { - /* always prune any $RECOVERY entries for dead nodes, - * otherwise hangs can occur during later recovery */ - if (dlm_is_recovery_lock(res->lockname.name, - res->lockname.len)) { - spin_lock(&res->spinlock); - list_for_each_entry(lock, &res->granted, list) { - if (lock->ml.node == dead_node) { - mlog(0, "AHA! there was " - "a $RECOVERY lock for dead " - "node %u (%s)!\n", - dead_node, dlm->name); - list_del_init(&lock->list); - dlm_lock_put(lock); - break; - } - } - spin_unlock(&res->spinlock); - continue; - } - - if (res->owner == dead_node) { - mlog(0, "found lockres owned by dead node while " - "doing recovery for node %u. sending it.\n", - dead_node); - list_move_tail(&res->recovering, list); - } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { - mlog(0, "found UNKNOWN owner while doing recovery " - "for node %u. sending it.\n", dead_node); - list_move_tail(&res->recovering, list); - } - } - spin_unlock(&dlm->spinlock); -} - -static inline int dlm_num_locks_in_lockres(struct dlm_lock_resource *res) -{ - int total_locks = 0; - struct list_head *iter, *queue = &res->granted; - int i; - - for (i=0; i<3; i++) { - list_for_each(iter, queue) - total_locks++; - queue++; - } - return total_locks; -} - - -static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, - struct dlm_migratable_lockres *mres, - u8 send_to, - struct dlm_lock_resource *res, - int total_locks) -{ - u64 mig_cookie = be64_to_cpu(mres->mig_cookie); - int mres_total_locks = be32_to_cpu(mres->total_locks); - int sz, ret = 0, status = 0; - u8 orig_flags = mres->flags, - orig_master = mres->master; - - BUG_ON(mres->num_locks > DLM_MAX_MIGRATABLE_LOCKS); - if (!mres->num_locks) - return 0; - - sz = sizeof(struct dlm_migratable_lockres) + - (mres->num_locks * sizeof(struct dlm_migratable_lock)); - - /* add an all-done flag if we reached the last lock */ - orig_flags = mres->flags; - BUG_ON(total_locks > mres_total_locks); - if (total_locks == mres_total_locks) - mres->flags |= DLM_MRES_ALL_DONE; - - mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n", - dlm->name, res->lockname.len, res->lockname.name, - orig_flags & DLM_MRES_MIGRATION ? "migration" : "recovery", - send_to); - - /* send it */ - ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres, - sz, send_to, &status); - if (ret < 0) { - /* XXX: negative status is not handled. - * this will end up killing this node. */ - mlog(ML_ERROR, "%s: res %.*s, Error %d send MIG_LOCKRES to " - "node %u (%s)\n", dlm->name, mres->lockname_len, - mres->lockname, ret, send_to, - (orig_flags & DLM_MRES_MIGRATION ? 
- "migration" : "recovery")); - } else { - /* might get an -ENOMEM back here */ - ret = status; - if (ret < 0) { - mlog_errno(ret); - - if (ret == -EFAULT) { - mlog(ML_ERROR, "node %u told me to kill " - "myself!\n", send_to); - BUG(); - } - } - } - - /* zero and reinit the message buffer */ - dlm_init_migratable_lockres(mres, res->lockname.name, - res->lockname.len, mres_total_locks, - mig_cookie, orig_flags, orig_master); - return ret; -} - -static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres, - const char *lockname, int namelen, - int total_locks, u64 cookie, - u8 flags, u8 master) -{ - /* mres here is one full page */ - clear_page(mres); - mres->lockname_len = namelen; - memcpy(mres->lockname, lockname, namelen); - mres->num_locks = 0; - mres->total_locks = cpu_to_be32(total_locks); - mres->mig_cookie = cpu_to_be64(cookie); - mres->flags = flags; - mres->master = master; -} - -static void dlm_prepare_lvb_for_migration(struct dlm_lock *lock, - struct dlm_migratable_lockres *mres, - int queue) -{ - if (!lock->lksb) - return; - - /* Ignore lvb in all locks in the blocked list */ - if (queue == DLM_BLOCKED_LIST) - return; - - /* Only consider lvbs in locks with granted EX or PR lock levels */ - if (lock->ml.type != LKM_EXMODE && lock->ml.type != LKM_PRMODE) - return; - - if (dlm_lvb_is_empty(mres->lvb)) { - memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN); - return; - } - - /* Ensure the lvb copied for migration matches in other valid locks */ - if (!memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN)) - return; - - mlog(ML_ERROR, "Mismatched lvb in lock cookie=%u:%llu, name=%.*s, " - "node=%u\n", - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - lock->lockres->lockname.len, lock->lockres->lockname.name, - lock->ml.node); - dlm_print_one_lock_resource(lock->lockres); - BUG(); -} - -/* returns 1 if this lock fills the network structure, - * 0 otherwise */ -static int dlm_add_lock_to_array(struct dlm_lock *lock, - struct dlm_migratable_lockres *mres, int queue) -{ - struct dlm_migratable_lock *ml; - int lock_num = mres->num_locks; - - ml = &(mres->ml[lock_num]); - ml->cookie = lock->ml.cookie; - ml->type = lock->ml.type; - ml->convert_type = lock->ml.convert_type; - ml->highest_blocked = lock->ml.highest_blocked; - ml->list = queue; - if (lock->lksb) { - ml->flags = lock->lksb->flags; - dlm_prepare_lvb_for_migration(lock, mres, queue); - } - ml->node = lock->ml.node; - mres->num_locks++; - /* we reached the max, send this network message */ - if (mres->num_locks == DLM_MAX_MIGRATABLE_LOCKS) - return 1; - return 0; -} - -static void dlm_add_dummy_lock(struct dlm_ctxt *dlm, - struct dlm_migratable_lockres *mres) -{ - struct dlm_lock dummy; - memset(&dummy, 0, sizeof(dummy)); - dummy.ml.cookie = 0; - dummy.ml.type = LKM_IVMODE; - dummy.ml.convert_type = LKM_IVMODE; - dummy.ml.highest_blocked = LKM_IVMODE; - dummy.lksb = NULL; - dummy.ml.node = dlm->node_num; - dlm_add_lock_to_array(&dummy, mres, DLM_BLOCKED_LIST); -} - -static inline int dlm_is_dummy_lock(struct dlm_ctxt *dlm, - struct dlm_migratable_lock *ml, - u8 *nodenum) -{ - if (unlikely(ml->cookie == 0 && - ml->type == LKM_IVMODE && - ml->convert_type == LKM_IVMODE && - ml->highest_blocked == LKM_IVMODE && - ml->list == DLM_BLOCKED_LIST)) { - *nodenum = ml->node; - return 1; - } - return 0; -} - -int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, - struct dlm_migratable_lockres *mres, - u8 send_to, u8 flags) -{ - struct 
list_head *queue; - int total_locks, i; - u64 mig_cookie = 0; - struct dlm_lock *lock; - int ret = 0; - - BUG_ON(!(flags & (DLM_MRES_RECOVERY|DLM_MRES_MIGRATION))); - - mlog(0, "sending to %u\n", send_to); - - total_locks = dlm_num_locks_in_lockres(res); - if (total_locks > DLM_MAX_MIGRATABLE_LOCKS) { - /* rare, but possible */ - mlog(0, "argh. lockres has %d locks. this will " - "require more than one network packet to " - "migrate\n", total_locks); - mig_cookie = dlm_get_next_mig_cookie(); - } - - dlm_init_migratable_lockres(mres, res->lockname.name, - res->lockname.len, total_locks, - mig_cookie, flags, res->owner); - - total_locks = 0; - for (i=DLM_GRANTED_LIST; i<=DLM_BLOCKED_LIST; i++) { - queue = dlm_list_idx_to_ptr(res, i); - list_for_each_entry(lock, queue, list) { - /* add another lock. */ - total_locks++; - if (!dlm_add_lock_to_array(lock, mres, i)) - continue; - - /* this filled the lock message, - * we must send it immediately. */ - ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, - res, total_locks); - if (ret < 0) - goto error; - } - } - if (total_locks == 0) { - /* send a dummy lock to indicate a mastery reference only */ - mlog(0, "%s:%.*s: sending dummy lock to %u, %s\n", - dlm->name, res->lockname.len, res->lockname.name, - send_to, flags & DLM_MRES_RECOVERY ? "recovery" : - "migration"); - dlm_add_dummy_lock(dlm, mres); - } - /* flush any remaining locks */ - ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks); - if (ret < 0) - goto error; - return ret; - -error: - mlog(ML_ERROR, "%s: dlm_send_mig_lockres_msg returned %d\n", - dlm->name, ret); - if (!dlm_is_host_down(ret)) - BUG(); - mlog(0, "%s: node %u went down while sending %s " - "lockres %.*s\n", dlm->name, send_to, - flags & DLM_MRES_RECOVERY ? "recovery" : "migration", - res->lockname.len, res->lockname.name); - return ret; -} - - - -/* - * this message will contain no more than one page worth of - * recovery data, and it will work on only one lockres. - * there may be many locks in this page, and we may need to wait - * for additional packets to complete all the locks (rare, but - * possible). - */ -/* - * NOTE: the allocation error cases here are scary - * we really cannot afford to fail an alloc in recovery - * do we spin? returning an error only delays the problem really - */ - -int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - struct dlm_ctxt *dlm = data; - struct dlm_migratable_lockres *mres = - (struct dlm_migratable_lockres *)msg->buf; - int ret = 0; - u8 real_master; - u8 extra_refs = 0; - char *buf = NULL; - struct dlm_work_item *item = NULL; - struct dlm_lock_resource *res = NULL; - - if (!dlm_grab(dlm)) - return -EINVAL; - - BUG_ON(!(mres->flags & (DLM_MRES_RECOVERY|DLM_MRES_MIGRATION))); - - real_master = mres->master; - if (real_master == DLM_LOCK_RES_OWNER_UNKNOWN) { - /* cannot migrate a lockres with no master */ - BUG_ON(!(mres->flags & DLM_MRES_RECOVERY)); - } - - mlog(0, "%s message received from node %u\n", - (mres->flags & DLM_MRES_RECOVERY) ? - "recovery" : "migration", mres->master); - if (mres->flags & DLM_MRES_ALL_DONE) - mlog(0, "all done flag. all lockres data received!\n"); - - ret = -ENOMEM; - buf = kmalloc(be16_to_cpu(msg->data_len), GFP_NOFS); - item = kzalloc(sizeof(*item), GFP_NOFS); - if (!buf || !item) - goto leave; - - /* lookup the lock to see if we have a secondary queue for this - * already... just add the locks in and this will have its owner - * and RECOVERY flag changed when it completes. 
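-	 * a rough sketch of the two paths below:
-	 *
-	 *	res = dlm_lookup_lockres(dlm, name, len);  (takes a ref)
-	 *	if (res)
-	 *		mark it RECOVERING or MIGRATING under res->spinlock
-	 *	else
-	 *		dlm_new_lockres(), mark it, hash it, and take extra
-	 *		refs so dlm_thread cannot purge it before the worker
-	 *		attaches the incoming locks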
*/ - res = dlm_lookup_lockres(dlm, mres->lockname, mres->lockname_len); - if (res) { - /* this will get a ref on res */ - /* mark it as recovering/migrating and hash it */ - spin_lock(&res->spinlock); - if (mres->flags & DLM_MRES_RECOVERY) { - res->state |= DLM_LOCK_RES_RECOVERING; - } else { - if (res->state & DLM_LOCK_RES_MIGRATING) { - /* this is at least the second - * lockres message */ - mlog(0, "lock %.*s is already migrating\n", - mres->lockname_len, - mres->lockname); - } else if (res->state & DLM_LOCK_RES_RECOVERING) { - /* caller should BUG */ - mlog(ML_ERROR, "node is attempting to migrate " - "lock %.*s, but marked as recovering!\n", - mres->lockname_len, mres->lockname); - ret = -EFAULT; - spin_unlock(&res->spinlock); - goto leave; - } - res->state |= DLM_LOCK_RES_MIGRATING; - } - spin_unlock(&res->spinlock); - } else { - /* need to allocate, just like if it was - * mastered here normally */ - res = dlm_new_lockres(dlm, mres->lockname, mres->lockname_len); - if (!res) - goto leave; - - /* to match the ref that we would have gotten if - * dlm_lookup_lockres had succeeded */ - dlm_lockres_get(res); - - /* mark it as recovering/migrating and hash it */ - if (mres->flags & DLM_MRES_RECOVERY) - res->state |= DLM_LOCK_RES_RECOVERING; - else - res->state |= DLM_LOCK_RES_MIGRATING; - - spin_lock(&dlm->spinlock); - __dlm_insert_lockres(dlm, res); - spin_unlock(&dlm->spinlock); - - /* Add an extra ref for this lock-less lockres lest the - * dlm_thread purges it before we get the chance to add - * locks to it */ - dlm_lockres_get(res); - - /* There are three refs that need to be put. - * 1. Taken above. - * 2. kref_init in dlm_new_lockres()->dlm_init_lockres(). - * 3. dlm_lookup_lockres() - * The first one is handled at the end of this function. The - * other two are handled in the worker thread after locks have - * been attached. Yes, we don't wait for purge time to match - * kref_init. The lockres will still have atleast one ref - * added because it is in the hash __dlm_insert_lockres() */ - extra_refs++; - - /* now that the new lockres is inserted, - * make it usable by other processes */ - spin_lock(&res->spinlock); - res->state &= ~DLM_LOCK_RES_IN_PROGRESS; - spin_unlock(&res->spinlock); - wake_up(&res->wq); - } - - /* at this point we have allocated everything we need, - * and we have a hashed lockres with an extra ref and - * the proper res->state flags. */ - ret = 0; - spin_lock(&res->spinlock); - /* drop this either when master requery finds a different master - * or when a lock is added by the recovery worker */ - dlm_lockres_grab_inflight_ref(dlm, res); - if (mres->master == DLM_LOCK_RES_OWNER_UNKNOWN) { - /* migration cannot have an unknown master */ - BUG_ON(!(mres->flags & DLM_MRES_RECOVERY)); - mlog(0, "recovery has passed me a lockres with an " - "unknown owner.. 
will need to requery: " - "%.*s\n", mres->lockname_len, mres->lockname); - } else { - /* take a reference now to pin the lockres, drop it - * when locks are added in the worker */ - dlm_change_lockres_owner(dlm, res, dlm->node_num); - } - spin_unlock(&res->spinlock); - - /* queue up work for dlm_mig_lockres_worker */ - dlm_grab(dlm); /* get an extra ref for the work item */ - memcpy(buf, msg->buf, be16_to_cpu(msg->data_len)); /* copy the whole message */ - dlm_init_work_item(dlm, item, dlm_mig_lockres_worker, buf); - item->u.ml.lockres = res; /* already have a ref */ - item->u.ml.real_master = real_master; - item->u.ml.extra_ref = extra_refs; - spin_lock(&dlm->work_lock); - list_add_tail(&item->list, &dlm->work_list); - spin_unlock(&dlm->work_lock); - queue_work(dlm->dlm_worker, &dlm->dispatched_work); - -leave: - /* One extra ref taken needs to be put here */ - if (extra_refs) - dlm_lockres_put(res); - - dlm_put(dlm); - if (ret < 0) { - if (buf) - kfree(buf); - if (item) - kfree(item); - mlog_errno(ret); - } - - return ret; -} - - -static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data) -{ - struct dlm_ctxt *dlm; - struct dlm_migratable_lockres *mres; - int ret = 0; - struct dlm_lock_resource *res; - u8 real_master; - u8 extra_ref; - - dlm = item->dlm; - mres = (struct dlm_migratable_lockres *)data; - - res = item->u.ml.lockres; - real_master = item->u.ml.real_master; - extra_ref = item->u.ml.extra_ref; - - if (real_master == DLM_LOCK_RES_OWNER_UNKNOWN) { - /* this case is super-rare. only occurs if - * node death happens during migration. */ -again: - ret = dlm_lockres_master_requery(dlm, res, &real_master); - if (ret < 0) { - mlog(0, "dlm_lockres_master_requery ret=%d\n", - ret); - goto again; - } - if (real_master == DLM_LOCK_RES_OWNER_UNKNOWN) { - mlog(0, "lockres %.*s not claimed. " - "this node will take it.\n", - res->lockname.len, res->lockname.name); - } else { - spin_lock(&res->spinlock); - dlm_lockres_drop_inflight_ref(dlm, res); - spin_unlock(&res->spinlock); - mlog(0, "master needs to respond to sender " - "that node %u still owns %.*s\n", - real_master, res->lockname.len, - res->lockname.name); - /* cannot touch this lockres */ - goto leave; - } - } - - ret = dlm_process_recovery_data(dlm, res, mres); - if (ret < 0) - mlog(0, "dlm_process_recovery_data returned %d\n", ret); - else - mlog(0, "dlm_process_recovery_data succeeded\n"); - - if ((mres->flags & (DLM_MRES_MIGRATION|DLM_MRES_ALL_DONE)) == - (DLM_MRES_MIGRATION|DLM_MRES_ALL_DONE)) { - ret = dlm_finish_migration(dlm, res, mres->master); - if (ret < 0) - mlog_errno(ret); - } - -leave: - /* See comment in dlm_mig_lockres_handler() */ - if (res) { - if (extra_ref) - dlm_lockres_put(res); - dlm_lockres_put(res); - } - kfree(data); -} - - - -static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 *real_master) -{ - struct dlm_node_iter iter; - int nodenum; - int ret = 0; - - *real_master = DLM_LOCK_RES_OWNER_UNKNOWN; - - /* we only reach here if one of the two nodes in a - * migration died while the migration was in progress. - * at this point we need to requery the master. we - * know that the new_master got as far as creating - * an mle on at least one node, but we do not know - * if any nodes had actually cleared the mle and set - * the master to the new_master. 
the old master - * is supposed to set the owner to UNKNOWN in the - * event of a new_master death, so the only possible - * responses that we can get from nodes here are - * that the master is new_master, or that the master - * is UNKNOWN. - * if all nodes come back with UNKNOWN then we know - * the lock needs remastering here. - * if any node comes back with a valid master, check - * to see if that master is the one that we are - * recovering. if so, then the new_master died and - * we need to remaster this lock. if not, then the - * new_master survived and that node will respond to - * other nodes about the owner. - * if there is an owner, this node needs to dump this - * lockres and alert the sender that this lockres - * was rejected. */ - spin_lock(&dlm->spinlock); - dlm_node_iter_init(dlm->domain_map, &iter); - spin_unlock(&dlm->spinlock); - - while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { - /* do not send to self */ - if (nodenum == dlm->node_num) - continue; - ret = dlm_do_master_requery(dlm, res, nodenum, real_master); - if (ret < 0) { - mlog_errno(ret); - if (!dlm_is_host_down(ret)) - BUG(); - /* host is down, so answer for that node would be - * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */ - } - if (*real_master != DLM_LOCK_RES_OWNER_UNKNOWN) { - mlog(0, "lock master is %u\n", *real_master); - break; - } - } - return ret; -} - - -int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, - u8 nodenum, u8 *real_master) -{ - int ret = -EINVAL; - struct dlm_master_requery req; - int status = DLM_LOCK_RES_OWNER_UNKNOWN; - - memset(&req, 0, sizeof(req)); - req.node_idx = dlm->node_num; - req.namelen = res->lockname.len; - memcpy(req.name, res->lockname.name, res->lockname.len); - - ret = o2net_send_message(DLM_MASTER_REQUERY_MSG, dlm->key, - &req, sizeof(req), nodenum, &status); - /* XXX: negative status not handled properly here. */ - if (ret < 0) - mlog(ML_ERROR, "Error %d when sending message %u (key " - "0x%x) to node %u\n", ret, DLM_MASTER_REQUERY_MSG, - dlm->key, nodenum); - else { - BUG_ON(status < 0); - BUG_ON(status > DLM_LOCK_RES_OWNER_UNKNOWN); - *real_master = (u8) (status & 0xff); - mlog(0, "node %u responded to master requery with %u\n", - nodenum, *real_master); - ret = 0; - } - return ret; -} - - -/* this function cannot error, so unless the sending - * or receiving of the message failed, the owner can - * be trusted */ -int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - struct dlm_ctxt *dlm = data; - struct dlm_master_requery *req = (struct dlm_master_requery *)msg->buf; - struct dlm_lock_resource *res = NULL; - unsigned int hash; - int master = DLM_LOCK_RES_OWNER_UNKNOWN; - u32 flags = DLM_ASSERT_MASTER_REQUERY; - - if (!dlm_grab(dlm)) { - /* since the domain has gone away on this - * node, the proper response is UNKNOWN */ - return master; - } - - hash = dlm_lockid_hash(req->name, req->namelen); - - spin_lock(&dlm->spinlock); - res = __dlm_lookup_lockres(dlm, req->name, req->namelen, hash); - if (res) { - spin_lock(&res->spinlock); - master = res->owner; - if (master == dlm->node_num) { - int ret = dlm_dispatch_assert_master(dlm, res, - 0, 0, flags); - if (ret < 0) { - mlog_errno(-ENOMEM); - /* retry!? */ - BUG(); - } - } else /* put.. 
incase we are not the master */ - dlm_lockres_put(res); - spin_unlock(&res->spinlock); - } - spin_unlock(&dlm->spinlock); - - dlm_put(dlm); - return master; -} - -static inline struct list_head * -dlm_list_num_to_pointer(struct dlm_lock_resource *res, int list_num) -{ - struct list_head *ret; - BUG_ON(list_num < 0); - BUG_ON(list_num > 2); - ret = &(res->granted); - ret += list_num; - return ret; -} -/* TODO: do ast flush business - * TODO: do MIGRATING and RECOVERING spinning - */ - -/* -* NOTE about in-flight requests during migration: -* -* Before attempting the migrate, the master has marked the lockres as -* MIGRATING and then flushed all of its pending ASTS. So any in-flight -* requests either got queued before the MIGRATING flag got set, in which -* case the lock data will reflect the change and a return message is on -* the way, or the request failed to get in before MIGRATING got set. In -* this case, the caller will be told to spin and wait for the MIGRATING -* flag to be dropped, then recheck the master. -* This holds true for the convert, cancel and unlock cases, and since lvb -* updates are tied to these same messages, it applies to lvb updates as -* well. For the lock case, there is no way a lock can be on the master -* queue and not be on the secondary queue since the lock is always added -* locally first. This means that the new target node will never be sent -* a lock that he doesn't already have on the list. -* In total, this means that the local lock is correct and should not be -* updated to match the one sent by the master. Any messages sent back -* from the master before the MIGRATING flag will bring the lock properly -* up-to-date, and the change will be ordered properly for the waiter. -* We will *not* attempt to modify the lock underneath the waiter. -*/ - -static int dlm_process_recovery_data(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_migratable_lockres *mres) -{ - struct dlm_migratable_lock *ml; - struct list_head *queue; - struct list_head *tmpq = NULL; - struct dlm_lock *newlock = NULL; - struct dlm_lockstatus *lksb = NULL; - int ret = 0; - int i, j, bad; - struct dlm_lock *lock = NULL; - u8 from = O2NM_MAX_NODES; - unsigned int added = 0; - __be64 c; - - mlog(0, "running %d locks for this lockres\n", mres->num_locks); - for (i=0; i<mres->num_locks; i++) { - ml = &(mres->ml[i]); - - if (dlm_is_dummy_lock(dlm, ml, &from)) { - /* placeholder, just need to set the refmap bit */ - BUG_ON(mres->num_locks != 1); - mlog(0, "%s:%.*s: dummy lock for %u\n", - dlm->name, mres->lockname_len, mres->lockname, - from); - spin_lock(&res->spinlock); - dlm_lockres_set_refmap_bit(dlm, res, from); - spin_unlock(&res->spinlock); - added++; - break; - } - BUG_ON(ml->highest_blocked != LKM_IVMODE); - newlock = NULL; - lksb = NULL; - - queue = dlm_list_num_to_pointer(res, ml->list); - tmpq = NULL; - - /* if the lock is for the local node it needs to - * be moved to the proper location within the queue. - * do not allocate a new lock structure. */ - if (ml->node == dlm->node_num) { - /* MIGRATION ONLY! */ - BUG_ON(!(mres->flags & DLM_MRES_MIGRATION)); - - spin_lock(&res->spinlock); - for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { - tmpq = dlm_list_idx_to_ptr(res, j); - list_for_each_entry(lock, tmpq, list) { - if (lock->ml.cookie != ml->cookie) - lock = NULL; - else - break; - } - if (lock) - break; - } - - /* lock is always created locally first, and - * destroyed locally last. 
it must be on the list */ - if (!lock) { - c = ml->cookie; - mlog(ML_ERROR, "Could not find local lock " - "with cookie %u:%llu, node %u, " - "list %u, flags 0x%x, type %d, " - "conv %d, highest blocked %d\n", - dlm_get_lock_cookie_node(be64_to_cpu(c)), - dlm_get_lock_cookie_seq(be64_to_cpu(c)), - ml->node, ml->list, ml->flags, ml->type, - ml->convert_type, ml->highest_blocked); - __dlm_print_one_lock_resource(res); - BUG(); - } - - if (lock->ml.node != ml->node) { - c = lock->ml.cookie; - mlog(ML_ERROR, "Mismatched node# in lock " - "cookie %u:%llu, name %.*s, node %u\n", - dlm_get_lock_cookie_node(be64_to_cpu(c)), - dlm_get_lock_cookie_seq(be64_to_cpu(c)), - res->lockname.len, res->lockname.name, - lock->ml.node); - c = ml->cookie; - mlog(ML_ERROR, "Migrate lock cookie %u:%llu, " - "node %u, list %u, flags 0x%x, type %d, " - "conv %d, highest blocked %d\n", - dlm_get_lock_cookie_node(be64_to_cpu(c)), - dlm_get_lock_cookie_seq(be64_to_cpu(c)), - ml->node, ml->list, ml->flags, ml->type, - ml->convert_type, ml->highest_blocked); - __dlm_print_one_lock_resource(res); - BUG(); - } - - if (tmpq != queue) { - c = ml->cookie; - mlog(0, "Lock cookie %u:%llu was on list %u " - "instead of list %u for %.*s\n", - dlm_get_lock_cookie_node(be64_to_cpu(c)), - dlm_get_lock_cookie_seq(be64_to_cpu(c)), - j, ml->list, res->lockname.len, - res->lockname.name); - __dlm_print_one_lock_resource(res); - spin_unlock(&res->spinlock); - continue; - } - - /* see NOTE above about why we do not update - * to match the master here */ - - /* move the lock to its proper place */ - /* do not alter lock refcount. switching lists. */ - list_move_tail(&lock->list, queue); - spin_unlock(&res->spinlock); - added++; - - mlog(0, "just reordered a local lock!\n"); - continue; - } - - /* lock is for another node. */ - newlock = dlm_new_lock(ml->type, ml->node, - be64_to_cpu(ml->cookie), NULL); - if (!newlock) { - ret = -ENOMEM; - goto leave; - } - lksb = newlock->lksb; - dlm_lock_attach_lockres(newlock, res); - - if (ml->convert_type != LKM_IVMODE) { - BUG_ON(queue != &res->converting); - newlock->ml.convert_type = ml->convert_type; - } - lksb->flags |= (ml->flags & - (DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB)); - - if (ml->type == LKM_NLMODE) - goto skip_lvb; - - if (!dlm_lvb_is_empty(mres->lvb)) { - if (lksb->flags & DLM_LKSB_PUT_LVB) { - /* other node was trying to update - * lvb when node died. recreate the - * lksb with the updated lvb. */ - memcpy(lksb->lvb, mres->lvb, DLM_LVB_LEN); - /* the lock resource lvb update must happen - * NOW, before the spinlock is dropped. - * we no longer wait for the AST to update - * the lvb. */ - memcpy(res->lvb, mres->lvb, DLM_LVB_LEN); - } else { - /* otherwise, the node is sending its - * most recent valid lvb info */ - BUG_ON(ml->type != LKM_EXMODE && - ml->type != LKM_PRMODE); - if (!dlm_lvb_is_empty(res->lvb) && - (ml->type == LKM_EXMODE || - memcmp(res->lvb, mres->lvb, DLM_LVB_LEN))) { - int i; - mlog(ML_ERROR, "%s:%.*s: received bad " - "lvb! type=%d\n", dlm->name, - res->lockname.len, - res->lockname.name, ml->type); - printk("lockres lvb=["); - for (i=0; i<DLM_LVB_LEN; i++) - printk("%02x", res->lvb[i]); - printk("]\nmigrated lvb=["); - for (i=0; i<DLM_LVB_LEN; i++) - printk("%02x", mres->lvb[i]); - printk("]\n"); - dlm_print_one_lock_resource(res); - BUG(); - } - memcpy(res->lvb, mres->lvb, DLM_LVB_LEN); - } - } -skip_lvb: - - /* NOTE: - * wrt lock queue ordering and recovery: - * 1. order of locks on granted queue is - * meaningless. - * 2. 
order of locks on converting queue is - * LOST with the node death. sorry charlie. - * 3. order of locks on the blocked queue is - * also LOST. - * order of locks does not affect integrity, it - * just means that a lock request may get pushed - * back in line as a result of the node death. - * also note that for a given node the lock order - * for its secondary queue locks is preserved - * relative to each other, but clearly *not* - * preserved relative to locks from other nodes. - */ - bad = 0; - spin_lock(&res->spinlock); - list_for_each_entry(lock, queue, list) { - if (lock->ml.cookie == ml->cookie) { - c = lock->ml.cookie; - mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already " - "exists on this lockres!\n", dlm->name, - res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(c)), - dlm_get_lock_cookie_seq(be64_to_cpu(c))); - - mlog(ML_NOTICE, "sent lock: type=%d, conv=%d, " - "node=%u, cookie=%u:%llu, queue=%d\n", - ml->type, ml->convert_type, ml->node, - dlm_get_lock_cookie_node(be64_to_cpu(ml->cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(ml->cookie)), - ml->list); - - __dlm_print_one_lock_resource(res); - bad = 1; - break; - } - } - if (!bad) { - dlm_lock_get(newlock); - list_add_tail(&newlock->list, queue); - mlog(0, "%s:%.*s: added lock for node %u, " - "setting refmap bit\n", dlm->name, - res->lockname.len, res->lockname.name, ml->node); - dlm_lockres_set_refmap_bit(dlm, res, ml->node); - added++; - } - spin_unlock(&res->spinlock); - } - mlog(0, "done running all the locks\n"); - -leave: - /* balance the ref taken when the work was queued */ - spin_lock(&res->spinlock); - dlm_lockres_drop_inflight_ref(dlm, res); - spin_unlock(&res->spinlock); - - if (ret < 0) { - mlog_errno(ret); - if (newlock) - dlm_lock_put(newlock); - } - - return ret; -} - -void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res) -{ - int i; - struct list_head *queue; - struct dlm_lock *lock, *next; - - assert_spin_locked(&dlm->spinlock); - assert_spin_locked(&res->spinlock); - res->state |= DLM_LOCK_RES_RECOVERING; - if (!list_empty(&res->recovering)) { - mlog(0, - "Recovering res %s:%.*s, is already on recovery list!\n", - dlm->name, res->lockname.len, res->lockname.name); - list_del_init(&res->recovering); - dlm_lockres_put(res); - } - /* We need to hold a reference while on the recovery list */ - dlm_lockres_get(res); - list_add_tail(&res->recovering, &dlm->reco.resources); - - /* find any pending locks and put them back on proper list */ - for (i=DLM_BLOCKED_LIST; i>=DLM_GRANTED_LIST; i--) { - queue = dlm_list_idx_to_ptr(res, i); - list_for_each_entry_safe(lock, next, queue, list) { - dlm_lock_get(lock); - if (lock->convert_pending) { - /* move converting lock back to granted */ - BUG_ON(i != DLM_CONVERTING_LIST); - mlog(0, "node died with convert pending " - "on %.*s. move back to granted list.\n", - res->lockname.len, res->lockname.name); - dlm_revert_pending_convert(res, lock); - lock->convert_pending = 0; - } else if (lock->lock_pending) { - /* remove pending lock requests completely */ - BUG_ON(i != DLM_BLOCKED_LIST); - mlog(0, "node died with lock pending " - "on %.*s. remove from blocked list and skip.\n", - res->lockname.len, res->lockname.name); - /* lock will be floating until ref in - * dlmlock_remote is freed after the network - * call returns. ok for it to not be on any - * list since no ast can be called - * (the master is dead). 
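-			 * summary of how each pending op is resolved in
-			 * this loop (sketch):
-			 *
-			 *	convert_pending -> dlm_revert_pending_convert()
-			 *	                   back on the granted list
-			 *	lock_pending    -> dlm_revert_pending_lock()
-			 *	                   request dropped entirely
-			 *	unlock_pending  -> dlm_commit_pending_unlock()
-			 *	                   treated as already unlocked
-			 *	cancel_pending  -> dlm_commit_pending_cancel()
-			 *	                   treated as already cancelled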
*/ - dlm_revert_pending_lock(res, lock); - lock->lock_pending = 0; - } else if (lock->unlock_pending) { - /* if an unlock was in progress, treat as - * if this had completed successfully - * before sending this lock state to the - * new master. note that the dlm_unlock - * call is still responsible for calling - * the unlockast. that will happen after - * the network call times out. for now, - * just move lists to prepare the new - * recovery master. */ - BUG_ON(i != DLM_GRANTED_LIST); - mlog(0, "node died with unlock pending " - "on %.*s. remove from blocked list and skip.\n", - res->lockname.len, res->lockname.name); - dlm_commit_pending_unlock(res, lock); - lock->unlock_pending = 0; - } else if (lock->cancel_pending) { - /* if a cancel was in progress, treat as - * if this had completed successfully - * before sending this lock state to the - * new master */ - BUG_ON(i != DLM_CONVERTING_LIST); - mlog(0, "node died with cancel pending " - "on %.*s. move back to granted list.\n", - res->lockname.len, res->lockname.name); - dlm_commit_pending_cancel(res, lock); - lock->cancel_pending = 0; - } - dlm_lock_put(lock); - } - } -} - - - -/* removes all recovered locks from the recovery list. - * sets the res->owner to the new master. - * unsets the RECOVERY flag and wakes waiters. */ -static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, - u8 dead_node, u8 new_master) -{ - int i; - struct hlist_node *hash_iter; - struct hlist_head *bucket; - struct dlm_lock_resource *res, *next; - - assert_spin_locked(&dlm->spinlock); - - list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) { - if (res->owner == dead_node) { - mlog(0, "%s: res %.*s, Changing owner from %u to %u\n", - dlm->name, res->lockname.len, res->lockname.name, - res->owner, new_master); - list_del_init(&res->recovering); - spin_lock(&res->spinlock); - /* new_master has our reference from - * the lock state sent during recovery */ - dlm_change_lockres_owner(dlm, res, new_master); - res->state &= ~DLM_LOCK_RES_RECOVERING; - if (__dlm_lockres_has_locks(res)) - __dlm_dirty_lockres(dlm, res); - spin_unlock(&res->spinlock); - wake_up(&res->wq); - dlm_lockres_put(res); - } - } - - /* this will become unnecessary eventually, but - * for now we need to run the whole hash, clear - * the RECOVERING state and set the owner - * if necessary */ - for (i = 0; i < DLM_HASH_BUCKETS; i++) { - bucket = dlm_lockres_hash(dlm, i); - hlist_for_each_entry(res, hash_iter, bucket, hash_node) { - if (!(res->state & DLM_LOCK_RES_RECOVERING)) - continue; - - if (res->owner != dead_node && - res->owner != dlm->node_num) - continue; - - if (!list_empty(&res->recovering)) { - list_del_init(&res->recovering); - dlm_lockres_put(res); - } - - /* new_master has our reference from - * the lock state sent during recovery */ - mlog(0, "%s: res %.*s, Changing owner from %u to %u\n", - dlm->name, res->lockname.len, res->lockname.name, - res->owner, new_master); - spin_lock(&res->spinlock); - dlm_change_lockres_owner(dlm, res, new_master); - res->state &= ~DLM_LOCK_RES_RECOVERING; - if (__dlm_lockres_has_locks(res)) - __dlm_dirty_lockres(dlm, res); - spin_unlock(&res->spinlock); - wake_up(&res->wq); - } - } -} - -static inline int dlm_lvb_needs_invalidation(struct dlm_lock *lock, int local) -{ - if (local) { - if (lock->ml.type != LKM_EXMODE && - lock->ml.type != LKM_PRMODE) - return 1; - } else if (lock->ml.type == LKM_EXMODE) - return 1; - return 0; -} - -static void dlm_revalidate_lvb(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, u8 
dead_node) -{ - struct list_head *queue; - struct dlm_lock *lock; - int blank_lvb = 0, local = 0; - int i; - u8 search_node; - - assert_spin_locked(&dlm->spinlock); - assert_spin_locked(&res->spinlock); - - if (res->owner == dlm->node_num) - /* if this node owned the lockres, and if the dead node - * had an EX when he died, blank out the lvb */ - search_node = dead_node; - else { - /* if this is a secondary lockres, and we had no EX or PR - * locks granted, we can no longer trust the lvb */ - search_node = dlm->node_num; - local = 1; /* check local state for valid lvb */ - } - - for (i=DLM_GRANTED_LIST; i<=DLM_CONVERTING_LIST; i++) { - queue = dlm_list_idx_to_ptr(res, i); - list_for_each_entry(lock, queue, list) { - if (lock->ml.node == search_node) { - if (dlm_lvb_needs_invalidation(lock, local)) { - /* zero the lksb lvb and lockres lvb */ - blank_lvb = 1; - memset(lock->lksb->lvb, 0, DLM_LVB_LEN); - } - } - } - } - - if (blank_lvb) { - mlog(0, "clearing %.*s lvb, dead node %u had EX\n", - res->lockname.len, res->lockname.name, dead_node); - memset(res->lvb, 0, DLM_LVB_LEN); - } -} - -static void dlm_free_dead_locks(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, u8 dead_node) -{ - struct dlm_lock *lock, *next; - unsigned int freed = 0; - - /* this node is the lockres master: - * 1) remove any stale locks for the dead node - * 2) if the dead node had an EX when he died, blank out the lvb - */ - assert_spin_locked(&dlm->spinlock); - assert_spin_locked(&res->spinlock); - - /* We do two dlm_lock_put(). One for removing from list and the other is - * to force the DLM_UNLOCK_FREE_LOCK action so as to free the locks */ - - /* TODO: check pending_asts, pending_basts here */ - list_for_each_entry_safe(lock, next, &res->granted, list) { - if (lock->ml.node == dead_node) { - list_del_init(&lock->list); - dlm_lock_put(lock); - /* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */ - dlm_lock_put(lock); - freed++; - } - } - list_for_each_entry_safe(lock, next, &res->converting, list) { - if (lock->ml.node == dead_node) { - list_del_init(&lock->list); - dlm_lock_put(lock); - /* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */ - dlm_lock_put(lock); - freed++; - } - } - list_for_each_entry_safe(lock, next, &res->blocked, list) { - if (lock->ml.node == dead_node) { - list_del_init(&lock->list); - dlm_lock_put(lock); - /* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */ - dlm_lock_put(lock); - freed++; - } - } - - if (freed) { - mlog(0, "%s:%.*s: freed %u locks for dead node %u, " - "dropping ref from lockres\n", dlm->name, - res->lockname.len, res->lockname.name, freed, dead_node); - if(!test_bit(dead_node, res->refmap)) { - mlog(ML_ERROR, "%s:%.*s: freed %u locks for dead node %u, " - "but ref was not set\n", dlm->name, - res->lockname.len, res->lockname.name, freed, dead_node); - __dlm_print_one_lock_resource(res); - } - dlm_lockres_clear_refmap_bit(dlm, res, dead_node); - } else if (test_bit(dead_node, res->refmap)) { - mlog(0, "%s:%.*s: dead node %u had a ref, but had " - "no locks and had not purged before dying\n", dlm->name, - res->lockname.len, res->lockname.name, dead_node); - dlm_lockres_clear_refmap_bit(dlm, res, dead_node); - } - - /* do not kick thread yet */ - __dlm_dirty_lockres(dlm, res); -} - -/* if this node is the recovery master, and there are no - * locks for a given lockres owned by this node that are in - * either PR or EX mode, zero out the lvb before requesting. 
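- * the rule itself lives in dlm_lvb_needs_invalidation() above;
- * a compressed sketch:
- *
- *	local (this node's) lock:  invalidate unless we hold EX or PR
- *	dead node's lock:          invalidate if the dead node held EX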
- * - */ - - -static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) -{ - struct hlist_node *iter; - struct dlm_lock_resource *res; - int i; - struct hlist_head *bucket; - struct dlm_lock *lock; - - - /* purge any stale mles */ - dlm_clean_master_list(dlm, dead_node); - - /* - * now clean up all lock resources. there are two rules: - * - * 1) if the dead node was the master, move the lockres - * to the recovering list. set the RECOVERING flag. - * this lockres needs to be cleaned up before it can - * be used further. - * - * 2) if this node was the master, remove all locks from - * each of the lockres queues that were owned by the - * dead node. once recovery finishes, the dlm thread - * can be kicked again to see if any ASTs or BASTs - * need to be fired as a result. - */ - for (i = 0; i < DLM_HASH_BUCKETS; i++) { - bucket = dlm_lockres_hash(dlm, i); - hlist_for_each_entry(res, iter, bucket, hash_node) { - /* always prune any $RECOVERY entries for dead nodes, - * otherwise hangs can occur during later recovery */ - if (dlm_is_recovery_lock(res->lockname.name, - res->lockname.len)) { - spin_lock(&res->spinlock); - list_for_each_entry(lock, &res->granted, list) { - if (lock->ml.node == dead_node) { - mlog(0, "AHA! there was " - "a $RECOVERY lock for dead " - "node %u (%s)!\n", - dead_node, dlm->name); - list_del_init(&lock->list); - dlm_lock_put(lock); - break; - } - } - spin_unlock(&res->spinlock); - continue; - } - spin_lock(&res->spinlock); - /* zero the lvb if necessary */ - dlm_revalidate_lvb(dlm, res, dead_node); - if (res->owner == dead_node) { - if (res->state & DLM_LOCK_RES_DROPPING_REF) { - mlog(ML_NOTICE, "%s: res %.*s, Skip " - "recovery as it is being freed\n", - dlm->name, res->lockname.len, - res->lockname.name); - } else - dlm_move_lockres_to_recovery_list(dlm, - res); - - } else if (res->owner == dlm->node_num) { - dlm_free_dead_locks(dlm, res, dead_node); - __dlm_lockres_calc_usage(dlm, res); - } - spin_unlock(&res->spinlock); - } - } - -} - -static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx) -{ - assert_spin_locked(&dlm->spinlock); - - if (dlm->reco.new_master == idx) { - mlog(0, "%s: recovery master %d just died\n", - dlm->name, idx); - if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) { - /* finalize1 was reached, so it is safe to clear - * the new_master and dead_node. that recovery - * is complete. */ - mlog(0, "%s: dead master %d had reached " - "finalize1 state, clearing\n", dlm->name, idx); - dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE; - __dlm_reset_recovery(dlm); - } - } - - /* Clean up join state on node death. */ - if (dlm->joining_node == idx) { - mlog(0, "Clearing join state for node %u\n", idx); - __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); - } - - /* check to see if the node is already considered dead */ - if (!test_bit(idx, dlm->live_nodes_map)) { - mlog(0, "for domain %s, node %d is already dead. " - "another node likely did recovery already.\n", - dlm->name, idx); - return; - } - - /* check to see if we do not care about this node */ - if (!test_bit(idx, dlm->domain_map)) { - /* This also catches the case that we get a node down - * but haven't joined the domain yet. 
*/ - mlog(0, "node %u already removed from domain!\n", idx); - return; - } - - clear_bit(idx, dlm->live_nodes_map); - - /* make sure local cleanup occurs before the heartbeat events */ - if (!test_bit(idx, dlm->recovery_map)) - dlm_do_local_recovery_cleanup(dlm, idx); - - /* notify anything attached to the heartbeat events */ - dlm_hb_event_notify_attached(dlm, idx, 0); - - mlog(0, "node %u being removed from domain map!\n", idx); - clear_bit(idx, dlm->domain_map); - clear_bit(idx, dlm->exit_domain_map); - /* wake up migration waiters if a node goes down. - * perhaps later we can genericize this for other waiters. */ - wake_up(&dlm->migration_wq); - - if (test_bit(idx, dlm->recovery_map)) - mlog(0, "domain %s, node %u already added " - "to recovery map!\n", dlm->name, idx); - else - set_bit(idx, dlm->recovery_map); -} - -void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data) -{ - struct dlm_ctxt *dlm = data; - - if (!dlm_grab(dlm)) - return; - - /* - * This will notify any dlm users that a node in our domain - * went away without notifying us first. - */ - if (test_bit(idx, dlm->domain_map)) - dlm_fire_domain_eviction_callbacks(dlm, idx); - - spin_lock(&dlm->spinlock); - __dlm_hb_node_down(dlm, idx); - spin_unlock(&dlm->spinlock); - - dlm_put(dlm); -} - -void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data) -{ - struct dlm_ctxt *dlm = data; - - if (!dlm_grab(dlm)) - return; - - spin_lock(&dlm->spinlock); - set_bit(idx, dlm->live_nodes_map); - /* do NOT notify mle attached to the heartbeat events. - * new nodes are not interesting in mastery until joined. */ - spin_unlock(&dlm->spinlock); - - dlm_put(dlm); -} - -static void dlm_reco_ast(void *astdata) -{ - struct dlm_ctxt *dlm = astdata; - mlog(0, "ast for recovery lock fired!, this=%u, dlm=%s\n", - dlm->node_num, dlm->name); -} -static void dlm_reco_bast(void *astdata, int blocked_type) -{ - struct dlm_ctxt *dlm = astdata; - mlog(0, "bast for recovery lock fired!, this=%u, dlm=%s\n", - dlm->node_num, dlm->name); -} -static void dlm_reco_unlock_ast(void *astdata, enum dlm_status st) -{ - mlog(0, "unlockast for recovery lock fired!\n"); -} - -/* - * dlm_pick_recovery_master will continually attempt to use - * dlmlock() on the special "$RECOVERY" lockres with the - * LKM_NOQUEUE flag to get an EX. every thread that enters - * this function on each node racing to become the recovery - * master will not stop attempting this until either: - * a) this node gets the EX (and becomes the recovery master), - * or b) dlm->reco.new_master gets set to some nodenum - * != O2NM_INVALID_NODE_NUM (another node will do the reco). - * so each time a recovery master is needed, the entire cluster - * will sync at this point. if the new master dies, that will - * be detected in dlm_do_recovery */ -static int dlm_pick_recovery_master(struct dlm_ctxt *dlm) -{ - enum dlm_status ret; - struct dlm_lockstatus lksb; - int status = -EINVAL; - - mlog(0, "starting recovery of %s at %lu, dead=%u, this=%u\n", - dlm->name, jiffies, dlm->reco.dead_node, dlm->node_num); -again: - memset(&lksb, 0, sizeof(lksb)); - - ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY, - DLM_RECOVERY_LOCK_NAME, DLM_RECOVERY_LOCK_NAME_LEN, - dlm_reco_ast, dlm, dlm_reco_bast); - - mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n", - dlm->name, ret, lksb.status); - - if (ret == DLM_NORMAL) { - mlog(0, "dlm=%s dlmlock says I got it (this=%u)\n", - dlm->name, dlm->node_num); - - /* got the EX lock. 
check to see if another node - * just became the reco master */ - if (dlm_reco_master_ready(dlm)) { - mlog(0, "%s: got reco EX lock, but %u will " - "do the recovery\n", dlm->name, - dlm->reco.new_master); - status = -EEXIST; - } else { - status = 0; - - /* see if recovery was already finished elsewhere */ - spin_lock(&dlm->spinlock); - if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { - status = -EINVAL; - mlog(0, "%s: got reco EX lock, but " - "node got recovered already\n", dlm->name); - if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) { - mlog(ML_ERROR, "%s: new master is %u " - "but no dead node!\n", - dlm->name, dlm->reco.new_master); - BUG(); - } - } - spin_unlock(&dlm->spinlock); - } - - /* if this node has actually become the recovery master, - * set the master and send the messages to begin recovery */ - if (!status) { - mlog(0, "%s: dead=%u, this=%u, sending " - "begin_reco now\n", dlm->name, - dlm->reco.dead_node, dlm->node_num); - status = dlm_send_begin_reco_message(dlm, - dlm->reco.dead_node); - /* this always succeeds */ - BUG_ON(status); - - /* set the new_master to this node */ - spin_lock(&dlm->spinlock); - dlm_set_reco_master(dlm, dlm->node_num); - spin_unlock(&dlm->spinlock); - } - - /* recovery lock is a special case. ast will not get fired, - * so just go ahead and unlock it. */ - ret = dlmunlock(dlm, &lksb, 0, dlm_reco_unlock_ast, dlm); - if (ret == DLM_DENIED) { - mlog(0, "got DLM_DENIED, trying LKM_CANCEL\n"); - ret = dlmunlock(dlm, &lksb, LKM_CANCEL, dlm_reco_unlock_ast, dlm); - } - if (ret != DLM_NORMAL) { - /* this would really suck. this could only happen - * if there was a network error during the unlock - * because of node death. this means the unlock - * is actually "done" and the lock structure is - * even freed. we can continue, but only - * because this specific lock name is special. */ - mlog(ML_ERROR, "dlmunlock returned %d\n", ret); - } - } else if (ret == DLM_NOTQUEUED) { - mlog(0, "dlm=%s dlmlock says another node got it (this=%u)\n", - dlm->name, dlm->node_num); - /* another node is master. 
wait on - * reco.new_master != O2NM_INVALID_NODE_NUM - * for at most one second */ - wait_event_timeout(dlm->dlm_reco_thread_wq, - dlm_reco_master_ready(dlm), - msecs_to_jiffies(1000)); - if (!dlm_reco_master_ready(dlm)) { - mlog(0, "%s: reco master taking awhile\n", - dlm->name); - goto again; - } - /* another node has informed this one that it is reco master */ - mlog(0, "%s: reco master %u is ready to recover %u\n", - dlm->name, dlm->reco.new_master, dlm->reco.dead_node); - status = -EEXIST; - } else if (ret == DLM_RECOVERING) { - mlog(0, "dlm=%s dlmlock says master node died (this=%u)\n", - dlm->name, dlm->node_num); - goto again; - } else { - struct dlm_lock_resource *res; - - /* dlmlock returned something other than NOTQUEUED or NORMAL */ - mlog(ML_ERROR, "%s: got %s from dlmlock($RECOVERY), " - "lksb.status=%s\n", dlm->name, dlm_errname(ret), - dlm_errname(lksb.status)); - res = dlm_lookup_lockres(dlm, DLM_RECOVERY_LOCK_NAME, - DLM_RECOVERY_LOCK_NAME_LEN); - if (res) { - dlm_print_one_lock_resource(res); - dlm_lockres_put(res); - } else { - mlog(ML_ERROR, "recovery lock not found\n"); - } - BUG(); - } - - return status; -} - -static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node) -{ - struct dlm_begin_reco br; - int ret = 0; - struct dlm_node_iter iter; - int nodenum; - int status; - - mlog(0, "%s: dead node is %u\n", dlm->name, dead_node); - - spin_lock(&dlm->spinlock); - dlm_node_iter_init(dlm->domain_map, &iter); - spin_unlock(&dlm->spinlock); - - clear_bit(dead_node, iter.node_map); - - memset(&br, 0, sizeof(br)); - br.node_idx = dlm->node_num; - br.dead_node = dead_node; - - while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { - ret = 0; - if (nodenum == dead_node) { - mlog(0, "not sending begin reco to dead node " - "%u\n", dead_node); - continue; - } - if (nodenum == dlm->node_num) { - mlog(0, "not sending begin reco to self\n"); - continue; - } -retry: - ret = -EINVAL; - mlog(0, "attempting to send begin reco msg to %d\n", - nodenum); - ret = o2net_send_message(DLM_BEGIN_RECO_MSG, dlm->key, - &br, sizeof(br), nodenum, &status); - /* negative status is handled ok by caller here */ - if (ret >= 0) - ret = status; - if (dlm_is_host_down(ret)) { - /* node is down. not involved in recovery - * so just keep going */ - mlog(ML_NOTICE, "%s: node %u was down when sending " - "begin reco msg (%d)\n", dlm->name, nodenum, ret); - ret = 0; - } - - /* - * Prior to commit aad1b15310b9bcd59fa81ab8f2b1513b59553ea8, - * dlm_begin_reco_handler() returned EAGAIN and not -EAGAIN. - * We are handling both for compatibility reasons. - */ - if (ret == -EAGAIN || ret == EAGAIN) { - mlog(0, "%s: trying to start recovery of node " - "%u, but node %u is waiting for last recovery " - "to complete, backoff for a bit\n", dlm->name, - dead_node, nodenum); - msleep(100); - goto retry; - } - if (ret < 0) { - struct dlm_lock_resource *res; - - /* this is now a serious problem, possibly ENOMEM - * in the network stack. 
must retry */ - mlog_errno(ret); - mlog(ML_ERROR, "begin reco of dlm %s to node %u " - "returned %d\n", dlm->name, nodenum, ret); - res = dlm_lookup_lockres(dlm, DLM_RECOVERY_LOCK_NAME, - DLM_RECOVERY_LOCK_NAME_LEN); - if (res) { - dlm_print_one_lock_resource(res); - dlm_lockres_put(res); - } else { - mlog(ML_ERROR, "recovery lock not found\n"); - } - /* sleep for a bit in hopes that we can avoid - * another ENOMEM */ - msleep(100); - goto retry; - } - } - - return ret; -} - -int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - struct dlm_ctxt *dlm = data; - struct dlm_begin_reco *br = (struct dlm_begin_reco *)msg->buf; - - /* ok to return 0, domain has gone away */ - if (!dlm_grab(dlm)) - return 0; - - spin_lock(&dlm->spinlock); - if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) { - mlog(0, "%s: node %u wants to recover node %u (%u:%u) " - "but this node is in finalize state, waiting on finalize2\n", - dlm->name, br->node_idx, br->dead_node, - dlm->reco.dead_node, dlm->reco.new_master); - spin_unlock(&dlm->spinlock); - return -EAGAIN; - } - spin_unlock(&dlm->spinlock); - - mlog(0, "%s: node %u wants to recover node %u (%u:%u)\n", - dlm->name, br->node_idx, br->dead_node, - dlm->reco.dead_node, dlm->reco.new_master); - - dlm_fire_domain_eviction_callbacks(dlm, br->dead_node); - - spin_lock(&dlm->spinlock); - if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) { - if (test_bit(dlm->reco.new_master, dlm->recovery_map)) { - mlog(0, "%s: new_master %u died, changing " - "to %u\n", dlm->name, dlm->reco.new_master, - br->node_idx); - } else { - mlog(0, "%s: new_master %u NOT DEAD, changing " - "to %u\n", dlm->name, dlm->reco.new_master, - br->node_idx); - /* may not have seen the new master as dead yet */ - } - } - if (dlm->reco.dead_node != O2NM_INVALID_NODE_NUM) { - mlog(ML_NOTICE, "%s: dead_node previously set to %u, " - "node %u changing it to %u\n", dlm->name, - dlm->reco.dead_node, br->node_idx, br->dead_node); - } - dlm_set_reco_master(dlm, br->node_idx); - dlm_set_reco_dead_node(dlm, br->dead_node); - if (!test_bit(br->dead_node, dlm->recovery_map)) { - mlog(0, "recovery master %u sees %u as dead, but this " - "node has not yet. 
marking %u as dead\n", - br->node_idx, br->dead_node, br->dead_node); - if (!test_bit(br->dead_node, dlm->domain_map) || - !test_bit(br->dead_node, dlm->live_nodes_map)) - mlog(0, "%u not in domain/live_nodes map " - "so setting it in reco map manually\n", - br->dead_node); - /* force the recovery cleanup in __dlm_hb_node_down - * both of these will be cleared in a moment */ - set_bit(br->dead_node, dlm->domain_map); - set_bit(br->dead_node, dlm->live_nodes_map); - __dlm_hb_node_down(dlm, br->dead_node); - } - spin_unlock(&dlm->spinlock); - - dlm_kick_recovery_thread(dlm); - - mlog(0, "%s: recovery started by node %u, for %u (%u:%u)\n", - dlm->name, br->node_idx, br->dead_node, - dlm->reco.dead_node, dlm->reco.new_master); - - dlm_put(dlm); - return 0; -} - -#define DLM_FINALIZE_STAGE2 0x01 -static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm) -{ - int ret = 0; - struct dlm_finalize_reco fr; - struct dlm_node_iter iter; - int nodenum; - int status; - int stage = 1; - - mlog(0, "finishing recovery for node %s:%u, " - "stage %d\n", dlm->name, dlm->reco.dead_node, stage); - - spin_lock(&dlm->spinlock); - dlm_node_iter_init(dlm->domain_map, &iter); - spin_unlock(&dlm->spinlock); - -stage2: - memset(&fr, 0, sizeof(fr)); - fr.node_idx = dlm->node_num; - fr.dead_node = dlm->reco.dead_node; - if (stage == 2) - fr.flags |= DLM_FINALIZE_STAGE2; - - while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { - if (nodenum == dlm->node_num) - continue; - ret = o2net_send_message(DLM_FINALIZE_RECO_MSG, dlm->key, - &fr, sizeof(fr), nodenum, &status); - if (ret >= 0) - ret = status; - if (ret < 0) { - mlog(ML_ERROR, "Error %d when sending message %u (key " - "0x%x) to node %u\n", ret, DLM_FINALIZE_RECO_MSG, - dlm->key, nodenum); - if (dlm_is_host_down(ret)) { - /* this has no effect on this recovery - * session, so set the status to zero to - * finish out the last recovery */ - mlog(ML_ERROR, "node %u went down after this " - "node finished recovery.\n", nodenum); - ret = 0; - continue; - } - break; - } - } - if (stage == 1) { - /* reset the node_iter back to the top and send finalize2 */ - iter.curnode = -1; - stage = 2; - goto stage2; - } - - return ret; -} - -int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - struct dlm_ctxt *dlm = data; - struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf; - int stage = 1; - - /* ok to return 0, domain has gone away */ - if (!dlm_grab(dlm)) - return 0; - - if (fr->flags & DLM_FINALIZE_STAGE2) - stage = 2; - - mlog(0, "%s: node %u finalizing recovery stage%d of " - "node %u (%u:%u)\n", dlm->name, fr->node_idx, stage, - fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master); - - spin_lock(&dlm->spinlock); - - if (dlm->reco.new_master != fr->node_idx) { - mlog(ML_ERROR, "node %u sent recovery finalize msg, but node " - "%u is supposed to be the new master, dead=%u\n", - fr->node_idx, dlm->reco.new_master, fr->dead_node); - BUG(); - } - if (dlm->reco.dead_node != fr->dead_node) { - mlog(ML_ERROR, "node %u sent recovery finalize msg for dead " - "node %u, but node %u is supposed to be dead\n", - fr->node_idx, fr->dead_node, dlm->reco.dead_node); - BUG(); - } - - switch (stage) { - case 1: - dlm_finish_local_lockres_recovery(dlm, fr->dead_node, fr->node_idx); - if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) { - mlog(ML_ERROR, "%s: received finalize1 from " - "new master %u for dead node %u, but " - "this node has already received it!\n", - dlm->name, fr->node_idx, fr->dead_node); - 
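			/* a duplicate finalize1 means this node and the new
			 * recovery master disagree about recovery state; dump
			 * the local reco state before the BUG() below */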
dlm_print_reco_node_status(dlm); - BUG(); - } - dlm->reco.state |= DLM_RECO_STATE_FINALIZE; - spin_unlock(&dlm->spinlock); - break; - case 2: - if (!(dlm->reco.state & DLM_RECO_STATE_FINALIZE)) { - mlog(ML_ERROR, "%s: received finalize2 from " - "new master %u for dead node %u, but " - "this node did not have finalize1!\n", - dlm->name, fr->node_idx, fr->dead_node); - dlm_print_reco_node_status(dlm); - BUG(); - } - dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE; - spin_unlock(&dlm->spinlock); - dlm_reset_recovery(dlm); - dlm_kick_recovery_thread(dlm); - break; - default: - BUG(); - } - - mlog(0, "%s: recovery done, reco master was %u, dead now %u, master now %u\n", - dlm->name, fr->node_idx, dlm->reco.dead_node, dlm->reco.new_master); - - dlm_put(dlm); - return 0; -} diff --git a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmthread.c b/ANDROID_3.4.5/fs/ocfs2/dlm/dlmthread.c deleted file mode 100644 index e73c833f..00000000 --- a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmthread.c +++ /dev/null @@ -1,762 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmthread.c - * - * standalone DLM module - * - * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. 
- * - */ - - -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/types.h> -#include <linux/highmem.h> -#include <linux/init.h> -#include <linux/sysctl.h> -#include <linux/random.h> -#include <linux/blkdev.h> -#include <linux/socket.h> -#include <linux/inet.h> -#include <linux/timer.h> -#include <linux/kthread.h> -#include <linux/delay.h> - - -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" - -#include "dlmapi.h" -#include "dlmcommon.h" -#include "dlmdomain.h" - -#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_THREAD) -#include "cluster/masklog.h" - -static int dlm_thread(void *data); -static void dlm_flush_asts(struct dlm_ctxt *dlm); - -#define dlm_lock_is_remote(dlm, lock) ((lock)->ml.node != (dlm)->node_num) - -/* will exit holding res->spinlock, but may drop in function */ -/* waits until flags are cleared on res->state */ -void __dlm_wait_on_lockres_flags(struct dlm_lock_resource *res, int flags) -{ - DECLARE_WAITQUEUE(wait, current); - - assert_spin_locked(&res->spinlock); - - add_wait_queue(&res->wq, &wait); -repeat: - set_current_state(TASK_UNINTERRUPTIBLE); - if (res->state & flags) { - spin_unlock(&res->spinlock); - schedule(); - spin_lock(&res->spinlock); - goto repeat; - } - remove_wait_queue(&res->wq, &wait); - __set_current_state(TASK_RUNNING); -} - -int __dlm_lockres_has_locks(struct dlm_lock_resource *res) -{ - if (list_empty(&res->granted) && - list_empty(&res->converting) && - list_empty(&res->blocked)) - return 0; - return 1; -} - -/* "unused": the lockres has no locks, is not on the dirty list, - * has no inflight locks (in the gap between mastery and acquiring - * the first lock), and has no bits in its refmap. - * truly ready to be freed. */ -int __dlm_lockres_unused(struct dlm_lock_resource *res) -{ - int bit; - - assert_spin_locked(&res->spinlock); - - if (__dlm_lockres_has_locks(res)) - return 0; - - /* Locks are in the process of being created */ - if (res->inflight_locks) - return 0; - - if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) - return 0; - - if (res->state & DLM_LOCK_RES_RECOVERING) - return 0; - - /* Another node has this resource with this node as the master */ - bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); - if (bit < O2NM_MAX_NODES) - return 0; - - return 1; -} - - -/* Call whenever you may have added or deleted something from one of - * the lockres queue's. This will figure out whether it belongs on the - * unused list or not and does the appropriate thing. 
*/ -void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res) -{ - assert_spin_locked(&dlm->spinlock); - assert_spin_locked(&res->spinlock); - - if (__dlm_lockres_unused(res)){ - if (list_empty(&res->purge)) { - mlog(0, "%s: Adding res %.*s to purge list\n", - dlm->name, res->lockname.len, res->lockname.name); - - res->last_used = jiffies; - dlm_lockres_get(res); - list_add_tail(&res->purge, &dlm->purge_list); - dlm->purge_count++; - } - } else if (!list_empty(&res->purge)) { - mlog(0, "%s: Removing res %.*s from purge list\n", - dlm->name, res->lockname.len, res->lockname.name); - - list_del_init(&res->purge); - dlm_lockres_put(res); - dlm->purge_count--; - } -} - -void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res) -{ - spin_lock(&dlm->spinlock); - spin_lock(&res->spinlock); - - __dlm_lockres_calc_usage(dlm, res); - - spin_unlock(&res->spinlock); - spin_unlock(&dlm->spinlock); -} - -static void dlm_purge_lockres(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res) -{ - int master; - int ret = 0; - - assert_spin_locked(&dlm->spinlock); - assert_spin_locked(&res->spinlock); - - master = (res->owner == dlm->node_num); - - mlog(0, "%s: Purging res %.*s, master %d\n", dlm->name, - res->lockname.len, res->lockname.name, master); - - if (!master) { - res->state |= DLM_LOCK_RES_DROPPING_REF; - /* drop spinlock... retake below */ - spin_unlock(&res->spinlock); - spin_unlock(&dlm->spinlock); - - spin_lock(&res->spinlock); - /* This ensures that clear refmap is sent after the set */ - __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); - spin_unlock(&res->spinlock); - - /* clear our bit from the master's refmap, ignore errors */ - ret = dlm_drop_lockres_ref(dlm, res); - if (ret < 0) { - if (!dlm_is_host_down(ret)) - BUG(); - } - spin_lock(&dlm->spinlock); - spin_lock(&res->spinlock); - } - - if (!list_empty(&res->purge)) { - mlog(0, "%s: Removing res %.*s from purgelist, master %d\n", - dlm->name, res->lockname.len, res->lockname.name, master); - list_del_init(&res->purge); - dlm_lockres_put(res); - dlm->purge_count--; - } - - if (!__dlm_lockres_unused(res)) { - mlog(ML_ERROR, "%s: res %.*s in use after deref\n", - dlm->name, res->lockname.len, res->lockname.name); - __dlm_print_one_lock_resource(res); - BUG(); - } - - __dlm_unhash_lockres(dlm, res); - - /* lockres is not in the hash now. drop the flag and wake up - * any processes waiting in dlm_get_lock_resource. */ - if (!master) { - res->state &= ~DLM_LOCK_RES_DROPPING_REF; - spin_unlock(&res->spinlock); - wake_up(&res->wq); - } else - spin_unlock(&res->spinlock); -} - -static void dlm_run_purge_list(struct dlm_ctxt *dlm, - int purge_now) -{ - unsigned int run_max, unused; - unsigned long purge_jiffies; - struct dlm_lock_resource *lockres; - - spin_lock(&dlm->spinlock); - run_max = dlm->purge_count; - - while(run_max && !list_empty(&dlm->purge_list)) { - run_max--; - - lockres = list_entry(dlm->purge_list.next, - struct dlm_lock_resource, purge); - - spin_lock(&lockres->spinlock); - - purge_jiffies = lockres->last_used + - msecs_to_jiffies(DLM_PURGE_INTERVAL_MS); - - /* Make sure that we want to be processing this guy at - * this time. 
 */
-		if (!purge_now && time_after(purge_jiffies, jiffies)) {
-			/* Since resources are added to the purge list
-			 * in tail order, we can stop at the first
-			 * unpurgeable resource -- anything added after
-			 * it will have a greater last_used value */
-			spin_unlock(&lockres->spinlock);
-			break;
-		}
-
-		/* Status of the lockres *might* change so double
-		 * check. If the lockres is unused, holding the dlm
-		 * spinlock will prevent people from getting any more
-		 * refs on it. */
-		unused = __dlm_lockres_unused(lockres);
-		if (!unused ||
-		    (lockres->state & DLM_LOCK_RES_MIGRATING)) {
-			mlog(0, "%s: res %.*s is in use or being remastered, "
-			     "used %d, state %d\n", dlm->name,
-			     lockres->lockname.len, lockres->lockname.name,
-			     !unused, lockres->state);
-			/* move the still-busy lockres to the tail of the
-			 * purge list and keep scanning */
-			list_move_tail(&lockres->purge, &dlm->purge_list);
-			spin_unlock(&lockres->spinlock);
-			continue;
-		}
-
-		dlm_lockres_get(lockres);
-
-		dlm_purge_lockres(dlm, lockres);
-
-		dlm_lockres_put(lockres);
-
-		/* Avoid adding any scheduling latencies */
-		cond_resched_lock(&dlm->spinlock);
-	}
-
-	spin_unlock(&dlm->spinlock);
-}
-
-static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
-			      struct dlm_lock_resource *res)
-{
-	struct dlm_lock *lock, *target;
-	struct list_head *iter;
-	struct list_head *head;
-	int can_grant = 1;
-
-	/*
-	 * Because this function is called with the lockres
-	 * spinlock, and because we know that it is not migrating/
-	 * recovering/in-progress, it is fine to reserve asts and
-	 * basts right before queueing them all throughout
-	 */
-	assert_spin_locked(&dlm->ast_lock);
-	assert_spin_locked(&res->spinlock);
-	BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING|
-			      DLM_LOCK_RES_RECOVERING|
-			      DLM_LOCK_RES_IN_PROGRESS)));
-
-converting:
-	if (list_empty(&res->converting))
-		goto blocked;
-	mlog(0, "%s: res %.*s has locks on the convert queue\n", dlm->name,
-	     res->lockname.len, res->lockname.name);
-
-	target = list_entry(res->converting.next, struct dlm_lock, list);
-	if (target->ml.convert_type == LKM_IVMODE) {
-		mlog(ML_ERROR, "%s: res %.*s converting lock to invalid mode\n",
-		     dlm->name, res->lockname.len, res->lockname.name);
-		BUG();
-	}
-	head = &res->granted;
-	list_for_each(iter, head) {
-		lock = list_entry(iter, struct dlm_lock, list);
-		if (lock == target)
-			continue;
-		if (!dlm_lock_compatible(lock->ml.type,
-					 target->ml.convert_type)) {
-			can_grant = 0;
-			/* queue the BAST if not already */
-			if (lock->ml.highest_blocked == LKM_IVMODE) {
-				__dlm_lockres_reserve_ast(res);
-				__dlm_queue_bast(dlm, lock);
-			}
-			/* update the highest_blocked if needed */
-			if (lock->ml.highest_blocked < target->ml.convert_type)
-				lock->ml.highest_blocked =
-					target->ml.convert_type;
-		}
-	}
-	head = &res->converting;
-	list_for_each(iter, head) {
-		lock = list_entry(iter, struct dlm_lock, list);
-		if (lock == target)
-			continue;
-		if (!dlm_lock_compatible(lock->ml.type,
-					 target->ml.convert_type)) {
-			can_grant = 0;
-			if (lock->ml.highest_blocked == LKM_IVMODE) {
-				__dlm_lockres_reserve_ast(res);
-				__dlm_queue_bast(dlm, lock);
-			}
-			if (lock->ml.highest_blocked < target->ml.convert_type)
-				lock->ml.highest_blocked =
-					target->ml.convert_type;
-		}
-	}
-
-	/* we can convert the lock */
-	if (can_grant) {
-		spin_lock(&target->spinlock);
-		BUG_ON(target->ml.highest_blocked != LKM_IVMODE);
-
-		mlog(0, "%s: res %.*s, AST for Converting lock %u:%llu, type "
-		     "%d => %d, node %u\n", dlm->name, res->lockname.len,
-		     res->lockname.name,
-		     dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)),
-		     dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)),
target->ml.type, - target->ml.convert_type, target->ml.node); - - target->ml.type = target->ml.convert_type; - target->ml.convert_type = LKM_IVMODE; - list_move_tail(&target->list, &res->granted); - - BUG_ON(!target->lksb); - target->lksb->status = DLM_NORMAL; - - spin_unlock(&target->spinlock); - - __dlm_lockres_reserve_ast(res); - __dlm_queue_ast(dlm, target); - /* go back and check for more */ - goto converting; - } - -blocked: - if (list_empty(&res->blocked)) - goto leave; - target = list_entry(res->blocked.next, struct dlm_lock, list); - - head = &res->granted; - list_for_each(iter, head) { - lock = list_entry(iter, struct dlm_lock, list); - if (lock==target) - continue; - if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { - can_grant = 0; - if (lock->ml.highest_blocked == LKM_IVMODE) { - __dlm_lockres_reserve_ast(res); - __dlm_queue_bast(dlm, lock); - } - if (lock->ml.highest_blocked < target->ml.type) - lock->ml.highest_blocked = target->ml.type; - } - } - - head = &res->converting; - list_for_each(iter, head) { - lock = list_entry(iter, struct dlm_lock, list); - if (lock==target) - continue; - if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { - can_grant = 0; - if (lock->ml.highest_blocked == LKM_IVMODE) { - __dlm_lockres_reserve_ast(res); - __dlm_queue_bast(dlm, lock); - } - if (lock->ml.highest_blocked < target->ml.type) - lock->ml.highest_blocked = target->ml.type; - } - } - - /* we can grant the blocked lock (only - * possible if converting list empty) */ - if (can_grant) { - spin_lock(&target->spinlock); - BUG_ON(target->ml.highest_blocked != LKM_IVMODE); - - mlog(0, "%s: res %.*s, AST for Blocked lock %u:%llu, type %d, " - "node %u\n", dlm->name, res->lockname.len, - res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)), - target->ml.type, target->ml.node); - - /* target->ml.type is already correct */ - list_move_tail(&target->list, &res->granted); - - BUG_ON(!target->lksb); - target->lksb->status = DLM_NORMAL; - - spin_unlock(&target->spinlock); - - __dlm_lockres_reserve_ast(res); - __dlm_queue_ast(dlm, target); - /* go back and check for more */ - goto converting; - } - -leave: - return; -} - -/* must have NO locks when calling this with res !=NULL * */ -void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) -{ - if (res) { - spin_lock(&dlm->spinlock); - spin_lock(&res->spinlock); - __dlm_dirty_lockres(dlm, res); - spin_unlock(&res->spinlock); - spin_unlock(&dlm->spinlock); - } - wake_up(&dlm->dlm_thread_wq); -} - -void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) -{ - assert_spin_locked(&dlm->spinlock); - assert_spin_locked(&res->spinlock); - - /* don't shuffle secondary queues */ - if ((res->owner == dlm->node_num)) { - if (res->state & (DLM_LOCK_RES_MIGRATING | - DLM_LOCK_RES_BLOCK_DIRTY)) - return; - - if (list_empty(&res->dirty)) { - /* ref for dirty_list */ - dlm_lockres_get(res); - list_add_tail(&res->dirty, &dlm->dirty_list); - res->state |= DLM_LOCK_RES_DIRTY; - } - } - - mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len, - res->lockname.name); -} - - -/* Launch the NM thread for the mounted volume */ -int dlm_launch_thread(struct dlm_ctxt *dlm) -{ - mlog(0, "Starting dlm_thread...\n"); - - dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm_thread"); - if (IS_ERR(dlm->dlm_thread_task)) { - mlog_errno(PTR_ERR(dlm->dlm_thread_task)); - dlm->dlm_thread_task = NULL; - return -EINVAL; - } - - return 0; -} - 
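/*
 * Editor's sketch, not part of the original file: dlm_launch_thread() above
 * and dlm_complete_thread() below follow the standard kthread lifecycle --
 * kthread_run() starts the worker, the worker loops until
 * kthread_should_stop() returns true, and kthread_stop() wakes it and waits
 * for it to exit. A minimal worker with the same shape as dlm_thread()
 * (all "example_*" names are hypothetical):
 */
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/jiffies.h>

static DECLARE_WAIT_QUEUE_HEAD(example_wq);	/* hypothetical work wait queue */

static int example_thread(void *data)
{
	while (!kthread_should_stop()) {
		/* ... process one batch of pending work here ... */

		/* sleep until new work arrives or kthread_stop() wakes us;
		 * kthread_stop() calls wake_up_process(), so this sleep
		 * cannot outlive a stop request */
		wait_event_interruptible_timeout(example_wq,
						 kthread_should_stop(),
						 msecs_to_jiffies(4000));
	}
	return 0;
}

/*
 * usage, mirroring the launch/stop pair in this file:
 *	task = kthread_run(example_thread, NULL, "example_thread");
 *	...
 *	kthread_stop(task);
 */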
-void dlm_complete_thread(struct dlm_ctxt *dlm) -{ - if (dlm->dlm_thread_task) { - mlog(ML_KTHREAD, "Waiting for dlm thread to exit\n"); - kthread_stop(dlm->dlm_thread_task); - dlm->dlm_thread_task = NULL; - } -} - -static int dlm_dirty_list_empty(struct dlm_ctxt *dlm) -{ - int empty; - - spin_lock(&dlm->spinlock); - empty = list_empty(&dlm->dirty_list); - spin_unlock(&dlm->spinlock); - - return empty; -} - -static void dlm_flush_asts(struct dlm_ctxt *dlm) -{ - int ret; - struct dlm_lock *lock; - struct dlm_lock_resource *res; - u8 hi; - - spin_lock(&dlm->ast_lock); - while (!list_empty(&dlm->pending_asts)) { - lock = list_entry(dlm->pending_asts.next, - struct dlm_lock, ast_list); - /* get an extra ref on lock */ - dlm_lock_get(lock); - res = lock->lockres; - mlog(0, "%s: res %.*s, Flush AST for lock %u:%llu, type %d, " - "node %u\n", dlm->name, res->lockname.len, - res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - lock->ml.type, lock->ml.node); - - BUG_ON(!lock->ast_pending); - - /* remove from list (including ref) */ - list_del_init(&lock->ast_list); - dlm_lock_put(lock); - spin_unlock(&dlm->ast_lock); - - if (lock->ml.node != dlm->node_num) { - ret = dlm_do_remote_ast(dlm, res, lock); - if (ret < 0) - mlog_errno(ret); - } else - dlm_do_local_ast(dlm, res, lock); - - spin_lock(&dlm->ast_lock); - - /* possible that another ast was queued while - * we were delivering the last one */ - if (!list_empty(&lock->ast_list)) { - mlog(0, "%s: res %.*s, AST queued while flushing last " - "one\n", dlm->name, res->lockname.len, - res->lockname.name); - } else - lock->ast_pending = 0; - - /* drop the extra ref. - * this may drop it completely. */ - dlm_lock_put(lock); - dlm_lockres_release_ast(dlm, res); - } - - while (!list_empty(&dlm->pending_basts)) { - lock = list_entry(dlm->pending_basts.next, - struct dlm_lock, bast_list); - /* get an extra ref on lock */ - dlm_lock_get(lock); - res = lock->lockres; - - BUG_ON(!lock->bast_pending); - - /* get the highest blocked lock, and reset */ - spin_lock(&lock->spinlock); - BUG_ON(lock->ml.highest_blocked <= LKM_IVMODE); - hi = lock->ml.highest_blocked; - lock->ml.highest_blocked = LKM_IVMODE; - spin_unlock(&lock->spinlock); - - /* remove from list (including ref) */ - list_del_init(&lock->bast_list); - dlm_lock_put(lock); - spin_unlock(&dlm->ast_lock); - - mlog(0, "%s: res %.*s, Flush BAST for lock %u:%llu, " - "blocked %d, node %u\n", - dlm->name, res->lockname.len, res->lockname.name, - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - hi, lock->ml.node); - - if (lock->ml.node != dlm->node_num) { - ret = dlm_send_proxy_bast(dlm, res, lock, hi); - if (ret < 0) - mlog_errno(ret); - } else - dlm_do_local_bast(dlm, res, lock, hi); - - spin_lock(&dlm->ast_lock); - - /* possible that another bast was queued while - * we were delivering the last one */ - if (!list_empty(&lock->bast_list)) { - mlog(0, "%s: res %.*s, BAST queued while flushing last " - "one\n", dlm->name, res->lockname.len, - res->lockname.name); - } else - lock->bast_pending = 0; - - /* drop the extra ref. - * this may drop it completely. 
*/ - dlm_lock_put(lock); - dlm_lockres_release_ast(dlm, res); - } - wake_up(&dlm->ast_wq); - spin_unlock(&dlm->ast_lock); -} - - -#define DLM_THREAD_TIMEOUT_MS (4 * 1000) -#define DLM_THREAD_MAX_DIRTY 100 -#define DLM_THREAD_MAX_ASTS 10 - -static int dlm_thread(void *data) -{ - struct dlm_lock_resource *res; - struct dlm_ctxt *dlm = data; - unsigned long timeout = msecs_to_jiffies(DLM_THREAD_TIMEOUT_MS); - - mlog(0, "dlm thread running for %s...\n", dlm->name); - - while (!kthread_should_stop()) { - int n = DLM_THREAD_MAX_DIRTY; - - /* dlm_shutting_down is very point-in-time, but that - * doesn't matter as we'll just loop back around if we - * get false on the leading edge of a state - * transition. */ - dlm_run_purge_list(dlm, dlm_shutting_down(dlm)); - - /* We really don't want to hold dlm->spinlock while - * calling dlm_shuffle_lists on each lockres that - * needs to have its queues adjusted and AST/BASTs - * run. So let's pull each entry off the dirty_list - * and drop dlm->spinlock ASAP. Once off the list, - * res->spinlock needs to be taken again to protect - * the queues while calling dlm_shuffle_lists. */ - spin_lock(&dlm->spinlock); - while (!list_empty(&dlm->dirty_list)) { - int delay = 0; - res = list_entry(dlm->dirty_list.next, - struct dlm_lock_resource, dirty); - - /* peel a lockres off, remove it from the list, - * unset the dirty flag and drop the dlm lock */ - BUG_ON(!res); - dlm_lockres_get(res); - - spin_lock(&res->spinlock); - /* We clear the DLM_LOCK_RES_DIRTY state once we shuffle lists below */ - list_del_init(&res->dirty); - spin_unlock(&res->spinlock); - spin_unlock(&dlm->spinlock); - /* Drop dirty_list ref */ - dlm_lockres_put(res); - - /* lockres can be re-dirtied/re-added to the - * dirty_list in this gap, but that is ok */ - - spin_lock(&dlm->ast_lock); - spin_lock(&res->spinlock); - if (res->owner != dlm->node_num) { - __dlm_print_one_lock_resource(res); - mlog(ML_ERROR, "%s: inprog %d, mig %d, reco %d," - " dirty %d\n", dlm->name, - !!(res->state & DLM_LOCK_RES_IN_PROGRESS), - !!(res->state & DLM_LOCK_RES_MIGRATING), - !!(res->state & DLM_LOCK_RES_RECOVERING), - !!(res->state & DLM_LOCK_RES_DIRTY)); - } - BUG_ON(res->owner != dlm->node_num); - - /* it is now ok to move lockreses in these states - * to the dirty list, assuming that they will only be - * dirty for a short while. */ - BUG_ON(res->state & DLM_LOCK_RES_MIGRATING); - if (res->state & (DLM_LOCK_RES_IN_PROGRESS | - DLM_LOCK_RES_RECOVERING)) { - /* move it to the tail and keep going */ - res->state &= ~DLM_LOCK_RES_DIRTY; - spin_unlock(&res->spinlock); - spin_unlock(&dlm->ast_lock); - mlog(0, "%s: res %.*s, inprogress, delay list " - "shuffle, state %d\n", dlm->name, - res->lockname.len, res->lockname.name, - res->state); - delay = 1; - goto in_progress; - } - - /* at this point the lockres is not migrating/ - * recovering/in-progress. we have the lockres - * spinlock and do NOT have the dlm lock. - * safe to reserve/queue asts and run the lists. 
*/ - - /* called while holding lockres lock */ - dlm_shuffle_lists(dlm, res); - res->state &= ~DLM_LOCK_RES_DIRTY; - spin_unlock(&res->spinlock); - spin_unlock(&dlm->ast_lock); - - dlm_lockres_calc_usage(dlm, res); - -in_progress: - - spin_lock(&dlm->spinlock); - /* if the lock was in-progress, stick - * it on the back of the list */ - if (delay) { - spin_lock(&res->spinlock); - __dlm_dirty_lockres(dlm, res); - spin_unlock(&res->spinlock); - } - dlm_lockres_put(res); - - /* unlikely, but we may need to give time to - * other tasks */ - if (!--n) { - mlog(0, "%s: Throttling dlm thread\n", - dlm->name); - break; - } - } - - spin_unlock(&dlm->spinlock); - dlm_flush_asts(dlm); - - /* yield and continue right away if there is more work to do */ - if (!n) { - cond_resched(); - continue; - } - - wait_event_interruptible_timeout(dlm->dlm_thread_wq, - !dlm_dirty_list_empty(dlm) || - kthread_should_stop(), - timeout); - } - - mlog(0, "quitting DLM thread\n"); - return 0; -} diff --git a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmunlock.c b/ANDROID_3.4.5/fs/ocfs2/dlm/dlmunlock.c deleted file mode 100644 index 850aa7e8..00000000 --- a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmunlock.c +++ /dev/null @@ -1,692 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmunlock.c - * - * underlying calls for unlocking locks - * - * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. 
- * - */ - - -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/types.h> -#include <linux/highmem.h> -#include <linux/init.h> -#include <linux/sysctl.h> -#include <linux/random.h> -#include <linux/blkdev.h> -#include <linux/socket.h> -#include <linux/inet.h> -#include <linux/spinlock.h> -#include <linux/delay.h> - -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" - -#include "dlmapi.h" -#include "dlmcommon.h" - -#define MLOG_MASK_PREFIX ML_DLM -#include "cluster/masklog.h" - -#define DLM_UNLOCK_FREE_LOCK 0x00000001 -#define DLM_UNLOCK_CALL_AST 0x00000002 -#define DLM_UNLOCK_REMOVE_LOCK 0x00000004 -#define DLM_UNLOCK_REGRANT_LOCK 0x00000008 -#define DLM_UNLOCK_CLEAR_CONVERT_TYPE 0x00000010 - - -static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, - struct dlm_lockstatus *lksb, - int *actions); -static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, - struct dlm_lockstatus *lksb, - int *actions); - -static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, - struct dlm_lockstatus *lksb, - int flags, - u8 owner); - - -/* - * according to the spec: - * http://opendlm.sourceforge.net/cvsmirror/opendlm/docs/dlmbook_final.pdf - * - * flags & LKM_CANCEL != 0: must be converting or blocked - * flags & LKM_CANCEL == 0: must be granted - * - * So to unlock a converting lock, you must first cancel the - * convert (passing LKM_CANCEL in flags), then call the unlock - * again (with no LKM_CANCEL in flags). - */ - - -/* - * locking: - * caller needs: none - * taken: res->spinlock and lock->spinlock taken and dropped - * held on exit: none - * returns: DLM_NORMAL, DLM_NOLOCKMGR, status from network - * all callers should have taken an extra ref on lock coming in - */ -static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, - struct dlm_lockstatus *lksb, - int flags, int *call_ast, - int master_node) -{ - enum dlm_status status; - int actions = 0; - int in_use; - u8 owner; - - mlog(0, "master_node = %d, valblk = %d\n", master_node, - flags & LKM_VALBLK); - - if (master_node) - BUG_ON(res->owner != dlm->node_num); - else - BUG_ON(res->owner == dlm->node_num); - - spin_lock(&dlm->ast_lock); - /* We want to be sure that we're not freeing a lock - * that still has AST's pending... 
*/ - in_use = !list_empty(&lock->ast_list); - spin_unlock(&dlm->ast_lock); - if (in_use && !(flags & LKM_CANCEL)) { - mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock " - "while waiting for an ast!", res->lockname.len, - res->lockname.name); - return DLM_BADPARAM; - } - - spin_lock(&res->spinlock); - if (res->state & DLM_LOCK_RES_IN_PROGRESS) { - if (master_node && !(flags & LKM_CANCEL)) { - mlog(ML_ERROR, "lockres in progress!\n"); - spin_unlock(&res->spinlock); - return DLM_FORWARD; - } - /* ok for this to sleep if not in a network handler */ - __dlm_wait_on_lockres(res); - res->state |= DLM_LOCK_RES_IN_PROGRESS; - } - spin_lock(&lock->spinlock); - - if (res->state & DLM_LOCK_RES_RECOVERING) { - status = DLM_RECOVERING; - goto leave; - } - - if (res->state & DLM_LOCK_RES_MIGRATING) { - status = DLM_MIGRATING; - goto leave; - } - - /* see above for what the spec says about - * LKM_CANCEL and the lock queue state */ - if (flags & LKM_CANCEL) - status = dlm_get_cancel_actions(dlm, res, lock, lksb, &actions); - else - status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions); - - if (status != DLM_NORMAL && (status != DLM_CANCELGRANT || !master_node)) - goto leave; - - /* By now this has been masked out of cancel requests. */ - if (flags & LKM_VALBLK) { - /* make the final update to the lvb */ - if (master_node) - memcpy(res->lvb, lksb->lvb, DLM_LVB_LEN); - else - flags |= LKM_PUT_LVB; /* let the send function - * handle it. */ - } - - if (!master_node) { - owner = res->owner; - /* drop locks and send message */ - if (flags & LKM_CANCEL) - lock->cancel_pending = 1; - else - lock->unlock_pending = 1; - spin_unlock(&lock->spinlock); - spin_unlock(&res->spinlock); - status = dlm_send_remote_unlock_request(dlm, res, lock, lksb, - flags, owner); - spin_lock(&res->spinlock); - spin_lock(&lock->spinlock); - /* if the master told us the lock was already granted, - * let the ast handle all of these actions */ - if (status == DLM_CANCELGRANT) { - actions &= ~(DLM_UNLOCK_REMOVE_LOCK| - DLM_UNLOCK_REGRANT_LOCK| - DLM_UNLOCK_CLEAR_CONVERT_TYPE); - } else if (status == DLM_RECOVERING || - status == DLM_MIGRATING || - status == DLM_FORWARD) { - /* must clear the actions because this unlock - * is about to be retried. cannot free or do - * any list manipulation. */ - mlog(0, "%s:%.*s: clearing actions, %s\n", - dlm->name, res->lockname.len, - res->lockname.name, - status==DLM_RECOVERING?"recovering": - (status==DLM_MIGRATING?"migrating": - "forward")); - actions = 0; - } - if (flags & LKM_CANCEL) - lock->cancel_pending = 0; - else - lock->unlock_pending = 0; - - } - - /* get an extra ref on lock. if we are just switching - * lists here, we dont want the lock to go away. */ - dlm_lock_get(lock); - - if (actions & DLM_UNLOCK_REMOVE_LOCK) { - list_del_init(&lock->list); - dlm_lock_put(lock); - } - if (actions & DLM_UNLOCK_REGRANT_LOCK) { - dlm_lock_get(lock); - list_add_tail(&lock->list, &res->granted); - } - if (actions & DLM_UNLOCK_CLEAR_CONVERT_TYPE) { - mlog(0, "clearing convert_type at %smaster node\n", - master_node ? 
"" : "non-"); - lock->ml.convert_type = LKM_IVMODE; - } - - /* remove the extra ref on lock */ - dlm_lock_put(lock); - -leave: - res->state &= ~DLM_LOCK_RES_IN_PROGRESS; - if (!dlm_lock_on_list(&res->converting, lock)) - BUG_ON(lock->ml.convert_type != LKM_IVMODE); - else - BUG_ON(lock->ml.convert_type == LKM_IVMODE); - spin_unlock(&lock->spinlock); - spin_unlock(&res->spinlock); - wake_up(&res->wq); - - /* let the caller's final dlm_lock_put handle the actual kfree */ - if (actions & DLM_UNLOCK_FREE_LOCK) { - /* this should always be coupled with list removal */ - BUG_ON(!(actions & DLM_UNLOCK_REMOVE_LOCK)); - mlog(0, "lock %u:%llu should be gone now! refs=%d\n", - dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - atomic_read(&lock->lock_refs.refcount)-1); - dlm_lock_put(lock); - } - if (actions & DLM_UNLOCK_CALL_AST) - *call_ast = 1; - - /* if cancel or unlock succeeded, lvb work is done */ - if (status == DLM_NORMAL) - lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB); - - return status; -} - -void dlm_commit_pending_unlock(struct dlm_lock_resource *res, - struct dlm_lock *lock) -{ - /* leave DLM_LKSB_PUT_LVB on the lksb so any final - * update of the lvb will be sent to the new master */ - list_del_init(&lock->list); -} - -void dlm_commit_pending_cancel(struct dlm_lock_resource *res, - struct dlm_lock *lock) -{ - list_move_tail(&lock->list, &res->granted); - lock->ml.convert_type = LKM_IVMODE; -} - - -static inline enum dlm_status dlmunlock_master(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, - struct dlm_lockstatus *lksb, - int flags, - int *call_ast) -{ - return dlmunlock_common(dlm, res, lock, lksb, flags, call_ast, 1); -} - -static inline enum dlm_status dlmunlock_remote(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, - struct dlm_lockstatus *lksb, - int flags, int *call_ast) -{ - return dlmunlock_common(dlm, res, lock, lksb, flags, call_ast, 0); -} - -/* - * locking: - * caller needs: none - * taken: none - * held on exit: none - * returns: DLM_NORMAL, DLM_NOLOCKMGR, status from network - */ -static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, - struct dlm_lockstatus *lksb, - int flags, - u8 owner) -{ - struct dlm_unlock_lock unlock; - int tmpret; - enum dlm_status ret; - int status = 0; - struct kvec vec[2]; - size_t veclen = 1; - - mlog(0, "%.*s\n", res->lockname.len, res->lockname.name); - - if (owner == dlm->node_num) { - /* ended up trying to contact ourself. this means - * that the lockres had been remote but became local - * via a migration. 
just retry it, now as local */ - mlog(0, "%s:%.*s: this node became the master due to a " - "migration, re-evaluate now\n", dlm->name, - res->lockname.len, res->lockname.name); - return DLM_FORWARD; - } - - memset(&unlock, 0, sizeof(unlock)); - unlock.node_idx = dlm->node_num; - unlock.flags = cpu_to_be32(flags); - unlock.cookie = lock->ml.cookie; - unlock.namelen = res->lockname.len; - memcpy(unlock.name, res->lockname.name, unlock.namelen); - - vec[0].iov_len = sizeof(struct dlm_unlock_lock); - vec[0].iov_base = &unlock; - - if (flags & LKM_PUT_LVB) { - /* extra data to send if we are updating lvb */ - vec[1].iov_len = DLM_LVB_LEN; - vec[1].iov_base = lock->lksb->lvb; - veclen++; - } - - tmpret = o2net_send_message_vec(DLM_UNLOCK_LOCK_MSG, dlm->key, - vec, veclen, owner, &status); - if (tmpret >= 0) { - // successfully sent and received - if (status == DLM_FORWARD) - mlog(0, "master was in-progress. retry\n"); - ret = status; - } else { - mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " - "node %u\n", tmpret, DLM_UNLOCK_LOCK_MSG, dlm->key, owner); - if (dlm_is_host_down(tmpret)) { - /* NOTE: this seems strange, but it is what we want. - * when the master goes down during a cancel or - * unlock, the recovery code completes the operation - * as if the master had not died, then passes the - * updated state to the recovery master. this thread - * just needs to finish out the operation and call - * the unlockast. */ - ret = DLM_NORMAL; - } else { - /* something bad. this will BUG in ocfs2 */ - ret = dlm_err_to_dlm_status(tmpret); - } - } - - return ret; -} - -/* - * locking: - * caller needs: none - * taken: takes and drops res->spinlock - * held on exit: none - * returns: DLM_NORMAL, DLM_BADARGS, DLM_IVLOCKID, - * return value from dlmunlock_master - */ -int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data, - void **ret_data) -{ - struct dlm_ctxt *dlm = data; - struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf; - struct dlm_lock_resource *res = NULL; - struct list_head *iter; - struct dlm_lock *lock = NULL; - enum dlm_status status = DLM_NORMAL; - int found = 0, i; - struct dlm_lockstatus *lksb = NULL; - int ignore; - u32 flags; - struct list_head *queue; - - flags = be32_to_cpu(unlock->flags); - - if (flags & LKM_GET_LVB) { - mlog(ML_ERROR, "bad args! GET_LVB specified on unlock!\n"); - return DLM_BADARGS; - } - - if ((flags & (LKM_PUT_LVB|LKM_CANCEL)) == (LKM_PUT_LVB|LKM_CANCEL)) { - mlog(ML_ERROR, "bad args! cannot modify lvb on a CANCEL " - "request!\n"); - return DLM_BADARGS; - } - - if (unlock->namelen > DLM_LOCKID_NAME_MAX) { - mlog(ML_ERROR, "Invalid name length in unlock handler!\n"); - return DLM_IVBUFLEN; - } - - if (!dlm_grab(dlm)) - return DLM_REJECTED; - - mlog_bug_on_msg(!dlm_domain_fully_joined(dlm), - "Domain %s not fully joined!\n", dlm->name); - - mlog(0, "lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" : "none"); - - res = dlm_lookup_lockres(dlm, unlock->name, unlock->namelen); - if (!res) { - /* We assume here that a no lock resource simply means - * it was migrated away and destroyed before the other - * node could detect it. 
*/ - mlog(0, "returning DLM_FORWARD -- res no longer exists\n"); - status = DLM_FORWARD; - goto not_found; - } - - queue=&res->granted; - found = 0; - spin_lock(&res->spinlock); - if (res->state & DLM_LOCK_RES_RECOVERING) { - spin_unlock(&res->spinlock); - mlog(0, "returning DLM_RECOVERING\n"); - status = DLM_RECOVERING; - goto leave; - } - - if (res->state & DLM_LOCK_RES_MIGRATING) { - spin_unlock(&res->spinlock); - mlog(0, "returning DLM_MIGRATING\n"); - status = DLM_MIGRATING; - goto leave; - } - - if (res->owner != dlm->node_num) { - spin_unlock(&res->spinlock); - mlog(0, "returning DLM_FORWARD -- not master\n"); - status = DLM_FORWARD; - goto leave; - } - - for (i=0; i<3; i++) { - list_for_each(iter, queue) { - lock = list_entry(iter, struct dlm_lock, list); - if (lock->ml.cookie == unlock->cookie && - lock->ml.node == unlock->node_idx) { - dlm_lock_get(lock); - found = 1; - break; - } - } - if (found) - break; - /* scan granted -> converting -> blocked queues */ - queue++; - } - spin_unlock(&res->spinlock); - if (!found) { - status = DLM_IVLOCKID; - goto not_found; - } - - /* lock was found on queue */ - lksb = lock->lksb; - if (flags & (LKM_VALBLK|LKM_PUT_LVB) && - lock->ml.type != LKM_EXMODE) - flags &= ~(LKM_VALBLK|LKM_PUT_LVB); - - /* unlockast only called on originating node */ - if (flags & LKM_PUT_LVB) { - lksb->flags |= DLM_LKSB_PUT_LVB; - memcpy(&lksb->lvb[0], &unlock->lvb[0], DLM_LVB_LEN); - } - - /* if this is in-progress, propagate the DLM_FORWARD - * all the way back out */ - status = dlmunlock_master(dlm, res, lock, lksb, flags, &ignore); - if (status == DLM_FORWARD) - mlog(0, "lockres is in progress\n"); - - if (flags & LKM_PUT_LVB) - lksb->flags &= ~DLM_LKSB_PUT_LVB; - - dlm_lockres_calc_usage(dlm, res); - dlm_kick_thread(dlm, res); - -not_found: - if (!found) - mlog(ML_ERROR, "failed to find lock to unlock! " - "cookie=%u:%llu\n", - dlm_get_lock_cookie_node(be64_to_cpu(unlock->cookie)), - dlm_get_lock_cookie_seq(be64_to_cpu(unlock->cookie))); - else - dlm_lock_put(lock); - -leave: - if (res) - dlm_lockres_put(res); - - dlm_put(dlm); - - return status; -} - - -static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, - struct dlm_lockstatus *lksb, - int *actions) -{ - enum dlm_status status; - - if (dlm_lock_on_list(&res->blocked, lock)) { - /* cancel this outright */ - status = DLM_NORMAL; - *actions = (DLM_UNLOCK_CALL_AST | - DLM_UNLOCK_REMOVE_LOCK); - } else if (dlm_lock_on_list(&res->converting, lock)) { - /* cancel the request, put back on granted */ - status = DLM_NORMAL; - *actions = (DLM_UNLOCK_CALL_AST | - DLM_UNLOCK_REMOVE_LOCK | - DLM_UNLOCK_REGRANT_LOCK | - DLM_UNLOCK_CLEAR_CONVERT_TYPE); - } else if (dlm_lock_on_list(&res->granted, lock)) { - /* too late, already granted. 
*/ - status = DLM_CANCELGRANT; - *actions = DLM_UNLOCK_CALL_AST; - } else { - mlog(ML_ERROR, "lock to cancel is not on any list!\n"); - status = DLM_IVLOCKID; - *actions = 0; - } - return status; -} - -static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - struct dlm_lock *lock, - struct dlm_lockstatus *lksb, - int *actions) -{ - enum dlm_status status; - - /* unlock request */ - if (!dlm_lock_on_list(&res->granted, lock)) { - status = DLM_DENIED; - dlm_error(status); - *actions = 0; - } else { - /* unlock granted lock */ - status = DLM_NORMAL; - *actions = (DLM_UNLOCK_FREE_LOCK | - DLM_UNLOCK_CALL_AST | - DLM_UNLOCK_REMOVE_LOCK); - } - return status; -} - -/* there seems to be no point in doing this async - * since (even for the remote case) there is really - * no work to queue up... so just do it and fire the - * unlockast by hand when done... */ -enum dlm_status dlmunlock(struct dlm_ctxt *dlm, struct dlm_lockstatus *lksb, - int flags, dlm_astunlockfunc_t *unlockast, void *data) -{ - enum dlm_status status; - struct dlm_lock_resource *res; - struct dlm_lock *lock = NULL; - int call_ast, is_master; - - if (!lksb) { - dlm_error(DLM_BADARGS); - return DLM_BADARGS; - } - - if (flags & ~(LKM_CANCEL | LKM_VALBLK | LKM_INVVALBLK)) { - dlm_error(DLM_BADPARAM); - return DLM_BADPARAM; - } - - if ((flags & (LKM_VALBLK | LKM_CANCEL)) == (LKM_VALBLK | LKM_CANCEL)) { - mlog(0, "VALBLK given with CANCEL: ignoring VALBLK\n"); - flags &= ~LKM_VALBLK; - } - - if (!lksb->lockid || !lksb->lockid->lockres) { - dlm_error(DLM_BADPARAM); - return DLM_BADPARAM; - } - - lock = lksb->lockid; - BUG_ON(!lock); - dlm_lock_get(lock); - - res = lock->lockres; - BUG_ON(!res); - dlm_lockres_get(res); -retry: - call_ast = 0; - /* need to retry up here because owner may have changed */ - mlog(0, "lock=%p res=%p\n", lock, res); - - spin_lock(&res->spinlock); - is_master = (res->owner == dlm->node_num); - if (flags & LKM_VALBLK && lock->ml.type != LKM_EXMODE) - flags &= ~LKM_VALBLK; - spin_unlock(&res->spinlock); - - if (is_master) { - status = dlmunlock_master(dlm, res, lock, lksb, flags, - &call_ast); - mlog(0, "done calling dlmunlock_master: returned %d, " - "call_ast is %d\n", status, call_ast); - } else { - status = dlmunlock_remote(dlm, res, lock, lksb, flags, - &call_ast); - mlog(0, "done calling dlmunlock_remote: returned %d, " - "call_ast is %d\n", status, call_ast); - } - - if (status == DLM_RECOVERING || - status == DLM_MIGRATING || - status == DLM_FORWARD) { - /* We want to go away for a tiny bit to allow recovery - * / migration to complete on this resource. I don't - * know of any wait queue we could sleep on as this - * may be happening on another node. Perhaps the - * proper solution is to queue up requests on the - * other end? */ - - /* do we want to yield(); ?? */ - msleep(50); - - mlog(0, "retrying unlock due to pending recovery/" - "migration/in-progress\n"); - goto retry; - } - - if (call_ast) { - mlog(0, "calling unlockast(%p, %d)\n", data, status); - if (is_master) { - /* it is possible that there is one last bast - * pending. make sure it is flushed, then - * call the unlockast. - * not an issue if this is a mastered remotely, - * since this lock has been removed from the - * lockres queues and cannot be found. 
*/ - dlm_kick_thread(dlm, NULL); - wait_event(dlm->ast_wq, - dlm_lock_basts_flushed(dlm, lock)); - } - (*unlockast)(data, status); - } - - if (status == DLM_CANCELGRANT) - status = DLM_NORMAL; - - if (status == DLM_NORMAL) { - mlog(0, "kicking the thread\n"); - dlm_kick_thread(dlm, res); - } else - dlm_error(status); - - dlm_lockres_calc_usage(dlm, res); - dlm_lockres_put(res); - dlm_lock_put(lock); - - mlog(0, "returning status=%d!\n", status); - return status; -} -EXPORT_SYMBOL_GPL(dlmunlock); - diff --git a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmver.c b/ANDROID_3.4.5/fs/ocfs2/dlm/dlmver.c deleted file mode 100644 index dfc0da4d..00000000 --- a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmver.c +++ /dev/null @@ -1,42 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmver.c - * - * version string - * - * Copyright (C) 2002, 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#include <linux/module.h> -#include <linux/kernel.h> - -#include "dlmver.h" - -#define DLM_BUILD_VERSION "1.5.0" - -#define VERSION_STR "OCFS2 DLM " DLM_BUILD_VERSION - -void dlm_print_version(void) -{ - printk(KERN_INFO "%s\n", VERSION_STR); -} - -MODULE_DESCRIPTION(VERSION_STR); - -MODULE_VERSION(DLM_BUILD_VERSION); diff --git a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmver.h b/ANDROID_3.4.5/fs/ocfs2/dlm/dlmver.h deleted file mode 100644 index f674aee7..00000000 --- a/ANDROID_3.4.5/fs/ocfs2/dlm/dlmver.h +++ /dev/null @@ -1,31 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmfsver.h - * - * Function prototypes - * - * Copyright (C) 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#ifndef DLM_VER_H -#define DLM_VER_H - -void dlm_print_version(void); - -#endif /* DLM_VER_H */ |
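/*
 * Editor's sketch, not part of the tree: the calling convention that
 * dlmunlock.c above documents for converting locks -- cancel the in-flight
 * convert first (LKM_CANCEL), then issue a plain unlock. dlmunlock() and
 * the unlockast callback shape are the real API from this directory; the
 * "example_*" names and the error handling are hypothetical.
 */
static void example_unlock_ast(void *astarg, enum dlm_status st)
{
	/* called once the cancel or unlock has completed */
}

static enum dlm_status example_drop_converting_lock(struct dlm_ctxt *dlm,
						    struct dlm_lockstatus *lksb)
{
	enum dlm_status st;

	/* step 1: cancel the convert. DLM_CANCELGRANT (which dlmunlock()
	 * maps to DLM_NORMAL) means the convert was granted before the
	 * cancel arrived; either way the lock is now on the granted list */
	st = dlmunlock(dlm, lksb, LKM_CANCEL, example_unlock_ast, NULL);
	if (st != DLM_NORMAL)
		return st;

	/* step 2: unlock the now-granted lock, no LKM_CANCEL this time */
	return dlmunlock(dlm, lksb, 0, example_unlock_ast, NULL);
}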