summaryrefslogtreecommitdiff
path: root/eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/hgext/win32mbcs.py
diff options
context:
space:
mode:
Diffstat (limited to 'eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/hgext/win32mbcs.py')
-rw-r--r--eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/hgext/win32mbcs.py159
1 files changed, 159 insertions, 0 deletions
diff --git a/eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/hgext/win32mbcs.py b/eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/hgext/win32mbcs.py
new file mode 100644
index 0000000..f83c43e
--- /dev/null
+++ b/eggs/mercurial-1.7.3-py2.6-linux-x86_64.egg/hgext/win32mbcs.py
@@ -0,0 +1,159 @@
+# win32mbcs.py -- MBCS filename support for Mercurial
+#
+# Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com>
+#
+# Version: 0.3
+# Author: Shun-ichi Goto <shunichi.goto@gmail.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+#
+
+'''allow the use of MBCS paths with problematic encodings
+
+Some MBCS encodings do not work well with path operations (e.g.
+splitting a path, case conversion, etc.) performed on their encoded
+bytes. We call such an encoding (e.g. shift_jis or big5) a
+"problematic encoding". This extension can be used to fix the issue
+with those encodings by wrapping some functions so that paths are
+converted to Unicode strings before the path operation.
+
+This extension is useful for:
+
+- Japanese Windows users using shift_jis encoding.
+- Chinese Windows users using big5 encoding.
+- All users who use a repository with one of the problematic encodings
+ on a case-insensitive file system.
+
+This extension is not needed for:
+
+- Any user who uses only ASCII chars in paths.
+- Any user who does not use any of the problematic encodings.
+
+Note that there are some limitations on using this extension:
+
+- You should use single encoding in one repository.
+- If the repository path ends with 0x5c, .hg/hgrc cannot be read.
+- win32mbcs is not compatible with the fixutf8 extension.
+
+By default, win32mbcs uses encoding.encoding decided by Mercurial.
+You can specify the encoding by config option::
+
+ [win32mbcs]
+ encoding = sjis
+
+It is useful for the users who want to commit with UTF-8 log message.
+'''
+
+import os, sys
+from mercurial.i18n import _
+from mercurial import util, encoding
+
+# Encoding used by decode() and encode() for filenames. It stays None
+# until extsetup() assigns it from the [win32mbcs] "encoding" option,
+# falling back to encoding.encoding.
+_encoding = None # see extsetup
+
+def decode(arg):
+    '''recursively convert byte strings in arg to unicode
+
+    A str is decoded with the module-level _encoding and accepted only
+    if encoding the result again reproduces the original bytes;
+    otherwise UnicodeError is raised. Tuples, lists and the values of
+    dicts are converted element by element and returned in a new
+    container of the same kind (dict keys are left untouched). Any
+    other value is returned unchanged.
+    '''
+    if isinstance(arg, str):
+        uarg = arg.decode(_encoding)
+        # Reject lossy conversions: the round trip must give back
+        # exactly the bytes we started from.
+        if arg == uarg.encode(_encoding):
+            return uarg
+        raise UnicodeError("Not local encoding")
+    elif isinstance(arg, tuple):
+        return tuple(map(decode, arg))
+    elif isinstance(arg, list):
+        return map(decode, arg)
+    elif isinstance(arg, dict):
+        # Build a new dict instead of rewriting the caller's one in
+        # place, so the input is never left modified (or half-converted
+        # when a value fails to decode), just like the tuple and list
+        # branches.
+        return dict((k, decode(v)) for k, v in arg.items())
+    return arg
+
+def encode(arg):
+    '''recursively convert unicode strings in arg to byte strings
+
+    A unicode object is encoded with the module-level _encoding.
+    Tuples, lists and the values of dicts are converted element by
+    element and returned in a new container of the same kind (dict
+    keys are left untouched). Any other value is returned unchanged.
+    '''
+    if isinstance(arg, unicode):
+        return arg.encode(_encoding)
+    elif isinstance(arg, tuple):
+        return tuple(map(encode, arg))
+    elif isinstance(arg, list):
+        return map(encode, arg)
+    elif isinstance(arg, dict):
+        # This runs on values returned by wrapped functions; build a
+        # new dict so an object the function may still own is not
+        # rewritten behind its back.
+        return dict((k, encode(v)) for k, v in arg.items())
+    return arg
+
+def appendsep(s):
+    '''return s, with os.sep appended unless it already ends in ':', '/'
+    or a backslash
+
+    An empty s is returned as is.
+    '''
+    # Look at the last *character* rather than the last byte: decode
+    # first, and fall back to the raw value only when it cannot be
+    # converted with the configured encoding.
+    try:
+        text = decode(s)
+    except UnicodeError:
+        text = s
+    if not text or text[-1] in ':/\\':
+        return s
+    return s + os.sep
+
+def wrapper(func, args, kwds):
+    '''call func with its arguments converted to unicode
+
+    If any positional argument is already unicode, func is called with
+    args and kwds exactly as given. Otherwise args and kwds are passed
+    through decode(), func is called, and its result is passed through
+    encode() before being returned. A UnicodeError raised anywhere in
+    that conversion path is reported as util.Abort.
+    '''
+    for candidate in args:
+        if isinstance(candidate, unicode):
+            # Caller is already working with unicode: no conversion.
+            break
+    else:
+        # No unicode positional argument was found.
+        try:
+            uargs = decode(args)
+            ukwds = decode(kwds)
+            return encode(func(*uargs, **ukwds))
+        except UnicodeError:
+            raise util.Abort(_("[win32mbcs] filename conversion failed with"
+                               " %s encoding\n") % (_encoding))
+    return func(*args, **kwds)
+
+def wrapperforlistdir(func, args, kwds):
+    '''call func after making sure its path argument ends with a separator
+
+    The path is taken from the first positional argument and/or the
+    'path' keyword argument. Terminating it with os.sep avoids a
+    trailing 0x5c, the second byte of an MBCS character, being
+    misinterpreted as a path separator.
+    '''
+    if args:
+        first = appendsep(args[0])
+        args = [first] + list(args[1:])
+    if 'path' in kwds:
+        kwds['path'] = appendsep(kwds['path'])
+    return func(*args, **kwds)
+
+def wrapname(name, wrapper):
+    '''replace the function called name with a wrapped version
+
+    name is a dotted path whose last component is the attribute and
+    whose prefix is a key of sys.modules. The replacement forwards
+    every call as wrapper(original, args, kwds).
+    '''
+    modname, attr = name.rsplit('.', 1)
+    mod = sys.modules[modname]
+    original = getattr(mod, attr)
+    def f(*args, **kwds):
+        return wrapper(original, args, kwds)
+    # Keep the original function name on the replacement when possible.
+    try:
+        f.__name__ = original.__name__ # fail with python23
+    except Exception:
+        pass
+    setattr(mod, attr, f)
+
+# Whitespace-separated dotted names of the functions to be wrapped;
+# extsetup() passes each of them to wrapname() together with wrapper.
+# NOTE: os.path.dirname() and os.path.basename() are safe because
+# they use the result of os.path.split()
+funcs = '''os.path.join os.path.split os.path.splitext
+ os.path.splitunc os.path.normpath os.path.normcase os.makedirs
+ mercurial.util.endswithsep mercurial.util.splitpath mercurial.util.checkcase
+ mercurial.util.fspath mercurial.util.pconvert mercurial.util.normpath'''
+
+# Codec and alias names of sjis and big5 to be faked. The list is
+# whitespace-separated; extsetup() compares the lower-cased configured
+# encoding against its entries.
+problematic_encodings = '''big5 big5-tw csbig5 big5hkscs big5-hkscs
+ hkscs cp932 932 ms932 mskanji ms-kanji shift_jis csshiftjis shiftjis
+ sjis s_jis shift_jis_2004 shiftjis2004 sjis_2004 sjis2004
+ shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213 950 cp950 ms950 '''
+
+# Set up the extension: choose the filename encoding and, when it is one
+# of the problematic encodings, wrap the functions named in funcs with
+# wrapper and mercurial.osutil.listdir with wrapperforlistdir.
+def extsetup(ui):
+ # TODO: decide use of config section for this extension
+ # Without Unicode filename support in os.path, warn and do nothing.
+ if not os.path.supports_unicode_filenames:
+ ui.warn(_("[win32mbcs] cannot activate on this platform.\n"))
+ return
+ # Determine the encoding for filenames: the [win32mbcs] "encoding"
+ # option, defaulting to encoding.encoding.
+ global _encoding
+ _encoding = ui.config('win32mbcs', 'encoding', encoding.encoding)
+ # Wrap the functions only in the relevant environment, i.e. when the
+ # encoding is one of the problematic ones.
+ if _encoding.lower() in problematic_encodings.split():
+ for f in funcs.split():
+ wrapname(f, wrapper)
+ wrapname("mercurial.osutil.listdir", wrapperforlistdir)
+ # Check sys.argv manually instead of using ui.debug() because
+ # command line options are not yet applied when
+ # extensions.loadall() is called.
+ if '--debug' in sys.argv:
+ ui.write("[win32mbcs] activated with encoding: %s\n"
+ % _encoding)
+