[PATCH 8 of 9 RFC] normfn: introduce normfn extension to manage filename normalization policy per repository

Fri May 25 10:00:57 CDT 2012

# HG changeset patch
# User FUJIWARA Katsunori <foozy at lares.dti.ne.jp>
# Date 1337957587 -32400
# Node ID 69cdf8af32ac0098c86d8f64832f027f3fb6f939
# Parent  3b83b592a5b4289ecb65cf9b26225bce20162ada
normfn: introduce normfn extension to manage filename normalization policy per repository

For now, this just enables the normalization functions, and only when:

  - hg runs on MacOS environment, and
  - '[normalize] type' value in '.hgnormfn' is set as 'NFC'

diff -r 3b83b592a5b4 -r 69cdf8af32ac hgext/normfn.py

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/normfn.py	Fri May 25 23:53:07 2012 +0900
@@ -0,0 +1,161 @@
+# normfn.py - unify the normalization type for filenames in the history
+#
+# Copyright (C) 2012 FUJIWARA Katsunori <foozy at lares.dti.ne.jp>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''unify the normalization type for filenames in the history
+
+Example of ``.hgnormfn`` file::
+
+    [normalize]
+    type = nfc|nfd|none
+
+To force Mercurial to use specific normalization type,
+add configuration below to hgrc::
+
+    [normfn]
+    type = <type of normalize>
+
+For example, when you have to work on a filesystem which does not
+normalize filenames (e.g.: FAT or NFS) in the Mac OS X environment,
+this configuration can prevent the normfn extension from behaving
+as it does on HFS+.
+'''
+
+import sys, unicodedata
+from mercurial import util, encoding, config
+from mercurial.i18n import _
+
+def normalize(s, type, ignorecase):
+    try:
+        u = s.decode(encoding.encoding, encoding.encodingmode)
+    except UnicodeDecodeError, inst:
+        sub = s[max(0, inst.start - 10):inst.start + 10]
+        raise util.Abort("decoding near '%s': %s!" % (sub, inst))
+    except LookupError, k:
+        raise util.Abort(k, hint="please check your locale settings")
+
+    nu = unicodedata.normalize(type, u)
+    if ignorecase:
+        return nu.lower().encode(encoding.encoding)
+    else:
+        return nu.encode(encoding.encoding)
+
+def tonfc(s):
+    return normalize(s, 'NFC', False)
+
+def tonfd(s):
+    return normalize(s, 'NFD', False)
+
+#      on darwin          on other platforms
+# NFC: convert            abort for filename in NFD
+# NFD: nop                abort for filename in NFC
+# ---: nop                warn for colliding filenames
+
+def reposetupnfc(ui, repo, cfg):
+    if sys.platform == 'darwin':
+        ui.debug("[normfn] activated on darwin\n")
+        repo.fntolocal = tonfd
+        repo.fnfromlocal = tonfc
+    else:
+        # add portability check (abort)
+        # add colliding check (abort)
+        pass
+
+def reposetupnfd(ui, repo, cfg):
+    # add portability check (abort)
+    # add colliding check (abort)
+    pass
+
+def reposetupnone(ui, repo, cfg):
+    pass
+
+normtypes = {
+    'nfc': reposetupnfc,
+    'nfd': reposetupnfd,
+    'none': reposetupnone,
+}
+
+def normalizetype(cfg, cfgsrc):
+    normtype = cfg.get('normalize', 'type', 'none')
+    if normtype.lower() not in normtypes:
+        # invalid type
+        raise util.Abort(_('unknown normalization type "%s" in %s')
+                         % (normtype, cfgsrc),
+                         hint=_('see "hg help normfn" for detail'))
+    return normtype.lower()
+
+def loadconfig(repo, node):
+    try:
+        if node is None:
+            data = repo.wfile('.hgnormfn').read()
+            cfgsrc = _('.hgnormfn of current working directory')
+        else:
+            ctx = repo[node]
+            data = ctx['.hgnormfn'].data()
+            cfgsrc = _('.hgnormfn of revision %s') % ctx.rev()
+    except (IOError, LookupError):
+        fl = repo.file('.hgnormfn')
+        if not len(fl):
+            return config.config(), None # empty cfg
+        # use the latest '.hgnormfn' in repository
+        flrev = len(fl) - 1
+        data = fl.read(flrev)
+        cfgsrc = _('.hgnormfn of revision %s') % fl.linkrev(flrev)
+    cfg = config.config()
+    cfg.parse(cfgsrc, data)
+    return cfg, cfgsrc
+
+# some changelogs/manifests are already loaded into memory
+def preupdate(ui, repo, hooktype, parent1, parent2):
+    if parent2:
+        p1cfg, p1src = loadconfig(repo, parent1)
+        p2cfg, p2src = loadconfig(repo, parent2)
+        p1type = normalizetype(p1cfg, p1src)
+        p2type = normalizetype(p2cfg, p2src)
+        if p1type != p2type:
+            # abort if:
+            # - p2 has files not in p1type (may be 'none'), or
+            # - p1 has files not in p2type (may be 'none')
+            pass
+    else:
+        curcfg, cursrc = loadconfig(repo, None)
+        p1cfg, p1src = loadconfig(repo, parent1)
+        curtype = normalizetype(curcfg, cursrc)
+        p1type = normalizetype(p1cfg, p1src)
+        if curtype != p1type:
+            # invalidate cache for changelogs/manifests for safety
+            #  - localrepository.invalidate
+
+            # if p1 has files not in curtype, this update should be
+            # aborted (presumably -- the original comment was cut off)
+            pass
+    normtypes[p1type](ui, repo, p1cfg)
+    return False
+
+# some changelogs/manifests are already loaded into memory
+def pretxncommit(ui, repo, hooktype, node, parent1, parent2, pending):
+    cfg, cfgsrc = loadconfig(repo, node)
+    normtype = normalizetype(cfg, cfgsrc)
+    if normtype == 'none': # normalizetype() never returns None
+        return False # no normalization policy: nothing to verify
+    # TODO: check whether manifest has any files not in normtype
+
+def debug(ui, s):
+    if '--debug' in sys.argv:
+        ui.write(s)
+
+def reposetup(ui, repo):
+#    ui.setconfig('hooks', 'preupdate.normfn', preupdate)
+#    ui.setconfig('hooks', 'pretxncommit.normfn', pretxncommit)
+    cfg, cfgsrc = loadconfig(repo, None)
+    type = normalizetype(cfg, cfgsrc)
+    debug(ui, "[normfn] enable %s normalization\n" % (type))
+    normtypes[type](ui, repo, cfg)
+
+# operations for data exchanging:
+#   - bundle/unbundle
+#   - export/import/diff
+#   - archive