[PATCH] Check case-folding clashes during addremove

Andrei Vermel andrei.vermel at gmail.com
Fri Mar 30 03:35:42 CDT 2007


> I think we should optimize for the common case of no collisions. So
> instead we should do:
>
>   fl = file.lower()
>   if fl in self.foldmap:
>      issue error/warning
>      self.foldmap[fl] += 1
>   else:
>      self.foldmap[fl] = 1
>

Yes, this optimisation makes sense. Attached is an updated patch.

After a bit of thinking, it seems to me that dirstate.update is not
a good place to call the check function, due to the fact that it gets called
with only a single file. Some of the problems:

* On a casefolding file system abort happens before displaying
  all clashes.
* When a file is renamed from aaa.txt to AAA.TXT, it gets copied
   first and an unnecessary clash warning appears.
   (On a casefolding file system this can't be done anyway, so one has to
   rename aaa.txt to tmp.txt and then tmp.txt to AAA.TXT, and this works)
 * A case changing rename of a folder with many files would have a
   performance overhead. If the check is done where all the file names
   to be added are available, the new files detected as clashing can be
   put to a map so that finding the corresponding clashes would require
   only a single iteration over dirstate.map.

Andrei

# HG changeset patch
# User Andrei Vermel <avermel at mail.ru>
# Date 1175241778 -14400
# Node ID 8593076e366928df2401a33f3f1e0187262ecd94
# Parent  a1406a50ca83e9d3388f7273c8f6ee9283d83c5c
Check case-folding clashes during addremove

diff -r a1406a50ca83 -r 8593076e3669 mercurial/dirstate.py
--- a/mercurial/dirstate.py  Tue Mar 13 13:17:26 2007 +0100
+++ b/mercurial/dirstate.py  Fri Mar 30 12:02:58 2007 +0400
@@ -20,6 +20,7 @@ class dirstate(object):
         self.dirty = 0
         self.ui = ui
         self.map = None
+        self.foldmap = None
         self.pl = None
         self.dirs = None
         self.copymap = {}
@@ -173,6 +174,17 @@ class dirstate(object):
         if self.map is None:
             self.read()

+    def add_foldmap(self, file):
+        fl = file.lower()
+        if fl in self.foldmap:
+            self.foldmap[fl]+=1
+        else:
+            self.foldmap[fl]=1
+
+    def del_foldmap(self, file):
+        fl = file.lower()
+        self.foldmap[fl]-=1
+
     def parse(self, st):
         self.pl = [st[:20], st[20: 40]]

@@ -196,10 +208,12 @@ class dirstate(object):
                 f, c = f.split('\0')
                 copymap[f] = c
             map[f] = e[:4]
+            self.add_foldmap(f)
             pos = newpos

     def read(self):
         self.map = {}
+        self.foldmap = {}
         self.pl = [nullid, nullid]
         try:
             st = self.opener("dirstate").read()
@@ -268,9 +282,11 @@ class dirstate(object):
         if state == "a":
             self.initdirs()
             self.checkinterfering(files)
+            self.check_add_case_clashes(files)
         for f in files:
             if state == "r":
                 self.map[f] = ('r', 0, 0, 0)
+                self.add_foldmap(f)
                 self.updatedirs(f, -1)
             else:
                 if state == "a":
@@ -279,6 +295,7 @@ class dirstate(object):
                 st_size = kw.get('st_size', s.st_size)
                 st_mtime = kw.get('st_mtime', s.st_mtime)
                 self.map[f] = (state, s.st_mode, st_size, st_mtime)
+                self.add_foldmap(f)
             if self.copymap.has_key(f):
                 del self.copymap[f]

@@ -290,6 +307,7 @@ class dirstate(object):
         for f in files:
             try:
                 del self.map[f]
+                self.del_foldmap(f)
                 self.updatedirs(f, -1)
             except KeyError:
                 self.ui.warn(_("not in dirstate: %s!\n") % f)
@@ -297,6 +315,7 @@ class dirstate(object):

     def clear(self):
         self.map = {}
+        self.foldmap = {}
         self.copymap = {}
         self.dirs = None
         self.markdirty()
@@ -308,6 +327,7 @@ class dirstate(object):
                 self.map[f] = ('n', 0777, -1, 0)
             else:
                 self.map[f] = ('n', 0666, -1, 0)
+            self.add_foldmap(f)
         self.pl = (parent, nullid)
         self.markdirty()

@@ -552,3 +572,37 @@ class dirstate(object):

         return (lookup, modified, added, removed, deleted, unknown,
ignored,
                 clean)
+
+    def check_add_case_clashes(self, add_files):
+        self.lazyread()
+        foldmap = self.foldmap
+        fold_clash = None
+        for fn in add_files:
+            fold=fn.lower()
+            if fold in foldmap:
+                nmb = foldmap[fold]
+                if nmb == 0:
+                    continue
+                if fn in self.map: # add existing file is valid,
+                    continue       #  e.g. qrefresh does it
+
+                coll=self.report_case_collisions(fn)
+
+
+    def report_case_collisions(self, file):
+        fold = file.lower()
+        clashes=[]
+        for fn in self.map.iterkeys():
+            fl = fn.lower()
+            if fl == fold and fn != file and self.map[fn][0] != 'r':
+                clashes.append(fn)
+
+        if clashes != []:
+           self.ui.warn(_('\nname case fold clashes found!\n'))
+           self.ui.warn(_('    was %s') % clashes[0])
+           for fn in clashes[1:]:
+               self.ui.warn(_(',\n        %s') % fn)
+           self.ui.warn(_(', now %s\n') % file)
+
+           if not util.checkfolding(self.root):
+               raise util.Abort(_('add aborted due to case folding
clashes'))


----- Original Message ----- 
From: "Matt Mackall" <mpm at selenic.com>
To: "Andrei Vermel" <avermel at mail.ru>
Cc: "Alexis S. L. Carvalho" <alexis at cecm.usp.br>;
<mercurial-devel at selenic.com>
Sent: Friday, March 30, 2007 12:14 AM
Subject: Re: [PATCH] Check case-folding clashes during addremove


> On Thu, Mar 29, 2007 at 11:32:09PM +0400, Andrei Vermel wrote:
>> Oops. My patch breaks qrefresh. It adds files that are already present in
>> dirstate with 'n' status.
>> This seems to fix it.
>
> Please post patches inline. Replying to them (or even reading them!)
> is a nuisance otherwise.
>
> # HG changeset patch
> # User Andrei Vermel <avermel at mail.ru>
> # Date 1175196554 -14400
> # Node ID 2842d34e6d74de65f5ea2cecc9b420d034a434b5
> # Parent  a1406a50ca83e9d3388f7273c8f6ee9283d83c5c
> Check case-folding clashes during addremove
>
> diff -r a1406a50ca83 -r 2842d34e6d74 mercurial/dirstate.py
> --- a/mercurial/dirstate.py Tue Mar 13 13:17:26 2007 +0100
> +++ b/mercurial/dirstate.py Thu Mar 29 23:29:14 2007 +0400
> @@ -20,6 +20,7 @@ class dirstate(object):
>         self.dirty = 0
>         self.ui = ui
>         self.map = None
> +        self.foldmap = None
>         self.pl = None
>         self.dirs = None
>         self.copymap = {}
> @@ -173,6 +174,25 @@ class dirstate(object):
>         if self.map is None:
>             self.read()
>
> +    def add_foldmap(self, file):
> +        fl = file.lower()
> +        if fl in self.foldmap:
> +            fmaped = self.foldmap[fl]
> +            if type(fmaped) == list:
> +                fmaped.append(file)
> +            else:
> +                self.foldmap[fl]=[fmaped, file]
> +        else:
> +            self.foldmap[fl]=file
>
> I think we should optimize for the common case of no collisions. So
> instead we should do:
>
>   fl = file.lower()
>   if fl in self.foldmap:
>      issue error/warning
>      self.foldmap[fl] += 1
>   else:
>      self.foldmap[fl] = 1
>
> We're not keeping track of -which- files collide, instead we have a
> helper function findcollisions(file) which just runs through the map
> and complains about any files that fold to the same name as file. This
> is slower, but we only take the hit when a collision happens, which
> means everything else is faster.
>
> +    def del_foldmap(self, file):
> +        fl = file.lower()
> +        fmaped = self.foldmap[fl]
> +        if type(fmaped) == list:
> +            fmaped.remove(file)
> +        else:
> +            del self.foldmap[fl]
>
> And simply self.foldmap[fl] != 1 here.
>
> I think this lets us eliminate a bunch of complexity (including the
> pre-existing collision detection).
>
> -- 
> Mathematics is the supreme nostalgia of our time.
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: addremove_clashes.patch
Type: application/octet-stream
Size: 4324 bytes
Desc: not available
Url : http://www.selenic.com/pipermail/mercurial-devel/attachments/20070330/8716fd45/addremove_clashes-0001.obj


More information about the Mercurial-devel mailing list