[PATCH 2 of 2] config: support config file with BOM (issue2162)

Yuya Nishihara yuya at tcha.org
Sat Jun 19 08:56:35 CDT 2010


# HG changeset patch
# User Yuya Nishihara <yuya at tcha.org>
# Date 1276951078 -32400
# Node ID 3d8b0c0ca3b29ed1f1037b668497504b9343ef46
# Parent  19dc05994d6cc2727efbde32c53bcbb1dfbdc83b
config: support config file with BOM (issue2162)

Some text editors, such as Notepad.exe, silently insert a BOM (byte order
mark) at the start of the file. This is really confusing because the BOM is
invisible in the editor, yet Mercurial complains about it.

This changes config.read() to strip the BOM and convert the file contents to
the local encoding before parsing.

diff --git a/mercurial/config.py b/mercurial/config.py
--- a/mercurial/config.py
+++ b/mercurial/config.py
@@ -6,7 +6,7 @@
 # GNU General Public License version 2 or any later version.
 
 from i18n import _
-import error, util
+import error, util, encoding
 import re, os
 
 class sortdict(dict):
@@ -139,4 +139,5 @@ class config(object):
     def read(self, path, fp=None, sections=None, remap=None):
         if not fp:
             fp = open(path)
-        self.parse(path, fp.read(), sections, remap, self.read)
+        self.parse(path, encoding.bomtolocal(fp.read()),
+                   sections, remap, self.read)
diff --git a/tests/test-hgrc b/tests/test-hgrc
--- a/tests/test-hgrc
+++ b/tests/test-hgrc
@@ -74,3 +74,29 @@ hg showconfig | sed -e "s:$p:...:"
 echo '% plain hgrc'
 HGPLAIN=; export HGPLAIN
 hg showconfig --config ui.traceback=True --debug | sed -e "s:$p:...:"
+
+# issue2162: BOM (byte order mark) support
+echo '% utf-8w/bom hgrc'
+python -c 'import sys; sys.stdout.write("\357\273\277")' > $HGRCPATH
+printf '[a]\nx=y' >> $HGRCPATH
+hg showconfig | sed -e "s:$p:...:"
+
+echo '% utf-16-le hgrc'
+python -c 'import sys; sys.stdout.write("\377\376")' > $HGRCPATH
+printf '[\0a\0]\0\n\0x\0=\0y\0' >> $HGRCPATH
+hg showconfig | sed -e "s:$p:...:"
+
+echo '% utf-16-be hgrc'
+python -c 'import sys; sys.stdout.write("\376\377")' > $HGRCPATH
+printf '\0[\0a\0]\0\n\0x\0=\0y' >> $HGRCPATH
+hg showconfig | sed -e "s:$p:...:"
+
+echo '% utf-32-le hgrc'
+python -c 'import sys; sys.stdout.write("\377\376\0\0")' > $HGRCPATH
+printf '[\0\0\0a\0\0\0]\0\0\0\n\0\0\0x\0\0\0=\0\0\0y\0\0\0' >> $HGRCPATH
+hg showconfig | sed -e "s:$p:...:"
+
+echo '% utf-32-be hgrc'
+python -c 'import sys; sys.stdout.write("\0\0\376\377")' > $HGRCPATH
+printf '\0\0\0[\0\0\0a\0\0\0]\0\0\0\n\0\0\0x\0\0\0=\0\0\0y' >> $HGRCPATH
+hg showconfig | sed -e "s:$p:...:"
diff --git a/tests/test-hgrc.out b/tests/test-hgrc.out
--- a/tests/test-hgrc.out
+++ b/tests/test-hgrc.out
@@ -31,3 +31,13 @@ none: ui.traceback=True
 none: ui.verbose=False
 none: ui.debug=True
 none: ui.quiet=False
+% utf-8w/bom hgrc
+a.x=y
+% utf-16-le hgrc
+a.x=y
+% utf-16-be hgrc
+a.x=y
+% utf-32-le hgrc
+a.x=y
+% utf-32-be hgrc
+a.x=y


More information about the Mercurial-devel mailing list