[PATCH] Don't consider UTF-16 and UTF-32 files as binary (issue1975) (version2 -- resend)

Benoît Allard benoit at aeteurope.nl
Mon Feb 22 04:49:13 CST 2010


ping !

Ben wrote:
> The generated patches work fine with GNU diff. I mainly need this to
> review my commits on Windows, where some tools automatically generate
> config files in UTF-16 ...
> 
> # HG changeset patch
> # User Benoit Allard <benoit at aeteurope.nl>
> # Date 1265306735 -3600
> Don't consider UTF-16 and UTF-32 files as binary (issue1975)
> 
> diff --git a/mercurial/util.py b/mercurial/util.py
> --- a/mercurial/util.py
> +++ b/mercurial/util.py
> @@ -14,7 +14,7 @@
>  """
> 
>  from i18n import _
> -import error, osutil, encoding
> +import error, osutil, encoding, codecs
>  import cStringIO, errno, re, shutil, sys, tempfile, traceback
>  import os, stat, time, calendar, textwrap
>  import imp
> @@ -210,9 +210,20 @@
>              return fn(s, cmd[len(name):].lstrip())
>      return pipefilter(s, cmd)
> 
> +boms = [
> +    codecs.BOM_UTF8,
> +    codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE,
> +    codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE
> +    ]
> +
>  def binary(s):
>      """return true if a string is binary data"""
> -    return bool(s and '\0' in s)
> +    if s:
> +        for bom in boms:
> +            if s.startswith(bom):
> +                return False
> +        return '\0' in s
> +    return False
> 
>  def increasingchunks(source, min=1024, max=65536):
>      '''return no less than min bytes per chunk while data remains,
> diff --git a/tests/test-diff-binary-file b/tests/test-diff-binary-file
> --- a/tests/test-diff-binary-file
> +++ b/tests/test-diff-binary-file
> @@ -23,3 +23,12 @@
> 
>  echo % diff --git -r 0 -r 2
>  hg diff --git -r 0 -r 2
> +
> +echo % UTF-16 diff
> +echo "\xff\xfe\xe4\x00\x0a\x00" > utf-16le
> +echo "\x00\x00\xfe\xff\x00\x00\x00\x31\x00\x00\x00\x0a" > utf-32be
> +hg add utf-16le utf-32be
> +hg ci -m "add files"
> +echo "\x31\x00\x0a\x00" >> utf-16le
> +echo "\x00\x00\x00\x32\x00\x00\x00\x0a" >> utf-32be
> +hg diff --nodates utf-16le utf-32be
> diff --git a/tests/test-diff-binary-file.out b/tests/test-diff-binary-file.out
> index 048f20400cae4202b0d5c43d344f8273d5a216c5..1fa8e52e8b1f5ff863f2a586185879fb168997ec
> GIT binary patch
> literal 1315
> zc${@q*^;7I5cQd_pu#0eZF<;|P#Q`_g;Lzbss~F}K{gR76)C at VzN354h^eWGo_WaS
> z<jpu4CnEC}LESb at 5ZZ(A%^|?w5U;5ig%C7F;`bLtGZa)&aiXTxK-Kn5+cmyt5op at J
> zvOj}2{Iw2RN(5?_;%^KcDE at m2`dNTB_TjhT_oDL0S2Fc00&HJ5P9R393Z^3(X(AMY
> zQ97*=CP7i8slYg_DJEeW6lR+A_wPha*Dyq(4b;S8lfq3C%Y at J{W>Pwd!fMSRR0<<V
> zpexL78g6=a%0aUL*FG3~#RUl(2NJEol|{JGQx2nY!>VJ?Y2Sfi*gzz)KMS+W>)zZD
> zjv at hdbX6E+5T+GMZF*U<oCC={z!TYzsq?`ZqRYq;1|4W_(KETe*bTPF_pF70B*$&M
> zwrQ>-k0QpXQ<ugMxggCQ9Y~f1-+$>J3Q%G3z{ck)vM8>T5^phzmFh7+Y2fKtwze_0
> zQCEJaQnOzI^(1$pFkHF}dL;%xQW1i7vmA9-EmMkvrWf{Si}|Wi=lo-D^ne`qvcA4N
> zdcA;LNK5%}wl0dBgkIFEoL`dNdw&Kbef^~|W1J{G1{Sm~!N`NL%|D*h1!AT<p~4jF
> zI#ldb_{uW>+FRp7k(^Ge1zv3PCGEh}b^(ysQvSleZBwURZ0TuWhkCR=d0agZp%p82
> zv92EOBbJCOiBP}z!R8d5uB)`;uU^=$u91y8keCCpYIhKE-5;DvPbjPNM at g==TtiJp
> zGgYuIT)~=>m9^935WUzBDDx@*AtB{JqShO)a9sdN_1He;%fn7M(D+OAAWATFvbr_l
> z at U&ex;*r<XH73ihXwK0+TOa8~-J4WH&YSLSBBbAB2uPF(dgWV7wuv6A3EvNr793T`
> z*K0cNuFMN2<Qr?YJf`N5_?m`<T&Kr1`;FN(<7pE6CuW`rp<88Q+YDZg^or$s=dt at Z
> z_&|p4SZghRxXWf`*p^@zdo7I+>WwR7Bb%||qYC2<nSF=i59&GF3(p%rWokD3LpY&>
> z+IGJ#GK%_EimgYNoiO|QY-@*sPa^^fSwhSb)Rn8?7G=@#aPpjIfX!Gqh%$7U1~ac*
> zONo5mNpFh_!;<s9T*k1sH8Jt%L_L%Z_rQN2@$I(fvkrui_t($=1kqFtgLPaXND_<>
> zX4c>u00cqVrGLU|wVIv!C*(K~LcXD375RpP90!2Ee*T#Qz#LFTGRyrxH- at TTx$p2F
> b?mPUAJD2<UE0 at b5nE_zNp39-Pv<mzMgj9x;
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
> 


More information about the Mercurial-devel mailing list