[PATCH 2 of 6] Determine default locale encoding and stdio encoding on start-up

Andrey grooz-work at gorodok.net
Sat Nov 18 15:04:35 CST 2006


I've just looked at how bzr does it. These functions may be of interest to us. :)

Andrey

# taken from http://bazaar-vcs.org/bzr/bzr.dev/bzrlib/osutils.py
def get_terminal_encoding():
    """Pick the encoding to use when printing to the screen.

    The candidates are tried in this order and the first non-empty one wins:

    1. the ``encoding`` attribute of sys.stdout,
    2. the ``encoding`` attribute of sys.stdin,
    3. bzrlib.user_encoding.

    Whichever source is chosen is recorded through mutter().

    The streams are consulted first because, on Windows,
    locale.getpreferredencoding() is not the same encoding as the one used
    by the console:
    http://mail.python.org/pipermail/python-list/2003-May/162357.html
    (the original author reports cp1252 as the preferred encoding but cp437
    for the console on a standard US Windows XP).

    :return: The name of the chosen encoding.
    """
    # A stream may lack the attribute entirely, or carry an empty/None value;
    # both cases are treated as "unknown" and we move on to the next source.
    stdout_encoding = getattr(sys.stdout, 'encoding', None)
    if stdout_encoding:
        mutter('encoding stdout as sys.stdout encoding %r', stdout_encoding)
        return stdout_encoding

    stdin_encoding = getattr(sys.stdin, 'encoding', None)
    if stdin_encoding:
        mutter('encoding stdout as sys.stdin encoding %r', stdin_encoding)
        return stdin_encoding

    # Neither stream knows its encoding: use the user's preferred one.
    fallback_encoding = bzrlib.user_encoding
    mutter('encoding stdout as bzrlib.user_encoding %r', fallback_encoding)
    return fallback_encoding

def get_user_encoding():
    """Find out what the preferred user encoding is.

    This is generally the encoding that is used for command line parameters
    and file contents. This may be different from the terminal encoding
    or the filesystem encoding.

    The answer is computed once and stored in the module-level
    ``_cached_user_encoding``; subsequent calls return the stored value.

    'ascii' is used (and cached) when:

    * locale.getpreferredencoding() raises locale.Error (a warning is
      written to sys.stderr),
    * it returns an empty/None name, or
    * it returns a name Python has no codec for (a warning is written to
      sys.stderr).

    :return: A string defining the preferred user encoding
    """
    global _cached_user_encoding
    if _cached_user_encoding is not None:
        return _cached_user_encoding

    import codecs

    if sys.platform == 'darwin':
        # work around egregious python 2.4 bug: import locale while
        # sys.platform temporarily reads 'posix', and always restore it.
        sys.platform = 'posix'
        try:
            import locale
        finally:
            sys.platform = 'darwin'
    else:
        import locale

    encoding = None
    try:
        encoding = locale.getpreferredencoding()
    except locale.Error:
        # Fetch the exception via sys.exc_info() so this parses on both
        # old (2.4) and new Python versions.
        e = sys.exc_info()[1]
        sys.stderr.write('bzr: warning: %s\n'
                         '  Could not determine what text encoding to use.\n'
                         '  This error usually means your Python interpreter\n'
                         '  doesn\'t support the locale set by $LANG (%s)\n'
                         "  Continuing with ascii encoding.\n"
                         % (e, os.environ.get('LANG')))

    if encoding:
        # Make sure the reported name is one Python can actually use;
        # otherwise every later encode/decode with it would fail.
        try:
            codecs.lookup(encoding)
        except LookupError:
            sys.stderr.write('bzr: warning: unknown encoding %s.\n'
                             "  Continuing with ascii encoding.\n"
                             % encoding)
            encoding = None

    if not encoding:
        # Covers the error paths above as well as an empty/None result.
        encoding = 'ascii'

    _cached_user_encoding = encoding
    return _cached_user_encoding


More information about the Mercurial-devel mailing list