[PATCH] osutil: implement listdir for linux

Maciej Fijalkowski fijall at gmail.com
Mon Oct 17 12:33:14 EDT 2016


Hi Gregory.

It's needed because fstatat is faster (uses less system time). As far
as backport of scandir - scandir is written entirely in C, so it's not
completely clear to me how you would backport that to use cffi. I
don't believe dropping python2 support will be a viable option for
quite a while, if you ask me

On Sun, Oct 16, 2016 at 6:55 PM, Gregory Szorc <gregory.szorc at gmail.com> wrote:
> On Fri, Oct 14, 2016 at 12:02 AM, Maciej Fijalkowski <fijall at gmail.com>
> wrote:
>>
>> # HG changeset patch
>> # User Maciej Fijalkowski <fijall at gmail.com>
>> # Date 1476428549 -7200
>> #      Fri Oct 14 09:02:29 2016 +0200
>> # Node ID 4e80a66124279e235dec7ae8f58c0a14b0b137bf
>> # Parent  c770219dc4c253d7cd82519ce3c74438bb2829d3
>> osutil: implement listdir for linux
>>
>> diff --git a/mercurial/pure/osutil.py b/mercurial/pure/osutil.py
>> --- a/mercurial/pure/osutil.py
>> +++ b/mercurial/pure/osutil.py
>> @@ -66,13 +66,20 @@
>>      return result
>>
>>  ffi = None
>> -if modulepolicy not in policynocffi and sys.platform == 'darwin':
>> +if modulepolicy not in policynocffi and (
>> +        sys.platform == 'darwin' or sys.platform.startswith('linux')):
>>      try:
>>          from _osutil_cffi import ffi, lib
>>      except ImportError:
>>          if modulepolicy == 'cffi': # strict cffi import
>>              raise
>>
>> +class stat_res(object):
>> +    def __init__(self, st_mode, st_mtime, st_size):
>> +        self.st_mode = st_mode
>> +        self.st_mtime = st_mtime
>> +        self.st_size = st_size
>> +
>>  if sys.platform == 'darwin' and ffi is not None:
>>      listdir_batch_size = 4096
>>      # tweakable number, only affects performance, which chunks
>> @@ -88,12 +95,6 @@
>>      attrkinds[lib.VFIFO] = statmod.S_IFIFO
>>      attrkinds[lib.VSOCK] = statmod.S_IFSOCK
>>
>> -    class stat_res(object):
>> -        def __init__(self, st_mode, st_mtime, st_size):
>> -            self.st_mode = st_mode
>> -            self.st_mtime = st_mtime
>> -            self.st_size = st_size
>> -
>>      tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec")
>>      buf = ffi.new("char[]", listdir_batch_size)
>>
>> @@ -117,7 +118,7 @@
>>                  tp = attrkinds[cur.obj_type]
>>                  if name == "." or name == "..":
>>                      continue
>> -                if skip == name and tp == statmod.S_ISDIR:
>> +                if skip == name and tp == statmod.S_IFDIR:
>>                      return []
>>                  if stat:
>>                      mtime = cur.mtime.tv_sec
>> @@ -146,12 +147,74 @@
>>          try:
>>              ret = listdirinternal(dfd, req, stat, skip)
>>          finally:
>> +            lib.close(dfd)
>> +        return ret
>> +
>> +elif sys.platform.startswith('linux') and ffi is not None:
>> +
>> +    _known_common_file_types = {
>> +        lib.DT_DIR:  statmod.S_IFDIR,
>> +        lib.DT_CHR:  statmod.S_IFCHR,
>> +        lib.DT_BLK:  statmod.S_IFBLK,
>> +        lib.DT_REG:  statmod.S_IFREG,
>> +        lib.DT_FIFO: statmod.S_IFIFO,
>> +        lib.DT_LNK:  statmod.S_IFLNK,
>> +        lib.DT_SOCK: statmod.S_IFSOCK,
>> +    }
>> +
>> +    def listdirinternal(dir, dirfd, stat, skip):
>> +        buf = ffi.new("struct stat *")
>> +        ret = []
>> +        while True:
>> +            ffi.errno = 0
>> +            dirent = lib.readdir(dir)
>> +            if not dirent:
>> +                break
>> +            dname = dirent.d_name
>> +            if dname[0] == "." and (dname[1] == "\x00" or
>> +                    (dname[1] == "." and dname[2] == "\x00")):
>> +                continue
>> +            #
>> +            dtype = dirent.d_type
>> +            if stat or dtype not in _known_common_file_types:
>> +                if lib.fstatat(dirfd, dirent.d_name, buf,
>> +                               lib.AT_SYMLINK_NOFOLLOW) != 0:
>> +                    raise OSError(ffi.errno, os.strerror(ffi.errno))
>> +                tp = statmod.S_IFMT(buf.st_mode)
>> +            else:
>> +                tp = _known_common_file_types[dtype]
>> +            #
>> +            name = ffi.string(dirent.d_name)
>> +            if skip == name and tp == statmod.S_IFDIR:
>> +                return []
>> +            if stat:
>> +                ret.append((name, tp, stat_res(
>> +                               st_mode  = buf.st_mode,
>> +                               st_mtime = buf.st_mtim.tv_sec,
>> +                               st_size  = buf.st_size)))
>> +            else:
>> +                ret.append((name, tp))
>> +
>> +        if ffi.errno != 0:
>> +            raise OSError(ffi.errno, os.strerror(ffi.errno))
>> +        return ret
>> +
>> +    def listdir(path, stat=False, skip=None):
>> +        dfd = os.open(path, os.O_RDONLY)
>> +        dir = lib.fdopendir(dfd)
>> +        if dir == ffi.NULL:
>>              try:
>> -                lib.close(dfd)
>> +                os.close(dfd)
>>              except BaseException:
>> -                pass # we ignore all the errors from closing, not
>> -                # much we can do about that
>> +                pass
>> +            raise OSError(ffi.errno, os.strerror(ffi.errno))
>> +
>> +        try:
>> +            ret = listdirinternal(dir, dfd, stat, skip)
>> +        finally:
>> +            lib.closedir(dir)
>>          return ret
>> +
>>  else:
>>      listdir = listdirpure
>>
>> diff --git a/setup.py b/setup.py
>> --- a/setup.py
>> +++ b/setup.py
>> @@ -323,7 +323,7 @@
>>              exts = [setup_mpatch_cffi.ffi.distutils_extension(),
>>                      setup_bdiff_cffi.ffi.distutils_extension()]
>>              # cffi modules go here
>> -            if sys.platform == 'darwin':
>> +            if sys.platform == 'darwin' or
>> sys.platform.startswith('linux'):
>>                  import setup_osutil_cffi
>>                  exts.append(setup_osutil_cffi.ffi.distutils_extension())
>>              self.distribution.ext_modules = exts
>> diff --git a/setup_osutil_cffi.py b/setup_osutil_cffi.py
>> --- a/setup_osutil_cffi.py
>> +++ b/setup_osutil_cffi.py
>> @@ -1,102 +1,159 @@
>>  from __future__ import absolute_import
>>
>>  import cffi
>> +import sys
>>
>> -ffi = cffi.FFI()
>> -ffi.set_source("_osutil_cffi", """
>> -#include <sys/attr.h>
>> -#include <sys/vnode.h>
>> -#include <unistd.h>
>> -#include <fcntl.h>
>> -#include <time.h>
>> +if sys.platform == "darwin":
>> +    ffi = cffi.FFI()
>> +    ffi.set_source("_osutil_cffi", """
>> +    #include <sys/attr.h>
>> +    #include <sys/vnode.h>
>> +    #include <unistd.h>
>> +    #include <fcntl.h>
>> +    #include <time.h>
>>
>> -typedef struct val_attrs {
>> -    uint32_t          length;
>> -    attribute_set_t   returned;
>> -    attrreference_t   name_info;
>> -    fsobj_type_t      obj_type;
>> -    struct timespec   mtime;
>> -    uint32_t          accessmask;
>> -    off_t             datalength;
>> -} __attribute__((aligned(4), packed)) val_attrs_t;
>> -""", include_dirs=['mercurial'])
>> -ffi.cdef('''
>> +    typedef struct val_attrs {
>> +        uint32_t          length;
>> +        attribute_set_t   returned;
>> +        attrreference_t   name_info;
>> +        fsobj_type_t      obj_type;
>> +        struct timespec   mtime;
>> +        uint32_t          accessmask;
>> +        off_t             datalength;
>> +    } __attribute__((aligned(4), packed)) val_attrs_t;
>> +    """, include_dirs=['mercurial'])
>> +    ffi.cdef('''
>>
>> -typedef uint32_t attrgroup_t;
>> +    typedef uint32_t attrgroup_t;
>>
>> -typedef struct attrlist {
>> -    uint16_t     bitmapcount; /* number of attr. bit sets in list */
>> -    uint16_t   reserved;    /* (to maintain 4-byte alignment) */
>> -    attrgroup_t commonattr;  /* common attribute group */
>> -    attrgroup_t volattr;     /* volume attribute group */
>> -    attrgroup_t dirattr;     /* directory attribute group */
>> -    attrgroup_t fileattr;    /* file attribute group */
>> -    attrgroup_t forkattr;    /* fork attribute group */
>> -    ...;
>> -};
>> +    typedef struct attrlist {
>> +        uint16_t     bitmapcount; /* number of attr. bit sets in list */
>> +        uint16_t   reserved;    /* (to maintain 4-byte alignment) */
>> +        attrgroup_t commonattr;  /* common attribute group */
>> +        attrgroup_t volattr;     /* volume attribute group */
>> +        attrgroup_t dirattr;     /* directory attribute group */
>> +        attrgroup_t fileattr;    /* file attribute group */
>> +        attrgroup_t forkattr;    /* fork attribute group */
>> +        ...;
>> +    };
>>
>> -typedef struct attribute_set {
>> -    ...;
>> -} attribute_set_t;
>> +    typedef struct attribute_set {
>> +        ...;
>> +    } attribute_set_t;
>>
>> -typedef struct attrreference {
>> -    int attr_dataoffset;
>> -    int attr_length;
>> -    ...;
>> -} attrreference_t;
>> +    typedef struct attrreference {
>> +        int attr_dataoffset;
>> +        int attr_length;
>> +        ...;
>> +    } attrreference_t;
>>
>> -typedef int ... off_t;
>> +    typedef int ... off_t;
>>
>> -typedef struct val_attrs {
>> -    uint32_t          length;
>> -    attribute_set_t   returned;
>> -    attrreference_t   name_info;
>> -    uint32_t          obj_type;
>> -    struct timespec   mtime;
>> -    uint32_t          accessmask;
>> -    off_t             datalength;
>> -    ...;
>> -} val_attrs_t;
>> +    typedef struct val_attrs {
>> +        uint32_t          length;
>> +        attribute_set_t   returned;
>> +        attrreference_t   name_info;
>> +        uint32_t          obj_type;
>> +        struct timespec   mtime;
>> +        uint32_t          accessmask;
>> +        off_t             datalength;
>> +        ...;
>> +    } val_attrs_t;
>>
>> -/* the exact layout of the above struct will be figured out during build
>> time */
>> +    /* the exact layout of the above struct will be figured out during
>> +       build time */
>>
>> -typedef int ... time_t;
>> +    typedef int ... time_t;
>>
>> -typedef struct timespec {
>> -    time_t tv_sec;
>> -    ...;
>> -};
>> +    typedef struct timespec {
>> +        time_t tv_sec;
>> +        ...;
>> +    };
>>
>> -int getattrlist(const char* path, struct attrlist * attrList, void *
>> attrBuf,
>> -                size_t attrBufSize, unsigned int options);
>> +    int getattrlist(const char* path, struct attrlist * attrList,
>> +                    void * attrBuf, size_t attrBufSize, unsigned int
>> options);
>>
>> -int getattrlistbulk(int dirfd, struct attrlist * attrList, void *
>> attrBuf,
>> -                    size_t attrBufSize, uint64_t options);
>> +    int getattrlistbulk(int dirfd, struct attrlist * attrList, void *
>> attrBuf,
>> +                        size_t attrBufSize, uint64_t options);
>>
>> -#define ATTR_BIT_MAP_COUNT ...
>> -#define ATTR_CMN_NAME ...
>> -#define ATTR_CMN_OBJTYPE ...
>> -#define ATTR_CMN_MODTIME ...
>> -#define ATTR_CMN_ACCESSMASK ...
>> -#define ATTR_CMN_ERROR ...
>> -#define ATTR_CMN_RETURNED_ATTRS ...
>> -#define ATTR_FILE_DATALENGTH ...
>> +    #define ATTR_BIT_MAP_COUNT ...
>> +    #define ATTR_CMN_NAME ...
>> +    #define ATTR_CMN_OBJTYPE ...
>> +    #define ATTR_CMN_MODTIME ...
>> +    #define ATTR_CMN_ACCESSMASK ...
>> +    #define ATTR_CMN_ERROR ...
>> +    #define ATTR_CMN_RETURNED_ATTRS ...
>> +    #define ATTR_FILE_DATALENGTH ...
>>
>> -#define VREG ...
>> -#define VDIR ...
>> -#define VLNK ...
>> -#define VBLK ...
>> -#define VCHR ...
>> -#define VFIFO ...
>> -#define VSOCK ...
>> +    #define VREG ...
>> +    #define VDIR ...
>> +    #define VLNK ...
>> +    #define VBLK ...
>> +    #define VCHR ...
>> +    #define VFIFO ...
>> +    #define VSOCK ...
>>
>> -#define S_IFMT ...
>> +    #define S_IFMT ...
>>
>> -int open(const char *path, int oflag, int perm);
>> -int close(int);
>> +    int open(const char *path, int oflag, int perm);
>> +    int close(int);
>>
>> -#define O_RDONLY ...
>> -''')
>> +    #define O_RDONLY ...
>> +    ''')
>> +
>> +else:
>> +
>> +    ffi = cffi.FFI()
>> +
>> +    ffi.set_source("_osutil_cffi", """
>> +    #define _GNU_SOURCE 1
>> +    #include <sys/stat.h>
>> +    #include <sys/types.h>
>> +    #include <dirent.h>
>> +    #include <fcntl.h>
>> +    """)
>> +
>> +    ffi.cdef("""
>> +    typedef ... DIR;
>> +
>> +    struct dirent {
>> +        unsigned char  d_type;      /* Type of file; not supported
>> +                                       by all filesystem types */
>> +        char           d_name[...]; /* Null-terminated filename */
>> +        ...;
>> +    };
>> +
>> +    typedef int... mode_t;
>> +    typedef int... off_t;
>> +    typedef int... time_t;
>> +
>> +    struct timespec {
>> +        time_t tv_sec;
>> +        ...;
>> +    };
>> +
>> +    struct stat {
>> +        mode_t   st_mode;   /* inode protection mode */
>> +        struct timespec st_mtim;  /* time of last data modification */
>> +        off_t    st_size;   /* file size, in bytes */
>> +        ...;
>> +    };
>> +
>> +    DIR *fdopendir(int fd);
>> +    struct dirent *readdir(DIR *dirp);
>> +    int closedir(DIR *dirp);
>> +
>> +    #define AT_SYMLINK_NOFOLLOW ...
>> +    #define DT_DIR ...
>> +    #define DT_CHR ...
>> +    #define DT_BLK ...
>> +    #define DT_REG ...
>> +    #define DT_FIFO ...
>> +    #define DT_LNK ...
>> +    #define DT_SOCK ...
>> +
>> +    int fstatat(int fd, const char *path, struct stat *buf, int flag);
>> +    """)
>>
>>  if __name__ == '__main__':
>>      ffi.compile()
>
>
> Few questions.
>
> First, why is this needed? I assume performance. But it isn't clear to me
> what the benefit is.
>
> Second, do you think we should import a backport of scandir from Python 3.5?
> I think that's essentially the same as what our custom listdir code is
> doing. I'd rather we take a snapshot of upstream code, modify it to work on
> Python 2.6/2.7, then call it a day until we drop Python 2 support in several
> years.
>


More information about the Mercurial-devel mailing list