[PATCH] osutil: implement listdir for linux
Maciej Fijalkowski
fijall at gmail.com
Mon Oct 17 12:33:14 EDT 2016
Hi Gregory.
It's needed because fstatat is faster (uses less system time). As far
as backport of scandir - scandir is written entirely in C, so it's not
completely clear to me how you would backport that to use cffi. I
don't believe dropping python2 support will be a viable option for
quite a while, if you ask me
On Sun, Oct 16, 2016 at 6:55 PM, Gregory Szorc <gregory.szorc at gmail.com> wrote:
> On Fri, Oct 14, 2016 at 12:02 AM, Maciej Fijalkowski <fijall at gmail.com>
> wrote:
>>
>> # HG changeset patch
>> # User Maciej Fijalkowski <fijall at gmail.com>
>> # Date 1476428549 -7200
>> # Fri Oct 14 09:02:29 2016 +0200
>> # Node ID 4e80a66124279e235dec7ae8f58c0a14b0b137bf
>> # Parent c770219dc4c253d7cd82519ce3c74438bb2829d3
>> osutil: implement listdir for linux
>>
>> diff --git a/mercurial/pure/osutil.py b/mercurial/pure/osutil.py
>> --- a/mercurial/pure/osutil.py
>> +++ b/mercurial/pure/osutil.py
>> @@ -66,13 +66,20 @@
>> return result
>>
>> ffi = None
>> -if modulepolicy not in policynocffi and sys.platform == 'darwin':
>> +if modulepolicy not in policynocffi and (
>> + sys.platform == 'darwin' or sys.platform.startswith('linux')):
>> try:
>> from _osutil_cffi import ffi, lib
>> except ImportError:
>> if modulepolicy == 'cffi': # strict cffi import
>> raise
>>
>> +class stat_res(object):
>> + def __init__(self, st_mode, st_mtime, st_size):
>> + self.st_mode = st_mode
>> + self.st_mtime = st_mtime
>> + self.st_size = st_size
>> +
>> if sys.platform == 'darwin' and ffi is not None:
>> listdir_batch_size = 4096
>> # tweakable number, only affects performance, which chunks
>> @@ -88,12 +95,6 @@
>> attrkinds[lib.VFIFO] = statmod.S_IFIFO
>> attrkinds[lib.VSOCK] = statmod.S_IFSOCK
>>
>> - class stat_res(object):
>> - def __init__(self, st_mode, st_mtime, st_size):
>> - self.st_mode = st_mode
>> - self.st_mtime = st_mtime
>> - self.st_size = st_size
>> -
>> tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec")
>> buf = ffi.new("char[]", listdir_batch_size)
>>
>> @@ -117,7 +118,7 @@
>> tp = attrkinds[cur.obj_type]
>> if name == "." or name == "..":
>> continue
>> - if skip == name and tp == statmod.S_ISDIR:
>> + if skip == name and tp == statmod.S_IFDIR:
>> return []
>> if stat:
>> mtime = cur.mtime.tv_sec
>> @@ -146,12 +147,74 @@
>> try:
>> ret = listdirinternal(dfd, req, stat, skip)
>> finally:
>> + lib.close(dfd)
>> + return ret
>> +
>> +elif sys.platform.startswith('linux') and ffi is not None:
>> +
>> + _known_common_file_types = {
>> + lib.DT_DIR: statmod.S_IFDIR,
>> + lib.DT_CHR: statmod.S_IFCHR,
>> + lib.DT_BLK: statmod.S_IFBLK,
>> + lib.DT_REG: statmod.S_IFREG,
>> + lib.DT_FIFO: statmod.S_IFIFO,
>> + lib.DT_LNK: statmod.S_IFLNK,
>> + lib.DT_SOCK: statmod.S_IFSOCK,
>> + }
>> +
>> + def listdirinternal(dir, dirfd, stat, skip):
>> + buf = ffi.new("struct stat *")
>> + ret = []
>> + while True:
>> + ffi.errno = 0
>> + dirent = lib.readdir(dir)
>> + if not dirent:
>> + break
>> + dname = dirent.d_name
>> + if dname[0] == "." and (dname[1] == "\x00" or
>> + (dname[1] == "." and dname[2] == "\x00")):
>> + continue
>> + #
>> + dtype = dirent.d_type
>> + if stat or dtype not in _known_common_file_types:
>> + if lib.fstatat(dirfd, dirent.d_name, buf,
>> + lib.AT_SYMLINK_NOFOLLOW) != 0:
>> + raise OSError(ffi.errno, os.strerror(ffi.errno))
>> + tp = statmod.S_IFMT(buf.st_mode)
>> + else:
>> + tp = _known_common_file_types[dtype]
>> + #
>> + name = ffi.string(dirent.d_name)
>> + if skip == name and tp == statmod.S_IFDIR:
>> + return []
>> + if stat:
>> + ret.append((name, tp, stat_res(
>> + st_mode = buf.st_mode,
>> + st_mtime = buf.st_mtim.tv_sec,
>> + st_size = buf.st_size)))
>> + else:
>> + ret.append((name, tp))
>> +
>> + if ffi.errno != 0:
>> + raise OSError(ffi.errno, os.strerror(ffi.errno))
>> + return ret
>> +
>> + def listdir(path, stat=False, skip=None):
>> + dfd = os.open(path, os.O_RDONLY)
>> + dir = lib.fdopendir(dfd)
>> + if dir == ffi.NULL:
>> try:
>> - lib.close(dfd)
>> + os.close(dfd)
>> except BaseException:
>> - pass # we ignore all the errors from closing, not
>> - # much we can do about that
>> + pass
>> + raise OSError(ffi.errno, os.strerror(ffi.errno))
>> +
>> + try:
>> + ret = listdirinternal(dir, dfd, stat, skip)
>> + finally:
>> + lib.closedir(dir)
>> return ret
>> +
>> else:
>> listdir = listdirpure
>>
>> diff --git a/setup.py b/setup.py
>> --- a/setup.py
>> +++ b/setup.py
>> @@ -323,7 +323,7 @@
>> exts = [setup_mpatch_cffi.ffi.distutils_extension(),
>> setup_bdiff_cffi.ffi.distutils_extension()]
>> # cffi modules go here
>> - if sys.platform == 'darwin':
>> + if sys.platform == 'darwin' or
>> sys.platform.startswith('linux'):
>> import setup_osutil_cffi
>> exts.append(setup_osutil_cffi.ffi.distutils_extension())
>> self.distribution.ext_modules = exts
>> diff --git a/setup_osutil_cffi.py b/setup_osutil_cffi.py
>> --- a/setup_osutil_cffi.py
>> +++ b/setup_osutil_cffi.py
>> @@ -1,102 +1,159 @@
>> from __future__ import absolute_import
>>
>> import cffi
>> +import sys
>>
>> -ffi = cffi.FFI()
>> -ffi.set_source("_osutil_cffi", """
>> -#include <sys/attr.h>
>> -#include <sys/vnode.h>
>> -#include <unistd.h>
>> -#include <fcntl.h>
>> -#include <time.h>
>> +if sys.platform == "darwin":
>> + ffi = cffi.FFI()
>> + ffi.set_source("_osutil_cffi", """
>> + #include <sys/attr.h>
>> + #include <sys/vnode.h>
>> + #include <unistd.h>
>> + #include <fcntl.h>
>> + #include <time.h>
>>
>> -typedef struct val_attrs {
>> - uint32_t length;
>> - attribute_set_t returned;
>> - attrreference_t name_info;
>> - fsobj_type_t obj_type;
>> - struct timespec mtime;
>> - uint32_t accessmask;
>> - off_t datalength;
>> -} __attribute__((aligned(4), packed)) val_attrs_t;
>> -""", include_dirs=['mercurial'])
>> -ffi.cdef('''
>> + typedef struct val_attrs {
>> + uint32_t length;
>> + attribute_set_t returned;
>> + attrreference_t name_info;
>> + fsobj_type_t obj_type;
>> + struct timespec mtime;
>> + uint32_t accessmask;
>> + off_t datalength;
>> + } __attribute__((aligned(4), packed)) val_attrs_t;
>> + """, include_dirs=['mercurial'])
>> + ffi.cdef('''
>>
>> -typedef uint32_t attrgroup_t;
>> + typedef uint32_t attrgroup_t;
>>
>> -typedef struct attrlist {
>> - uint16_t bitmapcount; /* number of attr. bit sets in list */
>> - uint16_t reserved; /* (to maintain 4-byte alignment) */
>> - attrgroup_t commonattr; /* common attribute group */
>> - attrgroup_t volattr; /* volume attribute group */
>> - attrgroup_t dirattr; /* directory attribute group */
>> - attrgroup_t fileattr; /* file attribute group */
>> - attrgroup_t forkattr; /* fork attribute group */
>> - ...;
>> -};
>> + typedef struct attrlist {
>> + uint16_t bitmapcount; /* number of attr. bit sets in list */
>> + uint16_t reserved; /* (to maintain 4-byte alignment) */
>> + attrgroup_t commonattr; /* common attribute group */
>> + attrgroup_t volattr; /* volume attribute group */
>> + attrgroup_t dirattr; /* directory attribute group */
>> + attrgroup_t fileattr; /* file attribute group */
>> + attrgroup_t forkattr; /* fork attribute group */
>> + ...;
>> + };
>>
>> -typedef struct attribute_set {
>> - ...;
>> -} attribute_set_t;
>> + typedef struct attribute_set {
>> + ...;
>> + } attribute_set_t;
>>
>> -typedef struct attrreference {
>> - int attr_dataoffset;
>> - int attr_length;
>> - ...;
>> -} attrreference_t;
>> + typedef struct attrreference {
>> + int attr_dataoffset;
>> + int attr_length;
>> + ...;
>> + } attrreference_t;
>>
>> -typedef int ... off_t;
>> + typedef int ... off_t;
>>
>> -typedef struct val_attrs {
>> - uint32_t length;
>> - attribute_set_t returned;
>> - attrreference_t name_info;
>> - uint32_t obj_type;
>> - struct timespec mtime;
>> - uint32_t accessmask;
>> - off_t datalength;
>> - ...;
>> -} val_attrs_t;
>> + typedef struct val_attrs {
>> + uint32_t length;
>> + attribute_set_t returned;
>> + attrreference_t name_info;
>> + uint32_t obj_type;
>> + struct timespec mtime;
>> + uint32_t accessmask;
>> + off_t datalength;
>> + ...;
>> + } val_attrs_t;
>>
>> -/* the exact layout of the above struct will be figured out during build
>> time */
>> + /* the exact layout of the above struct will be figured out during
>> + build time */
>>
>> -typedef int ... time_t;
>> + typedef int ... time_t;
>>
>> -typedef struct timespec {
>> - time_t tv_sec;
>> - ...;
>> -};
>> + typedef struct timespec {
>> + time_t tv_sec;
>> + ...;
>> + };
>>
>> -int getattrlist(const char* path, struct attrlist * attrList, void *
>> attrBuf,
>> - size_t attrBufSize, unsigned int options);
>> + int getattrlist(const char* path, struct attrlist * attrList,
>> + void * attrBuf, size_t attrBufSize, unsigned int
>> options);
>>
>> -int getattrlistbulk(int dirfd, struct attrlist * attrList, void *
>> attrBuf,
>> - size_t attrBufSize, uint64_t options);
>> + int getattrlistbulk(int dirfd, struct attrlist * attrList, void *
>> attrBuf,
>> + size_t attrBufSize, uint64_t options);
>>
>> -#define ATTR_BIT_MAP_COUNT ...
>> -#define ATTR_CMN_NAME ...
>> -#define ATTR_CMN_OBJTYPE ...
>> -#define ATTR_CMN_MODTIME ...
>> -#define ATTR_CMN_ACCESSMASK ...
>> -#define ATTR_CMN_ERROR ...
>> -#define ATTR_CMN_RETURNED_ATTRS ...
>> -#define ATTR_FILE_DATALENGTH ...
>> + #define ATTR_BIT_MAP_COUNT ...
>> + #define ATTR_CMN_NAME ...
>> + #define ATTR_CMN_OBJTYPE ...
>> + #define ATTR_CMN_MODTIME ...
>> + #define ATTR_CMN_ACCESSMASK ...
>> + #define ATTR_CMN_ERROR ...
>> + #define ATTR_CMN_RETURNED_ATTRS ...
>> + #define ATTR_FILE_DATALENGTH ...
>>
>> -#define VREG ...
>> -#define VDIR ...
>> -#define VLNK ...
>> -#define VBLK ...
>> -#define VCHR ...
>> -#define VFIFO ...
>> -#define VSOCK ...
>> + #define VREG ...
>> + #define VDIR ...
>> + #define VLNK ...
>> + #define VBLK ...
>> + #define VCHR ...
>> + #define VFIFO ...
>> + #define VSOCK ...
>>
>> -#define S_IFMT ...
>> + #define S_IFMT ...
>>
>> -int open(const char *path, int oflag, int perm);
>> -int close(int);
>> + int open(const char *path, int oflag, int perm);
>> + int close(int);
>>
>> -#define O_RDONLY ...
>> -''')
>> + #define O_RDONLY ...
>> + ''')
>> +
>> +else:
>> +
>> + ffi = cffi.FFI()
>> +
>> + ffi.set_source("_osutil_cffi", """
>> + #define _GNU_SOURCE 1
>> + #include <sys/stat.h>
>> + #include <sys/types.h>
>> + #include <dirent.h>
>> + #include <fcntl.h>
>> + """)
>> +
>> + ffi.cdef("""
>> + typedef ... DIR;
>> +
>> + struct dirent {
>> + unsigned char d_type; /* Type of file; not supported
>> + by all filesystem types */
>> + char d_name[...]; /* Null-terminated filename */
>> + ...;
>> + };
>> +
>> + typedef int... mode_t;
>> + typedef int... off_t;
>> + typedef int... time_t;
>> +
>> + struct timespec {
>> + time_t tv_sec;
>> + ...;
>> + };
>> +
>> + struct stat {
>> + mode_t st_mode; /* inode protection mode */
>> + struct timespec st_mtim; /* time of last data modification */
>> + off_t st_size; /* file size, in bytes */
>> + ...;
>> + };
>> +
>> + DIR *fdopendir(int fd);
>> + struct dirent *readdir(DIR *dirp);
>> + int closedir(DIR *dirp);
>> +
>> + #define AT_SYMLINK_NOFOLLOW ...
>> + #define DT_DIR ...
>> + #define DT_CHR ...
>> + #define DT_BLK ...
>> + #define DT_REG ...
>> + #define DT_FIFO ...
>> + #define DT_LNK ...
>> + #define DT_SOCK ...
>> +
>> + int fstatat(int fd, const char *path, struct stat *buf, int flag);
>> + """)
>>
>> if __name__ == '__main__':
>> ffi.compile()
>
>
> Few questions.
>
> First, why is this needed? I assume performance. But it isn't clear to me
> what the benefit is.
>
> Second, do you think we should import a backport of scandir from Python 3.5?
> I think that's essentially the same as what our custom listdir code is
> doing. I'd rather we take a snapshot of upstream code, modify it to work on
> Python 2.6/2.7, then call it a day until we drop Python 2 support in several
> years.
>
More information about the Mercurial-devel
mailing list