[PATCH v2] osutil: add darwin-only version of os.listdir using cffi
Jun Wu
quark at fb.com
Mon Jul 18 08:30:23 EDT 2016
The code looks reasonable. I don't have OSX at hand to test now. Someone
with OSX may want to have a second pass.
Excerpts from Maciej Fijalkowski's message of 2016-07-18 14:19:33 +0200:
> # HG changeset patch
> # User Maciej Fijalkowski <fijall at gmail.com>
> # Date 1468227908 -7200
> # Mon Jul 11 11:05:08 2016 +0200
> # Node ID 4d330a2c13cf52d9e8f80ad18b82a47a0e62eb55
> # Parent 065fb34034a3849f07ca836ce2f4eb17c7f543db
> osutil: add darwin-only version of os.listdir using cffi
>
> diff --git a/mercurial/pure/osutil.py b/mercurial/pure/osutil.py
> --- a/mercurial/pure/osutil.py
> +++ b/mercurial/pure/osutil.py
> @@ -14,6 +14,10 @@
> import stat as statmod
> import sys
>
> +from . import policy
> +modulepolicy = policy.policy
> +policynocffi = policy.policynocffi
> +
> def _mode_to_kind(mode):
> if statmod.S_ISREG(mode):
> return statmod.S_IFREG
> @@ -31,7 +35,7 @@
> return statmod.S_IFSOCK
> return mode
>
> -def listdir(path, stat=False, skip=None):
> +def listdirpure(path, stat=False, skip=None):
> '''listdir(path, stat=False) -> list_of_tuples
>
> Return a sorted list containing information about the entries
> @@ -61,6 +65,95 @@
> result.append((fn, _mode_to_kind(st.st_mode)))
> return result
>
> +ffi = None
> +if modulepolicy not in policynocffi and sys.platform == 'darwin':
> + try:
> + from _osutil_cffi import ffi, lib
> + except ImportError:
> + if modulepolicy == 'cffi': # strict cffi import
> + raise
> +
> +if sys.platform == 'darwin' and ffi is not None:
> + listdir_batch_size = 4096
> + # tweakable number, only affects performance, which chunks
> + # of bytes do we get back from getattrlistbulk
> +
> + attrkinds = [None] * 20 # we need the max no for enum VXXX, 20 is plenty
> +
> + attrkinds[lib.VREG] = statmod.S_IFREG
> + attrkinds[lib.VDIR] = statmod.S_IFDIR
> + attrkinds[lib.VLNK] = statmod.S_IFLNK
> + attrkinds[lib.VBLK] = statmod.S_IFBLK
> + attrkinds[lib.VCHR] = statmod.S_IFCHR
> + attrkinds[lib.VFIFO] = statmod.S_IFIFO
> + attrkinds[lib.VSOCK] = statmod.S_IFSOCK
> +
> + class stat_res(object):
> + def __init__(self, st_mode, st_mtime, st_size):
> + self.st_mode = st_mode
> + self.st_mtime = st_mtime
> + self.st_size = st_size
> +
> + tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec")
> + buf = ffi.new("char[]", listdir_batch_size)
> +
> + def listdirinternal(dfd, req, stat, skip):
> + ret = []
> + while True:
> + r = lib.getattrlistbulk(dfd, req, buf, listdir_batch_size, 0)
> + if r == 0:
> + break
> + if r == -1:
> + raise OSError(ffi.errno, os.strerror(ffi.errno))
> + cur = ffi.cast("val_attrs_t*", buf)
> + for i in range(r):
> + lgt = cur.length
> + assert lgt == ffi.cast('uint32_t*', cur)[0]
> + ofs = cur.name_info.attr_dataoffset
> + str_lgt = cur.name_info.attr_length
> + base_ofs = ffi.offsetof('val_attrs_t', 'name_info')
> + name = str(ffi.buffer(ffi.cast("char*", cur) + base_ofs + ofs,
> + str_lgt - 1))
> + tp = attrkinds[cur.obj_type]
> + if name == "." or name == "..":
> + continue
> + if skip == name and tp == statmod.S_ISDIR:
> + return []
> + if stat:
> + mtime = cur.time.tv_sec
> + mode = (cur.accessmask & ~lib.S_IFMT)| tp
> + ret.append((name, tp, stat_res(st_mode=mode, st_mtime=mtime,
> + st_size=cur.datalength)))
> + else:
> + ret.append((name, tp))
> + cur += lgt
> + return ret
> +
> + def listdir(path, stat=False, skip=None):
> + req = ffi.new("struct attrlist*")
> + req.bitmapcount = lib.ATTR_BIT_MAP_COUNT
> + req.commonattr = (lib.ATTR_CMN_RETURNED_ATTRS |
> + lib.ATTR_CMN_NAME |
> + lib.ATTR_CMN_OBJTYPE |
> + lib.ATTR_CMN_ACCESSMASK |
> + lib.ATTR_CMN_MODTIME)
> + req.fileattr = lib.ATTR_FILE_DATALENGTH
> + dfd = lib.open(path, lib.O_RDONLY, 0)
> + if dfd == -1:
> + raise OSError(ffi.errno, os.strerror(ffi.errno))
> +
> + try:
> + ret = listdirinternal(dfd, req, stat, skip)
> + finally:
> + try:
> + lib.close(dfd)
> + except BaseException:
> + pass # we ignore all the errors from closing, not
> + # much we can do about that
> + return ret
> +else:
> + listdir = listdirpure
> +
> if os.name != 'nt':
> posixfile = open
>
> diff --git a/setup.py b/setup.py
> --- a/setup.py
> +++ b/setup.py
> @@ -320,6 +320,9 @@
> elif self.distribution.cffi:
> exts = []
> # cffi modules go here
> + if sys.platform == 'darwin':
> + import setup_osutil_cffi
> + exts.append(setup_osutil_cffi.ffi.distutils_extension())
> self.distribution.ext_modules = exts
> else:
> h = os.path.join(get_python_inc(), 'Python.h')
> diff --git a/setup_osutil_cffi.py b/setup_osutil_cffi.py
> new file mode 100644
> --- /dev/null
> +++ b/setup_osutil_cffi.py
> @@ -0,0 +1,101 @@
> +from __future__ import absolute_import
> +
> +import cffi
> +
> +ffi = cffi.FFI()
> +ffi.set_source("_osutil_cffi", """
> +#include <sys/attr.h>
> +#include <sys/vnode.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <time.h>
> +
> +typedef struct val_attrs {
> + uint32_t length;
> + attribute_set_t returned;
> + attrreference_t name_info;
> + fsobj_type_t obj_type;
> + struct timespec mtime;
> + uint32_t accessmask;
> + off_t datalength;
> +} __attribute__((aligned(4), packed)) val_attrs_t;
> +""", include_dirs=['mercurial'])
> +ffi.cdef('''
> +
> +typedef uint32_t attrgroup_t;
> +
> +typedef struct attrlist {
> + uint16_t bitmapcount; /* number of attr. bit sets in list */
> + uint16_t reserved; /* (to maintain 4-byte alignment) */
> + attrgroup_t commonattr; /* common attribute group */
> + attrgroup_t volattr; /* volume attribute group */
> + attrgroup_t dirattr; /* directory attribute group */
> + attrgroup_t fileattr; /* file attribute group */
> + attrgroup_t forkattr; /* fork attribute group */
> + ...;
> +};
> +
> +typedef struct attribute_set {
> + ...;
> +} attribute_set_t;
> +
> +typedef struct attrreference {
> + int attr_dataoffset;
> + int attr_length;
> + ...;
> +} attrreference_t;
> +
> +typedef struct val_attrs {
> + uint32_t length;
> + attribute_set_t returned;
> + attrreference_t name_info;
> + uint32_t obj_type;
> + struct timespec mtime;
> + uint32_t accessmask;
> + int datalength;
> + ...;
> +} val_attrs_t;
> +
> +/* the exact layout of the above struct will be figured out during build time */
> +
> +typedef int ... time_t;
> +typedef int ... off_t;
> +
> +typedef struct timespec {
> + time_t tv_sec;
> + ...;
> +};
> +
> +int getattrlist(const char* path, struct attrlist * attrList, void * attrBuf,
> + size_t attrBufSize, unsigned int options);
> +
> +int getattrlistbulk(int dirfd, struct attrlist * attrList, void * attrBuf,
> + size_t attrBufSize, uint64_t options);
> +
> +#define ATTR_BIT_MAP_COUNT ...
> +#define ATTR_CMN_NAME ...
> +#define ATTR_CMN_OBJTYPE ...
> +#define ATTR_CMN_MODTIME ...
> +#define ATTR_CMN_ACCESSMASK ...
> +#define ATTR_CMN_ERROR ...
> +#define ATTR_CMN_RETURNED_ATTRS ...
> +#define ATTR_FILE_DATALENGTH ...
> +
> +#define VREG ...
> +#define VDIR ...
> +#define VLNK ...
> +#define VBLK ...
> +#define VCHR ...
> +#define VFIFO ...
> +#define VSOCK ...
> +
> +#define S_IFMT ...
> +
> +int open(const char *path, int oflag, int perm);
> +int close(int);
> +
> +#define O_RDONLY ...
> +''')
> +
> +if __name__ == '__main__':
> + ffi.compile()
More information about the Mercurial-devel
mailing list