[PATCH 2 of 2] osutil: use getdirentriesattr on OS X if possible
Siddharth Agarwal
sid at less-broken.com
Wed Mar 25 22:19:29 CDT 2015
On 03/25/2015 08:14 PM, Siddharth Agarwal wrote:
> # HG changeset patch
> # User Siddharth Agarwal <sid0 at fb.com>
> # Date 1427324131 25200
> # Wed Mar 25 15:55:31 2015 -0700
> # Node ID c015024cd8c4f82ed1f581f151f645a2228130af
> # Parent 247fb9a8ff9de9026d2b5d58b825da9975a37e5c
> osutil: use getdirentriesattr on OS X if possible
There are a couple of error cases I need to handle properly. I'll send
an updated V2.
>
> This is a significant win for large repositories on OS X, especially with a
> cold cache. Unfortunately not all filesystems support getdirentriesattr, which
> means that we need to keep the lstat-based implementation around as well.
>
> The below tests are all performed on a Mac with an SSD running OS X 10.9, on a
> repository with over 200k files. The results are best of 5 with simulated
> best-effort conditions.
>
> The gains with a hot cache are pretty impressive: 'hg status' goes from 5.18
> seconds to 3.79 seconds.
>
> However, in the hot cache case, a repository that large will probably already be
> using something like hgwatchman [1]. Where this really helps is when the cache
> is cold [2]. hg status goes from 31.0 seconds to 9.66.
>
> See http://lists.apple.com/archives/filesystem-dev/2014/Dec/msg00002.html for
> some more discussion about this function.
>
> This is based on a patch by Sean Farley <smf at smf.io>.
>
> [1] https://bitbucket.org/facebook/hgwatchman
>
> [2] There appears to be no easy way to clear the file cache (aka "vnodes") on
> OS X short of rebooting. purge(8) purportedly does that but in my testing had
> little effect. The workaround I came up with was to assume that vnode eviction
> was LRU, make sure the kern.maxvnodes sysctl is smaller than the size of the
> repository, then make sure we'd always miss the cache by running 'hg status' in
> another clone of the repository before running it in the test repository.
>
> diff --git a/mercurial/osutil.c b/mercurial/osutil.c
> --- a/mercurial/osutil.c
> +++ b/mercurial/osutil.c
> @@ -24,6 +24,11 @@
> #include <unistd.h>
> #endif
>
> +#ifdef __APPLE__
> +#include <sys/attr.h>
> +#include <sys/vnode.h>
> +#endif
> +
> #include "util.h"
>
> /* some platforms lack the PATH_MAX definition (eg. GNU/Hurd) */
> @@ -392,8 +397,171 @@
> return ret;
> }
>
> +#ifdef __APPLE__
> +
> +typedef struct {
> + u_int32_t length;
> + attrreference_t name;
> + fsobj_type_t obj_type;
> + struct timespec mtime;
> +#if __LITTLE_ENDIAN__
> + mode_t access_mask;
> + uint16_t padding;
> +#else
> + uint16_t padding;
> + mode_t access_mask;
> +#endif
> + off_t size;
> +} __attribute__((packed)) attrbuf_entry;
> +
> +int attrkind(attrbuf_entry *entry)
> +{
> + switch (entry->obj_type) {
> + case VREG: return S_IFREG;
> + case VDIR: return S_IFDIR;
> + case VLNK: return S_IFLNK;
> + case VBLK: return S_IFBLK;
> + case VCHR: return S_IFCHR;
> + case VFIFO: return S_IFIFO;
> + case VSOCK: return S_IFSOCK;
> + }
> + return -1;
> +}
> +
> +/* get these many entries at a time */
> +#define LISTDIR_BATCH_SIZE 50
> +
> +static PyObject *_listdir_batch(char *path, int pathlen, int keepstat,
> + char *skip, bool *fallback)
> +{
> + PyObject *list, *elem, *stat = NULL, *ret = NULL;
> + char fullpath[PATH_MAX + 10];
> + int kind, err;
> + unsigned long index;
> + int done = 0;
> + unsigned int count;
> + attrbuf_entry *entry;
> + /* from the getattrlist(2) man page: a path can be no longer than
> + (NAME_MAX * 3 + 1) bytes. Also, "The getattrlist() function will
> + silently truncate attribute data if attrBufSize is too small." So
> + pass in a buffer big enough for the worst case. */
> + char attrbuf[LISTDIR_BATCH_SIZE * (sizeof(attrbuf_entry) + NAME_MAX * 3 + 1)];
> + unsigned int unused1, unused2;
> +
> + struct stat st;
> +
> + /* these must match the attrbuf_entry struct, otherwise you'll end up
> + with garbage */
> + struct attrlist requested_attr = {0};
> + requested_attr.bitmapcount = ATTR_BIT_MAP_COUNT;
> + requested_attr.commonattr = (ATTR_CMN_NAME | ATTR_CMN_OBJTYPE |
> + ATTR_CMN_MODTIME | ATTR_CMN_ACCESSMASK);
> + requested_attr.fileattr = ATTR_FILE_TOTALSIZE;
> +
> + *fallback = false;
> +
> + int dfd = -1;
> +
> + if (pathlen >= PATH_MAX) {
> + errno = ENAMETOOLONG;
> + PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
> + goto error_value;
> + }
> +
> + strncpy(fullpath, path, PATH_MAX);
> + fullpath[pathlen] = '/';
> +
> + dfd = open(path, O_RDONLY);
> + if (dfd == -1) {
> + PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
> + goto error_value;
> + }
> +
> + list = PyList_New(0);
> + if (!list)
> + goto error_dir;
> +
> + do {
> + count = LISTDIR_BATCH_SIZE;
> + err = getdirentriesattr(dfd, &requested_attr, &attrbuf,
> + sizeof(attrbuf), &count, &unused1,
> + &unused2, 0);
> + if (err < 0) {
> + if (errno == ENOTSUP) {
> + /* We're on a filesystem that doesn't support
> + getdirentriesattr. Fall back to the
> + stat-based implementation. */
> + *fallback = true;
> + } else
> + PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
> + goto error;
> + } else
> + done = err;
> +
> + entry = (attrbuf_entry *)attrbuf;
> +
> + for (index = 0; index < count; index++) {
> + char *filename = ((char *)&entry->name) +
> + entry->name.attr_dataoffset;
> +
> + if (!strcmp(filename, ".") || !strcmp(filename, ".."))
> + continue;
> +
> + kind = attrkind(entry);
> +
> + /* quit early? */
> + if (skip && kind == S_IFDIR && !strcmp(filename, skip)) {
> + ret = PyList_New(0);
> + goto error;
> + }
> +
> + if (keepstat) {
> + /* from the getattrlist(2) man page: "Only the
> + permission bits ... are valid". */
> + st.st_mode = (entry->access_mask & ~S_IFMT) | kind;
> + st.st_mtime = entry->mtime.tv_sec;
> + st.st_size = entry->size;
> + stat = makestat(&st);
> + if (!stat)
> + goto error;
> + elem = Py_BuildValue("siN", filename, kind, stat);
> + } else
> + elem = Py_BuildValue("si", filename, kind);
> + if (!elem)
> + goto error;
> + stat = NULL;
> +
> + PyList_Append(list, elem);
> + Py_DECREF(elem);
> +
> + entry = (attrbuf_entry *)((char *)entry + entry->length);
> + }
> + } while (err == 0 && !done);
> +
> + ret = list;
> + Py_INCREF(ret);
> +
> +error:
> + Py_DECREF(list);
> + Py_XDECREF(stat);
> +error_dir:
> + close(dfd);
> +error_value:
> + return ret;
> +}
> +
> +#endif /* __APPLE__ */
> +
> static PyObject *_listdir(char *path, int pathlen, int keepstat, char *skip)
> {
> +#ifdef __APPLE__
> + PyObject *ret;
> + bool fallback = false;
> +
> + ret = _listdir_batch(path, pathlen, keepstat, skip, &fallback);
> + if (ret != NULL || !fallback)
> + return ret;
> +#endif
> return _listdir_stat(path, pathlen, keepstat, skip);
> }
>
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
More information about the Mercurial-devel
mailing list