osutil.c optimization for Win32

Petr Kodl petrkodl at gmail.com
Wed Sep 3 13:53:25 CDT 2008


This is an optimization that avoids most of the lstat calls by taking
advantage of the file information that the Win32 FindFirstFile/FindNextFile
iterator already returns for each directory entry.
Your mileage may vary: I see a speedup of about 2x for a simple hg stat on a
local copy of my average repository (~30k files), which makes it faster than
Git ;-) on the same tree.

patch is recorded against hg-crew


# HG changeset patch
# User Petr Kodl<petrkodl at gmail.com>
# Date 1220467465 14400
# Node ID b3110f7d0be144275457047ae32a3530ca5c37bc
# Parent  5e1a867e5d65c059dc2a79e35769c5a99748c177
lstat optimization on Win32 similar to osutil.c

diff -r 5e1a867e5d65 -r b3110f7d0be1 mercurial/osutil.py
--- a/mercurial/osutil.py    Sat Aug 02 19:34:50 2008 +0400
+++ b/mercurial/osutil.py    Wed Sep 03 14:44:25 2008 -0400
@@ -1,4 +1,5 @@
 import os, stat
+

 def _mode_to_kind(mode):
     if stat.S_ISREG(mode): return stat.S_IFREG
@@ -35,3 +36,9 @@
         else:
             result.append((fn, _mode_to_kind(st.st_mode)))
     return result
+
+try:
+    import osutil_win32
+    listdir = osutil_win32.listdir
+except:
+    pass
diff -r 5e1a867e5d65 -r b3110f7d0be1 mercurial/osutil_win32.c
--- /dev/null    Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/osutil_win32.c    Wed Sep 03 14:44:25 2008 -0400
@@ -0,0 +1,235 @@
+/*
+ osutil_win32.c - native operating system services for win32
+
+ Copyright 2007 pko <petrkodl at gmail.com>
+
+ This software may be used and distributed according to the terms of
+ the GNU General Public License, incorporated herein by reference.
+
+ Based on osutil.c
+
+*/
+#include <Python.h>
+#include <string.h>
+#include <windows.h>
+#include <stdlib.h>
+
+/*
+ * Minimal stat structure, compatible with what hg expects.
+ */
+struct my_stat {
+    int st_dev;
+    int st_mode;
+    int st_nlink;
+    __int64 st_size;
+    int st_mtime;
+    int st_ctime;
+};
+
+/* Python object layout: the standard object header followed by one my_stat. */
+struct listdir_stat {
+    PyObject_HEAD
+    struct my_stat st;
+};
+
+/*
+ * Define listdir_stat_<field>(): an attribute getter that returns the
+ * member <field> of the wrapped my_stat as a Python int.
+ */
+#define listdir_slot(field) \
+    static PyObject *listdir_stat_##field(PyObject *self, void *closure) \
+    { \
+        struct listdir_stat *entry = (struct listdir_stat *)self; \
+        return PyInt_FromLong(entry->st.field); \
+    }
+
+/* Attribute getter for st_size; the 64-bit value is returned as a Python long. */
+static PyObject *listdir_stat_st_size(PyObject *self, void *x)
+{
+    struct listdir_stat *entry = (struct listdir_stat *)self;
+    PY_LONG_LONG size = (PY_LONG_LONG)entry->st.st_size;
+
+    return PyLong_FromLongLong(size);
+}
+
+
+/* Getters for the int-valued fields; st_size has its own hand-written getter. */
+listdir_slot(st_dev)
+listdir_slot(st_mode)
+listdir_slot(st_nlink)
+listdir_slot(st_mtime)
+listdir_slot(st_ctime)
+
+/* Attribute table of the stat type: every field has a getter and no setter. */
+static struct PyGetSetDef listdir_stat_getsets[] = {
+    {"st_dev",   (getter)listdir_stat_st_dev,   NULL, NULL, NULL},
+    {"st_mode",  (getter)listdir_stat_st_mode,  NULL, NULL, NULL},
+    {"st_nlink", (getter)listdir_stat_st_nlink, NULL, NULL, NULL},
+    {"st_size",  (getter)listdir_stat_st_size,  NULL, NULL, NULL},
+    {"st_mtime", (getter)listdir_stat_st_mtime, NULL, NULL, NULL},
+    {"st_ctime", (getter)listdir_stat_st_ctime, NULL, NULL, NULL},
+    {NULL, NULL, NULL, NULL, NULL}  /* sentinel */
+};
+
+/* tp_new: allocate an instance through the type's own allocator; a and k are ignored. */
+static PyObject *listdir_stat_new(PyTypeObject *t, PyObject *a, PyObject *k)
+{
+    PyObject *instance = t->tp_alloc(t, 0);
+
+    return instance;
+}
+
+/* tp_dealloc: release the instance through its type's tp_free slot. */
+static void listdir_stat_dealloc(PyObject *o)
+{
+    PyTypeObject *type = o->ob_type;
+
+    type->tp_free(o);
+}
+
+/*
+ * Type object for "osutil_win32.stat".  Only the deallocator, flags, doc
+ * string, attribute table (listdir_stat_getsets) and tp_new are filled in;
+ * every other slot is left 0.  The initializer is positional, so the order
+ * of the entries must match the PyTypeObject layout.
+ */
+static PyTypeObject listdir_stat_type = {
+    PyObject_HEAD_INIT(NULL)
+    0,                         /*ob_size*/
+    "osutil_win32.stat",             /*tp_name*/
+    sizeof(struct listdir_stat), /*tp_basicsize*/
+    0,                         /*tp_itemsize*/
+    (destructor)listdir_stat_dealloc, /*tp_dealloc*/
+    0,                         /*tp_print*/
+    0,                         /*tp_getattr*/
+    0,                         /*tp_setattr*/
+    0,                         /*tp_compare*/
+    0,                         /*tp_repr*/
+    0,                         /*tp_as_number*/
+    0,                         /*tp_as_sequence*/
+    0,                         /*tp_as_mapping*/
+    0,                         /*tp_hash */
+    0,                         /*tp_call*/
+    0,                         /*tp_str*/
+    0,                         /*tp_getattro*/
+    0,                         /*tp_setattro*/
+    0,                         /*tp_as_buffer*/
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
+    "stat objects",            /* tp_doc */
+    0,                         /* tp_traverse */
+    0,                         /* tp_clear */
+    0,                         /* tp_richcompare */
+    0,                         /* tp_weaklistoffset */
+    0,                         /* tp_iter */
+    0,                         /* tp_iternext */
+    0,                         /* tp_methods */
+    0,                         /* tp_members */
+    listdir_stat_getsets,      /* tp_getset */
+    0,                         /* tp_base */
+    0,                         /* tp_dict */
+    0,                         /* tp_descr_get */
+    0,                         /* tp_descr_set */
+    0,                         /* tp_dictoffset */
+    0,                         /* tp_init */
+    0,                         /* tp_alloc */
+    listdir_stat_new,          /* tp_new */
+};
+
+/*
+ * FILETIME counts 100-nanosecond ticks since 1601-01-01.  The first
+ * constant is 134774 days expressed in those ticks, the second is the
+ * number of ticks in one second.
+ */
+static const __int64 unix_epoch_ticks =
+    (__int64)134774 * 24 * 3600 * 1000 * 1000 * 10;
+static const __int64 ticks_per_second = 1000 * 1000 * 10;
+
+/*
+ * Convert a FILETIME to whole seconds relative to unix_epoch_ticks.
+ * The result is truncated to int.
+ */
+static int to_python_time(FILETIME* ms_time)
+{
+    __int64 ticks;
+
+    /* Copy the bytes rather than casting the pointer. */
+    memcpy(&ticks, ms_time, sizeof(ticks));
+    ticks -= unix_epoch_ticks;
+    return (int)(ticks / ticks_per_second);
+}
+
+/*
+ * listdir(path, stat=False) -> sorted list of tuples
+ *
+ * Enumerate a directory with FindFirstFile/FindNextFile and build every
+ * entry from the WIN32_FIND_DATA record, without a per-file stat call.
+ * Each tuple is (name, kind), or (name, kind, statobj) when stat is true;
+ * kind is _S_IFDIR for directories and _S_IFREG for everything else.
+ * Directories named ".", ".." and ".hg" are left out.
+ *
+ * Returns a new reference to the sorted list, or NULL with a Python
+ * exception set (OSError for Win32 failures, including a path that does
+ * not fit the search buffer).
+ *
+ * NOTE(review): the ".hg" exclusion is kept unchanged from the original
+ * patch; confirm that no caller needs to see nested ".hg" directories.
+ */
+static PyObject *listdir(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    static char *kwlist[] = { "path", "stat", NULL };
+    static const char pattern[] = "\\*.*";
+    PyObject *list        = NULL;
+    PyObject *item        = NULL;
+    PyObject *py_st       = NULL;
+    PyObject *statobj     = NULL;
+    struct my_stat *stp   = NULL;
+    char  *path;
+    int   path_len;
+    int   keep_stat = 0;
+    int   failed = 1;
+    DWORD err;
+    WIN32_FIND_DATA fd;
+    HANDLE  fh = INVALID_HANDLE_VALUE;
+    char full_path[_MAX_PATH+10];
+
+    if(!PyArg_ParseTupleAndKeywords(args,kwargs,"s#|O:listdir",kwlist,&path,&path_len,&statobj))
+        goto end;
+
+    if(statobj)
+    {
+        /* PyObject_IsTrue returns -1 when the truth test itself raises */
+        keep_stat = PyObject_IsTrue(statobj);
+        if(keep_stat < 0)
+            goto end;
+    }
+
+    /* refuse paths that would overflow full_path once the pattern is added */
+    if(path_len < 0 || path_len > (int)(sizeof(full_path) - sizeof(pattern)))
+    {
+        PyErr_SetExcFromWindowsErrWithFilename(PyExc_OSError,ERROR_FILENAME_EXCED_RANGE,path);
+        goto end;
+    }
+    memcpy(full_path,path,path_len);
+    memcpy(full_path+path_len,pattern,sizeof(pattern)); /* copies the NUL too */
+
+    fh = FindFirstFile(full_path,&fd);
+    if(INVALID_HANDLE_VALUE==fh)
+    {
+        PyErr_SetExcFromWindowsErrWithFilename(PyExc_OSError,GetLastError(),path);
+        goto end;
+    }
+
+    list = PyList_New(0);
+    if(!list)
+        goto end;
+
+    do
+    {
+        PyObject *field;
+        int isdir = (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY);
+        int isro  = (fd.dwFileAttributes & FILE_ATTRIBUTE_READONLY);
+
+        if(isdir && (!strcmp(fd.cFileName,".") || !strcmp(fd.cFileName,"..") || !strcmp(fd.cFileName,".hg")))
+            continue;
+
+        item = PyTuple_New(keep_stat ? 3 : 2);
+        if(!item)
+            goto end;
+
+        /* PyTuple_SetItem steals the reference to each field */
+        field = PyString_FromString(fd.cFileName);
+        if(!field)
+            goto end;
+        PyTuple_SetItem(item,0,field);
+
+        field = PyInt_FromLong(isdir ? _S_IFDIR : _S_IFREG);
+        if(!field)
+            goto end;
+        PyTuple_SetItem(item,1,field);
+
+        if(keep_stat)
+        {
+            /* NULL arguments means "call with no arguments" */
+            py_st = PyObject_CallObject((PyObject *)&listdir_stat_type,NULL);
+            if(!py_st)
+                goto end;
+            stp = &((struct listdir_stat *)py_st)->st;
+            stp->st_mtime = to_python_time(&fd.ftLastWriteTime);
+            stp->st_ctime = to_python_time(&fd.ftCreationTime);
+            stp->st_dev   = 0;
+            stp->st_size  = 0;
+            stp->st_mode  = (isdir ? (S_IFDIR | 0111) : S_IFREG) | (isro ? 0444 : 0666);
+            if(!isdir)
+            {
+                /* files with these extensions get the execute bits */
+                char *dot = strrchr(fd.cFileName,'.');
+                if(dot && (!stricmp(dot,".bat") || !stricmp(dot,".cmd") ||
+                           !stricmp(dot,".exe") || !stricmp(dot,".com")))
+                {
+                    stp->st_mode |= 0111;
+                }
+                /* widen before shifting: a 32-bit shift by 32 loses the high part */
+                stp->st_size = ((__int64)fd.nFileSizeHigh<<32) + fd.nFileSizeLow;
+            }
+            PyTuple_SetItem(item,2,py_st);
+            py_st = NULL;
+        }
+
+        if(PyList_Append(list,item) < 0)
+            goto end;
+        Py_DECREF(item);
+        item = NULL;
+    }
+    while(FindNextFile(fh,&fd));
+
+    /* FindNextFile returning FALSE is only "done" for ERROR_NO_MORE_FILES */
+    err = GetLastError();
+    if(err != ERROR_NO_MORE_FILES)
+    {
+        PyErr_SetExcFromWindowsErrWithFilename(PyExc_OSError,err,path);
+        goto end;
+    }
+
+    if(PyList_Sort(list) < 0)
+        goto end;
+
+    failed = 0;
+end:
+    if(fh != INVALID_HANDLE_VALUE)
+        FindClose(fh);
+    Py_XDECREF(item);
+    Py_XDECREF(py_st);
+    if(failed)
+    {
+        /* never hand back a partial list while an exception is pending */
+        Py_XDECREF(list);
+        list = NULL;
+    }
+    return list;
+}
+
+/* Module docstring. */
+static char osutil_doc[] = "Native Win32 operating system services.";
+
+/* Module method table: listdir accepts positional and keyword arguments. */
+static PyMethodDef methods[] = {
+    {"listdir", (PyCFunction)listdir, METH_VARARGS | METH_KEYWORDS, "list a directory\n"},
+    {NULL, NULL, 0, NULL}  /* sentinel */
+};
+
+/* Module entry point: finalize the stat type, then create the module. */
+PyMODINIT_FUNC initosutil_win32(void)
+{
+    if (PyType_Ready(&listdir_stat_type) != -1) {
+        Py_InitModule3("osutil_win32", methods, osutil_doc);
+    }
+}
diff -r 5e1a867e5d65 -r b3110f7d0be1 setup.py
--- a/setup.py    Sat Aug 02 19:34:50 2008 +0400
+++ b/setup.py    Wed Sep 03 14:44:25 2008 -0400
@@ -115,6 +115,12 @@
             packages.extend(['hgext.inotify', 'hgext.inotify.linux'])
 except ImportError:
     pass
+
+try:
+    import msvcrt
+    ext_modules.append(Extension('mercurial.osutil_win32',
['mercurial/osutil_win32.c']))
+except ImportError:
+    pass

 setup(name='mercurial',
       version=mercurial.version.get_version(),
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://selenic.com/pipermail/mercurial-devel/attachments/20080903/265d1264/attachment.htm 


More information about the Mercurial-devel mailing list