osutil.c optimization for Win32
Petr Kodl
petrkodl at gmail.com
Wed Sep 3 13:53:25 CDT 2008
This is an optimization that avoids most of the lstat calls by taking
advantage of the file information embedded in the Win32 FindFirst/FindNext iterator.
The speedup may vary - I see a factor of 2x with my average repository
(~30k files) for a simple hg stat on a local repo - which makes it faster than
Git ;-) on the same tree.
The patch is recorded against hg-crew.
# HG changeset patch
# User Petr Kodl<petrkodl at gmail.com>
# Date 1220467465 14400
# Node ID b3110f7d0be144275457047ae32a3530ca5c37bc
# Parent 5e1a867e5d65c059dc2a79e35769c5a99748c177
lstat optimization on Win32 similar to osutil.c
diff -r 5e1a867e5d65 -r b3110f7d0be1 mercurial/osutil.py
--- a/mercurial/osutil.py Sat Aug 02 19:34:50 2008 +0400
+++ b/mercurial/osutil.py Wed Sep 03 14:44:25 2008 -0400
@@ -1,4 +1,5 @@
import os, stat
+
def _mode_to_kind(mode):
if stat.S_ISREG(mode): return stat.S_IFREG
@@ -35,3 +36,9 @@
else:
result.append((fn, _mode_to_kind(st.st_mode)))
return result
+
+try:
+ import osutil_win32
+ listdir = osutil_win32.listdir
+except:
+ pass
diff -r 5e1a867e5d65 -r b3110f7d0be1 mercurial/osutil_win32.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/osutil_win32.c Wed Sep 03 14:44:25 2008 -0400
@@ -0,0 +1,235 @@
+/*
+ osutil_win32.c - native operating system services for win32
+
+ Copyright 2007 pko <petrkodl at gmail.com>
+
+ This software may be used and distributed according to the terms of
+ the GNU General Public License, incorporated herein by reference.
+
+ Based on osutil.c
+
+*/
+#include <Python.h>
+#include <string.h>
+#include <windows.h>
+#include <stdlib.h>
+
+/*
+minimal stat struct compatible with hg expectations
+
+Filled in by listdir() from the WIN32_FIND_DATA of each directory entry and
+exposed to Python through the listdir_stat getters.
+*/
+struct my_stat
+{
+ int st_dev; /* listdir() always stores 0 here */
+ int st_mode; /* file type bit (directory or regular file) OR'ed with permission bits */
+ int st_nlink; /* never assigned by listdir() */
+ __int64 st_size; /* taken from nFileSizeHigh/nFileSizeLow; left at 0 for directories */
+ int st_mtime; /* ftLastWriteTime converted by to_python_time() */
+ int st_ctime; /* ftCreationTime converted by to_python_time() */
+};
+
+struct listdir_stat { /* Python object wrapping one struct my_stat */
+ PyObject_HEAD /* standard Python object header; must come first */
+ struct my_stat st; /* the stat values read by the st_* getters */
+};
+
+/*
+ * Define the getter listdir_stat_<name>, which returns the integer field
+ * st.<name> of a listdir_stat object as a Python int.
+ */
+#define listdir_slot(name) \
+    static PyObject *listdir_stat_##name(PyObject *self, void *closure) \
+    { \
+        const struct listdir_stat *rec = (const struct listdir_stat *)self; \
+        return PyInt_FromLong(rec->st.name); \
+    }
+
+/*
+ * Getter for st_size.  The field is an __int64, so it is converted with
+ * PyLong_FromLongLong instead of going through the listdir_slot macro.
+ */
+static PyObject *listdir_stat_st_size(PyObject *self, void *x)
+{
+    struct listdir_stat *rec = (struct listdir_stat *)self;
+    PY_LONG_LONG nbytes = (PY_LONG_LONG)rec->st.st_size;
+
+    return PyLong_FromLongLong(nbytes);
+}
+
+
+listdir_slot(st_dev) /* defines listdir_stat_st_dev */
+listdir_slot(st_mode) /* defines listdir_stat_st_mode */
+listdir_slot(st_nlink) /* defines listdir_stat_st_nlink */
+listdir_slot(st_mtime) /* defines listdir_stat_st_mtime */
+listdir_slot(st_ctime) /* defines listdir_stat_st_ctime */
+
+/*
+ * Attribute table of the stat type.  Each st_* attribute has a getter and
+ * no setter, doc string or closure.
+ */
+static struct PyGetSetDef listdir_stat_getsets[] = {
+    {"st_dev",   listdir_stat_st_dev,   NULL, NULL, NULL},
+    {"st_mode",  listdir_stat_st_mode,  NULL, NULL, NULL},
+    {"st_nlink", listdir_stat_st_nlink, NULL, NULL, NULL},
+    {"st_size",  listdir_stat_st_size,  NULL, NULL, NULL},
+    {"st_mtime", listdir_stat_st_mtime, NULL, NULL, NULL},
+    {"st_ctime", listdir_stat_st_ctime, NULL, NULL, NULL},
+    {NULL, NULL, NULL, NULL, NULL}  /* sentinel */
+};
+
+/*
+ * tp_new slot of the stat type: obtain a new instance from the type's
+ * allocator.  The constructor arguments a and k are ignored.
+ */
+static PyObject *listdir_stat_new(PyTypeObject *t, PyObject *a, PyObject *k)
+{
+    PyObject *fresh = t->tp_alloc(t, 0);
+
+    return fresh;
+}
+
+/* tp_dealloc slot of the stat type: release the object through its type's tp_free. */
+static void listdir_stat_dealloc(PyObject *o)
+{
+    freefunc release = o->ob_type->tp_free;
+
+    release(o);
+}
+
+static PyTypeObject listdir_stat_type = { /* type object of the stat records built by listdir() */
+ PyObject_HEAD_INIT(NULL) /* finished by PyType_Ready() in initosutil_win32() */
+ 0, /*ob_size*/
+ "osutil_win32.stat", /*tp_name*/
+ sizeof(struct listdir_stat), /*tp_basicsize: object header plus the embedded struct my_stat*/
+ 0, /*tp_itemsize*/
+ (destructor)listdir_stat_dealloc, /*tp_dealloc: frees the instance via tp_free*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ 0, /*tp_compare*/
+ 0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash */
+ 0, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ 0, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags: BASETYPE allows the type to be subclassed*/
+ "stat objects", /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ 0, /* tp_methods */
+ 0, /* tp_members */
+ listdir_stat_getsets, /* tp_getset: the st_* attributes, getters only */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ listdir_stat_new, /* tp_new: allocates an instance through tp_alloc */
+};
+
+/* Offset subtracted from a FILETIME value: 134774 days expressed in 100 ns ticks. */
+static __int64 filetime_epoch_offset =
+    (__int64)134774L * 24 * 3600 * 1000 * 1000 * 10;
+
+/* Number of 100 ns FILETIME ticks in one second. */
+static __int64 filetime_ticks_per_sec = 10 * 1000 * 1000;
+
+/*
+ * Convert a Win32 FILETIME into the integer timestamp stored in
+ * st_mtime/st_ctime: subtract the epoch offset and scale ticks to seconds.
+ * The result is truncated to int.
+ */
+static int to_python_time(FILETIME* ms_time)
+{
+    ULARGE_INTEGER ticks;
+
+    /* Assemble the 64-bit tick count from the two 32-bit halves. */
+    ticks.LowPart = ms_time->dwLowDateTime;
+    ticks.HighPart = ms_time->dwHighDateTime;
+
+    return (int)(((__int64)ticks.QuadPart - filetime_epoch_offset)
+                 / filetime_ticks_per_sec);
+}
+
+/*
+ * listdir(path, stat=False) -> sorted list of tuples
+ *
+ * Enumerate the directory `path` with FindFirstFile/FindNextFile and return
+ * one tuple per entry: (name, kind), or (name, kind, statobj) when `stat` is
+ * true.  `kind` is _S_IFDIR or _S_IFREG.  `statobj` is an osutil_win32.stat
+ * instance filled from the WIN32_FIND_DATA of the entry, so no extra lstat
+ * call is needed.  Directories named ".", ".." and ".hg" are skipped.
+ *
+ * Returns a new reference, or NULL with a Python exception set.  Win32
+ * failures are reported as OSError carrying the Windows error code and
+ * `path`; a path that does not fit the search buffer is reported as
+ * ERROR_FILENAME_EXCED_RANGE instead of overrunning the buffer.
+ */
+static PyObject *listdir(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    static char *kwlist[] = { "path", "stat", NULL };
+    static const char pattern[] = "\\*.*";
+    PyObject *result = NULL;    /* what is finally returned */
+    PyObject *list = NULL;      /* list under construction (owned) */
+    PyObject *item = NULL;      /* tuple under construction (owned) */
+    PyObject *elem = NULL;      /* tuple member not yet stored (owned) */
+    PyObject *statobj = NULL;   /* borrowed from args */
+    PyObject *ctor_args = NULL; /* shared empty tuple for the stat ctor */
+    struct my_stat *stp;
+    char *path;
+    int path_len;
+    int keep_stat = 0;
+    DWORD err;
+    WIN32_FIND_DATA fd;
+    HANDLE fh = INVALID_HANDLE_VALUE;
+    char full_path[_MAX_PATH + 10];
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O:listdir", kwlist,
+                                     &path, &path_len, &statobj))
+        goto end;
+
+    if (statobj) {
+        keep_stat = PyObject_IsTrue(statobj);
+        if (keep_stat < 0)
+            goto end;
+    }
+
+    /* The search pattern (including its NUL) must fit behind the path. */
+    if (path_len < 0
+        || (size_t)path_len > sizeof(full_path) - sizeof(pattern)) {
+        PyErr_SetExcFromWindowsErrWithFilename(PyExc_OSError,
+                                               ERROR_FILENAME_EXCED_RANGE,
+                                               path);
+        goto end;
+    }
+    memcpy(full_path, path, (size_t)path_len);
+    memcpy(full_path + path_len, pattern, sizeof(pattern));
+
+    if (keep_stat) {
+        ctor_args = PyTuple_New(0);
+        if (!ctor_args)
+            goto end;
+    }
+
+    fh = FindFirstFile(full_path, &fd);
+    if (fh == INVALID_HANDLE_VALUE) {
+        err = GetLastError();
+        if (err == ERROR_FILE_NOT_FOUND) {
+            /* An existing but completely empty directory (a drive root
+               has no "." entry): return an empty list like os.listdir. */
+            result = PyList_New(0);
+        } else {
+            PyErr_SetExcFromWindowsErrWithFilename(PyExc_OSError,
+                                                   (int)err, path);
+        }
+        goto end;
+    }
+
+    list = PyList_New(0);
+    if (!list)
+        goto end;
+
+    do {
+        int isdir = (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
+        int isro = (fd.dwFileAttributes & FILE_ATTRIBUTE_READONLY) != 0;
+
+        if (isdir && (!strcmp(fd.cFileName, ".")
+                      || !strcmp(fd.cFileName, "..")
+                      || !strcmp(fd.cFileName, ".hg")))
+            continue;
+
+        item = PyTuple_New(keep_stat ? 3 : 2);
+        if (!item)
+            goto end;
+
+        elem = PyString_FromString(fd.cFileName);
+        if (!elem)
+            goto end;
+        PyTuple_SET_ITEM(item, 0, elem); /* steals the reference */
+        elem = NULL;
+
+        elem = PyInt_FromLong(isdir ? _S_IFDIR : _S_IFREG);
+        if (!elem)
+            goto end;
+        PyTuple_SET_ITEM(item, 1, elem);
+        elem = NULL;
+
+        if (keep_stat) {
+            elem = PyObject_CallObject((PyObject *)&listdir_stat_type,
+                                       ctor_args);
+            if (!elem)
+                goto end; /* keep the exception raised by the call */
+
+            stp = &((struct listdir_stat *)elem)->st;
+            stp->st_dev = 0;
+            stp->st_nlink = 0;
+            stp->st_size = 0;
+            stp->st_mtime = to_python_time(&fd.ftLastWriteTime);
+            stp->st_ctime = to_python_time(&fd.ftCreationTime);
+            stp->st_mode = (isdir ? (_S_IFDIR | 0111) : _S_IFREG)
+                           | (isro ? 0444 : 0666);
+            if (!isdir) {
+                const char *dot = strrchr(fd.cFileName, '.');
+
+                /* Emulate the executable bit from the file extension. */
+                if (dot && (!_stricmp(dot, ".bat")
+                            || !_stricmp(dot, ".cmd")
+                            || !_stricmp(dot, ".exe")
+                            || !_stricmp(dot, ".com")))
+                    stp->st_mode |= 0111;
+
+                /* Widen before shifting: shifting the 32-bit high word
+                   by 32 would discard it. */
+                stp->st_size = (__int64)(
+                    ((unsigned __int64)fd.nFileSizeHigh << 32)
+                    | fd.nFileSizeLow);
+            }
+            PyTuple_SET_ITEM(item, 2, elem);
+            elem = NULL;
+        }
+
+        if (PyList_Append(list, item) < 0)
+            goto end;
+        Py_DECREF(item);
+        item = NULL;
+    } while (FindNextFile(fh, &fd));
+
+    /* FindNextFile returned FALSE: only "no more files" is a normal end. */
+    err = GetLastError();
+    if (err != ERROR_NO_MORE_FILES) {
+        PyErr_SetExcFromWindowsErrWithFilename(PyExc_OSError, (int)err, path);
+        goto end;
+    }
+
+    if (PyList_Sort(list) < 0)
+        goto end;
+
+    result = list;
+    list = NULL;
+
+end:
+    if (fh != INVALID_HANDLE_VALUE)
+        FindClose(fh);
+    Py_XDECREF(ctor_args);
+    Py_XDECREF(elem);
+    Py_XDECREF(item);
+    Py_XDECREF(list); /* non-NULL only on failure: drop the partial list */
+    return result;
+}
+
+/* Module docstring handed to Py_InitModule3(). */
+static char osutil_doc[] = "Native Win32 operating system services.";
+
+/* Module-level functions: listdir(), callable with positional and keyword arguments. */
+static PyMethodDef methods[] = {
+    {
+        "listdir",
+        (PyCFunction)listdir,
+        METH_VARARGS | METH_KEYWORDS,
+        "list a directory\n"
+    },
+    {NULL, NULL, 0, NULL}   /* sentinel */
+};
+
+/*
+ * Module initialisation: finish setting up the stat type, then register the
+ * osutil_win32 module with its method table and docstring.  If the type
+ * cannot be readied the module is not created.
+ */
+PyMODINIT_FUNC initosutil_win32(void)
+{
+    if (PyType_Ready(&listdir_stat_type) < 0) {
+        return;
+    }
+
+    Py_InitModule3("osutil_win32", methods, osutil_doc);
+}
diff -r 5e1a867e5d65 -r b3110f7d0be1 setup.py
--- a/setup.py Sat Aug 02 19:34:50 2008 +0400
+++ b/setup.py Wed Sep 03 14:44:25 2008 -0400
@@ -115,6 +115,12 @@
packages.extend(['hgext.inotify', 'hgext.inotify.linux'])
except ImportError:
pass
+
+try:
+ import msvcrt
+ ext_modules.append(Extension('mercurial.osutil_win32',
['mercurial/osutil_win32.c']))
+except ImportError:
+ pass
setup(name='mercurial',
version=mercurial.version.get_version(),
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://selenic.com/pipermail/mercurial-devel/attachments/20080903/265d1264/attachment.htm
More information about the Mercurial-devel
mailing list