[PATCH 1 of 3] osutil: implement osutil.c for Win32

Patrick Mézard pmezard at gmail.com
Sun Sep 7 14:54:53 CDT 2008


Hello Petr,

Here is a more thorough review of your latest proposal. First, you did a great job, this a valuable contribution. 

I have still many remarks, going in 3 categories:
- Real errors: I am concerned in one place about what looks like a possible _MAX_PATH buffer overflow.
- Style edits: try to reuse the existing coding style (NULL instead of 0, spaces, reducing indentation and so forth)
- Unrelated edits: your IDE or trial and errors probably caused a couple of useless changes in this patch.

I still want to investigate the execute-bit part but otherwise the patch is great.

Petr Kodl a écrit :
> # HG changeset patch
> # User Petr Kodl <petrkodl at gmail.com>
> # Date 1220646244 14400
> # Node ID 1344bb0a54d56e0b5c8ffe1bba7849b666b097aa
> # Parent  4e62be0208d3465cf241d8fb6fb7c9ce125a490b
> osutil: implement osutil.c for Win32
> 
> diff -r 4e62be0208d3 -r 1344bb0a54d5 mercurial/osutil.c
> --- a/mercurial/osutil.c	Fri Sep 05 11:04:36 2008 +0200
> +++ b/mercurial/osutil.c	Fri Sep 05 16:24:04 2008 -0400
> @@ -7,32 +7,68 @@

General remark: there are a lot of trailing spaces introduced with this patch (you can see them with emacs blank-mode for instance). Would be great to kill them all, so djc has not to do it himself :-).

> +#endif 
> +
> +#ifdef _WIN32
> +/* 
> +minimal stat struct compatible with hg expectations
> +supporting large files (64 bit size)
> +*/
> +struct non_posix_stat
> +{
> +	int     st_dev;
> +	int     st_mode;
> +	int     st_nlink;
> +	__int64 st_size;
> +	int     st_mtime;
> +	int     st_ctime; 
> +};
> +typedef struct non_posix_stat my_stat;
> +#else 
> +typedef struct stat my_stat; 
> +#endif 
> +

"my_stat", seriously... What about hg_stat or custom_stat ?

>  
>  struct listdir_stat {
>  	PyObject_HEAD
> -	struct stat st;
> +	my_stat st;
>  };
>  
>  #define listdir_slot(name) \
> -    static PyObject *listdir_stat_##name(PyObject *self, void *x) \
> -    { \
> -        return PyInt_FromLong(((struct listdir_stat *)self)->st.name); \
> -    }
> +	static PyObject *listdir_stat_##name(PyObject *self, void *x) \
> +	{ \
> +		return PyInt_FromLong(((struct listdir_stat *)self)->st.name); \
> +	}

Useless edit.

>  
>  listdir_slot(st_dev)
>  listdir_slot(st_mode)
>  listdir_slot(st_nlink)
> -listdir_slot(st_size)
>  listdir_slot(st_mtime)
>  listdir_slot(st_ctime)
> +#ifdef _WIN32 
> +static PyObject *listdir_stat_st_size(PyObject *self, void *x)
> +{ 
> +	return PyLong_FromLongLong(
> +		(PY_LONG_LONG)((struct listdir_stat *)self)->st.st_size);
> +}
> +#else
> +listdir_slot(st_size)
> +#endif
>  
>  static struct PyGetSetDef listdir_stat_getsets[] = {
>  	{"st_dev", listdir_stat_st_dev, 0, 0, 0},
> @@ -95,6 +131,202 @@
>  	0,                         /* tp_alloc */
>  	listdir_stat_new,          /* tp_new */
>  };
> +
> +#ifdef _WIN32 
> +
> +static int to_python_time(FILETIME* ms_time)
> +{

s/100-nanosecond/100-nanoseconds/

> +	/* this is number of 100-nanosecond between epoch and January 1 1601 */
> +	static __int64 a0 = (__int64)134774L*(__int64)24L
> +						*(__int64)3600L*(__int64)1000L
> +						*(__int64)1000L*(__int64)10L;
> +	/* conversion factor back to 1s resoluion required by Python ctime */

s/resoluion/resolution/

> +	static __int64 a1 = 1000*1000*10;
> +	__int64 tmp; 
> +	memcpy(&tmp,ms_time,sizeof(__int64));

People may be interested in http://blogs.msdn.com/oldnewthing/archive/2004/08/25/220195.aspx.
Your version is correct, I think.

> +	return (int)((tmp-a0)/a1);
> +}
> +
> +static int allow_unicode()
> +{
> +	static int allow = -1;
> +	/* unicode supported for NT */
> +	if(allow==-1) allow = (GetVersion() < 0x80000000) ? 1 : 0; 
> +	return allow;
> +}
> +
> +static PyObject *listdir(PyObject *self, PyObject *args, PyObject *kwargs)
> +{
> +	PyObject *pathobj     = NULL,
> +			 *statobj     = NULL,
> +			 *list        = NULL,
> +			 *items       = NULL, 
> +			 *ctor_args   = NULL,
> +			 *item0       = NULL,
> +			 *item1       = NULL,
> +			 *py_st       = NULL;
> +	HANDLE    fh = 0;
> +	struct non_posix_stat* stp=0;

Posix code already uses NULL instead of 0. Can we do the same here ?
Also, everything is separated with spaces, without smart indentation which makes it readable whatever editor people use.

> +	static char *kwlist[] = { "path", "stat", NULL };
> +	if(PyArg_ParseTupleAndKeywords(args, kwargs, "O|O:listdir", 
> +		kwlist, &pathobj, &statobj))
> +	{

Can you reverse the test and "goto error" (like in Posix code) ? That would make the whole block much more readable IMHO.

> +		int keepstat= statobj && PyObject_IsTrue(statobj);

Please add a space between the variable name and the equal sign.

> +		int unicode = allow_unicode() && PyUnicode_CheckExact(pathobj);
> +		WIN32_FIND_DATAA fd_a;
> +		WIN32_FIND_DATAW fd_w;
> +		if(unicode) 
> +		{
> +			Py_ssize_t len = PyUnicode_GET_SIZE(pathobj); 
> +			wchar_t *wpath = _alloca((len+5)*sizeof(wchar_t));

Please separate operands and operators with a space (single notice, there many many other ones in the code).
Also I wonder if there are code paths in calling code which can lead the stack to be blown in all cases. Perhaps we could check the input string size is reasonable. Or not allocate from the stack.

> +			memset(wpath, 0, (len+5)*sizeof(wchar_t));
> +			if(PyUnicode_AsWideChar((PyUnicodeObject*)pathobj, wpath, len)!=len) 
> +				return 0; 

Can you avoid intermediate "return" and always goto error/another mark, like in the Posix code.

> +			if(len>0 && wpath[len-1]!=L':' 
> +				&& wpath[len-1]!=L'/' && wpath[len-1]!=L'\\') 
> +				wpath[len++]=L'\\';
> +			wcscpy(wpath+len, L"*.*");

Is there any reason to use "*.*" instead of "*" ?

> +			fh = FindFirstFileW(wpath, &fd_w); 
> +		}
> +		else if(PyString_CheckExact(pathobj))
> +		{
> +			Py_ssize_t  len = PyString_GET_SIZE(pathobj);
> +			char path[_MAX_PATH]; 
> +			strncpy(path, PyString_AS_STRING(pathobj), _MAX_PATH);
> +			if(len>0 && path[len-1]!=':' 
> +				&& path[len-1]!='/' && path[len-1]!='\\') 
> +				path[len++]='\\';

What if len >= _MAX_PATH ?

> +			strcpy(path+len, "*.*");
> +			fh = FindFirstFileA(path, &fd_a); 
> +		}
> +		else
> +		{
> +			PyErr_SetString(PyExc_TypeError, 
> +			"listdir - expected string or unicode as first argument"); 
> +			goto error;
> +		}
> +		if(INVALID_HANDLE_VALUE!=fh) 
> +		{

Let's reverse the test, "goto error" after the PyErr call and deindent the whole block.

> +			list      = PyList_New(0);
> +			ctor_args = PyTuple_New(0);
> +			if(!list || !ctor_args) 
> +			{
> +				PyErr_NoMemory();
> +				goto error;
> +			}
> +			do
> +			{
> +				#define FD(NAME) (unicode ? fd_w.NAME : fd_a.NAME)
> +				int isdir = (FD(dwFileAttributes) & FILE_ATTRIBUTE_DIRECTORY);
> +				int isro  = (FD(dwFileAttributes) & FILE_ATTRIBUTE_READONLY);
> +				if(     !isdir 
> +					||  (unicode && wcscmp(fd_w.cFileName, L".") 
> +							&& wcscmp(fd_w.cFileName, L".."))
> +					||  (!unicode&& strcmp(fd_a.cFileName, ".")  
> +							&& strcmp(fd_a.cFileName, "..")))

Or again:

if(isdir && 
	((unicode && (wcscmp(fd_w.cFileName, L".") == 0 || wcscmp(fd_w.cFileName, L"..") == 0)) 
	|| (!unicode && (strcmp(fd_a.cFileName, ".") == 0 || strcmp(fa_w.cFileName, "..") == 0))))
	continue;

and deindent what follows ?

> +				{
> +					items = PyTuple_New(keepstat ? 3 : 2);
> +					item0 = unicode ? 
> +							PyUnicode_FromWideChar(fd_w.cFileName, 
> +								wcslen(fd_w.cFileName)) 
> +							: PyString_FromString(fd_a.cFileName);
> +					item1 = PyInt_FromLong(isdir ? _S_IFDIR : _S_IFREG);
> +					if(!items || !item0 || !item1) 
> +					{
> +						PyErr_NoMemory();
> +						goto error;
> +					}
> +					PyTuple_SetItem(items, 0, item0);
> +					PyTuple_SetItem(items, 1, item1);
> +					item0 = item1 = 0;
> +					if(keepstat) 
> +					{
> +						py_st = PyObject_CallObject( 
> +									(PyObject *)&listdir_stat_type,
> +									ctor_args);
> +						if(!py_st) 
> +						{
> +							PyErr_NoMemory();
> +							goto error;
> +						}
> +						stp = &((struct listdir_stat *)py_st)->st;
> +						stp->st_mtime = to_python_time(unicode 
> +										? &fd_w.ftLastWriteTime 
> +										: &fd_a.ftLastWriteTime);
> +						stp->st_ctime = to_python_time(unicode 
> +										? &fd_w.ftCreationTime  
> +										: &fd_a.ftCreationTime);
> +						stp->st_dev   = 0;
> +						stp->st_size  = 0;
> +						stp->st_mode  = (isdir ? (S_IFDIR | 0111) : S_IFREG) 
> +										| (isro ? 0444 : 0666); 
> +						if(!isdir) 
> +						{
> +							stp->st_size = (__int64)(FD(nFileSizeHigh)<<32) 
> +											+ FD(nFileSizeLow);
> +							if(!unicode)
> +							{
> +								char* dot = strrchr(fd_a.cFileName, '.');
> +								if (dot) 
> +								{
> +									if( !stricmp(dot, ".bat") 
> +										|| !stricmp(dot, ".cmd")
> +										|| !stricmp(dot, ".exe")
> +										|| !stricmp(dot, ".com"))
> +									stp->st_mode |= 0111;
> +								}
> +							}
> +							else
> +							{
> +								wchar_t* dot = wcsrchr(fd_w.cFileName, L'.');
> +								if (dot) 
> +								{
> +									if( !_wcsicmp(dot, L".bat") 
> +										|| !_wcsicmp(dot, L".cmd")
> +										|| !_wcsicmp(dot, L".exe")
> +										|| !_wcsicmp(dot, L".com"))
> +									stp->st_mode |= 0111;
> +								}
> +							}

I am still not sure we want this code. I will try to review these blocks later after looking at the current listdir/stat behaviour and use.

> +						}
> +						PyTuple_SET_ITEM(items, 2, py_st); 
> +						py_st = 0;
> +					}
> +					if(-1==PyList_Append(list, items)) 

We do not use this idiom, I would even write:

if(PyList_Append(list, items) < 0) 

> +					{
> +						goto error;
> +					}

No need for the braces.

> +					Py_XDECREF(items); 
> +					items = 0;
> +				}
> +				#undef FD
> +			}
> +			while(unicode ? FindNextFileW(fh, &fd_w) 
> +							: FindNextFileA(fh, &fd_a));
> +			Py_XDECREF(ctor_args); ctor_args=0;
> +			if(GetLastError()!=ERROR_NO_MORE_FILES || !FindClose(fh)) 
> +			{
> +				PyErr_SetExcFromWindowsErr(PyExc_OSError, GetLastError());
> +				goto error;
> +			}
> +			fh = 0;
> +			if(-1==PyList_Sort(list)) goto error;

if(PyList_Sort(list) < 0) 
	goto error;

> +			return list;

Again, it would be great to have a single exit point in this function. Could be done by having two goto label, one cleaning up the list and not the other.

> +		}
> +		else
> +			PyErr_SetExcFromWindowsErr(PyExc_OSError, GetLastError());
> +	}
> +error:
> +	Py_XDECREF(list);
> +	Py_XDECREF(ctor_args);
> +	Py_XDECREF(items);
> +	Py_XDECREF(item0);    
> +	Py_XDECREF(item1);
> +	if(fh) FindClose(fh);
> +	return 0;
> +}
> +
> +#else
>  
>  static PyObject *listfiles(PyObject *list, DIR *dir,
>  			   int keep_stat, int *need_stat)
> @@ -200,7 +432,7 @@
>  #endif
>  		if (ret == -1)
>  			return PyErr_SetFromErrnoWithFilename(PyExc_OSError,
> -							      path);
> +								  path);

Useless edit.

>  
>  		if (kind == -1) {
>  			if (S_ISREG(stp->st_mode))
> @@ -295,7 +527,7 @@
>  		closedir(dir);
>  	return err ? err : list;
>  }
> -
> +#endif
>  
>  static char osutil_doc[] = "Native operating system services.";


--
Patrick Mézard



More information about the Mercurial-devel mailing list