classification
Title: "es#" parser marker leaks memory
Type: Stage:
Components: Unicode Versions:
process
Status: closed Resolution: fixed
Dependencies: Superseder:
Assigned To: lemburg Nosy List: lemburg, loewis, mbrierst
Priority: normal Keywords:

Created on 2002-01-10 10:39 by lemburg, last changed 2003-05-03 10:01 by loewis. This issue is now closed.

Messages (6)
msg8689 - (view) Author: Marc-Andre Lemburg (lemburg) * (Python committer) Date: 2002-01-10 10:39
... if subsequent parsing fails, a buffer which was possibly 
allocated by "es#" is not freed.
msg8690 - (view) Author: Michael Stone (mbrierst) Date: 2003-02-07 20:20
Logged In: YES 
user_id=670441

In what case does this happen?  The only case
I can see is if the string is successfully encoded,
but the encoding results in a non-null terminated 
string.  That might cause problems, otherwise it
looks okay?
msg8691 - (view) Author: Marc-Andre Lemburg (lemburg) * (Python committer) Date: 2003-02-08 14:29
Logged In: YES 
user_id=38388

What I meant was that in the case of a parser marker
string like "es#iii" if the last "i" fails to convert, then the
buffer used for "es#" won't be freed up again.
msg8692 - (view) Author: Michael Stone (mbrierst) Date: 2003-02-10 20:37
Logged In: YES 
user_id=670441

Ahhhh... thank you for your reply.  I understand now.
I have a patch that gets rid of this problem by putting allocated
memory into a list of CObjects which are freed when exceptions occur.
The list is only created when necessary, to hopefully preserve efficiency
in most cases.  It has to be passed around, so several function calls had
to be changed, and to do the cleanup on return I made a macro CLEANRETURN.

This patch also fixes a memory leak when the buffer_len pointer is NULL.
This isn't a big deal since the documentation says never to pass in a NULL
buffer_len.

And it fixes another leak when a string with encoded NULL's is passed in
(again not a big deal)

If you want, I have C code that can be called from python to demonstrate
any of these leaks.  Let me know what you think of the patch/approach.

Index: Python/getargs.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/getargs.c,v
retrieving revision 2.95
diff -c -r2.95 getargs.c
*** Python/getargs.c	24 Jan 2003 22:15:21 -0000	2.95
--- Python/getargs.c	10 Feb 2003 20:28:52 -0000
***************
*** 17,26 ****
  static int vgetargs1(PyObject *, char *, va_list *, int);
  static void seterror(int, char *, int *, char *, char *);
  static char *convertitem(PyObject *, char **, va_list *, int *, char *, 
! 			 size_t);
  static char *converttuple(PyObject *, char **, va_list *,
! 			  int *, char *, size_t, int);
! static char *convertsimple(PyObject *, char **, va_list *, char *, size_t);
  static int convertbuffer(PyObject *, void **p, char **);
  
  static int vgetargskeywords(PyObject *, PyObject *,
--- 17,27 ----
  static int vgetargs1(PyObject *, char *, va_list *, int);
  static void seterror(int, char *, int *, char *, char *);
  static char *convertitem(PyObject *, char **, va_list *, int *, char *, 
! 			 size_t, PyObject **, int *);
  static char *converttuple(PyObject *, char **, va_list *,
! 			  int *, char *, size_t, int, PyObject **, int *);
! static char *convertsimple(PyObject *, char **, va_list *, char *,
! 			   size_t, PyObject **, int *);
  static int convertbuffer(PyObject *, void **p, char **);
  
  static int vgetargskeywords(PyObject *, PyObject *,
***************
*** 72,77 ****
--- 73,120 ----
  }
  
  
+ /* Handle cleanup of allocated memory in case of exception */
+ 
+ static void
+ docleanup(void *ptr, void *dofree)
+ {
+ 	if (*(int *)dofree)
+ 		PyMem_FREE(ptr);
+ }
+ 
+ static int
+ addcleanup(void *ptr, PyObject **freelist, int *dofree)
+ {
+ 	PyObject *cobj;
+ 	if (!*freelist) {
+ 		*freelist = PyList_New(0);
+ 		if (!*freelist) {
+ 			PyMem_FREE(ptr);
+ 			return -1;
+ 		}
+ 	}
+ 	cobj = PyCObject_FromVoidPtrAndDesc(ptr, dofree, docleanup);
+ 	if (!cobj) {
+ 		PyMem_FREE(ptr);
+ 		return -1;
+ 	}
+ 	if(PyList_Append(*freelist, cobj)) {
+ 		*dofree = 1;
+ 		Py_DECREF(cobj);
+ 		return -1;
+ 	}
+         Py_DECREF(cobj);
+ 	return 0;
+ }
+ 
+ #define CLEANRETURN(retval, freelist, dofree) do	\
+ {							\
+ 	dofree = ((retval) == 0);			\
+ 	Py_XDECREF(freelist);				\
+ 	return (retval);				\
+ } while(0)
+ 
+ 
  static int
  vgetargs1(PyObject *args, char *format, va_list *p_va, int compat)
  {
***************
*** 86,91 ****
--- 129,136 ----
  	char *formatsave = format;
  	int i, len;
  	char *msg;
+ 	PyObject *freelist = NULL;
+ 	int dofree = 0;
  	
  	assert(compat || (args != (PyObject*)NULL));
  
***************
*** 157,167 ****
  				return 0;
  			}
  			msg = convertitem(args, &format, p_va, levels, msgbuf,
! 					  sizeof(msgbuf));
  			if (msg == NULL)
! 				return 1;
  			seterror(levels[0], msg, levels+1, fname, message);
! 			return 0;
  		}
  		else {
  			PyErr_SetString(PyExc_SystemError,
--- 202,212 ----
  				return 0;
  			}
  			msg = convertitem(args, &format, p_va, levels, msgbuf,
! 					  sizeof(msgbuf), &freelist, &dofree);
  			if (msg == NULL)
! 				CLEANRETURN(1, freelist, dofree);
  			seterror(levels[0], msg, levels+1, fname, message);
! 			CLEANRETURN(0, freelist, dofree);
  		}
  		else {
  			PyErr_SetString(PyExc_SystemError,
***************
*** 200,209 ****
  		if (*format == '|')
  			format++;
  		msg = convertitem(PyTuple_GET_ITEM(args, i), &format, p_va,
! 				  levels, msgbuf, sizeof(msgbuf));
  		if (msg) {
  			seterror(i+1, msg, levels, fname, message);
! 			return 0;
  		}
  	}
  
--- 245,254 ----
  		if (*format == '|')
  			format++;
  		msg = convertitem(PyTuple_GET_ITEM(args, i), &format, p_va,
! 				  levels, msgbuf, sizeof(msgbuf), &freelist, &dofree);
  		if (msg) {
  			seterror(i+1, msg, levels, fname, message);
! 			CLEANRETURN(0, freelist, dofree);
  		}
  	}
  
***************
*** 212,221 ****
  	    *format != '|' && *format != ':' && *format != ';') {
  		PyErr_Format(PyExc_SystemError,
  			     "bad format string: %.200s", formatsave);
! 		return 0;
  	}
  	
! 	return 1;
  }
  
  
--- 257,266 ----
  	    *format != '|' && *format != ':' && *format != ';') {
  		PyErr_Format(PyExc_SystemError,
  			     "bad format string: %.200s", formatsave);
! 		CLEANRETURN(0, freelist, dofree);
  	}
  	
! 	CLEANRETURN(1, freelist, dofree);
  }
  
  
***************
*** 277,283 ****
  
  static char *
  converttuple(PyObject *arg, char **p_format, va_list *p_va, int *levels,
! 	     char *msgbuf, size_t bufsize, int toplevel)
  {
  	int level = 0;
  	int n = 0;
--- 322,329 ----
  
  static char *
  converttuple(PyObject *arg, char **p_format, va_list *p_va, int *levels,
! 	     char *msgbuf, size_t bufsize, int toplevel,
! 	     PyObject **freelist, int *dofree)
  {
  	int level = 0;
  	int n = 0;
***************
*** 327,333 ****
  		PyObject *item;
  		item = PySequence_GetItem(arg, i);
  		msg = convertitem(item, &format, p_va, levels+1, msgbuf,
! 				  bufsize);
  		/* PySequence_GetItem calls tp->sq_item, which INCREFs */
  		Py_XDECREF(item);
  		if (msg != NULL) {
--- 373,379 ----
  		PyObject *item;
  		item = PySequence_GetItem(arg, i);
  		msg = convertitem(item, &format, p_va, levels+1, msgbuf,
! 				  bufsize, freelist, dofree);
  		/* PySequence_GetItem calls tp->sq_item, which INCREFs */
  		Py_XDECREF(item);
  		if (msg != NULL) {
***************
*** 345,351 ****
  
  static char *
  convertitem(PyObject *arg, char **p_format, va_list *p_va, int *levels,
! 	    char *msgbuf, size_t bufsize)
  {
  	char *msg;
  	char *format = *p_format;
--- 391,397 ----
  
  static char *
  convertitem(PyObject *arg, char **p_format, va_list *p_va, int *levels,
! 	    char *msgbuf, size_t bufsize, PyObject **freelist, int *dofree)
  {
  	char *msg;
  	char *format = *p_format;
***************
*** 353,364 ****
  	if (*format == '(' /* ')' */) {
  		format++;
  		msg = converttuple(arg, &format, p_va, levels, msgbuf, 
! 				   bufsize, 0);
  		if (msg == NULL)
  			format++;
  	}
  	else {
! 		msg = convertsimple(arg, &format, p_va, msgbuf, bufsize);
  		if (msg != NULL)
  			levels[0] = 0;
  	}
--- 399,411 ----
  	if (*format == '(' /* ')' */) {
  		format++;
  		msg = converttuple(arg, &format, p_va, levels, msgbuf, 
! 				   bufsize, 0, freelist, dofree);
  		if (msg == NULL)
  			format++;
  	}
  	else {
! 		msg = convertsimple(arg, &format, p_va, msgbuf, bufsize,
! 				    freelist, dofree);
  		if (msg != NULL)
  			levels[0] = 0;
  	}
***************
*** 396,402 ****
  
  static char *
  convertsimple(PyObject *arg, char **p_format, va_list *p_va, char *msgbuf,
! 	      size_t bufsize)
  {
  	char *format = *p_format;
  	char c = *format++;
--- 443,449 ----
  
  static char *
  convertsimple(PyObject *arg, char **p_format, va_list *p_va, char *msgbuf,
! 	      size_t bufsize, PyObject **freelist, int *dofree)
  {
  	char *format = *p_format;
  	char c = *format++;
***************
*** 801,810 ****
  			int *buffer_len = va_arg(*p_va, int *);
  
  			format++;
! 			if (buffer_len == NULL)
  				return converterr(
  					"(buffer_len is NULL)",
  					arg, msgbuf, bufsize);
  			if (*buffer == NULL) {
  				*buffer = PyMem_NEW(char, size + 1);
  				if (*buffer == NULL) {
--- 848,859 ----
  			int *buffer_len = va_arg(*p_va, int *);
  
  			format++;
! 			if (buffer_len == NULL) {
! 				Py_DECREF(s);
  				return converterr(
  					"(buffer_len is NULL)",
  					arg, msgbuf, bufsize);
+ 			}
  			if (*buffer == NULL) {
  				*buffer = PyMem_NEW(char, size + 1);
  				if (*buffer == NULL) {
***************
*** 813,818 ****
--- 862,873 ----
  						"(memory error)",
  						arg, msgbuf, bufsize);
  				}
+ 				if(addcleanup(*buffer, freelist, dofree)) {
+ 					Py_DECREF(s);
+ 					return converterr(
+ 						"(cleanup problem)",
+ 						arg, msgbuf, bufsize);
+ 				}
  			} else {
  				if (size + 1 > *buffer_len) {
  					Py_DECREF(s);
***************
*** 839,854 ****
  			   PyMem_Free()ing it after usage
  
  			*/
! 			if ((int)strlen(PyString_AS_STRING(s)) != size)
  				return converterr(
  					"(encoded string without NULL bytes)",
  					arg, msgbuf, bufsize);
  			*buffer = PyMem_NEW(char, size + 1);
  			if (*buffer == NULL) {
  				Py_DECREF(s);
  				return converterr("(memory error)",
  						  arg, msgbuf, bufsize);
  			}
  			memcpy(*buffer,
  			       PyString_AS_STRING(s),
  			       size + 1);
--- 894,916 ----
  			   PyMem_Free()ing it after usage
  
  			*/
! 			if ((int)strlen(PyString_AS_STRING(s)) != size) {
! 				Py_DECREF(s);
  				return converterr(
  					"(encoded string without NULL bytes)",
  					arg, msgbuf, bufsize);
+ 			}
  			*buffer = PyMem_NEW(char, size + 1);
  			if (*buffer == NULL) {
  				Py_DECREF(s);
  				return converterr("(memory error)",
  						  arg, msgbuf, bufsize);
  			}
+ 			if(addcleanup(*buffer, freelist, dofree)) {
+ 				Py_DECREF(s);
+ 				return converterr("(cleanup problem)",
+ 						arg, msgbuf, bufsize);
+ 			}
  			memcpy(*buffer,
  			       PyString_AS_STRING(s),
  			       size + 1);
***************
*** 1068,1073 ****
--- 1130,1137 ----
  	char *formatsave;
  	int i, len, nargs, nkeywords;
  	char *msg, **p;
+ 	PyObject *freelist = NULL;
+ 	int dofree = 0;
  
  	assert(args != NULL && PyTuple_Check(args));
  	assert(keywords == NULL || PyDict_Check(keywords));
***************
*** 1192,1207 ****
  		if (*format == '|')
  			format++;
  		msg = convertitem(PyTuple_GET_ITEM(args, i), &format, p_va,
! 				 levels, msgbuf, sizeof(msgbuf));
  		if (msg) {
  			seterror(i+1, msg, levels, fname, message);
! 			return 0;
  		}
  	}
  
  	/* handle no keyword parameters in call */	
  	if (nkeywords == 0)
! 		return 1; 
  
  	/* convert the keyword arguments; this uses the format 
  	   string where it was left after processing args */
--- 1256,1272 ----
  		if (*format == '|')
  			format++;
  		msg = convertitem(PyTuple_GET_ITEM(args, i), &format, p_va,
! 				  levels, msgbuf, sizeof(msgbuf), &freelist,
! 				  &dofree);
  		if (msg) {
  			seterror(i+1, msg, levels, fname, message);
! 			CLEANRETURN(0, freelist, dofree);
  		}
  	}
  
  	/* handle no keyword parameters in call */	
  	if (nkeywords == 0)
! 		CLEANRETURN(1, freelist, dofree);
  
  	/* convert the keyword arguments; this uses the format 
  	   string where it was left after processing args */
***************
*** 1213,1235 ****
  		if (item != NULL) {
  			Py_INCREF(item);
  			msg = convertitem(item, &format, p_va, levels, msgbuf,
! 					  sizeof(msgbuf));
  			Py_DECREF(item);
  			if (msg) {
  				seterror(i+1, msg, levels, fname, message);
! 				return 0;
  			}
  			--nkeywords;
  			if (nkeywords == 0)
  				break;
  		}
  		else if (PyErr_Occurred())
! 			return 0;
  		else {
  			msg = skipitem(&format, p_va);
  			if (msg) {
  				seterror(i+1, msg, levels, fname, message);
! 				return 0;
  			}
  		}
  	}
--- 1278,1300 ----
  		if (item != NULL) {
  			Py_INCREF(item);
  			msg = convertitem(item, &format, p_va, levels, msgbuf,
! 					  sizeof(msgbuf), &freelist, &dofree);
  			Py_DECREF(item);
  			if (msg) {
  				seterror(i+1, msg, levels, fname, message);
! 				CLEANRETURN(0, freelist, dofree);
  			}
  			--nkeywords;
  			if (nkeywords == 0)
  				break;
  		}
  		else if (PyErr_Occurred())
! 			CLEANRETURN(0, freelist, dofree);
  		else {
  			msg = skipitem(&format, p_va);
  			if (msg) {
  				seterror(i+1, msg, levels, fname, message);
! 				CLEANRETURN(0, freelist, dofree);
  			}
  		}
  	}
***************
*** 1244,1250 ****
  			if (!PyString_Check(key)) {
  				PyErr_SetString(PyExc_TypeError, 
  					        "keywords must be strings");
! 				return 0;
  			}
  			ks = PyString_AsString(key);
  			for (i = 0; i < max; i++) {
--- 1309,1315 ----
  			if (!PyString_Check(key)) {
  				PyErr_SetString(PyExc_TypeError, 
  					        "keywords must be strings");
! 				CLEANRETURN(0, freelist, dofree);
  			}
  			ks = PyString_AsString(key);
  			for (i = 0; i < max; i++) {
***************
*** 1258,1269 ****
  					     "'%s' is an invalid keyword "
  					     "argument for this function",
  					     ks);
! 				return 0;
  			}
  		}
  	}
  
! 	return 1;
  }
  
  
--- 1323,1334 ----
  					     "'%s' is an invalid keyword "
  					     "argument for this function",
  					     ks);
! 				CLEANRETURN(0, freelist, dofree);
  			}
  		}
  	}
  
! 	CLEANRETURN(1, freelist, dofree);
  }
  
  
msg8693 - (view) Author: Michael Stone (mbrierst) Date: 2003-02-11 23:11
Logged In: YES 
user_id=670441

I submitted this as patch 684981 so it can be
attached to the report instead of pasted in.
Also, the patch there is nicer than the one here.
msg8694 - (view) Author: Martin v. Löwis (loewis) * (Python committer) Date: 2003-05-03 10:01
Logged In: YES 
user_id=21627

Fixed in getargs.c 2.100 (with patch 684981).
History
Date User Action Args
2002-01-10 10:39:18lemburgcreate