diff -r 2cc44cd8098e Modules/expat/expat.h --- a/Modules/expat/expat.h Fri Mar 09 00:52:07 2012 +0100 +++ b/Modules/expat/expat.h Thu Mar 08 16:42:57 2012 -0800 @@ -883,6 +883,15 @@ XML_SetParamEntityParsing(XML_Parser parser, enum XML_ParamEntityParsing parsing); +/* Sets the hash salt to use for internal hash calculations. + Helps in preventing DoS attacks based on predicting hash + function behavior. This must be called before parsing is started. + Returns 1 if successful, 0 when called after parsing has started. +*/ +XMLPARSEAPI(int) +XML_SetHashSalt(XML_Parser parser, + unsigned long hash_salt); + /* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then XML_GetErrorCode returns information about the error. */ diff -r 2cc44cd8098e Modules/expat/pyexpatns.h --- a/Modules/expat/pyexpatns.h Fri Mar 09 00:52:07 2012 +0100 +++ b/Modules/expat/pyexpatns.h Thu Mar 08 16:42:57 2012 -0800 @@ -97,6 +97,7 @@ #define XML_SetEntityDeclHandler PyExpat_XML_SetEntityDeclHandler #define XML_SetExternalEntityRefHandler PyExpat_XML_SetExternalEntityRefHandler #define XML_SetExternalEntityRefHandlerArg PyExpat_XML_SetExternalEntityRefHandlerArg +#define XML_SetHashSalt PyExpat_XML_SetHashSalt #define XML_SetNamespaceDeclHandler PyExpat_XML_SetNamespaceDeclHandler #define XML_SetNotationDeclHandler PyExpat_XML_SetNotationDeclHandler #define XML_SetNotStandaloneHandler PyExpat_XML_SetNotStandaloneHandler diff -r 2cc44cd8098e Modules/expat/xmlparse.c --- a/Modules/expat/xmlparse.c Fri Mar 09 00:52:07 2012 +0100 +++ b/Modules/expat/xmlparse.c Thu Mar 08 16:42:57 2012 -0800 @@ -17,6 +17,8 @@ #include #include /* memset(), memcpy() */ #include +#include /* UINT_MAX */ +#include /* time() */ #include "expat.h" @@ -387,12 +389,13 @@ static void dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms); static int -dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms); +dtdCopy(XML_Parser oldParser, + DTD *newDtd, const DTD *oldDtd, 
const XML_Memory_Handling_Suite *ms); static int -copyEntityTable(HASH_TABLE *, STRING_POOL *, const HASH_TABLE *); - +copyEntityTable(XML_Parser oldParser, + HASH_TABLE *, STRING_POOL *, const HASH_TABLE *); static NAMED * -lookup(HASH_TABLE *table, KEY name, size_t createSize); +lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize); static void FASTCALL hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms); static void FASTCALL hashTableClear(HASH_TABLE *); @@ -542,6 +545,7 @@ XML_Bool m_useForeignDTD; enum XML_ParamEntityParsing m_paramEntityParsing; #endif + unsigned long m_hash_secret_salt; }; #define MALLOC(s) (parser->m_mem.malloc_fcn((s))) @@ -649,6 +653,7 @@ #define useForeignDTD (parser->m_useForeignDTD) #define paramEntityParsing (parser->m_paramEntityParsing) #endif /* XML_DTD */ +#define hash_secret_salt (parser->m_hash_secret_salt) XML_Parser XMLCALL XML_ParserCreate(const XML_Char *encodingName) @@ -671,6 +676,14 @@ 'n', 'a', 'm', 'e', 's', 'p', 'a', 'c', 'e', '\0' }; +static unsigned long +generate_hash_secret_salt(void) +{ + unsigned int seed = time(NULL) % UINT_MAX; + srand(seed); + return rand(); +} + XML_Parser XMLCALL XML_ParserCreate_MM(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, @@ -860,6 +873,7 @@ useForeignDTD = XML_FALSE; paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; #endif + hash_secret_salt = 0; } /* moves list of bindings to freeBindingList */ @@ -976,6 +990,12 @@ int oldInEntityValue = prologState.inEntityValue; #endif XML_Bool oldns_triplets = ns_triplets; + /* Note that the new parser shares the same hash secret as the old + parser, so that dtdCopy and copyEntityTable can lookup values + from hash tables associated with either parser without us having + to worry which hash secrets each table has. 
+ */ + unsigned long oldhash_secret_salt = hash_secret_salt; #ifdef XML_DTD if (!context) @@ -1029,13 +1049,14 @@ externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; defaultExpandInternalEntities = oldDefaultExpandInternalEntities; ns_triplets = oldns_triplets; + hash_secret_salt = oldhash_secret_salt; parentParser = oldParser; #ifdef XML_DTD paramEntityParsing = oldParamEntityParsing; prologState.inEntityValue = oldInEntityValue; if (context) { #endif /* XML_DTD */ - if (!dtdCopy(_dtd, oldDtd, &parser->m_mem) + if (!dtdCopy(oldParser, _dtd, oldDtd, &parser->m_mem) || !setContext(parser, context)) { XML_ParserFree(parser); return NULL; @@ -1420,6 +1441,17 @@ #endif } +int XMLCALL +XML_SetHashSalt(XML_Parser parser, + unsigned long hash_salt) +{ + /* block after XML_Parse()/XML_ParseBuffer() has been called */ + if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) + return 0; + hash_secret_salt = hash_salt; + return 1; +} + enum XML_Status XMLCALL XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { @@ -1430,6 +1462,9 @@ case XML_FINISHED: errorCode = XML_ERROR_FINISHED; return XML_STATUS_ERROR; + case XML_INITIALIZED: + if (hash_secret_salt == 0) + hash_secret_salt = generate_hash_secret_salt(); default: ps_parsing = XML_PARSING; } @@ -1488,11 +1523,13 @@ break; case XML_INITIALIZED: case XML_PARSING: - result = XML_STATUS_OK; if (isFinal) { ps_parsing = XML_FINISHED; - return result; + return XML_STATUS_OK; } + /* fall through */ + default: + result = XML_STATUS_OK; } } @@ -1553,6 +1590,9 @@ case XML_FINISHED: errorCode = XML_ERROR_FINISHED; return XML_STATUS_ERROR; + case XML_INITIALIZED: + if (hash_secret_salt == 0) + hash_secret_salt = generate_hash_secret_salt(); default: ps_parsing = XML_PARSING; } @@ -2231,7 +2271,7 @@ next - enc->minBytesPerChar); if (!name) return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd->generalEntities, name, 0); + entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); 
poolDiscard(&dtd->pool); /* First, determine if a check for an existing declaration is needed; if yes, check that the entity exists, and that it is internal, @@ -2618,12 +2658,12 @@ const XML_Char *localPart; /* lookup the element type name */ - elementType = (ELEMENT_TYPE *)lookup(&dtd->elementTypes, tagNamePtr->str,0); + elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0); if (!elementType) { const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str); if (!name) return XML_ERROR_NO_MEMORY; - elementType = (ELEMENT_TYPE *)lookup(&dtd->elementTypes, name, + elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); if (!elementType) return XML_ERROR_NO_MEMORY; @@ -2792,9 +2832,9 @@ if (s[-1] == 2) { /* prefixed */ ATTRIBUTE_ID *id; const BINDING *b; - unsigned long uriHash = 0; + unsigned long uriHash = hash_secret_salt; ((XML_Char *)s)[-1] = 0; /* clear flag */ - id = (ATTRIBUTE_ID *)lookup(&dtd->attributeIds, s, 0); + id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0); if (!id) return XML_ERROR_NO_MEMORY; b = id->prefix->binding; @@ -2818,7 +2858,7 @@ } while (*s++); { /* Check hash table for duplicate of expanded name (uriName). - Derived from code in lookup(HASH_TABLE *table, ...). + Derived from code in lookup(parser, HASH_TABLE *table, ...). 
*/ unsigned char step = 0; unsigned long mask = nsAttsSize - 1; @@ -3756,7 +3796,8 @@ case XML_ROLE_DOCTYPE_PUBLIC_ID: #ifdef XML_DTD useForeignDTD = XML_FALSE; - declEntity = (ENTITY *)lookup(&dtd->paramEntities, + declEntity = (ENTITY *)lookup(parser, + &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); if (!declEntity) @@ -3811,7 +3852,8 @@ XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; dtd->hasParamEntityRefs = XML_TRUE; if (paramEntityParsing && externalEntityRefHandler) { - ENTITY *entity = (ENTITY *)lookup(&dtd->paramEntities, + ENTITY *entity = (ENTITY *)lookup(parser, + &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); if (!entity) @@ -3855,7 +3897,7 @@ XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; dtd->hasParamEntityRefs = XML_TRUE; if (paramEntityParsing && externalEntityRefHandler) { - ENTITY *entity = (ENTITY *)lookup(&dtd->paramEntities, + ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); if (!entity) @@ -4069,7 +4111,8 @@ break; #else /* XML_DTD */ if (!declEntity) { - declEntity = (ENTITY *)lookup(&dtd->paramEntities, + declEntity = (ENTITY *)lookup(parser, + &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); if (!declEntity) @@ -4144,7 +4187,7 @@ const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); if (!name) return XML_ERROR_NO_MEMORY; - declEntity = (ENTITY *)lookup(&dtd->generalEntities, name, + declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, sizeof(ENTITY)); if (!declEntity) return XML_ERROR_NO_MEMORY; @@ -4176,7 +4219,7 @@ const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); if (!name) return XML_ERROR_NO_MEMORY; - declEntity = (ENTITY *)lookup(&dtd->paramEntities, + declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, sizeof(ENTITY)); if (!declEntity) return XML_ERROR_NO_MEMORY; @@ -4358,7 +4401,7 @@ next - enc->minBytesPerChar); if (!name) return XML_ERROR_NO_MEMORY; - entity = (ENTITY 
*)lookup(&dtd->paramEntities, name, 0); + entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); poolDiscard(&dtd->pool); /* first, determine if a check for an existing declaration is needed; if yes, check that the entity exists, and that it is internal, @@ -4882,7 +4925,7 @@ next - enc->minBytesPerChar); if (!name) return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd->generalEntities, name, 0); + entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); poolDiscard(&temp2Pool); /* First, determine if a check for an existing declaration is needed; if yes, check that the entity exists, and that it is internal. @@ -4991,7 +5034,7 @@ result = XML_ERROR_NO_MEMORY; goto endEntityValue; } - entity = (ENTITY *)lookup(&dtd->paramEntities, name, 0); + entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); poolDiscard(&tempPool); if (!entity) { /* not a well-formedness error - see XML 1.0: WFC Entity Declared */ @@ -5281,7 +5324,7 @@ } if (!poolAppendChar(&dtd->pool, XML_T('\0'))) return 0; - prefix = (PREFIX *)lookup(&dtd->prefixes, poolStart(&dtd->pool), + prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), sizeof(PREFIX)); if (!prefix) return 0; @@ -5310,7 +5353,7 @@ return NULL; /* skip quotation mark - its storage will be re-used (like in name[-1]) */ ++name; - id = (ATTRIBUTE_ID *)lookup(&dtd->attributeIds, name, sizeof(ATTRIBUTE_ID)); + id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID)); if (!id) return NULL; if (id->name != name) @@ -5328,7 +5371,7 @@ if (name[5] == XML_T('\0')) id->prefix = &dtd->defaultPrefix; else - id->prefix = (PREFIX *)lookup(&dtd->prefixes, name + 6, sizeof(PREFIX)); + id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX)); id->xmlns = XML_TRUE; } else { @@ -5343,7 +5386,7 @@ } if (!poolAppendChar(&dtd->pool, XML_T('\0'))) return NULL; - id->prefix = (PREFIX *)lookup(&dtd->prefixes, poolStart(&dtd->pool), + id->prefix = (PREFIX 
*)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), sizeof(PREFIX)); if (!id->prefix) return NULL; @@ -5441,7 +5484,7 @@ ENTITY *e; if (!poolAppendChar(&tempPool, XML_T('\0'))) return XML_FALSE; - e = (ENTITY *)lookup(&dtd->generalEntities, poolStart(&tempPool), 0); + e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&tempPool), 0); if (e) e->open = XML_TRUE; if (*s != XML_T('\0')) @@ -5456,7 +5499,7 @@ else { if (!poolAppendChar(&tempPool, XML_T('\0'))) return XML_FALSE; - prefix = (PREFIX *)lookup(&dtd->prefixes, poolStart(&tempPool), + prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&tempPool), sizeof(PREFIX)); if (!prefix) return XML_FALSE; @@ -5620,7 +5663,7 @@ The new DTD has already been initialized. */ static int -dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) +dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; @@ -5635,7 +5678,7 @@ name = poolCopyString(&(newDtd->pool), oldP->name); if (!name) return 0; - if (!lookup(&(newDtd->prefixes), name, sizeof(PREFIX))) + if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX))) return 0; } @@ -5657,7 +5700,7 @@ if (!name) return 0; ++name; - newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, + newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID)); if (!newA) return 0; @@ -5667,7 +5710,7 @@ if (oldA->prefix == &oldDtd->defaultPrefix) newA->prefix = &newDtd->defaultPrefix; else - newA->prefix = (PREFIX *)lookup(&(newDtd->prefixes), + newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), oldA->prefix->name, 0); } } @@ -5686,7 +5729,7 @@ name = poolCopyString(&(newDtd->pool), oldE->name); if (!name) return 0; - newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, + newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE)); if (!newE) return 0; @@ -5700,14 +5743,14 @@ } if 
(oldE->idAtt) newE->idAtt = (ATTRIBUTE_ID *) - lookup(&(newDtd->attributeIds), oldE->idAtt->name, 0); + lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0); newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts; if (oldE->prefix) - newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), + newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), oldE->prefix->name, 0); for (i = 0; i < newE->nDefaultAtts; i++) { newE->defaultAtts[i].id = (ATTRIBUTE_ID *) - lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); + lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata; if (oldE->defaultAtts[i].value) { newE->defaultAtts[i].value @@ -5721,13 +5764,15 @@ } /* Copy the entity tables. */ - if (!copyEntityTable(&(newDtd->generalEntities), + if (!copyEntityTable(oldParser, + &(newDtd->generalEntities), &(newDtd->pool), &(oldDtd->generalEntities))) return 0; #ifdef XML_DTD - if (!copyEntityTable(&(newDtd->paramEntities), + if (!copyEntityTable(oldParser, + &(newDtd->paramEntities), &(newDtd->pool), &(oldDtd->paramEntities))) return 0; @@ -5750,7 +5795,8 @@ } /* End dtdCopy */ static int -copyEntityTable(HASH_TABLE *newTable, +copyEntityTable(XML_Parser oldParser, + HASH_TABLE *newTable, STRING_POOL *newPool, const HASH_TABLE *oldTable) { @@ -5769,7 +5815,7 @@ name = poolCopyString(newPool, oldE->name); if (!name) return 0; - newE = (ENTITY *)lookup(newTable, name, sizeof(ENTITY)); + newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY)); if (!newE) return 0; if (oldE->systemId) { @@ -5827,16 +5873,16 @@ } static unsigned long FASTCALL -hash(KEY s) +hash(XML_Parser parser, KEY s) { - unsigned long h = 0; + unsigned long h = hash_secret_salt; while (*s) h = CHAR_HASH(h, *s++); return h; } static NAMED * -lookup(HASH_TABLE *table, KEY name, size_t createSize) +lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { size_t i; 
if (table->size == 0) { @@ -5853,10 +5899,10 @@ return NULL; } memset(table->v, 0, tsize); - i = hash(name) & ((unsigned long)table->size - 1); + i = hash(parser, name) & ((unsigned long)table->size - 1); } else { - unsigned long h = hash(name); + unsigned long h = hash(parser, name); unsigned long mask = (unsigned long)table->size - 1; unsigned char step = 0; i = h & mask; @@ -5882,7 +5928,7 @@ memset(newV, 0, tsize); for (i = 0; i < table->size; i++) if (table->v[i]) { - unsigned long newHash = hash(table->v[i]->name); + unsigned long newHash = hash(parser, table->v[i]->name); size_t j = newHash & newMask; step = 0; while (newV[j]) { @@ -6257,7 +6303,7 @@ if (!name) return NULL; - ret = (ELEMENT_TYPE *) lookup(&dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); + ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); if (!ret) return NULL; if (ret->name != name) diff -r 2cc44cd8098e Modules/pyexpat.c --- a/Modules/pyexpat.c Fri Mar 09 00:52:07 2012 +0100 +++ b/Modules/pyexpat.c Thu Mar 08 16:42:57 2012 -0800 @@ -1156,6 +1156,9 @@ else { self->itself = XML_ParserCreate(encoding); } + /* self->itself is NULL if parser creation failed. */ + if (self->itself != NULL) + XML_SetHashSalt(self->itself, (unsigned long)_Py_HashSecret.prefix); self->intern = intern; Py_XINCREF(self->intern); PyObject_GC_Track(self);