[3.14] gh-149018: Use `XML_SetHashSalt16Bytes` in `pyexpat`/`_elementtree` when possible (GH-149023) by StanFromIreland · Pull Request #149646 · python/cpython
I installed python3-dbg and expat-dbg for debug symbols and here's what that looks like:
Thread 1 "FreeCAD" received signal SIGSEGV, Segmentation fault.
0x00007ffff0431d73 in getRootParserOf (parser=<optimized out>, outLevelDiff=<optimized out>) at ./lib/xmlparse.c:8653
⚠️ warning: 8653 ./lib/xmlparse.c: No such file or directory
(gdb) disassemble /m XML_SetHashSalt16Bytes
Dump of assembler code for function XML_SetHashSalt16Bytes:
1068 in ./lib/xmlparse.c
1069 in ./lib/xmlparse.c
0x00007ffff0431d90 <+48>: lea 0x18314(%rip),%rdi # 0x7ffff044a0ab
0x00007ffff0431db8 <+88>: call 0x7ffff042c8c0 <getDebugLevel>
0x00007ffff0431dbd <+93>: test %rax,%rax
0x00007ffff0431dc0 <+96>: jne 0x7ffff0431dd8 <XML_SetHashSalt16Bytes+120>
1070 in ./lib/xmlparse.c
1071 in ./lib/xmlparse.c
1072 in ./lib/xmlparse.c
1073 in ./lib/xmlparse.c
1074 in ./lib/xmlparse.c
1075 in ./lib/xmlparse.c
1076 in ./lib/xmlparse.c
1077 in ./lib/xmlparse.c
[...]
(gdb) frame 2
#2 0x00007fffc410d567 in _elementtree_XMLParser___init___impl (self=0x7ffef84fe480, target=0x7ffef85ddfc0, encoding=<optimized out>)
at ./Modules/_elementtree.c:3733
⚠️ warning: 3733 ./Modules/_elementtree.c: No such file or directory
(gdb) info locals
st = 0x7fff871f17f0
st = <optimized out>
_tmp_op_ptr = <optimized out>
_tmp_old_op = <optimized out>
_tmp_op_ptr = <optimized out>
_tmp_old_op = <optimized out>
_tmp_op_ptr = <optimized out>
_tmp_old_op = <optimized out>
_tmp_op_ptr = <optimized out>
_tmp_old_op = <optimized out>
_tmp_op_ptr = <optimized out>
_tmp_old_op = <optimized out>
(gdb) info args
self = 0x7ffef84fe480
target = 0x7ffef85ddfc0
encoding = <optimized out>
(gdb) p self->parser
$1 = (XML_Parser) 0x55555b9c80c0
(gdb) p self
$2 = (XMLParserObject *) 0x7ffef84fe480
(gdb) frame 0
#0 0x00007ffff0431d73 in getRootParserOf (parser=<optimized out>, outLevelDiff=<optimized out>) at ./lib/xmlparse.c:8653
⚠️ warning: 8653 ./lib/xmlparse.c: No such file or directory
(gdb) p *(XML_Parser)0x55555b9c80c0
$3 = {m_userData = 0x0, m_handlerArg = 0x0, m_buffer = 0x0, m_mem = {malloc_fcn = 0x7ffff45d1ff0 <PyMem_Malloc>,
realloc_fcn = 0x7ffff45d24c0 <PyMem_Realloc>, free_fcn = 0x7ffff45d2180 <PyMem_Free>}, m_bufferPtr = 0x0, m_bufferEnd = 0x0,
m_bufferLim = 0x0, m_parseEndByteIndex = 0, m_parseEndPtr = 0x0, m_partialTokenBytesBefore = 93825098000736,
m_reparseDeferralEnabled = 96 '`', m_lastBufferRequestSize = 21845, m_dataBuf = 0x0, m_dataBufEnd = 0x0, m_startElementHandler = 0x0,
m_endElementHandler = 0x0, m_characterDataHandler = 0x0, m_processingInstructionHandler = 0x0, m_commentHandler = 0x0,
m_startCdataSectionHandler = 0x0, m_endCdataSectionHandler = 0x0, m_defaultHandler = 0x0, m_startDoctypeDeclHandler = 0x0,
m_endDoctypeDeclHandler = 0x0, m_unparsedEntityDeclHandler = 0x0, m_notationDeclHandler = 0x0, m_startNamespaceDeclHandler = 0x0,
m_endNamespaceDeclHandler = 0x0, m_notStandaloneHandler = 0x55555b9c80c0, m_externalEntityRefHandler = 0x0,
m_externalEntityRefHandlerArg = 0x0, m_skippedEntityHandler = 0x0, m_unknownEncodingHandler = 0x0, m_elementDeclHandler = 0x0,
m_attlistDeclHandler = 0x0, m_entityDeclHandler = 0x55555b9c81e8, m_xmlDeclHandler = 0x7ffff1df8570, m_encoding = 0x7ffff1df8550,
m_initEncoding = {initEnc = {scanners = {0x7ffef843ce30, 0x7ffef85d0df0, 0x7ffef843cf30, 0x7ffef843d030}, literalScanners = {
0x7ffef85d0f50, 0x7ffef85d10b0}, nameMatchesAscii = 0x7ffef843d130, nameLength = 0x7ffef85d1210, skipS = 0x7ffef85d1370,
getAtts = 0x7ffff1df7e50, charRefNumber = 0x7ffef866f6d0, predefinedEntityName = 0x7ffef866f800, updatePosition = 0x7ffef85c2730,
isPublicId = 0x6555b9c31e0, utf8Convert = 0x55555b9c81e0, utf16Convert = 0x7ffff2273a80, minBytesPerChar = 0, isUtf8 = 0 '\000',
isUtf16 = 0 '\000'}, encPtr = 0x55555b9c0001}, m_internalEncoding = 0x0, m_protocolEncodingName = 0x0, m_ns = 0 '\000',
m_ns_triplets = 0 '\000', m_unknownEncodingMem = 0x0, m_unknownEncodingData = 0x7ffff1df4bd0,
m_unknownEncodingHandlerData = 0x55555b9c6a50, m_unknownEncodingRelease = 0x7ffff1df1ec0, m_prologState = {handler = 0x555500000000,
level = 0, role_none = 0, includeLevel = 0, documentEntity = 0, inEntityValue = 0}, m_processor = 0x0, m_errorCode = XML_ERROR_NONE,
m_eventPtr = 0xf493fa01 <error: Cannot access memory at address 0xf493fa01>, m_eventEndPtr = 0x0, m_positionPtr = 0x0,
m_openInternalEntities = 0x0, m_freeInternalEntities = 0x0, m_openAttributeEntities = 0x0, m_freeAttributeEntities = 0x0,
m_openValueEntities = 0x0, m_freeValueEntities = 0x0, m_defaultExpandInternalEntities = 0 '\000', m_tagLevel = 0,
m_declEntity = 0x7ffff4930000 <_PyRuntime+12256>, m_doctypeName = 0x7ffef833d430 "", m_doctypeSysid = 0x0, m_doctypePubid = 0x0,
m_declAttributeType = 0x0, m_declNotationName = 0x0, m_declNotationPublicId = 0x0, m_declElementType = 0x10,
m_declAttributeId = 0x7ffff4940080 <_PyRuntime+77920>, m_declAttributeIsCdata = 48 '0', m_declAttributeIsId = 204 '\314', m_dtd = 0x0,
m_curBase = 0x0, m_tagStack = 0x7ffff4941300 <_PyRuntime+82656>, m_freeTagList = 0x0, m_inheritedBindings = 0x0,
m_freeBindingList = 0x0, m_attsSize = 0, m_nSpecifiedAtts = 0, m_idAttIndex = 0, m_atts = 0x0, m_nsAtts = 0x0,
m_nsAttsVersion = 93825097564376, m_nsAttsPower = 0 '\000', m_position = {lineNumber = 0, columnNumber = 0}, m_tempPool = {
blocks = 0x0, freeBlocks = 0x0, end = 0x55555b9c80d8 "\360\037]\364\377\177", ptr = 0x0,
start = 0x557d00000000 <error: Cannot access memory at address 0x557d00000000>, parser = 0x0}, m_temp2Pool = {
blocks = 0x7ffe00000000, freeBlocks = 0x0, end = 0x7ffff49044c0 <_Py_NoneStruct> "",
ptr = 0x251 <error: Cannot access memory at address 0x251>, start = 0x1 <error: Cannot access memory at address 0x1>,
parser = 0x7ffff48f0260 <PyCode_Type>}, m_groupConnector = 0xb6 <error: Cannot access memory at address 0xb6>,
m_groupSize = 4165263632, m_namespaceSeparator = -2 '\376', m_parentParser = 0x7ffef84294d0, m_parsingStatus = {parsing = 4103284688,
finalBuffer = 255 '\377'}, m_isParamEntity = 3 '\003', m_useForeignDTD = 0 '\000',
m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE, m_hash_secret_salt_128 = {k = {0, 1198295875592}},
m_hash_secret_salt_set = 3 '\003', m_accounting = {countBytesDirect = 3, countBytesIndirect = 55366423412736,
debugLevel = 140733063684608, maximumAmplificationFactor = -1.56324889e+34, activationThresholdBytes = 140733063601712},
m_alloc_tracker = {bytesAllocated = 140733063683632, peakBytesAllocated = 140733063684672, debugLevel = 140733063428944,
maximumAmplificationFactor = 0, activationThresholdBytes = 0}, m_entity_stats = {countEverOpened = 0, currentDepth = 0,
maximumDepthSeen = 0, debugLevel = 0}, m_reenter = 3 '\003'}
(gdb) p *(XML_Parser)0x7ffef84294d0
$5 = {m_userData = 0x1, m_handlerArg = 0x7ffff4908420 <PyTuple_Type>, m_buffer = 0x8 <error: Cannot access memory at address 0x8>,
m_mem = {malloc_fcn = 0xffffffffffffffff, realloc_fcn = 0x7fffeefacb40, free_fcn = 0x7ffef844acf0},
m_bufferPtr = 0x7ffff493c768 <_PyRuntime+63304> "", m_bufferEnd = 0x7ffff493bb38 <_PyRuntime+60184> "",
m_bufferLim = 0x7fff9be17ef0 "", m_parseEndByteIndex = 140733063670256, m_parseEndPtr = 0x7ffef840acd0 "",
m_partialTokenBytesBefore = 140733063665520, m_reparseDeferralEnabled = 1 '\001', m_lastBufferRequestSize = 0,
m_dataBuf = 0x7ffff490ca40 <PyUnicode_Type> "", m_dataBufEnd = 0x3e <error: Cannot access memory at address 0x3e>,
m_startElementHandler = 0xffffffffffffffff, m_endElementHandler = 0x6e10570064, m_characterDataHandler = 0x2020202020202020,
m_processingInstructionHandler = 0x7261747320202020, m_commentHandler = 0x4e3d6b72616d5f74,
m_startCdataSectionHandler = 0x646e65202c656e6f, m_endCdataSectionHandler = 0x6f4e3d6b72616d5f, m_defaultHandler = 0x776f6c66202c656e,
m_startDoctypeDeclHandler = 0x4e3d656c7974735f, m_endDoctypeDeclHandler = 0xa3a29656e6f, m_unparsedEntityDeclHandler = 0x0,
m_notationDeclHandler = 0x1, m_startNamespaceDeclHandler = 0x7ffff490ca40 <PyUnicode_Type>, m_endNamespaceDeclHandler = 0x3a,
m_notStandaloneHandler = 0xffffffffffffffff, m_externalEntityRefHandler = 0x64, m_externalEntityRefHandlerArg = 0x2066656420202020,
m_skippedEntityHandler = 0x5f5f74696e695f5f, m_unknownEncodingHandler = 0x74202c666c657328, m_elementDeclHandler = 0x756c6176202c6761,
m_attlistDeclHandler = 0x7472617473202c65, m_entityDeclHandler = 0x65202c6b72616d5f, m_xmlDeclHandler = 0x296b72616d5f646e,
m_encoding = 0xa3a, m_initEncoding = {initEnc = {scanners = {0x0, 0x7ffef84fa080, 0x7ffef84f9fd0, 0x1}, literalScanners = {
0x7ffff4908420 <PyTuple_Type>, 0x7}, nameMatchesAscii = 0xffffffffffffffff, nameLength = 0x7ffff49044c0 <_Py_NoneStruct>,
skipS = 0x55555b9fed80, getAtts = 0x7ffff49044c0 <_Py_NoneStruct>, charRefNumber = 0x7ffff49044c0 <_Py_NoneStruct>,
predefinedEntityName = 0x7ffff49044c0 <_Py_NoneStruct>, updatePosition = 0x7ffff49044c0 <_Py_NoneStruct>,
isPublicId = 0x7ffff49044c0 <_Py_NoneStruct>, utf8Convert = 0x63, utf16Convert = 0x0, minBytesPerChar = 0, isUtf8 = 0 '\000',
isUtf16 = 0 '\000'}, encPtr = 0x1}, m_internalEncoding = 0x7ffff4908420 <PyTuple_Type>,
m_protocolEncodingName = 0x7 <error: Cannot access memory at address 0x7>, m_ns = 255 '\377', m_ns_triplets = 255 '\377',
m_unknownEncodingMem = 0x7fffeedbc780, m_unknownEncodingData = 0x7ffff4936f10 <_PyRuntime+40688>,
m_unknownEncodingHandlerData = 0x7fffeec1e880, m_unknownEncodingRelease = 0x7fffeec04d50, m_prologState = {handler = 0x7ffef8448b70,
level = 4165242032, role_none = 32766, includeLevel = 4165242096, documentEntity = 32766, inEntityValue = 0}, m_processor = 0x0,
m_errorCode = XML_ERROR_NONE, m_eventPtr = 0x1 <error: Cannot access memory at address 0x1>,
m_eventEndPtr = 0x7ffff4908420 <PyTuple_Type> "", m_positionPtr = 0x7 <error: Cannot access memory at address 0x7>,
m_openInternalEntities = 0xffffffffffffffff, m_freeInternalEntities = 0x7fffeedbc780,
m_openAttributeEntities = 0x7ffff4936f10 <_PyRuntime+40688>, m_freeAttributeEntities = 0x7fffeec1e880,
m_openValueEntities = 0x7fffeec04d50, m_freeValueEntities = 0x7ffef8448b70, m_defaultExpandInternalEntities = 112 'p',
m_tagLevel = 32766, m_declEntity = 0x7ffef8448cf0, m_doctypeName = 0x0, m_doctypeSysid = 0x0, m_doctypePubid = 0x0,
m_declAttributeType = 0x2 <error: Cannot access memory at address 0x2>, m_declNotationName = 0x7ffff4908420 <PyTuple_Type> "",
m_declNotationPublicId = 0x7 <error: Cannot access memory at address 0x7>, m_declElementType = 0xffffffffffffffff,
m_declAttributeId = 0x7fffeedbc780, m_declAttributeIsCdata = 16 '\020', m_declAttributeIsId = 111 'o', m_dtd = 0x7fffeec1e880,
m_curBase = 0x7fffeec04d50 "", m_tagStack = 0x7ffef8448b70, m_freeTagList = 0x7fff931ffdf0, m_inheritedBindings = 0x7ffef8448cf0,
m_freeBindingList = 0x0, m_attsSize = 1, m_nSpecifiedAtts = 0, m_idAttIndex = -191837632, m_atts = 0x38,
m_nsAtts = 0xffffffffffffffff, m_nsAttsVersion = 100, m_nsAttsPower = 35 '#', m_position = {lineNumber = 7306080452898615328,
columnNumber = 8027139005750714483}, m_tempPool = {blocks = 0x20676e69776f6c6c, freeBlocks = 0x2073646f6874656d,
end = 0x7274746120646e61 <error: Cannot access memory at address 0x7274746120646e61>,
ptr = 0xa3a736574756269 <error: Cannot access memory at address 0xa3a736574756269>, start = 0x0, parser = 0x301000402},
m_temp2Pool = {blocks = 0x1, freeBlocks = 0x7ffff490ca40 <PyUnicode_Type>, end = 0x3e <error: Cannot access memory at address 0x3e>,
ptr = 0xffffffffffffffff <error: Cannot access memory at address 0xffffffffffffffff>,
start = 0x64 <error: Cannot access memory at address 0x64>, parser = 0x646f632027732527},
m_groupConnector = 0x74276e6163206365 <error: Cannot access memory at address 0x74276e6163206365>, m_groupSize = 1667589152,
m_namespaceSeparator = 111 'o', m_parentParser = 0x2578232065747962, m_parsingStatus = {parsing = 980955696, finalBuffer = 32 ' '},
m_isParamEntity = 32 ' ', m_useForeignDTD = 32 ' ', m_paramEntityParsing = (unknown: 0x73252220), m_hash_secret_salt_128 = {k = {
8388362703413980194, 110110620675945}}, m_hash_secret_salt_set = 0 '\000', m_accounting = {countBytesDirect = 1,
countBytesIndirect = 140737296517696, debugLevel = 56, maximumAmplificationFactor = -nan(0x7fffff), activationThresholdBytes = 100},
m_alloc_tracker = {bytesAllocated = 8390317583334731381, peakBytesAllocated = 7018969010048623201, debugLevel = 2531148770652152178,
maximumAmplificationFactor = 1.64049084e-07, activationThresholdBytes = 2459086794333882995}, m_entity_stats = {
countEverOpened = 740455205, currentDepth = 1936683040, maximumDepthSeen = 1869182057, debugLevel = 0}, m_reenter = 0 '\000'}
It's hard to read, but the struct doesn't look healthy. Some of these pointer fields look like they were overwritten with string data?
For example, m_characterDataHandler = 0x2020202020202020 looks like eight ASCII space characters, and m_commentHandler = 0x4e3d6b72616d5f74 spells out "t_mark=N" when its bytes are read in little-endian order, etc. Or am I looking in the wrong place?
I'll try running it under ASan next, which means rebuilding FreeCAD with -fsanitize=address -fno-omit-frame-pointer, correct?