[3.6] bpo-31170: Write unit test for Expat 2.2.4 UTF-8 bug (#3570) by vstinner · Pull Request #3746 · python/cpython

python

@@ -33,6 +33,7 @@ except UnicodeEncodeError: raise unittest.SkipTest("filename is not encodable to utf8") SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata") UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata")
SAMPLE_XML = """\ <body> @@ -1724,6 +1725,37 @@ def __eq__(self, other): self.assertIsInstance(e[0].tag, str) self.assertEqual(e[0].tag, 'changed')
def check_expat224_utf8_bug(self, text):
    """Check that UTF-8 bytes survive a round trip through an attribute.

    *text* is a bytes object. It is embedded as the value of the ``b``
    attribute of an ``<a/>`` element, the document is parsed, and the
    parsed attribute must equal *text* decoded as UTF-8.
    """
    document = b'<a b="%s"/>' % text
    element = ET.XML(document)
    parsed_value = element.get('b')
    self.assertEqual(parsed_value, text.decode('utf-8'))
def test_expat224_utf8_bug(self): # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder. # Check that Expat 2.2.4 fixed the bug. # # Test buffer bounds at odd and even positions.
text = b'\xc3\xa0' * 1024 self.check_expat224_utf8_bug(text)
text = b'x' + b'\xc3\xa0' * 1024 self.check_expat224_utf8_bug(text)
def test_expat224_utf8_bug_file(self):
    # Parse the XML test file and check that the 'b' attribute seen by
    # the parser matches the value extracted by hand from the raw bytes.
    with open(UTF8_BUG_XMLFILE, 'rb') as fp:
        raw = fp.read()
    root = ET.fromstring(raw)
    xmlattr = root.get('b')

    # "Parse" manually the XML file to extract the value of the 'b'
    # attribute of the <a b='xxx' /> XML element
    text = raw.decode('utf-8').strip()
    # A CRLF pair in the raw text is compared as a single space.
    # NOTE(review): presumably mirrors the parser's normalization of
    # line breaks inside attribute values -- confirm against the file.
    text = text.replace('\r\n', ' ')
    # Drop the leading "<a b='" (6 characters) and the trailing "' />"
    # (4 characters); assumes the file holds exactly that one element.
    text = text[6:-4]
    # Compare against the value fetched above instead of looking the
    # attribute up a second time.
    self.assertEqual(xmlattr, text)

# --------------------------------------------------------------------