bpo-15216: TextIOWrapper support set encoding, errors and newline after creation by methane · Pull Request #199 · python/cpython
def test_set_encoding_same_codec(self): data = 'foobar\n'.encode('latin1') raw = self.BytesIO(data) txt = self.TextIOWrapper(raw, encoding='latin1') self.assertEqual(txt.encoding, 'latin1')
# Just an alias, shouldn't change anything txt.set_encoding('ISO-8859-1') self.assertEqual(txt.encoding, 'latin1')
# This is an actual change txt.set_encoding('iso8859-15') self.assertEqual(txt.encoding, 'iso8859-15')
def test_set_encoding_read(self): # latin1 -> utf8 # (latin1 can decode utf-8 encoded string) data = 'abc\xe9\n'.encode('latin1') + 'd\xe9f\n'.encode('utf8') raw = self.BytesIO(data) txt = self.TextIOWrapper(raw, encoding='latin1', newline='\n') self.assertEqual(txt.readline(), 'abc\xe9\n') with self.assertRaises(self.UnsupportedOperation): txt.set_encoding('utf-8')
def test_set_encoding_write_fromascii(self): # ascii has a specific encodefunc in the C implementation, # but utf-8-sig has not. Make sure that we get rid of the # cached encodefunc when we switch encoders. raw = self.BytesIO() txt = self.TextIOWrapper(raw, encoding='ascii', newline='\n') txt.write('foo\n') txt.set_encoding('utf-8-sig') txt.write('\xe9\n') txt.flush() self.assertEqual(raw.getvalue(), b'foo\n\xc3\xa9\n')
def test_set_encoding_write(self): # latin -> utf8 raw = self.BytesIO() txt = self.TextIOWrapper(raw, encoding='latin1', newline='\n') txt.write('abc\xe9\n') txt.set_encoding('utf-8') self.assertEqual(raw.getvalue(), b'abc\xe9\n') txt.write('d\xe9f\n') txt.flush() self.assertEqual(raw.getvalue(), b'abc\xe9\nd\xc3\xa9f\n')
# ascii -> utf-8-sig: ensure that no BOM is written in the middle of # the file raw = self.BytesIO() txt = self.TextIOWrapper(raw, encoding='ascii', newline='\n') txt.write('abc\n') txt.set_encoding('utf-8-sig') txt.write('d\xe9f\n') txt.flush() self.assertEqual(raw.getvalue(), b'abc\nd\xc3\xa9f\n')
def test_set_encoding_write_non_seekable(self): raw = self.BytesIO() raw.seekable = lambda: False raw.seek = None txt = self.TextIOWrapper(raw, encoding='ascii', newline='\n') txt.write('abc\n') txt.set_encoding('utf-8-sig') txt.write('d\xe9f\n') txt.flush()
# If the raw stream is not seekable, there'll be a BOM self.assertEqual(raw.getvalue(), b'abc\n\xef\xbb\xbfd\xc3\xa9f\n')
def test_set_encoding_defaults(self): txt = self.TextIOWrapper(self.BytesIO(), 'ascii', 'replace', '\n') txt.set_encoding(None, None) self.assertEqual(txt.encoding, 'ascii') self.assertEqual(txt.errors, 'replace') txt.write('LF\n')
txt.set_encoding(newline='\r\n') self.assertEqual(txt.encoding, 'ascii') self.assertEqual(txt.errors, 'replace')
txt.set_encoding(errors='ignore') self.assertEqual(txt.encoding, 'ascii') txt.write('CRLF\n')
txt.set_encoding(encoding='utf-8', newline=None) self.assertEqual(txt.errors, 'strict') txt.seek(0) self.assertEqual(txt.read(), 'LF\nCRLF\n')
self.assertEqual(txt.detach().getvalue(), b'LF\nCRLF\r\n')
def test_set_encoding_newline(self): raw = self.BytesIO(b'CR\rEOF') txt = self.TextIOWrapper(raw, 'ascii', newline='\n') txt.set_encoding(newline=None) self.assertEqual(txt.readline(), 'CR\n') raw = self.BytesIO(b'CR\rEOF') txt = self.TextIOWrapper(raw, 'ascii', newline='\n') txt.set_encoding(newline='') self.assertEqual(txt.readline(), 'CR\r') raw = self.BytesIO(b'CR\rLF\nEOF') txt = self.TextIOWrapper(raw, 'ascii', newline='\r') txt.set_encoding(newline='\n') self.assertEqual(txt.readline(), 'CR\rLF\n') raw = self.BytesIO(b'LF\nCR\rEOF') txt = self.TextIOWrapper(raw, 'ascii', newline='\n') txt.set_encoding(newline='\r') self.assertEqual(txt.readline(), 'LF\nCR\r') raw = self.BytesIO(b'CR\rCRLF\r\nEOF') txt = self.TextIOWrapper(raw, 'ascii', newline='\r') txt.set_encoding(newline='\r\n') self.assertEqual(txt.readline(), 'CR\rCRLF\r\n')
txt = self.TextIOWrapper(self.BytesIO(), 'ascii', newline='\r') txt.set_encoding(newline=None) txt.write('linesep\n') txt.set_encoding(newline='') txt.write('LF\n') txt.set_encoding(newline='\n') txt.write('LF\n') txt.set_encoding(newline='\r') txt.write('CR\n') txt.set_encoding(newline='\r\n') txt.write('CRLF\n') expected = 'linesep' + os.linesep + 'LF\nLF\nCR\rCRLF\r\n' self.assertEqual(txt.detach().getvalue().decode('ascii'), expected)
class MemviewBytesIO(io.BytesIO): '''A BytesIO object whose read method returns memoryviews