◐ Shell
reader mode source ↗
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
File filter
Conversations
Jump to
Diff view
Apply and reload
Show whitespace
Diff view
Apply and reload
254 changes: 4 additions & 250 deletions git/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,66 +10,23 @@
import locale
import os
import sys
import codecs


from gitdb.utils.compat import (
xrange,
MAXSIZE, # @UnusedImport
izip, # @UnusedImport
)
from gitdb.utils.encoding import (
string_types, # @UnusedImport
text_type, # @UnusedImport
force_bytes, # @UnusedImport
force_text # @UnusedImport
)


PY3 = sys.version_info[0] >= 3
is_win = (os.name == 'nt')
is_posix = (os.name == 'posix')
is_darwin = (os.name == 'darwin')
if hasattr(sys, 'getfilesystemencoding'):
defenc = sys.getfilesystemencoding()
if defenc is None:
defenc = sys.getdefaultencoding()

if PY3:
import io
FileType = io.IOBase

def byte_ord(b):
return b

def bchr(n):
return bytes([n])

def mviter(d):
return d.values()

range = xrange # @ReservedAssignment
unicode = str
binary_type = bytes
else:
FileType = file # @UndefinedVariable on PY3
# usually, this is just ascii, which might not enough for our encoding needs
# Unless it's set specifically, we override it to be utf-8
if defenc == 'ascii':
defenc = 'utf-8'
byte_ord = ord
bchr = chr
unicode = unicode
binary_type = str
range = xrange # @ReservedAssignment

def mviter(d):
return d.itervalues()


def safe_decode(s):
"""Safely decodes a binary string to unicode"""
if isinstance(s, unicode):
return s
elif isinstance(s, bytes):
return s.decode(defenc, 'surrogateescape')
Expand All @@ -79,7 +36,7 @@ def safe_decode(s):

def safe_encode(s):
"""Safely decodes a binary string to unicode"""
if isinstance(s, unicode):
return s.encode(defenc)
elif isinstance(s, bytes):
return s
Expand All @@ -89,7 +46,7 @@ def safe_encode(s):

def win_encode(s):
"""Encode unicodes for process arguments on Windows."""
if isinstance(s, unicode):
return s.encode(locale.getpreferredencoding(False))
elif isinstance(s, bytes):
return s
Expand All @@ -106,208 +63,5 @@ class metaclass(meta):
def __new__(cls, name, nbases, d):
if nbases is None:
return type.__new__(cls, name, (), d)
# There may be clients who rely on this attribute to be set to a reasonable value, which is why
# we set the __metaclass__ attribute explicitly
if not PY3 and '___metaclass__' not in d:
d['__metaclass__'] = meta
return meta(name, bases, d)
return metaclass(meta.__name__ + 'Helper', None, {})


## From https://docs.python.org/3.3/howto/pyporting.html
class UnicodeMixin(object):

"""Mixin class to handle defining the proper __str__/__unicode__
methods in Python 2 or 3."""

if PY3:
def __str__(self):
return self.__unicode__()
else: # Python 2
def __str__(self):
return self.__unicode__().encode(defenc)


"""
This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error
handler of Python 3.
Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc
"""

# This code is released under the Python license and the BSD 2-clause license


FS_ERRORS = 'surrogateescape'

# # -- Python 2/3 compatibility -------------------------------------
# FS_ERRORS = 'my_surrogateescape'

def u(text):
if PY3:
return text
return text.decode('unicode_escape')

def b(data):
if PY3:
return data.encode('latin1')
return data

if PY3:
_unichr = chr
bytes_chr = lambda code: bytes((code,))
else:
_unichr = unichr
bytes_chr = chr

def surrogateescape_handler(exc):
"""
Pure Python implementation of the PEP 383: the "surrogateescape" error
handler of Python 3. Undecodable bytes will be replaced by a Unicode
character U+DCxx on decoding, and these are translated into the
original bytes on encoding.
"""
mystring = exc.object[exc.start:exc.end]

try:
if isinstance(exc, UnicodeDecodeError):
# mystring is a byte-string in this case
decoded = replace_surrogate_decode(mystring)
elif isinstance(exc, UnicodeEncodeError):
# In the case of u'\udcc3'.encode('ascii',
# 'this_surrogateescape_handler'), both Python 2.x and 3.x raise an
# exception anyway after this function is called, even though I think
# it's doing what it should. It seems that the strict encoder is called
# to encode the unicode string that this function returns ...
decoded = replace_surrogate_encode(mystring, exc)
else:
raise exc
except NotASurrogateError:
raise exc
return (decoded, exc.end)


class NotASurrogateError(Exception):
pass


def replace_surrogate_encode(mystring, exc):
"""
Returns a (unicode) string, not the more logical bytes, because the codecs
register_error functionality expects this.
"""
decoded = []
for ch in mystring:
# if PY3:
# code = ch
# else:
code = ord(ch)

# The following magic comes from Py3.3's Python/codecs.c file:
if not 0xD800 <= code <= 0xDCFF:
# Not a surrogate. Fail with the original exception.
raise exc
# mybytes = [0xe0 | (code >> 12),
# 0x80 | ((code >> 6) & 0x3f),
# 0x80 | (code & 0x3f)]
# Is this a good idea?
if 0xDC00 <= code <= 0xDC7F:
decoded.append(_unichr(code - 0xDC00))
elif code <= 0xDCFF:
decoded.append(_unichr(code - 0xDC00))
else:
raise NotASurrogateError
return str().join(decoded)


def replace_surrogate_decode(mybytes):
"""
Returns a (unicode) string
"""
decoded = []
for ch in mybytes:
# We may be parsing newbytes (in which case ch is an int) or a native
# str on Py2
if isinstance(ch, int):
code = ch
else:
code = ord(ch)
if 0x80 <= code <= 0xFF:
decoded.append(_unichr(0xDC00 + code))
elif code <= 0x7F:
decoded.append(_unichr(code))
else:
# # It may be a bad byte
# # Try swallowing it.
# continue
# print("RAISE!")
raise NotASurrogateError
return str().join(decoded)


def encodefilename(fn):
if FS_ENCODING == 'ascii':
# ASCII encoder of Python 2 expects that the error handler returns a
# Unicode string encodable to ASCII, whereas our surrogateescape error
# handler has to return bytes in 0x80-0xFF range.
encoded = []
for index, ch in enumerate(fn):
code = ord(ch)
if code < 128:
ch = bytes_chr(code)
elif 0xDC80 <= code <= 0xDCFF:
ch = bytes_chr(code - 0xDC00)
else:
raise UnicodeEncodeError(FS_ENCODING,
fn, index, index+1,
'ordinal not in range(128)')
encoded.append(ch)
return bytes().join(encoded)
elif FS_ENCODING == 'utf-8':
# UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF
# doesn't go through our error handler
encoded = []
for index, ch in enumerate(fn):
code = ord(ch)
if 0xD800 <= code <= 0xDFFF:
if 0xDC80 <= code <= 0xDCFF:
ch = bytes_chr(code - 0xDC00)
encoded.append(ch)
else:
raise UnicodeEncodeError(
FS_ENCODING,
fn, index, index+1, 'surrogates not allowed')
else:
ch_utf8 = ch.encode('utf-8')
encoded.append(ch_utf8)
return bytes().join(encoded)
return fn.encode(FS_ENCODING, FS_ERRORS)

def decodefilename(fn):
return fn.decode(FS_ENCODING, FS_ERRORS)

FS_ENCODING = 'ascii'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')
# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')


# normalize the filesystem encoding name.
# For example, we expect "utf-8", not "UTF8".
FS_ENCODING = codecs.lookup(FS_ENCODING).name


def register_surrogateescape():
"""
Registers the surrogateescape error handler on Python 2 (only)
"""
if PY3:
return
try:
codecs.lookup_error(FS_ERRORS)
except LookupError:
codecs.register_error(FS_ERRORS, surrogateescape_handler)


try:
b"100644 \x9f\0aaa".decode(defenc, "surrogateescape")
except Exception:
register_surrogateescape()
Loading
Toggle all file notes Toggle all file annotations