◐ Shell
clean mode source ↗

[3.11] [CVE-2023-27043] gh-102988: Reject malformed addresses in email.parseaddr() (GH-111116) by encukou · Pull Request #123767 · python/cpython

Expand Up @@ -48,6 +48,7 @@ specialsre = re.compile(r'[][\\()<>@,:;".]') escapesre = re.compile(r'[\\"]')

def _has_surrogates(s): """Return True if s may contain surrogate-escaped binary data.""" # This check is based on the fact that unless there are surrogates, utf8 Expand Down Expand Up @@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'): return address

def _iter_escaped_chars(addr): pos = 0 escape = False for pos, ch in enumerate(addr): if escape: yield (pos, '\\' + ch) escape = False elif ch == '\\': escape = True else: yield (pos, ch) if escape: yield (pos, '\\')

def _strip_quoted_realnames(addr): """Strip real names between quotes.""" if '"' not in addr: # Fast path return addr
start = 0 open_pos = None result = [] for pos, ch in _iter_escaped_chars(addr): if ch == '"': if open_pos is None: open_pos = pos else: if start != open_pos: result.append(addr[start:open_pos]) start = pos + 1 open_pos = None
if start < len(addr): result.append(addr[start:])
return ''.join(result)
def getaddresses(fieldvalues): """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" all = COMMASPACE.join(str(v) for v in fieldvalues) a = _AddressList(all) return a.addresslist
supports_strict_parsing = True
def getaddresses(fieldvalues, *, strict=True): """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in its place.
If strict is true, use a strict parser which rejects malformed inputs. """
# If strict is true, if the resulting list of parsed addresses is greater # than the number of fieldvalues in the input list, a parsing error has # occurred and consequently a list containing a single empty 2-tuple [('', # '')] is returned in its place. This is done to avoid invalid output. # # Malformed input: getaddresses(['alice@example.com <bob@example.com>']) # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] # Safe output: [('', '')]
if not strict: all = COMMASPACE.join(str(v) for v in fieldvalues) a = _AddressList(all) return a.addresslist
fieldvalues = [str(v) for v in fieldvalues] fieldvalues = _pre_parse_validation(fieldvalues) addr = COMMASPACE.join(fieldvalues) a = _AddressList(addr) result = _post_parse_validation(a.addresslist)
# Treat output as invalid if the number of addresses is not equal to the # expected number of addresses. n = 0 for v in fieldvalues: # When a comma is used in the Real Name part it is not a deliminator. # So strip those out before counting the commas. v = _strip_quoted_realnames(v) # Expected number of addresses: 1 + number of commas n += 1 + v.count(',') if len(result) != n: return [('', '')]
return result

def _check_parenthesis(addr): # Ignore parenthesis in quoted real names. addr = _strip_quoted_realnames(addr)
opens = 0 for pos, ch in _iter_escaped_chars(addr): if ch == '(': opens += 1 elif ch == ')': opens -= 1 if opens < 0: return False return (opens == 0)

def _pre_parse_validation(email_header_fields): accepted_values = [] for v in email_header_fields: if not _check_parenthesis(v): v = "('', '')" accepted_values.append(v)
return accepted_values

def _post_parse_validation(parsed_email_header_tuples): accepted_values = [] # The parser would have parsed a correctly formatted domain-literal # The existence of an [ after parsing indicates a parsing failure for v in parsed_email_header_tuples: if '[' in v[1]: v = ('', '') accepted_values.append(v)
return accepted_values

def _format_timetuple_and_zone(timetuple, zone): Expand Down Expand Up @@ -205,16 +321,33 @@ def parsedate_to_datetime(data): tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))

def parseaddr(addr): def parseaddr(addr, *, strict=True): """ Parse addr into its constituent realname and email address parts.
Return a tuple of realname and email address, unless the parse fails, in which case return a 2-tuple of ('', '').
If strict is True, use a strict parser which rejects malformed inputs. """ addrs = _AddressList(addr).addresslist if not addrs: return '', '' if not strict: addrs = _AddressList(addr).addresslist if not addrs: return ('', '') return addrs[0]
if isinstance(addr, list): addr = addr[0]
if not isinstance(addr, str): return ('', '')
addr = _pre_parse_validation([addr])[0] addrs = _post_parse_validation(_AddressList(addr).addresslist)
if not addrs or len(addrs) > 1: return ('', '')
return addrs[0]

Expand Down