◐ Shell
clean mode source ↗

[3.7] bpo-34454: fix .fromisoformat() methods crashing on inputs with surrogate code points (GH-8862) by miss-islington · Pull Request #8877 · python/cpython

Expand Up @@ -2883,6 +2883,9 @@ date_fromisoformat(PyObject *cls, PyObject *dtstr) { Py_ssize_t len;
const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len); if (dt_ptr == NULL) { goto invalid_string_error; }
int year = 0, month = 0, day = 0;
Expand All @@ -2894,12 +2897,15 @@ date_fromisoformat(PyObject *cls, PyObject *dtstr) { }
if (rv < 0) { PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", dt_ptr); return NULL; goto invalid_string_error; }
return new_date_subclass_ex(year, month, day, cls);
invalid_string_error: PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %R", dtstr); return NULL; }

Expand Down Expand Up @@ -4258,15 +4264,18 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) { Py_ssize_t len; const char *p = PyUnicode_AsUTF8AndSize(tstr, &len);
if (p == NULL) { goto invalid_string_error; }
int hour = 0, minute = 0, second = 0, microsecond = 0; int tzoffset, tzimicrosecond = 0; int rv = parse_isoformat_time(p, len, &hour, &minute, &second, &microsecond, &tzoffset, &tzimicrosecond);
if (rv < 0) { PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", p); return NULL; goto invalid_string_error; }
PyObject *tzinfo = tzinfo_from_isoformat_results(rv, tzoffset, Expand All @@ -4286,6 +4295,10 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) {
Py_DECREF(tzinfo); return t;
invalid_string_error: PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %R", tstr); return NULL; }

Expand Down Expand Up @@ -4839,6 +4852,33 @@ datetime_combine(PyObject *cls, PyObject *args, PyObject *kw) return result; }
static PyObject * _sanitize_isoformat_str(PyObject *dtstr, int *needs_decref) { // `fromisoformat` allows surrogate characters in exactly one position, // the separator; to allow datetime_fromisoformat to make the simplifying // assumption that all valid strings can be encoded in UTF-8, this function // replaces any surrogate character separators with `T`. Py_ssize_t len = PyUnicode_GetLength(dtstr); *needs_decref = 0; if (len <= 10 || !Py_UNICODE_IS_SURROGATE(PyUnicode_READ_CHAR(dtstr, 10))) { return dtstr; }
PyObject *str_out = PyUnicode_New(len, PyUnicode_MAX_CHAR_VALUE(dtstr)); if (str_out == NULL) { return NULL; }
if (PyUnicode_CopyCharacters(str_out, 0, dtstr, 0, len) == -1 || PyUnicode_WriteChar(str_out, 10, (Py_UCS4)'T')) { Py_DECREF(str_out); return NULL; }
*needs_decref = 1; return str_out; }
static PyObject * datetime_fromisoformat(PyObject* cls, PyObject *dtstr) { assert(dtstr != NULL); Expand All @@ -4848,9 +4888,20 @@ datetime_fromisoformat(PyObject* cls, PyObject *dtstr) { return NULL; }
int needs_decref = 0; dtstr = _sanitize_isoformat_str(dtstr, &needs_decref); if (dtstr == NULL) { goto error; }
Py_ssize_t len; const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len); const char * p = dt_ptr;
if (dt_ptr == NULL) { goto invalid_string_error; }
const char *p = dt_ptr;
int year = 0, month = 0, day = 0; int hour = 0, minute = 0, second = 0, microsecond = 0; Expand Down Expand Up @@ -4883,20 +4934,32 @@ datetime_fromisoformat(PyObject* cls, PyObject *dtstr) { &tzoffset, &tzusec); } if (rv < 0) { PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", dt_ptr); return NULL; goto invalid_string_error; }
PyObject* tzinfo = tzinfo_from_isoformat_results(rv, tzoffset, tzusec); if (tzinfo == NULL) { return NULL; goto error; }
PyObject *dt = new_datetime_subclass_ex(year, month, day, hour, minute, second, microsecond, tzinfo, cls);
Py_DECREF(tzinfo); if (needs_decref) { Py_DECREF(dtstr); } return dt;
invalid_string_error: PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %R", dtstr);
error: if (needs_decref) { Py_DECREF(dtstr); }
return NULL; }

Expand Down