perf(spanner): optimize query result decoding (#17375) · googleapis/google-cloud-python@3f70b2f
@@ -19,14 +19,14 @@
1919import decimal
2020import logging
2121import math
22+import operator
2223import threading
2324import time
2425import uuid
2526from contextlib import contextmanager
26272728from google.api_core import datetime_helpers
2829from google.api_core.exceptions import Aborted
29-from google.cloud._helpers import _date_from_iso8601_date
3030from google.protobuf.internal.enum_type_wrapper import EnumTypeWrapper
3131from google.protobuf.message import DecodeError, Message
3232from google.protobuf.struct_pb2 import ListValue, Value
@@ -465,6 +465,12 @@ def _parse_value_pb(value_pb, field_type, field_name, column_info=None):
465465return _parse_nullable(value_pb, decoder)
# Module-level aliases for the converters used by the lambdas returned from
# _get_type_decoder (DATE, NUMERIC, JSON and UUID columns respectively).
# NOTE(review): presumably bound once here so the per-cell decoders skip the
# repeated module/class attribute lookup -- confirm against benchmarks.
_date_fromisoformat = datetime.date.fromisoformat
_Decimal = decimal.Decimal
_json_from_str = JsonObject.from_str
_uuid_UUID = uuid.UUID
472+473+468474def _get_type_decoder(field_type, field_name, column_info=None):
469475"""Returns a function that converts a Value protobuf to cell data.
470476@@ -489,28 +495,30 @@ def _get_type_decoder(field_type, field_name, column_info=None):
489495 """
490496491497type_code = field_type.code
498+# Note: STRING and BOOL use operator.attrgetter because direct attribute extraction
499+# is faster in Python. Other types require type transformation, so they use lambdas.
492500if type_code == TypeCode.STRING:
493-return _parse_string
501+return operator.attrgetter("string_value")
494502elif type_code == TypeCode.BYTES:
495-return _parse_bytes
503+return lambda value_pb: value_pb.string_value.encode("utf8")
496504elif type_code == TypeCode.BOOL:
497-return _parse_bool
505+return operator.attrgetter("bool_value")
498506elif type_code == TypeCode.INT64:
499-return _parse_int64
507+return lambda value_pb: int(value_pb.string_value)
500508elif type_code == TypeCode.FLOAT64:
501509return _parse_float
502510elif type_code == TypeCode.FLOAT32:
503511return _parse_float
504512elif type_code == TypeCode.DATE:
505-return _parse_date
513+return lambda value_pb: _date_fromisoformat(value_pb.string_value)
506514elif type_code == TypeCode.TIMESTAMP:
507515return _parse_timestamp
508516elif type_code == TypeCode.NUMERIC:
509-return _parse_numeric
517+return lambda value_pb: _Decimal(value_pb.string_value)
510518elif type_code == TypeCode.JSON:
511-return _parse_json
519+return lambda value_pb: _json_from_str(value_pb.string_value)
512520elif type_code == TypeCode.UUID:
513-return _parse_uuid
521+return lambda value_pb: _uuid_UUID(value_pb.string_value)
514522elif type_code == TypeCode.PROTO:
515523return lambda value_pb: _parse_proto(value_pb, column_info, field_name)
516524elif type_code == TypeCode.ENUM:
@@ -553,48 +561,81 @@ def _parse_list_value_pbs(rows, row_type):
553561return result
554562555563556-def _parse_string(value_pb) -> str:
557-return value_pb.string_value
558-559-560-def _parse_bytes(value_pb):
561-return value_pb.string_value.encode("utf8")
562-563-564-def _parse_bool(value_pb) -> bool:
565-return value_pb.bool_value
566-567-568-def _parse_int64(value_pb) -> int:
569-return int(value_pb.string_value)
def _parse_float(value_pb) -> float:
    """Decode a floating-point cell from a ``Value`` protobuf.

    :param value_pb: object exposing ``string_value`` and ``number_value``.
    :returns: ``float(value_pb.string_value)`` when the string is non-empty
        (this covers any textual form ``float()`` accepts, such as ``"NaN"``
        or ``"-Infinity"``); otherwise ``value_pb.number_value``.
    """
    # The string field is read once and tested for truthiness rather than
    # calling ``HasField("string_value")``: one attribute access, no extra
    # method call.
    text = value_pb.string_value
    if text:
        return float(text)
    return value_pb.number_value
# Lookup table: _POWERS_OF_10[k] == 10 ** k for k in 0..9. Used to scale a
# truncated fractional-seconds field (1-9 digits) up to whole nanoseconds
# without computing a power on every call.
_POWERS_OF_10 = tuple(10**exponent for exponent in range(10))
def _parse_timestamp(value_pb):
    """Decode an RFC 3339 timestamp cell into a UTC datetime with nanoseconds.

    Accepted shape: ``YYYY-MM-DDTHH:MM:SS[.f{1,9}](Z|+HH:MM|-HH:MM)``.
    Nothing else may appear between the seconds (or the fraction) and the
    zone designator.

    :param value_pb: object whose ``string_value`` holds the timestamp text.
    :returns: ``datetime_helpers.DatetimeWithNanoseconds`` normalised to UTC;
        a fraction shorter than nine digits is right-padded to nanoseconds.
    :raises ValueError: if the text does not match the shape above, or if
        applying the offset moves the instant outside the range ``datetime``
        can represent.
    """
    val = value_pb.string_value
    try:
        if len(val) < 20 or val[10] != "T":
            raise ValueError()
        bare = datetime.datetime.fromisoformat(val[:19])
        # Newer interpreters accept abbreviated forms in fromisoformat; the
        # 19-character prefix must be a plain naive date-time, never one that
        # already carries its own offset.
        if bare.tzinfo is not None:
            raise ValueError()

        # Peel the zone designator off the end; whatever is left between the
        # seconds and the designator must be empty or a ".digits" fraction.
        if val.endswith("Z"):
            offset = "Z"
            middle = val[19:-1]
        elif val[-6] in ("+", "-"):
            offset = val[-6:]
            middle = val[19:-6]
        else:
            raise ValueError()

        if not middle:
            nanos = 0
        elif middle[0] == ".":
            fraction = middle[1:]
            if (
                not fraction
                or len(fraction) > 9
                or not (fraction.isascii() and fraction.isdigit())
            ):
                raise ValueError()
            nanos = int(fraction) * _POWERS_OF_10[9 - len(fraction)]
        else:
            # Stray characters before the zone designator.
            raise ValueError()

        if offset != "Z":
            hours_text = offset[1:3]
            minutes_text = offset[4:6]
            digits = hours_text + minutes_text
            # int() alone would accept forms such as " 5" or "+5"; require
            # exactly two ASCII digits on each side of the colon.
            if offset[3] != ":" or not (digits.isascii() and digits.isdigit()):
                raise ValueError()
            hours = int(hours_text)
            minutes = int(minutes_text)
            if hours > 23 or minutes > 59:
                raise ValueError()
            delta = datetime.timedelta(hours=hours, minutes=minutes)
            if offset[0] == "-":
                delta = -delta
            tzinfo = datetime.timezone(delta)
            bare = bare.replace(tzinfo=tzinfo).astimezone(datetime.timezone.utc)

        return datetime_helpers.DatetimeWithNanoseconds(
            bare.year,
            bare.month,
            bare.day,
            bare.hour,
            bare.minute,
            bare.second,
            nanosecond=nanos,
            tzinfo=datetime.timezone.utc,
        )
    except (IndexError, ValueError, OverflowError) as e:
        # OverflowError: shifting to UTC can leave the supported year range
        # (e.g. year 0001 with a positive offset); report it the same way as
        # any other unparseable timestamp.
        raise ValueError("Timestamp: {} does not match pattern".format(val)) from e
598639599640600641def _parse_proto(value_pb, column_info, field_name):