◐ Shell
clean mode source ↗

perf(spanner): optimize query result decoding (#17375) · googleapis/google-cloud-python@3f70b2f

@@ -19,14 +19,14 @@

1919

import decimal

2020

import logging

2121

import math

22+

import operator

2223

import threading

2324

import time

2425

import uuid

2526

from contextlib import contextmanager

26272728

from google.api_core import datetime_helpers

2829

from google.api_core.exceptions import Aborted

29-

from google.cloud._helpers import _date_from_iso8601_date

3030

from google.protobuf.internal.enum_type_wrapper import EnumTypeWrapper

3131

from google.protobuf.message import DecodeError, Message

3232

from google.protobuf.struct_pb2 import ListValue, Value

@@ -465,6 +465,12 @@ def _parse_value_pb(value_pb, field_type, field_name, column_info=None):

465465

return _parse_nullable(value_pb, decoder)

466466467467468+

# Module-level aliases for the converters that the per-type decoder lambdas
# in ``_get_type_decoder`` call for DATE, NUMERIC, JSON and UUID values.
# NOTE(review): presumably bound once here so each decoded value avoids the
# repeated module/class attribute lookup -- confirm against the benchmark.
_uuid_UUID = uuid.UUID
_json_from_str = JsonObject.from_str
_Decimal = decimal.Decimal
_date_fromisoformat = datetime.date.fromisoformat

472+473+468474

def _get_type_decoder(field_type, field_name, column_info=None):

469475

"""Returns a function that converts a Value protobuf to cell data.

470476

@@ -489,28 +495,30 @@ def _get_type_decoder(field_type, field_name, column_info=None):

489495

"""

490496491497

type_code = field_type.code

498+

# Note: STRING and BOOL use operator.attrgetter because direct attribute extraction

499+

# is faster in Python. Other types require type transformation, so they use lambdas.

492500

if type_code == TypeCode.STRING:

493-

return _parse_string

501+

return operator.attrgetter("string_value")

494502

elif type_code == TypeCode.BYTES:

495-

return _parse_bytes

503+

return lambda value_pb: value_pb.string_value.encode("utf8")

496504

elif type_code == TypeCode.BOOL:

497-

return _parse_bool

505+

return operator.attrgetter("bool_value")

498506

elif type_code == TypeCode.INT64:

499-

return _parse_int64

507+

return lambda value_pb: int(value_pb.string_value)

500508

elif type_code == TypeCode.FLOAT64:

501509

return _parse_float

502510

elif type_code == TypeCode.FLOAT32:

503511

return _parse_float

504512

elif type_code == TypeCode.DATE:

505-

return _parse_date

513+

return lambda value_pb: _date_fromisoformat(value_pb.string_value)

506514

elif type_code == TypeCode.TIMESTAMP:

507515

return _parse_timestamp

508516

elif type_code == TypeCode.NUMERIC:

509-

return _parse_numeric

517+

return lambda value_pb: _Decimal(value_pb.string_value)

510518

elif type_code == TypeCode.JSON:

511-

return _parse_json

519+

return lambda value_pb: _json_from_str(value_pb.string_value)

512520

elif type_code == TypeCode.UUID:

513-

return _parse_uuid

521+

return lambda value_pb: _uuid_UUID(value_pb.string_value)

514522

elif type_code == TypeCode.PROTO:

515523

return lambda value_pb: _parse_proto(value_pb, column_info, field_name)

516524

elif type_code == TypeCode.ENUM:

@@ -553,48 +561,81 @@ def _parse_list_value_pbs(rows, row_type):

553561

return result

554562555563556-

def _parse_string(value_pb) -> str:

557-

return value_pb.string_value

558-559-560-

def _parse_bytes(value_pb):

561-

return value_pb.string_value.encode("utf8")

562-563-564-

def _parse_bool(value_pb) -> bool:

565-

return value_pb.bool_value

566-567-568-

def _parse_int64(value_pb) -> int:

569-

return int(value_pb.string_value)

570-571-572564

def _parse_float(value_pb) -> float:

573-

if value_pb.HasField("string_value"):

574-

return float(value_pb.string_value)

575-

else:

576-

return value_pb.number_value

577-578-579-

def _parse_date(value_pb):

580-

return _date_from_iso8601_date(value_pb.string_value)

565+

# Note: Storing val = value_pb.string_value and doing a truthiness check is faster

566+

# than calling value_pb.HasField("string_value") because it avoids the C-extension

567+

# method lookup/call overhead and accesses the attribute only once.

568+

val = value_pb.string_value

569+

return float(val) if val else value_pb.number_value

570+571+572+

_POWERS_OF_10 = (

573+

1,

574+

10,

575+

100,

576+

1000,

577+

10000,

578+

100000,

579+

1000000,

580+

10000000,

581+

100000000,

582+

1000000000,

583+

)

581584582585583586

def _parse_timestamp(value_pb):

584-

DatetimeWithNanoseconds = datetime_helpers.DatetimeWithNanoseconds

585-

return DatetimeWithNanoseconds.from_rfc3339(value_pb.string_value)

586-587-588-

def _parse_numeric(value_pb):

589-

return decimal.Decimal(value_pb.string_value)

590-591-592-

def _parse_json(value_pb):

593-

return JsonObject.from_str(value_pb.string_value)

594-595-596-

def _parse_uuid(value_pb):

597-

return uuid.UUID(value_pb.string_value)

587+

val = value_pb.string_value

588+

try:

589+

if len(val) < 20 or val[10] != "T":

590+

raise ValueError()

591+

no_fraction = val[:19]

592+

bare = datetime.datetime.fromisoformat(no_fraction)

593+

if val[19] == ".":

594+

if val.endswith("Z"):

595+

offset = "Z"

596+

fraction = val[20:-1]

597+

elif val[-6] in ("+", "-"):

598+

offset = val[-6:]

599+

fraction = val[20:-6]

600+

else:

601+

raise ValueError()

602+

if not fraction or len(fraction) > 9 or not fraction.isdigit():

603+

raise ValueError()

604+

scale = 9 - len(fraction)

605+

nanos = int(fraction) * _POWERS_OF_10[scale]

606+

else:

607+

nanos = 0

608+

if val.endswith("Z"):

609+

offset = "Z"

610+

elif val[-6] in ("+", "-"):

611+

offset = val[-6:]

612+

else:

613+

raise ValueError()

614+615+

if offset != "Z":

616+

sign = offset[0]

617+

hours = int(offset[1:3])

618+

minutes = int(offset[4:6])

619+

if offset[3] != ":":

620+

raise ValueError()

621+

delta = datetime.timedelta(hours=hours, minutes=minutes)

622+

if sign == "-":

623+

delta = -delta

624+

tzinfo = datetime.timezone(delta)

625+

bare = bare.replace(tzinfo=tzinfo).astimezone(datetime.timezone.utc)

626+627+

return datetime_helpers.DatetimeWithNanoseconds(

628+

bare.year,

629+

bare.month,

630+

bare.day,

631+

bare.hour,

632+

bare.minute,

633+

bare.second,

634+

nanosecond=nanos,

635+

tzinfo=datetime.timezone.utc,

636+

)

637+

except (IndexError, ValueError) as e:

638+

raise ValueError("Timestamp: {} does not match pattern".format(val)) from e

598639599640600641

def _parse_proto(value_pb, column_info, field_name):