◐ Shell
clean mode source ↗

bpo-44953: Add vectorcall for itemgetter and attrgetter by sweeneyde · Pull Request #27828 · python/cpython

I re-ran some benchmarks, and they still look good. I plan to merge this in about 24 hours if there are no more objections.

from operator import itemgetter, attrgetter
from itertools import repeat
from collections import namedtuple, deque
from pyperf import Runner

class DefaultClass:
    """Ordinary class (no ``__slots__``) exposing the attributes ``a`` and ``b``."""

    def __init__(self, a, b):
        # Assigned left to right, so ``a`` is stored before ``b``.
        self.a, self.b = a, b

class SlotsClass:
    """Class exposing ``a`` and ``b`` through ``__slots__`` rather than an instance ``__dict__``."""

    __slots__ = ("a", "b")

    def __init__(self, a, b):
        # Assigned left to right, so ``a`` is stored before ``b``.
        self.a, self.b = a, b

# Two-field namedtuple type; its type name is "NT" but it is registered below
# under the key 'NamedTuple'.
NamedTuple = namedtuple("NT", ["a", "b"])

# Number of getter calls performed by a single run of each "-map" statement.
MAP_LOOPS = 10_000

# Callables used to build the objects that attrgetter('a') is timed against.
attr_classes = dict(
    DefaultClass=DefaultClass,
    SlotsClass=SlotsClass,
    NamedTuple=NamedTuple,
)

# Callables used to build the containers that itemgetter(1) is timed against.
# 'dict' maps to dict.fromkeys, so the iterable's elements become the keys.
item_classes = dict(
    tuple=tuple,
    list=list,
    dict=dict.fromkeys,
)

# Globals handed to every timed statement: the two getters, the helpers used
# by the "-map" statements, and every object factory registered above.
namespace = {
    'IG': itemgetter(1),
    'AG': attrgetter('a'),
    'repeat': repeat,
    'deque': deque,
    **attr_classes,
    **item_classes,
}

runner = Runner()

# Time AG (attrgetter('a') in the benchmark namespace) on one instance of each
# registered attribute-bearing type: a single direct call ("-1") and
# MAP_LOOPS calls driven by map() ("-map").
attr_map_stmt = f"deque(map(AG, repeat(obj, {MAP_LOOPS})), maxlen=0)"
for type_name in attr_classes:
    build_obj = f"obj = {type_name}(11, 22)"
    runner.timeit(
        name=f"{type_name}-1",
        stmt="AG(obj)",
        setup=build_obj,
        globals=namespace,
    )
    # deque(..., maxlen=0) exhausts the map iterator without keeping results;
    # inner_loops reports that one statement run makes MAP_LOOPS getter calls.
    runner.timeit(
        name=f"{type_name}-map",
        stmt=attr_map_stmt,
        setup=build_obj,
        inner_loops=MAP_LOOPS,
        globals=namespace,
    )

# Time IG (itemgetter(1) in the benchmark namespace) on one container built by
# each registered factory: a single direct call ("-1") and MAP_LOOPS calls
# driven by map() ("-map").
item_map_stmt = f"deque(map(IG, repeat(obj, {MAP_LOOPS})), maxlen=0)"
for type_name in item_classes:
    build_obj = f"obj = {type_name}((1, 2, 3, 4, 5))"
    runner.timeit(
        name=f"{type_name}-1",
        stmt="IG(obj)",
        setup=build_obj,
        globals=namespace,
    )
    # deque(..., maxlen=0) exhausts the map iterator without keeping results;
    # inner_loops reports that one statement run makes MAP_LOOPS getter calls.
    runner.timeit(
        name=f"{type_name}-map",
        stmt=item_map_stmt,
        setup=build_obj,
        inner_loops=MAP_LOOPS,
        globals=namespace,
    )

Results:

| Benchmark        | main_bench | PR-27828              |
|------------------|:----------:|:---------------------:|
| tuple-map        | 27.6 ns    | 13.2 ns: 2.09x faster |
| dict-map         | 39.4 ns    | 24.2 ns: 1.62x faster |
| NamedTuple-map   | 40.0 ns    | 24.8 ns: 1.61x faster |
| list-map         | 38.2 ns    | 23.8 ns: 1.61x faster |
| SlotsClass-map   | 42.8 ns    | 28.1 ns: 1.53x faster |
| DefaultClass-map | 45.5 ns    | 31.0 ns: 1.47x faster |
| tuple-1          | 56.7 ns    | 41.4 ns: 1.37x faster |
| NamedTuple-1     | 70.1 ns    | 52.7 ns: 1.33x faster |
| dict-1           | 69.4 ns    | 52.2 ns: 1.33x faster |
| SlotsClass-1     | 75.1 ns    | 56.6 ns: 1.33x faster |
| list-1           | 68.7 ns    | 52.1 ns: 1.32x faster |
| DefaultClass-1   | 76.9 ns    | 59.4 ns: 1.29x faster |
| Geometric mean   | (ref)      | 1.48x faster          |