feat: Experimental transpilation of unannotated python callables by TrevorBergeron · Pull Request #17419 · googleapis/google-cloud-python
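The system test failure below looks like it might indicate a real error, but it is probably unrelated to this change. Note that the frame read back has 36 rows and an `ordering_id_430420` column, while this run wrote 9 rows indexed by `ordering_id_463089`, which suggests the `*.parquet` wildcard matched files written by other parametrizations of the same test: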
___________________ test_read_parquet_gcs[bigquery_wildcard] ___________________
[gw19] linux -- Python 3.12.12 /tmpfs/src/github/google-cloud-python/packages/bigframes/.nox/system/bin/python
session =
scalars_dfs = ( bool_col bytes_col \
rowindex ...7 True ... 0 days 00:00:00.000004
8 False ... 5 days 00:00:00
[9 rows x 14 columns])
gcs_folder = 'gs://bigframes-dev-testing/bigframes_tests_system_20260616000152_914011/'
engine = 'bigquery', filename = '*.parquet'
@pytest.mark.parametrize(
("engine", "filename"),
(
pytest.param(
"auto",
"000000000000.parquet",
id="auto",
),
pytest.param(
"pyarrow",
"000000000000.parquet",
id="pyarrow",
),
pytest.param(
"bigquery",
"000000000000.parquet",
id="bigquery",
),
pytest.param(
"bigquery",
"*.parquet",
id="bigquery_wildcard",
),
pytest.param(
"auto",
"*.parquet",
id="auto_wildcard",
marks=pytest.mark.xfail(
raises=ValueError,
),
),
),
)
def test_read_parquet_gcs(
session: bigframes.Session, scalars_dfs, gcs_folder, engine, filename
):
scalars_df, _ = scalars_dfs
# Include wildcard so that multiple files can be written/read if > 1 GB.
# https://cloud.google.com/bigquery/docs/exporting-data#exporting_data_into_one_or_more_files
write_path = gcs_folder + test_read_parquet_gcs.__name__ + "*.parquet"
read_path = gcs_folder + test_read_parquet_gcs.__name__ + filename
df_in: bigframes.dataframe.DataFrame = scalars_df.copy()
# GEOGRAPHY not supported in parquet export.
df_in = df_in.drop(columns="geography_col")
# Make sure we can also serialize the order.
df_write = df_in.reset_index(drop=False)
df_write.index.name = f"ordering_id_{random.randrange(1_000_000)}"
df_write.to_parquet(write_path, index=True)
df_out = (
session.read_parquet(read_path, engine=engine)
# Restore order.
> .set_index(df_write.index.name)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.sort_index()
# Restore index.
.set_index(typing.cast(str, df_in.index.name))
)
tests/system/small/test_session.py:1916:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
bigframes/core/logging/log_adapter.py:183: in wrapper
return method(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = ordering_id_430420 rowindex bool_col \
0 4 False
2 ... 432000000000
10 432000000000
...
[36 rows x 15 columns]
keys = ('ordering_id_463089',), append = False, drop = True
def set_index(
self,
keys: typing.Union[blocks.Label, typing.Sequence[blocks.Label]],
append: bool = False,
drop: bool = True,
) -> DataFrame:
if not utils.is_list_like(keys):
keys = typing.cast(typing.Sequence[blocks.Label], (keys,))
else:
keys = typing.cast(typing.Sequence[blocks.Label], tuple(keys))
col_ids = [self._resolve_label_exact(key) for key in keys]
missing = [keys[i] for i in range(len(col_ids)) if col_ids[i] is None]
if len(missing) > 0:
> raise KeyError(f"None of {missing} are in the columns")
E KeyError: "None of ['ordering_id_463089'] are in the columns"
bigframes/dataframe.py:2419: KeyError
CC on-call @sycai for visibility.