◐ Shell
clean mode source ↗

feat: Experimental transpilation of unannotated python callables by TrevorBergeron · Pull Request #17419 · googleapis/google-cloud-python

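The system test failure below looks like a real error, but it is probably unrelated to this change. The test wrote 9 rows with an index named `ordering_id_463089`, yet the frame read back through the `*.parquet` wildcard has 36 rows and an `ordering_id_430420` column, which suggests the wildcard also matched files written to the same GCS prefix by other parametrizations of this test: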

___________________ test_read_parquet_gcs[bigquery_wildcard] ___________________
[gw19] linux -- Python 3.12.12 /tmpfs/src/github/google-cloud-python/packages/bigframes/.nox/system/bin/python

session = 
scalars_dfs = (          bool_col                                          bytes_col  \
rowindex                                    ...7             True  ...  0 days 00:00:00.000004
8            False  ...         5 days 00:00:00

[9 rows x 14 columns])
gcs_folder = 'gs://bigframes-dev-testing/bigframes_tests_system_20260616000152_914011/'
engine = 'bigquery', filename = '*.parquet'

    @pytest.mark.parametrize(
        ("engine", "filename"),
        (
            pytest.param(
                "auto",
                "000000000000.parquet",
                id="auto",
            ),
            pytest.param(
                "pyarrow",
                "000000000000.parquet",
                id="pyarrow",
            ),
            pytest.param(
                "bigquery",
                "000000000000.parquet",
                id="bigquery",
            ),
            pytest.param(
                "bigquery",
                "*.parquet",
                id="bigquery_wildcard",
            ),
            pytest.param(
                "auto",
                "*.parquet",
                id="auto_wildcard",
                marks=pytest.mark.xfail(
                    raises=ValueError,
                ),
            ),
        ),
    )
    def test_read_parquet_gcs(
        session: bigframes.Session, scalars_dfs, gcs_folder, engine, filename
    ):
        scalars_df, _ = scalars_dfs
        # Include wildcard so that multiple files can be written/read if > 1 GB.
        # https://cloud.google.com/bigquery/docs/exporting-data#exporting_data_into_one_or_more_files
        write_path = gcs_folder + test_read_parquet_gcs.__name__ + "*.parquet"
        read_path = gcs_folder + test_read_parquet_gcs.__name__ + filename
    
        df_in: bigframes.dataframe.DataFrame = scalars_df.copy()
        # GEOGRAPHY not supported in parquet export.
        df_in = df_in.drop(columns="geography_col")
        # Make sure we can also serialize the order.
        df_write = df_in.reset_index(drop=False)
        df_write.index.name = f"ordering_id_{random.randrange(1_000_000)}"
        df_write.to_parquet(write_path, index=True)
    
        df_out = (
            session.read_parquet(read_path, engine=engine)
            # Restore order.
>           .set_index(df_write.index.name)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
            .sort_index()
            # Restore index.
            .set_index(typing.cast(str, df_in.index.name))
        )

tests/system/small/test_session.py:1916: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
bigframes/core/logging/log_adapter.py:183: in wrapper
    return method(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self =     ordering_id_430420  rowindex  bool_col  \
0                          4     False   
2                     ...                  432000000000  
10                                  432000000000  
...

[36 rows x 15 columns]
keys = ('ordering_id_463089',), append = False, drop = True

    def set_index(
        self,
        keys: typing.Union[blocks.Label, typing.Sequence[blocks.Label]],
        append: bool = False,
        drop: bool = True,
    ) -> DataFrame:
        if not utils.is_list_like(keys):
            keys = typing.cast(typing.Sequence[blocks.Label], (keys,))
        else:
            keys = typing.cast(typing.Sequence[blocks.Label], tuple(keys))
        col_ids = [self._resolve_label_exact(key) for key in keys]
        missing = [keys[i] for i in range(len(col_ids)) if col_ids[i] is None]
        if len(missing) > 0:
>           raise KeyError(f"None of {missing} are in the columns")
E           KeyError: "None of ['ordering_id_463089'] are in the columns"

bigframes/dataframe.py:2419: KeyError

CC oncall @sycai for visibility