feat: support full round-trip persistence for multimodal reference cols (#2511) · googleapis/python-bigquery-dataframes@494a0a1 · GitHub
Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 494a0a1

Browse files
authored
feat: support full round-trip persistence for multimodal reference cols (#2511)
Saves multimodal metadata descriptions on `.to_gbq()`. Fixes #452681068 🦕
1 parent 3ddd7eb commit 494a0a1

4 files changed

Lines changed: 56 additions & 5 deletions

File tree

bigframes/dtypes.py

Lines changed: 21 additions & 1 deletion

bigframes/session/bq_caching_executor.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -334,13 +334,14 @@ def _export_gbq(
334334
session=array_value.session,
335335
)
336336

337-
has_timedelta_col = any(
338-
t == bigframes.dtypes.TIMEDELTA_DTYPE for t in array_value.schema.dtypes
337+
has_special_dtype_col = any(
338+
t in (bigframes.dtypes.TIMEDELTA_DTYPE, bigframes.dtypes.OBJ_REF_DTYPE)
339+
for t in array_value.schema.dtypes
339340
)
340341

341-
if spec.if_exists != "append" and has_timedelta_col:
342+
if spec.if_exists != "append" and has_special_dtype_col:
342343
# Only update schema if this is not modifying an existing table, and the
343-
# new table contains timedelta columns.
344+
# new table contains special columns (like timedelta or obj_ref).
344345
table = self.bqclient.get_table(spec.table)
345346
table.schema = array_value.schema.to_bigquery()
346347
self.bqclient.update_table(table, ["schema"])

tests/system/small/test_dataframe_io.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,6 +1002,28 @@ def test_to_gbq_timedelta_tag_ignored_when_appending(bigquery_client, dataset_id
10021002
assert table.schema[0].description is None
10031003

10041004

1005+
def test_to_gbq_obj_ref(session, dataset_id: str, bigquery_client):
1006+
destination_table = f"{dataset_id}.test_to_gbq_obj_ref"
1007+
sql = """
1008+
SELECT
1009+
'gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri_col
1010+
"""
1011+
df = session.read_gbq(sql)
1012+
df["obj_ref_col"] = df["uri_col"].str.to_blob()
1013+
df = df.drop(columns=["uri_col"])
1014+
1015+
df.to_gbq(destination_table)
1016+
1017+
table = bigquery_client.get_table(destination_table)
1018+
obj_ref_field = next(f for f in table.schema if f.name == "obj_ref_col")
1019+
assert obj_ref_field.field_type == "RECORD"
1020+
assert obj_ref_field.description == "bigframes_dtype: OBJ_REF_DTYPE"
1021+
1022+
reloaded_df = session.read_gbq(destination_table)
1023+
assert reloaded_df["obj_ref_col"].dtype == dtypes.OBJ_REF_DTYPE
1024+
assert len(reloaded_df) == 1
1025+
1026+
10051027
@pytest.mark.parametrize(
10061028
("index"),
10071029
[True, False],

tests/unit/test_dtypes.py

Lines changed: 8 additions & 0 deletions

0 commit comments

Comments
 (0)