feat: add dt.tz_localize() (#2469) · googleapis/python-bigquery-dataframes@f70f93a · GitHub
Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit f70f93a

Browse files
authored
feat: add dt.tz_localize() (#2469)
Only `None` and `"UTC"` time zones are supported in this version. Fixes b/481069646 🦕
1 parent 1d81b41 commit f70f93a

9 files changed

Lines changed: 99 additions & 10 deletions

File tree

bigframes/core/compile/ibis_compiler/scalar_op_registry.py

Lines changed: 6 additions & 3 deletions

bigframes/core/compile/sqlglot/expressions/datetime_ops.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -371,7 +371,7 @@ def _(expr: TypedExpr, op: ops.ToDatetimeOp) -> sge.Expression:
371371
)
372372
return sge.Cast(this=result, to="DATETIME")
373373

374-
if expr.dtype == dtypes.STRING_DTYPE:
374+
if expr.dtype in (dtypes.STRING_DTYPE, dtypes.TIMESTAMP_DTYPE):
375375
return sge.TryCast(this=expr.expr, to="DATETIME")
376376

377377
value = expr.expr
@@ -396,7 +396,7 @@ def _(expr: TypedExpr, op: ops.ToTimestampOp) -> sge.Expression:
396396
"PARSE_TIMESTAMP", sge.convert(op.format), expr.expr, sge.convert("UTC")
397397
)
398398

399-
if expr.dtype == dtypes.STRING_DTYPE:
399+
if expr.dtype in (dtypes.STRING_DTYPE, dtypes.DATETIME_DTYPE):
400400
return sge.func("TIMESTAMP", expr.expr)
401401

402402
value = expr.expr

bigframes/operations/datetime_ops.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
7373
dtypes.INT_DTYPE,
7474
dtypes.STRING_DTYPE,
7575
dtypes.DATE_DTYPE,
76+
dtypes.TIMESTAMP_DTYPE,
7677
):
7778
raise TypeError("expected string or numeric input")
7879
return pd.ArrowDtype(pa.timestamp("us", tz=None))
@@ -91,6 +92,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
9192
dtypes.INT_DTYPE,
9293
dtypes.STRING_DTYPE,
9394
dtypes.DATE_DTYPE,
95+
dtypes.DATETIME_DTYPE,
9496
):
9597
raise TypeError("expected string or numeric input")
9698
return pd.ArrowDtype(pa.timestamp("us", tz="UTC"))

bigframes/operations/datetimes.py

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
from __future__ import annotations
1616

1717
import datetime as dt
18-
from typing import Optional
18+
from typing import Literal, Optional
1919

2020
import bigframes_vendored.pandas.core.arrays.datetimelike as vendored_pandas_datetimelike
2121
import bigframes_vendored.pandas.core.indexes.accessor as vendordt
@@ -147,6 +147,21 @@ def tz(self) -> Optional[dt.timezone]:
147147
else:
148148
raise ValueError(f"Unexpected timezone {tz_string}")
149149

150+
def tz_localize(self, tz: Literal["UTC"] | None) -> series.Series:
151+
if tz == "UTC":
152+
if self._data.dtype == dtypes.TIMESTAMP_DTYPE:
153+
raise ValueError("Already tz-aware.")
154+
155+
return self._data._apply_unary_op(ops.ToTimestampOp())
156+
157+
if tz is None:
158+
if self._data.dtype == dtypes.DATETIME_DTYPE:
159+
return self._data # no-op
160+
161+
return self._data._apply_unary_op(ops.ToDatetimeOp())
162+
163+
raise ValueError(f"Unsupported timezone {tz}")
164+
150165
@property
151166
def unit(self) -> str:
152167
# Assumption: pyarrow dtype

tests/system/small/operations/test_datetimes.py

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -324,6 +324,42 @@ def test_dt_tz(scalars_dfs, col_name):
324324
assert bf_result == pd_result
325325

326326

327+
@pytest.mark.parametrize(
328+
("col_name", "tz"),
329+
[
330+
("datetime_col", None),
331+
("timestamp_col", None),
332+
("datetime_col", "UTC"),
333+
],
334+
)
335+
def test_dt_tz_localize(scalars_dfs, col_name, tz):
336+
pytest.importorskip("pandas", minversion="2.0.0")
337+
scalars_df, scalars_pandas_df = scalars_dfs
338+
bf_series = scalars_df[col_name]
339+
340+
bf_result = bf_series.dt.tz_localize(tz)
341+
pd_result = scalars_pandas_df[col_name].dt.tz_localize(tz)
342+
343+
testing.assert_series_equal(
344+
bf_result.to_pandas(), pd_result, check_index_type=False
345+
)
346+
347+
348+
@pytest.mark.parametrize(
349+
("col_name", "tz"),
350+
[
351+
("timestamp_col", "UTC"),
352+
("datetime_col", "US/Eastern"),
353+
],
354+
)
355+
def test_dt_tz_localize_invalid_inputs(scalars_dfs, col_name, tz):
356+
pytest.importorskip("pandas", minversion="2.0.0")
357+
scalars_df, _ = scalars_dfs
358+
359+
with pytest.raises(ValueError):
360+
scalars_df[col_name].dt.tz_localize(tz)
361+
362+
327363
@pytest.mark.parametrize(
328364
("col_name",),
329365
DATETIME_COL_NAMES,
tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_datetime/out.sql

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
SELECT
22
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS DATETIME) AS `int64_col`,
33
SAFE_CAST(`string_col` AS DATETIME),
4-
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`float64_col` * 0.001) AS INT64)) AS DATETIME) AS `float64_col`
4+
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`float64_col` * 0.001) AS INT64)) AS DATETIME) AS `float64_col`,
5+
SAFE_CAST(`timestamp_col` AS DATETIME)
56
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`

tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_timestamp/out.sql

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,5 +4,6 @@ SELECT
44
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 1000000) AS INT64)) AS TIMESTAMP) AS `int64_col_s`,
55
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 1000) AS INT64)) AS TIMESTAMP) AS `int64_col_ms`,
66
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col`) AS INT64)) AS TIMESTAMP) AS `int64_col_us`,
7-
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS TIMESTAMP) AS `int64_col_ns`
7+
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS TIMESTAMP) AS `int64_col_ns`,
8+
TIMESTAMP(`datetime_col`) AS `datetime_col`
89
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`

tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -180,7 +180,7 @@ def test_time(scalar_types_df: bpd.DataFrame, snapshot):
180180

181181

182182
def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot):
183-
col_names = ["int64_col", "string_col", "float64_col"]
183+
col_names = ["int64_col", "string_col", "float64_col", "timestamp_col"]
184184
bf_df = scalar_types_df[col_names]
185185
ops_map = {col_name: ops.ToDatetimeOp().as_expr(col_name) for col_name in col_names}
186186

@@ -189,14 +189,15 @@ def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot):
189189

190190

191191
def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot):
192-
bf_df = scalar_types_df[["int64_col", "string_col", "float64_col"]]
192+
bf_df = scalar_types_df[["int64_col", "string_col", "float64_col", "datetime_col"]]
193193
ops_map = {
194194
"int64_col": ops.ToTimestampOp().as_expr("int64_col"),
195195
"float64_col": ops.ToTimestampOp().as_expr("float64_col"),
196196
"int64_col_s": ops.ToTimestampOp(unit="s").as_expr("int64_col"),
197197
"int64_col_ms": ops.ToTimestampOp(unit="ms").as_expr("int64_col"),
198198
"int64_col_us": ops.ToTimestampOp(unit="us").as_expr("int64_col"),
199199
"int64_col_ns": ops.ToTimestampOp(unit="ns").as_expr("int64_col"),
200+
"datetime_col": ops.ToTimestampOp().as_expr("datetime_col"),
200201
}
201202

202203
sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))

third_party/bigframes_vendored/pandas/core/indexes/accessor.py

Lines changed: 30 additions & 0 deletions

0 commit comments

Comments
 (0)