feat: add `df.bigquery.ai.forecast` method to pandas dataframe access… · googleapis/python-bigquery-dataframes@1126cec · GitHub
Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 1126cec

Browse files
feat: add df.bigquery.ai.forecast method to pandas dataframe accessor (#2518)
Adds the `.bigquery.ai.forecast()` method to pandas DataFrame objects, which proxies to `bigframes.bigquery.ai.forecast()`. Added unit tests and mocked session responses. --- *PR created automatically by Jules for task [14604090974587392182](https://jules.google.com/task/14604090974587392182) started by @tswast* --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: tswast <247555+tswast@users.noreply.github.com>
1 parent edceb35 commit 1126cec

3 files changed

Lines changed: 137 additions & 1 deletion

File tree

bigframes/bigquery/_operations/ai.py

Lines changed: 11 additions & 0 deletions

bigframes/extensions/pandas/dataframe_accessor.py

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from typing import cast
15+
from typing import cast, Iterable, Optional
1616

1717
import pandas
1818
import pandas.api.extensions
@@ -21,6 +21,85 @@
2121
import bigframes.pandas as bpd
2222

2323

24+
class AIAccessor:
25+
"""
26+
Pandas DataFrame accessor for BigQuery AI functions.
27+
"""
28+
29+
def __init__(self, pandas_obj: pandas.DataFrame):
30+
self._obj = pandas_obj
31+
32+
def forecast(
33+
self,
34+
*,
35+
data_col: str,
36+
timestamp_col: str,
37+
model: str = "TimesFM 2.0",
38+
id_cols: Optional[Iterable[str]] = None,
39+
horizon: int = 10,
40+
confidence_level: float = 0.95,
41+
context_window: Optional[int] = None,
42+
output_historical_time_series: bool = False,
43+
session=None,
44+
) -> pandas.DataFrame:
45+
"""
46+
Forecast time series at future horizon using BigQuery AI.FORECAST.
47+
48+
See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-ai-forecast
49+
50+
Args:
51+
data_col (str):
52+
A str value that specifies the name of the data column. The data column contains the data to forecast.
53+
The data column must use one of the following data types: INT64, NUMERIC and FLOAT64
54+
timestamp_col (str):
55+
A str value that specified the name of the time points column.
56+
The time points column provides the time points used to generate the forecast.
57+
The time points column must use one of the following data types: TIMESTAMP, DATE and DATETIME
58+
model (str, default "TimesFM 2.0"):
59+
A str value that specifies the name of the model. "TimesFM 2.0" and "TimesFM 2.5" are supported.
60+
id_cols (Iterable[str], optional):
61+
An iterable of str value that specifies the names of one or more ID columns. Each ID identifies a unique time series to forecast.
62+
Specify one or more values for this argument in order to forecast multiple time series using a single query.
63+
The columns that you specify must use one of the following data types: STRING, INT64, ARRAY<STRING> and ARRAY<INT64>
64+
horizon (int, default 10):
65+
An int value that specifies the number of time points to forecast. The default value is 10. The valid input range is [1, 10,000].
66+
confidence_level (float, default 0.95):
67+
A FLOAT64 value that specifies the percentage of the future values that fall in the prediction interval.
68+
The default value is 0.95. The valid input range is [0, 1).
69+
context_window (int, optional):
70+
An int value that specifies the context window length used by BigQuery ML's built-in TimesFM model.
71+
The context window length determines how many of the most recent data points from the input time series are use by the model.
72+
If you don't specify a value, the AI.FORECAST function automatically chooses the smallest possible context window length to use
73+
that is still large enough to cover the number of time series data points in your input data.
74+
output_historical_time_series (bool, default False):
75+
A boolean value that determines whether to include the input time series history in the forecast.
76+
session (bigframes.session.Session, optional):
77+
The BigFrames session to use. If not provided, the default global session is used.
78+
79+
Returns:
80+
pandas.DataFrame:
81+
The forecast DataFrame result.
82+
"""
83+
import bigframes.bigquery.ai
84+
85+
if session is None:
86+
session = bf_session.get_global_session()
87+
88+
bf_df = cast(bpd.DataFrame, session.read_pandas(self._obj))
89+
result = bigframes.bigquery.ai.forecast(
90+
bf_df,
91+
data_col=data_col,
92+
timestamp_col=timestamp_col,
93+
model=model,
94+
id_cols=id_cols,
95+
horizon=horizon,
96+
confidence_level=confidence_level,
97+
context_window=context_window,
98+
output_historical_time_series=output_historical_time_series,
99+
)
100+
return result.to_pandas(ordered=True)
101+
102+
24103
@pandas.api.extensions.register_dataframe_accessor("bigquery")
25104
class BigQueryDataFrameAccessor:
26105
"""
@@ -32,6 +111,13 @@ class BigQueryDataFrameAccessor:
32111
def __init__(self, pandas_obj: pandas.DataFrame):
33112
self._obj = pandas_obj
34113

114+
@property
115+
def ai(self) -> "AIAccessor":
116+
"""
117+
Accessor for BigQuery AI functions.
118+
"""
119+
return AIAccessor(self._obj)
120+
35121
def sql_scalar(self, sql_template: str, *, output_dtype=None, session=None):
36122
"""
37123
Compute a new pandas Series by applying a SQL scalar function to the DataFrame.

tests/unit/core/compile/sqlglot/test_dataframe_accessor.py

Lines changed: 39 additions & 0 deletions

0 commit comments

Comments
 (0)