Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion packages/bigframes/bigframes/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,11 @@
to_json,
to_json_string,
)
from bigframes.bigquery._operations.mathematical import rand
from bigframes.bigquery._operations.mathematical import (
hparam_candidates,
hparam_range,
rand,
)
from bigframes.bigquery._operations.search import create_vector_index, vector_search
from bigframes.bigquery._operations.sql import sql_scalar
from bigframes.bigquery._operations.struct import struct
Expand Down Expand Up @@ -130,6 +134,8 @@
to_json,
to_json_string,
# mathematical ops
hparam_candidates,
hparam_range,
rand,
# search ops
create_vector_index,
Expand Down Expand Up @@ -187,6 +193,8 @@
"to_json",
"to_json_string",
# mathematical ops
"hparam_candidates",
"hparam_range",
"rand",
# search ops
"create_vector_index",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,71 @@ def rand() -> bigframes.core.col.Expression:
is_deterministic=False,
)
return bigframes.core.col.Expression(bigframes.core.expression.OpExpression(op, ()))


def hparam_range(min: float | int, max: float | int) -> bigframes.core.col.Expression:
    """
    Build a ``HPARAM_RANGE`` expression bounding a continuous hyperparameter.

    Both bounds are wrapped as constant expressions and handed, in order, to a
    scalar SQL op whose template is ``HPARAM_RANGE({0}, {1})``.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> # Specify a range of values for a hyperparameter.
        >>> learn_rate = bbq.hparam_range(0.0001, 1.0)

    Args:
        min (float or int):
            The minimum bound of the search space.
        max (float or int):
            The maximum bound of the search space.

    Returns:
        bigframes.pandas.api.typing.Expression:
            An expression that can be used in model options.
    """
    # NOTE: ``min`` and ``max`` shadow the builtins inside this function. They
    # are caller-visible parameter names, so they are kept as-is.
    as_const = bigframes.core.expression.const
    bounds = (as_const(min), as_const(max))

    range_op = ops.SqlScalarOp(
        _output_type=dtypes.FLOAT_DTYPE,
        sql_template="HPARAM_RANGE({0}, {1})",
        is_deterministic=True,
    )
    op_node = bigframes.core.expression.OpExpression(range_op, bounds)
    return bigframes.core.col.Expression(op_node)


def hparam_candidates(
    candidates: list[float | int | str],
) -> bigframes.core.col.Expression:
    """
    Build a ``HPARAM_CANDIDATES`` expression listing discrete hyperparameter values.

    The whole list is wrapped as a single constant expression and passed to a
    scalar SQL op whose template is ``HPARAM_CANDIDATES({0})``.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> # Specify a set of values for a hyperparameter.
        >>> optimizer = bbq.hparam_candidates(['ADAGRAD', 'SGD', 'FTRL'])

    Args:
        candidates (list):
            The set of discrete values for the hyperparameter.

    Returns:
        bigframes.pandas.api.typing.Expression:
            An expression that can be used in model options.
    """
    # NOTE(review): the op's output type is declared as STRING even though the
    # signature also admits numeric candidates -- confirm this is intended.
    candidates_op = ops.SqlScalarOp(
        _output_type=dtypes.STRING_DTYPE,
        sql_template="HPARAM_CANDIDATES({0})",
        is_deterministic=True,
    )
    operands = (bigframes.core.expression.const(candidates),)
    op_node = bigframes.core.expression.OpExpression(candidates_op, operands)
    return bigframes.core.col.Expression(op_node)
7 changes: 5 additions & 2 deletions packages/bigframes/bigframes/bigquery/_operations/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import google.cloud.bigquery
import pandas as pd

import bigframes.core.col as col
import bigframes.core.logging.log_adapter as log_adapter
import bigframes.core.sql.ml
import bigframes.dataframe as dataframe
Expand Down Expand Up @@ -50,7 +51,9 @@ def create_model(
input_schema: Optional[Mapping[str, str]] = None,
output_schema: Optional[Mapping[str, str]] = None,
connection_name: Optional[str] = None,
options: Optional[Mapping[str, Union[str, int, float, bool, list]]] = None,
options: Optional[
Mapping[str, Union[str, int, float, bool, list, "col.Expression"]]
] = None,
training_data: Optional[Union[pd.DataFrame, dataframe.DataFrame, str]] = None,
custom_holiday: Optional[Union[pd.DataFrame, dataframe.DataFrame, str]] = None,
session: Optional[bigframes.session.Session] = None,
Expand Down Expand Up @@ -78,7 +81,7 @@ def create_model(
The OUTPUT clause, which specifies the schema of the output data.
connection_name (str, optional):
The connection to use for the model.
options (Mapping[str, Union[str, int, float, bool, list]], optional):
options (Mapping[str, Union[str, int, float, bool, list, bigframes.core.col.Expression]], optional):
The OPTIONS clause, which specifies the model options.
training_data (Union[bigframes.pandas.DataFrame, str], optional):
The query or DataFrame to use for training the model.
Expand Down
11 changes: 9 additions & 2 deletions packages/bigframes/bigframes/core/sql/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@

from typing import Any, Dict, List, Mapping, Optional, Union

import bigframes.core.col as col
from bigframes.core.compile.sqlglot import sql as sg_sql
from bigframes.core.compile.sqlglot.expression_compiler import expression_compiler


def create_model_ddl(
Expand All @@ -28,7 +30,9 @@ def create_model_ddl(
input_schema: Optional[Mapping[str, str]] = None,
output_schema: Optional[Mapping[str, str]] = None,
connection_name: Optional[str] = None,
options: Optional[Mapping[str, Union[str, int, float, bool, list]]] = None,
options: Optional[
Mapping[str, Union[str, int, float, bool, list, "col.Expression"]]
] = None,
training_data: Optional[str] = None,
custom_holiday: Optional[str] = None,
) -> str:
Expand Down Expand Up @@ -70,7 +74,10 @@ def create_model_ddl(
if options:
rendered_options = []
for option_name, option_value in options.items():
if isinstance(option_value, (list, tuple)):
if isinstance(option_value, col.Expression):
sg_expr = expression_compiler.compile_expression(option_value._value)
rendered_val = sg_sql.to_sql(sg_expr)
elif isinstance(option_value, (list, tuple)):
# Handle list options like model_registry="vertex_ai"
# wait, usually options are key=value.
# if value is list, it is [val1, val2]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
CREATE MODEL `my_model`
OPTIONS(model_type = 'LINEAR_REG', learn_rate = HPARAM_RANGE(0.0001, 1.0), optimizer = HPARAM_CANDIDATES(['ADAGRAD', 'SGD']))
AS SELECT * FROM t
18 changes: 18 additions & 0 deletions packages/bigframes/tests/unit/core/sql/test_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@

import pytest

import bigframes.bigquery as bbq
import bigframes.core.col as col
import bigframes.core.expression as ex
import bigframes.core.sql.ml
import bigframes.dtypes as dtypes
import bigframes.operations.numeric_ops as numeric_ops

pytest.importorskip("pytest_snapshot")

Expand Down Expand Up @@ -97,6 +102,19 @@ def test_create_model_list_option(snapshot):
snapshot.assert_match(sql, "create_model_list_option.sql")


def test_create_model_hparam_tuning(snapshot):
    # Expression-valued options (hparam_range / hparam_candidates) are mixed
    # with a plain string option and the generated DDL is compared against the
    # stored snapshot.
    tuning_options = {
        "model_type": "LINEAR_REG",
        "learn_rate": bbq.hparam_range(0.0001, 1.0),
        "optimizer": bbq.hparam_candidates(["ADAGRAD", "SGD"]),
    }
    ddl = bigframes.core.sql.ml.create_model_ddl(
        model_name="my_model",
        options=tuning_options,
        training_data="SELECT * FROM t",
    )
    snapshot.assert_match(ddl, "create_model_hparam_tuning.sql")


def test_evaluate_model_basic(snapshot):
sql = bigframes.core.sql.ml.evaluate(
model_name="my_project.my_dataset.my_model",
Expand Down
2 changes: 1 addition & 1 deletion packages/bigframes/tests/unit/test_col.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@
import pathlib
from typing import Generator

import numpy as np
import pandas as pd
import pytest

import bigframes
import bigframes.pandas as bpd
from bigframes.testing.utils import assert_frame_equal, convert_pandas_dtypes
import numpy as np

pytest.importorskip("polars")
pytest.importorskip("pandas", minversion="3.0.0")
Expand Down