Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/source/user-guide/common-operations/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ Extracting parts of a date using :py:func:`~datafusion.functions.date_part` (ali
.. ipython:: python

df.select(
f.date_part(literal("month"), f.to_timestamp(col('"Total"'))).alias("month"),
f.extract(literal("day"), f.to_timestamp(col('"Total"'))).alias("day")
f.date_part("month", f.to_timestamp(col('"Total"'))).alias("month"),
f.extract("day", f.to_timestamp(col('"Total"'))).alias("day")
)

String
Expand Down
29 changes: 26 additions & 3 deletions python/datafusion/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from __future__ import annotations

import inspect
import warnings
from typing import TYPE_CHECKING, Any

import pyarrow as pa
Expand All @@ -60,6 +61,16 @@
sort_or_default,
)


def _warn_expr_for_literal_arg(function_name: str, arg_name: str) -> None:
    """Emit a ``DeprecationWarning`` for an ``Expr`` passed where a literal is preferred.

    Args:
        function_name: Name of the public function to report in the message.
        arg_name: Name of the offending argument; rendered with ``repr``.
    """
    message = (
        f"Passing Expr for {function_name}() argument {arg_name!r} is deprecated; "
        "pass a Python literal instead."
    )
    # stacklevel=4 skips this helper and the two frames above it (for the
    # callers visible in this file: the private implementation function and
    # the public wrapper that delegates to it).
    warnings.warn(message, category=DeprecationWarning, stacklevel=4)


__all__ = [
"abs",
"acos",
Expand Down Expand Up @@ -2575,7 +2586,7 @@ def datepart(part: Expr | str, date: Expr) -> Expr:
See Also:
This is an alias for :py:func:`date_part`.
"""
return date_part(part, date)
return _date_part(part, date, "datepart")


def date_part(part: Expr | str, date: Expr) -> Expr:
Expand All @@ -2595,6 +2606,12 @@ def date_part(part: Expr | str, date: Expr) -> Expr:
>>> result.collect_column("y")[0].as_py()
2021
"""
return _date_part(part, date, "date_part")


def _date_part(part: Expr | str, date: Expr, function_name: str) -> Expr:
    """Shared implementation behind ``date_part`` and its aliases.

    Args:
        part: The date part to extract, as a string or (deprecated) an ``Expr``.
        date: The date/timestamp expression to extract from.
        function_name: Public name the caller was invoked as; used only in the
            deprecation message.

    Returns:
        A new ``Expr`` wrapping the underlying ``date_part`` call.
    """
    part_is_expr = isinstance(part, Expr)
    if part_is_expr:
        _warn_expr_for_literal_arg(function_name, "part")
    # NOTE(review): coerce_to_expr presumably wraps a plain string as a literal
    # expression and passes an existing Expr through -- confirm against its
    # definition.
    part_expr = coerce_to_expr(part)
    return Expr(f.date_part(part_expr.expr, date.expr))

Expand All @@ -2605,7 +2622,7 @@ def extract(part: Expr | str, date: Expr) -> Expr:
See Also:
This is an alias for :py:func:`date_part`.
"""
return date_part(part, date)
return _date_part(part, date, "extract")


def date_trunc(part: Expr | str, date: Expr) -> Expr:
Expand All @@ -2626,6 +2643,12 @@ def date_trunc(part: Expr | str, date: Expr) -> Expr:
>>> str(result.collect_column("t")[0].as_py())
'2021-07-01 00:00:00'
"""
return _date_trunc(part, date, "date_trunc")


def _date_trunc(part: Expr | str, date: Expr, function_name: str) -> Expr:
    """Shared implementation behind ``date_trunc`` and its alias ``datetrunc``.

    Args:
        part: The precision to truncate to, as a string or (deprecated) an
            ``Expr``.
        date: The date/timestamp expression to truncate.
        function_name: Public name the caller was invoked as; used only in the
            deprecation message.

    Returns:
        A new ``Expr`` wrapping the underlying ``date_trunc`` call.
    """
    part_is_expr = isinstance(part, Expr)
    if part_is_expr:
        _warn_expr_for_literal_arg(function_name, "part")
    # NOTE(review): coerce_to_expr presumably wraps a plain string as a literal
    # expression and passes an existing Expr through -- confirm against its
    # definition.
    part_expr = coerce_to_expr(part)
    return Expr(f.date_trunc(part_expr.expr, date.expr))

Expand All @@ -2636,7 +2659,7 @@ def datetrunc(part: Expr | str, date: Expr) -> Expr:
See Also:
This is an alias for :py:func:`date_trunc`.
"""
return date_trunc(part, date)
return _date_trunc(part, date, "datetrunc")


def date_bin(stride: Expr, source: Expr, origin: Expr) -> Expr:
Expand Down
50 changes: 43 additions & 7 deletions python/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
import math
import warnings
from datetime import date, datetime, time, timezone

import numpy as np
Expand Down Expand Up @@ -1086,10 +1087,10 @@ def test_hash_functions(df):

def test_temporal_functions(df):
df = df.select(
f.date_part(literal("month"), column("d")),
f.datepart(literal("year"), column("d")),
f.date_trunc(literal("month"), column("d")),
f.datetrunc(literal("day"), column("d")),
f.date_part("month", column("d")),
f.datepart("year", column("d")),
f.date_trunc("month", column("d")),
f.datetrunc("day", column("d")),
f.date_bin(
literal("15 minutes").cast(pa.string()),
column("d"),
Expand All @@ -1100,7 +1101,7 @@ def test_temporal_functions(df):
f.to_timestamp_seconds(literal("2023-09-07 05:06:14.523952")),
f.to_timestamp_millis(literal("2023-09-07 05:06:14.523952")),
f.to_timestamp_micros(literal("2023-09-07 05:06:14.523952")),
f.extract(literal("day"), column("d")),
f.extract("day", column("d")),
f.to_timestamp(
literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f")
),
Expand Down Expand Up @@ -2160,16 +2161,51 @@ def test_date_part_native_str(self):
ctx = SessionContext()
df = ctx.from_pydict({"a": ["2021-07-15T00:00:00"]})
df = df.select(f.to_timestamp(column("a")).alias("a"))
result = df.select(f.date_part("year", column("a")).alias("y")).collect()
with warnings.catch_warnings():
warnings.simplefilter("error", DeprecationWarning)
result = df.select(f.date_part("year", column("a")).alias("y")).collect()

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this necessary?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do need to assert that native string input emits no DeprecationWarning, but I agree that we do not need to execute the full dataframe to prove that. The warning is emitted at expression construction time.
I'll simplify this.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think computing the dataframe is fine to validate the result; I was just wondering about the warning code, since the warning will never be raised in this case. But it's also fine to leave as is.

assert result[0].column(0)[0].as_py() == 2021

@pytest.mark.parametrize(
    ("func", "name"),
    [
        pytest.param(getattr(f, alias), alias, id=alias)
        for alias in ("date_part", "datepart", "extract")
    ],
)
def test_date_part_expr_part_warns_deprecated(self, func, name):
    """Each ``date_part`` spelling warns when ``part`` is passed as an ``Expr``."""
    # The message must name the exact public alias that was called.
    expected_message = rf"Passing Expr for {name}\(\) argument 'part' is deprecated"
    with pytest.warns(DeprecationWarning, match=expected_message):
        expr = func(literal("year"), column("a"))
    assert expr is not None

def test_date_trunc_native_str(self):
ctx = SessionContext()
df = ctx.from_pydict({"a": ["2021-07-15T12:34:56"]})
df = df.select(f.to_timestamp(column("a")).alias("a"))
result = df.select(f.date_trunc("month", column("a")).alias("t")).collect()
with warnings.catch_warnings():
warnings.simplefilter("error", DeprecationWarning)
result = df.select(f.date_trunc("month", column("a")).alias("t")).collect()

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same as above

assert str(result[0].column(0)[0].as_py()) == "2021-07-01 00:00:00"

@pytest.mark.parametrize(
    ("func", "name"),
    [
        pytest.param(getattr(f, alias), alias, id=alias)
        for alias in ("date_trunc", "datetrunc")
    ],
)
def test_date_trunc_expr_part_warns_deprecated(self, func, name):
    """Each ``date_trunc`` spelling warns when ``part`` is passed as an ``Expr``."""
    # The message must name the exact public alias that was called.
    expected_message = rf"Passing Expr for {name}\(\) argument 'part' is deprecated"
    with pytest.warns(DeprecationWarning, match=expected_message):
        expr = func(literal("month"), column("a"))
    assert expr is not None

def test_left_native_int(self):
ctx = SessionContext()
df = ctx.from_pydict({"a": ["the cat"]})
Expand Down
Loading