Skip to content

Commit

Permalink
SNOW-1660802 Implement dataframe groupby fillna
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-evandenberg committed Sep 20, 2024
1 parent f566e25 commit 281d466
Show file tree
Hide file tree
Showing 8 changed files with 713 additions and 12 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#### New Features

- Added support for `TimedeltaIndex.mean` method.

- Added support for `DataFrameGroupBy.fillna`.

## 1.22.1 (2024-09-11)
This is a re-release of 1.22.0. Please refer to the 1.22.0 release notes for detailed release content.
Expand Down
3 changes: 2 additions & 1 deletion docs/source/modin/supported/groupby_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ Computations/descriptive stats
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``ffill`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``fillna`` | N | |
| ``fillna`` | P | GroupBy axis = 0 is supported. |
| | | Does not support ``downcast`` parameter |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``first`` | P | Does not support ``min_count`` parameter |
+-----------------------------+---------------------------------+----------------------------------------------------+
Expand Down
25 changes: 22 additions & 3 deletions src/snowflake/snowpark/modin/pandas/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import pandas.core.groupby
from modin.pandas import Series
from pandas._libs.lib import NoDefault, no_default
from pandas._typing import AggFuncType, Axis, IndexLabel
from pandas._typing import AggFuncType, Axis, FillnaOptions, IndexLabel
from pandas.core.dtypes.common import is_dict_like, is_list_like, is_numeric_dtype
from pandas.errors import SpecificationError
from pandas.io.formats.printing import PrettyDict
Expand Down Expand Up @@ -992,9 +992,28 @@ def corr(self, **kwargs):
# TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions
ErrorMessage.method_not_implemented_error(name="corr", class_="GroupBy")

def fillna(self, *args, **kwargs):
def fillna(
self,
value: Any = None,
method: FillnaOptions | None = None,
axis: Axis | None = None,
inplace: bool = False,
limit: int | None = None,
downcast: dict | None = None,
):
# TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions
ErrorMessage.method_not_implemented_error(name="fillna", class_="GroupBy")
query_compiler = self._query_compiler.groupby_fillna(
self._by,
self._axis,
self._kwargs,
value,
method,
axis,
inplace,
limit,
downcast,
)
return pd.DataFrame(query_compiler=query_compiler)

def count(self):
# TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions
Expand Down
10 changes: 8 additions & 2 deletions src/snowflake/snowpark/modin/plugin/_internal/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,7 @@ def get_snowflake_quoted_identifiers_group_by_pandas_labels(
self,
pandas_labels: list[Hashable],
include_index: bool = True,
include_data: bool = True,
) -> list[tuple[str, ...]]:
"""
Map given pandas labels to names in underlying snowpark dataframe. Given labels can be data or index labels.
Expand All @@ -562,7 +563,8 @@ def get_snowflake_quoted_identifiers_group_by_pandas_labels(
Args:
pandas_labels: A list of pandas labels.
include_index: Include the index columns in addition to data columns, default is True.
include_index: Whether to search the index columns for the given labels (data columns may also be searched, see include_data), default is True.
include_data: Whether to search the data columns for the given labels (index columns may also be searched, see include_index), default is True.
Returns:
A list of tuples for matched identifiers. Each element of list is a tuple of str containing matched
Expand All @@ -576,7 +578,11 @@ def get_snowflake_quoted_identifiers_group_by_pandas_labels(
filter(
lambda col: to_pandas_label(col.label) == label,
self.label_to_snowflake_quoted_identifier[
(0 if include_index else self.num_index_columns) :
(0 if include_index else self.num_index_columns) : (
len(self.label_to_snowflake_quoted_identifier)
if include_data
else self.num_index_columns
)
],
)
)
Expand Down
Loading

0 comments on commit 281d466

Please sign in to comment.