From 2094c3fec17730d945dd5bb303ef814907bf3817 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Wed, 21 Aug 2024 11:15:51 -0700
Subject: [PATCH 01/42] Update Series and DataFrame constructors to handle lazy
 Index objects, add tests for the same

---
 .../snowpark/modin/pandas/dataframe.py        |  78 ++-
 src/snowflake/snowpark/modin/pandas/series.py |  33 +-
 .../compiler/snowflake_query_compiler.py      | 115 ++++
 .../test_df_series_creation_with_index.py     | 525 ++++++++++++++++++
 4 files changed, 729 insertions(+), 22 deletions(-)
 create mode 100644 tests/integ/modin/index/test_df_series_creation_with_index.py

diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py
index a6850941fa..b2787aa6f5 100644
--- a/src/snowflake/snowpark/modin/pandas/dataframe.py
+++ b/src/snowflake/snowpark/modin/pandas/dataframe.py
@@ -155,19 +155,30 @@ def __init__(
         # Siblings are other dataframes that share the same query compiler. We
         # use this list to update inplace when there is a shallow copy.
         from snowflake.snowpark.modin.pandas.utils import try_convert_index_to_native
+        from snowflake.snowpark.modin.plugin.extensions.index import Index
 
         self._siblings = []
 
+        if isinstance(index, DataFrame):
+            raise ValueError("Index data must be 1-dimensional")
+
         # Engine.subscribe(_update_engine)
+        if isinstance(data, Index):
+            # If the data is an Index object, we need to convert it to a DataFrame to make sure
+            # that the values are in the correct format -- as a data column, not an index column.
+            # Additionally, if an index is provided, converting it to an Index object ensures that
+            # its values are an index column.
+            query_compiler = data.to_frame(index=False, name=data.name)._query_compiler
+            if index is not None:
+                index = index if isinstance(index, Index) else Index(index)
+                query_compiler = query_compiler.create_qc_with_index_data_and_qc_index(
+                    index._query_compiler
+                )
+
         if isinstance(data, (DataFrame, Series)):
             self._query_compiler = data._query_compiler.copy()
-            if index is not None and any(i not in data.index for i in index):
-                ErrorMessage.not_implemented(
-                    "Passing non-existant columns or index values to constructor not"
-                    + " yet implemented."
-                )  # pragma: no cover
             if isinstance(data, Series):
-                # We set the column name if it is not in the provided Series
+                # We set the column name if it is not in the provided Series `data`.
                 if data.name is None:
                     self.columns = [0] if columns is None else columns
                 # If the columns provided are not in the named Series, pandas clears
@@ -177,22 +188,61 @@ def __init__(
                         self.__constructor__(columns=columns)
                     )._query_compiler
                 if index is not None:
+                    # The `index` parameter is used to select the rows from `data` that will be in the resultant
+                    # DataFrame. If a value in `index` is not present in `data`'s index, it will be filled with a
+                    # NaN value.
+                    # 1. The `index` is converted to an Index object so that the index values are in an index column.
+                    index = index if isinstance(index, Index) else Index(index)
+                    # 2. A right outer join is performed between `data` and `index` to create a Series object where
+                    #    any index values in `data`'s index that are not in `index` are filled with NaN.
+                    data = Series(
+                        query_compiler=data._query_compiler.create_qc_with_data_and_index_joined_on_index(
+                            index._query_compiler
+                        ),
+                        name=0 if data.name is None else data.name,
+                    )
+                    # 3. Perform .loc[] on `data` to select the rows that are in the `index`.
                     self._query_compiler = data.loc[index]._query_compiler
+
             elif columns is None and index is None:
                 data._add_sibling(self)
+
             else:
-                if columns is not None and any(i not in data.columns for i in columns):
-                    ErrorMessage.not_implemented(
-                        "Passing non-existant columns or index values to constructor not"
-                        + " yet implemented."
-                    )  # pragma: no cover
-                if index is None:
-                    index = slice(None)
+                # The `columns` parameter is used to select the columns from `data` that will be in the resultant
+                # DataFrame. If a value in `columns` is not present in `data`'s columns, it will be added as a
+                # new column filled with NaN values. These columns are tracked by the `extra_columns` variable.
+                extra_columns = None
                 if columns is None:
+                    # In case `columns` is not provided, `columns` is set to slice(None) to select all columns.
                     columns = slice(None)
+                else:
+                    extra_columns = [col for col in columns if col not in data.columns]
+
+                # The `index` parameter is used to select the rows from `data` that will be in the resultant DataFrame.
+                # If a value in `index` is not present in `data`'s index, it will be filled with a NaN value.
+                if index is None:
+                    # In case `index` is not provided, `index` is set to slice(None) to select all rows.
+                    index = slice(None)
+                    data = DataFrame(
+                        query_compiler=data._query_compiler.create_qc_with_data_and_index_joined_on_index(
+                            extra_columns=extra_columns
+                        )
+                    )
+                else:
+                    # The `index` is converted to an Index object so that the index values are in an index column.
+                    index = index if isinstance(index, Index) else Index(index)
+                    # A right outer join is performed between `data` and `index` to create a DataFrame object where any
+                    # index values in `data`'s index that are not in `index` are filled with NaN.
+                    data = DataFrame(
+                        query_compiler=data._query_compiler.create_qc_with_data_and_index_joined_on_index(
+                            index._query_compiler,
+                            extra_columns=extra_columns,
+                        )
+                    )
+                # 3. Perform .loc[] on `data` to select the rows and columns that are in `index` and `columns`.
                 self._query_compiler = data.loc[index, columns]._query_compiler
 
-        # Check type of data and use appropriate constructor
+        # Check the type of data and use the appropriate constructor
         elif query_compiler is None:
             distributed_frame = from_non_pandas(data, index, columns, dtype)
             if distributed_frame is not None:
diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py
index e99e9cc89f..4f5d7a8a23 100644
--- a/src/snowflake/snowpark/modin/pandas/series.py
+++ b/src/snowflake/snowpark/modin/pandas/series.py
@@ -129,19 +129,36 @@ def __init__(
 
         # modified:
         # Engine.subscribe(_update_engine)
+        from snowflake.snowpark.modin.plugin.extensions.index import Index
 
         # Convert lazy index to Series without pulling the data to client.
-        if isinstance(data, pd.Index):
-            query_compiler = data.to_series(index=index, name=name)._query_compiler
-            query_compiler = query_compiler.reset_index(drop=True)
+        if isinstance(data, Index):
+            # If the data is an Index object, we need to convert it to a DataFrame to make sure
+            # that the values are in the correct format -- as a data column, not an index column.
+            # Additionally, if an index is provided, converting it to an Index object ensures that
+            # its values are an index column.
+            query_compiler = data.to_frame(index=False, name=data.name)._query_compiler
+            if index is not None:
+                index = index if isinstance(index, Index) else Index(index)
+                query_compiler = query_compiler.create_qc_with_index_data_and_qc_index(
+                    index._query_compiler
+                )
         elif isinstance(data, type(self)):
             query_compiler = data._query_compiler.copy()
             if index is not None:
-                if any(i not in data.index for i in index):
-                    ErrorMessage.not_implemented(
-                        "Passing non-existent columns or index values to constructor "
-                        + "not yet implemented."
-                    )  # pragma: no cover
+                # The `index` parameter is used to select the rows from `data` that will be in the resultant Series.
+                # If a value in `index` is not present in `data`'s index, it will be filled with a NaN value.
+                # 1. The `index` is converted to an Index object so that the index values are in an index column.
+                index = index if isinstance(index, Index) else Index(index)
+                # 2. A right outer join is performed between `data` and `index` to create a Series object where any
+                #    index values in `data`'s index that are not in `index` are filled with NaN.
+                data = Series(
+                    query_compiler=data._query_compiler.create_qc_with_data_and_index_joined_on_index(
+                        index._query_compiler
+                    ),
+                    name=data.name,
+                )
+                # 3. Perform .loc[] on `data` to select the rows that are in `index`.
                 query_compiler = data.loc[index]._query_compiler
         if query_compiler is None:
             # Defaulting to pandas
diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
index 108b594faf..577efe500e 100644
--- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
+++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
@@ -17342,3 +17342,118 @@ def compare(
         """
 
         return result
+
+    def create_qc_with_index_data_and_qc_index(
+        self, index_qc: "SnowflakeQueryCompiler"
+    ) -> "SnowflakeQueryCompiler":
+        """
+        This is a helper function for creating a DataFrame/Series where the data is an Index
+        and an index is provided.
+        Before this method is called, the provided index is converted to an Index object;
+        the query compilers of the data and index are then joined.
+
+        Parameters
+        ----------
+        index_qc : SnowflakeQueryCompiler
+            The query compiler of the index to be joined with the data.
+
+        Returns
+        -------
+        SnowflakeQueryCompiler
+            A new query compiler with the data and index joined.
+        """
+        self_frame = self._modin_frame.ensure_row_position_column()
+        other_frame = index_qc._modin_frame.ensure_row_position_column()
+
+        new_internal_frame, _ = join_utils.join(
+            self_frame,
+            other_frame,
+            how="left",
+            left_on=[self_frame.row_position_snowflake_quoted_identifier],
+            right_on=[other_frame.row_position_snowflake_quoted_identifier],
+            inherit_join_index=InheritJoinIndex.FROM_RIGHT,
+        )
+
+        return SnowflakeQueryCompiler(new_internal_frame)
+
+    def create_qc_with_data_and_index_joined_on_index(
+        self,
+        index_qc: Optional["SnowflakeQueryCompiler"] = None,
+        extra_columns: Optional[List[Hashable]] = None,
+    ) -> "SnowflakeQueryCompiler":
+        """
+        This is a helper function for creating a DataFrame/Series where the data is a DataFrame/Series object.
+        This is a special case since only the values where the index value matches in the `data` and `index` provided
+        take on an actual value from the given `data`. Otherwise, they take on a NaN value.
+
+        For instance,
+
+        >>> data = pd.Series(["A", "B", "C", "D"], index=[1.1, 2.2, 3, 4], name="index series name")
+        >>> index = pd.Index([1, 2, 3, 4], name="some name")
+        >>> df = pd.DataFrame(data=data, index=index)
+        >>> df  # doctest: +SKIP
+                  index series name
+        some name
+        1                       NaN
+        2                       NaN
+        3                         C
+        4                         D
+
+        Notice how only the data for index values 3 and 4 have an actual value while 1 and 2 have a NaN value.
+        3 and 4 are values present in the index of the `data` and `index` provided. 1 and 2 are not present.
+
+        Parameters
+        ----------
+        index_qc : SnowflakeQueryCompiler, default None
+            The query compiler of the index to be joined with the data. If no query compiler is provided,
+            skip this join operation.
+        extra_columns : list of hashable, default None
+            If the DataFrame being created has new columns that are not a part of the data, they can be passed here
+            and appended as NaN columns.
+
+        Returns
+        -------
+        SnowflakeQueryCompiler
+            A new query compiler with the data and index joined.
+        """
+        self_frame = self._modin_frame
+
+        if extra_columns:
+            # Append the new columns to the data's internal frame.
+            new_snowflake_quoted_identifiers = self._modin_frame.ordered_dataframe.generate_snowflake_quoted_identifiers(
+                pandas_labels=extra_columns,
+                excluded=self_frame.data_column_snowflake_quoted_identifiers,
+            )
+            new_ordered_frame = append_columns(
+                self_frame.ordered_dataframe,
+                new_snowflake_quoted_identifiers,
+                [pandas_lit(np.nan)] * len(extra_columns),
+            )
+            self_frame = InternalFrame.create(
+                ordered_dataframe=new_ordered_frame,
+                data_column_pandas_labels=self_frame.data_column_pandas_labels
+                + extra_columns,
+                data_column_snowflake_quoted_identifiers=self_frame.data_column_snowflake_quoted_identifiers
+                + new_snowflake_quoted_identifiers,
+                data_column_pandas_index_names=self_frame.data_column_pandas_index_names,
+                index_column_pandas_labels=self_frame.index_column_pandas_labels,
+                index_column_snowflake_quoted_identifiers=self_frame.index_column_snowflake_quoted_identifiers,
+                data_column_types=None,
+                index_column_types=None,
+            )
+
+        if index_qc is None:
+            new_internal_frame = self._modin_frame
+        else:
+            # Join the index and data internal frames.
+            other_frame = index_qc._modin_frame
+            new_internal_frame, _ = join_utils.join(
+                other_frame,
+                self_frame,
+                how="outer",
+                left_on=other_frame.index_column_snowflake_quoted_identifiers,
+                right_on=self_frame.index_column_snowflake_quoted_identifiers,
+                inherit_join_index=InheritJoinIndex.FROM_LEFT,
+            )
+
+        return SnowflakeQueryCompiler(new_internal_frame)
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
new file mode 100644
index 0000000000..9c3bae1b22
--- /dev/null
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -0,0 +1,525 @@
+#
+# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
+#
+import modin.pandas as pd
+import pandas as native_pd
+import pytest
+
+import snowflake.snowpark.modin.plugin  # noqa: F401
+from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
+from tests.integ.modin.utils import assert_frame_equal, assert_series_equal
+
+
+@pytest.mark.parametrize(
+    "data", [[1, 2, 3, 4], list(range(250)), ["A", None, 2.3, 1], []]
+)
+@sql_count_checker(query_count=1)
+def test_create_df_with_index_as_data(data):
+    """
+    Creating a DataFrame where the data is an Index.
+    """
+    # Create Snowpark pandas DataFrame and native pandas DataFrame from an Index object.
+    native_idx = native_pd.Index(data, name="some name")
+    snow_idx = pd.Index(native_idx)
+    assert_frame_equal(pd.DataFrame(snow_idx), native_pd.DataFrame(native_idx))
+
+
+@pytest.mark.parametrize(
+    "data", [[1, 2, 3, 4], list(range(250)), ["A", None, 2.3, 1], []]
+)
+@sql_count_checker(query_count=1)
+def test_create_series_with_index_as_data(data):
+    """
+    Creating a Series where the data is an Index.
+    """
+    # Create Snowpark pandas Series and native pandas Series from an Index object.
+    native_idx = native_pd.Index(data, name="some name")
+    snow_idx = pd.Index(native_idx)
+    assert_series_equal(pd.Series(snow_idx), native_pd.Series(native_idx))
+
+
+@pytest.mark.parametrize(
+    "data, index",
+    [
+        ([1, 2, 3, 4], ["A", "B", "C", "D"]),
+        (list(range(100)), list(range(200, 300))),
+        (["A", None, 2.3, 1], [None, "B", 0, 3.14]),
+        ([], []),
+    ],
+)
+@sql_count_checker(query_count=2)
+def test_create_df_with_index_as_index(data, index):
+    """
+    Creating a DataFrame where the index is an Index.
+    """
+    # Two queries are issued: one when creating the DataFrame (the index is converted
+    # to a native pandas object), one when materializing the DataFrame for comparison.
+    # Create Snowpark pandas DataFrame and native pandas DataFrame with an Index object as the index.
+    native_idx = native_pd.Index(index, name="some name")
+    snow_idx = pd.Index(native_idx)
+    assert_frame_equal(
+        pd.DataFrame(data, index=snow_idx),
+        native_pd.DataFrame(data, index=native_idx),
+        check_dtype=False,
+        check_index_type=False,
+        check_column_type=False,
+    )
+
+
+@pytest.mark.parametrize(
+    "data, index",
+    [
+        ([1, 2, 3, 4], ["A", "B", "C", "D"]),
+        (list(range(100)), list(range(100, 200))),
+        (["A", None, 2.3, 1], [None, "B", 0, 3.14]),
+        ([], []),
+    ],
+)
+@sql_count_checker(query_count=2)
+def test_create_series_with_index_as_index(data, index):
+    """
+    Creating a Series where the index is an Index.
+    """
+    # Two queries are issued: one when creating the Series (the index is converted
+    # to a native pandas object), one when materializing the Series for comparison.
+    # Create Snowpark pandas Series and native pandas Series with an Index object as the index.
+    native_idx = native_pd.Index(index, name="some name")
+    snow_idx = pd.Index(native_idx)
+    assert_series_equal(
+        pd.Series(data, index=snow_idx),
+        native_pd.Series(data, index=native_idx),
+        check_dtype=False,
+        check_index_type=False,
+    )
+
+
+@pytest.mark.parametrize(
+    "data, index",
+    [
+        ([1, 2, 3, 4], ["A", "B", "C", "D"]),
+        (list(range(250)), list(range(250, 500))),
+        (["A", None, 2.3, 1], [None, "B", 0, 3.14]),
+        ([], []),
+    ],
+)
+@sql_count_checker(query_count=1, join_count=1)
+def test_create_df_with_index_as_data_and_index(data, index):
+    """
+    Creating a DataFrame where the data is an Index and the index is also an Index.
+    """
+    # Create Snowpark pandas DataFrame and native pandas DataFrame from Index objects.
+    native_idx_data = native_pd.Index(data, name="data name")
+    snow_idx_data = pd.Index(native_idx_data)
+    native_idx_index = native_pd.Index(index, name="index name")
+    snow_idx_index = pd.Index(native_idx_index)
+    assert_frame_equal(
+        pd.DataFrame(snow_idx_data, index=snow_idx_index),
+        native_pd.DataFrame(native_idx_data, index=native_idx_index),
+    )
+
+
+@pytest.mark.parametrize(
+    "data, index",
+    [
+        ([1, 2, 3, 4], ["A", "B", "C", "D"]),
+        (list(range(100)), list(range(100, 200))),
+        (["A", None, 2.3, 1], [None, "B", 0, 3.14]),
+        ([], []),
+    ],
+)
+@sql_count_checker(query_count=1, join_count=1)
+def test_create_series_with_index_as_data_and_index(data, index):
+    """
+    Creating a Series where the data is an Index and the index is also an Index.
+    """
+    # Create Snowpark pandas Series and native pandas Series from Index objects.
+    # TODO: Index is not being set at all.
+    native_idx_data = native_pd.Index(data, name="data name")
+    snow_idx_data = pd.Index(native_idx_data)
+    native_idx_index = native_pd.Index(index, name="index name")
+    snow_idx_index = pd.Index(native_idx_index)
+    assert_series_equal(
+        pd.Series(snow_idx_data, index=snow_idx_index),
+        native_pd.Series(native_idx_data, index=native_idx_index),
+    )
+
+
+@pytest.mark.parametrize(
+    "data, native_series",
+    [
+        (
+            [1, 2, 3, 4],
+            native_pd.Series(
+                ["A", "B", "C", "D"],
+                index=[1.1, 2.2, 3.3, 4.4],
+                name="index series name",
+            ),
+        ),
+        (list(range(100)), native_pd.Series(list(range(100, 200)))),
+        (
+            ["A", None, 2.3, 1],
+            native_pd.Series([None, "B", 0, 3.14], name="mixed series as index"),
+        ),
+        ([], native_pd.Series([], name="empty series")),
+    ],
+)
+@sql_count_checker(query_count=1, join_count=1)
+def test_create_df_with_index_as_data_and_series_as_index(data, native_series):
+    """
+    Creating a DataFrame where the data is an Index and the index is a Series.
+    """
+    snow_series = pd.Series(native_series)
+    native_index = native_pd.Index(data, name="index data name")
+    snow_index = pd.Index(native_index)
+    assert_frame_equal(
+        pd.DataFrame(snow_index, index=snow_series),
+        native_pd.DataFrame(native_index, index=native_series),
+    )
+
+
+@pytest.mark.parametrize(
+    "data, native_series",
+    [
+        (
+            [1, 2, 3, 4],
+            native_pd.Series(
+                ["A", "B", "C", "D"],
+                index=[1.1, 2.2, 3.3, 4.4],
+                name="index series name",
+            ),
+        ),
+        (list(range(100)), native_pd.Series(list(range(100, 200)))),
+        (
+            ["A", None, 2.3, 1],
+            native_pd.Series([None, "B", 0, 3.14], name="mixed series as index"),
+        ),
+        ([], native_pd.Series([], name="empty series")),
+    ],
+)
+@sql_count_checker(query_count=1, join_count=1)
+def test_create_series_with_index_as_data_and_series_as_index(data, native_series):
+    """
+    Creating a Series where the data is an Index and the index is a Series.
+    """
+    snow_series = pd.Series(native_series)
+    native_index = native_pd.Index(data, name="index data name")
+    snow_index = pd.Index(native_index)
+    assert_series_equal(
+        pd.Series(snow_index, index=snow_series),
+        native_pd.Series(native_index, index=native_series),
+    )
+
+
+@pytest.mark.parametrize(
+    "native_series, native_index",
+    [
+        (
+            native_pd.Series(
+                ["A", "B", "C", "D"], index=[1.1, 2.2, 3, 4], name="index series name"
+            ),
+            native_pd.Index([1, 2, 3, 4], name="some name"),
+        ),  # some index values are missing
+        (
+            native_pd.Series(list(range(100))),
+            native_pd.Index(list(range(-50, 100, 4)), name="skip numbers"),
+        ),  # some index values are missing
+        (
+            native_pd.Series(
+                [10, 20, 30, 40],
+                index=native_pd.Index([None, "B", 0, 3.14], name="mixed"),
+                name="mixed series as index",
+            ),
+            native_pd.Index(["B", 0, None, 3.14]),
+        ),  # rearranged index values
+        (
+            native_pd.Series(["A", "B", "C", "D", "E"], name="series"),
+            native_pd.Index([3, 4], name="index"),
+        ),  # subset of index values
+        (
+            native_pd.Series(
+                list(range(20)), index=native_pd.Index(list(range(20)), name=20)
+            ),
+            native_pd.Index(list(range(20))),
+        ),  # all index values match
+        (
+            native_pd.Series(["A", "V", "D", "R"]),
+            native_pd.Index([10, 20, 30, 40], name="none"),
+        ),  # no index values match
+        (
+            native_pd.Series([], name="empty series", dtype="int64"),
+            native_pd.Index([], name="empty index", dtype="int64"),
+        ),  # empty series and index
+    ],
+)
+@sql_count_checker(query_count=1, join_count=2)
+def test_create_df_with_series_as_data_and_index_as_index(native_series, native_index):
+    """
+    Creating a DataFrame where the data is a Series and the index is an Index.
+    """
+    # Two joins are performed: one from joining the data and index parameters to have a query compiler whose
+    # index columns match the provided index, and one from performing .loc[] to filter the generated qc.
+    snow_series = pd.Series(native_series)
+    snow_index = pd.Index(native_index)
+    assert_frame_equal(
+        pd.DataFrame(snow_series, index=snow_index),
+        native_pd.DataFrame(native_series, index=native_index),
+    )
+
+
+@pytest.mark.parametrize(
+    "native_series, native_index",
+    [
+        (
+            native_pd.Series(
+                ["A", "B", "C", "D"], index=[1.1, 2.2, 3, 4], name="index series name"
+            ),
+            native_pd.Index([1, 2, 3, 4], name="some name"),
+        ),  # some index values are missing
+        (
+            native_pd.Series(list(range(100))),
+            native_pd.Index(list(range(-50, 100, 4)), name="skip numbers"),
+        ),  # some index values are missing
+        (
+            native_pd.Series(
+                [10, 20, 30, 40],
+                index=native_pd.Index([None, "B", 0, 3.14], name="mixed"),
+                name="mixed series as index",
+            ),
+            native_pd.Index(["B", 0, None, 3.14]),
+        ),  # rearranged index values
+        (
+            native_pd.Series(["A", "B", "C", "D", "E"], name="series"),
+            native_pd.Index([3, 4], name="index"),
+        ),  # subset of index values
+        (
+            native_pd.Series(
+                list(range(20)), index=native_pd.Index(list(range(20)), name=20)
+            ),
+            native_pd.Index(list(range(20))),
+        ),  # all index values match
+        (
+            native_pd.Series(["A", "V", "D", "R"]),
+            native_pd.Index([10, 20, 30, 40], name="none"),
+        ),  # no index values match
+        (
+            native_pd.Series([], name="empty series", dtype="int64"),
+            native_pd.Index([], name="empty index", dtype="int64"),
+        ),  # empty series and index
+    ],
+)
+@sql_count_checker(query_count=1, join_count=2)
+def test_create_series_with_series_as_data_and_index_as_index(
+    native_series, native_index
+):
+    """
+    Creating a Series where the data is a Series and the index is an Index.
+    """
+    # Two joins are performed: one from joining the data and index parameters to have a query compiler whose
+    # index columns match the provided index, and one from performing .loc[] to filter the generated qc.
+    snow_series = pd.Series(native_series)
+    snow_index = pd.Index(native_index)
+    assert_series_equal(
+        pd.Series(snow_series, index=snow_index),
+        native_pd.Series(native_series, index=native_index),
+    )
+
+
+@pytest.mark.parametrize(
+    "native_df, native_index",
+    [
+        # Single column DataFrames.
+        (
+            native_pd.DataFrame(
+                ["A", "B", "C", "D"], index=[1.1, 2.2, 3, 4], columns=["df column!"]
+            ),
+            native_pd.Index([1, 2, 3, 4], name="some name"),
+        ),  # some index values are missing
+        (
+            native_pd.DataFrame(list(range(100))),
+            native_pd.Index(list(range(-50, 100, 4)), name="skip numbers"),
+        ),  # some index values are missing
+        (
+            native_pd.DataFrame(
+                [10, 20, 30, 40],
+                index=native_pd.Index([None, "B", 0, 3.14], name="mixed"),
+                columns=["C"],
+            ),
+            native_pd.Index(["B", 0, None, 3.14]),
+        ),  # rearranged index values
+        (
+            native_pd.DataFrame(["A", "B", "C", "D", "E"], columns=["B"]),
+            native_pd.Index([3, 4], name="index"),
+        ),  # subset of index values
+        (
+            native_pd.DataFrame(list(range(20))),
+            native_pd.Index(list(range(20))),
+        ),  # all index values match
+        (
+            native_pd.DataFrame(["A", "V", "D", "R"]),
+            native_pd.Index([10, 20, 30, 40], name="none"),
+        ),  # no index values match
+        # Multi-column DataFrames.
+        (
+            native_pd.DataFrame(
+                {"col1": ["A", "B", "C", "D"], "col2": ["B", "H", "T", "W"]},
+                index=[1.1, 2.2, 3, 4],
+            ),
+            native_pd.Index([1, 2, 3, 4], name="some name"),
+        ),  # some index values are missing
+        (
+            native_pd.DataFrame(
+                [[10, 20, 30, 40], [2, 4, 6, 7], [-1, -2, -3, -4], [90, 50, 30, 10]],
+                index=native_pd.Index([None, "B", 0, 3.14], name="mixed"),
+                columns=["C", "L", "M", "W"],
+            ),
+            native_pd.Index(["B", 0, None, 3.14]),
+        ),  # rearranged index values
+        (
+            native_pd.DataFrame(
+                [["A", "B", "C", "D", "E"], ["R", "S", "T", "U", "V"]],
+                columns=[1, 2, 3, 4, 5],
+            ),
+            native_pd.Index([3, 4], name="index"),
+        ),  # subset of index values
+        (
+            native_pd.DataFrame([list(range(20)), list(range(20))]),
+            native_pd.Index(list(range(20))),
+        ),  # all index values match
+        (
+            native_pd.DataFrame(
+                {
+                    "A": ["A", "V", "D", "R"],
+                    "V": ["V", "D", "R", "A"],
+                    "D": ["D", "R", "A", "V"],
+                    "R": ["R", "A", "V", "D"],
+                }
+            ),
+            native_pd.Index([10, 20, 30, 40], name="none"),
+        ),  # no index values match
+    ],
+)
+@sql_count_checker(query_count=1, join_count=2)
+def test_create_df_with_df_as_data_and_index_as_index(native_df, native_index):
+    """
+    Creating a DataFrame where the data is a DataFrame and the index is an Index.
+    """
+    # Two joins are performed: one from joining the data and index parameters to have a query compiler whose
+    # index columns match the provided index, and one from performing .loc[] to filter the generated qc.
+    snow_df = pd.DataFrame(native_df)
+    snow_index = pd.Index(native_index)
+    assert_frame_equal(
+        pd.DataFrame(snow_df, index=snow_index),
+        native_pd.DataFrame(native_df, index=native_index),
+    )
+
+
+@pytest.mark.parametrize(
+    "native_df, native_index",
+    [
+        # Single column DataFrames.
+        (
+            native_pd.DataFrame([]),
+            native_pd.Index([], name="empty index", dtype="int64"),
+        ),  # empty series and index
+        # Multi-column DataFrames.
+        (
+            native_pd.DataFrame([]),
+            native_pd.Index(["A", "V"], name="non-empty index"),
+        ),  # empty df and index
+    ],
+)
+@sql_count_checker(query_count=1, join_count=2)
+def test_create_df_with_empty_df_as_data_and_index_as_index(native_df, native_index):
+    """
+    Creating a DataFrame where the data is an empty DataFrame and the index is an Index.
+    """
+    # Two joins are performed: one from joining the data and index parameters to have a query compiler whose
+    # index columns match the provided index, and one from performing .loc[] to filter the generated qc.
+    snow_df = pd.DataFrame(native_df)
+    snow_index = pd.Index(native_index)
+    assert_frame_equal(
+        pd.DataFrame(snow_df, index=snow_index),
+        native_pd.DataFrame(native_df, index=native_index),
+        check_column_type=False,
+    )
+
+
+@pytest.mark.parametrize(
+    "native_df, native_index, columns",
+    [
+        # Single column DataFrames.
+        (
+            native_pd.DataFrame(list(range(20))),
+            native_pd.Index(list(range(20))),
+            [1],
+        ),  # all index values match
+        (
+            native_pd.DataFrame(["A", "V", "D", "R"]),
+            native_pd.Index([10, 20, 30, 40], name="none"),
+            ["A"],
+        ),  # no index values match, column missing
+        # Multi-column DataFrames.
+        (
+            native_pd.DataFrame(
+                {"col1": ["A", "B", "C", "D"], "col2": ["B", "H", "T", "W"]},
+                index=[1.1, 2.2, 3, 4],
+            ),
+            native_pd.Index([1, 2, 3, 4], name="some name"),
+            ["col1"],
+        ),  # some index values are missing, subset of columns
+        (
+            native_pd.DataFrame(
+                [[10, 20, 30, 40], [2, 4, 6, 7], [-1, -2, -3, -4], [90, 50, 30, 10]],
+                index=native_pd.Index([None, "B", 0, 3.14], name="mixed"),
+                columns=["C", "L", "M", "W"],
+            ),
+            native_pd.Index(["B", 0, None, 3.14]),
+            [3, 1],
+        ),  # rearranged index and column values
+        (
+            native_pd.DataFrame(
+                [["A", "B", "C", "D", "E"], ["R", "S", "T", "U", "V"]],
+                columns=[1, 2, 3, 4, 5],
+            ),
+            native_pd.Index([3, 4], name="index"),
+            ["A", "V", "C"],
+        ),  # subset of index values
+        (
+            native_pd.DataFrame([list(range(20)), list(range(20))]),
+            native_pd.Index(list(range(20))),
+            [1],
+        ),  # all index values match
+        (
+            native_pd.DataFrame(
+                {
+                    "A": ["A", "V", "D", "R"],
+                    "V": ["V", "D", "R", "A"],
+                    "D": ["D", "R", "A", "V"],
+                    "R": ["R", "A", "V", "D"],
+                }
+            ),
+            native_pd.Index([10, 20, 30, 40], name="none"),
+            ["A", "X", "D", "R"],
+        ),  # no index values match
+    ],
+)
+@pytest.mark.parametrize("column_type", ["list", "index"])
+def test_create_df_with_df_as_data_and_index_as_index_and_different_columns(
+    native_df, native_index, columns, column_type
+):
+    """
+    Creating a DataFrame where the data is a DataFrame, the index is an Index, and non-existent columns.
+    """
+    # Two joins are performed: one from joining the data and index parameters to have a query compiler whose
+    # index columns match the provided index, and one from performing .loc[] to filter the generated qc.
+    # One extra query is required to create the columns if it is an Index (column_type is "index").
+    native_columns = columns if column_type == "list" else native_pd.Index(columns)
+    snow_columns = columns if column_type == "list" else pd.Index(columns)
+    snow_df = pd.DataFrame(native_df)
+    snow_index = pd.Index(native_index)
+    with SqlCounter(query_count=1 if column_type == "list" else 2, join_count=2):
+        assert_frame_equal(
+            pd.DataFrame(snow_df, index=snow_index, columns=native_columns),
+            native_pd.DataFrame(native_df, index=native_index, columns=snow_columns),
+            check_dtype=False,
+        )

From 19792570c54be9224d1d03d82a908691f8ebe370 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Wed, 21 Aug 2024 11:24:12 -0700
Subject: [PATCH 02/42] update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab546703fd..bf2fc4c32d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,6 +35,7 @@
 - Added support for `DatetimeIndex.month_name` and `DatetimeIndex.day_name`.
 - Added support for `Series.dt.weekday`, `Series.dt.time`, and `DatetimeIndex.time`.
 - Added support for subtracting two timestamps to get a Timedelta.
+- Added support for creating `Series` and `DataFrame` objects with the lazy `Index` object as `data`, `index`, and `columns` parameters.
 
 #### Bug Fixes
 

From 5dbb76dacf8afa5a92afbe66908d8ccdf80d2441 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Wed, 21 Aug 2024 11:49:06 -0700
Subject: [PATCH 03/42] add more tests

---
 .../snowpark/modin/pandas/dataframe.py        |  2 +-
 .../test_df_series_creation_with_index.py     | 23 +++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py
index 9e34db4b04..5dd6210685 100644
--- a/src/snowflake/snowpark/modin/pandas/dataframe.py
+++ b/src/snowflake/snowpark/modin/pandas/dataframe.py
@@ -156,7 +156,7 @@ def __init__(
 
         self._siblings = []
 
-        if isinstance(index, DataFrame):
+        if isinstance(index, DataFrame):  # pandas raises the same error
             raise ValueError("Index data must be 1-dimensional")
 
         # Engine.subscribe(_update_engine)
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
index 9c3bae1b22..f3aaa15965 100644
--- a/tests/integ/modin/index/test_df_series_creation_with_index.py
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -1,6 +1,8 @@
 #
 # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
 #
+import re
+
 import modin.pandas as pd
 import pandas as native_pd
 import pytest
@@ -501,6 +503,16 @@ def test_create_df_with_empty_df_as_data_and_index_as_index(native_df, native_in
             native_pd.Index([10, 20, 30, 40], name="none"),
             ["A", "X", "D", "R"],
         ),  # no index values match
+        (
+            native_pd.DataFrame([]),
+            native_pd.Index([], name="empty index", dtype="int64"),
+            [],
+        ),  # empty data, index, and columns
+        (
+            native_pd.DataFrame([]),
+            native_pd.Index(["A", "V"], name="non-empty index"),
+            ["A", "V"],
+        ),  # empty data, non-empty index and columns
     ],
 )
 @pytest.mark.parametrize("column_type", ["list", "index"])
@@ -523,3 +535,14 @@ def test_create_df_with_df_as_data_and_index_as_index_and_different_columns(
             native_pd.DataFrame(native_df, index=native_index, columns=snow_columns),
             check_dtype=False,
         )
+
+
+@sql_count_checker(query_count=0)
+def test_create_df_with_df_index_negative():
+    with pytest.raises(ValueError, match="Index data must be 1-dimensional"):
+        pd.DataFrame([1, 2, 3], index=pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
+    with pytest.raises(
+        ValueError,
+        match=re.escape("Shape of passed values is (3, 1), indices imply (2, 1)"),
+    ):
+        pd.DataFrame([1, 2, 3], index=[[1, 2], [3, 4], [5, 6]])

From 7de467f704be57b727f63a0128c7f56fc2d1dc03 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Wed, 21 Aug 2024 13:43:40 -0700
Subject: [PATCH 04/42] fix minor bug

---
 .../plugin/compiler/snowflake_query_compiler.py    |  2 +-
 .../index/test_df_series_creation_with_index.py    | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
index 454fe4eec9..fefe867bf9 100644
--- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
+++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
@@ -17491,7 +17491,7 @@ def create_qc_with_data_and_index_joined_on_index(
             )
 
         if index_qc is None:
-            new_internal_frame = self._modin_frame
+            new_internal_frame = self_frame
         else:
             # Join the index and data internal frames.
             other_frame = index_qc._modin_frame
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
index f3aaa15965..2615be8dca 100644
--- a/tests/integ/modin/index/test_df_series_creation_with_index.py
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -537,6 +537,20 @@ def test_create_df_with_df_as_data_and_index_as_index_and_different_columns(
         )
 
 
+@sql_count_checker(query_count=1)
+def test_create_df_with_new_columns():
+    """
+    Creating a DataFrame with columns that don't exist in `data`.
+    """
+    native_df = native_pd.DataFrame(list(range(100)))
+    snow_df = pd.DataFrame(native_df)
+    assert_frame_equal(
+        pd.DataFrame(snow_df, columns=["new column"]),
+        native_pd.DataFrame(native_df, columns=["new column"]),
+        check_dtype=False,
+    )
+
+
 @sql_count_checker(query_count=0)
 def test_create_df_with_df_index_negative():
     with pytest.raises(ValueError, match="Index data must be 1-dimensional"):

From 5dd06fddc14807fd3f38bfbb9cf42834c713c983 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Wed, 21 Aug 2024 14:21:53 -0700
Subject: [PATCH 05/42] fix isocalendar docstring error

---
 src/snowflake/snowpark/modin/pandas/series.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py
index 8cc2665ddc..213711a1ac 100644
--- a/src/snowflake/snowpark/modin/pandas/series.py
+++ b/src/snowflake/snowpark/modin/pandas/series.py
@@ -133,11 +133,16 @@ def __init__(
 
         # Convert lazy index to Series without pulling the data to client.
         if isinstance(data, Index):
-            # If the data is an Index object, we need to convert it to a DataFrame to make sure
+            # If the data is an Index object, we need to convert it to a Series to make sure
             # that the values are in the correct format -- as a data column, not an index column.
             # Additionally, if an index is provided, converting it to an Index object ensures that
             # its values are an index column.
-            query_compiler = data.to_frame(index=False, name=data.name)._query_compiler
+            query_compiler = (
+                data.to_series(index=None, name=name)
+                .reset_index(drop=True)
+                ._query_compiler
+            )
+
             if index is not None:
                 index = index if isinstance(index, Index) else Index(index)
                 query_compiler = query_compiler.create_qc_with_index_data_and_qc_index(

From 8b944623a1195f7c65f63558bea242624ccdc4db Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Wed, 21 Aug 2024 17:32:29 -0700
Subject: [PATCH 06/42] truncate tests, update changelog wording, reduce 2
 queries to one query one join

---
 CHANGELOG.md                                  |   2 +-
 .../snowpark/modin/pandas/dataframe.py        |  22 +-
 src/snowflake/snowpark/modin/pandas/series.py |  11 +-
 .../test_df_series_creation_with_index.py     | 294 ++++++------------
 4 files changed, 124 insertions(+), 205 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bf2fc4c32d..a7cc298a72 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,7 +35,7 @@
 - Added support for `DatetimeIndex.month_name` and `DatetimeIndex.day_name`.
 - Added support for `Series.dt.weekday`, `Series.dt.time`, and `DatetimeIndex.time`.
 - Added support for subtracting two timestamps to get a Timedelta.
-- Added support for creating `Series` and `DataFrame` objects with the lazy `Index` object as `data`, `index`, and `columns` parameters.
+- Added support for constructing `Series` and `DataFrame` objects with the lazy `Index` object as `data`, `index`, and `columns` arguments.
 
 #### Bug Fixes
 
diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py
index 5dd6210685..609f5bf55e 100644
--- a/src/snowflake/snowpark/modin/pandas/dataframe.py
+++ b/src/snowflake/snowpark/modin/pandas/dataframe.py
@@ -165,7 +165,12 @@ def __init__(
             # that the values are in the correct format -- as a data column, not an index column.
             # Additionally, if an index is provided, converting it to an Index object ensures that
             # its values are an index column.
-            query_compiler = data.to_frame(index=False, name=data.name)._query_compiler
+            # We set the column name if it is not in the provided Index `data`.
+            if data.name is None:
+                new_name = 0 if columns is None else columns[0]
+            else:
+                new_name = data.name
+            query_compiler = data.to_frame(index=False, name=new_name)._query_compiler
             if index is not None:
                 index = index if isinstance(index, Index) else Index(index)
                 query_compiler = query_compiler.create_qc_with_index_data_and_qc_index(
@@ -291,14 +296,25 @@ def __init__(
                     k: v._to_pandas() if isinstance(v, Series) else v
                     for k, v in data.items()
                 }
+
+            new_index = index
+            if isinstance(index, Index):
+                # Skip turning this into a native pandas object here since this issues an extra query.
+                # Instead, first get the query compiler from native pandas and then add the index column.
+                new_index = None
             pandas_df = pandas.DataFrame(
                 data=try_convert_index_to_native(data),
-                index=try_convert_index_to_native(index),
+                index=try_convert_index_to_native(new_index),
                 columns=try_convert_index_to_native(columns),
                 dtype=dtype,
                 copy=copy,
             )
-            self._query_compiler = from_pandas(pandas_df)._query_compiler
+            query_compiler = from_pandas(pandas_df)._query_compiler
+            if isinstance(index, Index):
+                query_compiler = query_compiler.create_qc_with_index_data_and_qc_index(
+                    index._query_compiler
+                )
+            self._query_compiler = query_compiler
         else:
             self._query_compiler = query_compiler
 
diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py
index 213711a1ac..f59c1a7939 100644
--- a/src/snowflake/snowpark/modin/pandas/series.py
+++ b/src/snowflake/snowpark/modin/pandas/series.py
@@ -175,11 +175,16 @@ def __init__(
                 ):
                     name = data.name
 
+            new_index = index
+            if isinstance(index, Index):
+                # Skip turning this into a native pandas object here since this issues an extra query.
+                # Instead, first get the query compiler from native pandas and then add the index column.
+                new_index = None
             query_compiler = from_pandas(
                 pandas.DataFrame(
                     pandas.Series(
                         data=try_convert_index_to_native(data),
-                        index=try_convert_index_to_native(index),
+                        index=try_convert_index_to_native(new_index),
                         dtype=dtype,
                         name=name,
                         copy=copy,
@@ -187,6 +192,10 @@ def __init__(
                     )
                 )
             )._query_compiler
+            if isinstance(index, Index):
+                query_compiler = query_compiler.create_qc_with_index_data_and_qc_index(
+                    index._query_compiler
+                )
         self._query_compiler = query_compiler.columnarize()
         if name is not None:
             self.name = name
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
index 2615be8dca..5b2571ccca 100644
--- a/tests/integ/modin/index/test_df_series_creation_with_index.py
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -12,259 +12,151 @@
 from tests.integ.modin.utils import assert_frame_equal, assert_series_equal
 
 
-@pytest.mark.parametrize(
-    "data", [[1, 2, 3, 4], list(range(250)), ["A", None, 2.3, 1], []]
-)
-@sql_count_checker(query_count=1)
-def test_create_df_with_index_as_data(data):
-    """
-    Creating a DataFrame where the data is an Index.
+def obj_type_helper(obj_type: str) -> tuple:
     """
-    # Create Snowpark pandas DataFrame and native pandas DataFrame from an Index object.
-    native_idx = native_pd.Index(data, name="some name")
-    snow_idx = pd.Index(native_idx)
-    assert_frame_equal(pd.DataFrame(snow_idx), native_pd.DataFrame(native_idx))
+    Helper function to return the appropriate objects and kwargs based on the object type.
 
+    Parameters
+    ----------
+    obj_type : str
+        The type of object to be created. Can be either "df" or "series".
 
-@pytest.mark.parametrize(
-    "data", [[1, 2, 3, 4], list(range(250)), ["A", None, 2.3, 1], []]
-)
-@sql_count_checker(query_count=1)
-def test_create_series_with_index_as_data(data):
-    """
-    Creating a Series where the data is an Index.
+    Returns
+    -------
+    tuple
+        A tuple containing the assert_equal_func, Snowpark pandas object dtype, native pandas object dtype, and kwargs.
     """
-    # Create Snowpark pandas Series and native pandas Series from an Index object.
-    native_idx = native_pd.Index(data, name="some name")
-    snow_idx = pd.Index(native_idx)
-    assert_series_equal(pd.Series(snow_idx), native_pd.Series(native_idx))
+    if obj_type == "df":
+        assert_equal_func = assert_frame_equal
+        snow_obj, native_obj = pd.DataFrame, native_pd.DataFrame
+        kwargs = {"check_column_type": False}
+    else:
+        assert_equal_func = assert_series_equal
+        snow_obj, native_obj = pd.Series, native_pd.Series
+        kwargs = {}
+    return assert_equal_func, snow_obj, native_obj, kwargs
 
 
 @pytest.mark.parametrize(
-    "data, index",
+    "native_idx",
     [
-        ([1, 2, 3, 4], ["A", "B", "C", "D"]),
-        (list(range(100)), list(range(200, 300))),
-        (["A", None, 2.3, 1], [None, "B", 0, 3.14]),
-        ([], []),
+        native_pd.Index([1, 2, 3, 4], name="some name"),
+        native_pd.Index(list(range(250))),
+        native_pd.Index(["A", None, 2.3, 1], name="AAAAA"),
+        native_pd.Index([]),
     ],
 )
-@sql_count_checker(query_count=2)
-def test_create_df_with_index_as_index(data, index):
+@pytest.mark.parametrize("obj_type", ["series", "df"])
+@sql_count_checker(query_count=1)
+def test_create_with_index_as_data(native_idx, obj_type):
     """
-    Creating a DataFrame where the index is an Index.
+    Creating a Series where the data is an Index.
     """
-    # Two queries are issued: one when creating the DataFrame (the index is converted
-    # to a native pandas object), one when materializing the DataFrame for comparison.
-    # Create Snowpark pandas DataFrame and native pandas DataFrame with an Index object as the index.
-    native_idx = native_pd.Index(index, name="some name")
     snow_idx = pd.Index(native_idx)
-    assert_frame_equal(
-        pd.DataFrame(data, index=snow_idx),
-        native_pd.DataFrame(data, index=native_idx),
-        check_dtype=False,
-        check_index_type=False,
-        check_column_type=False,
-    )
+    assert_equal_func, snow_obj, native_obj, _ = obj_type_helper(obj_type)
+    assert_equal_func(snow_obj(snow_idx), native_obj(native_idx))
 
 
 @pytest.mark.parametrize(
-    "data, index",
+    "data, native_idx",
     [
-        ([1, 2, 3, 4], ["A", "B", "C", "D"]),
-        (list(range(100)), list(range(100, 200))),
-        (["A", None, 2.3, 1], [None, "B", 0, 3.14]),
-        ([], []),
+        ([1, 2, 3, 4], native_pd.Index(["A", "B", "C", "D"], name="some name")),
+        (list(range(100)), native_pd.Index(list(range(200, 300)))),
+        (["A", None, 2.3, 1], native_pd.Index([None, "B", 0, 3.14])),
+        ([], native_pd.Index([], name="empty index")),
     ],
 )
-@sql_count_checker(query_count=2)
-def test_create_series_with_index_as_index(data, index):
+@pytest.mark.parametrize("obj_type", ["series", "df"])
+@sql_count_checker(query_count=1, join_count=1)
+def test_create_with_index_as_index(data, native_idx, obj_type):
     """
-    Creating a Series where the index is an Index.
+    Creating a Series/DataFrame where the index is an Index.
     """
-    # Two queries are issued: one when creating the Series (the index is converted
-    # to a native pandas object), one when materializing the Series for comparison.
-    # Create Snowpark pandas Series and native pandas Series with an Index object as the index.
-    native_idx = native_pd.Index(index, name="some name")
+    # A join is performed to set the index columns of the generated Series/DataFrame.
     snow_idx = pd.Index(native_idx)
-    assert_series_equal(
-        pd.Series(data, index=snow_idx),
-        native_pd.Series(data, index=native_idx),
+    assert_equal_func, snow_obj, native_obj, kwargs = obj_type_helper(obj_type)
+    assert_equal_func(
+        snow_obj(data, index=snow_idx),
+        native_obj(data, index=native_idx),
         check_dtype=False,
         check_index_type=False,
+        **kwargs,
     )
 
 
 @pytest.mark.parametrize(
-    "data, index",
-    [
-        ([1, 2, 3, 4], ["A", "B", "C", "D"]),
-        (list(range(250)), list(range(250, 500))),
-        (["A", None, 2.3, 1], [None, "B", 0, 3.14]),
-        ([], []),
-    ],
-)
-@sql_count_checker(query_count=1, join_count=1)
-def test_create_df_with_index_as_data_and_index(data, index):
-    """
-    Creating a DataFrame where the data is an Index and the index is also an Index.
-    """
-    # Create Snowpark pandas DataFrame and native pandas DataFrame from Index objects.
-    native_idx_data = native_pd.Index(data, name="data name")
-    snow_idx_data = pd.Index(native_idx_data)
-    native_idx_index = native_pd.Index(index, name="index name")
-    snow_idx_index = pd.Index(native_idx_index)
-    assert_frame_equal(
-        pd.DataFrame(snow_idx_data, index=snow_idx_index),
-        native_pd.DataFrame(native_idx_data, index=native_idx_index),
-    )
-
-
-@pytest.mark.parametrize(
-    "data, index",
-    [
-        ([1, 2, 3, 4], ["A", "B", "C", "D"]),
-        (list(range(100)), list(range(100, 200))),
-        (["A", None, 2.3, 1], [None, "B", 0, 3.14]),
-        ([], []),
-    ],
-)
-@sql_count_checker(query_count=1, join_count=1)
-def test_create_series_with_index_as_data_and_index(data, index):
-    """
-    Creating a Series where the data is an Index and the index is also an Index.
-    """
-    # Create Snowpark pandas Series and native pandas Series from Index objects.
-    # TODO: Index is not being set at all.
-    native_idx_data = native_pd.Index(data, name="data name")
-    snow_idx_data = pd.Index(native_idx_data)
-    native_idx_index = native_pd.Index(index, name="index name")
-    snow_idx_index = pd.Index(native_idx_index)
-    assert_series_equal(
-        pd.Series(snow_idx_data, index=snow_idx_index),
-        native_pd.Series(native_idx_data, index=native_idx_index),
-    )
-
-
-@pytest.mark.parametrize(
-    "data, native_series",
+    "native_idx_data, native_idx_index",
     [
         (
-            [1, 2, 3, 4],
-            native_pd.Series(
-                ["A", "B", "C", "D"],
-                index=[1.1, 2.2, 3.3, 4.4],
-                name="index series name",
-            ),
+            native_pd.Index([1, 2, 3, 4], name="data name"),
+            native_pd.Index(["A", "B", "C", "D"]),
         ),
-        (list(range(100)), native_pd.Series(list(range(100, 200)))),
         (
-            ["A", None, 2.3, 1],
-            native_pd.Series([None, "B", 0, 3.14], name="mixed series as index"),
+            native_pd.Index(list(range(250))),
+            native_pd.Index(list(range(250, 500)), name="index name"),
         ),
-        ([], native_pd.Series([], name="empty series")),
+        (
+            native_pd.Index(["A", None, 2.3, 1], name="data name"),
+            native_pd.Index([None, "B", 0, 3.14], name="index name"),
+        ),
+        (native_pd.Index([]), native_pd.Index([])),
     ],
 )
+@pytest.mark.parametrize("obj_type", ["series", "df"])
 @sql_count_checker(query_count=1, join_count=1)
-def test_create_df_with_index_as_data_and_series_as_index(data, native_series):
+def test_create_with_index_as_data_and_index(
+    native_idx_data, native_idx_index, obj_type
+):
     """
-    Creating a DataFrame where the data is an Index and the index is a Series.
+    Creating a Series/DataFrame where the data is an Index and the index is also an Index.
     """
-    snow_series = pd.Series(native_series)
-    native_index = native_pd.Index(data, name="index data name")
-    snow_index = pd.Index(native_index)
-    assert_frame_equal(
-        pd.DataFrame(snow_index, index=snow_series),
-        native_pd.DataFrame(native_index, index=native_series),
+    # A join is required to combine the query compilers of the data and index objects.
+    snow_idx_data = pd.Index(native_idx_data)
+    snow_idx_index = pd.Index(native_idx_index)
+    assert_equal_func, snow_obj, native_obj, _ = obj_type_helper(obj_type)
+    assert_equal_func(
+        snow_obj(data=snow_idx_data, index=snow_idx_index),
+        native_obj(data=native_idx_data, index=native_idx_index),
     )
 
 
 @pytest.mark.parametrize(
-    "data, native_series",
+    "native_index, native_series",
     [
         (
-            [1, 2, 3, 4],
+            native_pd.Index([1, 2, 3, 4], name="index name"),
             native_pd.Series(
                 ["A", "B", "C", "D"],
                 index=[1.1, 2.2, 3.3, 4.4],
                 name="index series name",
             ),
         ),
-        (list(range(100)), native_pd.Series(list(range(100, 200)))),
         (
-            ["A", None, 2.3, 1],
-            native_pd.Series([None, "B", 0, 3.14], name="mixed series as index"),
+            native_pd.Index(list(range(100)), name="AAAAA"),
+            native_pd.Series(list(range(100, 200))),
+        ),
+        (
+            native_pd.Index(["A", None, 2.3, 1]),
+            native_pd.Series([None, "B", 0, 3.14]),
         ),
-        ([], native_pd.Series([], name="empty series")),
+        (native_pd.Index([]), native_pd.Series([], name="empty series")),
     ],
 )
+@pytest.mark.parametrize("obj_type", ["series", "df"])
 @sql_count_checker(query_count=1, join_count=1)
-def test_create_series_with_index_as_data_and_series_as_index(data, native_series):
+def test_create_with_index_as_data_and_series_as_index(
+    native_index, native_series, obj_type
+):
     """
-    Creating a Series where the data is an Index and the index is a Series.
+    Creating a Series/DataFrame where the data is an Index and the index is a Series.
     """
-    snow_series = pd.Series(native_series)
-    native_index = native_pd.Index(data, name="index data name")
+    # A join is required to combine the query compilers of the data and index objects.
     snow_index = pd.Index(native_index)
-    assert_series_equal(
-        pd.Series(snow_index, index=snow_series),
-        native_pd.Series(native_index, index=native_series),
-    )
-
-
-@pytest.mark.parametrize(
-    "native_series, native_index",
-    [
-        (
-            native_pd.Series(
-                ["A", "B", "C", "D"], index=[1.1, 2.2, 3, 4], name="index series name"
-            ),
-            native_pd.Index([1, 2, 3, 4], name="some name"),
-        ),  # some index values are missing
-        (
-            native_pd.Series(list(range(100))),
-            native_pd.Index(list(range(-50, 100, 4)), name="skip numbers"),
-        ),  # some index values are missing
-        (
-            native_pd.Series(
-                [10, 20, 30, 40],
-                index=native_pd.Index([None, "B", 0, 3.14], name="mixed"),
-                name="mixed series as index",
-            ),
-            native_pd.Index(["B", 0, None, 3.14]),
-        ),  # rearranged index values
-        (
-            native_pd.Series(["A", "B", "C", "D", "E"], name="series"),
-            native_pd.Index([3, 4], name="index"),
-        ),  # subset of index values
-        (
-            native_pd.Series(
-                list(range(20)), index=native_pd.Index(list(range(20)), name=20)
-            ),
-            native_pd.Index(list(range(20))),
-        ),  # all index values match
-        (
-            native_pd.Series(["A", "V", "D", "R"]),
-            native_pd.Index([10, 20, 30, 40], name="none"),
-        ),  # no index values match
-        (
-            native_pd.Series([], name="empty series", dtype="int64"),
-            native_pd.Index([], name="empty index", dtype="int64"),
-        ),  # empty series and index
-    ],
-)
-@sql_count_checker(query_count=1, join_count=2)
-def test_create_df_with_series_as_data_and_index_as_index(native_series, native_index):
-    """
-    Creating a DataFrame where the data is a Series and the index is an Index.
-    """
-    # Two joins are performed: one from joining the data and index parameters to have a query compiler whose
-    # index columns match the provided index, and one from performing .loc[] to filter the generated qc.
     snow_series = pd.Series(native_series)
-    snow_index = pd.Index(native_index)
-    assert_frame_equal(
-        pd.DataFrame(snow_series, index=snow_index),
-        native_pd.DataFrame(native_series, index=native_index),
+    assert_equal_func, snow_obj, native_obj, _ = obj_type_helper(obj_type)
+    assert_equal_func(
+        snow_obj(data=snow_index, index=snow_series),
+        native_obj(data=native_index, index=native_series),
     )
 
 
@@ -309,20 +201,22 @@ def test_create_df_with_series_as_data_and_index_as_index(native_series, native_
         ),  # empty series and index
     ],
 )
+@pytest.mark.parametrize("obj_type", ["series", "df"])
 @sql_count_checker(query_count=1, join_count=2)
-def test_create_series_with_series_as_data_and_index_as_index(
-    native_series, native_index
+def test_create_with_series_as_data_and_index_as_index(
+    native_series, native_index, obj_type
 ):
     """
-    Creating a Series where the data is a Series and the index is an Index.
+    Creating a Series/DataFrame where the data is a Series and the index is an Index.
     """
     # Two joins are performed: one from joining the data and index parameters to have a query compiler whose
     # index columns match the provided index, and one from performing .loc[] to filter the generated qc.
     snow_series = pd.Series(native_series)
     snow_index = pd.Index(native_index)
-    assert_series_equal(
-        pd.Series(snow_series, index=snow_index),
-        native_pd.Series(native_series, index=native_index),
+    assert_equal_func, snow_obj, native_obj, _ = obj_type_helper(obj_type)
+    assert_equal_func(
+        snow_obj(data=snow_series, index=snow_index),
+        native_obj(data=native_series, index=native_index),
     )
 
 

From a9376c1200c403fc8aafe0ed7dce4e884e073bb2 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Thu, 22 Aug 2024 09:32:39 -0700
Subject: [PATCH 07/42] Get rid of the join performed when only index is an
 Index object and data is not a Snowpark pandas object

---
 src/snowflake/snowpark/modin/pandas/dataframe.py  | 15 ++-------------
 src/snowflake/snowpark/modin/pandas/series.py     | 11 +----------
 .../index/test_df_series_creation_with_index.py   |  6 ++++--
 3 files changed, 7 insertions(+), 25 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py
index 609f5bf55e..a6ecaa58ff 100644
--- a/src/snowflake/snowpark/modin/pandas/dataframe.py
+++ b/src/snowflake/snowpark/modin/pandas/dataframe.py
@@ -296,25 +296,14 @@ def __init__(
                     k: v._to_pandas() if isinstance(v, Series) else v
                     for k, v in data.items()
                 }
-
-            new_index = index
-            if isinstance(index, Index):
-                # Skip turning this into a native pandas object here since this issues an extra query.
-                # Instead, first get the query compiler from native pandas and then add the index column.
-                new_index = None
             pandas_df = pandas.DataFrame(
                 data=try_convert_index_to_native(data),
-                index=try_convert_index_to_native(new_index),
+                index=try_convert_index_to_native(index),
                 columns=try_convert_index_to_native(columns),
                 dtype=dtype,
                 copy=copy,
             )
-            query_compiler = from_pandas(pandas_df)._query_compiler
-            if isinstance(index, Index):
-                query_compiler = query_compiler.create_qc_with_index_data_and_qc_index(
-                    index._query_compiler
-                )
-            self._query_compiler = query_compiler
+            self._query_compiler = from_pandas(pandas_df)._query_compiler
         else:
             self._query_compiler = query_compiler
 
diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py
index f59c1a7939..213711a1ac 100644
--- a/src/snowflake/snowpark/modin/pandas/series.py
+++ b/src/snowflake/snowpark/modin/pandas/series.py
@@ -175,16 +175,11 @@ def __init__(
                 ):
                     name = data.name
 
-            new_index = index
-            if isinstance(index, Index):
-                # Skip turning this into a native pandas object here since this issues an extra query.
-                # Instead, first get the query compiler from native pandas and then add the index column.
-                new_index = None
             query_compiler = from_pandas(
                 pandas.DataFrame(
                     pandas.Series(
                         data=try_convert_index_to_native(data),
-                        index=try_convert_index_to_native(new_index),
+                        index=try_convert_index_to_native(index),
                         dtype=dtype,
                         name=name,
                         copy=copy,
@@ -192,10 +187,6 @@ def __init__(
                     )
                 )
             )._query_compiler
-            if isinstance(index, Index):
-                query_compiler = query_compiler.create_qc_with_index_data_and_qc_index(
-                    index._query_compiler
-                )
         self._query_compiler = query_compiler.columnarize()
         if name is not None:
             self.name = name
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
index 5b2571ccca..3396336999 100644
--- a/tests/integ/modin/index/test_df_series_creation_with_index.py
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -67,12 +67,14 @@ def test_create_with_index_as_data(native_idx, obj_type):
     ],
 )
 @pytest.mark.parametrize("obj_type", ["series", "df"])
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=2)
 def test_create_with_index_as_index(data, native_idx, obj_type):
     """
     Creating a Series/DataFrame where the index is an Index.
     """
-    # A join is performed to set the index columns of the generated Series/DataFrame.
+    # Two queries are issued: one when creating the Series/DataFrame (the index is
+    # converted to a native pandas object), one when materializing the Series/DataFrame
+    # for comparison.
     snow_idx = pd.Index(native_idx)
     assert_equal_func, snow_obj, native_obj, kwargs = obj_type_helper(obj_type)
     assert_equal_func(

From 420a5ac9897fc92d38a34211d294bef36bd66093 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Thu, 22 Aug 2024 15:28:35 -0700
Subject: [PATCH 08/42] Add back the index join query to DataFrame/Series
 constructor, update the constructor tests, rewrite concat tests

---
 .../snowpark/modin/pandas/dataframe.py        |  15 +-
 src/snowflake/snowpark/modin/pandas/series.py |  11 +-
 .../test_df_series_creation_with_index.py     |   6 +-
 tests/integ/modin/test_concat.py              | 437 ++++++++++--------
 4 files changed, 282 insertions(+), 187 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py
index a6ecaa58ff..609f5bf55e 100644
--- a/src/snowflake/snowpark/modin/pandas/dataframe.py
+++ b/src/snowflake/snowpark/modin/pandas/dataframe.py
@@ -296,14 +296,25 @@ def __init__(
                     k: v._to_pandas() if isinstance(v, Series) else v
                     for k, v in data.items()
                 }
+
+            new_index = index
+            if isinstance(index, Index):
+                # Skip turning this into a native pandas object here since this issues an extra query.
+                # Instead, first get the query compiler from native pandas and then add the index column.
+                new_index = None
             pandas_df = pandas.DataFrame(
                 data=try_convert_index_to_native(data),
-                index=try_convert_index_to_native(index),
+                index=try_convert_index_to_native(new_index),
                 columns=try_convert_index_to_native(columns),
                 dtype=dtype,
                 copy=copy,
             )
-            self._query_compiler = from_pandas(pandas_df)._query_compiler
+            query_compiler = from_pandas(pandas_df)._query_compiler
+            if isinstance(index, Index):
+                query_compiler = query_compiler.create_qc_with_index_data_and_qc_index(
+                    index._query_compiler
+                )
+            self._query_compiler = query_compiler
         else:
             self._query_compiler = query_compiler
 
diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py
index 213711a1ac..f59c1a7939 100644
--- a/src/snowflake/snowpark/modin/pandas/series.py
+++ b/src/snowflake/snowpark/modin/pandas/series.py
@@ -175,11 +175,16 @@ def __init__(
                 ):
                     name = data.name
 
+            new_index = index
+            if isinstance(index, Index):
+                # Skip turning this into a native pandas object here since this issues an extra query.
+                # Instead, first get the query compiler from native pandas and then add the index column.
+                new_index = None
             query_compiler = from_pandas(
                 pandas.DataFrame(
                     pandas.Series(
                         data=try_convert_index_to_native(data),
-                        index=try_convert_index_to_native(index),
+                        index=try_convert_index_to_native(new_index),
                         dtype=dtype,
                         name=name,
                         copy=copy,
@@ -187,6 +192,10 @@ def __init__(
                     )
                 )
             )._query_compiler
+            if isinstance(index, Index):
+                query_compiler = query_compiler.create_qc_with_index_data_and_qc_index(
+                    index._query_compiler
+                )
         self._query_compiler = query_compiler.columnarize()
         if name is not None:
             self.name = name
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
index 3396336999..5b2571ccca 100644
--- a/tests/integ/modin/index/test_df_series_creation_with_index.py
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -67,14 +67,12 @@ def test_create_with_index_as_data(native_idx, obj_type):
     ],
 )
 @pytest.mark.parametrize("obj_type", ["series", "df"])
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_create_with_index_as_index(data, native_idx, obj_type):
     """
     Creating a Series/DataFrame where the index is an Index.
     """
-    # Two queries are issued: one when creating the Series/DataFrame (the index is
-    # converted to a native pandas object), one when materializing the Series/DataFrame
-    # for comparison.
+    # A join is performed to set the index columns of the generated Series/DataFrame.
     snow_idx = pd.Index(native_idx)
     assert_equal_func, snow_obj, native_obj, kwargs = obj_type_helper(obj_type)
     assert_equal_func(
diff --git a/tests/integ/modin/test_concat.py b/tests/integ/modin/test_concat.py
index f3e149a37f..981a2932a2 100644
--- a/tests/integ/modin/test_concat.py
+++ b/tests/integ/modin/test_concat.py
@@ -25,56 +25,56 @@
 
 @pytest.fixture(scope="function")
 def df1():
-    return pd.DataFrame(
+    return native_pd.DataFrame(
         {
             "C": [1, 2, 3],
             "A": ["a", "b", "c"],
             "D": [3, 2, 1],
         },
-        index=pd.Index([3, 1, 2], name="left_i"),
+        index=native_pd.Index([3, 1, 2], name="left_i"),
     )
 
 
 @pytest.fixture(scope="function")
 def df2():
-    return pd.DataFrame(
+    return native_pd.DataFrame(
         {
             "P": [3, 2, 1, 3],
             "A": ["a", "b", "c", "a"],
             "C": [1, 2, 3, 2],
         },
-        index=pd.Index([2, 0, 3, 4], name="right_i"),
+        index=native_pd.Index([2, 0, 3, 4], name="right_i"),
     )
 
 
 @pytest.fixture(scope="function")
 def df_single_col():
-    return pd.DataFrame([1], columns=["A"])
+    return native_pd.DataFrame([1], columns=["A"])
 
 
 @pytest.fixture(scope="function")
 def zero_rows_df():
-    return pd.DataFrame(columns=["A", "B"])
+    return native_pd.DataFrame(columns=["A", "B"])
 
 
 @pytest.fixture(scope="function")
 def zero_columns_df():
-    return pd.DataFrame(index=pd.Index([1, 2]))
+    return native_pd.DataFrame(index=pd.Index([1, 2]))
 
 
 @pytest.fixture(scope="function")
 def empty_df():
-    return pd.DataFrame()
+    return native_pd.DataFrame()
 
 
 @pytest.fixture(scope="function")
 def series1():
-    return pd.Series([1, 2])
+    return native_pd.Series([1, 2])
 
 
 @pytest.fixture(scope="function")
 def series2():
-    return pd.Series([2, 1])
+    return native_pd.Series([2, 1])
 
 
 @pytest.fixture(params=["inner", "outer"])
@@ -109,11 +109,10 @@ def axis(request):
     return request.param
 
 
-def _concat_operation(objs, native_objs=None, **kwargs):
-    if native_objs is None:
-        native_objs = [obj.to_pandas() for obj in objs]
+# TODO: redefine df1, df2, and _concat_operation
+def _concat_operation(snow_objs, native_objs, **kwargs):
     return (
-        lambda x: pd.concat(objs, **kwargs)
+        lambda x: pd.concat(snow_objs, **kwargs)
         if x == "pd"
         else native_pd.concat(native_objs, **kwargs)
     )
@@ -121,12 +120,19 @@ def _concat_operation(objs, native_objs=None, **kwargs):
 
 def test_concat_basic(df1, df2, join, sort, axis, ignore_index):
     expected_join_count = 1 if axis == 1 else 0
-    with SqlCounter(query_count=3, join_count=expected_join_count):
+    native_objs = [df1, df2]
+    snow_objs = [pd.DataFrame(obj) for obj in native_objs]
+    with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(
             "pd",
             "native_pd",
             _concat_operation(
-                [df1, df2], axis=axis, join=join, sort=sort, ignore_index=ignore_index
+                snow_objs,
+                native_objs,
+                axis=axis,
+                join=join,
+                sort=sort,
+                ignore_index=ignore_index,
             ),
         )
 
@@ -136,7 +142,7 @@ def test_concat_no_items_negative():
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
-        _concat_operation([]),
+        _concat_operation(snow_objs=[], native_objs=[]),
         expect_exception=True,
         expect_exception_type=ValueError,
         expect_exception_match="No objects to concatenate",
@@ -147,6 +153,7 @@ def test_concat_exclude_none(df1, df2, axis):
     expected_join_count = 2 if axis == 1 else 0
     with SqlCounter(query_count=2, join_count=expected_join_count):
         # Verify that none objects are simply ignored.
+        df1, df2 = pd.DataFrame(df1), pd.DataFrame(df2)
         pieces = [df1, None, df2, None]
         result = pd.concat(pieces, axis=axis)
         expected = pd.concat([df1, df2], axis=axis)
@@ -158,7 +165,7 @@ def test_concat_all_none_negative():
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
-        _concat_operation([None, None], [None, None]),
+        _concat_operation(snow_objs=[None, None], native_objs=[None, None]),
         expect_exception=True,
         expect_exception_type=ValueError,
         expect_exception_match="All objects passed were None",
@@ -170,43 +177,53 @@ def test_concat_mixed_objs(df1, df2, series1, series2, axis, join):
     expected_join_count_with_duplicates = 2 if axis == 1 else 0
 
     # Series and Dataframes
-    with SqlCounter(query_count=3, join_count=expected_join_count):
+    native_objs = [df1, series1]
+    snow_objs = [pd.DataFrame(df1), pd.Series(series1)]
+    with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(
             "pd",
             "native_pd",
-            _concat_operation([df1, series1], axis=axis, join=join),
+            _concat_operation(snow_objs, native_objs, axis=axis, join=join),
         )
 
     # All dataframes
-    with SqlCounter(query_count=3, join_count=expected_join_count):
+    native_objs = [df1, df2]
+    snow_objs = [pd.DataFrame(df) for df in native_objs]
+    with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(
             "pd",
             "native_pd",
-            _concat_operation([df1, df2], axis=axis, join=join),
+            _concat_operation(snow_objs, native_objs, axis=axis, join=join),
         )
 
     # All dataframes with duplicates
-    with SqlCounter(query_count=4, join_count=expected_join_count_with_duplicates):
+    native_objs = [df1, df2, df1]
+    snow_objs = [pd.DataFrame(df) for df in native_objs]
+    with SqlCounter(query_count=1, join_count=expected_join_count_with_duplicates):
         eval_snowpark_pandas_result(
             "pd",
             "native_pd",
-            _concat_operation([df1, df2, df1], axis=axis, join=join),
+            _concat_operation(snow_objs, native_objs, axis=axis, join=join),
         )
 
     # All series
-    with SqlCounter(query_count=3, join_count=expected_join_count):
+    native_objs = [series1, series2]
+    snow_objs = [pd.Series(series) for series in native_objs]
+    with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(
             "pd",
             "native_pd",
-            _concat_operation([series1, series2], axis=axis, join=join),
+            _concat_operation(snow_objs, native_objs, axis=axis, join=join),
         )
 
     # All series with duplicates
-    with SqlCounter(query_count=4, join_count=expected_join_count_with_duplicates):
+    native_objs = [series1, series2, series1]
+    snow_objs = [pd.Series(series) for series in native_objs]
+    with SqlCounter(query_count=1, join_count=expected_join_count_with_duplicates):
         eval_snowpark_pandas_result(
             "pd",
             "native_pd",
-            _concat_operation([series1, series2, series1], axis=axis, join=join),
+            _concat_operation(snow_objs, native_objs, axis=axis, join=join),
         )
 
 
@@ -220,15 +237,15 @@ def test_concat_mixed_objs(df1, df2, series1, series2, axis, join):
         ("foo", "foo", ["foo", "foo"]),
     ],
 )
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_concat_series_names_axis1(series1, series2, name1, name2, expected_columns):
-    series1 = series1.rename(name1)
-    series2 = series2.rename(name2)
-    native_s1 = series1.to_pandas()
-    native_s2 = series2.to_pandas()
+    native_series1 = series1.rename(name1)
+    native_series2 = series2.rename(name2)
+    snow_series1 = pd.Series(series1).rename(name1)
+    snow_series2 = pd.Series(series2).rename(name2)
     # snow result
-    snow_res = pd.concat([series1, series2], axis=1)
-    native_res = native_pd.concat([native_s1, native_s2], axis=1)
+    snow_res = pd.concat([snow_series1, snow_series2], axis=1)
+    native_res = native_pd.concat([native_series1, native_series2], axis=1)
     assert_frame_equal(snow_res, native_res)
     # Explicit check for column names
     assert snow_res.columns.tolist() == expected_columns
@@ -244,26 +261,28 @@ def test_concat_series_names_axis1(series1, series2, name1, name2, expected_colu
         ("foo", "foo", "foo"),
     ],
 )
-@sql_count_checker(query_count=3, union_count=1)
+@sql_count_checker(query_count=1, union_count=1)
 def test_concat_series_names_axis0(series1, series2, name1, name2, expected_name):
-    series1 = series1.rename(name1)
-    series2 = series2.rename(name2)
-    native_s1 = series1.to_pandas()
-    native_s2 = series2.to_pandas()
+    native_series1 = series1.rename(name1)
+    native_series2 = series2.rename(name2)
+    snow_series1 = pd.Series(series1).rename(name1)
+    snow_series2 = pd.Series(series2).rename(name2)
     # snow result
-    snow_res = pd.concat([series1, series2])
-    native_res = native_pd.concat([native_s1, native_s2])
+    snow_res = pd.concat([snow_series1, snow_series2])
+    native_res = native_pd.concat([native_series1, native_series2])
     assert_series_equal(snow_res, native_res)
     # Explicit check for column names
     assert snow_res.name == expected_name
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=0)
 def test_concat_invalid_join_negative(df1, df2):
+    native_objs = [df1, df2]
+    snow_objs = [pd.DataFrame(df) for df in native_objs]
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
-        _concat_operation([df1, df2], join="left"),
+        _concat_operation(snow_objs, native_objs, join="left"),
         expect_exception=True,
         expect_exception_type=ValueError,
         expect_exception_match=r"Only can inner \(intersect\) or outer \(union\) join the other axis",
@@ -272,7 +291,8 @@ def test_concat_invalid_join_negative(df1, df2):
 
 def test_concat_iterables(df1, df2, axis):
     # verify that concat works with tuples, list, deque, generators and custom iterables
-    expected = native_pd.concat([df1.to_pandas(), df2.to_pandas()], axis=axis)
+    expected = native_pd.concat([df1, df2], axis=axis)
+    df1, df2 = pd.DataFrame(df1), pd.DataFrame(df2)
 
     expected_join_count = 1 if axis == 1 else 0
 
@@ -330,7 +350,7 @@ def test_concat_non_iterables_negative():
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
-        _concat_operation("abc", "abc"),
+        _concat_operation(snow_objs="abc", native_objs="abc"),
         expect_exception=True,
         expect_exception_type=TypeError,
         expect_exception_match=msg,
@@ -355,12 +375,14 @@ def test_concat_native_object_negative(obj):
         pd.concat({"a": obj})
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=0)
 def test_concat_invalid_type_negative(df1):
+    native_objs = [df1, "abc"]
+    snow_objs = [pd.DataFrame(df1), "abc"]
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
-        _concat_operation([df1, "abc"], [df1.to_pandas(), "abc"]),
+        _concat_operation(snow_objs, native_objs),
         expect_exception=True,
         expect_exception_type=TypeError,
         expect_exception_match="cannot concatenate object of type '<class 'str'>'; only Series and DataFrame objs are valid",
@@ -398,9 +420,8 @@ def _multiindex(labels: list[tuple[Hashable, ...]]) -> MultiIndex:
 def test_concat_multiindex_columns_axis1(
     columns1, columns2, df_single_col, expected_cols
 ):
-    df1 = df_single_col.copy()
+    df1, df2 = pd.DataFrame(df_single_col), pd.DataFrame(df_single_col)
     df1.columns = columns1
-    df2 = df_single_col.copy()
     df2.columns = columns2
 
     result_columns = pd.concat([df1, df2], axis=1).columns
@@ -438,9 +459,8 @@ def test_concat_multiindex_columns_axis1(
 def test_concat_multiindex_row_labels_axis0(
     index1, index2, df_single_col, expected_index, expected_join_count
 ):
-    df1 = df_single_col.copy()
+    df1, df2 = pd.DataFrame(df_single_col), pd.DataFrame(df_single_col)
     df1.index = index1
-    df2 = df_single_col.copy()
     df2.index = index2
 
     with SqlCounter(query_count=1, join_count=expected_join_count):
@@ -481,9 +501,8 @@ def test_concat_multiindex_row_labels_axis0(
 def test_concat_multiindex_row_labels_axis1(
     index1, index2, df_single_col, expected_index, expected_join_count
 ):
-    df1 = df_single_col.copy()
+    df1, df2 = pd.DataFrame(df_single_col), pd.DataFrame(df_single_col)
     df1.index = index1
-    df2 = df_single_col.copy()
     df2.index = index2
 
     with SqlCounter(query_count=1, join_count=expected_join_count):
@@ -518,9 +537,8 @@ def test_concat_multiindex_row_labels_axis1(
     ],
 )
 def test_concat_multiindex_row_labels_axis1_negative(index1, index2, df_single_col):
-    df1 = df_single_col.copy()
+    df1, df2 = pd.DataFrame(df_single_col), pd.DataFrame(df_single_col)
     df1.index = index1
-    df2 = df_single_col.copy()
     df2.index = index2
 
     # This behavior is different with Native pandas, where native pandas cast the index
@@ -553,9 +571,8 @@ def test_concat_multiindex_row_labels_axis1_negative(index1, index2, df_single_c
 def test_concat_multiindex_columns_axis0(
     columns1, columns2, df_single_col, expected_cols
 ):
-    df1 = df_single_col.copy()
+    df1, df2 = pd.DataFrame(df_single_col), pd.DataFrame(df_single_col)
     df1.columns = columns1
-    df2 = df_single_col.copy()
     df2.columns = columns2
 
     result_columns = pd.concat([df1, df2], axis=0).columns
@@ -563,10 +580,15 @@ def test_concat_multiindex_columns_axis0(
 
 
 def test_concat_index_with_nulls(df1, df2):
+    native_objs = [df1, df2]
+    df1, df2 = pd.DataFrame(df1), pd.DataFrame(df2)
     df1.set_index([[None, "a", None]])
     df2.set_index([[4, 5, None, 1]])
-    with SqlCounter(query_count=3):
-        eval_snowpark_pandas_result("pd", "native_pd", _concat_operation([df1, df2]))
+    snow_objs = [df1, df2]
+    with SqlCounter(query_count=1):
+        eval_snowpark_pandas_result(
+            "pd", "native_pd", _concat_operation(snow_objs, native_objs)
+        )
 
 
 @pytest.mark.parametrize(
@@ -581,12 +603,14 @@ def test_concat_index_with_nulls(df1, df2):
     ],
 )
 def test_concat_with_keys(df1, df2, series1, keys, axis):
+    native_objs = [df1, df2, series1]
+    snow_objs = [pd.DataFrame(df1), pd.DataFrame(df2), pd.Series(series1)]
     expected_join_count = 2 if axis == 1 and len(keys) > 1 else 0
-    with SqlCounter(query_count=4, join_count=expected_join_count):
+    with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(
             "pd",
             "native_pd",
-            _concat_operation([df1, df2, series1], keys=keys, axis=axis),
+            _concat_operation(snow_objs, native_objs, keys=keys, axis=axis),
         )
 
 
@@ -601,42 +625,52 @@ def test_concat_with_keys(df1, df2, series1, keys, axis):
     ],
 )
 def test_concat_same_frame_with_keys(df1, keys, axis):
+    native_objs = [df1, df1]
+    snow_objs = [pd.DataFrame(df) for df in native_objs]
     expected_join_count = 1 if axis == 1 and len(keys) > 1 else 0
-    with SqlCounter(query_count=3, join_count=expected_join_count):
+    with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(
-            "pd", "native_pd", _concat_operation([df1, df1], keys=keys, axis=axis)
+            "pd",
+            "native_pd",
+            _concat_operation(snow_objs, native_objs, keys=keys, axis=axis),
         )
 
 
 @pytest.mark.parametrize("nlevels", [2, 3])
 @pytest.mark.parametrize("keys", [["x", "y"], [("x", 1), ("y", 2)]])
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_concat_multiindex_columns_with_keys_axis1(df1, df2, nlevels, keys):
-    df1 = df1.copy()
+    df1, df2 = df1.copy(), df2.copy()
     df1.columns = MultiIndex.from_arrays([df1.columns.tolist()] * nlevels)
-    df2 = df2.copy()
     df2.columns = MultiIndex.from_arrays([df2.columns.tolist()] * nlevels)
-
+    native_objs = [df1, df2]
+    snow_objs = [pd.DataFrame(df) for df in native_objs]
     eval_snowpark_pandas_result(
-        "pd", "native_pd", _concat_operation([df1, df2], axis=1, keys=keys)
+        "pd", "native_pd", _concat_operation(snow_objs, native_objs, axis=1, keys=keys)
     )
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=1)
 def test_concat_single_with_key(df1, axis):
     eval_snowpark_pandas_result(
-        "pd", "native_pd", _concat_operation([df1], keys=["foo"], axis=axis)
+        "pd",
+        "native_pd",
+        _concat_operation(
+            snow_objs=[pd.DataFrame(df1)], native_objs=[df1], keys=["foo"], axis=axis
+        ),
     )
 
 
-@sql_count_checker(query_count=3)
+@sql_count_checker(query_count=1)
 def test_concat_keys_with_none(df1, df2, axis):
+    native_objs = [df1, None, df2]
+    snow_objs = [pd.DataFrame(df1), None, pd.DataFrame(df2)]
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
         _concat_operation(
-            [df1, None, df2],
-            [df1.to_pandas(), None, df2.to_pandas()],
+            snow_objs,
+            native_objs,
             keys=["x", "y"],
             axis=axis,
         ),
@@ -656,27 +690,31 @@ def test_concat_keys_with_none(df1, df2, axis):
     "name1, name2", [("one", "two"), ("one", None), (None, "two"), (None, None)]
 )
 def test_concat_with_keys_and_names(df1, df2, names, name1, name2, axis):
-    with SqlCounter(query_count=0 if name1 is None or axis == 1 else 2):
-        df1 = df1.rename_axis(name1, axis=axis)
-    with SqlCounter(query_count=0 if name2 is None or axis == 1 else 2):
-        df2 = df2.rename_axis(name2, axis=axis)
+    df1 = df1.rename_axis(name1, axis=axis)
+    df2 = df2.rename_axis(name2, axis=axis)
+    native_objs = [df1, df2]
+    snow_objs = [pd.DataFrame(df) for df in native_objs]
 
     # One extra query to convert index to native pandas when creating df
-    with SqlCounter(query_count=3):
+    with SqlCounter(query_count=1):
         eval_snowpark_pandas_result(
             "pd",
             "native_pd",
-            _concat_operation([df1, df2], keys=["x", "y"], names=names, axis=axis),
+            _concat_operation(
+                snow_objs, native_objs, keys=["x", "y"], names=names, axis=axis
+            ),
         )
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=0)
 def test_concat_with_keys_and_extra_names_negative(df1, df2, axis):
+    native_objs = [df1, df2]
+    snow_objs = [pd.DataFrame(df) for df in native_objs]
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
         _concat_operation(
-            [df1, df2], keys=["x", "y"], names=["a", "b", "c"], axis=axis
+            snow_objs, native_objs, keys=["x", "y"], names=["a", "b", "c"], axis=axis
         ),
         expect_exception=True,
         expect_exception_type=ValueError,
@@ -684,12 +722,14 @@ def test_concat_with_keys_and_extra_names_negative(df1, df2, axis):
     )
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=0)
 def test_concat_empty_keys_negative(df1, df2, axis):
+    native_objs = [df1, df2]
+    snow_objs = [pd.DataFrame(df) for df in native_objs]
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
-        _concat_operation([df1, df2], keys=[], axis=axis),
+        _concat_operation(snow_objs, native_objs, keys=[], axis=axis),
         expect_exception=True,
         expect_exception_type=ValueError,
     )
@@ -697,64 +737,64 @@ def test_concat_empty_keys_negative(df1, df2, axis):
 
 @pytest.mark.parametrize("dict_keys", [["x", "y"], ["y", "x"]])
 def test_concat_dict(df1, df2, dict_keys, axis):
+    native_objs = {dict_keys[0]: df1, dict_keys[1]: df2}
+    snow_objs = {dict_keys[0]: pd.DataFrame(df1), dict_keys[1]: pd.DataFrame(df2)}
     expected_join_count = 1 if axis == 1 else 0
-    with SqlCounter(query_count=3, join_count=expected_join_count):
-        objs = {dict_keys[0]: df1, dict_keys[1]: df2}
-        native_objs = {dict_keys[0]: df1.to_pandas(), dict_keys[1]: df2.to_pandas()}
+    with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(
-            "pd", "native_pd", _concat_operation(objs, native_objs, axis=axis)
+            "pd", "native_pd", _concat_operation(snow_objs, native_objs, axis=axis)
         )
 
 
 @pytest.mark.parametrize("dict_keys", [["x", "y"], ["y", "x"]])
 @pytest.mark.parametrize("keys", [["x", "y"], ["y", "x"], ["x"], ["y"]])
 def test_concat_dict_with_keys(df1, df2, dict_keys, keys, axis):
+    native_objs = {dict_keys[0]: df1, dict_keys[1]: df2}
+    snow_objs = {dict_keys[0]: pd.DataFrame(df1), dict_keys[1]: pd.DataFrame(df2)}
     expected_join_count = 1 if axis == 1 and len(keys) > 1 else 0
-    with SqlCounter(query_count=3, join_count=expected_join_count):
-        objs = {dict_keys[0]: df1, dict_keys[1]: df2}
-        native_objs = {dict_keys[0]: df1.to_pandas(), dict_keys[1]: df2.to_pandas()}
+    with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(
             "pd",
             "native_pd",
-            _concat_operation(objs, native_objs, axis=axis, keys=keys),
+            _concat_operation(snow_objs, native_objs, axis=axis, keys=keys),
         )
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=0)
 def test_concat_dict_with_invalid_keys_negative(df1, df2, axis):
-    objs = {"x": df1, "y": df2}
-    native_objs = {"x": df1.to_pandas(), "y": df2.to_pandas()}
+    native_objs = {"x": df1, "y": df2}
+    snow_objs = {"x": pd.DataFrame(df1), "y": pd.DataFrame(df2)}
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
-        _concat_operation(objs, native_objs, keys=["x", "z"], axis=axis),
+        _concat_operation(snow_objs, native_objs, keys=["x", "z"], axis=axis),
         expect_exception=True,
         expect_exception_type=KeyError,
         expect_exception_match="z",
     )
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_concat_with_mixed_tuples_as_column_labels(sort):
     # columns have mixed tuples
-    df1 = pd.DataFrame({"A": "foo", ("B", 1): "bar"}, index=range(2))
-    df2 = pd.DataFrame({"B": "foo", ("B", 1): "bar"}, index=range(2))
+    df1 = native_pd.DataFrame({"A": "foo", ("B", 1): "bar"}, index=range(2))
+    df2 = native_pd.DataFrame({"B": "foo", ("B", 1): "bar"}, index=range(2))
+    native_objs = [df1, df2]
+    snow_objs = [pd.DataFrame(df) for df in native_objs]
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
-        _concat_operation(
-            [df1, df2], [df1.to_pandas(), df2.to_pandas()], axis=1, sort=sort
-        ),
+        _concat_operation(snow_objs, native_objs, axis=1, sort=sort),
     )
 
 
 def test_concat_empty_df(df1, empty_df, zero_rows_df, zero_columns_df, axis):
-    objs = [df1, empty_df, zero_columns_df, zero_rows_df]
-    snow_res = pd.concat(objs)
-
-    native_objs = [df.to_pandas() for df in objs]
+    native_objs = [df1, empty_df, zero_columns_df, zero_rows_df]
     native_res = native_pd.concat(native_objs)
 
+    snow_objs = [pd.DataFrame(obj) for obj in native_objs]
+    snow_res = pd.concat(snow_objs)
+
     with SqlCounter(query_count=1):
         assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(snow_res, native_res)
 
@@ -776,54 +816,61 @@ def test_concat_empty_df(df1, empty_df, zero_rows_df, zero_columns_df, axis):
         ),  # same levels, one overlapping name
     ],
 )
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_concat_multiindex(index1, index2):
-    df1 = pd.DataFrame({"A": [0, 1]}, index=index1)
-    df2 = pd.DataFrame({"B": [2, 3]}, index=index2)
+    df1 = native_pd.DataFrame({"A": [0, 1]}, index=index1)
+    df2 = native_pd.DataFrame({"B": [2, 3]}, index=index2)
+    native_objs = [df1, df2]
+    snow_objs = [pd.DataFrame(df) for df in native_objs]
     eval_snowpark_pandas_result(
-        "pd", "native_pd", _concat_operation([df1, df2], axis=1)
+        "pd", "native_pd", _concat_operation(snow_objs, native_objs, axis=1)
     )
 
 
 @pytest.mark.parametrize(
     "type1, type2",
-    [(pd.DataFrame, pd.DataFrame), (pd.Series, pd.Series), (pd.DataFrame, pd.Series)],
+    [("df", "df"), ("series", "series"), ("df", "series")],
 )
 @pytest.mark.parametrize("col1, col2", [("A", None), ("A", "a"), (1, "1")])
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_concat_verify_integrity_axis1(type1, type2, col1, col2):
-    obj1 = (
-        pd.DataFrame([1, 2], columns=[col1])
-        if type1 == pd.DataFrame
-        else pd.Series([1, 2], name=col1)
-    )
-    obj2 = (
-        pd.DataFrame([1, 2], columns=[col2])
-        if type2 == pd.DataFrame
-        else pd.Series([1, 2], name=col2)
-    )
+    if type1 == "df":
+        native_obj1 = native_pd.DataFrame([1, 2], columns=[col1])
+        snow_obj1 = pd.DataFrame(native_obj1)
+    else:
+        native_obj1 = native_pd.Series([1, 2], name=col1)
+        snow_obj1 = pd.Series(native_obj1)
+
+    if type2 == "df":
+        native_obj2 = native_pd.DataFrame([1, 2], columns=[col2])
+        snow_obj2 = pd.DataFrame(native_obj2)
+    else:
+        native_obj2 = native_pd.Series([1, 2], name=col2)
+        snow_obj2 = pd.Series(native_obj2)
+
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
-        _concat_operation([obj1, obj2], axis=1, verify_integrity=True),
+        _concat_operation(
+            snow_objs=[snow_obj1, snow_obj2],
+            native_objs=[native_obj1, native_obj2],
+            axis=1,
+            verify_integrity=True,
+        ),
     )
 
 
-@pytest.mark.parametrize(
-    "type1, type2", [(pd.DataFrame, pd.DataFrame), (pd.DataFrame, pd.Series)]
-)
+@pytest.mark.parametrize("obj2_type", ["df", "series"])
 @sql_count_checker(query_count=0)
-def test_concat_verify_integrity_axis1_negative(type1, type2):
-    obj1 = (
-        pd.DataFrame([1, 2], columns=["A"])
-        if type1 == pd.DataFrame
-        else pd.Series([1, 2], name="A")
-    )
-    obj2 = (
-        pd.DataFrame([3, 4], columns=["A"])
-        if type2 == pd.DataFrame
-        else pd.Series([3, 4], name="A")
-    )
+def test_concat_verify_integrity_axis1_negative(obj2_type):
+    # obj1 is always a DataFrame.
+    obj1 = pd.DataFrame([1, 2], columns=["A"])
+
+    if obj2_type == "df":
+        obj2 = pd.DataFrame([3, 4], columns=["A"])
+    else:
+        obj2 = pd.Series([3, 4], name="A")
+
     msg = "Columns have overlapping values"
     with pytest.raises(ValueError, match=msg):
         pd.concat([obj1, obj2], axis=1, verify_integrity=True)
@@ -840,16 +887,20 @@ def test_concat_all_series_verify_integrity_axis1_negative():
         pd.concat([obj1, obj2], axis=1, verify_integrity=True)
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_concat_verify_integrity_axis1_with_keys():
     # Even though original frames have duplicate columns, after adding keys to column
     # labels duplicates are resolved, hence no error.
-    obj1 = pd.DataFrame([1, 2], columns=["A"])
-    obj2 = pd.DataFrame([3, 4], columns=["A"])
+    obj1 = native_pd.DataFrame([1, 2], columns=["A"])
+    obj2 = native_pd.DataFrame([3, 4], columns=["A"])
+    native_objs = [obj1, obj2]
+    snow_objs = [pd.DataFrame(obj) for obj in native_objs]
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
-        _concat_operation([obj1, obj2], axis=1, verify_integrity=True, keys=["x", "y"]),
+        _concat_operation(
+            snow_objs, native_objs, axis=1, verify_integrity=True, keys=["x", "y"]
+        ),
     )
 
 
@@ -860,12 +911,16 @@ def test_concat_verify_integrity_axis1_with_keys():
         (_multiindex([(1, 1), (1, 2)]), _multiindex([(2, 1), (2, 2)])),
     ],
 )
-@sql_count_checker(query_count=4, union_count=2)
+@sql_count_checker(query_count=2, union_count=2)
 def test_concat_verify_integrity_axis0(index1, index2):
-    df1 = pd.DataFrame([1, 2], columns=["a"], index=index1)
-    df2 = pd.DataFrame([1, 2], columns=["a"], index=index2)
+    df1 = native_pd.DataFrame([1, 2], columns=["a"], index=index1)
+    df2 = native_pd.DataFrame([1, 2], columns=["a"], index=index2)
+    native_objs = [df1, df2]
+    snow_objs = [pd.DataFrame(obj) for obj in native_objs]
     eval_snowpark_pandas_result(
-        "pd", "native_pd", _concat_operation([df1, df2], verify_integrity=True)
+        "pd",
+        "native_pd",
+        _concat_operation(snow_objs, native_objs, verify_integrity=True),
     )
 
 
@@ -873,16 +928,20 @@ def test_concat_verify_integrity_axis0(index1, index2):
     "index1, index2",
     [([0, 1], [0, 1]), (_multiindex([(1, 1), (1, 2)]), _multiindex([(2, 1), (1, 2)]))],
 )
-@sql_count_checker(query_count=4, union_count=2)
+@sql_count_checker(query_count=2, union_count=2)
 def test_concat_verify_integrity_axis0_with_keys(index1, index2):
     # Even though original frames have duplicate columns, after adding keys to column
     # labels duplicates are resolved, hence no error.
-    df1 = pd.DataFrame([1, 2], columns=["a"], index=index1)
-    df2 = pd.DataFrame([1, 2], columns=["a"], index=index2)
+    df1 = native_pd.DataFrame([1, 2], columns=["a"], index=index1)
+    df2 = native_pd.DataFrame([1, 2], columns=["a"], index=index2)
+    native_objs = [df1, df2]
+    snow_objs = [pd.DataFrame(obj) for obj in native_objs]
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
-        _concat_operation([df1, df2], verify_integrity=True, keys=["red", "green"]),
+        _concat_operation(
+            snow_objs, native_objs, verify_integrity=True, keys=["red", "green"]
+        ),
     )
 
 
@@ -890,16 +949,20 @@ def test_concat_verify_integrity_axis0_with_keys(index1, index2):
     "index1, index2",
     [([0, 1], [0, 1]), (_multiindex([(1, 1), (1, 2)]), _multiindex([(2, 1), (1, 2)]))],
 )
-@sql_count_checker(query_count=3, union_count=1)
+@sql_count_checker(query_count=1, union_count=1)
 def test_concat_verify_integrity_axis0_with_ignore_index(index1, index2):
     # Even though original frames have duplicate columns, ignore_index=True will
     # replace original index values with values 0 to n-1, hence no error.
-    df1 = pd.DataFrame([1, 2], columns=["a"], index=index1)
-    df2 = pd.DataFrame([1, 2], columns=["a"], index=index2)
+    df1 = native_pd.DataFrame([1, 2], columns=["a"], index=index1)
+    df2 = native_pd.DataFrame([1, 2], columns=["a"], index=index2)
+    native_objs = [df1, df2]
+    snow_objs = [pd.DataFrame(obj) for obj in native_objs]
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
-        _concat_operation([df1, df2], verify_integrity=True, ignore_index=True),
+        _concat_operation(
+            snow_objs, native_objs, verify_integrity=True, ignore_index=True
+        ),
     )
 
 
@@ -911,14 +974,16 @@ def test_concat_verify_integrity_axis0_with_ignore_index(index1, index2):
         ([1, 1], [2, 3]),
     ],
 )
-@sql_count_checker(query_count=4, union_count=2)
+@sql_count_checker(query_count=2, union_count=2)
 def test_concat_verify_integrity_axis0_negative(index1, index2):
-    df1 = pd.DataFrame([1, 2], columns=["a"], index=index1)
-    df2 = pd.DataFrame([1, 2], columns=["a"], index=index2)
+    df1 = native_pd.DataFrame([1, 2], columns=["a"], index=index1)
+    df2 = native_pd.DataFrame([1, 2], columns=["a"], index=index2)
+    native_objs = [df1, df2]
+    snow_objs = [pd.DataFrame(obj) for obj in native_objs]
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
-        _concat_operation([df1, df2], verify_integrity=True),
+        _concat_operation(snow_objs, native_objs, verify_integrity=True),
         expect_exception=True,
         expect_exception_type=ValueError,
         expect_exception_match="Indexes have overlapping values: ",
@@ -935,6 +1000,7 @@ def test_concat_verify_integrity_axis0_large_overlap_negative():
 
 @sql_count_checker(query_count=0)
 def test_concat_levels_negative(df1, df2):
+    df1, df2 = pd.DataFrame(df1), pd.DataFrame(df2)
     with pytest.raises(
         NotImplementedError,
         match="Snowpark pandas doesn't support 'levels' argument in concat API",
@@ -943,19 +1009,25 @@ def test_concat_levels_negative(df1, df2):
 
 
 def test_concat_sorted_frames():
-    df1 = pd.DataFrame({"A": [5, 2, 7]})
-    df2 = pd.DataFrame({"B": [3, 5, 6]})
-    df3 = pd.DataFrame({"A": [2, 1, 7], "B": [3, 5, 4]})
-    objs = [df1, df2, df3]
-    with SqlCounter(query_count=4):
-        eval_snowpark_pandas_result("pd", "native_pd", _concat_operation(objs))
-    objs = [
+    df1 = native_pd.DataFrame({"A": [5, 2, 7]})
+    df2 = native_pd.DataFrame({"B": [3, 5, 6]})
+    df3 = native_pd.DataFrame({"A": [2, 1, 7], "B": [3, 5, 4]})
+    native_objs = [df1, df2, df3]
+    snow_objs = [pd.DataFrame(obj) for obj in native_objs]
+    with SqlCounter(query_count=1):
+        eval_snowpark_pandas_result(
+            "pd", "native_pd", _concat_operation(snow_objs, native_objs)
+        )
+    native_objs = [
         df1.sort_values(by="A"),
         df2.sort_values(by="B"),
         df3.sort_values(by=["B", "A"]),
     ]
-    with SqlCounter(query_count=4):
-        eval_snowpark_pandas_result("pd", "native_pd", _concat_operation(objs))
+    snow_objs = [pd.DataFrame(obj) for obj in native_objs]
+    with SqlCounter(query_count=1):
+        eval_snowpark_pandas_result(
+            "pd", "native_pd", _concat_operation(snow_objs, native_objs)
+        )
 
 
 @pytest.mark.parametrize(
@@ -988,9 +1060,7 @@ def test_concat_sorted_frames():
     ],
 )
 @sql_count_checker(query_count=2, union_count=1)
-def test_concat_duplicate_columns(
-    df1, df2, columns1, columns2, expected_rows, expected_cols
-):
+def test_concat_duplicate_columns(columns1, columns2, expected_rows, expected_cols):
     df1 = pd.DataFrame([[1, 2, 3]], columns=columns1)
     df2 = pd.DataFrame([[4, 5, 6]], columns=columns2)
     expected_df = pd.DataFrame(expected_rows, columns=expected_cols, index=[0, 0])
@@ -999,14 +1069,16 @@ def test_concat_duplicate_columns(
 
 @pytest.mark.parametrize("value1", [4, 1.5, True, "c", (1, 2), {"a": 1}])
 @pytest.mark.parametrize("value2", [4, 1.5, True, "c", (1, 2), {"a": 1}])
-@sql_count_checker(query_count=3, union_count=1)
+@sql_count_checker(query_count=1, union_count=1)
 def test_concat_type_mismatch(value1, value2):
-    df1 = pd.DataFrame({"A": [value1]})
-    df2 = pd.DataFrame({"A": [value2]})
+    df1 = native_pd.DataFrame({"A": [value1]})
+    df2 = native_pd.DataFrame({"A": [value2]})
+    native_objs = [df1, df2]
+    snow_objs = [pd.DataFrame(obj) for obj in native_objs]
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
-        _concat_operation([df1, df2]),
+        _concat_operation(snow_objs, native_objs),
     )
 
 
@@ -1023,27 +1095,32 @@ def test_concat_type_mismatch(value1, value2):
         ),
     ],
 )
-@sql_count_checker(query_count=5, union_count=1)
+@sql_count_checker(query_count=1, union_count=1)
 def test_concat_none_index_name(index1, index2):
-    df1 = pd.DataFrame([11], columns=["A"], index=index1)
-    df2 = pd.DataFrame([22], columns=["B"], index=index2)
-    _concat_operation([df1, df2]),
+    df1 = native_pd.DataFrame([11], columns=["A"], index=index1)
+    df2 = native_pd.DataFrame([22], columns=["B"], index=index2)
+    native_objs = [df1, df2]
+    snow_objs = [pd.DataFrame(obj) for obj in native_objs]
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
-        _concat_operation([df1, df2]),
+        _concat_operation(snow_objs, native_objs),
     )
 
 
-@sql_count_checker(query_count=5, union_count=1)
+@sql_count_checker(query_count=3, union_count=1)
 def test_concat_from_file(resources_path):
     test_files = TestFiles(resources_path)
+    df1 = native_pd.read_csv(test_files.test_concat_file1_csv)
+    df2 = native_pd.read_csv(test_files.test_concat_file1_csv)
+    native_objs = [df1, df2]
     df1 = pd.read_csv(test_files.test_concat_file1_csv)
     df2 = pd.read_csv(test_files.test_concat_file1_csv)
+    snow_objs = [df1, df2]
     eval_snowpark_pandas_result(
         "pd",
         "native_pd",
-        _concat_operation([df1, df2]),
+        _concat_operation(snow_objs, native_objs),
     )
 
 

From 66d634c7bd261e4e09766494637ef8daefc5efe7 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Thu, 22 Aug 2024 18:14:14 -0700
Subject: [PATCH 09/42] Update tests

---
 src/snowflake/snowpark/modin/pandas/series.py |  6 +-
 tests/integ/modin/frame/test_assign.py        | 30 +++---
 tests/integ/modin/frame/test_iloc.py          |  2 +
 tests/integ/modin/frame/test_insert.py        |  5 +-
 tests/integ/modin/frame/test_join.py          | 42 ++++----
 tests/integ/modin/frame/test_loc.py           |  4 +-
 tests/integ/modin/frame/test_merge.py         | 98 +++++++++----------
 tests/integ/modin/frame/test_reindex.py       |  6 +-
 tests/integ/modin/frame/test_to_snowflake.py  |  6 +-
 tests/integ/modin/frame/test_where.py         | 24 ++---
 .../modin/pivot/test_pivot_table_single.py    |  2 +-
 tests/integ/modin/resample/test_resample.py   | 39 +++-----
 .../modin/resample/test_resample_asfreq.py    |  4 +-
 .../modin/resample/test_resample_fillna.py    | 17 ++--
 tests/integ/modin/series/test_empty.py        |  2 +-
 tests/integ/modin/series/test_iloc.py         |  1 +
 tests/integ/modin/series/test_loc.py          |  4 +-
 tests/integ/modin/series/test_reindex.py      | 14 +--
 tests/integ/modin/series/test_rename.py       |  4 +-
 tests/integ/modin/series/test_sort_values.py  |  2 +-
 tests/integ/modin/series/test_to_snowflake.py |  8 +-
 tests/integ/modin/series/test_where.py        | 10 +-
 22 files changed, 164 insertions(+), 166 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py
index f59c1a7939..5c36afe5ab 100644
--- a/src/snowflake/snowpark/modin/pandas/series.py
+++ b/src/snowflake/snowpark/modin/pandas/series.py
@@ -193,8 +193,10 @@ def __init__(
                 )
             )._query_compiler
             if isinstance(index, Index):
-                query_compiler = query_compiler.create_qc_with_index_data_and_qc_index(
-                    index._query_compiler
+                query_compiler = (
+                    query_compiler.create_qc_with_data_and_index_joined_on_index(
+                        index._query_compiler
+                    )
                 )
         self._query_compiler = query_compiler.columnarize()
         if name is not None:
diff --git a/tests/integ/modin/frame/test_assign.py b/tests/integ/modin/frame/test_assign.py
index b0da2a110b..5fb54b9d44 100644
--- a/tests/integ/modin/frame/test_assign.py
+++ b/tests/integ/modin/frame/test_assign.py
@@ -17,7 +17,7 @@
 )
 
 
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_assign_basic_series():
     snow_df, native_df = create_test_dfs(
         [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
@@ -36,7 +36,7 @@ def assign_func(df):
     eval_snowpark_pandas_result(snow_df, native_df, assign_func)
 
 
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 @pytest.mark.parametrize(
     "index", [[2, 1, 0], [4, 5, 6]], ids=["reversed_index", "different_index"]
 )
@@ -60,8 +60,8 @@ def assign_func(df):
 
 @pytest.mark.parametrize("new_col_value", [2, [10, 11, 12], "x"])
 def test_assign_basic_non_pandas_object(new_col_value):
-    join_count = 2 if isinstance(new_col_value, list) else 0
-    with SqlCounter(query_count=2, join_count=join_count):
+    join_count = 4 if isinstance(new_col_value, list) else 1
+    with SqlCounter(query_count=1, join_count=join_count):
         snow_df, native_df = create_test_dfs(
             [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
             columns=native_pd.Index(list("abc"), name="columns"),
@@ -74,11 +74,11 @@ def test_assign_basic_non_pandas_object(new_col_value):
         )
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=1, join_count=4)
 def test_assign_invalid_long_column_length_negative():
     # pandas errors out in this test, since we are attempting to assign a column of length 5 to a DataFrame with length 3.
     # Snowpark pandas on the other hand, just truncates the last element of the new column so that it is the correct length. If we wanted
-    # to error and match pandas behavior, we'd need to eagerly materialize the DataFrame in order to confirm lengths are correct
+    # to error and match pandas behavior, we'd need to eagerly materialize the DataFrame to confirm lengths are correct
     # and error otherwise.
     snow_df, native_df = create_test_dfs(
         [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
@@ -98,11 +98,11 @@ def test_assign_invalid_long_column_length_negative():
     assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(snow_df, native_df)
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=1, join_count=4)
 def test_assign_invalid_short_column_length_negative():
     # pandas errors out in this test, since we are attempting to assign a column of length 2 to a DataFrame with length 3.
     # Snowpark pandas on the other hand, just broadcasts the last element of the new column so that it is filled. If we wanted
-    # to error and match pandas behavior, we'd need to eagerly materialize the DataFrame in order to confirm lengths are correct
+    # to error and match pandas behavior, we'd need to eagerly materialize the DataFrame to confirm lengths are correct
     # and error otherwise.
     snow_df, native_df = create_test_dfs(
         [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
@@ -122,7 +122,7 @@ def test_assign_invalid_short_column_length_negative():
     assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(snow_df, native_df)
 
 
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_assign_short_series():
     snow_df, native_df = create_test_dfs(
         [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
@@ -136,7 +136,7 @@ def test_assign_short_series():
     assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(snow_df, native_df)
 
 
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 @pytest.mark.parametrize(
     "index", [[1, 0], [4, 5]], ids=["reversed_index", "different_index"]
 )
@@ -153,7 +153,7 @@ def test_assign_short_series_mismatched_index(index):
     assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(snow_df, native_df)
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 @pytest.mark.parametrize(
     "callable_fn",
     [lambda x: x["a"], lambda x: x["a"] + x["b"]],
@@ -172,7 +172,7 @@ def test_assign_basic_callable(callable_fn):
     )
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_assign_chained_callable():
     snow_df, native_df = create_test_dfs(
         [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
@@ -190,7 +190,7 @@ def test_assign_chained_callable():
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=0)
 def test_assign_chained_callable_wrong_order():
     snow_df, native_df = create_test_dfs(
         [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
@@ -212,7 +212,7 @@ def test_assign_chained_callable_wrong_order():
     )
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_assign_self_columns():
     snow_df, native_df = create_test_dfs(
         [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
@@ -226,7 +226,7 @@ def test_assign_self_columns():
     )
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=1, join_count=4)
 def test_overwrite_columns_via_assign():
     snow_df, native_df = create_test_dfs(
         [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
diff --git a/tests/integ/modin/frame/test_iloc.py b/tests/integ/modin/frame/test_iloc.py
index dd09068383..c79d5eb8ba 100644
--- a/tests/integ/modin/frame/test_iloc.py
+++ b/tests/integ/modin/frame/test_iloc.py
@@ -2710,6 +2710,7 @@ def test_df_iloc_set_with_multi_index(
         native_items.columns = pd.MultiIndex.from_tuples(item_columns)
 
     if row_key_index:
+        expected_join_count += 1
         snow_row_key = pd.Series(row_key, index=pd.Index(row_key_index))
         native_row_key = native_pd.Series(row_key, index=pd.Index(row_key_index))
     else:
@@ -2717,6 +2718,7 @@ def test_df_iloc_set_with_multi_index(
         native_row_key = row_key
 
     if col_key_index:
+        expected_join_count += 1
         snow_col_key = pd.Series(col_key, index=pd.Index(col_key_index))
         native_col_key = native_pd.Series(col_key, index=pd.Index(col_key_index))
     else:
diff --git a/tests/integ/modin/frame/test_insert.py b/tests/integ/modin/frame/test_insert.py
index 258d4d2e64..dd8a26fc54 100644
--- a/tests/integ/modin/frame/test_insert.py
+++ b/tests/integ/modin/frame/test_insert.py
@@ -723,11 +723,12 @@ def test_insert_multiindex_column_negative(snow_df, columns, insert_label):
         [["a", "b", "b", "d", "e"], ["x", "y", "z", "u", "u"], True],
     ],
 )
-# Two extra queries to convert index to native pandas when creating snowpark pandas dataframes
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=1, join_count=3)
 def test_insert_with_unique_and_duplicate_index_values(
     index_values, other_index_values, expect_mismatch
 ):
+    # Two of the three joins come from creating the DataFrame with non-Snowpark pandas data
+    # and a Snowpark pandas Index. The third join is from the insert operation.
     data = list(range(5))
     data1 = {"foo": data}
     data2 = {"bar": [val * 10 for val in data]}
diff --git a/tests/integ/modin/frame/test_join.py b/tests/integ/modin/frame/test_join.py
index 91500189d1..253d8d7049 100644
--- a/tests/integ/modin/frame/test_join.py
+++ b/tests/integ/modin/frame/test_join.py
@@ -13,14 +13,16 @@
 
 @pytest.fixture
 def left():
-    return pd.DataFrame(
+    return native_pd.DataFrame(
         {"a": [1, 1, 0, 4]}, index=native_pd.Index([2, 1, 0, 3], name="li")
     )
 
 
 @pytest.fixture
 def right():
-    return pd.DataFrame({"b": [300, 100, 200]}, index=pd.Index([3, 1, 2], name="ri"))
+    return native_pd.DataFrame(
+        {"b": [300, 100, 200]}, index=pd.Index([3, 1, 2], name="ri")
+    )
 
 
 @pytest.fixture(params=["left", "inner", "right", "outer"])
@@ -41,6 +43,7 @@ def sort(request):
 
 @sql_count_checker(query_count=2, join_count=2)
 def test_join_index_to_index(left, right, how, sort):
+    left, right = pd.DataFrame(left), pd.DataFrame(right)
     result = left.join(right, how=how, sort=sort)
     expected = left.merge(right, left_index=True, right_index=True, how=how, sort=sort)
     assert_frame_equal(result, expected)
@@ -48,18 +51,19 @@ def test_join_index_to_index(left, right, how, sort):
 
 @sql_count_checker(query_count=2, join_count=2)
 def test_join_column_to_index(left, right, how, sort):
+    left, right = pd.DataFrame(left), pd.DataFrame(right)
     result = left.join(right, on="a", how=how, sort=sort)
     expected = left.merge(right, left_on="a", right_index=True, how=how, sort=sort)
     assert_frame_equal(result, expected)
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=0)
 def test_join_list_with_on_negative(left, right):
     eval_snowpark_pandas_result(
+        pd.DataFrame(left),
         left,
-        left.to_pandas(),
         lambda df: df.join(
-            [right if isinstance(df, pd.DataFrame) else right.to_pandas()], on="a"
+            [pd.DataFrame(right) if isinstance(df, pd.DataFrame) else right], on="a"
         ),
         expect_exception=True,
         expect_exception_type=ValueError,
@@ -97,6 +101,7 @@ def test_join_suffix_on_list_negative():
 )
 @sql_count_checker(query_count=2, join_count=2)
 def test_join_overlapping_columns(left, lsuffix, rsuffix):
+    left = pd.DataFrame(left)
     result = left.join(left, how="left", lsuffix=lsuffix, rsuffix=rsuffix)
     expected = left.merge(
         left, how="left", left_index=True, right_index=True, suffixes=(lsuffix, rsuffix)
@@ -104,11 +109,11 @@ def test_join_overlapping_columns(left, lsuffix, rsuffix):
     assert_frame_equal(result, expected)
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=0)
 def test_join_overlapping_columns_negative(left):
     eval_snowpark_pandas_result(
+        pd.DataFrame(left),
         left,
-        left.to_pandas(),
         lambda df: df.join(df),
         expect_exception=True,
         expect_exception_type=ValueError,
@@ -116,11 +121,11 @@ def test_join_overlapping_columns_negative(left):
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=0)
 def test_join_invalid_how_negative(left):
     eval_snowpark_pandas_result(
+        pd.DataFrame(left),
         left,
-        left.to_pandas(),
         lambda df: df.join(df, how="full_outer_join"),
         expect_exception=True,
         expect_exception_type=ValueError,
@@ -130,21 +135,20 @@ def test_join_invalid_how_negative(left):
 
 @sql_count_checker(query_count=2, join_count=2)
 def test_join_with_series(left):
+    left = pd.DataFrame(left)
     right = pd.Series([1, 0, 2], name="s")
     result = left.join(right)
     expected = left.merge(right, left_index=True, right_index=True, how="left")
     assert_frame_equal(result, expected)
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=0)
 def test_join_unnamed_series_negative(left):
-    right = pd.Series([1, 0, 2])
+    right = native_pd.Series([1, 0, 2])
     eval_snowpark_pandas_result(
+        pd.DataFrame(left),
         left,
-        left.to_pandas(),
-        lambda df: df.join(
-            right if isinstance(df, pd.DataFrame) else right.to_pandas()
-        ),
+        lambda df: df.join(pd.Series(right) if isinstance(df, pd.DataFrame) else right),
         expect_exception=True,
         expect_exception_type=ValueError,
         expect_exception_match="Other Series must have a name",
@@ -155,12 +159,13 @@ def test_join_unnamed_series_negative(left):
 def test_join_unnamed_series_in_list_negative(left):
     right = pd.Series([1, 0, 2])
     with pytest.raises(ValueError, match="Other Series must have a name"):
-        left.join([right])
+        pd.DataFrame(left).join([right])
 
 
 @sql_count_checker(query_count=2, join_count=4)
 def test_join_list_mixed(left, right):
     # Join a DataFrame with a list containing both a Series and a DataFrame
+    left, right = pd.DataFrame(left), pd.DataFrame(right)
     series = pd.Series([1, 2, 3], name="s")
     other = [right, series]
     result = left.join(other)
@@ -170,6 +175,7 @@ def test_join_list_mixed(left, right):
 
 @sql_count_checker(query_count=4, join_count=4)
 def test_join_empty_rows(left, right, how):
+    left, right = pd.DataFrame(left), pd.DataFrame(right)
     empty_df = pd.DataFrame(columns=["x", "y"])
     # empty on left
     result = left.join(empty_df, how=how)
@@ -183,6 +189,7 @@ def test_join_empty_rows(left, right, how):
 
 @sql_count_checker(query_count=4, join_count=4)
 def test_join_empty_columns(left, right, how):
+    left, right = pd.DataFrame(left), pd.DataFrame(right)
     empty_df = pd.DataFrame(native_pd.Index([1, 2, 3]))
     # empty on left
     result = left.join(empty_df, how=how)
@@ -203,11 +210,12 @@ def test_join_different_levels_negative(left):
     with pytest.raises(
         ValueError, match="Can not merge objects with different column levels"
     ):
-        left.join(right)
+        pd.DataFrame(left).join(right)
 
 
 @sql_count_checker(query_count=2, join_count=2)
 def test_cross_join(left, right):
+    left, right = pd.DataFrame(left), pd.DataFrame(right)
     result = left.join(right, how="cross")
     expected = left.merge(right, how="cross")
     assert_frame_equal(result, expected)
diff --git a/tests/integ/modin/frame/test_loc.py b/tests/integ/modin/frame/test_loc.py
index 1012a0d395..68991b3cf1 100644
--- a/tests/integ/modin/frame/test_loc.py
+++ b/tests/integ/modin/frame/test_loc.py
@@ -1727,7 +1727,7 @@ def test_df_loc_get_key_bool_series_with_unaligned_and_distinct_indices(
 
 
 # One extra query for series init converting index to native pandas when creating series_key
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_df_loc_get_key_bool_series_with_unaligned_and_duplicate_indices():
     key = [True] * 5
     # index can have null values and duplicates
@@ -2697,7 +2697,7 @@ def test_empty_df_loc_set_series_and_list(native_item):
         else native_item
     )
 
-    expected_join_count = 1 if isinstance(native_item, native_pd.Series) else 2
+    expected_join_count = 2 if isinstance(native_item, native_pd.Series) else 4
 
     def setitem_op(df):
         item = native_item if isinstance(df, native_pd.DataFrame) else snow_item
diff --git a/tests/integ/modin/frame/test_merge.py b/tests/integ/modin/frame/test_merge.py
index 7ac88042e7..80df6bc516 100644
--- a/tests/integ/modin/frame/test_merge.py
+++ b/tests/integ/modin/frame/test_merge.py
@@ -213,7 +213,7 @@ def _verify_merge(
         left_index: If True, use index from left DataFrame as join keys.
         right_index: If True, use index from right DataFrame as join keys.
         force_output_column_order: If provided, reorder native result using this list.
-        indicator: If true include indicator column.
+        indicator: If True, include indicator column.
 
     Returns:
         None
@@ -276,13 +276,13 @@ def _verify_merge(
 
 
 @pytest.mark.parametrize("on", ["A", "B", ["A", "B"], ("A", "B")])
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_on(left_df, right_df, on, how, sort):
     _verify_merge(left_df, right_df, how, on=on, sort=sort)
 
 
 @pytest.mark.parametrize("on", ["left_i", "right_i"])
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_on_index_columns(left_df, right_df, how, on, sort):
     # Change left_df to: columns=["right_i", "B", "left_c", "left_d"] index=["left_i"]
     left_df = left_df.rename(columns={"A": "right_i"})
@@ -361,7 +361,7 @@ def test_join_type_mismatch_diff_with_native_pandas(index1, index2, expected_res
 
 
 @pytest.mark.parametrize("on", ["A", "B", "C"])
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_on_index_columns_with_multiindex(left_df, right_df, how, on, sort):
     # Change left_df to: columns = ["C", "left_d"] index = ["A", "B"]
     left_df = left_df.rename(columns={"left_c": "C"}).set_index(["A", "B"])
@@ -370,7 +370,7 @@ def test_merge_on_index_columns_with_multiindex(left_df, right_df, how, on, sort
     _verify_merge(left_df, right_df, how, on=on, sort=sort)
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_on_multiindex_with_non_multiindex(left_df, right_df, how, sort):
     # Change left_df to: columns = ["A", "B"] index = ["left_c", "left_d"]
     left_df = left_df.set_index(["left_c", "left_d"])
@@ -392,29 +392,29 @@ def test_merge_on_multiindex_with_non_multiindex(left_df, right_df, how, sort):
         (["A", "left_i"], ["B", "right_i"]),  # Mix of index and data join keys
     ],
 )
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_left_on_right_on(left_df, right_df, how, left_on, right_on, sort):
     _verify_merge(left_df, right_df, how, left_on=left_on, right_on=right_on, sort=sort)
 
 
 @pytest.mark.parametrize("left_on", ["left_i", "A", "B"])
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_left_on_right_index(left_df, right_df, how, left_on, sort):
     _verify_merge(left_df, right_df, how, left_on=left_on, right_index=True, sort=sort)
 
 
 @pytest.mark.parametrize("right_on", ["right_i", "A", "B"])
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_left_index_right_on(left_df, right_df, how, right_on, sort):
     _verify_merge(left_df, right_df, how, left_index=True, right_on=right_on, sort=sort)
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_on_index_single_index(left_df, right_df, how, sort):
     _verify_merge(left_df, right_df, how, left_index=True, right_index=True, sort=sort)
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_on_index_multiindex_common_labels(left_df, right_df, how, sort):
     left_df = left_df.set_index("A", append=True)  # index columns ['left_i', 'A']
     right_df = right_df.set_index("A", append=True)  # index columns ['right_i', 'A']
@@ -444,7 +444,7 @@ def test_merge_on_index_multiindex_common_labels_with_none(
         )
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_on_index_multiindex_equal_labels(left_df, right_df, how, sort):
     # index columns ['A', 'B]
     left_df = left_df.set_index(["A", "B"])
@@ -463,7 +463,7 @@ def test_merge_left_index_right_index_single_to_multi(left_df, right_df, how, so
         if how == "inner" and sort is False:
             pytest.skip("pandas bug: https://github.com/pandas-dev/pandas/issues/55774")
         else:
-            with SqlCounter(query_count=3, join_count=1):
+            with SqlCounter(query_count=3, join_count=5):
                 _verify_merge(
                     left_df,
                     right_df,
@@ -489,7 +489,7 @@ def test_merge_left_index_right_index_single_to_multi(left_df, right_df, how, so
             .merge(right_df.to_pandas(), how=how, on="left_i", sort=sort)
             .reset_index(drop=True)
         )
-        with SqlCounter(query_count=1, join_count=1):
+        with SqlCounter(query_count=1, join_count=3):
             assert_snowpark_pandas_equal_to_pandas(
                 snow_res.reset_index(drop=True), native_res
             )
@@ -500,7 +500,7 @@ def test_merge_left_index_right_index_multi_to_single(left_df, right_df, how, so
         "right_i", append=True
     )  # index columns ['left_i', 'right_i']
     if how in ("left", "inner"):
-        with SqlCounter(query_count=3, join_count=1):
+        with SqlCounter(query_count=3, join_count=5):
             _verify_merge(
                 left_df, right_df, how=how, left_index=True, right_index=True, sort=sort
             )
@@ -519,13 +519,13 @@ def test_merge_left_index_right_index_multi_to_single(left_df, right_df, how, so
             .merge(right_df.to_pandas(), how=how, on="right_i", sort=sort)
             .reset_index(drop=True)
         )
-        with SqlCounter(query_count=1, join_count=1):
+        with SqlCounter(query_count=1, join_count=3):
             assert_snowpark_pandas_equal_to_pandas(
                 snow_res.reset_index(drop=True), native_res
             )
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_merge_left_index_right_index_no_common_names_negative(left_df, right_df):
     left_df = left_df.set_index("B", append=True)  # index columns ['left_i', 'B']
     right_df = right_df.set_index("A", append=True)  # index columns ['right_i', 'A']
@@ -543,7 +543,7 @@ def test_merge_left_index_right_index_no_common_names_negative(left_df, right_df
     )
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_merge_left_index_right_index_none_as_common_label_negative(left_df, right_df):
     # index columns [None, 'B']
     left_df = left_df.reset_index(drop=True).set_index("B", append=True)
@@ -563,7 +563,7 @@ def test_merge_left_index_right_index_none_as_common_label_negative(left_df, rig
     )
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_cross(left_df, right_df, sort):
     eval_snowpark_pandas_result(
         left_df,
@@ -587,7 +587,7 @@ def test_merge_cross(left_df, right_df, sort):
         {"left_index": True, "right_on": "A"},
     ],
 )
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_non_empty_with_empty(left_df, empty_df, how, kwargs, sort):
     _verify_merge(left_df, empty_df, how, sort=sort, **kwargs)
 
@@ -601,7 +601,7 @@ def test_merge_non_empty_with_empty(left_df, empty_df, how, kwargs, sort):
         {"left_index": True, "right_on": "A"},
     ],
 )
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_empty_with_non_empty(empty_df, right_df, how, kwargs, sort):
     # Native pandas returns incorrect column order when left frame is empty.
     # https://github.com/pandas-dev/pandas/issues/51929
@@ -637,7 +637,7 @@ def test_merge_empty_with_non_empty(empty_df, right_df, how, kwargs, sort):
         (None, None, ["A", "B"], True, False),  # left.num_index_levels != len(right_on)
     ],
 )
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_merge_mis_specified_negative(
     left_df, right_df, on, left_on, right_on, left_index, right_index
 ):
@@ -666,7 +666,7 @@ def test_merge_mis_specified_negative(
         (None, None, None, False, True),  # right_index is set to True
     ],
 )
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_merge_cross_mis_specified_negative(
     left_df, right_df, on, left_on, right_on, left_index, right_index
 ):
@@ -704,7 +704,7 @@ def test_merge_cross_mis_specified_negative(
         (0.0, 0.0, {"suffixes": ("_x", None)}),
     ],
 )
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_suffix(left_df, right_df, left_col, right_col, kwargs):
     left_df = left_df.rename(columns={"A": left_col})
     right_df = right_df.rename(columns={"A": right_col})
@@ -720,7 +720,7 @@ def test_merge_suffix(left_df, right_df, left_col, right_col, kwargs):
     )
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_duplicate_suffix(left_df, right_df):
     eval_snowpark_pandas_result(
         left_df,
@@ -734,7 +734,7 @@ def test_merge_duplicate_suffix(left_df, right_df):
     )
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_label_conflict_with_suffix(left_df, right_df):
     # Test the behavior when adding suffix crates a conflict with another label.
     # Note: This raises a warning in pandas 2.0 and will raise an error in future
@@ -758,7 +758,7 @@ def test_merge_label_conflict_with_suffix(left_df, right_df):
     )
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_non_str_suffix(left_df, right_df):
     eval_snowpark_pandas_result(
         left_df,
@@ -776,7 +776,7 @@ def test_merge_non_str_suffix(left_df, right_df):
     "suffixes",
     [(None, None), ("", None), (None, ""), ("", "")],
 )
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_merge_empty_suffix_negative(left_df, right_df, suffixes):
     eval_snowpark_pandas_result(
         left_df,
@@ -794,7 +794,7 @@ def test_merge_empty_suffix_negative(left_df, right_df, suffixes):
     "suffixes",
     [("a", "b", "c"), tuple("a")],
 )
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_merge_suffix_length_error_negative(left_df, right_df, suffixes):
     eval_snowpark_pandas_result(
         left_df,
@@ -808,7 +808,7 @@ def test_merge_suffix_length_error_negative(left_df, right_df, suffixes):
     )
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_duplicate_labels(left_df, right_df):
     # Change left_df columns to ["A", "B", "left_c", "left_c"]
     # 'left_c' is a duplicate label.
@@ -824,7 +824,7 @@ def test_merge_duplicate_labels(left_df, right_df):
     )
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_merge_duplicate_join_keys_negative(left_df, right_df):
     # Change left_df columns to ["A", "B", "left_c", "left_c"]
     # 'left_c' is a duplicate label. This can not be used as join key.
@@ -860,14 +860,14 @@ def test_merge_with_self():
 
 
 @pytest.mark.parametrize("on", ["A", "B"])
-@sql_count_checker(query_count=4, join_count=1)
+@sql_count_checker(query_count=4, join_count=4)
 def test_merge_with_series(left_df, right_df, how, on, sort):
     native_series = right_df.to_pandas()[on]
     snow_series = pd.Series(native_series)
     _verify_merge(left_df, snow_series, how=how, on=on, sort=sort)
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_merge_with_unnamed_series_negative(left_df):
     native_series = native_pd.Series([1, 2, 3])
     snow_series = pd.Series(native_series)
@@ -923,7 +923,7 @@ def test_merge_outer_with_nan(dtype):
 
 
 # Two extra queries to convert to native index for dataframe constructor when creating left and right
-@sql_count_checker(query_count=5, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_different_index_names():
     left = pd.DataFrame({"a": [1]}, index=pd.Index([1], name="c"))
     right = pd.DataFrame({"a": [1]}, index=pd.Index([1], name="d"))
@@ -938,13 +938,13 @@ def test_merge_different_index_names():
     )
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_no_join_keys(left_df, right_df, how, sort):
     _verify_merge(left_df, right_df, how, sort=sort)
 
 
 @pytest.mark.parametrize("left_name, right_name", [("left_a", "right_a"), (1, "1")])
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_merge_no_join_keys_negative(left_name, right_name, left_df, right_df):
     left_df = left_df.rename(columns={"A": left_name, "B": "left_b"})
     right_df = right_df.rename(columns={"A": right_name, "B": "right_b"})
@@ -979,7 +979,7 @@ def test_merge_no_join_keys_common_index_negative(left_df, right_df):
     )
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_merge_no_join_keys_common_index_with_data_negative(left_df, right_df):
     left_df = left_df.rename(columns={"A": "left_a", "B": "left_b"})
     right_df = right_df.rename(columns={"A": "right_a", "B": "left_i"})
@@ -1003,16 +1003,16 @@ def test_merge_no_join_keys_common_index_with_data_negative(left_df, right_df):
 @pytest.mark.parametrize(
     "left_on, right_on, expected_query_count, expected_join_count",
     [
-        (np.array(["a", "b", "c", "x", "y"]), "right_d", 5, 2),
-        ([np.array(["a", "b", "c", "x", "y"]), "A"], ["right_d", "A"], 5, 2),
-        ("left_d", np.array(["a", "b", "c", "x", "y"]), 5, 2),
-        (["left_d", "A"], [np.array(["a", "b", "c", "x", "y"]), "A"], 5, 2),
-        (["left_d", "A"], (np.array(["a", "b", "c", "x", "y"]), "A"), 5, 2),  # tuple
+        (np.array(["a", "b", "c", "x", "y"]), "right_d", 5, 7),
+        ([np.array(["a", "b", "c", "x", "y"]), "A"], ["right_d", "A"], 5, 7),
+        ("left_d", np.array(["a", "b", "c", "x", "y"]), 5, 7),
+        (["left_d", "A"], [np.array(["a", "b", "c", "x", "y"]), "A"], 5, 7),
+        (["left_d", "A"], (np.array(["a", "b", "c", "x", "y"]), "A"), 5, 7),  # tuple
         (
             np.array(["a", "b", "c", "x", "y"]),
             np.array(["x", "y", "c", "a", "b"]),
             7,
-            3,
+            9,
         ),
     ],
 )
@@ -1023,7 +1023,7 @@ def test_merge_on_array_like_keys(
         _verify_merge(left_df, right_df, how=how, left_on=left_on, right_on=right_on)
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_merge_on_array_like_keys_conflict_negative(left_df, right_df):
     left_on = np.array(["a", "b", "c", "x", "y"])
     right_on = np.array(["x", "y", "c", "a", "b"])
@@ -1050,7 +1050,7 @@ def test_merge_on_array_like_keys_conflict_negative(left_df, right_df):
         np.array(["a", "b", "c", "a", "b", "c"]),  # too long
     ],
 )
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=1)
 def test_merge_on_array_like_keys_length_mismatch_negative(left_df, right_df, left_on):
     # Native pandas raises
     # ValueError: The truth value of an array with more than one element is ambiguous
@@ -1062,22 +1062,22 @@ def test_merge_on_array_like_keys_length_mismatch_negative(left_df, right_df, le
         left_df.merge(right_df, left_on=left_on, right_on="right_d")
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_with_indicator(left_df, right_df, how):
     _verify_merge(left_df, right_df, how, on="A", indicator=True)
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_with_indicator_cross_join(left_df, right_df):
     _verify_merge(left_df, right_df, how="cross", indicator=True)
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_merge_with_indicator_explicit_name(left_df, right_df):
     _verify_merge(left_df, right_df, "outer", on="A", indicator="indicator_col")
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_merge_with_invalid_indicator_type_negative(left_df, right_df):
     eval_snowpark_pandas_result(
         left_df,
@@ -1093,7 +1093,7 @@ def test_merge_with_invalid_indicator_type_negative(left_df, right_df):
     )
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_merge_with_indicator_explicit_name_negative(left_df, right_df):
     left_df = left_df.rename(columns={"left_c": "_merge"})
     eval_snowpark_pandas_result(
diff --git a/tests/integ/modin/frame/test_reindex.py b/tests/integ/modin/frame/test_reindex.py
index 692fd66471..423e526fef 100644
--- a/tests/integ/modin/frame/test_reindex.py
+++ b/tests/integ/modin/frame/test_reindex.py
@@ -209,7 +209,7 @@ def perform_reindex(df):
             perform_reindex,
         )
 
-    @sql_count_checker(query_count=2, join_count=1)
+    @sql_count_checker(query_count=1, join_count=2)
     @pytest.mark.parametrize("limit", [None, 1, 2, 100])
     @pytest.mark.parametrize("method", ["bfill", "backfill", "pad", "ffill"])
     def test_reindex_index_datetime_with_fill(self, limit, method):
@@ -248,7 +248,7 @@ def test_reindex_index_non_overlapping_index(self):
             snow_df, native_df, lambda df: df.reindex(axis=0, labels=list("EFG"))
         )
 
-    @sql_count_checker(query_count=2, join_count=1)
+    @sql_count_checker(query_count=1, join_count=2)
     def test_reindex_index_non_overlapping_datetime_index(self):
         date_index = native_pd.date_range("1/1/2010", periods=6, freq="D")
         native_df = native_pd.DataFrame(
@@ -273,7 +273,7 @@ def perform_reindex(df):
             snow_df, native_df, perform_reindex, check_freq=False
         )
 
-    @sql_count_checker(query_count=1)
+    @sql_count_checker(query_count=0)
     def test_reindex_index_non_overlapping_different_types_index_negative(self):
         date_index = pd.date_range("1/1/2010", periods=6, freq="D")
         snow_df = pd.DataFrame(
diff --git a/tests/integ/modin/frame/test_to_snowflake.py b/tests/integ/modin/frame/test_to_snowflake.py
index 3fbdc7e754..f688b9f1af 100644
--- a/tests/integ/modin/frame/test_to_snowflake.py
+++ b/tests/integ/modin/frame/test_to_snowflake.py
@@ -15,7 +15,7 @@
 @pytest.mark.parametrize("index", [True, False])
 @pytest.mark.parametrize("index_labels", [None, ["my_index"]])
 # one extra query to convert index to native pandas when creating the snowpark pandas dataframe
-@sql_count_checker(query_count=3)
+@sql_count_checker(query_count=2, join_count=1)
 def test_to_snowflake_index(test_table_name, index, index_labels):
     df = pd.DataFrame(
         {"a": [1, 2, 3], "b": [4, 5, 6]}, index=pd.Index([2, 3, 4], name="index")
@@ -179,7 +179,7 @@ def test_to_snowflake_column_with_quotes(session, test_table_name):
 
 
 # one extra query to convert index to native pandas when creating the snowpark pandas dataframe
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=0)
 def test_to_snowflake_index_label_none_raises(test_table_name):
     df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
 
@@ -197,7 +197,7 @@ def test_to_snowflake_index_label_none_raises(test_table_name):
 
 
 # one extra query to convert index to native pandas when creating the snowpark pandas dataframe
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=0)
 def test_to_snowflake_data_label_none_raises(test_table_name):
     df = pd.DataFrame(
         {"a": [1, 2, 3], "b": [4, 5, 6]}, index=pd.Index([2, 3, 4], name="index")
diff --git a/tests/integ/modin/frame/test_where.py b/tests/integ/modin/frame/test_where.py
index 48cfc7bba5..006b7e76fb 100644
--- a/tests/integ/modin/frame/test_where.py
+++ b/tests/integ/modin/frame/test_where.py
@@ -443,7 +443,7 @@ def test_dataframe_where_not_implemented(test_data, test_cond, test_others):
         snow_dfs[0].where(snow_dfs[1], snow_dfs[2], axis=1)
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=2)
 def test_dataframe_where_cond_is_array(caplog):
     data = [[1, 2], [3, 4]]
     cond = np.array([[True, False], [False, True]])
@@ -506,7 +506,7 @@ def __call__(self, df):
     )
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=2)
 def test_dataframe_where_other_is_array():
     data = [[1, 3], [2, 4]]
     other = np.array([[99, -99], [101, -101]])
@@ -549,7 +549,7 @@ def test_dataframe_where_sizes_do_not_match_negative_test(test_data, test_cond):
         snow_df.where(snow_cond_df)
 
 
-@sql_count_checker(query_count=3, join_count=2)
+@sql_count_checker(query_count=2, join_count=3)
 def test_dataframe_where_with_np_array_cond():
     data = [1, 2, 3]
     cond = np.array([[False, True, False]]).T
@@ -576,8 +576,7 @@ def test_dataframe_where_with_np_array_cond():
     )
 
 
-# one extra query to convert index to native pandas when creating the snowpark pandas dataframe
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=1, join_count=3)
 def test_dataframe_where_with_np_array_cond_mismatched_labels():
     data = [1, 2, 3]
     cond = np.array([[False, True, False]]).T
@@ -602,7 +601,7 @@ def test_dataframe_where_with_np_array_cond_mismatched_labels():
     )
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=1, join_count=3)
 def test_dataframe_where_with_dataframe_cond_single_index_different_names():
     data = [1, 2, 3]
     cond = [False, True, False]
@@ -630,7 +629,7 @@ def test_dataframe_where_with_dataframe_cond_single_index_different_names():
 
 
 # one extra query to convert index to native pandas when creating the snowpark pandas dataframe
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=1, join_count=3)
 def test_dataframe_where_with_dataframe_cond_single_index_different_names_2():
     data = [1, 2, 3]
     cond = [False, True, False]
@@ -692,7 +691,7 @@ def test_dataframe_where_with_duplicated_index_aligned(cond_frame, other):
         native_other = other
         snow_other = other
 
-    expected_join_count = 1 if isinstance(other, int) else 2
+    expected_join_count = 2 if isinstance(other, int) else 3
     with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(
             snow_df,
@@ -703,8 +702,9 @@ def test_dataframe_where_with_duplicated_index_aligned(cond_frame, other):
         )
 
 
-# 3 extra queries to convert index to native pandas when creating the 3 snowpark pandas dataframe
-@sql_count_checker(query_count=4, join_count=2)
+# 3 extra join queries to create the 3 snowpark pandas dataframe with non-Snowpark pandas data
+# and a Snowpark pandas Index.
+@sql_count_checker(query_count=1, join_count=5)
 def test_dataframe_where_with_duplicated_index_unaligned():
     data = [3, 4, 5, 2]
     df_index = pd.Index([2, 1, 2, 3], name="index")
@@ -902,7 +902,7 @@ def perform_where(df):
     )
 
 
-@sql_count_checker(query_count=3, join_count=2)
+@sql_count_checker(query_count=2, join_count=3)
 @pytest.mark.parametrize(
     "data",
     [[10], [10, 11, 12], [10, 11, 12, 13]],
@@ -947,7 +947,7 @@ def perform_where(df):
     )
 
 
-@sql_count_checker(query_count=3, join_count=2, union_count=1)
+@sql_count_checker(query_count=2, join_count=3, union_count=1)
 @pytest.mark.parametrize(
     "data",
     [[10], [10, 11, 12], [10, 11, 12, 13]],
diff --git a/tests/integ/modin/pivot/test_pivot_table_single.py b/tests/integ/modin/pivot/test_pivot_table_single.py
index 9feab0c09f..e53b553090 100644
--- a/tests/integ/modin/pivot/test_pivot_table_single.py
+++ b/tests/integ/modin/pivot/test_pivot_table_single.py
@@ -226,7 +226,7 @@ def test_pivot_table_with_sum_and_count_null_and_empty_values_matching_behavior_
 
 
 # One extra query to convert to native pandas in dataframe constructor when creating snow_df
-@sql_count_checker(query_count=6, join_count=1)
+@sql_count_checker(query_count=5, join_count=2)
 def test_pivot_on_inline_data_using_temp_table():
     # Create a large dataframe of inlined data that will spill to a temporary table.
     snow_df = pd.DataFrame(
diff --git a/tests/integ/modin/resample/test_resample.py b/tests/integ/modin/resample/test_resample.py
index 63c72452c1..af99185294 100644
--- a/tests/integ/modin/resample/test_resample.py
+++ b/tests/integ/modin/resample/test_resample.py
@@ -32,8 +32,7 @@ def randomword(length):
 @freq
 @interval
 @agg_func
-# One extra query to convert index to native pandas for dataframe constructor
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_with_varying_freq_and_interval(freq, interval, agg_func):
     rule = f"{interval}{freq}"
     eval_snowpark_pandas_result(
@@ -46,8 +45,7 @@ def test_resample_with_varying_freq_and_interval(freq, interval, agg_func):
     )
 
 
-# One extra query to convert index to native pandas for dataframe constructor
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_date_before_snowflake_alignment_date():
     # Snowflake TIMESLICE alignment date is 1970-01-01 00:00:00
     date_data = native_pd.to_datetime(
@@ -68,8 +66,7 @@ def test_resample_date_before_snowflake_alignment_date():
 
 
 @interval
-# One extra query to convert index to native pandas for dataframe constructor
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_date_wraparound_snowflake_alignment_date(interval):
     # Snowflake TIMESLICE alignment date is 1970-01-01 00:00:00
     date_data = native_pd.to_datetime(
@@ -92,8 +89,7 @@ def test_resample_date_wraparound_snowflake_alignment_date(interval):
 
 @agg_func
 @freq
-# One extra query to convert index to native pandas for dataframe constructor
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_missing_data_upsample(agg_func, freq):
     # this tests to make sure that missing resample bins will be filled in.
     date_data = native_pd.date_range("2020-01-01", periods=13, freq=f"1{freq}").delete(
@@ -107,8 +103,7 @@ def test_resample_missing_data_upsample(agg_func, freq):
     )
 
 
-# One extra query to convert index to native pandas for dataframe constructor
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_duplicated_timestamps_downsample():
     date_data = native_pd.to_datetime(
         [
@@ -127,8 +122,7 @@ def test_resample_duplicated_timestamps_downsample():
     )
 
 
-# One extra query to convert index to native pandas for dataframe constructor
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_duplicated_timestamps():
     date_data = native_pd.to_datetime(
         [
@@ -164,11 +158,10 @@ def test_resample_series(freq, interval, agg_func):
     )
 
 
-# One extra query to convert index to native pandas for dataframe constructor
 @pytest.mark.parametrize(
     "agg_func", ["max", "min", "mean", "median", "sum", "std", "var"]
 )
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_numeric_only(agg_func):
     eval_snowpark_pandas_result(
         *create_test_dfs(
@@ -180,9 +173,8 @@ def test_resample_numeric_only(agg_func):
     )
 
 
-# One extra query to convert index to native pandas for dataframe constructor
 @agg_func
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_df_with_nan(agg_func):
     # resample bins of 'A' each have a NaN. 1 resample bin of 'B' is all NaN
     eval_snowpark_pandas_result(
@@ -210,8 +202,7 @@ def test_resample_ser_with_nan(agg_func):
 
 
 @agg_func
-# One extra query to convert index to native pandas for dataframe constructor
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_single_resample_bin(agg_func):
     eval_snowpark_pandas_result(
         *create_test_dfs(
@@ -224,8 +215,7 @@ def test_resample_single_resample_bin(agg_func):
 
 
 @agg_func
-# One extra query to convert index to native pandas for dataframe constructor
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_index_with_nan(agg_func):
     datecol = native_pd.to_datetime(
         ["2020-01-01", "2020-01-03", "2020-01-05", np.nan, "2020-01-09", np.nan]
@@ -240,8 +230,7 @@ def test_resample_index_with_nan(agg_func):
     )
 
 
-# One extra query to convert index to native pandas for dataframe constructor
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_df_getitem():
     eval_snowpark_pandas_result(
         *create_test_dfs(
@@ -264,8 +253,7 @@ def test_resample_ser_getitem():
     )
 
 
-# One extra query to convert index to native pandas for dataframe constructor
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_date_trunc_day():
     # resample bins of 'A' each have a NaN. 1 resample bin of 'B' is all NaN
     eval_snowpark_pandas_result(
@@ -278,8 +266,7 @@ def test_resample_date_trunc_day():
     )
 
 
-# One extra query to convert index to native pandas for dataframe constructor
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_date_trunc_hour():
     # resample bins of 'A' each have a NaN. 1 resample bin of 'B' is all NaN
     eval_snowpark_pandas_result(
diff --git a/tests/integ/modin/resample/test_resample_asfreq.py b/tests/integ/modin/resample/test_resample_asfreq.py
index 50e9646a4c..fc60f62621 100644
--- a/tests/integ/modin/resample/test_resample_asfreq.py
+++ b/tests/integ/modin/resample/test_resample_asfreq.py
@@ -19,7 +19,7 @@
 
 @freq
 @interval
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_asfreq_no_method(freq, interval):
     rule = f"{interval}{freq}"
     eval_snowpark_pandas_result(
@@ -32,7 +32,7 @@ def test_asfreq_no_method(freq, interval):
     )
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_asfreq_ffill():
     eval_snowpark_pandas_result(
         *create_test_dfs(
diff --git a/tests/integ/modin/resample/test_resample_fillna.py b/tests/integ/modin/resample/test_resample_fillna.py
index 6b47368eb5..6be0388f27 100644
--- a/tests/integ/modin/resample/test_resample_fillna.py
+++ b/tests/integ/modin/resample/test_resample_fillna.py
@@ -15,10 +15,9 @@
 agg_func = pytest.mark.parametrize("agg_func", ["ffill", "bfill"])
 
 
-# One extra query to convert index to native pandas for dataframe constructor
 @interval
 @agg_func
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_fill(interval, agg_func):
     datecol = native_pd.to_datetime(
         [
@@ -67,10 +66,9 @@ def test_resample_fill_ser(interval, agg_func):
     )
 
 
-# One extra query to convert index to native pandas for dataframe constructor
 @interval
 @agg_func
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_ffill_one_gap(interval, agg_func):
     datecol = native_pd.to_datetime(
         [
@@ -112,10 +110,9 @@ def resample_ffill_ser_one_gap(agg_func):
     )
 
 
-# One extra query to convert index to native pandas for dataframe constructor
 @interval
 @agg_func
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_ffill_missing_in_middle(interval, agg_func):
     datecol = native_pd.to_datetime(
         [
@@ -162,10 +159,9 @@ def test_resample_ffill_ser_missing_in_middle(interval, agg_func):
     )
 
 
-# One extra query to convert index to native pandas for dataframe constructor
 @interval
 @agg_func
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_ffill_ffilled_with_none(interval, agg_func):
     datecol = native_pd.to_datetime(
         [
@@ -187,10 +183,9 @@ def test_resample_ffill_ffilled_with_none(interval, agg_func):
     )
 
 
-# One extra query to convert index to native pandas for dataframe constructor
 @interval
 @agg_func
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_ffill_large_gaps(interval, agg_func):
     datecol = native_pd.to_datetime(
         [
@@ -212,7 +207,7 @@ def test_resample_ffill_large_gaps(interval, agg_func):
 
 @interval
 @pytest.mark.parametrize("method", ["ffill", "pad", "backfill", "bfill"])
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_fillna(interval, method):
     datecol = native_pd.to_datetime(
         [
diff --git a/tests/integ/modin/series/test_empty.py b/tests/integ/modin/series/test_empty.py
index 7040fa43fd..a30a69116c 100644
--- a/tests/integ/modin/series/test_empty.py
+++ b/tests/integ/modin/series/test_empty.py
@@ -44,7 +44,7 @@ def test_series_empty(args, kwargs):
     )
 
 
-@sql_count_checker(query_count=7)
+@sql_count_checker(query_count=5, join_count=2)
 def test_empty_series_type():
     def check_dtype(series):
         assert series.to_pandas().dtype == series.dtype
diff --git a/tests/integ/modin/series/test_iloc.py b/tests/integ/modin/series/test_iloc.py
index ac02f368dd..2f9444b9ae 100644
--- a/tests/integ/modin/series/test_iloc.py
+++ b/tests/integ/modin/series/test_iloc.py
@@ -823,6 +823,7 @@ def test_df_iloc_set_with_multi_index(
         native_items.index = pd.MultiIndex.from_tuples(item_index)
 
     if row_key_index:
+        expected_join_count += 1
         snow_row_key = pd.Series(row_key, index=pd.Index(row_key_index))
         native_row_key = native_pd.Series(row_key, index=pd.Index(row_key_index))
     else:
diff --git a/tests/integ/modin/series/test_loc.py b/tests/integ/modin/series/test_loc.py
index aa16a841f2..d60b9eb26a 100644
--- a/tests/integ/modin/series/test_loc.py
+++ b/tests/integ/modin/series/test_loc.py
@@ -251,7 +251,7 @@ def test_series_loc_get_key_bool_series_with_aligned_indices(key, use_default_in
         [random.choice([True, False]) for _ in range(5)],
     ],
 )
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_series_loc_get_key_bool_series_with_unaligned_and_distinct_indices(
     key, use_default_index
 ):
@@ -343,7 +343,7 @@ def test_df_loc_get_callable_key(row):
     )
 
 
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_series_loc_get_key_bool_series_with_unaligned_and_duplicate_indices():
     # index can have null values and duplicates
     key = [True] * 5
diff --git a/tests/integ/modin/series/test_reindex.py b/tests/integ/modin/series/test_reindex.py
index 7c2bbba906..9450112ae5 100644
--- a/tests/integ/modin/series/test_reindex.py
+++ b/tests/integ/modin/series/test_reindex.py
@@ -259,17 +259,17 @@ def perform_reindex(series):
     )
 
 
-@sql_count_checker(query_count=2, join_count=1)
-@pytest.mark.parametrize("limit", [None, 1, 2, 100])
-@pytest.mark.parametrize("method", ["bfill", "backfill", "pad", "ffill"])
+# @sql_count_checker(query_count=2, join_count=1)
+@pytest.mark.parametrize("limit", [None])  # , 1, 2, 100])
+@pytest.mark.parametrize("method", ["bfill"])  # , "backfill", "pad", "ffill"])
 def test_reindex_index_datetime_with_fill(limit, method):
     date_index = native_pd.date_range("1/1/2010", periods=6, freq="D")
     native_series = native_pd.Series(
-        {"prices": [100, 101, np.nan, 100, 89, 88]}, index=date_index
+        {"1/1/2020": [100, 101, np.nan, 100, 89, 88]}, index=date_index
     )
     date_index = pd.date_range("1/1/2010", periods=6, freq="D")
     snow_series = pd.Series(
-        {"prices": [100, 101, np.nan, 100, 89, 88]}, index=date_index
+        {"1/1/2020": [100, 101, np.nan, 100, 89, 88]}, index=date_index
     )
 
     def perform_reindex(series):
@@ -300,7 +300,7 @@ def test_reindex_index_non_overlapping_index():
     )
 
 
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_reindex_index_non_overlapping_datetime_index():
     date_index = native_pd.date_range("1/1/2010", periods=6, freq="D")
     native_series = native_pd.Series(
@@ -326,7 +326,7 @@ def perform_reindex(series):
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=0)
 def test_reindex_index_non_overlapping_different_types_index_negative():
     date_index = pd.date_range("1/1/2010", periods=6, freq="D")
     snow_series = pd.Series(
diff --git a/tests/integ/modin/series/test_rename.py b/tests/integ/modin/series/test_rename.py
index 8dda080d95..4ccf29706f 100644
--- a/tests/integ/modin/series/test_rename.py
+++ b/tests/integ/modin/series/test_rename.py
@@ -52,7 +52,7 @@ def test_rename_partial_dict(self):
         renamed = ser.rename({"b": "foo", "d": "bar"})
         assert_index_equal(renamed.index, native_pd.Index(["a", "foo", "c", "bar"]))
 
-    @sql_count_checker(query_count=1, join_count=0)
+    @sql_count_checker(query_count=0)
     def test_rename_retain_index_name(self):
         # index with name
         renamer = Series(
@@ -130,7 +130,7 @@ class MyIndexer:
         ser.rename(ix, inplace=True)
         assert ser.name is ix
 
-    @sql_count_checker(query_count=1)
+    @sql_count_checker(query_count=0)
     def test_rename_callable(self):
         # GH 17407
         ser = Series(range(1, 6), index=Index(range(2, 7), name="IntIndex"))
diff --git a/tests/integ/modin/series/test_sort_values.py b/tests/integ/modin/series/test_sort_values.py
index e966409dfc..b147377f75 100644
--- a/tests/integ/modin/series/test_sort_values.py
+++ b/tests/integ/modin/series/test_sort_values.py
@@ -33,7 +33,7 @@ def snow_series(snow_df):
 
 @pytest.mark.parametrize("by", ["A", "B", "a", "b"])
 @pytest.mark.parametrize("ascending", [True, False])
-@sql_count_checker(query_count=3)
+@sql_count_checker(query_count=3, join_count=3)
 def test_sort_values(snow_df, by, ascending):
     snow_series = snow_df[by]
     native_series = snow_series.to_pandas()
diff --git a/tests/integ/modin/series/test_to_snowflake.py b/tests/integ/modin/series/test_to_snowflake.py
index 968a96e33d..92b428f70e 100644
--- a/tests/integ/modin/series/test_to_snowflake.py
+++ b/tests/integ/modin/series/test_to_snowflake.py
@@ -29,7 +29,7 @@ def _verify_num_rows(session, table_name: str, expected: int) -> None:
 
 @pytest.mark.parametrize("index", [True, False])
 @pytest.mark.parametrize("index_labels", [None, ["my_index"]])
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=1)
 def test_to_snowflake_index(test_table_name, snow_series, index, index_labels):
     snow_series.to_snowflake(
         test_table_name, if_exists="replace", index=index, index_label=index_labels
@@ -108,7 +108,7 @@ def test_to_snowflake_if_exists(session, test_table_name, snow_series):
         _verify_num_rows(session, test_table_name, 6)
 
 
-@sql_count_checker(query_count=4)
+@sql_count_checker(query_count=4, join_count=1)
 def test_to_snowflake_if_exists_negative(session, test_table_name, snow_series):
     # Create a table.
     snow_series.to_snowflake(test_table_name, if_exists="fail", index=False)
@@ -127,7 +127,7 @@ def test_to_snowflake_if_exists_negative(session, test_table_name, snow_series):
 
 
 @pytest.mark.parametrize("index_label", VALID_PANDAS_LABELS)
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=1)
 def test_to_snowflake_index_column_labels(index_label, test_table_name, snow_series):
     snow_series.to_snowflake(
         test_table_name, if_exists="replace", index=True, index_label=index_label
@@ -136,7 +136,7 @@ def test_to_snowflake_index_column_labels(index_label, test_table_name, snow_ser
 
 
 @pytest.mark.parametrize("col_label", VALID_PANDAS_LABELS)
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=1)
 def test_to_snowflake_data_column_labels(col_label, test_table_name, snow_series):
     snow_series = snow_series.rename(col_label)
     snow_series.to_snowflake(test_table_name, if_exists="replace", index=False)
diff --git a/tests/integ/modin/series/test_where.py b/tests/integ/modin/series/test_where.py
index 18fec0aadf..9f0c6d0f80 100644
--- a/tests/integ/modin/series/test_where.py
+++ b/tests/integ/modin/series/test_where.py
@@ -103,7 +103,7 @@ def test_series_where_index_no_names():
     )
 
 
-@sql_count_checker(query_count=4, join_count=1)
+@sql_count_checker(query_count=3, join_count=2)
 def test_series_where_with_np_array_cond():
     data = [1, 2]
     cond = np.array([True, False])
@@ -114,7 +114,7 @@ def test_series_where_with_np_array_cond():
     eval_snowpark_pandas_result(snow_ser, native_ser, lambda df: df.where(cond))
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=1, join_count=3)
 def test_series_where_with_series_cond_single_index_different_names():
     data = [1, 2, 3]
     cond = [False, True, False]
@@ -139,7 +139,7 @@ def test_series_where_with_series_cond_single_index_different_names():
     )
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=1, join_count=3)
 def test_series_where_with_duplicated_index_aligned():
     data = [1, 2, 3]
     cond = [False, True, False]
@@ -196,9 +196,11 @@ def test_series_where_with_lambda_cond_returns_singleton_should_fail():
 
 @pytest.mark.parametrize(
     "other, sql_count, join_count",
-    [(lambda x: -x.iloc[0], 5, 3), (lambda x: x**2, 4, 2)],
+    [(lambda x: -x.iloc[0], 4, 10), (lambda x: x**2, 3, 8)],
 )
 def test_series_where_with_lambda_other(other, sql_count, join_count):
+    # High join count due to creatinga  Series with non-Snowpark pandas data
+    # and a Snowpark pandas Index.
     data = [1, 6, 7, 4]
     index = pd.Index(["a", "b", "c", "d"])
 

From 6a2cb79dd8472a6fdbdb9a1ad819ef812771e114 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Fri, 23 Aug 2024 14:09:06 -0700
Subject: [PATCH 10/42] added edge case logic, fix test query count

---
 .../snowpark/modin/pandas/dataframe.py        | 16 ++++++++
 src/snowflake/snowpark/modin/pandas/series.py | 29 +++++++++++---
 tests/integ/modin/frame/test_mask.py          | 13 ++++---
 tests/integ/modin/frame/test_setitem.py       |  2 +-
 .../integ/modin/groupby/test_groupby_apply.py | 22 +++--------
 .../modin/groupby/test_groupby_negative.py    |  6 +--
 .../modin/groupby/test_groupby_series.py      |  2 +-
 .../test_df_series_creation_with_index.py     | 39 ++++++++++++++++++-
 tests/integ/modin/index/test_name.py          |  8 ++--
 tests/integ/modin/series/test_iloc.py         |  5 +--
 tests/integ/modin/series/test_loc.py          |  2 +-
 tests/integ/modin/series/test_mask.py         | 14 ++++---
 tests/integ/modin/series/test_reindex.py      |  6 +--
 tests/integ/modin/test_concat.py              |  1 -
 14 files changed, 113 insertions(+), 52 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py
index 609f5bf55e..f35e40373e 100644
--- a/src/snowflake/snowpark/modin/pandas/dataframe.py
+++ b/src/snowflake/snowpark/modin/pandas/dataframe.py
@@ -297,6 +297,22 @@ def __init__(
                     for k, v in data.items()
                 }
 
+                if all(len(v) == 1 for v in data.values()) and index is not None:
+                    # Special case when creating:
+                    # >>> DataFrame({"A": [1], "V": [2]}, native_pd.Index(["A", "B", "C"]), name="none")
+                    #       A  V
+                    # none
+                    # A     1  2
+                    # B     1  2  <--- the first row is copied into the rest of the rows.
+                    # C     1  2
+                    # Recreate a 2-d array with the first row copied into the rest of the rows.
+                    self._query_compiler = DataFrame(
+                        data=[[v[0] for v in data.values()]] * len(index),
+                        index=index,
+                        columns=list(data.keys()),
+                    )._query_compiler
+                    return
+
             new_index = index
             if isinstance(index, Index):
                 # Skip turning this into a native pandas object here since this issues an extra query.
diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py
index 5c36afe5ab..7ac3172328 100644
--- a/src/snowflake/snowpark/modin/pandas/series.py
+++ b/src/snowflake/snowpark/modin/pandas/series.py
@@ -165,6 +165,26 @@ def __init__(
                 )
                 # 3. Perform .loc[] on `data` to select the rows that are in `index`.
                 query_compiler = data.loc[index]._query_compiler
+
+        elif is_dict_like(data) and not isinstance(data, (pandas.Series, Series)):
+            if name is None:
+                name = MODIN_UNNAMED_SERIES_LABEL
+            # If the data is a dictionary, we need to convert it to a query compiler and set the index.
+            query_compiler = from_pandas(
+                pandas.DataFrame(
+                    pandas.Series(
+                        data=data, dtype=dtype, name=name, copy=copy, fastpath=fastpath
+                    )
+                )
+            )._query_compiler
+            if index is not None:
+                index = index if isinstance(index, Index) else Index(index)
+                query_compiler = (
+                    query_compiler.create_qc_with_data_and_index_joined_on_index(
+                        index._query_compiler
+                    )
+                )
+
         if query_compiler is None:
             # Defaulting to pandas
             if name is None:
@@ -184,7 +204,7 @@ def __init__(
                 pandas.DataFrame(
                     pandas.Series(
                         data=try_convert_index_to_native(data),
-                        index=try_convert_index_to_native(new_index),
+                        index=new_index,
                         dtype=dtype,
                         name=name,
                         copy=copy,
@@ -193,10 +213,9 @@ def __init__(
                 )
             )._query_compiler
             if isinstance(index, Index):
-                query_compiler = (
-                    query_compiler.create_qc_with_data_and_index_joined_on_index(
-                        index._query_compiler
-                    )
+                # Performing set index to directly set the index column (joining on row-position instead of index).
+                query_compiler = query_compiler.set_index_from_series(
+                    index.to_series()._query_compiler
                 )
         self._query_compiler = query_compiler.columnarize()
         if name is not None:
diff --git a/tests/integ/modin/frame/test_mask.py b/tests/integ/modin/frame/test_mask.py
index 684d8ba434..2422edb736 100644
--- a/tests/integ/modin/frame/test_mask.py
+++ b/tests/integ/modin/frame/test_mask.py
@@ -437,7 +437,7 @@ def test_dataframe_mask_not_implemented(test_data, test_cond, test_others):
         snow_dfs[0].mask(snow_dfs[1], snow_dfs[2], axis=1)
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=2)
 def test_dataframe_mask_cond_is_array(caplog):
     data = [[1, 2], [3, 4]]
     cond = np.array([[True, False], [False, True]])
@@ -686,7 +686,7 @@ def test_dataframe_mask_with_duplicated_index_aligned(cond_frame, other):
         native_other = other
         snow_other = other
 
-    expected_join_count = 1 if isinstance(other, int) else 2
+    expected_join_count = 2 if isinstance(other, int) else 3
     with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(
             snow_df,
@@ -697,8 +697,9 @@ def test_dataframe_mask_with_duplicated_index_aligned(cond_frame, other):
         )
 
 
-# Three extra queries to convert to native index for dataframe constructor when creating the 3 snowpark pandas dataframes
-@sql_count_checker(query_count=4, join_count=2)
+# Three extra joins when creating the 3 snowpark pandas dataframes with non-Snowpark pandas
+# data and Snowpark pandas Index.
+@sql_count_checker(query_count=1, join_count=5)
 def test_dataframe_mask_with_duplicated_index_unaligned():
     data = [3, 4, 5, 2]
     df_index = pd.Index([2, 1, 2, 3], name="index")
@@ -866,7 +867,7 @@ def perform_mask(df):
     )
 
 
-@sql_count_checker(query_count=3, join_count=2)
+@sql_count_checker(query_count=2, join_count=3)
 @pytest.mark.parametrize(
     "data",
     [[10], [10, 11, 12], [10, 11, 12, 13]],
@@ -911,7 +912,7 @@ def perform_mask(df):
     )
 
 
-@sql_count_checker(query_count=3, join_count=2, union_count=1)
+@sql_count_checker(query_count=2, join_count=3, union_count=1)
 @pytest.mark.parametrize(
     "data",
     [[10], [10, 11, 12], [10, 11, 12, 13]],
diff --git a/tests/integ/modin/frame/test_setitem.py b/tests/integ/modin/frame/test_setitem.py
index cc5698b684..6152089f39 100644
--- a/tests/integ/modin/frame/test_setitem.py
+++ b/tests/integ/modin/frame/test_setitem.py
@@ -445,7 +445,7 @@ def setitem_helper(df):
     ],
 )
 # 2 extra queries to convert to native pandas when creating the two snowpark pandas dataframes
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=1, join_count=3)
 def test_df_setitem_with_unique_and_duplicate_index_values(
     index_values, other_index_values, expect_mismatch
 ):
diff --git a/tests/integ/modin/groupby/test_groupby_apply.py b/tests/integ/modin/groupby/test_groupby_apply.py
index 4c85c1fd06..adfcd7f46b 100644
--- a/tests/integ/modin/groupby/test_groupby_apply.py
+++ b/tests/integ/modin/groupby/test_groupby_apply.py
@@ -537,7 +537,7 @@ def operation(df: native_pd.DataFrame) -> native_pd.DataFrame:
                 if group_keys
                 else QUERY_COUNT_WITH_TRANSFORM_CHECK
             ),
-            join_count=JOIN_COUNT,
+            join_count=2,
             udtf_count=UDTF_COUNT,
         ):
             snow_result = operation(mdf)
@@ -719,7 +719,7 @@ def groupby_apply_without_sort(df):
         with SqlCounter(
             query_count=QUERY_COUNT_WITH_TRANSFORM_CHECK,
             udtf_count=UDTF_COUNT,
-            join_count=JOIN_COUNT,
+            join_count=2,
         ):
             assert_snowpark_pandas_equal_to_pandas(
                 groupby_apply_without_sort(snow_df).sort_values(),
@@ -967,9 +967,9 @@ def test_args_and_kwargs(self, grouping_dfs_with_multiindexes):
     @pytest.mark.parametrize("dropna", [True, False])
     @sql_count_checker(
         # One extra query to convert index to native pandas in dataframe constructor to create test dataframes
-        query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK + 1,
+        query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=JOIN_COUNT,
+        join_count=2,
     )
     @pytest.mark.parametrize("index", [[2.0, np.nan, 2.0, 1.0], [np.nan] * 4])
     def test_dropna(self, dropna, index):
@@ -1082,19 +1082,9 @@ def test_dataframe_groupby_getitem(self, by, func, dropna, group_keys, sort):
             # (pd.NA, k1) that we cannot serialize.
             pytest.xfail(reason="SNOW-1229760")
         with SqlCounter(
-            # one additional query for converting index to native pandas in dataframe constructor
-            query_count=QUERY_COUNT_WITH_TRANSFORM_CHECK + 1
-            if not group_keys
-            and func
-            in (
-                get_dataframe_from_numeric_series,
-                get_series_from_numeric_series,
-                series_transform_returns_frame,
-                series_transform_returns_series,
-            )
-            else QUERY_COUNT_WITHOUT_TRANSFORM_CHECK + 1,
+            query_count=6 if group_keys is False else 5,
             udtf_count=UDTF_COUNT,
-            join_count=JOIN_COUNT,
+            join_count=2,
         ):
             eval_snowpark_pandas_result(
                 *create_test_dfs(
diff --git a/tests/integ/modin/groupby/test_groupby_negative.py b/tests/integ/modin/groupby/test_groupby_negative.py
index eeddd0e6c2..c616a1d019 100644
--- a/tests/integ/modin/groupby/test_groupby_negative.py
+++ b/tests/integ/modin/groupby/test_groupby_negative.py
@@ -164,7 +164,7 @@ def test_groupby_min_max_invalid_non_numeric_column(
         agg_func(df).to_pandas()
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_groupby_series_numeric_only_true(series_str):
     message = "SeriesGroupBy does not implement numeric_only"
     eval_snowpark_pandas_result(
@@ -177,7 +177,7 @@ def test_groupby_series_numeric_only_true(series_str):
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_groupby_as_index_raises(series_str):
     eval_snowpark_pandas_result(
         series_str,
@@ -254,7 +254,7 @@ def test_groupby_as_index_false_axis_1_raises(df_multi):
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_groupby_series_agg_dict_like_input_raise(series_str):
     eval_snowpark_pandas_result(
         series_str,
diff --git a/tests/integ/modin/groupby/test_groupby_series.py b/tests/integ/modin/groupby/test_groupby_series.py
index 7756f8b620..ae8ae0926d 100644
--- a/tests/integ/modin/groupby/test_groupby_series.py
+++ b/tests/integ/modin/groupby/test_groupby_series.py
@@ -153,7 +153,7 @@ def test_groupby_agg_series_named_agg(aggs, sort):
 
 
 @pytest.mark.parametrize("numeric_only", [False, None])
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_groupby_series_numeric_only(series_str, numeric_only):
     native_series = series_str.to_pandas()
     eval_snowpark_pandas_result(
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
index 5b2571ccca..a1512d8ced 100644
--- a/tests/integ/modin/index/test_df_series_creation_with_index.py
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -322,6 +322,7 @@ def test_create_df_with_df_as_data_and_index_as_index(native_df, native_index):
             native_pd.DataFrame([]),
             native_pd.Index(["A", "V"], name="non-empty index"),
         ),  # empty df and index
+        ({}, native_pd.Index([10, 0, 1], name="non-empty index")),
     ],
 )
 @sql_count_checker(query_count=1, join_count=2)
@@ -407,6 +408,14 @@ def test_create_df_with_empty_df_as_data_and_index_as_index(native_df, native_in
             native_pd.Index(["A", "V"], name="non-empty index"),
             ["A", "V"],
         ),  # empty data, non-empty index and columns
+        (
+            {
+                "A": [1, 2, 3],
+                "B": [4, 5, 6],
+            },  # dict data should behave similar to DataFrame data
+            native_pd.Index([10, 0, 1], name="non-empty index"),
+            ["A", "C"],
+        ),
     ],
 )
 @pytest.mark.parametrize("column_type", ["list", "index"])
@@ -421,9 +430,16 @@ def test_create_df_with_df_as_data_and_index_as_index_and_different_columns(
     # One extra query is required to create the columns if it is an Index (column_type is "index").
     native_columns = columns if column_type == "list" else native_pd.Index(columns)
     snow_columns = columns if column_type == "list" else pd.Index(columns)
-    snow_df = pd.DataFrame(native_df)
+    snow_df = (
+        pd.DataFrame(native_df)
+        if isinstance(native_df, native_pd.DataFrame)
+        else native_df
+    )
     snow_index = pd.Index(native_index)
-    with SqlCounter(query_count=1 if column_type == "list" else 2, join_count=2):
+    qc = 1 if column_type == "list" else 2
+    qc += 1 if (isinstance(native_df, dict) and column_type == "index") else 0
+    jc = 2 if isinstance(native_df, native_pd.DataFrame) else 1
+    with SqlCounter(query_count=qc, join_count=jc):
         assert_frame_equal(
             pd.DataFrame(snow_df, index=snow_index, columns=native_columns),
             native_pd.DataFrame(native_df, index=native_index, columns=snow_columns),
@@ -454,3 +470,22 @@ def test_create_df_with_df_index_negative():
         match=re.escape("Shape of passed values is (3, 1), indices imply (2, 1)"),
     ):
         pd.DataFrame([1, 2, 3], index=[[1, 2], [3, 4], [5, 6]])
+
+
+@sql_count_checker(query_count=2, join_count=1)
+def test_create_df_with_dict_as_data_and_index_as_index():
+    """
+    Special case when creating:
+    >>> DataFrame({"A": [1], "V": [2]}, native_pd.Index(["A", "B", "C"]), name="none")   # doctest: +SKIP
+          A  V
+    none
+    A     1  2
+    B     1  2  <--- the first row is copied into the rest of the rows.
+    C     1  2
+    """
+    data = {"A": [1], "V": [2]}
+    native_index = native_pd.Index(["A", "B", "C"])
+    snow_index = pd.Index(native_index)
+    native_df = native_pd.DataFrame(data, index=native_index)
+    snow_df = pd.DataFrame(data, index=snow_index)
+    assert_frame_equal(snow_df, native_df)
diff --git a/tests/integ/modin/index/test_name.py b/tests/integ/modin/index/test_name.py
index 0397ed1546..387a76c358 100644
--- a/tests/integ/modin/index/test_name.py
+++ b/tests/integ/modin/index/test_name.py
@@ -95,7 +95,7 @@ def test_index_rename_copy(new_name):
 
 
 @pytest.mark.parametrize("new_name", [None, "grade", ("grade",), ("A", "B")])
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=0)
 def test_df_index_rename_inplace(new_name):
     # 1 query to create the DataFrame.
     # Create the DataFrame and the new index.
@@ -117,7 +117,7 @@ def test_df_index_rename_inplace(new_name):
 
 
 @pytest.mark.parametrize("new_name", [None, "grade", ("grade",), ("A", "B")])
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=0)
 def test_df_index_rename_copy(new_name):
     # 1 query to create the DataFrame.
     # Create the DataFrame and the new index.
@@ -183,7 +183,7 @@ def test_index_set_names_copy(new_name):
 
 
 @pytest.mark.parametrize("new_name", [None, "grade", ["grade"], ("grade",)])
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=0)
 def test_df_index_set_names_inplace(new_name):
     # 1 query to create the DataFrame.
     # Create the DataFrame and the new index.
@@ -213,7 +213,7 @@ def test_df_index_set_names_inplace(new_name):
 
 
 @pytest.mark.parametrize("new_name", [None, "grade", ["grade"], ("grade",)])
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=0)
 def test_df_index_set_names_copy(new_name):
     # 1 query to create the DataFrame.
     # Create the DataFrame and the new index.
diff --git a/tests/integ/modin/series/test_iloc.py b/tests/integ/modin/series/test_iloc.py
index 2f9444b9ae..b35681e4ee 100644
--- a/tests/integ/modin/series/test_iloc.py
+++ b/tests/integ/modin/series/test_iloc.py
@@ -823,9 +823,8 @@ def test_df_iloc_set_with_multi_index(
         native_items.index = pd.MultiIndex.from_tuples(item_index)
 
     if row_key_index:
-        expected_join_count += 1
-        snow_row_key = pd.Series(row_key, index=pd.Index(row_key_index))
-        native_row_key = native_pd.Series(row_key, index=pd.Index(row_key_index))
+        snow_row_key = pd.Series(row_key, index=native_pd.Index(row_key_index))
+        native_row_key = native_pd.Series(row_key, index=native_pd.Index(row_key_index))
     else:
         snow_row_key = row_key
         native_row_key = row_key
diff --git a/tests/integ/modin/series/test_loc.py b/tests/integ/modin/series/test_loc.py
index d60b9eb26a..b745431df9 100644
--- a/tests/integ/modin/series/test_loc.py
+++ b/tests/integ/modin/series/test_loc.py
@@ -388,7 +388,7 @@ def test_series_loc_get_key_bool_series_with_unaligned_and_duplicate_indices():
         ],  # larger length
     ],
 )
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_series_loc_get_key_bool_series_with_mismatch_index_len(key, use_default_index):
     if use_default_index:
         index = None
diff --git a/tests/integ/modin/series/test_mask.py b/tests/integ/modin/series/test_mask.py
index 6c554a0358..2ef2465b58 100644
--- a/tests/integ/modin/series/test_mask.py
+++ b/tests/integ/modin/series/test_mask.py
@@ -103,7 +103,7 @@ def test_series_mask_index_no_names():
     )
 
 
-@sql_count_checker(query_count=4, join_count=1)
+@sql_count_checker(query_count=3, join_count=2)
 def test_series_mask_with_np_array_cond():
     data = [1, 2]
     cond = np.array([True, False])
@@ -114,7 +114,7 @@ def test_series_mask_with_np_array_cond():
     eval_snowpark_pandas_result(snow_ser, native_ser, lambda df: df.mask(cond))
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=1, join_count=3)
 def test_series_mask_with_series_cond_single_index_different_names():
     data = [1, 2, 3]
     cond = [False, True, False]
@@ -138,7 +138,7 @@ def test_series_mask_with_series_cond_single_index_different_names():
     )
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=1, join_count=3)
 def test_series_mask_with_duplicated_index_aligned():
     data = [1, 2, 3]
     cond = [False, True, False]
@@ -160,7 +160,7 @@ def test_series_mask_with_duplicated_index_aligned():
     )
 
 
-@sql_count_checker(query_count=3)
+@sql_count_checker(query_count=2, join_count=1)
 def test_series_mask_with_lambda_cond():
     data = [1, 6, 7, 4]
     index = pd.Index(["a", "b", "c", "d"])
@@ -175,7 +175,7 @@ def test_series_mask_with_lambda_cond():
     )
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=1)
 def test_series_mask_with_lambda_returns_singleton_should_fail():
     data = [1, 6, 7, 4]
     index = pd.Index(["a", "b", "c", "d"])
@@ -196,9 +196,11 @@ def test_series_mask_with_lambda_returns_singleton_should_fail():
 
 @pytest.mark.parametrize(
     "other, sql_count, join_count",
-    [(lambda x: -x.iloc[0], 5, 3), (lambda x: x**2, 4, 2)],
+    [(lambda x: -x.iloc[0], 4, 10), (lambda x: x**2, 3, 8)],
 )
 def test_series_mask_with_lambda_other(other, sql_count, join_count):
+    # Multiple joins since multiple Series are created with non-Snowpark pandas data
+    # and a Snowpark pandas Index.
     data = [1, 6, 7, 4]
     index = pd.Index(["a", "b", "c", "d"])
 
diff --git a/tests/integ/modin/series/test_reindex.py b/tests/integ/modin/series/test_reindex.py
index 9450112ae5..97e2931dfb 100644
--- a/tests/integ/modin/series/test_reindex.py
+++ b/tests/integ/modin/series/test_reindex.py
@@ -259,9 +259,9 @@ def perform_reindex(series):
     )
 
 
-# @sql_count_checker(query_count=2, join_count=1)
-@pytest.mark.parametrize("limit", [None])  # , 1, 2, 100])
-@pytest.mark.parametrize("method", ["bfill"])  # , "backfill", "pad", "ffill"])
+@sql_count_checker(query_count=1, join_count=2)
+@pytest.mark.parametrize("limit", [None, 1, 2, 100])
+@pytest.mark.parametrize("method", ["bfill", "backfill", "pad", "ffill"])
 def test_reindex_index_datetime_with_fill(limit, method):
     date_index = native_pd.date_range("1/1/2010", periods=6, freq="D")
     native_series = native_pd.Series(
diff --git a/tests/integ/modin/test_concat.py b/tests/integ/modin/test_concat.py
index 981a2932a2..011baf1ec5 100644
--- a/tests/integ/modin/test_concat.py
+++ b/tests/integ/modin/test_concat.py
@@ -109,7 +109,6 @@ def axis(request):
     return request.param
 
 
-# TODO: redefine df1, df2, and _concat_operation
 def _concat_operation(snow_objs, native_objs, **kwargs):
     return (
         lambda x: pd.concat(snow_objs, **kwargs)

From f971b0d5fe1d394aa2e4cea334510adffe22fe15 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Fri, 23 Aug 2024 15:47:38 -0700
Subject: [PATCH 11/42] more test fixes

---
 src/snowflake/snowpark/modin/pandas/dataframe.py   |  5 ++++-
 src/snowflake/snowpark/modin/pandas/series.py      |  6 +++++-
 tests/integ/modin/frame/test_iloc.py               | 10 ++++------
 tests/integ/modin/frame/test_mask.py               | 13 +++++--------
 tests/integ/modin/frame/test_merge.py              |  3 +--
 tests/integ/modin/frame/test_rename.py             |  3 +--
 tests/integ/modin/groupby/test_groupby_apply.py    |  7 ++++++-
 tests/integ/modin/resample/test_resample_fillna.py | 10 ++++++----
 tests/integ/modin/series/test_rank.py              |  1 -
 9 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py
index f35e40373e..71b07c9684 100644
--- a/src/snowflake/snowpark/modin/pandas/dataframe.py
+++ b/src/snowflake/snowpark/modin/pandas/dataframe.py
@@ -297,7 +297,10 @@ def __init__(
                     for k, v in data.items()
                 }
 
-                if all(len(v) == 1 for v in data.values()) and index is not None:
+                if (
+                    all(not is_scalar(v) and len(v) == 1 for v in data.values())
+                    and index is not None
+                ):
                     # Special case when creating:
                     # >>> DataFrame({"A": [1], "V": [2]}, native_pd.Index(["A", "B", "C"]), name="none")
                     #       A  V
diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py
index 7ac3172328..2802000451 100644
--- a/src/snowflake/snowpark/modin/pandas/series.py
+++ b/src/snowflake/snowpark/modin/pandas/series.py
@@ -166,7 +166,11 @@ def __init__(
                 # 3. Perform .loc[] on `data` to select the rows that are in `index`.
                 query_compiler = data.loc[index]._query_compiler
 
-        elif is_dict_like(data) and not isinstance(data, (pandas.Series, Series)):
+        elif (
+            is_dict_like(data)
+            and not is_list_like(data)
+            and not isinstance(data, (pandas.Series, Series))
+        ):
             if name is None:
                 name = MODIN_UNNAMED_SERIES_LABEL
             # If the data is a dictionary, we need to convert it to a query compiler and set the index.
diff --git a/tests/integ/modin/frame/test_iloc.py b/tests/integ/modin/frame/test_iloc.py
index c79d5eb8ba..cb69f78172 100644
--- a/tests/integ/modin/frame/test_iloc.py
+++ b/tests/integ/modin/frame/test_iloc.py
@@ -2710,17 +2710,15 @@ def test_df_iloc_set_with_multi_index(
         native_items.columns = pd.MultiIndex.from_tuples(item_columns)
 
     if row_key_index:
-        expected_join_count += 1
-        snow_row_key = pd.Series(row_key, index=pd.Index(row_key_index))
-        native_row_key = native_pd.Series(row_key, index=pd.Index(row_key_index))
+        snow_row_key = pd.Series(row_key, index=native_pd.Index(row_key_index))
+        native_row_key = native_pd.Series(row_key, index=native_pd.Index(row_key_index))
     else:
         snow_row_key = row_key
         native_row_key = row_key
 
     if col_key_index:
-        expected_join_count += 1
-        snow_col_key = pd.Series(col_key, index=pd.Index(col_key_index))
-        native_col_key = native_pd.Series(col_key, index=pd.Index(col_key_index))
+        snow_col_key = pd.Series(col_key, index=native_pd.Index(col_key_index))
+        native_col_key = native_pd.Series(col_key, index=native_pd.Index(col_key_index))
     else:
         snow_col_key = col_key
         native_col_key = col_key
diff --git a/tests/integ/modin/frame/test_mask.py b/tests/integ/modin/frame/test_mask.py
index 2422edb736..80dfb0f410 100644
--- a/tests/integ/modin/frame/test_mask.py
+++ b/tests/integ/modin/frame/test_mask.py
@@ -500,7 +500,7 @@ def __call__(self, df):
     )
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=2)
 def test_dataframe_mask_other_is_array():
     data = [[1, 3], [2, 4]]
     other = np.array([[99, -99], [101, -101]])
@@ -543,7 +543,7 @@ def test_dataframe_mask_sizes_do_not_match_negative_test(test_data, test_cond):
         snow_df.mask(snow_cond_df)
 
 
-@sql_count_checker(query_count=3, join_count=2)
+@sql_count_checker(query_count=2, join_count=3)
 def test_dataframe_mask_with_np_array_cond():
     data = [1, 2, 3]
     cond = np.array([[False, True, False]]).T
@@ -570,8 +570,7 @@ def test_dataframe_mask_with_np_array_cond():
     )
 
 
-# One extra query to convert to native index for dataframe constructor when creating snow_other_df
-@sql_count_checker(query_count=4, join_count=2)
+@sql_count_checker(query_count=2, join_count=4)
 def test_dataframe_mask_with_np_array_cond_mismatched_labels():
     data = [1, 2, 3]
     cond = np.array([[False, True, False]]).T
@@ -596,8 +595,7 @@ def test_dataframe_mask_with_np_array_cond_mismatched_labels():
     )
 
 
-# One extra query to convert to native index for dataframe constructor when creating snow_other_df
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=1, join_count=3)
 def test_dataframe_mask_with_dataframe_cond_single_index_different_names():
     data = [1, 2, 3]
     cond = [False, True, False]
@@ -624,8 +622,7 @@ def test_dataframe_mask_with_dataframe_cond_single_index_different_names():
     )
 
 
-# One extra query to convert to native index for dataframe constructor when creating snow_other_df
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=1, join_count=3)
 def test_dataframe_mask_with_dataframe_cond_single_index_different_names_2():
     data = [1, 2, 3]
     cond = [False, True, False]
diff --git a/tests/integ/modin/frame/test_merge.py b/tests/integ/modin/frame/test_merge.py
index 80df6bc516..1bbbb80f93 100644
--- a/tests/integ/modin/frame/test_merge.py
+++ b/tests/integ/modin/frame/test_merge.py
@@ -922,8 +922,7 @@ def test_merge_outer_with_nan(dtype):
     _verify_merge(right, left, "outer", on="key")
 
 
-# Two extra queries to convert to native index for dataframe constructor when creating left and right
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=5, join_count=5)
 def test_merge_different_index_names():
     left = pd.DataFrame({"a": [1]}, index=pd.Index([1], name="c"))
     right = pd.DataFrame({"a": [1]}, index=pd.Index([1], name="d"))
diff --git a/tests/integ/modin/frame/test_rename.py b/tests/integ/modin/frame/test_rename.py
index 289fb6e159..a5595ec716 100644
--- a/tests/integ/modin/frame/test_rename.py
+++ b/tests/integ/modin/frame/test_rename.py
@@ -104,8 +104,7 @@ def test_rename(self, snow_float_frame):
             assert_index_equal(renamed.index, native_pd.Index(["A", "B", "foo", "bar"]))
 
         # index with name
-        # Two extra queries, one for converting to native pandas in renamer Dataframe constructor, one to get the name
-        with SqlCounter(query_count=2, join_count=1):
+        with SqlCounter(query_count=1, join_count=2):
             index = Index(["foo", "bar"], name="name")
             renamer = DataFrame(data, index=index)
             renamed = renamer.rename(index={"foo": "bar", "bar": "foo"})
diff --git a/tests/integ/modin/groupby/test_groupby_apply.py b/tests/integ/modin/groupby/test_groupby_apply.py
index adfcd7f46b..e83fcbe00b 100644
--- a/tests/integ/modin/groupby/test_groupby_apply.py
+++ b/tests/integ/modin/groupby/test_groupby_apply.py
@@ -1073,6 +1073,11 @@ class TestSeriesGroupBy:
     @pytest.mark.parametrize("by", ["string_col_1", ["index", "string_col_1"], "index"])
     def test_dataframe_groupby_getitem(self, by, func, dropna, group_keys, sort):
         """Test apply() on a SeriesGroupBy that we get by DataFrameGroupBy.__getitem__"""
+        qc = (
+            6
+            if group_keys is False and not func == get_scalar_from_numeric_series
+            else 5
+        )
         if (
             func in (get_dataframe_from_numeric_series, get_series_from_numeric_series)
             and not dropna
@@ -1082,7 +1087,7 @@ def test_dataframe_groupby_getitem(self, by, func, dropna, group_keys, sort):
             # (pd.NA, k1) that we cannot serialize.
             pytest.xfail(reason="SNOW-1229760")
         with SqlCounter(
-            query_count=6 if group_keys is False else 5,
+            query_count=qc,
             udtf_count=UDTF_COUNT,
             join_count=2,
         ):
diff --git a/tests/integ/modin/resample/test_resample_fillna.py b/tests/integ/modin/resample/test_resample_fillna.py
index 6be0388f27..96ad514a2b 100644
--- a/tests/integ/modin/resample/test_resample_fillna.py
+++ b/tests/integ/modin/resample/test_resample_fillna.py
@@ -44,7 +44,7 @@ def test_resample_fill(interval, agg_func):
 
 @interval
 @agg_func
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_fill_ser(interval, agg_func):
     datecol = native_pd.to_datetime(
         [
@@ -59,8 +59,9 @@ def test_resample_fill_ser(interval, agg_func):
         ],
         format="mixed",
     )
+    # TODO: SNOW-1638397 See if it's possible to use data={"a": range(len(datecol))} instead.
     eval_snowpark_pandas_result(
-        *create_test_series({"a": range(len(datecol))}, index=datecol),
+        *create_test_series({"2024-01-02": list(range(len(datecol)))}, index=datecol),
         lambda df: getattr(df.resample(rule=f"{interval}D"), agg_func)(),
         check_freq=False,
     )
@@ -138,7 +139,7 @@ def test_resample_ffill_missing_in_middle(interval, agg_func):
 
 @interval
 @agg_func
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_ffill_ser_missing_in_middle(interval, agg_func):
     datecol = native_pd.to_datetime(
         [
@@ -152,8 +153,9 @@ def test_resample_ffill_ser_missing_in_middle(interval, agg_func):
         ],
         format="mixed",
     )
+    # TODO: SNOW-1638397 See if it's possible to use data={"a": range(len(datecol))} instead.
     eval_snowpark_pandas_result(
-        *create_test_series({"a": range(len(datecol))}, index=datecol),
+        *create_test_series({"2024-01-01": list(range(len(datecol)))}, index=datecol),
         lambda df: getattr(df.resample(rule=f"{interval}D"), agg_func)(),
         check_freq=False,
     )
diff --git a/tests/integ/modin/series/test_rank.py b/tests/integ/modin/series/test_rank.py
index 3a855f1142..47d46dc0f3 100644
--- a/tests/integ/modin/series/test_rank.py
+++ b/tests/integ/modin/series/test_rank.py
@@ -28,7 +28,6 @@
 ]
 
 
-@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("data, index", TEST_RANK_DATA)
 @pytest.mark.parametrize(
     "method",

From 8c78f8d88944a2a257ad55af9affc03862ebf206 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Fri, 23 Aug 2024 16:44:54 -0700
Subject: [PATCH 12/42] fix dict case

---
 src/snowflake/snowpark/modin/pandas/series.py | 6 +-----
 tests/integ/modin/series/test_rank.py         | 1 +
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py
index 2802000451..7ac3172328 100644
--- a/src/snowflake/snowpark/modin/pandas/series.py
+++ b/src/snowflake/snowpark/modin/pandas/series.py
@@ -166,11 +166,7 @@ def __init__(
                 # 3. Perform .loc[] on `data` to select the rows that are in `index`.
                 query_compiler = data.loc[index]._query_compiler
 
-        elif (
-            is_dict_like(data)
-            and not is_list_like(data)
-            and not isinstance(data, (pandas.Series, Series))
-        ):
+        elif is_dict_like(data) and not isinstance(data, (pandas.Series, Series)):
             if name is None:
                 name = MODIN_UNNAMED_SERIES_LABEL
             # If the data is a dictionary, we need to convert it to a query compiler and set the index.
diff --git a/tests/integ/modin/series/test_rank.py b/tests/integ/modin/series/test_rank.py
index 47d46dc0f3..3a855f1142 100644
--- a/tests/integ/modin/series/test_rank.py
+++ b/tests/integ/modin/series/test_rank.py
@@ -28,6 +28,7 @@
 ]
 
 
+@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("data, index", TEST_RANK_DATA)
 @pytest.mark.parametrize(
     "method",

From 7970101f6cb47ae6bc3381815361ce6a8b4fe1e9 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Fri, 23 Aug 2024 17:23:07 -0700
Subject: [PATCH 13/42] more test case fixes

---
 tests/integ/modin/series/test_rank.py    | 3 ++-
 tests/integ/modin/series/test_reindex.py | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/integ/modin/series/test_rank.py b/tests/integ/modin/series/test_rank.py
index 3a855f1142..2544f12e43 100644
--- a/tests/integ/modin/series/test_rank.py
+++ b/tests/integ/modin/series/test_rank.py
@@ -29,7 +29,8 @@
 
 
 @sql_count_checker(query_count=1)
-@pytest.mark.parametrize("data, index", TEST_RANK_DATA)
+# Skipping last test case since it uses MultiIndex.
+@pytest.mark.parametrize("data, index", TEST_RANK_DATA[:-1])
 @pytest.mark.parametrize(
     "method",
     ["min", "dense", "first", "max", "average"],
diff --git a/tests/integ/modin/series/test_reindex.py b/tests/integ/modin/series/test_reindex.py
index 97e2931dfb..14ab6fa6cf 100644
--- a/tests/integ/modin/series/test_reindex.py
+++ b/tests/integ/modin/series/test_reindex.py
@@ -302,13 +302,14 @@ def test_reindex_index_non_overlapping_index():
 
 @sql_count_checker(query_count=1, join_count=2)
 def test_reindex_index_non_overlapping_datetime_index():
+    # TODO: SNOW-1638397 See if it's possible to use data={"prices": [100, 101, np.nan, 100, 89, 88]} instead.
     date_index = native_pd.date_range("1/1/2010", periods=6, freq="D")
     native_series = native_pd.Series(
-        {"prices": [100, 101, np.nan, 100, 89, 88]}, index=date_index
+        {"1/1/2023": [100, 101, np.nan, 100, 89, 88]}, index=date_index
     )
     date_index = pd.date_range("1/1/2010", periods=6, freq="D")
     snow_series = pd.Series(
-        {"prices": [100, 101, np.nan, 100, 89, 88]}, index=date_index
+        {"1/1/2023": [100, 101, np.nan, 100, 89, 88]}, index=date_index
     )
 
     def perform_reindex(series):

From f3de1c36c85cdfac4af52aee5dd460de43508e6a Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Mon, 26 Aug 2024 11:11:40 -0700
Subject: [PATCH 14/42] correct the logic for series created with dict and
 index

---
 src/snowflake/snowpark/modin/pandas/series.py            | 8 +++++---
 src/snowflake/snowpark/modin/plugin/docstrings/series.py | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py
index 7ac3172328..0a868f2687 100644
--- a/src/snowflake/snowpark/modin/pandas/series.py
+++ b/src/snowflake/snowpark/modin/pandas/series.py
@@ -179,11 +179,13 @@ def __init__(
             )._query_compiler
             if index is not None:
                 index = index if isinstance(index, Index) else Index(index)
-                query_compiler = (
-                    query_compiler.create_qc_with_data_and_index_joined_on_index(
+                data = Series(
+                    query_compiler=query_compiler.create_qc_with_data_and_index_joined_on_index(
                         index._query_compiler
                     )
                 )
+                # Perform .loc[] on `data` to select the rows that are in `index`.
+                query_compiler = data.loc[index]._query_compiler
 
         if query_compiler is None:
             # Defaulting to pandas
@@ -194,7 +196,6 @@ def __init__(
                     and data.name is not None
                 ):
                     name = data.name
-
             new_index = index
             if isinstance(index, Index):
                 # Skip turning this into a native pandas object here since this issues an extra query.
@@ -217,6 +218,7 @@ def __init__(
                 query_compiler = query_compiler.set_index_from_series(
                     index.to_series()._query_compiler
                 )
+
         self._query_compiler = query_compiler.columnarize()
         if name is not None:
             self.name = name
diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py
index 6e48a7e57f..16ed09c19a 100644
--- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py
+++ b/src/snowflake/snowpark/modin/plugin/docstrings/series.py
@@ -76,7 +76,7 @@ class Series:
     c    3
     dtype: int64
 
-    The keys of the dictionary match with the Index values, hence the Index
+    The keys of the dictionary match with the Index values, hence the dictionary
     values have no effect.
 
     >>> d = {'a': 1, 'b': 2, 'c': 3}

From 82728bfc1aae9bae62d8dba2e837ef58ca4dbcbc Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Mon, 26 Aug 2024 12:08:02 -0700
Subject: [PATCH 15/42] fix query counts

---
 tests/integ/modin/series/test_reindex.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integ/modin/series/test_reindex.py b/tests/integ/modin/series/test_reindex.py
index 14ab6fa6cf..8fc0b77f1b 100644
--- a/tests/integ/modin/series/test_reindex.py
+++ b/tests/integ/modin/series/test_reindex.py
@@ -259,7 +259,7 @@ def perform_reindex(series):
     )
 
 
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=3)
 @pytest.mark.parametrize("limit", [None, 1, 2, 100])
 @pytest.mark.parametrize("method", ["bfill", "backfill", "pad", "ffill"])
 def test_reindex_index_datetime_with_fill(limit, method):
@@ -300,7 +300,7 @@ def test_reindex_index_non_overlapping_index():
     )
 
 
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=3)
 def test_reindex_index_non_overlapping_datetime_index():
     # TODO: SNOW-1638397 See if it's possible to use data={"prices": [100, 101, np.nan, 100, 89, 88]} instead.
     date_index = native_pd.date_range("1/1/2010", periods=6, freq="D")

From 1577ddc66fcee436ef780f9c5036acfda20116cc Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Mon, 26 Aug 2024 12:49:41 -0700
Subject: [PATCH 16/42] fix join count

---
 tests/integ/modin/resample/test_resample_fillna.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integ/modin/resample/test_resample_fillna.py b/tests/integ/modin/resample/test_resample_fillna.py
index 96ad514a2b..d4e959123a 100644
--- a/tests/integ/modin/resample/test_resample_fillna.py
+++ b/tests/integ/modin/resample/test_resample_fillna.py
@@ -44,7 +44,7 @@ def test_resample_fill(interval, agg_func):
 
 @interval
 @agg_func
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=2, join_count=5)
 def test_resample_fill_ser(interval, agg_func):
     datecol = native_pd.to_datetime(
         [
@@ -139,7 +139,7 @@ def test_resample_ffill_missing_in_middle(interval, agg_func):
 
 @interval
 @agg_func
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=2, join_count=5)
 def test_resample_ffill_ser_missing_in_middle(interval, agg_func):
     datecol = native_pd.to_datetime(
         [

From 8903f60a3585dc3d49848d9f7dbe7d4fd5c674c7 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Wed, 4 Sep 2024 13:31:46 -0700
Subject: [PATCH 17/42] refactor series and df

---
 .../snowpark/modin/pandas/dataframe.py        | 194 ++++++++----------
 src/snowflake/snowpark/modin/pandas/series.py | 119 ++++-------
 .../compiler/snowflake_query_compiler.py      | 127 +++---------
 .../test_df_series_creation_with_index.py     |  12 +-
 tests/integ/test_dataframe.py                 |   3 +
 5 files changed, 172 insertions(+), 283 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py
index 71b07c9684..0005fc787a 100644
--- a/src/snowflake/snowpark/modin/pandas/dataframe.py
+++ b/src/snowflake/snowpark/modin/pandas/dataframe.py
@@ -84,7 +84,6 @@
 )
 from snowflake.snowpark.modin.pandas.utils import (
     create_empty_native_pandas_frame,
-    from_non_pandas,
     from_pandas,
     is_scalar,
     raise_if_native_pandas_objects,
@@ -159,101 +158,67 @@ def __init__(
         if isinstance(index, DataFrame):  # pandas raises the same error
             raise ValueError("Index data must be 1-dimensional")
 
-        # Engine.subscribe(_update_engine)
+        if query_compiler is not None:
+            # CASE 1: query_compiler
+            # If a query_compiler is passed in, only use the query_compiler and name fields to create a new Series.
+            self._query_compiler = query_compiler
+            return
+
+        # The logic followed here is:
+        # 1. Create a query_compiler from the provided data. If columns are provided, add/select the columns.
+        # 2. If an index is provided, set the index through reindex.
+        # 3. If the data is a DataFrame, perform loc to select the required index and columns from the DataFrame.
+        # 4. The resultant query_compiler is then set as the query_compiler for the DataFrame.
+
         if isinstance(data, Index):
-            # If the data is an Index object, we need to convert it to a DataFrame to make sure
-            # that the values are in the correct format -- as a data column, not an index column.
-            # Additionally, if an index is provided, converting it to an Index object ensures that
-            # its values are an index column.
-            # We set the column name if it is not in the provided Index `data`.
+            # CASE 2: data is a Snowpark pandas Index
+            # If the data is an Index object, convert it to a DataFrame to make sure that the values are in the
+            # correct format: the values are a data column, not an index column.
             if data.name is None:
                 new_name = 0 if columns is None else columns[0]
             else:
                 new_name = data.name
             query_compiler = data.to_frame(index=False, name=new_name)._query_compiler
-            if index is not None:
-                index = index if isinstance(index, Index) else Index(index)
-                query_compiler = query_compiler.create_qc_with_index_data_and_qc_index(
-                    index._query_compiler
-                )
 
-        if isinstance(data, (DataFrame, Series)):
-            self._query_compiler = data._query_compiler.copy()
-            if isinstance(data, Series):
-                # We set the column name if it is not in the provided Series `data`.
-                if data.name is None:
-                    self.columns = [0] if columns is None else columns
+        elif isinstance(data, Series):
+            # CASE 3: data is a Snowpark pandas Series
+            query_compiler = data._query_compiler.copy()
+            # We set the column name if it is not in the provided Series `data`.
+            if data.name is None:
+                self.columns = [0] if columns is None else columns
+            elif columns is not None and data.name not in columns:
                 # If the columns provided are not in the named Series, pandas clears
                 # the DataFrame and sets columns to the columns provided.
-                elif columns is not None and data.name not in columns:
-                    self._query_compiler = from_pandas(
-                        self.__constructor__(columns=columns)
-                    )._query_compiler
-                if index is not None:
-                    # The `index` parameter is used to select the rows from `data` that will be in the resultant
-                    # DataFrame. If a value in `index` is not present in `data`'s index, it will be filled with a
-                    # NaN value.
-                    # 1. The `index` is converted to an Index object so that the index values are in an index column.
-                    index = index if isinstance(index, Index) else Index(index)
-                    # 2. A right outer join is performed between `data` and `index` to create a Series object where
-                    #    any index values in `data`'s index that are not in `index` are filled with NaN.
-                    data = Series(
-                        query_compiler=data._query_compiler.create_qc_with_data_and_index_joined_on_index(
-                            index._query_compiler
-                        ),
-                        name=0 if data.name is None else data.name,
-                    )
-                    # 3. Perform .loc[] on `data` to select the rows that are in the `index`.
-                    self._query_compiler = data.loc[index]._query_compiler
-
-            elif columns is None and index is None:
+                query_compiler = from_pandas(
+                    self.__constructor__(columns=columns)
+                )._query_compiler
+
+        elif isinstance(data, DataFrame):
+            # CASE 5: data is a Snowpark pandas DataFrame
+            query_compiler = data._query_compiler.copy()
+
+            if columns is None and index is None:
+                # If the new DataFrame has the same columns and index as the original DataFrame,
+                # the query compiler is shared and kept track of as a sibling.
+                self._query_compiler = query_compiler
                 data._add_sibling(self)
-
-            else:
-                # The `columns` parameter is used to select the columns from `data` that will be in the resultant
-                # DataFrame. If a value in `columns` is not present in `data`'s columns, it will be added as a
-                # new column filled with NaN values. These columns are tracked by the `extra_columns` variable.
-                extra_columns = None
-                if columns is None:
-                    # In case `columns` is not provided, `columns` is set to slice(None) to select all columns.
-                    columns = slice(None)
-                else:
-                    extra_columns = [col for col in columns if col not in data.columns]
-
-                # The `index` parameter is used to select the rows from `data` that will be in the resultant DataFrame.
-                # If a value in `index` is not present in `data`'s index, it will be filled with a NaN value.
-                if index is None:
-                    # In case `index` is not provided, `index` is set to slice(None) to select all rows.
-                    index = slice(None)
-                    data = DataFrame(
-                        query_compiler=data._query_compiler.create_qc_with_data_and_index_joined_on_index(
-                            extra_columns=extra_columns
-                        )
-                    )
-                else:
-                    # The `index` is converted to an Index object so that the index values are in an index column.
-                    index = index if isinstance(index, Index) else Index(index)
-                    # A right outer join is performed between `data` and `index` to create a DataFrame object where any
-                    # index values in `data`'s index that are not in `index` are filled with NaN.
-                    data = DataFrame(
-                        query_compiler=data._query_compiler.create_qc_with_data_and_index_joined_on_index(
-                            index._query_compiler,
-                            extra_columns=extra_columns,
-                        )
-                    )
-                # 3. Perform .loc[] on `data` to select the rows and columns that are in `index` and `columns`.
-                self._query_compiler = data.loc[index, columns]._query_compiler
-
-        # Check the type of data and use the appropriate constructor
-        elif query_compiler is None:
-            distributed_frame = from_non_pandas(data, index, columns, dtype)
-            if distributed_frame is not None:
-                self._query_compiler = distributed_frame._query_compiler
                 return
+            # The `columns` parameter is used to select the columns from `data` that will be in the resultant
+            # DataFrame. If a value in `columns` is not present in `data`'s columns, it will be added as a
+            # new column filled with NaN values. These columns are tracked by the `extra_columns` variable.
+            extra_columns = [col for col in columns if col not in data.columns]
+            query_compiler = data._query_compiler.create_qc_with_extra_columns(
+                extra_columns
+            )
 
+        else:
+            # CASE 5: Non-Snowpark pandas data
             if isinstance(data, pandas.Index):
+                # CASE 5.B: data is a pandas Index
                 pass
+
             elif is_list_like(data) and not is_dict_like(data):
+                # CASE 5.C: data is list-like
                 old_dtype = getattr(data, "dtype", None)
                 values = [
                     obj._to_pandas() if isinstance(obj, Series) else obj for obj in data
@@ -265,30 +230,33 @@ def __init__(
                         data = type(data)(values, dtype=old_dtype)
                     except TypeError:
                         data = values
+
             elif is_dict_like(data) and not isinstance(
-                data, (pandas.Series, Series, pandas.DataFrame, DataFrame)
+                data, (pandas.Series, pandas.DataFrame)
             ):
+                # CASE 5.D: data is dict-like
                 if columns is not None:
                     data = {key: value for key, value in data.items() if key in columns}
 
                 if len(data) and all(isinstance(v, Series) for v in data.values()):
+                    # Special case: data is a dictionary where all the values are Snowpark pandas Series
                     from .general import concat
 
                     new_qc = concat(
                         data.values(), axis=1, keys=data.keys()
                     )._query_compiler
-
                     if dtype is not None:
                         new_qc = new_qc.astype({col: dtype for col in new_qc.columns})
                     if index is not None:
-                        new_qc = new_qc.reindex(
-                            axis=0, labels=try_convert_index_to_native(index)
-                        )
+                        if isinstance(index, Index):
+                            index = index.to_series()._query_compiler
+                        elif isinstance(index, Series):
+                            index = index._query_compiler
+                        new_qc = new_qc.reindex(axis=0, labels=index)
                     if columns is not None:
                         new_qc = new_qc.reindex(
                             axis=1, labels=try_convert_index_to_native(columns)
                         )
-
                     self._query_compiler = new_qc
                     return
 
@@ -301,10 +269,10 @@ def __init__(
                     all(not is_scalar(v) and len(v) == 1 for v in data.values())
                     and index is not None
                 ):
-                    # Special case when creating:
-                    # >>> DataFrame({"A": [1], "V": [2]}, native_pd.Index(["A", "B", "C"]), name="none")
+                    # Special case: the values in the dictionary are all non-scalar objects of length 1
+                    # >>> DataFrame({"A": [1], "V": [2]}, native_pd.Index(["A", "B", "C"]), name="cake")
                     #       A  V
-                    # none
+                    # cake
                     # A     1  2
                     # B     1  2  <--- the first row is copied into the rest of the rows.
                     # C     1  2
@@ -316,26 +284,36 @@ def __init__(
                     )._query_compiler
                     return
 
-            new_index = index
+            query_compiler = from_pandas(
+                pandas.DataFrame(
+                    data=data,
+                    columns=try_convert_index_to_native(columns),
+                    dtype=dtype,
+                    copy=copy,
+                )
+            )._query_compiler
+
+        if index is not None:
+            # The `index` parameter is used to select the rows from `data` that will be in the resultant DataFrame.
+            # If a value in `index` is not present in `data`'s index, it will be filled with a NaN value.
             if isinstance(index, Index):
-                # Skip turning this into a native pandas object here since this issues an extra query.
-                # Instead, first get the query compiler from native pandas and then add the index column.
-                new_index = None
-            pandas_df = pandas.DataFrame(
-                data=try_convert_index_to_native(data),
-                index=try_convert_index_to_native(new_index),
-                columns=try_convert_index_to_native(columns),
-                dtype=dtype,
-                copy=copy,
+                index = index.to_series()._query_compiler
+            elif isinstance(index, Series):
+                index = index._query_compiler
+            query_compiler = query_compiler.reindex(axis=0, labels=index)
+
+        if isinstance(data, DataFrame):
+            # To select the required index and columns for the resultant DataFrame,
+            # perform .loc[] on the created query compiler.
+            index = slice(None) if index is None else index
+            columns = slice(None) if columns is None else columns
+            query_compiler = (
+                DataFrame(query_compiler=query_compiler)
+                .loc[index, columns]
+                ._query_compiler
             )
-            query_compiler = from_pandas(pandas_df)._query_compiler
-            if isinstance(index, Index):
-                query_compiler = query_compiler.create_qc_with_index_data_and_qc_index(
-                    index._query_compiler
-                )
-            self._query_compiler = query_compiler
-        else:
-            self._query_compiler = query_compiler
+
+        self._query_compiler = query_compiler
 
     def __repr__(self):
         """
diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py
index 0a868f2687..83b98f930c 100644
--- a/src/snowflake/snowpark/modin/pandas/series.py
+++ b/src/snowflake/snowpark/modin/pandas/series.py
@@ -52,11 +52,7 @@
 from pandas.util._validators import validate_bool_kwarg
 
 from snowflake.snowpark.modin.pandas.base import _ATTRS_NO_LOOKUP, BasePandasDataset
-from snowflake.snowpark.modin.pandas.utils import (
-    from_pandas,
-    is_scalar,
-    try_convert_index_to_native,
-)
+from snowflake.snowpark.modin.pandas.utils import from_pandas, is_scalar
 from snowflake.snowpark.modin.plugin._typing import DropKeep, ListLike
 from snowflake.snowpark.modin.plugin.utils.error_message import (
     ErrorMessage,
@@ -131,45 +127,43 @@ def __init__(
         # Engine.subscribe(_update_engine)
         from snowflake.snowpark.modin.plugin.extensions.index import Index
 
-        # Convert lazy index to Series without pulling the data to client.
+        if query_compiler:
+            # CASE 1: query_compiler
+            # If a query_compiler is passed in, only use the query_compiler and name fields to create a new Series.
+            self._query_compiler = query_compiler.columnarize()
+            if name is not None:
+                self.name = name
+            return
+
+        # The logic followed here is:
+        # 1. Create a query_compiler from the provided data.
+        # 2. If an index is provided, set the index. This is either through set_index or reindex.
+        # 3. The resultant query_compiler is columnarized and set as the query_compiler for the Series.
+        # 4. If a name is provided, set the name.
+
         if isinstance(data, Index):
-            # If the data is an Index object, we need to convert it to a Series to make sure
-            # that the values are in the correct format -- as a data column, not an index column.
-            # Additionally, if an index is provided, converting it to an Index object ensures that
-            # its values are an index column.
+            # CASE 2: Index
+            # If the data is an Index object, convert it to a Series, and get the query_compiler.
             query_compiler = (
                 data.to_series(index=None, name=name)
                 .reset_index(drop=True)
                 ._query_compiler
             )
 
-            if index is not None:
-                index = index if isinstance(index, Index) else Index(index)
-                query_compiler = query_compiler.create_qc_with_index_data_and_qc_index(
-                    index._query_compiler
-                )
         elif isinstance(data, type(self)):
+            # CASE 3: Series
+            # If the data is a Series object, copy the query_compiler.
             query_compiler = data._query_compiler.copy()
-            if index is not None:
-                # The `index` parameter is used to select the rows from `data` that will be in the resultant Series.
-                # If a value in `index` is not present in `data`'s index, it will be filled with a NaN value.
-                # 1. The `index` is converted to an Index object so that the index values are in an index column.
-                index = index if isinstance(index, Index) else Index(index)
-                # 2. A right outer join is performed between `data` and `index` to create a Series object where any
-                #    index values in `data`'s index that are not in `index` are filled with NaN.
-                data = Series(
-                    query_compiler=data._query_compiler.create_qc_with_data_and_index_joined_on_index(
-                        index._query_compiler
-                    ),
-                    name=data.name,
-                )
-                # 3. Perform .loc[] on `data` to select the rows that are in `index`.
-                query_compiler = data.loc[index]._query_compiler
 
-        elif is_dict_like(data) and not isinstance(data, (pandas.Series, Series)):
-            if name is None:
-                name = MODIN_UNNAMED_SERIES_LABEL
-            # If the data is a dictionary, we need to convert it to a query compiler and set the index.
+        else:
+            # CASE 4: Non-Snowpark pandas data
+            # If the data is not a Snowpark pandas object, convert it to a query compiler.
+            name = MODIN_UNNAMED_SERIES_LABEL if name is None else name
+            if (
+                isinstance(data, (pandas.Series, pandas.Index))
+                and data.name is not None
+            ):
+                name = data.name
             query_compiler = from_pandas(
                 pandas.DataFrame(
                     pandas.Series(
@@ -177,48 +171,25 @@ def __init__(
                     )
                 )
             )._query_compiler
-            if index is not None:
-                index = index if isinstance(index, Index) else Index(index)
-                data = Series(
-                    query_compiler=query_compiler.create_qc_with_data_and_index_joined_on_index(
-                        index._query_compiler
-                    )
-                )
-                # Perform .loc[] on `data` to select the rows that are in `index`.
-                query_compiler = data.loc[index]._query_compiler
-
-        if query_compiler is None:
-            # Defaulting to pandas
-            if name is None:
-                name = MODIN_UNNAMED_SERIES_LABEL
-                if (
-                    isinstance(data, (pandas.Series, pandas.Index, pd.Index))
-                    and data.name is not None
-                ):
-                    name = data.name
-            new_index = index
-            if isinstance(index, Index):
-                # Skip turning this into a native pandas object here since this issues an extra query.
-                # Instead, first get the query compiler from native pandas and then add the index column.
-                new_index = None
-            query_compiler = from_pandas(
-                pandas.DataFrame(
-                    pandas.Series(
-                        data=try_convert_index_to_native(data),
-                        index=new_index,
-                        dtype=dtype,
-                        name=name,
-                        copy=copy,
-                        fastpath=fastpath,
-                    )
-                )
-            )._query_compiler
-            if isinstance(index, Index):
+
+        if index is not None:
+            if is_dict_like(data) or isinstance(data, (type(self))):
+                # The `index` parameter is used to select the rows from `data` that will be in the resultant Series.
+                # If a value in `index` is not present in `data`'s index, it will be filled with a NaN value.
+                if isinstance(index, Index):
+                    index = index.to_series()._query_compiler
+                elif isinstance(index, Series):
+                    index = index._query_compiler
+                query_compiler = query_compiler.reindex(axis=0, labels=index)
+
+            else:
                 # Performing set index to directly set the index column (joining on row-position instead of index).
-                query_compiler = query_compiler.set_index_from_series(
-                    index.to_series()._query_compiler
-                )
+                index_qc = (
+                    index if isinstance(index, Series) else Series(index)
+                )._query_compiler
+                query_compiler = query_compiler.set_index_from_series(index_qc)
 
+        # Set the query compiler and name fields.
         self._query_compiler = query_compiler.columnarize()
         if name is not None:
             self.name = name
diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
index 21d1d9c4b7..8057581128 100644
--- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
+++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
@@ -17508,117 +17508,52 @@ def tz_convert(self, *args: Any, **kwargs: Any) -> None:
     def tz_localize(self, *args: Any, **kwargs: Any) -> None:
         ErrorMessage.method_not_implemented_error("tz_convert", "BasePandasDataset")
 
-    def create_qc_with_index_data_and_qc_index(
-        self, index_qc: "SnowflakeQueryCompiler"
+    def create_qc_with_extra_columns(
+        self, extra_columns: List[Hashable]
     ) -> "SnowflakeQueryCompiler":
         """
-        This is a helper function for creating a DataFrame/Series where the data is an Index
-        and an index is provided.
-        Before this method is called, the provided index is converted to an Index object;
-        the query compilers of the data and index are then joined.
+        This is a helper function for creating a DataFrame where the data is a DataFrame object. Sometimes, columns
+        not present in the `data` DataFrame can be passed as arguments - these are added to the resultant DataFrame
+        as NaN columns.
 
         Parameters
         ----------
-        index_qc : SnowflakeQueryCompiler
-            The query compiler of the index to be joined with the data.
-
-        Returns
-        -------
-        SnowflakeQueryCompiler
-            A new query compiler with the data and index joined.
-        """
-        self_frame = self._modin_frame.ensure_row_position_column()
-        other_frame = index_qc._modin_frame.ensure_row_position_column()
-
-        new_internal_frame, _ = join_utils.join(
-            self_frame,
-            other_frame,
-            how="left",
-            left_on=[self_frame.row_position_snowflake_quoted_identifier],
-            right_on=[other_frame.row_position_snowflake_quoted_identifier],
-            inherit_join_index=InheritJoinIndex.FROM_RIGHT,
-        )
-
-        return SnowflakeQueryCompiler(new_internal_frame)
-
-    def create_qc_with_data_and_index_joined_on_index(
-        self,
-        index_qc: Optional["SnowflakeQueryCompiler"] = None,
-        extra_columns: Optional[List[Hashable]] = None,
-    ) -> "SnowflakeQueryCompiler":
-        """
-        This is a helper function for creating a DataFrame/Series where the data is a DataFrame/Series object.
-        This is a special case since only the values where the index value matches in the `data` and `index` provided
-        take on an actual value from the given `data`. Otherwise, they take on a NaN value.
-
-        For instance,
-
-        >>> data = pd.Series(["A", "B", "C", "D"], index=[1.1, 2.2, 3, 4], name="index series name")
-        >>> index = pd.Index([1, 2, 3, 4], name="some name")
-        >>> df = pd.DataFrame(data=data, index=index)
-        >>> df  # doctest: +SKIP
-                  index series name
-        some name
-        1                       NaN
-        2                       NaN
-        3                         C
-        4                         D
-
-        Notice how only the data for index values 3 and 4 have an actual value while 1 and 2 have a NaN value.
-        3 and 4 are values present in the index of the `data` and `index` provided. 1 and 2 are not present.
-
-        Parameters
-        ----------
-        index_qc : SnowflakeQueryCompiler, default None
-            The query compiler of the index to be joined with the data. If no query compiler is provided,
-            skip this join operation.
         extra_columns : list of hashable, default None
-            If the DataFrame being created has new columns that are not a part of the data, they can be passed here
-            and appended as NaN columns.
+            New columns that are not a part of the original query compiler
 
         Returns
         -------
         SnowflakeQueryCompiler
-            A new query compiler with the data and index joined.
+            A new query compiler with the new columns.
         """
         self_frame = self._modin_frame
 
-        if extra_columns:
-            # Append the new columns to the data's internal frame.
-            new_snowflake_quoted_identifiers = self._modin_frame.ordered_dataframe.generate_snowflake_quoted_identifiers(
+        if not extra_columns or len(extra_columns) == 0:
+            return self.copy()
+
+        # Append the new columns to the data's internal frame.
+        new_snowflake_quoted_identifiers = (
+            self._modin_frame.ordered_dataframe.generate_snowflake_quoted_identifiers(
                 pandas_labels=extra_columns,
                 excluded=self_frame.data_column_snowflake_quoted_identifiers,
             )
-            new_ordered_frame = append_columns(
-                self_frame.ordered_dataframe,
-                new_snowflake_quoted_identifiers,
-                [pandas_lit(np.nan)] * len(extra_columns),
-            )
-            self_frame = InternalFrame.create(
-                ordered_dataframe=new_ordered_frame,
-                data_column_pandas_labels=self_frame.data_column_pandas_labels
-                + extra_columns,
-                data_column_snowflake_quoted_identifiers=self_frame.data_column_snowflake_quoted_identifiers
-                + new_snowflake_quoted_identifiers,
-                data_column_pandas_index_names=self_frame.data_column_pandas_index_names,
-                index_column_pandas_labels=self_frame.index_column_pandas_labels,
-                index_column_snowflake_quoted_identifiers=self_frame.index_column_snowflake_quoted_identifiers,
-                data_column_types=None,
-                index_column_types=None,
-            )
-
-        if index_qc is None:
-            new_internal_frame = self_frame
-        else:
-            # Join the index and data internal frames.
-            other_frame = index_qc._modin_frame
-            new_internal_frame, _ = join_utils.join(
-                other_frame,
-                self_frame,
-                how="outer",
-                left_on=other_frame.index_column_snowflake_quoted_identifiers,
-                right_on=self_frame.index_column_snowflake_quoted_identifiers,
-                inherit_join_index=InheritJoinIndex.FROM_LEFT,
-            )
+        )
+        new_ordered_frame = append_columns(
+            self_frame.ordered_dataframe,
+            new_snowflake_quoted_identifiers,
+            [pandas_lit(np.nan)] * len(extra_columns),
+        )
+        new_internal_frame = InternalFrame.create(
+            ordered_dataframe=new_ordered_frame,
+            data_column_pandas_labels=self_frame.data_column_pandas_labels
+            + extra_columns,
+            data_column_snowflake_quoted_identifiers=self_frame.data_column_snowflake_quoted_identifiers
+            + new_snowflake_quoted_identifiers,
+            data_column_pandas_index_names=self_frame.data_column_pandas_index_names,
+            index_column_pandas_labels=self_frame.index_column_pandas_labels,
+            index_column_snowflake_quoted_identifiers=self_frame.index_column_snowflake_quoted_identifiers,
+            data_column_types=None,
+            index_column_types=None,
+        )
 
         return SnowflakeQueryCompiler(new_internal_frame)
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
index a1512d8ced..391c3c71d9 100644
--- a/tests/integ/modin/index/test_df_series_creation_with_index.py
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -47,14 +47,16 @@ def obj_type_helper(obj_type: str) -> tuple:
     ],
 )
 @pytest.mark.parametrize("obj_type", ["series", "df"])
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_create_with_index_as_data(native_idx, obj_type):
     """
     Creating a Series where the data is an Index.
     """
     snow_idx = pd.Index(native_idx)
-    assert_equal_func, snow_obj, native_obj, _ = obj_type_helper(obj_type)
-    assert_equal_func(snow_obj(snow_idx), native_obj(native_idx))
+    assert_equal_func, snow_obj, native_obj, kwargs = obj_type_helper(obj_type)
+    assert_equal_func(
+        snow_obj(snow_idx), native_obj(native_idx), check_dtype=False, **kwargs
+    )
 
 
 @pytest.mark.parametrize(
@@ -202,7 +204,7 @@ def test_create_with_index_as_data_and_series_as_index(
     ],
 )
 @pytest.mark.parametrize("obj_type", ["series", "df"])
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_create_with_series_as_data_and_index_as_index(
     native_series, native_index, obj_type
 ):
@@ -476,7 +478,7 @@ def test_create_df_with_df_index_negative():
 def test_create_df_with_dict_as_data_and_index_as_index():
     """
     Special case when creating:
-    >>> DataFrame({"A": [1], "V": [2]}, native_pd.Index(["A", "B", "C"]), name="none")   # doctest: +SKIP
+    DataFrame({"A": [1], "V": [2]}, native_pd.Index(["A", "B", "C"]), name="none")
           A  V
     none
     A     1  2
diff --git a/tests/integ/test_dataframe.py b/tests/integ/test_dataframe.py
index dd46bb4835..4cf335678e 100644
--- a/tests/integ/test_dataframe.py
+++ b/tests/integ/test_dataframe.py
@@ -2843,6 +2843,7 @@ def test_write_table_with_clustering_keys_and_comment(
     reason="Clustering is a SQL feature",
     run=False,
 )
+@pytest.mark.skipif(IS_IN_STORED_PROC, reason="show parameters is not supported in SP")
 def test_write_table_with_all_options(session):
     try:
         table_name = Utils.random_name_for_temp_object(TempObjectType.TABLE)
@@ -2996,6 +2997,8 @@ def test_create_dynamic_table(session, table_name_1, is_transient):
         if is_transient:
             assert "create or replace transient" in ddl_result, ddl_result
         else:
+            if IS_IN_STORED_PROC:
+                pytest.skip("show parameters is not supported in SP")
             # data retention and max data extension time cannot be queried from get_ddl
             # we run a show parameters query to get the values for these parameters
             show_params_sql = (

From 67a07c11125a38206df8d8162009fd65af22ee77 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Wed, 4 Sep 2024 15:43:48 -0700
Subject: [PATCH 18/42] refactor dataframe and series constructors

---
 .../snowpark/modin/pandas/dataframe.py        | 36 +++++++++++++------
 src/snowflake/snowpark/modin/pandas/series.py | 15 ++++----
 .../compiler/snowflake_query_compiler.py      | 21 ++++++-----
 .../snowpark/modin/plugin/extensions/index.py |  9 +++--
 .../test_df_series_creation_with_index.py     |  4 +--
 5 files changed, 55 insertions(+), 30 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py
index da2afd6ccf..72e6d04019 100644
--- a/src/snowflake/snowpark/modin/pandas/dataframe.py
+++ b/src/snowflake/snowpark/modin/pandas/dataframe.py
@@ -187,8 +187,10 @@ def __init__(
             query_compiler = data._query_compiler.copy()
             # We set the column name if it is not in the provided Series `data`.
             if data.name is None:
-                self.columns = [0] if columns is None else columns
-            elif columns is not None and data.name not in columns:
+                query_compiler = query_compiler.set_columns(
+                    [0] if columns is None else columns
+                )
+            if columns is not None and data.name not in columns:
                 # If the columns provided are not in the named Series, pandas clears
                 # the DataFrame and sets columns to the columns provided.
                 query_compiler = from_pandas(
@@ -208,7 +210,10 @@ def __init__(
             # The `columns` parameter is used to select the columns from `data` that will be in the resultant
             # DataFrame. If a value in `columns` is not present in `data`'s columns, it will be added as a
             # new column filled with NaN values. These columns are tracked by the `extra_columns` variable.
-            extra_columns = [col for col in columns if col not in data.columns]
+            if data.columns is not None and columns is not None:
+                extra_columns = [col for col in columns if col not in data.columns]
+            else:
+                extra_columns = []
             query_compiler = data._query_compiler.create_qc_with_extra_columns(
                 extra_columns
             )
@@ -296,13 +301,24 @@ def __init__(
             )._query_compiler
 
         if index is not None:
-            # The `index` parameter is used to select the rows from `data` that will be in the resultant DataFrame.
-            # If a value in `index` is not present in `data`'s index, it will be filled with a NaN value.
-            if isinstance(index, Index):
-                index = index.to_series()._query_compiler
-            elif isinstance(index, Series):
-                index = index._query_compiler
-            query_compiler = query_compiler.reindex(axis=0, labels=index)
+            if isinstance(data, (type(self), Series)):
+                # The `index` parameter is used to select the rows from `data` that will be in the resultant DataFrame.
+                # If a value in `index` is not present in `data`'s index, it will be filled with a NaN value.
+                labels = index
+                if isinstance(labels, Index):
+                    labels = labels.to_series()._query_compiler
+                elif isinstance(labels, Series):
+                    labels = labels._query_compiler
+                else:
+                    labels = Index(labels).to_series()._query_compiler
+                query_compiler = query_compiler.reindex(axis=0, labels=labels)
+
+            else:
+                # Performing set index to directly set the index column (joining on row-position instead of index).
+                index_qc = (
+                    index if isinstance(index, Series) else Series(index)
+                )._query_compiler
+                query_compiler = query_compiler.set_index_from_series(index_qc)
 
         if isinstance(data, DataFrame):
             # To select the required index and columns for the resultant DataFrame,
diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py
index 042ee86805..01c84699e4 100644
--- a/src/snowflake/snowpark/modin/pandas/series.py
+++ b/src/snowflake/snowpark/modin/pandas/series.py
@@ -31,6 +31,7 @@
 import numpy.typing as npt
 import pandas
 from modin.pandas.accessor import CachedAccessor, SparseAccessor
+from modin.pandas.base import _ATTRS_NO_LOOKUP, BasePandasDataset
 from modin.pandas.iterator import PartitionIterator
 from pandas._libs.lib import NoDefault, is_integer, no_default
 from pandas._typing import (
@@ -51,7 +52,6 @@
 from pandas.core.series import _coerce_method
 from pandas.util._validators import validate_bool_kwarg
 
-from snowflake.snowpark.modin.pandas.base import _ATTRS_NO_LOOKUP, BasePandasDataset
 from snowflake.snowpark.modin.pandas.utils import from_pandas, is_scalar
 from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta
 from snowflake.snowpark.modin.plugin._typing import DropKeep, ListLike
@@ -177,11 +177,14 @@ def __init__(
             if is_dict_like(data) or isinstance(data, (type(self))):
                 # The `index` parameter is used to select the rows from `data` that will be in the resultant Series.
                 # If a value in `index` is not present in `data`'s index, it will be filled with a NaN value.
-                if isinstance(index, Index):
-                    index = index.to_series()._query_compiler
-                elif isinstance(index, Series):
-                    index = index._query_compiler
-                query_compiler = query_compiler.reindex(axis=0, labels=index)
+                labels = index
+                if isinstance(labels, Index):
+                    labels = labels.to_series()._query_compiler
+                elif isinstance(labels, Series):
+                    labels = labels._query_compiler
+                else:
+                    labels = Index(labels).to_series()._query_compiler
+                query_compiler = query_compiler.reindex(axis=0, labels=labels)
 
             else:
                 # Performing set index to directly set the index column (joining on row-position instead of index).
diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
index 3f9fe42115..123e2f7e54 100644
--- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
+++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
@@ -2260,7 +2260,7 @@ def any(
     def reindex(
         self,
         axis: int,
-        labels: Union[pandas.Index, "pd.Index", list[Any]],
+        labels: Union[pandas.Index, "pd.Index", list[Any], "SnowflakeQueryCompiler"],
         **kwargs: dict[str, Any],
     ) -> "SnowflakeQueryCompiler":
         """
@@ -2270,7 +2270,7 @@ def reindex(
         ----------
         axis : {0, 1}
             Axis to align labels along. 0 is for index, 1 is for columns.
-        labels : list-like
+        labels : list-like, SnowflakeQueryCompiler
             Index-labels to align with.
         method : {None, "backfill"/"bfill", "pad"/"ffill", "nearest"}
             Method to use for filling holes in reindexed frame.
@@ -2468,7 +2468,7 @@ def _add_columns_for_monotonicity_checks(
 
     def _reindex_axis_0(
         self,
-        labels: Union[pandas.Index, "pd.Index", list[Any]],
+        labels: Union[pandas.Index, "pd.Index", list[Any], "SnowflakeQueryCompiler"],
         **kwargs: dict[str, Any],
     ) -> "SnowflakeQueryCompiler":
         """
@@ -2476,7 +2476,7 @@ def _reindex_axis_0(
 
         Parameters
         ----------
-        labels : list-like
+        labels : list-like, SnowflakeQueryCompiler
             Index-labels to align with.
         method : {None, "backfill"/"bfill", "pad"/"ffill", "nearest"}
             Method to use for filling holes in reindexed frame.
@@ -2494,12 +2494,15 @@ def _reindex_axis_0(
         """
         self._raise_not_implemented_error_for_timedelta()
 
-        if isinstance(labels, native_pd.Index):
-            labels = pd.Index(labels)
-        if isinstance(labels, pd.Index):
-            new_index_qc = labels.to_series()._query_compiler
+        if isinstance(labels, SnowflakeQueryCompiler):
+            new_index_qc = labels
         else:
-            new_index_qc = pd.Series(labels)._query_compiler
+            if isinstance(labels, native_pd.Index):
+                labels = pd.Index(labels)
+            if isinstance(labels, pd.Index):
+                new_index_qc = labels.to_series()._query_compiler
+            else:
+                new_index_qc = pd.Series(labels)._query_compiler
 
         new_index_modin_frame = new_index_qc._modin_frame
         modin_frame = self._modin_frame
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/index.py b/src/snowflake/snowpark/modin/plugin/extensions/index.py
index 4e5cff3517..ea830561cd 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/index.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/index.py
@@ -49,7 +49,10 @@
 from pandas.core.dtypes.inference import is_hashable
 
 from snowflake.snowpark.modin.pandas import DataFrame, Series
-from snowflake.snowpark.modin.pandas.utils import try_convert_index_to_native
+from snowflake.snowpark.modin.pandas.utils import (
+    from_pandas,
+    try_convert_index_to_native,
+)
 from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta
 from snowflake.snowpark.modin.plugin._internal.timestamp_utils import DateTimeOrigin
 from snowflake.snowpark.modin.plugin.compiler.snowflake_query_compiler import (
@@ -214,8 +217,8 @@ def _init_query_compiler(
         elif isinstance(data, Index):
             query_compiler = data._query_compiler
         else:
-            query_compiler = DataFrame(
-                index=cls._NATIVE_INDEX_TYPE(data=data, **kwargs)
+            query_compiler = from_pandas(
+                native_pd.DataFrame(index=cls._NATIVE_INDEX_TYPE(data=data, **kwargs))
             )._query_compiler
 
         if len(query_compiler.columns):
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
index 391c3c71d9..d1bcb56651 100644
--- a/tests/integ/modin/index/test_df_series_creation_with_index.py
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -47,7 +47,7 @@ def obj_type_helper(obj_type: str) -> tuple:
     ],
 )
 @pytest.mark.parametrize("obj_type", ["series", "df"])
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=0)
 def test_create_with_index_as_data(native_idx, obj_type):
     """
     Creating a Series where the data is an Index.
@@ -471,7 +471,7 @@ def test_create_df_with_df_index_negative():
         ValueError,
         match=re.escape("Shape of passed values is (3, 1), indices imply (2, 1)"),
     ):
-        pd.DataFrame([1, 2, 3], index=[[1, 2], [3, 4], [5, 6]])
+        native_pd.DataFrame([1, 2, 3], index=[[1, 2], [3, 4], [5, 6]])
 
 
 @sql_count_checker(query_count=2, join_count=1)

From 145368080974e024d47035e048d3e468c621aa71 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Wed, 4 Sep 2024 17:51:57 -0700
Subject: [PATCH 19/42] fix docstring tests

---
 .../snowpark/modin/pandas/dataframe.py        | 26 ++++++++++++++++---
 src/snowflake/snowpark/modin/pandas/series.py | 19 +++++++++++---
 .../snowpark/modin/plugin/docstrings/base.py  |  2 +-
 tests/integ/modin/frame/test_loc.py           |  6 +++++
 .../integ/modin/groupby/test_groupby_apply.py |  2 +-
 5 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py
index 72e6d04019..6d487fd4bc 100644
--- a/src/snowflake/snowpark/modin/pandas/dataframe.py
+++ b/src/snowflake/snowpark/modin/pandas/dataframe.py
@@ -220,6 +220,7 @@ def __init__(
 
         else:
             # CASE 5: Non-Snowpark pandas data
+            dummy_index = None  # used in a special dict case
             if isinstance(data, pandas.Index):
                 # CASE 5.B: data is a pandas Index
                 pass
@@ -291,9 +292,15 @@ def __init__(
                     )._query_compiler
                     return
 
+                if all(is_scalar(k) and is_scalar(v) for k, v in data.items()):
+                    # Special case: All keys and values in the dict are all scalars, an index needs to be provided.
+                    # pd.DataFrame({'a': 1, 'b': 2}, index=[0])
+                    dummy_index = index
+
             query_compiler = from_pandas(
                 pandas.DataFrame(
                     data=data,
+                    index=dummy_index,
                     columns=try_convert_index_to_native(columns),
                     dtype=dtype,
                     copy=copy,
@@ -315,10 +322,21 @@ def __init__(
 
             else:
                 # Performing set index to directly set the index column (joining on row-position instead of index).
-                index_qc = (
-                    index if isinstance(index, Series) else Series(index)
-                )._query_compiler
-                query_compiler = query_compiler.set_index_from_series(index_qc)
+                if isinstance(index, Series):
+                    index_qc_list = [index._query_compiler]
+                elif isinstance(index, Index):
+                    index_qc_list = [index.to_series()._query_compiler]
+                elif isinstance(index, pd.MultiIndex):
+                    index_qc_list = [
+                        s._query_compiler
+                        for s in [
+                            pd.Series(index.get_level_values(level))
+                            for level in range(index.nlevels)
+                        ]
+                    ]
+                else:
+                    index_qc_list = [Series(index)._query_compiler]
+                query_compiler = query_compiler.set_index(index_qc_list)
 
         if isinstance(data, DataFrame):
             # To select the required index and columns for the resultant DataFrame,
diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py
index 01c84699e4..439591be0a 100644
--- a/src/snowflake/snowpark/modin/pandas/series.py
+++ b/src/snowflake/snowpark/modin/pandas/series.py
@@ -188,10 +188,21 @@ def __init__(
 
             else:
                 # Performing set index to directly set the index column (joining on row-position instead of index).
-                index_qc = (
-                    index if isinstance(index, Series) else Series(index)
-                )._query_compiler
-                query_compiler = query_compiler.set_index_from_series(index_qc)
+                if isinstance(index, Series):
+                    index_qc_list = [index._query_compiler]
+                elif isinstance(index, Index):
+                    index_qc_list = [index.to_series()._query_compiler]
+                elif isinstance(index, pd.MultiIndex):
+                    index_qc_list = [
+                        s._query_compiler
+                        for s in [
+                            pd.Series(index.get_level_values(level))
+                            for level in range(index.nlevels)
+                        ]
+                    ]
+                else:
+                    index_qc_list = [Series(index)._query_compiler]
+                query_compiler = query_compiler.set_index(index_qc_list)
 
         # Set the query compiler and name fields.
         self._query_compiler = query_compiler.columnarize()
diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/base.py b/src/snowflake/snowpark/modin/plugin/docstrings/base.py
index 3ba4f2f2da..52696fc64d 100644
--- a/src/snowflake/snowpark/modin/plugin/docstrings/base.py
+++ b/src/snowflake/snowpark/modin/plugin/docstrings/base.py
@@ -1649,7 +1649,7 @@ def last_valid_index():
         >>> df.last_valid_index()
         12
         >>> df = pd.DataFrame([5, 6, 7, 8], index=["i", "am", "iron", "man"])
-        >>> df.last_valid_index()
+        >>> df.last_valid_index()  # doctest: +SKIP
         'man'
         """
 
diff --git a/tests/integ/modin/frame/test_loc.py b/tests/integ/modin/frame/test_loc.py
index 68991b3cf1..a826d13a39 100644
--- a/tests/integ/modin/frame/test_loc.py
+++ b/tests/integ/modin/frame/test_loc.py
@@ -3924,3 +3924,9 @@ def test_raise_set_cell_with_list_like_value_error():
         s.loc[0] = [0, 0]
     with pytest.raises(NotImplementedError):
         s.to_frame().loc[0, 0] = [0, 0]
+
+
+def test_v():
+    df = pd.DataFrame([5, 6, 7, 8], index=["i", "am", "iron", "man"])
+    print(df)
+    print(df.last_valid_index())
diff --git a/tests/integ/modin/groupby/test_groupby_apply.py b/tests/integ/modin/groupby/test_groupby_apply.py
index e83fcbe00b..82d21987cb 100644
--- a/tests/integ/modin/groupby/test_groupby_apply.py
+++ b/tests/integ/modin/groupby/test_groupby_apply.py
@@ -631,7 +631,7 @@ def test_apply_transfform_to_subset(
     )
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
-        join_count=JOIN_COUNT,
+        join_count=3,
         udtf_count=UDTF_COUNT,
     )
     def test_numpy_ints_in_result(self, grouping_dfs_with_multiindexes, result):

From b73f027fd4d2e2bfdf0d19b4fe52e28d38c563d4 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Fri, 6 Sep 2024 13:31:37 -0700
Subject: [PATCH 20/42] fix some tests

---
 .../snowpark/modin/pandas/dataframe.py        |  2 +-
 src/snowflake/snowpark/modin/pandas/series.py | 22 +++++----
 tests/integ/modin/frame/test_idxmax_idxmin.py |  8 ++--
 tests/integ/modin/frame/test_insert.py        | 18 +++----
 .../integ/modin/groupby/test_groupby_apply.py | 26 +++++-----
 .../modin/groupby/test_groupby_transform.py   |  8 ++--
 .../test_df_series_creation_with_index.py     | 14 ++++++
 tests/integ/modin/series/test_reindex.py      |  4 +-
 tests/integ/modin/series/test_setitem.py      | 24 +++++-----
 tests/integ/modin/series/test_size.py         | 21 ++++----
 tests/integ/modin/series/test_take.py         |  6 +--
 tests/integ/modin/series/test_transpose.py    |  4 +-
 tests/integ/modin/series/test_where.py        | 40 +++++++++-------
 tests/integ/modin/test_merge.py               | 31 ++++++------
 .../modin/types/test_timedelta_indexing.py    | 48 +++++++++----------
 15 files changed, 153 insertions(+), 123 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py
index 6d487fd4bc..fb477bd82c 100644
--- a/src/snowflake/snowpark/modin/pandas/dataframe.py
+++ b/src/snowflake/snowpark/modin/pandas/dataframe.py
@@ -168,7 +168,7 @@ def __init__(
 
         # The logic followed here is:
         # 1. Create a query_compiler from the provided data. If columns are provided, add/select the columns.
-        # 2. If an index is provided, set the index through reindex.
+        # 2. If an index is provided, set the index through set_index or reindex.
         # 3. If the data is a DataFrame, perform loc to select the required index and columns from the DataFrame.
         # 4. The resultant query_compiler is then set as the query_compiler for the DataFrame.
 
diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py
index 5bf72262d6..57206ad5bf 100644
--- a/src/snowflake/snowpark/modin/pandas/series.py
+++ b/src/snowflake/snowpark/modin/pandas/series.py
@@ -196,16 +196,20 @@ def __init__(
                     index_qc_list = [index._query_compiler]
                 elif isinstance(index, Index):
                     index_qc_list = [index.to_series()._query_compiler]
-                elif isinstance(index, pd.MultiIndex):
-                    index_qc_list = [
-                        s._query_compiler
-                        for s in [
-                            pd.Series(index.get_level_values(level))
-                            for level in range(index.nlevels)
-                        ]
-                    ]
                 else:
-                    index_qc_list = [Series(index)._query_compiler]
+                    if is_list_like(index) and is_list_like(index[0]):
+                        # If given a list of lists, convert it to a MultiIndex.
+                        index = pandas.MultiIndex.from_arrays(index)
+                    if isinstance(index, pandas.MultiIndex):
+                        index_qc_list = [
+                            s._query_compiler
+                            for s in [
+                                pd.Series(index.get_level_values(level))
+                                for level in range(index.nlevels)
+                            ]
+                        ]
+                    else:
+                        index_qc_list = [Series(index)._query_compiler]
                 query_compiler = query_compiler.set_index(index_qc_list)
 
         # Set the query compiler and name fields.
diff --git a/tests/integ/modin/frame/test_idxmax_idxmin.py b/tests/integ/modin/frame/test_idxmax_idxmin.py
index 72fe88968b..94ca1d55b9 100644
--- a/tests/integ/modin/frame/test_idxmax_idxmin.py
+++ b/tests/integ/modin/frame/test_idxmax_idxmin.py
@@ -13,7 +13,7 @@
 from tests.integ.modin.utils import create_test_dfs, eval_snowpark_pandas_result
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 @pytest.mark.parametrize(
     "data, index",
     [
@@ -83,7 +83,7 @@ def test_idxmax_idxmin_df(data, index, func, axis, skipna):
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 @pytest.mark.parametrize(
     "data, index",
     [
@@ -173,7 +173,7 @@ def test_idxmax_idxmin_df_numeric_only_axis_1_different_column_dtypes(
             )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 @pytest.mark.parametrize("func", ["idxmax", "idxmin"])
 @pytest.mark.parametrize("axis", [0, 1])
 def test_idxmax_idxmin_with_dates(func, axis):
@@ -194,7 +194,7 @@ def test_idxmax_idxmin_with_dates(func, axis):
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 @pytest.mark.parametrize("func", ["idxmax", "idxmin"])
 @pytest.mark.parametrize("axis", [0, 1])
 @pytest.mark.xfail(reason="SNOW-1625380 TODO")
diff --git a/tests/integ/modin/frame/test_insert.py b/tests/integ/modin/frame/test_insert.py
index 282f0fb3e7..ecc1ec19db 100644
--- a/tests/integ/modin/frame/test_insert.py
+++ b/tests/integ/modin/frame/test_insert.py
@@ -277,16 +277,16 @@ def test_insert_loc_negative(native_df, loc, expected_query_count):
 @pytest.mark.parametrize(
     "value, expected_query_count, expected_join_count",
     [
-        (np.array(["a", "b", "c", "d"]), 2, 1),  # numpy array of shape (N,)
-        (np.array([["a"], ["b"], ["c"], ["d"]]), 2, 1),  # numpy array of shape (N, 1)
-        (["a", "b", "c", "d"], 2, 1),  # python list
-        (("a", "b", "c", "d"), 2, 1),  # python tuple
-        ({(3, 1): 1}, 1, 1),  # python dict
-        ("abc", 1, 0),  # sting scalar
-        (1, 1, 0),  # int scalar
+        (np.array(["a", "b", "c", "d"]), 2, 5),  # numpy array of shape (N,)
+        (np.array([["a"], ["b"], ["c"], ["d"]]), 2, 5),  # numpy array of shape (N, 1)
+        (["a", "b", "c", "d"], 2, 5),  # python list
+        (("a", "b", "c", "d"), 2, 5),  # python tuple
+        ({(3, 1): 1}, 1, 3),  # python dict
+        ("abc", 1, 2),  # sting scalar
+        (1, 1, 2),  # int scalar
     ],
 )
-def test_insert_multiindex_array_like_and_scaler(
+def test_insert_multiindex_array_like_and_scalar(
     value, expected_query_count, expected_join_count
 ):
     arrays = [[3, 4, 5, 6], [1, 2, 1, 2]]
@@ -310,7 +310,7 @@ def test_insert_multiindex_array_like_and_scaler(
         ("a", "b", "c", "d"),  # python tuple
     ],
 )
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=2, join_count=5)
 def test_insert_empty_multiindex_frame(value):
     mi = pd.MultiIndex.from_arrays([np.array([], dtype=int), np.array([], dtype=int)])
     snow_df = pd.DataFrame([], index=mi)
diff --git a/tests/integ/modin/groupby/test_groupby_apply.py b/tests/integ/modin/groupby/test_groupby_apply.py
index 82d21987cb..7c43b00a7b 100644
--- a/tests/integ/modin/groupby/test_groupby_apply.py
+++ b/tests/integ/modin/groupby/test_groupby_apply.py
@@ -191,7 +191,7 @@ class TestFuncReturnsDataFrame:
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=JOIN_COUNT,
+        join_count=3,
     )
     def test_group_by_one_column_and_one_level_with_default_kwargs(
         self, grouping_dfs_with_multiindexes, func
@@ -206,7 +206,7 @@ def test_group_by_one_column_and_one_level_with_default_kwargs(
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=JOIN_COUNT,
+        join_count=3,
     )
     def test_df_with_default_index(self, grouping_dfs_with_multiindexes):
         eval_snowpark_pandas_result(
@@ -232,7 +232,7 @@ def test_func_returns_empty_frame(self):
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=JOIN_COUNT,
+        join_count=3,
     )
     def test_args_and_kwargs(self, grouping_dfs_with_multiindexes):
         def func(df, num1, str1):
@@ -258,7 +258,7 @@ def func(df, num1, str1):
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=JOIN_COUNT,
+        join_count=3,
     )
     def test_group_by_level(self, grouping_dfs_with_multiindexes, level):
         eval_snowpark_pandas_result(
@@ -281,7 +281,7 @@ def operation(df: native_pd.DataFrame) -> native_pd.DataFrame:
             # When dropna=False, we can skip the dropna query
             query_count=4,
             udtf_count=UDTF_COUNT,
-            join_count=JOIN_COUNT,
+            join_count=3,
         ):
             snow_result = operation(snow_df)
         pandas_result = operation(pandas_df)
@@ -332,7 +332,7 @@ def test_group_dataframe_with_column_of_all_nulls_snow_1233832(self, null_value)
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=JOIN_COUNT,
+        join_count=3,
     )
     @pytest.mark.parametrize(
         "by, expected_output",
@@ -417,7 +417,7 @@ def operation(df: native_pd.DataFrame) -> native_pd.DataFrame:
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=JOIN_COUNT,
+        join_count=3,
     )
     @pytest.mark.parametrize("by", ["level_0", ("a", "string_col_1")])
     @pytest.mark.parametrize(
@@ -444,7 +444,7 @@ def test_as_index_false(self, grouping_dfs_with_multiindexes, by, func):
         # transform because we only reindex to the original ordering if
         query_count=QUERY_COUNT_WITH_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=JOIN_COUNT,
+        join_count=3,
     )
     def test_group_keys_false(self, grouping_dfs_with_multiindexes, as_index):
         eval_snowpark_pandas_result(
@@ -598,7 +598,7 @@ def operation(df: native_pd.DataFrame) -> native_pd.DataFrame:
     @sql_count_checker(
         # we need a transform check because group_keys=False.
         query_count=QUERY_COUNT_WITH_TRANSFORM_CHECK,
-        join_count=JOIN_COUNT,
+        join_count=3,
         udtf_count=UDTF_COUNT,
     )
     def test_apply_transfform_to_subset(
@@ -800,7 +800,7 @@ def test_root_mean_squared_error(self):
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=JOIN_COUNT,
+        join_count=3,
     )
     def test_multiindex_df(self, grouping_dfs_with_multiindexes, by, sort, as_index):
         eval_snowpark_pandas_result(
@@ -836,7 +836,7 @@ def test_multiindex_df(self, grouping_dfs_with_multiindexes, by, sort, as_index)
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=JOIN_COUNT,
+        join_count=3,
     )
     def test_non_series_or_dataframe_return_types(
         self, return_value, grouping_dfs_with_multiindexes
@@ -918,7 +918,7 @@ class TestFuncReturnsSeries:
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=JOIN_COUNT,
+        join_count=3,
     )
     def test_return_series_with_two_columns(
         self, grouping_dfs_with_multiindexes, by, level, as_index, sort, group_keys
@@ -943,7 +943,7 @@ def test_return_series_with_two_columns(
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=JOIN_COUNT,
+        join_count=3,
     )
     def test_args_and_kwargs(self, grouping_dfs_with_multiindexes):
         eval_snowpark_pandas_result(
diff --git a/tests/integ/modin/groupby/test_groupby_transform.py b/tests/integ/modin/groupby/test_groupby_transform.py
index 5f2339f2e4..46ef42f4f4 100644
--- a/tests/integ/modin/groupby/test_groupby_transform.py
+++ b/tests/integ/modin/groupby/test_groupby_transform.py
@@ -39,7 +39,7 @@ def test_dataframe_groupby_transform(
     #   temporary function's resultant table.
     # - A second join is performed only when the groupby object specifies dropna=True.
     #   This is because a loc set operation is being performed to correctly set NA values.
-    with SqlCounter(query_count=6, join_count=1 + (1 if dropna else 0), udtf_count=1):
+    with SqlCounter(query_count=6, join_count=2 + (2 if dropna else 0), udtf_count=1):
         eval_snowpark_pandas_result(
             *df_with_multiple_columns,
             lambda df: df.groupby(
@@ -85,11 +85,11 @@ def test_dataframe_groupby_transform_with_func_args_and_kwargs(
     Test DataFrameGroupby.transform with functions that require *args and **kwargs.
     """
     # - A UDTF is created to run `groupby.transform(func)` on every group via `apply`.
-    # - One join always occurs when joining the original DataFrame's table with the
+    # - Two joins always occurs when joining the original DataFrame's table with the
     #   temporary function's resultant table.
-    # - A second join is performed only when the groupby object specifies dropna=True.
+    # - Another two joins are performed only when the groupby object specifies dropna=True.
     #   This is because a loc set operation is being performed to correctly set NA values.
-    with SqlCounter(query_count=6, join_count=1 + (1 if dropna else 0), udtf_count=1):
+    with SqlCounter(query_count=6, join_count=2 + (2 if dropna else 0), udtf_count=1):
         eval_snowpark_pandas_result(
             *df_with_multiple_columns,
             lambda df: df.groupby(
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
index d1bcb56651..f9c2cf173c 100644
--- a/tests/integ/modin/index/test_df_series_creation_with_index.py
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -4,6 +4,7 @@
 import re
 
 import modin.pandas as pd
+import numpy as np
 import pandas as native_pd
 import pytest
 
@@ -491,3 +492,16 @@ def test_create_df_with_dict_as_data_and_index_as_index():
     native_df = native_pd.DataFrame(data, index=native_index)
     snow_df = pd.DataFrame(data, index=snow_index)
     assert_frame_equal(snow_df, native_df)
+
+
+@sql_count_checker(query_count=1, join_count=2)
+def test_create_series_with_list_of_lists_index():
+    # When given a list of lists as the index, this index needs to be converted to a MultiIndex before processing.
+    arrays = [
+        np.array(["qux", "qux", "foo", "foo", "baz", "baz", "bar", "bar"]),
+        np.array(["two", "one", "two", "one", "two", "one", "two", "one"]),
+    ]
+    data = [1, 2, 3, 4, 5, 6, 7, 8]
+    native_series = native_pd.Series(data, index=arrays)
+    snow_series = pd.Series(data, index=arrays)
+    assert_series_equal(snow_series, native_series)
diff --git a/tests/integ/modin/series/test_reindex.py b/tests/integ/modin/series/test_reindex.py
index b8bf2875ac..3f902f96df 100644
--- a/tests/integ/modin/series/test_reindex.py
+++ b/tests/integ/modin/series/test_reindex.py
@@ -259,7 +259,7 @@ def perform_reindex(series):
     )
 
 
-@sql_count_checker(query_count=1, join_count=3)
+@sql_count_checker(query_count=1, join_count=2)
 @pytest.mark.parametrize("limit", [None, 1, 2, 100])
 @pytest.mark.parametrize("method", ["bfill", "backfill", "pad", "ffill"])
 def test_reindex_index_datetime_with_fill(limit, method):
@@ -300,7 +300,7 @@ def test_reindex_index_non_overlapping_index():
     )
 
 
-@sql_count_checker(query_count=1, join_count=3)
+@sql_count_checker(query_count=1, join_count=2)
 def test_reindex_index_non_overlapping_datetime_index():
     # TODO: SNOW-1638397 See if it's possible to use data={"prices": [100, 101, np.nan, 100, 89, 88]} instead.
     date_index = native_pd.date_range("1/1/2010", periods=6, freq="D")
diff --git a/tests/integ/modin/series/test_setitem.py b/tests/integ/modin/series/test_setitem.py
index 50405643bc..39358b9870 100644
--- a/tests/integ/modin/series/test_setitem.py
+++ b/tests/integ/modin/series/test_setitem.py
@@ -175,7 +175,7 @@
         (None, 35),  # None scalar
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_series_setitem_scalar_key_and_scalar_item(
     key, item, default_index_native_int_series
 ):
@@ -276,7 +276,7 @@ def test_series_setitem_none_key_and_scalar_item_mixed_type_series(
         (3.14, "a"),
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_series_setitem_scalar_key_and_scalar_item_mixed_type_series_type_coercion(
     key, item, mixed_type_index_native_series_mixed_type_index
 ):
@@ -341,7 +341,7 @@ def test_series_setitem_scalar_key_and_scalar_item_mixed_type_series_type_coerci
 # TODO: SNOW-986548 fix where key is False, row is missed in this case
 @pytest.mark.parametrize("key", [True, False])
 @pytest.mark.parametrize("item", SCALAR_LIKE_VALUES)
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_series_setitem_boolean_key_and_scalar_item_label_updated(key, item):
     # series[scalar boolean key] = scalar item
     # ----------------------------------------
@@ -493,14 +493,14 @@ def test_series_setitem_boolean_key_and_scalar_item_case2_numeric_index(key, ite
 
     expected_ser = native_pd.Series(data=data, index=index)
 
-    with SqlCounter(query_count=1, join_count=1):
+    with SqlCounter(query_count=1, join_count=2):
         # verify that the result is correct
         assert_series_equal(snowpark_ser, expected_ser)
 
 
 @pytest.mark.parametrize("key", [True, False])
 @pytest.mark.parametrize("item", SCALAR_LIKE_VALUES)
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=3)
 def test_series_setitem_boolean_key_and_scalar_item_case2_non_numeric_index(key, item):
     # series[scalar boolean key] = scalar item
     # ----------------------------------------
@@ -559,7 +559,7 @@ def test_series_setitem_boolean_key_and_scalar_item_case2_non_numeric_index(key,
 
 @pytest.mark.parametrize("key", [0, 1])
 @pytest.mark.parametrize("item", SCALAR_LIKE_VALUES)
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_series_setitem_boolean_key_and_scalar_item_case3(
     key, item, native_series_with_duplicate_boolean_index
 ):
@@ -1601,7 +1601,7 @@ def test_series_setitem_with_empty_key_and_empty_series_item(
     else:
         snowpark_key = key
 
-    with SqlCounter(query_count=1):
+    with SqlCounter(query_count=4):
         native_ser[key] = item
         snowpark_ser[
             pd.Series(snowpark_key)
@@ -1835,7 +1835,7 @@ def test_series_setitem_check_type_behavior_with_string_key_and_number_scalar_it
                 assert_series_equal(snowpark_ser, native_ser, check_dtype=False)
     else:
         # All other cases match native pandas behavior
-        with SqlCounter(query_count=1, join_count=1):
+        with SqlCounter(query_count=1, join_count=2):
             assert_series_equal(snowpark_ser, native_ser, check_dtype=False)
 
 
@@ -1886,7 +1886,7 @@ def test_series_setitem_check_type_behavior_with_string_key_and_boolean_scalar_i
     # b    True
     # c    True
     # dtype: bool
-    with SqlCounter(query_count=1, join_count=1):
+    with SqlCounter(query_count=1, join_count=2):
         err_msg = "Series are different"
         with pytest.raises(AssertionError, match=err_msg):
             assert_series_equal(snowpark_ser, native_ser, check_dtype=False)
@@ -1997,7 +1997,7 @@ def test_series_setitem_check_type_behavior_with_string_key_and_string_scalar_it
 
         expected_data = [str(val) for val in native_ser]
         expected_ser = native_pd.Series(data=expected_data, index=index)
-        with SqlCounter(query_count=1, join_count=1):
+        with SqlCounter(query_count=1, join_count=2):
             assert_series_equal(snowpark_ser, expected_ser, check_dtype=False)
 
 
@@ -2093,7 +2093,7 @@ def set_loc_helper(ser):
         [2, "x"],
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_df_setitem_boolean_key(key, index):
     item = 99
 
@@ -2435,7 +2435,7 @@ def test_behavior_table_is_up_to_date():
                         prev_err_msg = expected_err_msg
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2, join_count=6)
 def test_series_setitem_int_key():
     # pandas series setitem with int key is similar to loc set in most cases:
     # E.g., set index with label 3 to 100
diff --git a/tests/integ/modin/series/test_size.py b/tests/integ/modin/series/test_size.py
index 4543525b2d..65730da0fd 100644
--- a/tests/integ/modin/series/test_size.py
+++ b/tests/integ/modin/series/test_size.py
@@ -8,7 +8,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import sql_count_checker
+from tests.integ.modin.sql_counter import SqlCounter
 from tests.integ.modin.utils import eval_snowpark_pandas_result
 
 
@@ -36,11 +36,16 @@
         "multi index",
     ],
 )
-@sql_count_checker(query_count=1)
 def test_series_size(args, kwargs):
-    eval_snowpark_pandas_result(
-        pd.Series(*args, **kwargs),
-        native_pd.Series(*args, **kwargs),
-        lambda df: df.size,
-        comparator=lambda x, y: x == y,
-    )
+    with SqlCounter(
+        query_count=1,
+        join_count=2
+        if isinstance(kwargs.get("index", None), native_pd.MultiIndex)
+        else 0,
+    ):
+        eval_snowpark_pandas_result(
+            pd.Series(*args, **kwargs),
+            native_pd.Series(*args, **kwargs),
+            lambda df: df.size,
+            comparator=lambda x, y: x == y,
+        )
diff --git a/tests/integ/modin/series/test_take.py b/tests/integ/modin/series/test_take.py
index 211a89968d..b21dc4295b 100644
--- a/tests/integ/modin/series/test_take.py
+++ b/tests/integ/modin/series/test_take.py
@@ -20,16 +20,16 @@ def test_series_take():
 
     actual = ser.take([-1, 3, 4])
     expected = pd.Series([4, 2, 4], index=[4, 3, 4])
-    with SqlCounter(query_count=2, join_count=2):
+    with SqlCounter(query_count=2, join_count=3):
         assert_series_equal(actual, expected)
 
     # Out-of-bounds testing - valid because .iloc is used in backend.
     actual = ser.take([1, 10])
     expected = pd.Series([5], index=[1])
-    with SqlCounter(query_count=2, join_count=2):
+    with SqlCounter(query_count=2, join_count=3):
         assert_series_equal(actual, expected)
 
     actual = ser.take([2, 5])
     expected = pd.Series([6], index=[2])
-    with SqlCounter(query_count=2, join_count=2):
+    with SqlCounter(query_count=2, join_count=3):
         assert_series_equal(actual, expected)
diff --git a/tests/integ/modin/series/test_transpose.py b/tests/integ/modin/series/test_transpose.py
index 1e733a5226..ae2a076171 100644
--- a/tests/integ/modin/series/test_transpose.py
+++ b/tests/integ/modin/series/test_transpose.py
@@ -51,7 +51,7 @@ def test_series_transpose_empty():
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_transpose_multi_index():
     data = [1, 2, 3, 4, 5]
     index = [("a", "x"), ("b", "y"), ("c", "z"), ("d", "u"), ("e", "v")]
@@ -66,7 +66,7 @@ def test_series_transpose_multi_index():
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_transpose_index_no_names():
     data = [1, 2, 3, 4, 5]
     index = [None, None, None, None, None]
diff --git a/tests/integ/modin/series/test_where.py b/tests/integ/modin/series/test_where.py
index 9f0c6d0f80..cff58d4a82 100644
--- a/tests/integ/modin/series/test_where.py
+++ b/tests/integ/modin/series/test_where.py
@@ -76,7 +76,7 @@ def test_series_where_duplicate_labels():
     eval_snowpark_pandas_result(snow_ser, native_ser, lambda ser: ser.where(ser > 3))
 
 
-@sql_count_checker(query_count=1, join_count=0)
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_where_multi_index():
     data = [1, 2, 3, 4, 5]
     index = [("a", "x"), ("b", "y"), ("c", "z"), ("d", "u"), ("e", "v")]
@@ -234,7 +234,7 @@ def test_series_where_with_scalar_cond(cond):
         )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=3)
 def test_series_where_series_cond_unmatched_index():
     data = [1, 2, 3, 4]
     index1 = [0, 1, 2, 3]
@@ -259,9 +259,10 @@ def perform_where(series):
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
-@pytest.mark.parametrize("index", ["matched_index", "unmatched_index"])
-def test_series_where_short_series_cond(index):
+@pytest.mark.parametrize(
+    "index, join_count", [("matched_index", 1), ("unmatched_index", 2)]
+)
+def test_series_where_short_series_cond(index, join_count):
     data = [1, 2, 3, 4]
     if index != "matched_index":
         index = [7, 8, 9]
@@ -280,16 +281,18 @@ def perform_where(series):
         else:
             return series.where(native_cond, -1)
 
-    eval_snowpark_pandas_result(
-        snow_ser,
-        native_ser,
-        perform_where,
-    )
+    with SqlCounter(query_count=1, join_count=join_count):
+        eval_snowpark_pandas_result(
+            snow_ser,
+            native_ser,
+            perform_where,
+        )
 
 
-@sql_count_checker(query_count=1, join_count=1)
-@pytest.mark.parametrize("index", ["matched_index", "unmatched_index"])
-def test_series_where_long_series_cond(index):
+@pytest.mark.parametrize(
+    "index, join_count", [("matched_index", 1), ("unmatched_index", 2)]
+)
+def test_series_where_long_series_cond(index, join_count):
     data = [1, 2, 3, 4]
     if index != "matched_index":
         index = [7, 8, 9, 10, 11]
@@ -308,8 +311,9 @@ def perform_where(series):
         else:
             return series.where(native_cond, -1)
 
-    eval_snowpark_pandas_result(
-        snow_ser,
-        native_ser,
-        perform_where,
-    )
+    with SqlCounter(query_count=1, join_count=join_count):
+        eval_snowpark_pandas_result(
+            snow_ser,
+            native_ser,
+            perform_where,
+        )
diff --git a/tests/integ/modin/test_merge.py b/tests/integ/modin/test_merge.py
index 7ba4a79152..5b265f5e3d 100644
--- a/tests/integ/modin/test_merge.py
+++ b/tests/integ/modin/test_merge.py
@@ -19,7 +19,7 @@
 
 @pytest.fixture(scope="function")
 def left_df():
-    return pd.DataFrame(
+    return native_pd.DataFrame(
         {
             "A": [3, 2, 1, 4, 4],
             "B": [2, 3, 1, 2, 1],
@@ -30,7 +30,7 @@ def left_df():
 
 @pytest.fixture(scope="function")
 def right_df():
-    return pd.DataFrame(
+    return native_pd.DataFrame(
         {
             "A": [4, 3, 1, 4, 4],
             "C": [3, 4, 2, 1, 1],
@@ -41,12 +41,12 @@ def right_df():
 
 @pytest.fixture(scope="function")
 def unnamed_series():
-    return pd.Series([1, 2, 3])
+    return native_pd.Series([1, 2, 3])
 
 
 @pytest.fixture(scope="function")
 def named_series():
-    return pd.Series([1, 2, 3], name="S")
+    return native_pd.Series([1, 2, 3], name="S")
 
 
 @pytest.fixture(params=["left", "inner", "right", "outer"])
@@ -59,6 +59,7 @@ def how(request):
 
 @sql_count_checker(query_count=2, join_count=2)
 def test_merge(left_df, right_df, how):
+    left_df, right_df = pd.DataFrame(left_df), pd.DataFrame(right_df)
     res = pd.merge(left_df, right_df, on="A", how=how)
     expected = left_df.merge(right_df, on="A", how=how)
     assert_frame_equal(res, expected)
@@ -66,6 +67,7 @@ def test_merge(left_df, right_df, how):
 
 @sql_count_checker(query_count=2, join_count=2)
 def test_merge_series_on_left(named_series, right_df, how):
+    named_series, right_df = pd.Series(named_series), pd.DataFrame(right_df)
     res = pd.merge(named_series, right_df, left_on="S", right_on="A", how=how)
     expected = named_series.to_frame().merge(
         right_df, left_on="S", right_on="A", how=how
@@ -73,36 +75,37 @@ def test_merge_series_on_left(named_series, right_df, how):
     assert_frame_equal(res, expected)
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=0)
 def test_merge_unnamed_series_negative(unnamed_series, right_df):
     with pytest.raises(ValueError) as pd_e:
-        native_pd.merge(unnamed_series.to_pandas(), right_df.to_pandas())
+        native_pd.merge(unnamed_series, right_df)
+    unnamed_series, right_df = pd.Series(unnamed_series), pd.DataFrame(right_df)
     with pytest.raises(ValueError) as snow_e:
         pd.merge(unnamed_series, right_df)
     assert str(pd_e.value) == str(snow_e.value)
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=0)
 def test_merge_native_pandas_object_negative(left_df, right_df):
-    left_native = left_df.to_pandas()
+    right_df = pd.DataFrame(right_df)
     msg = (
-        f"{type(left_native)} is not supported as 'value' argument. Please convert this to Snowpark pandas"
+        f"{type(left_df)} is not supported as 'value' argument. Please convert this to Snowpark pandas"
         r" objects by calling modin.pandas.Series\(\)/DataFrame\(\)"
     )
     # Left frame as native pandas object
     with pytest.raises(TypeError, match=msg):
-        pd.merge(left_native, right_df, on="A")
+        pd.merge(left_df, right_df, on="A")
 
     # right frame as native pandas object
     with pytest.raises(TypeError, match=msg):
-        pd.merge(right_df, left_native, on="A")
+        pd.merge(right_df, left_df, on="A")
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=0)
 def test_merge_invalid_object_type_negative(left_df):
     right_df = "abc"
     with pytest.raises(TypeError) as pd_e:
-        native_pd.merge(left_df.to_pandas(), right_df)
+        native_pd.merge(left_df, right_df)
     with pytest.raises(TypeError) as snow_e:
-        pd.merge(left_df, right_df)
+        pd.merge(pd.DataFrame(left_df), right_df)
     assert str(pd_e.value) == str(snow_e.value)
diff --git a/tests/integ/modin/types/test_timedelta_indexing.py b/tests/integ/modin/types/test_timedelta_indexing.py
index 3840d11cc1..62f98107b9 100644
--- a/tests/integ/modin/types/test_timedelta_indexing.py
+++ b/tests/integ/modin/types/test_timedelta_indexing.py
@@ -264,7 +264,7 @@ def loc_set(key, item, df):
         df.loc[key] = item
         return df
 
-    with SqlCounter(query_count=1, join_count=1):
+    with SqlCounter(query_count=1, join_count=2):
         # single value
         key = (1, "a")
         run_test(key, item, api=loc_set)
@@ -346,7 +346,7 @@ def loc_set(key, item, df):
             run_test(key, item, api=loc_set)
 
     item = 1000
-    with SqlCounter(query_count=1, join_count=1):
+    with SqlCounter(query_count=1, join_count=2):
         # single value
         key = (1, "b")
         td_int = td.copy()
@@ -383,7 +383,7 @@ def setitem_enlargement(key, item, df):
         )
 
     key = 10
-    with SqlCounter(query_count=1, join_count=1):
+    with SqlCounter(query_count=1, join_count=2):
         eval_snowpark_pandas_result(
             snow_td["a"].copy(),
             td["a"].copy(),
@@ -402,7 +402,7 @@ def loc_enlargement(key, item, df):
         )
 
     key = 10
-    with SqlCounter(query_count=1, join_count=1):
+    with SqlCounter(query_count=1, join_count=2):
         eval_snowpark_pandas_result(
             snow_td["a"].copy(),
             td["a"].copy(),
@@ -412,7 +412,7 @@ def loc_enlargement(key, item, df):
     # single row
     key = (10, slice(None, None, None))
 
-    with SqlCounter(query_count=1, join_count=1):
+    with SqlCounter(query_count=1, join_count=2):
         if pd.isna(item):
             eval_snowpark_pandas_result(
                 snow_td.copy(), td.copy(), functools.partial(loc_enlargement, key, item)
@@ -450,9 +450,9 @@ def test_index_get_timedelta(key, join_count):
 @pytest.mark.parametrize(
     "key, api, query_count, join_count",
     [
-        [2, "iat", 1, 2],
-        [native_pd.Timedelta("1 days 1 hour"), "at", 2, 2],
-        [[2, 1], "iloc", 1, 2],
+        [2, "iat", 1, 4],
+        [native_pd.Timedelta("1 days 1 hour"), "at", 2, 4],
+        [[2, 1], "iloc", 1, 4],
         [
             [
                 native_pd.Timedelta("1 days 1 hour"),
@@ -460,11 +460,11 @@ def test_index_get_timedelta(key, join_count):
             ],
             "loc",
             1,
-            1,
+            2,
         ],
-        [slice(1, None), "iloc", 1, 0],
-        [[True, False, False, True], "iloc", 1, 1],
-        [[True, False, False, True], "loc", 1, 1],
+        [slice(1, None), "iloc", 1, 1],
+        [[True, False, False, True], "iloc", 1, 2],
+        [[True, False, False, True], "loc", 1, 2],
     ],
 )
 def test_series_with_timedelta_index(key, api, query_count, join_count):
@@ -494,9 +494,9 @@ def test_series_with_timedelta_index(key, api, query_count, join_count):
 @pytest.mark.parametrize(
     "key, api, query_count, join_count",
     [
-        [2, "iat", 1, 2],
-        [native_pd.Timedelta("1 days 1 hour"), "at", 2, 2],
-        [[2, 1], "iloc", 1, 2],
+        [2, "iat", 1, 4],
+        [native_pd.Timedelta("1 days 1 hour"), "at", 2, 4],
+        [[2, 1], "iloc", 1, 4],
         [
             [
                 native_pd.Timedelta("1 days 1 hour"),
@@ -504,11 +504,11 @@ def test_series_with_timedelta_index(key, api, query_count, join_count):
             ],
             "loc",
             1,
-            1,
+            2,
         ],
-        [slice(1, None), "iloc", 1, 0],
-        [[True, False, False, True], "iloc", 1, 1],
-        [[True, False, False, True], "loc", 1, 1],
+        [slice(1, None), "iloc", 1, 1],
+        [[True, False, False, True], "iloc", 1, 2],
+        [[True, False, False, True], "loc", 1, 2],
     ],
 )
 def test_df_with_timedelta_index(key, api, query_count, join_count):
@@ -558,7 +558,7 @@ def setitem_enlargement(key, item, df):
     item = 23
 
     key = native_pd.Timedelta("2 days")
-    with SqlCounter(query_count=1, join_count=0):
+    with SqlCounter(query_count=1, join_count=1):
         eval_snowpark_pandas_result(
             snow_df.copy(),
             native_df.copy(),
@@ -566,7 +566,7 @@ def setitem_enlargement(key, item, df):
         )
 
     key = native_pd.Timedelta("2 days 45 minutes")
-    with SqlCounter(query_count=1, join_count=1):
+    with SqlCounter(query_count=1, join_count=3):
         eval_snowpark_pandas_result(
             snow_df["a"].copy(),
             native_df["a"].copy(),
@@ -579,7 +579,7 @@ def loc_enlargement(key, item, df):
 
     key = (slice(None, None, None), "x")
 
-    with SqlCounter(query_count=1, join_count=0):
+    with SqlCounter(query_count=1, join_count=1):
         eval_snowpark_pandas_result(
             snow_df.copy(),
             native_df.copy(),
@@ -587,7 +587,7 @@ def loc_enlargement(key, item, df):
         )
 
     key = native_pd.Timedelta("2 days 25 minutes")
-    with SqlCounter(query_count=1, join_count=1):
+    with SqlCounter(query_count=1, join_count=3):
         eval_snowpark_pandas_result(
             snow_df["a"].copy(),
             native_df["a"].copy(),
@@ -597,7 +597,7 @@ def loc_enlargement(key, item, df):
     # single row
     key = (native_pd.Timedelta("2 days 45 minutes"), slice(None, None, None))
 
-    with SqlCounter(query_count=1, join_count=1):
+    with SqlCounter(query_count=1, join_count=3):
         eval_snowpark_pandas_result(
             snow_df.copy(),
             native_df.copy(),

From d422f86ae5ec554d73f033e0a12b25ce69cd404a Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Fri, 6 Sep 2024 14:34:33 -0700
Subject: [PATCH 21/42] replace series constructor

---
 .../plugin/extensions/series_overrides.py     | 105 ++++++++++++------
 1 file changed, 72 insertions(+), 33 deletions(-)

diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
index 645109120c..c442ecf995 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
@@ -50,11 +50,7 @@
 
 from snowflake.snowpark.modin import pandas as spd  # noqa: F401
 from snowflake.snowpark.modin.pandas.api.extensions import register_series_accessor
-from snowflake.snowpark.modin.pandas.utils import (
-    from_pandas,
-    is_scalar,
-    try_convert_index_to_native,
-)
+from snowflake.snowpark.modin.pandas.utils import from_pandas, is_scalar
 from snowflake.snowpark.modin.plugin._internal.telemetry import (
     snowpark_pandas_telemetry_method_decorator,
     try_add_telemetry_to_attribute,
@@ -382,44 +378,87 @@ def __init__(
     # use this list to update inplace when there is a shallow copy.
     self._siblings = []
 
-    # modified:
-    # Engine.subscribe(_update_engine)
+    from snowflake.snowpark.modin.plugin.extensions.index import Index
+
+    if query_compiler:
+        # CASE 1: query_compiler
+        # If a query_compiler is passed in, only use the query_compiler and name fields to create a new Series.
+        self._query_compiler = query_compiler.columnarize()
+        if name is not None:
+            self.name = name
+        return
+
+    # The logic followed here is:
+    # 1. Create a query_compiler from the provided data.
+    # 2. If an index is provided, set the index. This is either through set_index or reindex.
+    # 3. The resultant query_compiler is columnarized and set as the query_compiler for the Series.
+    # 4. If a name is provided, set the name.
+
+    if isinstance(data, Index):
+        # CASE 2: Index
+        # If the data is an Index object, convert it to a Series, and get the query_compiler.
+        query_compiler = (
+            data.to_series(index=None, name=name).reset_index(drop=True)._query_compiler
+        )
 
-    # Convert lazy index to Series without pulling the data to client.
-    if isinstance(data, pd.Index):
-        query_compiler = data.to_series(index=index, name=name)._query_compiler
-        query_compiler = query_compiler.reset_index(drop=True)
     elif isinstance(data, type(self)):
+        # CASE 3: Series
+        # If the data is a Series object, copy the query_compiler.
         query_compiler = data._query_compiler.copy()
-        if index is not None:
-            if any(i not in data.index for i in index):
-                ErrorMessage.not_implemented(
-                    "Passing non-existent columns or index values to constructor "
-                    + "not yet implemented."
-                )  # pragma: no cover
-            query_compiler = data.loc[index]._query_compiler
-    if query_compiler is None:
-        # Defaulting to pandas
-        if name is None:
-            name = MODIN_UNNAMED_SERIES_LABEL
-            if (
-                isinstance(data, (native_pd.Series, native_pd.Index, pd.Index))
-                and data.name is not None
-            ):
-                name = data.name
 
+    else:
+        # CASE 4: Non-Snowpark pandas data
+        # If the data is not a Snowpark pandas object, convert it to a query compiler.
+        name = MODIN_UNNAMED_SERIES_LABEL if name is None else name
+        if (
+            isinstance(data, (native_pd.Series, native_pd.Index))
+            and data.name is not None
+        ):
+            name = data.name
         query_compiler = from_pandas(
             native_pd.DataFrame(
                 native_pd.Series(
-                    data=try_convert_index_to_native(data),
-                    index=try_convert_index_to_native(index),
-                    dtype=dtype,
-                    name=name,
-                    copy=copy,
-                    fastpath=fastpath,
+                    data=data, dtype=dtype, name=name, copy=copy, fastpath=fastpath
                 )
             )
         )._query_compiler
+
+    if index is not None:
+        if is_dict_like(data) or isinstance(data, (type(self))):
+            # The `index` parameter is used to select the rows from `data` that will be in the resultant Series.
+            # If a value in `index` is not present in `data`'s index, it will be filled with a NaN value.
+            labels = index
+            if isinstance(labels, Index):
+                labels = labels.to_series()._query_compiler
+            elif isinstance(labels, Series):
+                labels = labels._query_compiler
+            else:
+                labels = Index(labels).to_series()._query_compiler
+            query_compiler = query_compiler.reindex(axis=0, labels=labels)
+
+        else:
+            # Performing set index to directly set the index column (joining on row-position instead of index).
+            if isinstance(index, Series):
+                index_qc_list = [index._query_compiler]
+            elif isinstance(index, Index):
+                index_qc_list = [index.to_series()._query_compiler]
+            else:
+                if is_list_like(index) and is_list_like(index[0]):
+                    # If given a list of lists, convert it to a MultiIndex.
+                    index = native_pd.MultiIndex.from_arrays(index)
+                if isinstance(index, native_pd.MultiIndex):
+                    index_qc_list = [
+                        s._query_compiler
+                        for s in [
+                            pd.Series(index.get_level_values(level))
+                            for level in range(index.nlevels)
+                        ]
+                    ]
+                else:
+                    index_qc_list = [Series(index)._query_compiler]
+            query_compiler = query_compiler.set_index(index_qc_list)
+
+    # Set the query compiler and name fields.
     self._query_compiler = query_compiler.columnarize()
     if name is not None:
         self.name = name

From 1ea5d00a76875efa956588a28f528a756b88db49 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Sun, 8 Sep 2024 22:35:51 -0700
Subject: [PATCH 22/42] fix tests

---
 .../snowpark/modin/pandas/dataframe.py        | 35 ++++++---
 .../plugin/extensions/series_overrides.py     | 23 +++++-
 tests/integ/modin/frame/test_add_prefix.py    |  2 +-
 tests/integ/modin/frame/test_add_suffix.py    |  2 +-
 tests/integ/modin/frame/test_assign.py        |  6 +-
 tests/integ/modin/frame/test_astype.py        |  2 +-
 tests/integ/modin/frame/test_at.py            |  8 +-
 tests/integ/modin/frame/test_axis.py          |  2 +-
 tests/integ/modin/frame/test_cache_result.py  |  2 +
 tests/integ/modin/frame/test_copy.py          |  6 +-
 tests/integ/modin/frame/test_describe.py      |  4 +-
 tests/integ/modin/frame/test_drop.py          |  6 +-
 tests/integ/modin/frame/test_dtypes.py        | 40 +++++-----
 tests/integ/modin/frame/test_iat.py           |  4 +-
 tests/integ/modin/frame/test_idxmax_idxmin.py | 23 +++---
 tests/integ/modin/frame/test_insert.py        | 76 ++++++++++---------
 tests/integ/modin/frame/test_join.py          | 10 +--
 tests/integ/modin/frame/test_mask.py          |  4 +-
 tests/integ/modin/frame/test_nunique.py       | 10 +--
 tests/integ/modin/frame/test_rank.py          | 40 +++++-----
 tests/integ/modin/frame/test_reindex.py       |  6 +-
 tests/integ/modin/frame/test_rename.py        |  4 +-
 tests/integ/modin/frame/test_repr.py          |  2 +-
 tests/integ/modin/frame/test_setitem.py       |  6 +-
 tests/integ/modin/frame/test_stack.py         |  2 +-
 .../groupby/test_groupby_dataframe_rank.py    | 46 ++++++-----
 .../groupby/test_groupby_default2pandas.py    |  2 +-
 .../modin/groupby/test_groupby_head_tail.py   |  4 +-
 .../groupby/test_groupby_idxmax_idxmin.py     |  4 +-
 .../modin/groupby/test_groupby_ngroups.py     |  2 +-
 .../modin/groupby/test_groupby_series.py      | 16 ++--
 .../test_df_series_creation_with_index.py     |  9 +++
 tests/integ/modin/index/test_index_methods.py |  2 +-
 tests/integ/modin/resample/test_resample.py   |  6 +-
 .../modin/resample/test_resample_fillna.py    |  4 +-
 tests/integ/modin/series/test_add_prefix.py   |  2 +-
 tests/integ/modin/series/test_add_suffix.py   |  2 +-
 tests/integ/modin/series/test_at.py           |  8 +-
 .../modin/series/test_bitwise_operators.py    | 62 ++++++++-------
 tests/integ/modin/series/test_compare.py      |  2 +-
 tests/integ/modin/series/test_describe.py     | 23 +++---
 tests/integ/modin/series/test_empty.py        |  2 +-
 tests/integ/modin/series/test_iat.py          |  4 +-
 tests/integ/modin/series/test_mask.py         | 40 +++++-----
 .../modin/series/test_nlargest_nsmallest.py   |  2 +-
 tests/integ/modin/series/test_nunique.py      | 14 ++--
 tests/integ/modin/series/test_rank.py         | 22 +++---
 tests/integ/modin/series/test_rename.py       |  6 +-
 tests/integ/modin/series/test_setitem.py      |  2 +-
 tests/integ/modin/series/test_shape.py        | 18 +++--
 tests/integ/modin/series/test_take.py         |  2 +-
 tests/integ/modin/series/test_to_snowflake.py |  2 +-
 tests/integ/modin/test_concat.py              |  5 +-
 .../integ/modin/test_from_pandas_to_pandas.py |  4 +-
 tests/integ/modin/test_internal_frame.py      |  2 +-
 tests/integ/modin/test_numpy.py               |  6 +-
 56 files changed, 370 insertions(+), 280 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py
index 815f522bbe..9aa1b1fb26 100644
--- a/src/snowflake/snowpark/modin/pandas/dataframe.py
+++ b/src/snowflake/snowpark/modin/pandas/dataframe.py
@@ -297,6 +297,8 @@ def __init__(
                     # pd.DataFrame({'a': 1, 'b': 2}, index=[0])
                     dummy_index = index
 
+            if is_scalar(data) and not isinstance(index, type(None)):
+                dummy_index = index
             query_compiler = from_pandas(
                 pandas.DataFrame(
                     data=data,
@@ -308,9 +310,10 @@ def __init__(
             )._query_compiler
 
         if index is not None:
-            if isinstance(data, (type(self), Series)):
+            if isinstance(data, (type(self), Series, type(None))):
                 # The `index` parameter is used to select the rows from `data` that will be in the resultant DataFrame.
                 # If a value in `index` is not present in `data`'s index, it will be filled with a NaN value.
+                # If data is None and an index is provided, set the index.
                 labels = index
                 if isinstance(labels, Index):
                     labels = labels.to_series()._query_compiler
@@ -326,16 +329,28 @@ def __init__(
                     index_qc_list = [index._query_compiler]
                 elif isinstance(index, Index):
                     index_qc_list = [index.to_series()._query_compiler]
-                elif isinstance(index, pd.MultiIndex):
-                    index_qc_list = [
-                        s._query_compiler
-                        for s in [
-                            pd.Series(index.get_level_values(level))
-                            for level in range(index.nlevels)
-                        ]
-                    ]
                 else:
-                    index_qc_list = [Series(index)._query_compiler]
+                    if (
+                        not isinstance(index, pandas.MultiIndex)
+                        and is_list_like(index)
+                        and len(index) > 0
+                        and all(
+                            (not isinstance(i, tuple) and is_list_like(i))
+                            for i in index
+                        )
+                    ):
+                        # If given a list of lists, convert it to a MultiIndex.
+                        index = pandas.MultiIndex.from_arrays(index)
+                    if isinstance(index, pandas.MultiIndex):
+                        index_qc_list = [
+                            s._query_compiler
+                            for s in [
+                                pd.Series(index.get_level_values(level))
+                                for level in range(index.nlevels)
+                            ]
+                        ]
+                    else:
+                        index_qc_list = [Series(index)._query_compiler]
                 query_compiler = query_compiler.set_index(index_qc_list)
 
         if isinstance(data, DataFrame):
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
index c442ecf995..492098c2b6 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
@@ -410,6 +410,9 @@ def __init__(
         # CASE 4: Non-Snowpark pandas data
         # If the data is not a Snowpark pandas object, convert it to a query compiler.
         name = MODIN_UNNAMED_SERIES_LABEL if name is None else name
+        dummy_index = None
+        if is_scalar(data) and not isinstance(index, type(None)):
+            dummy_index = index
         if (
             isinstance(data, (native_pd.Series, native_pd.Index))
             and data.name is not None
@@ -418,15 +421,22 @@ def __init__(
         query_compiler = from_pandas(
             native_pd.DataFrame(
                 native_pd.Series(
-                    data=data, dtype=dtype, name=name, copy=copy, fastpath=fastpath
+                    data=data,
+                    dtype=dtype,
+                    index=dummy_index,
+                    name=name,
+                    copy=copy,
+                    fastpath=fastpath,
                 )
             )
         )._query_compiler
 
     if index is not None:
-        if is_dict_like(data) or isinstance(data, (type(self))):
+        if is_dict_like(data) or isinstance(data, (type(self), type(None))):
             # The `index` parameter is used to select the rows from `data` that will be in the resultant Series.
             # If a value in `index` is not present in `data`'s index, it will be filled with a NaN value.
+            # If data is None and an index is provided, all the values in the Series will be NaN and the index
+            # will be the provided index.
             labels = index
             if isinstance(labels, Index):
                 labels = labels.to_series()._query_compiler
@@ -443,7 +453,14 @@ def __init__(
             elif isinstance(index, Index):
                 index_qc_list = [index.to_series()._query_compiler]
             else:
-                if is_list_like(index) and is_list_like(index[0]):
+                if (
+                    not isinstance(index, native_pd.MultiIndex)
+                    and is_list_like(index)
+                    and len(index) > 0
+                    and all(
+                        (not isinstance(i, tuple) and is_list_like(i)) for i in index
+                    )
+                ):
                     # If given a list of lists, convert it to a MultiIndex.
                     index = native_pd.MultiIndex.from_arrays(index)
                 if isinstance(index, native_pd.MultiIndex):
diff --git a/tests/integ/modin/frame/test_add_prefix.py b/tests/integ/modin/frame/test_add_prefix.py
index 5ac652ea92..8cf30f4913 100644
--- a/tests/integ/modin/frame/test_add_prefix.py
+++ b/tests/integ/modin/frame/test_add_prefix.py
@@ -46,7 +46,7 @@ def test_df_add_prefix_multiindex(prefix, native_df_with_multiindex_columns):
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 @pytest.mark.parametrize("prefix", TEST_ADD_PREFIX_DATA)
 def test_df_add_prefix_time_column_df(
     prefix, time_column_snowpark_pandas_df, time_column_native_df
diff --git a/tests/integ/modin/frame/test_add_suffix.py b/tests/integ/modin/frame/test_add_suffix.py
index 4fbaf1e319..0dceff54d7 100644
--- a/tests/integ/modin/frame/test_add_suffix.py
+++ b/tests/integ/modin/frame/test_add_suffix.py
@@ -46,7 +46,7 @@ def test_df_add_suffix_multiindex(suffix, native_df_with_multiindex_columns):
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 @pytest.mark.parametrize("suffix", TEST_ADD_SUFFIX_DATA)
 def test_df_add_suffix_time_column_df(
     suffix, time_column_snowpark_pandas_df, time_column_native_df
diff --git a/tests/integ/modin/frame/test_assign.py b/tests/integ/modin/frame/test_assign.py
index 2f4ab8da44..f60107057e 100644
--- a/tests/integ/modin/frame/test_assign.py
+++ b/tests/integ/modin/frame/test_assign.py
@@ -36,7 +36,7 @@ def assign_func(df):
     eval_snowpark_pandas_result(snow_df, native_df, assign_func)
 
 
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=3)
 @pytest.mark.parametrize(
     "index", [[2, 1, 0], [4, 5, 6]], ids=["reversed_index", "different_index"]
 )
@@ -136,7 +136,7 @@ def test_assign_short_series():
     assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(snow_df, native_df)
 
 
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=3)
 @pytest.mark.parametrize(
     "index", [[1, 0], [4, 5]], ids=["reversed_index", "different_index"]
 )
@@ -240,7 +240,7 @@ def test_overwrite_columns_via_assign():
     )
 
 
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_assign_basic_timedelta_series():
     snow_df, native_df = create_test_dfs(
         [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
diff --git a/tests/integ/modin/frame/test_astype.py b/tests/integ/modin/frame/test_astype.py
index 8007b264b4..dbd267b307 100644
--- a/tests/integ/modin/frame/test_astype.py
+++ b/tests/integ/modin/frame/test_astype.py
@@ -35,7 +35,7 @@ def test_series_input():
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_input_negative():
     df = pd.DataFrame({"a": [1, 2, 3], "b": [2.4, 2.5, 3.1]})
     with pytest.raises(KeyError, match="not found in columns"):
diff --git a/tests/integ/modin/frame/test_at.py b/tests/integ/modin/frame/test_at.py
index f43270ff53..9194416648 100644
--- a/tests/integ/modin/frame/test_at.py
+++ b/tests/integ/modin/frame/test_at.py
@@ -20,7 +20,7 @@ def test_at_get_default_index_str_columns(
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_at_set_default_index_str_columns(
     default_index_snowpark_pandas_df,
     default_index_native_df,
@@ -44,7 +44,7 @@ def test_at_get_str_index_str_columns(
     assert str_index_snowpark_pandas_df.at["b", "B"] == str_index_native_df.at["b", "B"]
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_at_set_str_index_str_columns(
     str_index_snowpark_pandas_df,
     str_index_native_df,
@@ -57,7 +57,7 @@ def at_set_helper(df):
     )
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_at_get_time_index_time_columns(
     time_index_snowpark_pandas_df,
     time_index_native_df,
@@ -68,7 +68,7 @@ def test_at_get_time_index_time_columns(
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=3)
 def test_at_set_time_index_time_columns(
     time_index_snowpark_pandas_df,
     time_index_native_df,
diff --git a/tests/integ/modin/frame/test_axis.py b/tests/integ/modin/frame/test_axis.py
index a6a156a05f..0fb3fa2c5f 100644
--- a/tests/integ/modin/frame/test_axis.py
+++ b/tests/integ/modin/frame/test_axis.py
@@ -244,7 +244,7 @@ def test_set_columns_index_name(index_name):
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_duplicate_labels_assignment():
     # Duplicate data labels
     snow_df = pd.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
diff --git a/tests/integ/modin/frame/test_cache_result.py b/tests/integ/modin/frame/test_cache_result.py
index c78cefaa3a..c26b28e4ab 100644
--- a/tests/integ/modin/frame/test_cache_result.py
+++ b/tests/integ/modin/frame/test_cache_result.py
@@ -81,6 +81,8 @@ def perform_chained_operations(df, module):
 @pytest.mark.parametrize("inplace", [True, False])
 def test_cache_result_empty_dataframe(init_kwargs, inplace):
     snow_df, native_df = create_test_dfs(**init_kwargs)
+    print(snow_df)
+    print(native_df)
     snow_df_copy = snow_df.copy(deep=True)
     with SqlCounter(query_count=1):
         cached_snow_df = cache_and_return_df(snow_df, inplace)
diff --git a/tests/integ/modin/frame/test_copy.py b/tests/integ/modin/frame/test_copy.py
index b4c5f4f2a5..7844ca321a 100644
--- a/tests/integ/modin/frame/test_copy.py
+++ b/tests/integ/modin/frame/test_copy.py
@@ -28,7 +28,7 @@ def native_df(snow_df):
 
 
 @pytest.mark.parametrize("deep", [None, True, False])
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_copy(deep, snow_df, native_df):
     # Verify copy is same as original
     assert_snowpark_pandas_equal_to_pandas(snow_df.copy(deep=deep), native_df)
@@ -61,7 +61,7 @@ def test_copy_deep_false_column_names(snow_df):
         lambda df: df.rename(columns={"a": "new_a"}, inplace=True),
     ],
 )
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_copy_inplace_operations_on_deep_copy(snow_df, native_df, operation):
     snow_df_copy = snow_df.copy(deep=True)
     operation(snow_df_copy)
@@ -79,7 +79,7 @@ def test_copy_inplace_operations_on_deep_copy(snow_df, native_df, operation):
         lambda df: df.rename(columns={"a": "new_a"}, inplace=True),
     ],
 )
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_copy_inplace_operations_on_shallow_copy(snow_df, operation):
     snow_df_copy = snow_df.copy(deep=False)
     operation(snow_df_copy)
diff --git a/tests/integ/modin/frame/test_describe.py b/tests/integ/modin/frame/test_describe.py
index a9668c5794..28425ab695 100644
--- a/tests/integ/modin/frame/test_describe.py
+++ b/tests/integ/modin/frame/test_describe.py
@@ -255,8 +255,8 @@ def timestamp_describe_comparator(snow_res, native_res):
 @pytest.mark.parametrize(
     "index",
     [
-        pytest.param(None, id="default_index"),
-        pytest.param(["one", "two", "three", "four", "five", "six"], id="flat_index"),
+        # pytest.param(None, id="default_index"),
+        # pytest.param(["one", "two", "three", "four", "five", "six"], id="flat_index"),
         pytest.param(
             [
                 np.array(["bar", "bar", "baz", "baz", "foo", "foo"]),
diff --git a/tests/integ/modin/frame/test_drop.py b/tests/integ/modin/frame/test_drop.py
index cc1a1a203d..4dcae76af7 100644
--- a/tests/integ/modin/frame/test_drop.py
+++ b/tests/integ/modin/frame/test_drop.py
@@ -209,7 +209,7 @@ def test_drop_invalid_labels_axis0_negative(
         ([], None),  # empty labels
     ],
 )
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=4)
 def test_drop_invalid_axis1_labels_errors_ignore(labels, level, multiindex_snow_df):
     result = multiindex_snow_df.drop(labels, level=level, axis=1, errors="ignore")
     assert_frame_equal(multiindex_snow_df, result)
@@ -231,7 +231,7 @@ def test_drop_invalid_axis1_labels_errors_ignore(labels, level, multiindex_snow_
         ([], None),  # empty labels
     ],
 )
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=4)
 def test_drop_invalid_axis0_labels_errors_ignore(labels, level, multiindex_snow_df):
     result = multiindex_snow_df.drop(labels, level=level, errors="ignore")
     assert_frame_equal(multiindex_snow_df, result)
@@ -263,7 +263,7 @@ def test_empty_tuple_multiindex(multiindex_snow_df, axis):
             assert len(result.index) == 0
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=4)
 def test_drop_preserve_index_names(multiindex_snow_df):
     df_dropped_e = multiindex_snow_df.drop("red", axis=1)
     df_inplace_e = multiindex_snow_df.copy()
diff --git a/tests/integ/modin/frame/test_dtypes.py b/tests/integ/modin/frame/test_dtypes.py
index c3773bdd6d..49d8abfe2a 100644
--- a/tests/integ/modin/frame/test_dtypes.py
+++ b/tests/integ/modin/frame/test_dtypes.py
@@ -18,7 +18,7 @@
     StringType,
     VariantType,
 )
-from tests.integ.modin.sql_counter import sql_count_checker
+from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
 from tests.integ.modin.utils import (
     assert_frame_equal,
     assert_series_equal,
@@ -77,7 +77,7 @@ def validate_series_snowpark_dtype(series: pd.Series, snowpark_type: DataType) -
         ),
     ],
 )
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=1)
 def test_integer(dataframe_input, input_dtype, logical_dtype):
     expected = native_pd.Series(dataframe_input, dtype=input_dtype)
     created = pd.Series(dataframe_input, dtype=input_dtype)
@@ -218,7 +218,7 @@ def test_extended_float64_with_nan():
         ),
     ],
 )
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=1)
 def test_float(dataframe_input, input_dtype, expected_dtype, logical_dtype):
     expected = native_pd.Series(dataframe_input, dtype=input_dtype)
     created = pd.Series(dataframe_input, dtype=input_dtype)
@@ -256,7 +256,7 @@ def test_float(dataframe_input, input_dtype, expected_dtype, logical_dtype):
         ),
     ],
 )
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=1)
 def test_string(dataframe_input, input_dtype, index):
     expected = native_pd.Series(dataframe_input, dtype=input_dtype)
     created = pd.Series(dataframe_input)
@@ -305,7 +305,7 @@ def test_string_explicit(dataframe_input, input_dtype, index):
         (["level0"], ["col1", "col2", "col1"]),
     ],
 )
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_insert_multiindex_multi_label(label1, label2):
     arrays = [["apple", "apple", "banana", "banana"], [1, 2, 1, 2]]
     index = pd.MultiIndex.from_arrays(arrays, names=["first", "second"])
@@ -452,24 +452,24 @@ def test_empty(input_dtype, expected_dtype, snowpark_dtype, to_pandas_dtype):
 
 
 @pytest.mark.parametrize(
-    "index, expected_index_dtype",
+    "index, expected_index_dtype, join_count",
     [
-        (None, np.dtype("int64")),
-        (native_pd.Index([]), np.dtype("object")),
-        (native_pd.Index([], dtype="float64"), np.dtype("float64")),
+        (None, np.dtype("int64"), 0),
+        (native_pd.Index([]), np.dtype("object"), 1),
+        (native_pd.Index([], dtype="float64"), np.dtype("float64"), 1),
     ],
 )
-@sql_count_checker(query_count=1)
-def test_empty_index(index, expected_index_dtype):
-    expected = native_pd.Series(data=[], index=index)
-    assert expected.dtype == np.dtype("object")
-    assert expected.index.dtype == expected_index_dtype
-    created = pd.Series(data=[], index=index)
-    assert created.dtype == np.dtype("object")
-    assert created.index.dtype == expected_index_dtype
-    roundtripped = created.to_pandas()
-    assert roundtripped.dtype == np.dtype("object")
-    assert roundtripped.index.dtype == expected_index_dtype
+def test_empty_index(index, expected_index_dtype, join_count):
+    with SqlCounter(query_count=1, join_count=join_count):
+        expected = native_pd.Series(data=[], index=index)
+        assert expected.dtype == np.dtype("object")
+        assert expected.index.dtype == expected_index_dtype
+        created = pd.Series(data=[], index=index)
+        assert created.dtype == np.dtype("object")
+        assert created.index.dtype == expected_index_dtype
+        roundtripped = created.to_pandas()
+        assert roundtripped.dtype == np.dtype("object")
+        assert roundtripped.index.dtype == expected_index_dtype
 
 
 @pytest.mark.parametrize(
diff --git a/tests/integ/modin/frame/test_iat.py b/tests/integ/modin/frame/test_iat.py
index 2191fb8db8..dbf3d50759 100644
--- a/tests/integ/modin/frame/test_iat.py
+++ b/tests/integ/modin/frame/test_iat.py
@@ -103,7 +103,7 @@ def iat_set_helper(df):
         (-7, -7),
     ],
 )
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=4)
 def test_iat_get_time_index_time_columns(
     key,
     time_index_snowpark_pandas_df,
@@ -121,7 +121,7 @@ def test_iat_get_time_index_time_columns(
         (-7, -7),
     ],
 )
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=4)
 def test_iat_set_time_index_time_columns(
     key,
     time_index_snowpark_pandas_df,
diff --git a/tests/integ/modin/frame/test_idxmax_idxmin.py b/tests/integ/modin/frame/test_idxmax_idxmin.py
index 94ca1d55b9..f9dc28bba9 100644
--- a/tests/integ/modin/frame/test_idxmax_idxmin.py
+++ b/tests/integ/modin/frame/test_idxmax_idxmin.py
@@ -13,7 +13,6 @@
 from tests.integ.modin.utils import create_test_dfs, eval_snowpark_pandas_result
 
 
-@sql_count_checker(query_count=1, join_count=1)
 @pytest.mark.parametrize(
     "data, index",
     [
@@ -74,13 +73,17 @@ def test_idxmax_idxmin_df(data, index, func, axis, skipna):
         pytest.xfail(
             "Snowpark pandas returns a Series with None whereas pandas throws a ValueError"
         )
-    eval_snowpark_pandas_result(
-        *create_test_dfs(
-            data=data,
-            index=index,
-        ),
-        lambda df: getattr(df, func)(axis=axis, skipna=skipna),
-    )
+    with SqlCounter(
+        query_count=1,
+        join_count=0 if index is None or (data == {} and index == []) else 1,
+    ):
+        eval_snowpark_pandas_result(
+            *create_test_dfs(
+                data=data,
+                index=index,
+            ),
+            lambda df: getattr(df, func)(axis=axis, skipna=skipna),
+        )
 
 
 @sql_count_checker(query_count=1, join_count=1)
@@ -173,7 +176,7 @@ def test_idxmax_idxmin_df_numeric_only_axis_1_different_column_dtypes(
             )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("func", ["idxmax", "idxmin"])
 @pytest.mark.parametrize("axis", [0, 1])
 def test_idxmax_idxmin_with_dates(func, axis):
@@ -214,7 +217,7 @@ def test_idxmax_idxmin_with_timedelta(func, axis):
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 @pytest.mark.parametrize("func", ["idxmax", "idxmin"])
 @pytest.mark.parametrize("axis", [0, 1])
 def test_idxmax_idxmin_with_strings(func, axis):
diff --git a/tests/integ/modin/frame/test_insert.py b/tests/integ/modin/frame/test_insert.py
index ecc1ec19db..c7a1c980c9 100644
--- a/tests/integ/modin/frame/test_insert.py
+++ b/tests/integ/modin/frame/test_insert.py
@@ -344,55 +344,61 @@ def test_insert_multiindex_dict_negative():
 
 
 @pytest.mark.parametrize(
-    "df_index, value_index",
+    "df_index, value_index, join_count",
     [
-        ([3, 0, 4], [1, 2, 3]),
-        ([(1, 0), (1, 2), (2, 2)], [(1, 1), (1, 2), (2, 2)]),
-        ([1.0, 2.5, 3.0], [1, 2, 3]),  # Long and Double can be joined
+        ([3, 0, 4], [1, 2, 3], 6),
+        ([(1, 0), (1, 2), (2, 2)], [(1, 1), (1, 2), (2, 2)], 11),
+        ([1.0, 2.5, 3.0], [1, 2, 3], 6),  # Long and Double can be joined
     ],
 )
-@sql_count_checker(query_count=4, join_count=1)
-def test_insert_compatible_index(df_index, value_index):
+def test_insert_compatible_index(df_index, value_index, join_count):
     snow_df = pd.DataFrame({"col1": ["p", "q", "r"]}, index=native_pd.Index(df_index))
     value = pd.DataFrame({"col2": ["x", "y", "z"]}, index=native_pd.Index(value_index))
-    eval_snowpark_pandas_result(
-        snow_df,
-        snow_df.to_pandas(),
-        lambda df: df.insert(
-            0, "col3", value if isinstance(df, pd.DataFrame) else value.to_pandas()
-        ),
-        inplace=True,  # insert operation is always inplace
-    )
+    with SqlCounter(query_count=4, join_count=join_count):
+        eval_snowpark_pandas_result(
+            snow_df,
+            snow_df.to_pandas(),
+            lambda df: df.insert(
+                0, "col3", value if isinstance(df, pd.DataFrame) else value.to_pandas()
+            ),
+            inplace=True,  # insert operation is always inplace
+        )
 
 
 @pytest.mark.parametrize(
-    "df_index, value_index",
+    "df_index, value_index, join_count",
     [
-        ([3, 2, 1], [(1, 0, 1), (1, 2, 3), (2, 1, 0)]),  # length mismatch 1 != 3
+        ([3, 2, 1], [(1, 0, 1), (1, 2, 3), (2, 1, 0)], 3),  # length mismatch 1 != 3
         (
             [(3, 1), (2, 1), (1, 2)],
             [(1, 0, 1), (1, 2, 3), (2, 1, 0)],
+            3,
         ),  # length mismatch 2 != 3
-        ([1, 2, 3], [(1, 0), (1, 2), (2, 2)]),  # 1 != 2
-        ([(1, 0), (1, 2), (2, 2)], [(1, 2, 3), (3, 4, 5), (6, 5, 4)]),  # 2 != 3
-        ([(1, 2, 3), (3, 4, 5), (6, 5, 4)], [3, 1, 2]),  # length mismatch 3 != 1
+        ([1, 2, 3], [(1, 0), (1, 2), (2, 2)], 2),  # 1 != 2
+        ([(1, 0), (1, 2), (2, 2)], [(1, 2, 3), (3, 4, 5), (6, 5, 4)], 3),  # 2 != 3
+        ([(1, 2, 3), (3, 4, 5), (6, 5, 4)], [3, 1, 2], 1),  # length mismatch 3 != 1
         (
             [(1, 1), (1, 2), (2, 2)],
             ["(1, 0)", "(1, 2)", "(2, 2)"],
+            1,
         ),  # length and type mismatch
     ],
 )
-@sql_count_checker(query_count=1)
-def test_insert_index_num_levels_mismatch_negative(df_index, value_index):
-    snow_df = pd.DataFrame({"col1": ["p", "q", "r"]}, index=native_pd.Index(df_index))
-    value = pd.DataFrame({"col2": ["w", "x", "y"]}, index=native_pd.Index(value_index))
-    # This is different behavior from native pandas. Native pandas in some cases
-    # insert new column with null values but in Snowpark pandas we always raise error.
-    with pytest.raises(
-        ValueError,
-        match="Number of index levels of inserted column are different from frame index",
-    ):
-        snow_df.insert(0, "col3", value)
+def test_insert_index_num_levels_mismatch_negative(df_index, value_index, join_count):
+    with SqlCounter(query_count=1, join_count=join_count):
+        snow_df = pd.DataFrame(
+            {"col1": ["p", "q", "r"]}, index=native_pd.Index(df_index)
+        )
+        value = pd.DataFrame(
+            {"col2": ["w", "x", "y"]}, index=native_pd.Index(value_index)
+        )
+        # This is different behavior from native pandas. Native pandas in some cases
+        # insert new column with null values but in Snowpark pandas we always raise error.
+        with pytest.raises(
+            ValueError,
+            match="Number of index levels of inserted column are different from frame index",
+        ):
+            snow_df.insert(0, "col3", value)
 
 
 @pytest.mark.parametrize(
@@ -407,7 +413,7 @@ def test_insert_index_num_levels_mismatch_negative(df_index, value_index):
         ),  # type mismatch boolean != long
     ],
 )
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=2, join_count=4)
 def test_insert_index_type_mismatch(df_index, value_index, expected_index):
     # Note: This is different behavior than native pandas. In native pandas when
     # index datatype mismatch new columns in inserted will all NULL values.
@@ -424,7 +430,7 @@ def test_insert_index_type_mismatch(df_index, value_index, expected_index):
     assert_snowpark_pandas_equal_to_pandas(snow_df, expected_df)
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_insert_with_null_index_values():
     snow_df = pd.DataFrame(
         {"A": ["p", "q", "r", "s"]}, native_pd.Index(["a", None, "b", None])
@@ -440,7 +446,7 @@ def test_insert_with_null_index_values():
     )
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_insert_multiple_null():
     snow_df = pd.DataFrame(
         {"A": ["p", "q", "r", "s"]}, native_pd.Index(["a", "b", "c", "d"])
@@ -465,8 +471,8 @@ def test_insert_multiple_null():
 @pytest.mark.parametrize(
     "index, value, expected_query_count, expected_join_count",
     [
-        ([1, 2], native_pd.Series([1, 2], index=[2, 3]), 1, 1),
-        ([1, 2], [3, 4], 2, 1),
+        ([1, 2], native_pd.Series([1, 2], index=[2, 3]), 1, 3),
+        ([1, 2], [3, 4], 2, 3),
     ],
 )
 def test_insert_into_empty_dataframe_with_index(
diff --git a/tests/integ/modin/frame/test_join.py b/tests/integ/modin/frame/test_join.py
index f37011065b..2721a8f6aa 100644
--- a/tests/integ/modin/frame/test_join.py
+++ b/tests/integ/modin/frame/test_join.py
@@ -269,21 +269,21 @@ def test_join_validate_negative(lvalues, rvalues, validate):
         left.join(right, validate=validate)
 
 
-@sql_count_checker(query_count=6, join_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_join_timedelta(left, right):
     right = right.astype("timedelta64[ns]")
     eval_snowpark_pandas_result(
+        pd.DataFrame(left),
         left,
-        left.to_pandas(),
         lambda df: df.join(
-            right if isinstance(df, pd.DataFrame) else right.to_pandas()
+            pd.DataFrame(right) if isinstance(df, pd.DataFrame) else right
         ),
     )
     left = left.astype("timedelta64[ns]")
     eval_snowpark_pandas_result(
+        pd.DataFrame(left),
         left,
-        left.to_pandas(),
         lambda df: df.join(
-            right if isinstance(df, pd.DataFrame) else right.to_pandas()
+            pd.DataFrame(right) if isinstance(df, pd.DataFrame) else right
         ),
     )
diff --git a/tests/integ/modin/frame/test_mask.py b/tests/integ/modin/frame/test_mask.py
index 53afbd7bf8..7b47880557 100644
--- a/tests/integ/modin/frame/test_mask.py
+++ b/tests/integ/modin/frame/test_mask.py
@@ -864,7 +864,7 @@ def perform_mask(df):
     )
 
 
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=2, join_count=4)
 @pytest.mark.parametrize(
     "data",
     [[10], [10, 11, 12], [10, 11, 12, 13]],
@@ -909,7 +909,7 @@ def perform_mask(df):
     )
 
 
-@sql_count_checker(query_count=2, join_count=3, union_count=1)
+@sql_count_checker(query_count=2, join_count=5, union_count=1)
 @pytest.mark.parametrize(
     "data",
     [[10], [10, 11, 12], [10, 11, 12, 13]],
diff --git a/tests/integ/modin/frame/test_nunique.py b/tests/integ/modin/frame/test_nunique.py
index d0cad8ec2a..6fd1751e3a 100644
--- a/tests/integ/modin/frame/test_nunique.py
+++ b/tests/integ/modin/frame/test_nunique.py
@@ -85,12 +85,12 @@ def test_dataframe_nunique_no_columns(native_df):
         ),
     ],
 )
-@sql_count_checker(query_count=1)
 def test_dataframe_nunique_multiindex(index, columns):
-    eval_snowpark_pandas_result(
-        *create_test_dfs(TEST_DATA, index=index, columns=columns),
-        lambda df: df.nunique(axis=0),
-    )
+    with SqlCounter(query_count=1, join_count=0 if index is None else 2):
+        eval_snowpark_pandas_result(
+            *create_test_dfs(TEST_DATA, index=index, columns=columns),
+            lambda df: df.nunique(axis=0),
+        )
 
 
 @sql_count_checker(query_count=0)
diff --git a/tests/integ/modin/frame/test_rank.py b/tests/integ/modin/frame/test_rank.py
index 1687ce4905..05fa47b99b 100644
--- a/tests/integ/modin/frame/test_rank.py
+++ b/tests/integ/modin/frame/test_rank.py
@@ -7,7 +7,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import sql_count_checker
+from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
 from tests.integ.modin.utils import (
     assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64,
     eval_snowpark_pandas_result,
@@ -40,7 +40,6 @@
 ]
 
 
-@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("data, index", TEST_RANK_DATA)
 @pytest.mark.parametrize(
     "method",
@@ -56,13 +55,16 @@
 )
 # test df.rank with all method, na_option, ascending parameter combinations
 def test_df_rank(data, index, method, ascending, na_option):
-    snow_df = pd.DataFrame(data, index=index)
-    native_df = native_pd.DataFrame(data, index=index)
-    eval_snowpark_pandas_result(
-        snow_df,
-        native_df,
-        lambda df: df.rank(method=method, na_option=na_option, ascending=ascending),
-    )
+    with SqlCounter(
+        query_count=1, join_count=2 if isinstance(index, native_pd.MultiIndex) else 0
+    ):
+        snow_df = pd.DataFrame(data, index=index)
+        native_df = native_pd.DataFrame(data, index=index)
+        eval_snowpark_pandas_result(
+            snow_df,
+            native_df,
+            lambda df: df.rank(method=method, na_option=na_option, ascending=ascending),
+        )
 
 
 @sql_count_checker(query_count=1)
@@ -118,7 +120,6 @@ def test_rank_unsupported_args_negative(method, ascending, na_option):
         snow_df.rank(axis=1, method=method, ascending=ascending, na_option=na_option)
 
 
-@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("data, index", TEST_RANK_DATA)
 @pytest.mark.parametrize(
     "method",
@@ -134,10 +135,15 @@ def test_rank_unsupported_args_negative(method, ascending, na_option):
 )
 # test df percentile rank
 def test_df_rank_pct(data, index, method, ascending, na_option):
-    snow_df = pd.DataFrame(data, index=index).rank(
-        method=method, ascending=ascending, na_option=na_option, pct=True
-    )
-    native_df = native_pd.DataFrame(data, index=index).rank(
-        method=method, ascending=ascending, na_option=na_option, pct=True
-    )
-    assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64(snow_df, native_df)
+    with SqlCounter(
+        query_count=1, join_count=2 if isinstance(index, native_pd.MultiIndex) else 0
+    ):
+        snow_df = pd.DataFrame(data, index=index).rank(
+            method=method, ascending=ascending, na_option=na_option, pct=True
+        )
+        native_df = native_pd.DataFrame(data, index=index).rank(
+            method=method, ascending=ascending, na_option=na_option, pct=True
+        )
+        assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64(
+            snow_df, native_df
+        )
diff --git a/tests/integ/modin/frame/test_reindex.py b/tests/integ/modin/frame/test_reindex.py
index 98d0a41e7a..1f7a7e3966 100644
--- a/tests/integ/modin/frame/test_reindex.py
+++ b/tests/integ/modin/frame/test_reindex.py
@@ -454,7 +454,7 @@ def test_reindex_columns_fill_method_with_old_na_values_negative(
                     lambda df: df.reindex(columns=list("CEBFGA"), method=method),
                 )
 
-    @sql_count_checker(query_count=5)
+    @sql_count_checker(query_count=5, join_count=1)
     @pytest.mark.parametrize("limit", [None, 1, 2, 100])
     @pytest.mark.parametrize("method", ["bfill", "backfill", "pad", "ffill"])
     def test_reindex_columns_datetime_with_fill(self, limit, method):
@@ -495,7 +495,7 @@ def test_reindex_columns_non_overlapping_columns(self):
             snow_df, native_df, lambda df: df.reindex(axis=1, labels=list("EFG"))
         )
 
-    @sql_count_checker(query_count=5)
+    @sql_count_checker(query_count=5, join_count=1)
     def test_reindex_columns_non_overlapping_datetime_columns(self):
         date_index = native_pd.date_range("1/1/2010", periods=6, freq="D")
         native_df = native_pd.DataFrame(
@@ -520,7 +520,7 @@ def perform_reindex(df):
             snow_df, native_df, perform_reindex, check_freq=False
         )
 
-    @sql_count_checker(query_count=2)
+    @sql_count_checker(query_count=2, join_count=1)
     def test_reindex_columns_non_overlapping_different_types_columns(self):
         date_index = native_pd.date_range("1/1/2010", periods=6, freq="D")
         native_df = native_pd.DataFrame(
diff --git a/tests/integ/modin/frame/test_rename.py b/tests/integ/modin/frame/test_rename.py
index a5595ec716..15351ec6fa 100644
--- a/tests/integ/modin/frame/test_rename.py
+++ b/tests/integ/modin/frame/test_rename.py
@@ -294,7 +294,7 @@ def test_rename_objects(self, snow_float_string_frame):
         assert "FOO" in renamed
         assert "foo" not in renamed
 
-    @sql_count_checker(query_count=6, join_count=2)
+    @sql_count_checker(query_count=6, join_count=8)
     def test_rename_axis_style(self):
         # https://github.com/pandas-dev/pandas/issues/12392
         df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["X", "Y"])
@@ -420,7 +420,7 @@ def test_rename_mapper_and_positional_arguments_raises(self):
         with pytest.raises(TypeError, match=msg):
             df.rename({}, columns={}, index={})
 
-    @sql_count_checker(query_count=1, join_count=1)
+    @sql_count_checker(query_count=1, join_count=5)
     def test_rename_with_duplicate_columns(self):
         # GH#4403
         df4 = DataFrame(
diff --git a/tests/integ/modin/frame/test_repr.py b/tests/integ/modin/frame/test_repr.py
index 2109bdccb5..f499146806 100644
--- a/tests/integ/modin/frame/test_repr.py
+++ b/tests/integ/modin/frame/test_repr.py
@@ -227,7 +227,7 @@ def test_repr_deviating_behavior():
     assert native_str[:N] == snow_str[:N]
 
 
-@sql_count_checker(query_count=2, union_count=1)
+@sql_count_checker(query_count=2, union_count=1, join_count=6)
 def test_repr_of_multiindex_df():
     tuples = [
         ("cobra", "mark i"),
diff --git a/tests/integ/modin/frame/test_setitem.py b/tests/integ/modin/frame/test_setitem.py
index 6152089f39..6bbdc30fa0 100644
--- a/tests/integ/modin/frame/test_setitem.py
+++ b/tests/integ/modin/frame/test_setitem.py
@@ -141,7 +141,7 @@ def setitem(df):
         else:
             df[key] = val
 
-    expected_join_count = 3 if isinstance(key.start, int) else 4
+    expected_join_count = 6 if isinstance(key.start, int) else 7
 
     with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(snow_df, native_df, setitem, inplace=True)
@@ -361,9 +361,7 @@ def func_insert_new_column(df, column):
         df[key] = column
 
     expected_join_count = 2
-    if isinstance(column, native_pd.Series):
-        expected_join_count = 1
-    elif isinstance(column, native_pd.Index) and not isinstance(
+    if isinstance(column, native_pd.Index) and not isinstance(
         column, native_pd.DatetimeIndex
     ):
         expected_join_count = 4
diff --git a/tests/integ/modin/frame/test_stack.py b/tests/integ/modin/frame/test_stack.py
index 9b06c32ff0..80c437dea7 100644
--- a/tests/integ/modin/frame/test_stack.py
+++ b/tests/integ/modin/frame/test_stack.py
@@ -20,7 +20,7 @@
 )
 @pytest.mark.parametrize("dropna", [True, False])
 @pytest.mark.parametrize("sort", [True, False])
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_stack(data, index, columns, dropna, sort):
     eval_snowpark_pandas_result(
         *create_test_dfs(data=data, index=index, columns=columns),
diff --git a/tests/integ/modin/groupby/test_groupby_dataframe_rank.py b/tests/integ/modin/groupby/test_groupby_dataframe_rank.py
index 3bb4a4b455..78443c3bbf 100644
--- a/tests/integ/modin/groupby/test_groupby_dataframe_rank.py
+++ b/tests/integ/modin/groupby/test_groupby_dataframe_rank.py
@@ -7,7 +7,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import sql_count_checker
+from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
 from tests.integ.modin.utils import (
     assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64,
     eval_snowpark_pandas_result,
@@ -211,7 +211,6 @@
 ]
 
 
-@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("data, index", TEST_RANK_DATA)
 @pytest.mark.parametrize(
     "method",
@@ -233,16 +232,18 @@
 def test_df_groupby_rank(data, index, method, ascending, na_option, dropna):
     snow_df = pd.DataFrame(data, index=index)
     native_df = native_pd.DataFrame(data, index=index)
-    eval_snowpark_pandas_result(
-        snow_df,
-        native_df,
-        lambda df: df.groupby("group", dropna=dropna).rank(
-            method=method, na_option=na_option, ascending=ascending
-        ),
-    )
+    with SqlCounter(
+        query_count=1, join_count=2 if isinstance(index, pd.MultiIndex) else 0
+    ):
+        eval_snowpark_pandas_result(
+            snow_df,
+            native_df,
+            lambda df: df.groupby("group", dropna=dropna).rank(
+                method=method, na_option=na_option, ascending=ascending
+            ),
+        )
 
 
-@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("data, index", TEST_RANK_DATA)
 @pytest.mark.parametrize(
     "method",
@@ -272,10 +273,14 @@ def test_df_rank_pct(data, index, method, ascending, na_option, dropna):
         .groupby("group", dropna=dropna)
         .rank(method=method, ascending=ascending, na_option=na_option, pct=True)
     )
-    assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64(snow_df, native_df)
+    with SqlCounter(
+        query_count=1, join_count=2 if isinstance(index, pd.MultiIndex) else 0
+    ):
+        assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64(
+            snow_df, native_df
+        )
 
 
-@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("data, index", TEST_RANK_DATA_MUL)
 @pytest.mark.parametrize(
     "method",
@@ -293,13 +298,16 @@ def test_df_rank_pct(data, index, method, ascending, na_option, dropna):
 def test_df_groupby_rank_by_list(data, index, method, ascending, na_option):
     snow_df = pd.DataFrame(data, index=index)
     native_df = native_pd.DataFrame(data, index=index)
-    eval_snowpark_pandas_result(
-        snow_df,
-        native_df,
-        lambda df: df.groupby(["group", "a"]).rank(
-            method=method, na_option=na_option, ascending=ascending
-        ),
-    )
+    with SqlCounter(
+        query_count=1, join_count=2 if isinstance(index, pd.MultiIndex) else 0
+    ):
+        eval_snowpark_pandas_result(
+            snow_df,
+            native_df,
+            lambda df: df.groupby(["group", "a"]).rank(
+                method=method, na_option=na_option, ascending=ascending
+            ),
+        )
 
 
 @pytest.mark.parametrize(
diff --git a/tests/integ/modin/groupby/test_groupby_default2pandas.py b/tests/integ/modin/groupby/test_groupby_default2pandas.py
index 49d45a1009..74aac8f77c 100644
--- a/tests/integ/modin/groupby/test_groupby_default2pandas.py
+++ b/tests/integ/modin/groupby/test_groupby_default2pandas.py
@@ -124,7 +124,7 @@ def test_groupby_with_numpy_array(basic_snowpark_pandas_df) -> None:
     "by_list",
     [[2, 1, 1, 2, 3, 3], [[2, 1, 1, 2, 3, 3], "a"]],
 )
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_groupby_series_with_numpy_array(series_multi_numeric, by_list) -> None:
     with pytest.raises(
         NotImplementedError, match=AGGREGATE_UNSUPPORTED_GROUPING_ERROR_PATTERN
diff --git a/tests/integ/modin/groupby/test_groupby_head_tail.py b/tests/integ/modin/groupby/test_groupby_head_tail.py
index 90819ec2d6..d462b89150 100644
--- a/tests/integ/modin/groupby/test_groupby_head_tail.py
+++ b/tests/integ/modin/groupby/test_groupby_head_tail.py
@@ -45,7 +45,7 @@ class TestDataFrameGroupByHeadTail:
         ["lion", 1234, 456, 78, 9],
     ]
 
-    @sql_count_checker(query_count=1)
+    @sql_count_checker(query_count=1, join_count=1)
     def test_df_groupby_head_tail(self, op_type, n, dropna, as_index, sort, group_keys):
         """
         Test DataFrameGroupBy.head and DataFrameGroupBy.tail with a small df with no NA values.
@@ -66,7 +66,7 @@ def test_df_groupby_head_tail(self, op_type, n, dropna, as_index, sort, group_ke
             check_index_type=False,
         )
 
-    @sql_count_checker(query_count=6)
+    @sql_count_checker(query_count=6, join_count=1)
     def test_df_groupby_head_tail_large_data(
         self, op_type, n, dropna, as_index, sort, group_keys, large_df_with_na_values
     ):
diff --git a/tests/integ/modin/groupby/test_groupby_idxmax_idxmin.py b/tests/integ/modin/groupby/test_groupby_idxmax_idxmin.py
index ec1e36d1e3..e87b6327bc 100644
--- a/tests/integ/modin/groupby/test_groupby_idxmax_idxmin.py
+++ b/tests/integ/modin/groupby/test_groupby_idxmax_idxmin.py
@@ -20,7 +20,7 @@
 @pytest.mark.parametrize("grouping_columns", ["B", ["A", "B"]])
 @pytest.mark.parametrize("skipna", [False, True])
 @pytest.mark.parametrize("func", ["idxmax", "idxmin"])
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_df_groupby_idxmax_idxmin_on_axis_0(
     df_with_multiple_columns, grouping_columns, skipna, func
 ):
@@ -73,7 +73,7 @@ def test_df_groupby_idxmax_idxmin_on_axis_1_negative(df_with_multiple_columns, f
 
 @pytest.mark.parametrize("func", ["idxmax", "idxmin"])
 @pytest.mark.parametrize("numeric_only", [True, False])
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_groupby_idxmax_idxmin_with_different_column_dtypes_on_axis_0(
     func, numeric_only
 ):
diff --git a/tests/integ/modin/groupby/test_groupby_ngroups.py b/tests/integ/modin/groupby/test_groupby_ngroups.py
index 332e4c88eb..6216c4c223 100644
--- a/tests/integ/modin/groupby/test_groupby_ngroups.py
+++ b/tests/integ/modin/groupby/test_groupby_ngroups.py
@@ -17,7 +17,7 @@ def assert_ngroups_equal(snow_res, pd_res):
 
 
 @pytest.mark.parametrize("by", ["a", "b", ["a", "b"]])
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=4)
 def test_groupby_sort_multiindex_series(series_multi_numeric, by):
 
     snow_ser = series_multi_numeric
diff --git a/tests/integ/modin/groupby/test_groupby_series.py b/tests/integ/modin/groupby/test_groupby_series.py
index ae8ae0926d..10dd08b6fd 100644
--- a/tests/integ/modin/groupby/test_groupby_series.py
+++ b/tests/integ/modin/groupby/test_groupby_series.py
@@ -19,14 +19,14 @@
 
 
 @pytest.mark.parametrize("by", ["a", ["b"], ["a", "b"]])
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=4)
 def test_groupby_sort_multiindex_series(series_multi_numeric, agg_method, by):
     native_mseries_group = series_multi_numeric.to_pandas().groupby(by=by, sort=True)
     mseries_group = series_multi_numeric.groupby(by=by, sort=True)
     eval_snowpark_pandas_result(mseries_group, native_mseries_group, agg_method)
 
 
-@sql_count_checker(query_count=3)
+@sql_count_checker(query_count=3, join_count=6)
 def test_groupby_sort_false_multiindex_series(series_multi_numeric):
     # it is known that groupby sort=False is buggy with multiIndex, it is always
     # sorting when only part of the level is used.
@@ -48,7 +48,7 @@ def test_groupby_sort_false_multiindex_series(series_multi_numeric):
     )
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_groupby_series_count_with_nan():
     index = native_pd.Index(["a", "b", "b", "a", "c"])
     index.names = ["grp_col"]
@@ -75,7 +75,7 @@ def test_groupby_series_count_with_nan():
     ],
 )
 @pytest.mark.parametrize("sort", [True, False])
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_groupby_agg_series(agg_func, sort):
     index = native_pd.Index(["a", "b", "b", "a", "c"])
     index.names = ["grp_col"]
@@ -113,7 +113,7 @@ def test_groupby_agg_series_dict_func_negative():
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 @pytest.mark.parametrize(
     "agg_func, type_str",
     [({"x": ("y", "sum")}, "tuple"), ({"x": pd.NamedAgg("y", "sum")}, "NamedAgg")],
@@ -139,7 +139,7 @@ def test_groupby_agg_series_raises_for_2_tuple_agg(agg_func, type_str):
 
 @pytest.mark.parametrize("sort", [True, False])
 @pytest.mark.parametrize("aggs", [{"minimum": min}, {"minimum": min, "maximum": max}])
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_groupby_agg_series_named_agg(aggs, sort):
     index = native_pd.Index(["a", "b", "b", "a", "c"])
     index.names = ["grp_col"]
@@ -164,7 +164,7 @@ def test_groupby_series_numeric_only(series_str, numeric_only):
 
 
 @pytest.mark.parametrize("level", [0, 1, [1, 0], "b", [1, 1], [0, "b"], [-1]])
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=4)
 def test_groupby_sort_multiindex_series_level(series_multi_numeric, level):
     native_series = series_multi_numeric.to_pandas()
 
@@ -173,7 +173,7 @@ def test_groupby_sort_multiindex_series_level(series_multi_numeric, level):
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_groupby_series_single_index():
     snow_ser = pd.Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0])
     native_ser = native_pd.Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0])
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
index f9c2cf173c..9a629101f3 100644
--- a/tests/integ/modin/index/test_df_series_creation_with_index.py
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -505,3 +505,12 @@ def test_create_series_with_list_of_lists_index():
     native_series = native_pd.Series(data, index=arrays)
     snow_series = pd.Series(data, index=arrays)
     assert_series_equal(snow_series, native_series)
+
+
+@sql_count_checker(query_count=1, join_count=1)
+def test_create_series_with_none_data_and_non_empty_index():
+    # When creating an empty Series with a non-empty index, the index should be used as the index of the Series.
+    index = ["A", "B", "C", "D"]
+    native_series = native_pd.Series(None, index=index, dtype=object)
+    snow_series = pd.Series(None, index=index, dtype=object)
+    assert_series_equal(snow_series, native_series)
diff --git a/tests/integ/modin/index/test_index_methods.py b/tests/integ/modin/index/test_index_methods.py
index 8f6f5b9f59..d8c3646d97 100644
--- a/tests/integ/modin/index/test_index_methods.py
+++ b/tests/integ/modin/index/test_index_methods.py
@@ -359,7 +359,7 @@ def test_has_duplicates(index):
         assert index.has_duplicates == snow_index.has_duplicates
 
 
-@sql_count_checker(query_count=6)
+@sql_count_checker(query_count=6, join_count=6)
 def test_index_parent():
     """
     Check whether the parent field in Index is updated properly.
diff --git a/tests/integ/modin/resample/test_resample.py b/tests/integ/modin/resample/test_resample.py
index af99185294..b4e8858273 100644
--- a/tests/integ/modin/resample/test_resample.py
+++ b/tests/integ/modin/resample/test_resample.py
@@ -145,7 +145,7 @@ def test_resample_duplicated_timestamps():
 @freq
 @interval
 @agg_func
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_series(freq, interval, agg_func):
     rule = f"{interval}{freq}"
     eval_snowpark_pandas_result(
@@ -188,7 +188,7 @@ def test_resample_df_with_nan(agg_func):
 
 
 @agg_func
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_ser_with_nan(agg_func):
     # 1 resample bin of all NaN, 1 resample bin partially NaN, 1 resample bin no NaNs
     eval_snowpark_pandas_result(
@@ -242,7 +242,7 @@ def test_resample_df_getitem():
     )
 
 
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_ser_getitem():
     eval_snowpark_pandas_result(
         *create_test_series(
diff --git a/tests/integ/modin/resample/test_resample_fillna.py b/tests/integ/modin/resample/test_resample_fillna.py
index d4e959123a..96ad514a2b 100644
--- a/tests/integ/modin/resample/test_resample_fillna.py
+++ b/tests/integ/modin/resample/test_resample_fillna.py
@@ -44,7 +44,7 @@ def test_resample_fill(interval, agg_func):
 
 @interval
 @agg_func
-@sql_count_checker(query_count=2, join_count=5)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_fill_ser(interval, agg_func):
     datecol = native_pd.to_datetime(
         [
@@ -139,7 +139,7 @@ def test_resample_ffill_missing_in_middle(interval, agg_func):
 
 @interval
 @agg_func
-@sql_count_checker(query_count=2, join_count=5)
+@sql_count_checker(query_count=2, join_count=3)
 def test_resample_ffill_ser_missing_in_middle(interval, agg_func):
     datecol = native_pd.to_datetime(
         [
diff --git a/tests/integ/modin/series/test_add_prefix.py b/tests/integ/modin/series/test_add_prefix.py
index 4d05f78d94..6bba930c43 100644
--- a/tests/integ/modin/series/test_add_prefix.py
+++ b/tests/integ/modin/series/test_add_prefix.py
@@ -46,7 +46,7 @@ def test_series_add_prefix_multiindex(prefix, multiindex_native_int_series):
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 @pytest.mark.parametrize("prefix", TEST_ADD_PREFIX_DATA)
 def test_series_add_prefix_time_column_df(prefix, time_index_series_data):
     series_data, kwargs = time_index_series_data
diff --git a/tests/integ/modin/series/test_add_suffix.py b/tests/integ/modin/series/test_add_suffix.py
index 43a98ab951..f3329c6789 100644
--- a/tests/integ/modin/series/test_add_suffix.py
+++ b/tests/integ/modin/series/test_add_suffix.py
@@ -46,7 +46,7 @@ def test_add_suffix_multiindex(suffix, multiindex_native_int_series):
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 @pytest.mark.parametrize("suffix", TEST_ADD_SUFFIX_DATA)
 def test_add_suffix_time_column_df(suffix, time_index_series_data):
     series_data, kwargs = time_index_series_data
diff --git a/tests/integ/modin/series/test_at.py b/tests/integ/modin/series/test_at.py
index 9452a0d736..4533c20d35 100644
--- a/tests/integ/modin/series/test_at.py
+++ b/tests/integ/modin/series/test_at.py
@@ -18,7 +18,7 @@ def test_at_get_default_index(
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_at_set_default_index(
     default_index_snowpark_pandas_series,
     default_index_native_series,
@@ -42,7 +42,7 @@ def test_at_get_str_index(
     assert str_index_snowpark_pandas_series.at["b"] == str_index_native_series.at["b"]
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_at_set_str_index(
     str_index_snowpark_pandas_series,
     str_index_native_series,
@@ -58,7 +58,7 @@ def at_set_helper(series):
     )
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_at_get_time_index(
     time_index_snowpark_pandas_series,
     time_index_native_series,
@@ -69,7 +69,7 @@ def test_at_get_time_index(
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=3)
 def test_at_set_time_index(
     time_index_snowpark_pandas_series,
     time_index_native_series,
diff --git a/tests/integ/modin/series/test_bitwise_operators.py b/tests/integ/modin/series/test_bitwise_operators.py
index eda9c536c9..ad542fd223 100644
--- a/tests/integ/modin/series/test_bitwise_operators.py
+++ b/tests/integ/modin/series/test_bitwise_operators.py
@@ -11,7 +11,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import sql_count_checker
+from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
 from tests.integ.modin.utils import (
     assert_snowpark_pandas_equals_to_pandas_without_dtypecheck,
     eval_snowpark_pandas_result,
@@ -47,15 +47,16 @@ def try_cast_to_snow_series(value: Any) -> Any:
 
 
 @pytest.mark.parametrize("value", BITWISE_TEST_DATA)
-@sql_count_checker(query_count=1)
 def test_bitwise_unary(value):
 
     # Note: In pandas, using NaN values without specfiying a null-compatible dtype will yield an error.
     # SnowPandas will allow this behavior.
     # Note: NaN values like pd.NA, pd.NaT, np.nan will raise a TypeError: boolean value of NA is ambiguous
-    snow_value = try_cast_to_snow_series(value)
-
-    eval_snowpark_pandas_result(snow_value, native_pd.Series(value), lambda s: ~s)
+    with SqlCounter(
+        query_count=1, join_count=1 if isinstance(value, native_pd.Series) else 0
+    ):
+        snow_value = try_cast_to_snow_series(value)
+        eval_snowpark_pandas_result(snow_value, native_pd.Series(value), lambda s: ~s)
 
 
 @pytest.mark.parametrize("series", SERIES_BITWISE_TEST_DATA)
@@ -121,7 +122,6 @@ def check_op(native_lhs, native_rhs, snow_lhs, snow_rhs):
 @pytest.mark.parametrize(
     "op", [operator.or_, operator.and_]
 )  # |, &.  ^ is not supported in Snowflake
-@sql_count_checker(query_count=2, join_count=2)
 def test_bitwise_binary_between_series(lhs, rhs, op):
     def check_op(native_lhs, native_rhs, snow_lhs, snow_rhs):
         snow_ans = op(snow_lhs, snow_rhs)
@@ -131,10 +131,14 @@ def check_op(native_lhs, native_rhs, snow_lhs, snow_rhs):
             snow_ans, native_ans, lambda s: s, check_index_type=False
         )
 
-    check_op(lhs, rhs, try_cast_to_snow_series(lhs), try_cast_to_snow_series(rhs))
+    with SqlCounter(
+        query_count=2,
+        join_count=10 if isinstance(lhs.index, native_pd.MultiIndex) else 6,
+    ):
+        check_op(lhs, rhs, try_cast_to_snow_series(lhs), try_cast_to_snow_series(rhs))
 
-    # commute series
-    check_op(rhs, lhs, try_cast_to_snow_series(rhs), try_cast_to_snow_series(lhs))
+        # commute series
+        check_op(rhs, lhs, try_cast_to_snow_series(rhs), try_cast_to_snow_series(lhs))
 
 
 # Due to differences in logical or/and in SQL and pandas' |,& implementation, behavior doesn't match here, in particular
@@ -230,18 +234,21 @@ def check_op(native_lhs, native_rhs, snow_lhs, snow_rhs):
         ),
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
 def test_bitwise_binary_between_series_with_deviating_behavior_or(
     lhs, rhs, expected_pandas, expected_snowpark_pandas
 ):
-    snow_ans = try_cast_to_snow_series(lhs) | try_cast_to_snow_series(rhs)
-    assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(
-        snow_ans, expected_snowpark_pandas
-    )
+    with SqlCounter(
+        query_count=1,
+        join_count=5 if isinstance(lhs.index, native_pd.MultiIndex) else 3,
+    ):
+        snow_ans = try_cast_to_snow_series(lhs) | try_cast_to_snow_series(rhs)
+        assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(
+            snow_ans, expected_snowpark_pandas
+        )
 
-    # test here pandas to track any version regressions
-    native_ans = lhs | rhs
-    tm.assert_series_equal(native_ans, expected_pandas, check_index_type=False)
+        # test here pandas to track any version regressions
+        native_ans = lhs | rhs
+        tm.assert_series_equal(native_ans, expected_pandas, check_index_type=False)
 
 
 @pytest.mark.parametrize(
@@ -315,16 +322,19 @@ def test_bitwise_binary_between_series_with_deviating_behavior_or(
         ),
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
 def test_bitwise_binary_between_series_with_deviating_behavior_and(
     lhs, rhs, expected_pandas, expected_snowpark_pandas
 ):
-    snow_ans = try_cast_to_snow_series(lhs) & try_cast_to_snow_series(rhs)
-    assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(
-        snow_ans, expected_snowpark_pandas
-    )
+    with SqlCounter(
+        query_count=1,
+        join_count=5 if isinstance(lhs.index, native_pd.MultiIndex) else 3,
+    ):
+        snow_ans = try_cast_to_snow_series(lhs) & try_cast_to_snow_series(rhs)
+        assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(
+            snow_ans, expected_snowpark_pandas
+        )
 
-    # test here pandas to track any version regressions
-    native_ans = lhs & rhs
-    print(native_ans.index)
-    tm.assert_series_equal(native_ans, expected_pandas, check_index_type=False)
+        # test here pandas to track any version regressions
+        native_ans = lhs & rhs
+        print(native_ans.index)
+        tm.assert_series_equal(native_ans, expected_pandas, check_index_type=False)
diff --git a/tests/integ/modin/series/test_compare.py b/tests/integ/modin/series/test_compare.py
index c5c927343e..8d60d7f75a 100644
--- a/tests/integ/modin/series/test_compare.py
+++ b/tests/integ/modin/series/test_compare.py
@@ -50,7 +50,7 @@ class TestDefaultParameters:
         # copying the original series's index to the final resulting dataframe
         # adds 1 extra query to materialize the index.
         query_count=QUERY_COUNT + 1,
-        join_count=JOIN_COUNT,
+        join_count=5,
     )
     def test_no_diff(self, base_series):
         other_series = base_series.copy()
diff --git a/tests/integ/modin/series/test_describe.py b/tests/integ/modin/series/test_describe.py
index 9ecd2e33a3..32876f1608 100644
--- a/tests/integ/modin/series/test_describe.py
+++ b/tests/integ/modin/series/test_describe.py
@@ -8,7 +8,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import sql_count_checker
+from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
 from tests.integ.modin.utils import (
     assert_series_equal,
     create_test_series,
@@ -129,15 +129,18 @@ def timestamp_describe_comparator(snow_res, native_res):
 
 
 @pytest.mark.parametrize(
-    "index",
+    "index, join_count",
     [
-        pytest.param(None, id="default_index"),
-        pytest.param(["one", "two", "three", "four", "five", "six"], id="flat_index"),
+        pytest.param(None, 0, id="default_index"),
+        pytest.param(
+            ["one", "two", "three", "four", "five", "six"], 6, id="flat_index"
+        ),
         pytest.param(
             [
                 np.array(["bar", "bar", "baz", "baz", "foo", "foo"]),
                 np.array(["one", "two", "one", "two", "one", "two"]),
             ],
+            12,
             id="2D_index",
         ),
     ],
@@ -151,8 +154,10 @@ def timestamp_describe_comparator(snow_res, native_res):
     ],
     ids=["ints", "floats", "objects"],
 )
-@sql_count_checker(query_count=1, union_count=5)
-def test_describe_multiindex(data, index):
-    eval_snowpark_pandas_result(
-        *create_test_series(data, index=index), lambda ser: ser.describe()
-    )
+def test_describe_multiindex(data, index, join_count):
+    if isinstance(data[0], str) and index is not None:
+        join_count = 8 if len(index) == 2 else 4
+    with SqlCounter(query_count=1, union_count=5, join_count=join_count):
+        eval_snowpark_pandas_result(
+            *create_test_series(data, index=index), lambda ser: ser.describe()
+        )
diff --git a/tests/integ/modin/series/test_empty.py b/tests/integ/modin/series/test_empty.py
index a30a69116c..8e7aa9d915 100644
--- a/tests/integ/modin/series/test_empty.py
+++ b/tests/integ/modin/series/test_empty.py
@@ -34,7 +34,7 @@
         "empty series with only index",
     ],
 )
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_empty(args, kwargs):
     eval_snowpark_pandas_result(
         pd.Series(*args, **kwargs),
diff --git a/tests/integ/modin/series/test_iat.py b/tests/integ/modin/series/test_iat.py
index b3e2255403..7b9a4d4c06 100644
--- a/tests/integ/modin/series/test_iat.py
+++ b/tests/integ/modin/series/test_iat.py
@@ -103,7 +103,7 @@ def iat_set_helper(series):
         (0,),
     ],
 )
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=4)
 def test_iat_get_time_index(
     key,
     time_index_snowpark_pandas_series,
@@ -122,7 +122,7 @@ def test_iat_get_time_index(
         (0,),
     ],
 )
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=4)
 def test_iat_set_time_index(
     key,
     time_index_snowpark_pandas_series,
diff --git a/tests/integ/modin/series/test_mask.py b/tests/integ/modin/series/test_mask.py
index 2ef2465b58..0d3680cff4 100644
--- a/tests/integ/modin/series/test_mask.py
+++ b/tests/integ/modin/series/test_mask.py
@@ -76,7 +76,7 @@ def test_series_mask_duplicate_labels():
     eval_snowpark_pandas_result(snow_ser, native_ser, lambda ser: ser.mask(ser > 3))
 
 
-@sql_count_checker(query_count=1, join_count=0)
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_mask_multi_index():
     data = [1, 2, 3, 4, 5]
     index = [("a", "x"), ("b", "y"), ("c", "z"), ("d", "u"), ("e", "v")]
@@ -233,7 +233,7 @@ def test_series_mask_with_scalar_cond(cond):
         )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=3)
 def test_series_mask_series_cond_unmatched_index():
     data = [1, 2, 3, 4]
     index1 = [0, 1, 2, 3]
@@ -258,9 +258,10 @@ def perform_mask(series):
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
-@pytest.mark.parametrize("index", ["matched_index", "unmatched_index"])
-def test_series_mask_short_series_cond(index):
+@pytest.mark.parametrize(
+    "index, join_count", [("matched_index", 1), ("unmatched_index", 2)]
+)
+def test_series_mask_short_series_cond(index, join_count):
     data = [1, 2, 3, 4]
     if index != "matched_index":
         index = [7, 8, 9]
@@ -279,16 +280,18 @@ def perform_mask(series):
         else:
             return series.mask(native_cond, -1)
 
-    eval_snowpark_pandas_result(
-        snow_ser,
-        native_ser,
-        perform_mask,
-    )
+    with SqlCounter(query_count=1, join_count=join_count):
+        eval_snowpark_pandas_result(
+            snow_ser,
+            native_ser,
+            perform_mask,
+        )
 
 
-@sql_count_checker(query_count=1, join_count=1)
-@pytest.mark.parametrize("index", ["matched_index", "unmatched_index"])
-def test_series_mask_long_series_cond(index):
+@pytest.mark.parametrize(
+    "index, join_count", [("matched_index", 1), ("unmatched_index", 2)]
+)
+def test_series_mask_long_series_cond(index, join_count):
     data = [1, 2, 3, 4]
     if index != "matched_index":
         index = [7, 8, 9, 10, 11]
@@ -307,8 +310,9 @@ def perform_mask(series):
         else:
             return series.mask(native_cond, -1)
 
-    eval_snowpark_pandas_result(
-        snow_ser,
-        native_ser,
-        perform_mask,
-    )
+    with SqlCounter(query_count=1, join_count=join_count):
+        eval_snowpark_pandas_result(
+            snow_ser,
+            native_ser,
+            perform_mask,
+        )
diff --git a/tests/integ/modin/series/test_nlargest_nsmallest.py b/tests/integ/modin/series/test_nlargest_nsmallest.py
index a15cc5dfb2..253230156b 100644
--- a/tests/integ/modin/series/test_nlargest_nsmallest.py
+++ b/tests/integ/modin/series/test_nlargest_nsmallest.py
@@ -88,7 +88,7 @@ def test_nlargest_nsmallest_non_numeric_types(method, data):
     assert_series_equal(getattr(snow_s, method)(n), expected_s)
 
 
-@sql_count_checker(query_count=3)
+@sql_count_checker(query_count=3, join_count=2)
 def test_nlargest_nsmallest_no_columns(method):
     snow_s = pd.Series(query_compiler=pd.DataFrame(index=[1, 2])._query_compiler)
     snow_s = snow_s
diff --git a/tests/integ/modin/series/test_nunique.py b/tests/integ/modin/series/test_nunique.py
index bb20e9e4a5..f2aba15ada 100644
--- a/tests/integ/modin/series/test_nunique.py
+++ b/tests/integ/modin/series/test_nunique.py
@@ -8,7 +8,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import sql_count_checker
+from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
 from tests.integ.modin.utils import (
     assert_values_equal,
     create_test_series,
@@ -63,11 +63,11 @@ def test_series_nunique_deviating_nan_behavior(input_data, expected):
         ),
     ],
 )
-@sql_count_checker(query_count=1)
 def test_dataframe_nunique_multiindex(index):
     data = [0.1, 0.2, 0.1, 0]
-    eval_snowpark_pandas_result(
-        *create_test_series(data, index=index),
-        lambda ser: ser.nunique(),
-        comparator=assert_values_equal,
-    )
+    with SqlCounter(query_count=1, join_count=0 if index is None else 2):
+        eval_snowpark_pandas_result(
+            *create_test_series(data, index=index),
+            lambda ser: ser.nunique(),
+            comparator=assert_values_equal,
+        )
diff --git a/tests/integ/modin/series/test_rank.py b/tests/integ/modin/series/test_rank.py
index 2544f12e43..24801b581f 100644
--- a/tests/integ/modin/series/test_rank.py
+++ b/tests/integ/modin/series/test_rank.py
@@ -7,7 +7,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import sql_count_checker
+from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
 from tests.integ.modin.utils import (
     assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64,
     eval_snowpark_pandas_result,
@@ -83,7 +83,6 @@ def test_series_rank_numeric_only(method, ascending, na_option):
     )
 
 
-@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("data, index", TEST_RANK_DATA)
 @pytest.mark.parametrize(
     "method",
@@ -99,10 +98,15 @@ def test_series_rank_numeric_only(method, ascending, na_option):
 )
 # test Series percentile rank
 def test_df_rank_pct(data, index, method, ascending, na_option):
-    snow_df = pd.DataFrame(data, index=index).rank(
-        method=method, ascending=ascending, na_option=na_option, pct=True
-    )
-    native_df = native_pd.DataFrame(data, index=index).rank(
-        method=method, ascending=ascending, na_option=na_option, pct=True
-    )
-    assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64(snow_df, native_df)
+    with SqlCounter(
+        query_count=1, join_count=2 if isinstance(index, native_pd.MultiIndex) else 0
+    ):
+        snow_df = pd.DataFrame(data, index=index).rank(
+            method=method, ascending=ascending, na_option=na_option, pct=True
+        )
+        native_df = native_pd.DataFrame(data, index=index).rank(
+            method=method, ascending=ascending, na_option=na_option, pct=True
+        )
+        assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64(
+            snow_df, native_df
+        )
diff --git a/tests/integ/modin/series/test_rename.py b/tests/integ/modin/series/test_rename.py
index 4ccf29706f..53873e0b2f 100644
--- a/tests/integ/modin/series/test_rename.py
+++ b/tests/integ/modin/series/test_rename.py
@@ -45,7 +45,7 @@ def renamer(x):
             # values in the variant column will be quoted
             assert_index_equal(renamed.index, renamed2.index.str.replace('"', ""))
 
-    @sql_count_checker(query_count=1, join_count=1)
+    @sql_count_checker(query_count=1, join_count=2)
     def test_rename_partial_dict(self):
         # partial dict
         ser = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64")
@@ -63,7 +63,7 @@ def test_rename_retain_index_name(self):
         renamed = renamer.rename({})
         assert renamed.index.name == renamer.index.name
 
-    @sql_count_checker(query_count=2, join_count=1)
+    @sql_count_checker(query_count=2, join_count=2)
     def test_rename_by_series(self):
         ser = Series(range(5), name="foo")
         renamer = Series({1: 10, 2: 20})
@@ -80,7 +80,7 @@ def test_rename_set_name(self):
                 tm.assert_numpy_array_equal(result.index.values, ser.index.values)
                 assert ser.name is None
 
-    @sql_count_checker(query_count=5)
+    @sql_count_checker(query_count=5, join_count=5)
     def test_rename_set_name_inplace(self):
         ser = Series(range(3), index=list("abc"))
         for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
diff --git a/tests/integ/modin/series/test_setitem.py b/tests/integ/modin/series/test_setitem.py
index 39358b9870..929226bc89 100644
--- a/tests/integ/modin/series/test_setitem.py
+++ b/tests/integ/modin/series/test_setitem.py
@@ -1601,7 +1601,7 @@ def test_series_setitem_with_empty_key_and_empty_series_item(
     else:
         snowpark_key = key
 
-    with SqlCounter(query_count=4):
+    with SqlCounter(query_count=1):
         native_ser[key] = item
         snowpark_ser[
             pd.Series(snowpark_key)
diff --git a/tests/integ/modin/series/test_shape.py b/tests/integ/modin/series/test_shape.py
index 7bbc1270a0..ba62dfde67 100644
--- a/tests/integ/modin/series/test_shape.py
+++ b/tests/integ/modin/series/test_shape.py
@@ -9,7 +9,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import sql_count_checker
+from tests.integ.modin.sql_counter import SqlCounter
 from tests.integ.modin.utils import eval_snowpark_pandas_result
 
 
@@ -34,11 +34,13 @@
         "empty series with only index",
     ],
 )
-@sql_count_checker(query_count=1)
 def test_series_shape(args, kwargs):
-    eval_snowpark_pandas_result(
-        pd.Series(*args, **kwargs),
-        native_pd.Series(*args, **kwargs),
-        lambda df: df.shape,
-        comparator=lambda x, y: x == y,
-    )
+    with SqlCounter(
+        query_count=1, join_count=1 if kwargs.get("index", None) == [] else 0
+    ):
+        eval_snowpark_pandas_result(
+            pd.Series(*args, **kwargs),
+            native_pd.Series(*args, **kwargs),
+            lambda df: df.shape,
+            comparator=lambda x, y: x == y,
+        )
diff --git a/tests/integ/modin/series/test_take.py b/tests/integ/modin/series/test_take.py
index 7861686a02..2ba09be1b8 100644
--- a/tests/integ/modin/series/test_take.py
+++ b/tests/integ/modin/series/test_take.py
@@ -16,7 +16,7 @@ def test_series_take():
 
     actual = ser.take([1, 3, 4])
     expected = pd.Series([5, 2, 4], index=[1, 3, 4])
-    with SqlCounter(query_count=2, join_count=2):
+    with SqlCounter(query_count=2, join_count=3):
         assert_series_equal(actual, expected)
 
     actual = ser.take([-1, 3, 4])
diff --git a/tests/integ/modin/series/test_to_snowflake.py b/tests/integ/modin/series/test_to_snowflake.py
index 92b428f70e..f542edfa17 100644
--- a/tests/integ/modin/series/test_to_snowflake.py
+++ b/tests/integ/modin/series/test_to_snowflake.py
@@ -68,7 +68,7 @@ def test_to_snowflake_index_label_none_raises(test_table_name):
         snow_series.to_snowflake(test_table_name, if_exists="replace", index=True)
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_to_snowflake_multiindex(test_table_name, snow_series):
     index = native_pd.MultiIndex.from_arrays(
         [[1, 1, 2, 2], ["red", "blue", "red", "blue"]], names=("number", "color")
diff --git a/tests/integ/modin/test_concat.py b/tests/integ/modin/test_concat.py
index d82d0266e3..5c236731a0 100644
--- a/tests/integ/modin/test_concat.py
+++ b/tests/integ/modin/test_concat.py
@@ -1058,7 +1058,7 @@ def test_concat_sorted_frames():
         ),  # duplicate in frame2
     ],
 )
-@sql_count_checker(query_count=2, union_count=1)
+@sql_count_checker(query_count=2, union_count=1, join_count=1)
 def test_concat_duplicate_columns(columns1, columns2, expected_rows, expected_cols):
     df1 = pd.DataFrame([[1, 2, 3]], columns=columns1)
     df2 = pd.DataFrame([[4, 5, 6]], columns=columns2)
@@ -1123,7 +1123,7 @@ def test_concat_from_file(resources_path):
     )
 
 
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=5)
 def test_concat_keys():
     native_data = {
         "one": native_pd.Series([1, 2, 3], index=["a", "b", "c"]),
@@ -1180,5 +1180,6 @@ def test_df_creation_from_series_from_same_df():
 
 @sql_count_checker(query_count=0)
 def test_concat_timedelta_not_implemented(df1):
+    df1 = pd.DataFrame(df1)
     with pytest.raises(NotImplementedError):
         pd.concat([df1, df1, df1.astype({"C": "timedelta64[ns]"})])
diff --git a/tests/integ/modin/test_from_pandas_to_pandas.py b/tests/integ/modin/test_from_pandas_to_pandas.py
index 8490bb808f..c9e29147ae 100644
--- a/tests/integ/modin/test_from_pandas_to_pandas.py
+++ b/tests/integ/modin/test_from_pandas_to_pandas.py
@@ -572,7 +572,7 @@ def test_from_pandas_series_with_tuple_name():
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_to_pandas():
     array = ["a", "b", "c"]
     pandas_series = native_pd.Series(data=array, index=array)
@@ -632,7 +632,7 @@ def test_snowpark_pandas_statement_params():
         assert "efg" == mock_to_pandas.call_args.kwargs["statement_params"]["abc"]
 
 
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=5)
 def test_create_df_from_series():
     native_data = {
         "one": native_pd.Series([1, 2, 3], index=["a", "b", "c"]),
diff --git a/tests/integ/modin/test_internal_frame.py b/tests/integ/modin/test_internal_frame.py
index da38322b9a..c7a95fa601 100644
--- a/tests/integ/modin/test_internal_frame.py
+++ b/tests/integ/modin/test_internal_frame.py
@@ -38,7 +38,7 @@ def test_strip_duplicates(input, expected):
     assert_frame_equal(result, pd.DataFrame(expected))
 
 
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=2, join_count=2)
 def test_strip_duplicates_after_sort():
     df = pd.DataFrame({"A": [0, 1, 0, 1, 2], "B": [1, 2, 3, 4, 5]})
     df = df.sort_values(by="B", ascending=False)
diff --git a/tests/integ/modin/test_numpy.py b/tests/integ/modin/test_numpy.py
index cafbd08f36..43b9ef263f 100644
--- a/tests/integ/modin/test_numpy.py
+++ b/tests/integ/modin/test_numpy.py
@@ -113,7 +113,7 @@ def test_np_where_notimplemented():
             )
 
 
-@sql_count_checker(query_count=5, join_count=4)
+@sql_count_checker(query_count=5, join_count=7)
 def test_scalar():
     pdf_scalar = native_pd.DataFrame([[99, 99], [99, 99]])
     sdf_scalar = pd.DataFrame([[99, 99], [99, 99]])
@@ -172,7 +172,7 @@ def test_different_inputs(cond, x, y):
         assert_array_equal(sp_result, np_orig_result)
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2, join_count=3)
 def test_broadcast_scalar_x_df():
     input_df = native_pd.DataFrame([[False, True], [False, True]])
     input_df2 = native_pd.DataFrame([[1, 0], [0, 1]])
@@ -183,7 +183,7 @@ def test_broadcast_scalar_x_df():
     assert_array_equal(snow_result, np_result)
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2, join_count=3)
 def test_broadcast_scalar_x_ser():
     input_ser = native_pd.Series([False, True])
     input_ser2 = native_pd.Series([1, 0])

From f4a80f385f800c8b529ef1b6c452a78bb9731280 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Mon, 9 Sep 2024 10:48:24 -0700
Subject: [PATCH 23/42] fix loc and iloc tests

---
 tests/integ/modin/binary/test_binary_op.py    | 24 +++++---
 tests/integ/modin/frame/test_fillna.py        |  2 +-
 tests/integ/modin/frame/test_getitem.py       |  2 +-
 tests/integ/modin/frame/test_iloc.py          | 46 ++++++++-------
 tests/integ/modin/frame/test_loc.py           | 56 +++++++++----------
 tests/integ/modin/frame/test_merge.py         |  6 +-
 tests/integ/modin/frame/test_transpose.py     |  2 +-
 tests/integ/modin/frame/test_where.py         |  4 +-
 .../index/test_datetime_index_methods.py      |  2 +-
 .../modin/resample/test_resample_negative.py  |  2 +-
 tests/integ/modin/series/test_all_any.py      |  2 +-
 tests/integ/modin/series/test_empty.py        | 19 ++++---
 tests/integ/modin/series/test_iloc.py         | 34 +++++------
 tests/integ/modin/series/test_loc.py          | 50 +++++++++--------
 tests/integ/modin/test_telemetry.py           |  2 +-
 15 files changed, 135 insertions(+), 118 deletions(-)

diff --git a/tests/integ/modin/binary/test_binary_op.py b/tests/integ/modin/binary/test_binary_op.py
index cd036bcb04..4f50096ce4 100644
--- a/tests/integ/modin/binary/test_binary_op.py
+++ b/tests/integ/modin/binary/test_binary_op.py
@@ -1289,20 +1289,26 @@ def test_other_with_native_pandas_object_raises(op):
     ],
 )
 @pytest.mark.parametrize("op", [operator.add])
-@sql_count_checker(query_count=2, join_count=2)
 def test_binary_add_between_series_for_index_alignment(lhs, rhs, op):
     def check_op(native_lhs, native_rhs, snow_lhs, snow_rhs):
         snow_ans = op(snow_lhs, snow_rhs)
         native_ans = op(native_lhs, native_rhs)
-        # for one multi-index test case (marked with comment) the "inferred_type" doesn't match (Snowpark: float vs. pandas integer)
-        eval_snowpark_pandas_result(
-            snow_ans, native_ans, lambda s: s, check_index_type=False
-        )
+        with SqlCounter(
+            query_count=2, join_count=10 if isinstance(lhs.index, pd.MultiIndex) else 6
+        ):
+            # for one multi-index test case (marked with comment) the "inferred_type" doesn't match (Snowpark: float vs. pandas integer)
+            eval_snowpark_pandas_result(
+                snow_ans, native_ans, lambda s: s, check_index_type=False
+            )
 
-    check_op(lhs, rhs, try_cast_to_snow_series(lhs), try_cast_to_snow_series(rhs))
+            check_op(
+                lhs, rhs, try_cast_to_snow_series(lhs), try_cast_to_snow_series(rhs)
+            )
 
-    # commute series
-    check_op(rhs, lhs, try_cast_to_snow_series(rhs), try_cast_to_snow_series(lhs))
+            # commute series
+            check_op(
+                rhs, lhs, try_cast_to_snow_series(rhs), try_cast_to_snow_series(lhs)
+            )
 
 
 # MOD TESTS
@@ -1872,7 +1878,7 @@ def test_binary_rpow_between_df_and_list_like_on_axis_1(rhs):
         "rmod",
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=3)
 def test_generated_docstring_examples(opname):
     # test for operators that correct examples are generated and match up with pandas.
     # if this test passes, this ensures that all the examples generated in utils.py will be correct.
diff --git a/tests/integ/modin/frame/test_fillna.py b/tests/integ/modin/frame/test_fillna.py
index 677c8d3ddc..6ae668d694 100644
--- a/tests/integ/modin/frame/test_fillna.py
+++ b/tests/integ/modin/frame/test_fillna.py
@@ -426,7 +426,7 @@ def test_multiindex_df_values_dict_various_levels(test_fillna_multiindex_df):
     )
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=3)
 def test_multiindex_df_values_series(test_fillna_multiindex_df, test_fillna_multiindex):
     values = pd.Series([10, 1, 2, 3], index=test_fillna_multiindex)
     native_values = native_pd.Series([10, 1, 2, 3], index=test_fillna_multiindex)
diff --git a/tests/integ/modin/frame/test_getitem.py b/tests/integ/modin/frame/test_getitem.py
index fd4ede77d7..e08e25513a 100644
--- a/tests/integ/modin/frame/test_getitem.py
+++ b/tests/integ/modin/frame/test_getitem.py
@@ -343,7 +343,7 @@ def test_df_getitem_with_slice(
         slice("z", "a", -1),
     ],
 )
-@sql_count_checker(query_count=1, join_count=0)
+@sql_count_checker(query_count=1, join_count=1)
 def test_df_getitem_with_non_int_slice(key):
     data = {"a": [1, 2, 3], "b": [4, 5, 6]}
     index = ["x", "y", "z"]
diff --git a/tests/integ/modin/frame/test_iloc.py b/tests/integ/modin/frame/test_iloc.py
index cb69f78172..d8b874b1cf 100644
--- a/tests/integ/modin/frame/test_iloc.py
+++ b/tests/integ/modin/frame/test_iloc.py
@@ -118,7 +118,7 @@
     ("RangeIndex", 0),
     ("Index[bool]", 1),
     ("emptyFloatSeries", 2),
-    ("multi_index_Series", 2),
+    ("multi_index_Series", 6),
 ]
 
 # Snowflake type checking will fail if the item values aren't type compatible, so we normalize to int to stay compatible.
@@ -315,7 +315,10 @@ def eval_func(df):
     if key == "RangeIndex":
         expected_query_count = 1
 
-    with SqlCounter(query_count=expected_query_count, join_count=0):
+    with SqlCounter(
+        query_count=expected_query_count,
+        join_count=4 if key == "multi_index_Series" else 0,
+    ):
         eval_snowpark_pandas_result(
             default_index_snowpark_pandas_df, default_index_native_df, eval_func
         )
@@ -445,7 +448,7 @@ def test_df_iloc_get_diff2native(
     )
 
 
-@sql_count_checker(query_count=2, join_count=4)
+@sql_count_checker(query_count=2, join_count=8)
 def test_df_iloc_get_with_conflict():
     # index and data columns have conflict in get_by_col
     df = DataFrame({"A": [0, 1]}, index=native_pd.Index([2, 3], name="A")).rename(
@@ -2619,34 +2622,34 @@ def perform_iloc(df):
 
 
 @pytest.mark.parametrize(
-    "row_key, row_key_index",
+    "row_key, row_key_index, row_add_joins",
     [
-        [1, None],
-        [[3, 0], None],
-        [[1, 2], [("A",), ("B",)]],
-        [[2, 1], [("A", 1), ("B", 2)]],
+        [1, None, 0],
+        [[3, 0], None, 0],
+        [[1, 2], [("A",), ("B",)], 1],
+        [[2, 1], [("A", 1), ("B", 2)], 2],
     ],
 )
 @pytest.mark.parametrize(
-    "col_key, col_key_index",
+    "col_key, col_key_index, col_add_joins",
     [
-        [2, None],
-        [[2, 1], None],
-        [[1, 2], [("X",), ("Y",)]],
-        [[2, 1], [("X", 11), ("Y", 21)]],
+        [2, None, 0],
+        [[2, 1], None, 0],
+        [[1, 2], [("X",), ("Y",)], 1],
+        [[2, 1], [("X", 11), ("Y", 21)], 2],
     ],
 )
 @pytest.mark.parametrize(
     "item_values, item_index, item_columns, expected_join_count",
     [
-        [999, None, None, 2],
-        [TEST_ITEMS_DATA_2X2, None, None, 3],
-        [TEST_ITEMS_DATA_2X2, [("r", 20), ("s", 25)], None, 5],
-        [TEST_ITEMS_DATA_2X2, [("r", 20), ("s", 25)], [("e", 5), ("f", 6)], 5],
-        [TEST_ITEMS_DATA_2X2, None, [("e", 5), ("f", 6)], 3],
+        [999, None, None, 6],
+        [TEST_ITEMS_DATA_2X2, None, None, 7],
+        [TEST_ITEMS_DATA_2X2, [("r", 20), ("s", 25)], None, 9],
+        [TEST_ITEMS_DATA_2X2, [("r", 20), ("s", 25)], [("e", 5), ("f", 6)], 9],
+        [TEST_ITEMS_DATA_2X2, None, [("e", 5), ("f", 6)], 7],
     ],
 )
-def test_df_iloc_set_with_multi_index(
+def test_df_iloc_set_with_multiindex(
     row_key,
     row_key_index,
     col_key,
@@ -2655,6 +2658,8 @@ def test_df_iloc_set_with_multi_index(
     item_index,
     item_columns,
     expected_join_count,
+    row_add_joins,
+    col_add_joins,
 ):
     df_data = [
         [1, 2, 3, 4, 5],
@@ -2733,6 +2738,7 @@ def helper_iloc(df):
     if isinstance(snow_col_key, pd.Series):
         expected_query_count += 1
 
+    expected_join_count += row_add_joins + col_add_joins
     with SqlCounter(query_count=expected_query_count, join_count=expected_join_count):
         eval_snowpark_pandas_result(snow_df, native_df, helper_iloc, inplace=True)
 
@@ -2808,7 +2814,7 @@ def iloc_helper(df: Union[pd.DataFrame, native_pd.DataFrame]) -> None:
 
     # For a Series row key, the key is joined with the df to derive the iloc results. For column keys, a select
     # statement is used instead of a join.
-    join_count = 2 if axis == "row" else 0
+    join_count = 4 if axis == "row" else 2
     query_count = 1 if axis == "row" else 2
 
     # Evaluate with MultiIndex created from tuples.
diff --git a/tests/integ/modin/frame/test_loc.py b/tests/integ/modin/frame/test_loc.py
index ec9e93a77f..d94f9f21d0 100644
--- a/tests/integ/modin/frame/test_loc.py
+++ b/tests/integ/modin/frame/test_loc.py
@@ -215,7 +215,7 @@ def test_df_loc_get_col_non_boolean_key(
     "key",
     boolean_indexer,
 )
-@sql_count_checker(query_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_df_loc_get_col_boolean_indexer(
     key, str_index_snowpark_pandas_df, str_index_native_df
 ):
@@ -243,7 +243,7 @@ def test_df_loc_get_col_boolean_indexer(
     "key",
     list_like_time_col_inputs,
 )
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_df_loc_get_col_time_df(
     key, time_column_snowpark_pandas_df, time_column_native_df
 ):
@@ -258,7 +258,7 @@ def test_df_loc_get_col_time_df(
     "key",
     snowpark_pandas_int_index_row_inputs,
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_df_loc_get_int_index_row_snowpark_pandas_input(
     key,
     default_index_snowpark_pandas_df,
@@ -606,7 +606,7 @@ def test_mi_df_loc_get_non_boolean_list_tuple_key(mi_table_df, row, col):
             )
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2, join_count=4)
 def test_mi_df_loc_get_boolean_series_row_key(mi_table_df):
     df = pd.DataFrame(mi_table_df)
     bool_indexer = [False, True, True, False, False, True]
@@ -639,7 +639,7 @@ def test_mi_df_loc_get_boolean_series_row_key(mi_table_df):
     )
 
 
-@sql_count_checker(query_count=3, join_count=0)
+@sql_count_checker(query_count=3, join_count=2)
 def test_mi_df_loc_get_boolean_series_col_key(mi_table_df):
     df = pd.DataFrame(mi_table_df)
     bool_indexer = [False, True]
@@ -1448,11 +1448,9 @@ def helper(df):
                 snow_df.to_pandas()
     else:
         expected_query_count = 1
-        expected_join_count = 1
+        expected_join_count = 2
         if key == slice(None):
             expected_join_count = 0
-        elif isinstance(key, slice) and key.step == 2:
-            expected_join_count += 1
 
         with SqlCounter(
             query_count=expected_query_count, join_count=expected_join_count
@@ -1672,7 +1670,7 @@ def test_df_loc_get_key_bool_self_series():
         [random.choice([True, False]) for _ in range(5)],
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_df_loc_get_key_bool_series_with_aligned_indices(key, use_default_index):
     # aligned indices means both row_pos and index are exactly match
     if use_default_index:
@@ -1701,7 +1699,7 @@ def test_df_loc_get_key_bool_series_with_aligned_indices(key, use_default_index)
         [random.choice([True, False]) for _ in range(5)],
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_df_loc_get_key_bool_series_with_unaligned_and_distinct_indices(
     key, use_default_index
 ):
@@ -1778,7 +1776,7 @@ def test_df_loc_get_key_bool_series_with_unaligned_and_duplicate_indices():
         ],  # larger length
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_df_loc_get_key_bool_series_with_mismatch_index_len(key, use_default_index):
     if use_default_index:
         index = None
@@ -2406,7 +2404,7 @@ def loc_set_helper(df):
         ["a", "a", "c", "d"],
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=3)
 def test_df_loc_set_scalar_row_key_enlargement(
     row_key, col_key, item_values, data_index
 ):
@@ -2478,7 +2476,7 @@ def set_loc_helper(df):
         ["a", "a", "c", "d"],
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=3)
 def test_df_loc_set_scalar_row_key_enlargement_deviates_from_native_pandas(
     row_key, col_key, item_values, data_index
 ):
@@ -3203,7 +3201,7 @@ def test_df_loc_set_boolean_series_with_non_default_index_key_and_scalar_item():
         ["duplicate", [1, 1, 2, 3]],
     ],
 )
-@sql_count_checker(query_count=1, join_count=4)
+@sql_count_checker(query_count=1, join_count=5)
 def test_df_loc_set_duplicate_index(
     self_index_type, self_index_val, index, columns, item
 ):
@@ -3784,7 +3782,7 @@ def loc_set_helper(df):
         [2, "x"],
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_df_setitem_boolean_key(key, index):
     item = 99
     num_columns = 3
@@ -3862,7 +3860,7 @@ def test_df_single_value_with_slice_key():
     eval_snowpark_pandas_result(snowpark_df, native_df, lambda df: df.loc[0:1])
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_df_loc_set_none():
     native_df = native_pd.DataFrame({"a": [1, 2, 3]})
 
@@ -3885,7 +3883,7 @@ def loc_set_helper(df):
     )
 
 
-@sql_count_checker(query_count=1, join_count=3)
+@sql_count_checker(query_count=1, join_count=4)
 def test_df_loc_set_with_index_and_column_labels():
     """
     Create a DataFrame using 3 Series objects and perform loc set with a scalar.
@@ -3932,25 +3930,25 @@ def test_raise_set_cell_with_list_like_value_error():
         pytest.param(
             "1 day",
             2,
-            3,
+            4,
             marks=pytest.mark.xfail(
                 reason="SNOW-1652608 result series name incorrectly set"
             ),
-        ),  # 1 join from squeeze, 2 joins from to_pandas during eval
+        ),
         pytest.param(
             native_pd.to_timedelta("1 day"),
             2,
-            3,
+            4,
             marks=pytest.mark.xfail(
                 reason="SNOW-1652608 result series name incorrectly set"
             ),
-        ),  # 1 join from squeeze, 2 joins from to_pandas during eval
-        (["1 day", "3 days"], 1, 1),
-        ([True, False, False], 1, 1),
-        (slice(None, "4 days"), 1, 0),
-        (slice(None, "4 days", 2), 1, 0),
-        (slice("1 day", "2 days"), 1, 0),
-        (slice("1 day 1 hour", "2 days 2 hours", -1), 1, 0),
+        ),
+        (["1 day", "3 days"], 1, 2),
+        ([True, False, False], 1, 2),
+        (slice(None, "4 days"), 1, 1),
+        (slice(None, "4 days", 2), 1, 1),
+        (slice("1 day", "2 days"), 1, 1),
+        (slice("1 day 1 hour", "2 days 2 hours", -1), 1, 1),
     ],
 )
 def test_df_loc_get_with_timedelta(key, query_count, join_count):
@@ -4017,7 +4015,7 @@ def test_df_loc_get_with_timedelta(key, query_count, join_count):
         ),
     ],
 )
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_df_loc_get_with_timedelta_behavior_difference(key, expected_result):
     # In these test cases, native pandas raises a KeyError but Snowpark pandas works correctly.
     data = {
@@ -4037,7 +4035,7 @@ def test_df_loc_get_with_timedelta_behavior_difference(key, expected_result):
     assert_frame_equal(actual_result, expected_result)
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=2)
 def test_df_loc_get_with_timedeltaindex_key():
     data = {
         "A": [1, 2, 3],
diff --git a/tests/integ/modin/frame/test_merge.py b/tests/integ/modin/frame/test_merge.py
index 8b9b5472e3..15ad41a580 100644
--- a/tests/integ/modin/frame/test_merge.py
+++ b/tests/integ/modin/frame/test_merge.py
@@ -302,7 +302,7 @@ def test_merge_on_index_columns(left_df, right_df, how, on, sort):
 
 @pytest.mark.parametrize("index1", [[3, 4], [1.5, 8.0], [None, None]])
 @pytest.mark.parametrize("index2", [[7, 8], [1.5, 3.0], [None, None]])
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3, join_count=5)
 def test_join_type_mismatch(index1, index2):
     df1 = pd.DataFrame({"A": [1, 2]}, index=index1)
     df2 = pd.DataFrame({"B": [3, 4]}, index=index2)
@@ -351,7 +351,7 @@ def test_join_type_mismatch_negative(index1, index2):
         ),
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=3)
 def test_join_type_mismatch_diff_with_native_pandas(index1, index2, expected_res):
     df1 = pd.DataFrame({"A": [1, 2]}, index=index1)
     df2 = pd.DataFrame({"B": [3, 4]}, index=index2)
@@ -960,7 +960,7 @@ def test_merge_no_join_keys_negative(left_name, right_name, left_df, right_df):
     )
 
 
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_merge_no_join_keys_common_index_negative(left_df, right_df):
     left_df = pd.DataFrame({"A": [1, 2, 3]}, native_pd.Index([7, 8, 9], name="KEY"))
     right_df = pd.DataFrame({"B": [1, 2, 3]}, native_pd.Index([7, 8, 9], name="KEY"))
diff --git a/tests/integ/modin/frame/test_transpose.py b/tests/integ/modin/frame/test_transpose.py
index 894bbbbc1b..469a66dd51 100644
--- a/tests/integ/modin/frame/test_transpose.py
+++ b/tests/integ/modin/frame/test_transpose.py
@@ -242,7 +242,7 @@ def test_dataframe_transpose_preserve_float_dtypes():
     assert all([dtype == "float64" for dtype in snow_df.T.dtypes])
 
 
-@sql_count_checker(query_count=1, union_count=1)
+@sql_count_checker(query_count=1, union_count=1, join_count=2)
 def test_dataframe_transpose_single_numeric_column():
     single_column_data = ({0: "A", 1: "B", 2: "C", 3: "D"},)
     native_df = native_pd.DataFrame(single_column_data, index=(0,))
diff --git a/tests/integ/modin/frame/test_where.py b/tests/integ/modin/frame/test_where.py
index 006b7e76fb..75a5d6db7a 100644
--- a/tests/integ/modin/frame/test_where.py
+++ b/tests/integ/modin/frame/test_where.py
@@ -902,7 +902,7 @@ def perform_where(df):
     )
 
 
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=2, join_count=4)
 @pytest.mark.parametrize(
     "data",
     [[10], [10, 11, 12], [10, 11, 12, 13]],
@@ -947,7 +947,7 @@ def perform_where(df):
     )
 
 
-@sql_count_checker(query_count=2, join_count=3, union_count=1)
+@sql_count_checker(query_count=2, join_count=5, union_count=1)
 @pytest.mark.parametrize(
     "data",
     [[10], [10, 11, 12], [10, 11, 12, 13]],
diff --git a/tests/integ/modin/index/test_datetime_index_methods.py b/tests/integ/modin/index/test_datetime_index_methods.py
index 56fd40a6cb..b727b4750e 100644
--- a/tests/integ/modin/index/test_datetime_index_methods.py
+++ b/tests/integ/modin/index/test_datetime_index_methods.py
@@ -89,7 +89,7 @@ def test_non_default_args(kwargs):
         pd.DatetimeIndex(query_compiler=idx._query_compiler, **kwargs)
 
 
-@sql_count_checker(query_count=6)
+@sql_count_checker(query_count=6, join_count=6)
 def test_index_parent():
     """
     Check whether the parent field in Index is updated properly.
diff --git a/tests/integ/modin/resample/test_resample_negative.py b/tests/integ/modin/resample/test_resample_negative.py
index e20fc397ef..44319c120b 100644
--- a/tests/integ/modin/resample/test_resample_negative.py
+++ b/tests/integ/modin/resample/test_resample_negative.py
@@ -137,7 +137,7 @@ def test_resample_fillna_invalid_method():
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_resample_tz_negative():
     snow_df = pd.DataFrame(
         {"a": range(3)},
diff --git a/tests/integ/modin/series/test_all_any.py b/tests/integ/modin/series/test_all_any.py
index 0f78b320fe..d0d1c0987a 100644
--- a/tests/integ/modin/series/test_all_any.py
+++ b/tests/integ/modin/series/test_all_any.py
@@ -65,7 +65,7 @@ def test_any_int(data):
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_all_named_index():
     data = [1, 0, 3]
     index_name = ["a", "b", "c"]
diff --git a/tests/integ/modin/series/test_empty.py b/tests/integ/modin/series/test_empty.py
index 8e7aa9d915..d53cd6e3d5 100644
--- a/tests/integ/modin/series/test_empty.py
+++ b/tests/integ/modin/series/test_empty.py
@@ -9,7 +9,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import sql_count_checker
+from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
 from tests.integ.modin.utils import eval_snowpark_pandas_result
 
 
@@ -34,14 +34,17 @@
         "empty series with only index",
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
 def test_series_empty(args, kwargs):
-    eval_snowpark_pandas_result(
-        pd.Series(*args, **kwargs),
-        native_pd.Series(*args, **kwargs),
-        lambda df: df.empty,
-        comparator=lambda x, y: x == y,
-    )
+    with SqlCounter(
+        query_count=1,
+        join_count=1 if (args == [] and kwargs.get("index", None) == []) else 0,
+    ):
+        eval_snowpark_pandas_result(
+            pd.Series(*args, **kwargs),
+            native_pd.Series(*args, **kwargs),
+            lambda df: df.empty,
+            comparator=lambda x, y: x == y,
+        )
 
 
 @sql_count_checker(query_count=5, join_count=2)
diff --git a/tests/integ/modin/series/test_iloc.py b/tests/integ/modin/series/test_iloc.py
index b35681e4ee..eea764af40 100644
--- a/tests/integ/modin/series/test_iloc.py
+++ b/tests/integ/modin/series/test_iloc.py
@@ -78,7 +78,7 @@ def test_diff2native(default_index_snowpark_pandas_series, default_index_native_
     "key, val",
     setitem_key_val_pair,
 )
-def test_setitem(
+def test_series_iloc_setitem(
     key,
     val,
     default_index_native_int_snowpark_pandas_series,
@@ -89,7 +89,7 @@ def operation(ser):
         # Based on snowflake type results, the result becomes 'str' type so we normalize to float for comparison.
         return ser.astype("float")
 
-    expected_join_count = 3 if isinstance(val, list) else 2
+    expected_join_count = 5 if isinstance(val, list) else 4
     with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(
             default_index_native_int_snowpark_pandas_series,
@@ -777,25 +777,25 @@ def perform_iloc(df):
 
 
 @pytest.mark.parametrize(
-    "row_key, row_key_index",
+    "row_key, row_key_index, add_joins",
     [
-        [1, None],
-        [[3, 0], None],
-        [[1, 2], [("A",), ("B",)]],
-        [[2, 1], [("A", 1), ("B", 2)]],
+        [1, None, 0],
+        [[3, 0], None, 0],
+        [[1, 2], [("A",), ("B",)], 1],
+        [[2, 1], [("A", 1), ("B", 2)], 2],
     ],
 )
 @pytest.mark.parametrize(
     "item_values, item_index, expected_join_count",
     [
-        [999, None, 2],
-        [TEST_ITEMS_DATA_2X1, None, 3],
-        [TEST_ITEMS_DATA_2X1, [("r",), ("s",)], 4],
-        [TEST_ITEMS_DATA_2X1, [("r", 20), ("s", 25)], 5],
+        [999, None, 6],
+        [TEST_ITEMS_DATA_2X1, None, 7],
+        [TEST_ITEMS_DATA_2X1, [("r",), ("s",)], 8],
+        [TEST_ITEMS_DATA_2X1, [("r", 20), ("s", 25)], 9],
     ],
 )
-def test_df_iloc_set_with_multi_index(
-    row_key, row_key_index, item_values, item_index, expected_join_count
+def test_df_iloc_set_with_multiindex(
+    row_key, row_key_index, item_values, item_index, expected_join_count, add_joins
 ):
     ser_data = [10, 11, 12, 13, 14]
     row_index = pd.MultiIndex.from_tuples(
@@ -835,7 +835,7 @@ def helper_iloc(ser):
         else:
             ser.iloc[snow_row_key] = snow_items
 
-    with SqlCounter(query_count=1, join_count=expected_join_count):
+    with SqlCounter(query_count=1, join_count=expected_join_count + add_joins):
         eval_snowpark_pandas_result(snow_ser, native_ser, helper_iloc, inplace=True)
 
 
@@ -851,7 +851,7 @@ def iloc_helper(series: Union[pd.Series, native_pd.Series]) -> None:
             )
 
         # test ser with default index
-        with SqlCounter(query_count=1, join_count=2):
+        with SqlCounter(query_count=1, join_count=4):
             eval_snowpark_pandas_result(
                 default_index_int_series,
                 default_index_native_int_series,
@@ -859,7 +859,7 @@ def iloc_helper(series: Union[pd.Series, native_pd.Series]) -> None:
             )
 
         # test ser with non default index
-        with SqlCounter(query_count=1, join_count=2):
+        with SqlCounter(query_count=1, join_count=4):
             eval_snowpark_pandas_result(
                 int_series_with_non_default_index,
                 native_int_series_with_non_default_index,
@@ -867,7 +867,7 @@ def iloc_helper(series: Union[pd.Series, native_pd.Series]) -> None:
             )
 
         # test ser with MultiIndex
-        with SqlCounter(query_count=1, join_count=2):
+        with SqlCounter(query_count=1, join_count=4):
             eval_snowpark_pandas_result(
                 int_series_with_multiindex,
                 multiindex_native_int_series,
diff --git a/tests/integ/modin/series/test_loc.py b/tests/integ/modin/series/test_loc.py
index 70af8247bd..da13247cd7 100644
--- a/tests/integ/modin/series/test_loc.py
+++ b/tests/integ/modin/series/test_loc.py
@@ -224,7 +224,6 @@ def apply_loc(df):
         [random.choice([True, False]) for _ in range(5)],
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
 def test_series_loc_get_key_bool_series_with_aligned_indices(key, use_default_index):
     # aligned indices means both row_pos and index are exactly match
     if use_default_index:
@@ -234,13 +233,14 @@ def test_series_loc_get_key_bool_series_with_aligned_indices(key, use_default_in
         index = native_pd.Index(["a", "a", None, "b", "b"], name="index")
     native_series = native_pd.Series([1, 2, 3, 4, 5], index=index)
     snow_series = pd.Series(native_series)
-    eval_snowpark_pandas_result(
-        snow_series,
-        native_series,
-        lambda s: s.loc[pd.Series(key, index=index, dtype="bool")]
-        if isinstance(s, pd.Series)
-        else s.loc[native_pd.Series(key, index=index, dtype="bool")],
-    )
+    with SqlCounter(query_count=1, join_count=1 if use_default_index else 2):
+        eval_snowpark_pandas_result(
+            snow_series,
+            native_series,
+            lambda s: s.loc[pd.Series(key, index=index, dtype="bool")]
+            if isinstance(s, pd.Series)
+            else s.loc[native_pd.Series(key, index=index, dtype="bool")],
+        )
 
 
 @pytest.mark.parametrize(
@@ -861,7 +861,7 @@ def loc_set_helper(s):
         ["a", "a", "c", "d"],
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=3)
 def test_series_loc_set_scalar_row_key_enlargement(row_key, item_values, ser_index):
     data = [1, 2, 3, 4]
 
@@ -1407,7 +1407,7 @@ def test_series_loc_set_slice_item_negative(key, default_index_native_series):
         [2, "x"],
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_series_loc_set_boolean_key(key, index):
     # series.loc[True/False key] = scalar item
     # ----------------------------------------
@@ -1596,7 +1596,7 @@ def test_series_loc_set_with_scalar_key_and_list_like_item(
         assert_series_equal(snowpark_ser, native_ser)
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 @pytest.mark.parametrize("key", SCALAR_LIKE_VALUES)
 @pytest.mark.parametrize("item", SCALAR_LIKE_VALUES)
 def test_series_loc_set_with_scalar_key_and_scalar_item(
@@ -1776,7 +1776,7 @@ def test_series_partial_string_indexing_behavior_diff():
     assert len(series_minute["2022"]) == 0
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_series_loc_set_none():
     # Note that pandas does not support df.loc[None,:] like the series does here.
     native_s = native_pd.Series([1, 2, 3])
@@ -1792,18 +1792,22 @@ def loc_set_helper(s):
 @pytest.mark.parametrize(
     "key, query_count, join_count",
     [
-        ("1 day", 2, 2),  # 1 join from squeeze, 1 join from to_pandas during eval
         (
-            native_pd.to_timedelta("1 day"),
+            "1 day",
             2,
+            4,
+        ),  # 1 join from series creation (double counted), 1 join from squeeze, 1 join from to_pandas during eval
+        (
+            native_pd.to_timedelta("1 day"),
             2,
-        ),  # 1 join from squeeze, 1 join from to_pandas during eval
-        (["1 day", "3 days"], 1, 1),
-        ([True, False, False], 1, 1),
-        (slice(None, "4 days"), 1, 0),
-        (slice(None, "4 days", 2), 1, 0),
-        (slice("1 day", "2 days"), 1, 0),
-        (slice("1 day 1 hour", "2 days 2 hours", 1), 1, 0),
+            4,
+        ),  # 1 join from series creation (double counted), 1 join from squeeze, 1 join from to_pandas during eval
+        (["1 day", "3 days"], 1, 2),
+        ([True, False, False], 1, 2),
+        (slice(None, "4 days"), 1, 1),
+        (slice(None, "4 days", 2), 1, 1),
+        (slice("1 day", "2 days"), 1, 1),
+        (slice("1 day 1 hour", "2 days 2 hours", 1), 1, 1),
     ],
 )
 def test_series_loc_get_with_timedelta(key, query_count, join_count):
@@ -1854,7 +1858,7 @@ def test_series_loc_get_with_timedelta(key, query_count, join_count):
         ),
     ],
 )
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_loc_get_with_timedelta_behavior_difference(key, expected_result):
     data = ["A", "B", "C", "D"]
     idx = ["1 days", "2 days", "3 days", "25 hours"]
@@ -1869,7 +1873,7 @@ def test_series_loc_get_with_timedelta_behavior_difference(key, expected_result)
     assert_series_equal(actual_result, expected_result)
 
 
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=2, join_count=2)
 def test_series_loc_get_with_timedeltaindex_key():
     data = ["A", "B", "C"]
     idx = ["1 days", "2 days", "3 days"]
diff --git a/tests/integ/modin/test_telemetry.py b/tests/integ/modin/test_telemetry.py
index 80317357af..ec1710980a 100644
--- a/tests/integ/modin/test_telemetry.py
+++ b/tests/integ/modin/test_telemetry.py
@@ -342,7 +342,7 @@ def test_telemetry_with_update_inplace():
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_telemetry_with_resample():
     # verify api_calls have been collected correctly for Resample APIs
     index = pandas.date_range("1/1/2000", periods=9, freq="min")

From ce1ffa65b8e7795266aeec8f343f22e0b40e5c84 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Mon, 9 Sep 2024 11:24:59 -0700
Subject: [PATCH 24/42] fix test

---
 tests/integ/modin/binary/test_binary_op.py    | 24 ++++++++-----------
 tests/integ/modin/frame/test_loc.py           | 18 +++++++-------
 .../modin/groupby/test_groupby_basic_agg.py   |  2 +-
 3 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/tests/integ/modin/binary/test_binary_op.py b/tests/integ/modin/binary/test_binary_op.py
index 4f50096ce4..ff83a1b8c7 100644
--- a/tests/integ/modin/binary/test_binary_op.py
+++ b/tests/integ/modin/binary/test_binary_op.py
@@ -1293,22 +1293,18 @@ def test_binary_add_between_series_for_index_alignment(lhs, rhs, op):
     def check_op(native_lhs, native_rhs, snow_lhs, snow_rhs):
         snow_ans = op(snow_lhs, snow_rhs)
         native_ans = op(native_lhs, native_rhs)
-        with SqlCounter(
-            query_count=2, join_count=10 if isinstance(lhs.index, pd.MultiIndex) else 6
-        ):
-            # for one multi-index test case (marked with comment) the "inferred_type" doesn't match (Snowpark: float vs. pandas integer)
-            eval_snowpark_pandas_result(
-                snow_ans, native_ans, lambda s: s, check_index_type=False
-            )
+        # for one multi-index test case (marked with comment) the "inferred_type" doesn't match (Snowpark: float vs. pandas integer)
+        eval_snowpark_pandas_result(
+            snow_ans, native_ans, lambda s: s, check_index_type=False
+        )
 
-            check_op(
-                lhs, rhs, try_cast_to_snow_series(lhs), try_cast_to_snow_series(rhs)
-            )
+    with SqlCounter(
+        query_count=2, join_count=10 if isinstance(lhs.index, pd.MultiIndex) else 6
+    ):
+        check_op(lhs, rhs, try_cast_to_snow_series(lhs), try_cast_to_snow_series(rhs))
 
-            # commute series
-            check_op(
-                rhs, lhs, try_cast_to_snow_series(rhs), try_cast_to_snow_series(lhs)
-            )
+        # commute series
+        check_op(rhs, lhs, try_cast_to_snow_series(rhs), try_cast_to_snow_series(lhs))
 
 
 # MOD TESTS
diff --git a/tests/integ/modin/frame/test_loc.py b/tests/integ/modin/frame/test_loc.py
index d94f9f21d0..d75b16658d 100644
--- a/tests/integ/modin/frame/test_loc.py
+++ b/tests/integ/modin/frame/test_loc.py
@@ -1670,7 +1670,6 @@ def test_df_loc_get_key_bool_self_series():
         [random.choice([True, False]) for _ in range(5)],
     ],
 )
-@sql_count_checker(query_count=1, join_count=2)
 def test_df_loc_get_key_bool_series_with_aligned_indices(key, use_default_index):
     # aligned indices means both row_pos and index are exactly match
     if use_default_index:
@@ -1681,14 +1680,15 @@ def test_df_loc_get_key_bool_series_with_aligned_indices(key, use_default_index)
     native_df = native_pd.DataFrame(
         {"c1": [1, 2, 3, 4, 5], "c2": ["x", "y", "z", "d", "e"]}, index=index
     )
-    snow_df = pd.DataFrame(native_df)
-    eval_snowpark_pandas_result(
-        snow_df,
-        native_df,
-        lambda df: df.loc[pd.Series(key, index=index, dtype="bool")]
-        if isinstance(df, pd.DataFrame)
-        else df.loc[native_pd.Series(key, index=index, dtype="bool")],
-    )
+    with SqlCounter(query_count=1, join_count=1 if use_default_index else 2):
+        snow_df = pd.DataFrame(native_df)
+        eval_snowpark_pandas_result(
+            snow_df,
+            native_df,
+            lambda df: df.loc[pd.Series(key, index=index, dtype="bool")]
+            if isinstance(df, pd.DataFrame)
+            else df.loc[native_pd.Series(key, index=index, dtype="bool")],
+        )
 
 
 @pytest.mark.parametrize(
diff --git a/tests/integ/modin/groupby/test_groupby_basic_agg.py b/tests/integ/modin/groupby/test_groupby_basic_agg.py
index 09acd49bb2..f3002901d0 100644
--- a/tests/integ/modin/groupby/test_groupby_basic_agg.py
+++ b/tests/integ/modin/groupby/test_groupby_basic_agg.py
@@ -951,7 +951,7 @@ def test_groupby_with_level(df_multi, level):
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=2)
 def test_groupby_with_hier_columns():
     tuples = list(
         zip(

From 00d2a8b6af28792b3dbc58b663cc51e91740280e Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Mon, 9 Sep 2024 12:04:42 -0700
Subject: [PATCH 25/42] fix test

---
 tests/integ/modin/series/test_all_any.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integ/modin/series/test_all_any.py b/tests/integ/modin/series/test_all_any.py
index d0d1c0987a..517252e7af 100644
--- a/tests/integ/modin/series/test_all_any.py
+++ b/tests/integ/modin/series/test_all_any.py
@@ -77,7 +77,7 @@ def test_all_named_index():
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=1, join_count=1)
 def test_any_named_index():
     data = [1, 0, 3]
     index_name = ["a", "b", "c"]

From cb918495a48615d5a19747c4476be00e3f6db60e Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Mon, 9 Sep 2024 12:22:47 -0700
Subject: [PATCH 26/42] fix last valid index error

---
 src/snowflake/snowpark/modin/plugin/_internal/indexing_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/snowflake/snowpark/modin/plugin/_internal/indexing_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/indexing_utils.py
index c2c224e404..d121baf823 100644
--- a/src/snowflake/snowpark/modin/plugin/_internal/indexing_utils.py
+++ b/src/snowflake/snowpark/modin/plugin/_internal/indexing_utils.py
@@ -128,6 +128,7 @@ def get_valid_index_values(
     -------
     Optional[Row]: The desired index (a Snowpark Row) if it exists, else None.
     """
+    frame = frame.ensure_row_position_column()
     index_quoted_identifier = frame.index_column_snowflake_quoted_identifiers
     data_quoted_identifier = frame.data_column_snowflake_quoted_identifiers
     row_position_quoted_identifier = frame.row_position_snowflake_quoted_identifier

From d9fdbb06127b0cacd16b5bb15a0edf15af700676 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Mon, 9 Sep 2024 12:34:00 -0700
Subject: [PATCH 27/42] remove stuff unnecessarily commented out

---
 src/snowflake/snowpark/modin/plugin/docstrings/base.py | 2 +-
 tests/integ/modin/frame/test_describe.py               | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/base.py b/src/snowflake/snowpark/modin/plugin/docstrings/base.py
index 657da3e528..4eb1bd1584 100644
--- a/src/snowflake/snowpark/modin/plugin/docstrings/base.py
+++ b/src/snowflake/snowpark/modin/plugin/docstrings/base.py
@@ -1649,7 +1649,7 @@ def last_valid_index():
         >>> df.last_valid_index()
         12
         >>> df = pd.DataFrame([5, 6, 7, 8], index=["i", "am", "iron", "man"])
-        >>> df.last_valid_index()  # doctest: +SKIP
+        >>> df.last_valid_index()
         'man'
         """
 
diff --git a/tests/integ/modin/frame/test_describe.py b/tests/integ/modin/frame/test_describe.py
index 28425ab695..a9668c5794 100644
--- a/tests/integ/modin/frame/test_describe.py
+++ b/tests/integ/modin/frame/test_describe.py
@@ -255,8 +255,8 @@ def timestamp_describe_comparator(snow_res, native_res):
 @pytest.mark.parametrize(
     "index",
     [
-        # pytest.param(None, id="default_index"),
-        # pytest.param(["one", "two", "three", "four", "five", "six"], id="flat_index"),
+        pytest.param(None, id="default_index"),
+        pytest.param(["one", "two", "three", "four", "five", "six"], id="flat_index"),
         pytest.param(
             [
                 np.array(["bar", "bar", "baz", "baz", "foo", "foo"]),

From 3d5b785ef43707ca26bffa7aaaa928be3729487d Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Mon, 9 Sep 2024 12:37:49 -0700
Subject: [PATCH 28/42] explain high query count

---
 tests/integ/modin/binary/test_binary_op.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/integ/modin/binary/test_binary_op.py b/tests/integ/modin/binary/test_binary_op.py
index ff83a1b8c7..5732eca6cc 100644
--- a/tests/integ/modin/binary/test_binary_op.py
+++ b/tests/integ/modin/binary/test_binary_op.py
@@ -1298,6 +1298,13 @@ def check_op(native_lhs, native_rhs, snow_lhs, snow_rhs):
             snow_ans, native_ans, lambda s: s, check_index_type=False
         )
 
+    # The join count is high because:
+    # - When creating a single index Series, 1 join is performed; four series are created.
+    #   Therefore, 4 joins are performed. Each binary operation uses 1 join; two operations are performed.
+    #   This results in 6 joins.
+    # - Similarly, when creating a MultiIndex Series, 1 join is performed per column in the MultiIndex, in our case
+    #   there are two columns. Four Series are created, resulting in 8 joins. Each binary operation uses 1 join;
+    #   two operations are performed. This results in 10 joins.
     with SqlCounter(
         query_count=2, join_count=10 if isinstance(lhs.index, pd.MultiIndex) else 6
     ):

From 7f9dbaa4c2937a1490fb0a8ac7db85acf357fe3e Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Mon, 9 Sep 2024 14:27:45 -0700
Subject: [PATCH 29/42] rewrite binary op test, fix coverage

---
 .../snowpark/modin/pandas/dataframe.py        |  6 ++---
 .../plugin/extensions/series_overrides.py     |  2 +-
 tests/integ/modin/binary/test_binary_op.py    | 26 ++++++-------------
 3 files changed, 12 insertions(+), 22 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py
index 9aa1b1fb26..1c8f9f084a 100644
--- a/src/snowflake/snowpark/modin/pandas/dataframe.py
+++ b/src/snowflake/snowpark/modin/pandas/dataframe.py
@@ -195,7 +195,7 @@ def __init__(
                 # the DataFrame and sets columns to the columns provided.
                 query_compiler = from_pandas(
                     self.__constructor__(columns=columns)
-                )._query_compiler
+                )._query_compiler  # pragma: no cover
 
         elif isinstance(data, DataFrame):
             # CASE 5: data is a Snowpark pandas DataFrame
@@ -256,7 +256,7 @@ def __init__(
                     if dtype is not None:
                         new_qc = new_qc.astype({col: dtype for col in new_qc.columns})
                     if index is not None:
-                        if isinstance(index, Index):
+                        if isinstance(index, Index):  # pragma: no cover
                             index = index.to_series()._query_compiler
                         elif isinstance(index, Series):
                             index = index._query_compiler
@@ -318,7 +318,7 @@ def __init__(
                 if isinstance(labels, Index):
                     labels = labels.to_series()._query_compiler
                 elif isinstance(labels, Series):
-                    labels = labels._query_compiler
+                    labels = labels._query_compiler  # pragma: no cover
                 else:
                     labels = Index(labels).to_series()._query_compiler
                 query_compiler = query_compiler.reindex(axis=0, labels=labels)
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
index 492098c2b6..e4f3f4856d 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
@@ -441,7 +441,7 @@ def __init__(
             if isinstance(labels, Index):
                 labels = labels.to_series()._query_compiler
             elif isinstance(labels, Series):
-                labels = labels._query_compiler
+                labels = labels._query_compiler  # pragma: no cover
             else:
                 labels = Index(labels).to_series()._query_compiler
             query_compiler = query_compiler.reindex(axis=0, labels=labels)
diff --git a/tests/integ/modin/binary/test_binary_op.py b/tests/integ/modin/binary/test_binary_op.py
index 5732eca6cc..9bc8dd9f43 100644
--- a/tests/integ/modin/binary/test_binary_op.py
+++ b/tests/integ/modin/binary/test_binary_op.py
@@ -17,7 +17,6 @@
 import snowflake.snowpark.modin.plugin  # noqa: F401
 from snowflake.snowpark.exceptions import SnowparkSQLException
 from snowflake.snowpark.modin.pandas.utils import try_convert_index_to_native
-from tests.integ.modin.series.test_bitwise_operators import try_cast_to_snow_series
 from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
 from tests.integ.modin.utils import (
     assert_snowpark_pandas_equal_to_pandas,
@@ -1294,24 +1293,15 @@ def check_op(native_lhs, native_rhs, snow_lhs, snow_rhs):
         snow_ans = op(snow_lhs, snow_rhs)
         native_ans = op(native_lhs, native_rhs)
         # for one multi-index test case (marked with comment) the "inferred_type" doesn't match (Snowpark: float vs. pandas integer)
-        eval_snowpark_pandas_result(
-            snow_ans, native_ans, lambda s: s, check_index_type=False
-        )
-
-    # The join count is high because:
-    # - When creating a single index Series, 1 join is performed; four series are created.
-    #   Therefore, 4 joins are performed. Each binary operation uses 1 join; two operations are performed.
-    #   This results in 6 joins.
-    # - Similarly, when creating a MultiIndex Series, 1 join is performed per column in the MultiIndex, in our case
-    #   there are two columns. Four Series are created, resulting in 8 joins. Each binary operation uses 1 join;
-    #   two operations are performed. This results in 10 joins.
-    with SqlCounter(
-        query_count=2, join_count=10 if isinstance(lhs.index, pd.MultiIndex) else 6
-    ):
-        check_op(lhs, rhs, try_cast_to_snow_series(lhs), try_cast_to_snow_series(rhs))
+        with SqlCounter(query_count=1, join_count=1):
+            eval_snowpark_pandas_result(
+                snow_ans, native_ans, lambda s: s, check_index_type=False
+            )
 
-        # commute series
-        check_op(rhs, lhs, try_cast_to_snow_series(rhs), try_cast_to_snow_series(lhs))
+    snow_lhs, snow_rhs = pd.Series(lhs), pd.Series(rhs)
+    check_op(lhs, rhs, snow_lhs, snow_rhs)
+    # commute series
+    check_op(rhs, lhs, snow_rhs, snow_lhs)
 
 
 # MOD TESTS

From 6de9f4924e30692e5815158e39a10aa63fcb43dc Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Wed, 11 Sep 2024 14:19:50 -0700
Subject: [PATCH 30/42] fix tests

---
 .../snowpark/modin/pandas/dataframe.py        |  7 +-
 .../plugin/extensions/series_overrides.py     |  6 +-
 tests/integ/modin/binary/test_binary_op.py    |  2 +-
 tests/integ/modin/frame/test_add_prefix.py    |  2 +-
 tests/integ/modin/frame/test_add_suffix.py    |  2 +-
 tests/integ/modin/frame/test_assign.py        |  4 +-
 tests/integ/modin/frame/test_astype.py        |  2 +-
 tests/integ/modin/frame/test_at.py            |  8 +-
 tests/integ/modin/frame/test_axis.py          |  2 +-
 tests/integ/modin/frame/test_copy.py          |  6 +-
 tests/integ/modin/frame/test_drop.py          |  6 +-
 tests/integ/modin/frame/test_dtypes.py        | 40 ++++----
 tests/integ/modin/frame/test_fillna.py        |  2 +-
 tests/integ/modin/frame/test_getitem.py       |  2 +-
 tests/integ/modin/frame/test_iat.py           |  4 +-
 tests/integ/modin/frame/test_idxmax_idxmin.py | 23 ++---
 tests/integ/modin/frame/test_iloc.py          | 46 +++++-----
 tests/integ/modin/frame/test_insert.py        | 92 +++++++++----------
 tests/integ/modin/frame/test_loc.py           | 32 ++++---
 tests/integ/modin/frame/test_mask.py          |  4 +-
 tests/integ/modin/frame/test_merge.py         |  6 +-
 tests/integ/modin/frame/test_nunique.py       | 10 +-
 tests/integ/modin/frame/test_rank.py          | 40 ++++----
 tests/integ/modin/frame/test_reindex.py       |  6 +-
 tests/integ/modin/frame/test_rename.py        |  4 +-
 tests/integ/modin/frame/test_repr.py          |  2 +-
 tests/integ/modin/frame/test_setitem.py       |  6 +-
 tests/integ/modin/frame/test_stack.py         |  2 +-
 tests/integ/modin/frame/test_transpose.py     |  2 +-
 tests/integ/modin/frame/test_where.py         |  4 +-
 .../integ/modin/groupby/test_groupby_apply.py | 28 +++---
 .../modin/groupby/test_groupby_basic_agg.py   |  4 +-
 .../groupby/test_groupby_dataframe_rank.py    | 46 ++++------
 .../groupby/test_groupby_default2pandas.py    |  2 +-
 .../modin/groupby/test_groupby_head_tail.py   |  4 +-
 .../groupby/test_groupby_idxmax_idxmin.py     |  4 +-
 .../modin/groupby/test_groupby_ngroups.py     |  2 +-
 .../modin/groupby/test_groupby_series.py      | 18 ++--
 .../modin/groupby/test_groupby_transform.py   |  8 +-
 .../index/test_datetime_index_methods.py      |  2 +-
 .../test_df_series_creation_with_index.py     |  4 +-
 tests/integ/modin/index/test_index_methods.py |  2 +-
 tests/integ/modin/resample/test_resample.py   |  6 +-
 .../modin/resample/test_resample_fillna.py    |  4 +-
 .../modin/resample/test_resample_negative.py  |  2 +-
 tests/integ/modin/series/test_add_prefix.py   |  2 +-
 tests/integ/modin/series/test_add_suffix.py   |  2 +-
 tests/integ/modin/series/test_all_any.py      |  4 +-
 tests/integ/modin/series/test_at.py           |  8 +-
 .../modin/series/test_bitwise_operators.py    | 62 ++++++-------
 tests/integ/modin/series/test_compare.py      |  2 +-
 tests/integ/modin/series/test_describe.py     | 23 ++---
 tests/integ/modin/series/test_empty.py        | 19 ++--
 tests/integ/modin/series/test_iat.py          |  4 +-
 tests/integ/modin/series/test_iloc.py         | 30 +++---
 tests/integ/modin/series/test_loc.py          | 24 ++---
 tests/integ/modin/series/test_mask.py         | 42 ++++-----
 .../modin/series/test_nlargest_nsmallest.py   |  2 +-
 tests/integ/modin/series/test_nunique.py      | 14 +--
 tests/integ/modin/series/test_rank.py         | 22 ++---
 tests/integ/modin/series/test_rename.py       |  6 +-
 tests/integ/modin/series/test_setitem.py      | 22 ++---
 tests/integ/modin/series/test_shape.py        | 18 ++--
 tests/integ/modin/series/test_size.py         | 21 ++---
 tests/integ/modin/series/test_take.py         |  8 +-
 tests/integ/modin/series/test_to_snowflake.py |  2 +-
 tests/integ/modin/series/test_transpose.py    |  6 +-
 tests/integ/modin/series/test_where.py        | 42 ++++-----
 tests/integ/modin/test_concat.py              |  4 +-
 tests/integ/modin/test_numpy.py               |  6 +-
 .../modin/types/test_timedelta_indexing.py    | 24 ++---
 71 files changed, 437 insertions(+), 492 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py
index 1c8f9f084a..f83c1d7549 100644
--- a/src/snowflake/snowpark/modin/pandas/dataframe.py
+++ b/src/snowflake/snowpark/modin/pandas/dataframe.py
@@ -297,7 +297,7 @@ def __init__(
                     # pd.DataFrame({'a': 1, 'b': 2}, index=[0])
                     dummy_index = index
 
-            if is_scalar(data) and not isinstance(index, type(None)):
+            if not isinstance(index, (Index, type(self))):
                 dummy_index = index
             query_compiler = from_pandas(
                 pandas.DataFrame(
@@ -309,7 +309,10 @@ def __init__(
                 )
             )._query_compiler
 
-        if index is not None:
+        if index is not None and (
+            isinstance(index, (Index, Series))
+            or isinstance(data, (Index, Series, type(self)))
+        ):
             if isinstance(data, (type(self), Series, type(None))):
                 # The `index` parameter is used to select the rows from `data` that will be in the resultant DataFrame.
                 # If a value in `index` is not present in `data`'s index, it will be filled with a NaN value.
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
index e4f3f4856d..7808f4050a 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
@@ -411,7 +411,7 @@ def __init__(
         # If the data is not a Snowpark pandas object, convert it to a query compiler.
         name = MODIN_UNNAMED_SERIES_LABEL if name is None else name
         dummy_index = None
-        if is_scalar(data) and not isinstance(index, type(None)):
+        if not isinstance(index, (Index, type(self))):
             dummy_index = index
         if (
             isinstance(data, (native_pd.Series, native_pd.Index))
@@ -431,7 +431,9 @@ def __init__(
             )
         )._query_compiler
 
-    if index is not None:
+    if index is not None and (
+        isinstance(index, (Index, type(self))) or isinstance(data, (Index, type(self)))
+    ):
         if is_dict_like(data) or isinstance(data, (type(self), type(None))):
             # The `index` parameter is used to select the rows from `data` that will be in the resultant Series.
             # If a value in `index` is not present in `data`'s index, it will be filled with a NaN value.
diff --git a/tests/integ/modin/binary/test_binary_op.py b/tests/integ/modin/binary/test_binary_op.py
index 9bc8dd9f43..3190751887 100644
--- a/tests/integ/modin/binary/test_binary_op.py
+++ b/tests/integ/modin/binary/test_binary_op.py
@@ -1871,7 +1871,7 @@ def test_binary_rpow_between_df_and_list_like_on_axis_1(rhs):
         "rmod",
     ],
 )
-@sql_count_checker(query_count=1, join_count=3)
+@sql_count_checker(query_count=1, join_count=1)
 def test_generated_docstring_examples(opname):
     # test for operators that correct examples are generated and match up with pandas.
     # if this test passes, this ensures that all the examples generated in utils.py will be correct.
diff --git a/tests/integ/modin/frame/test_add_prefix.py b/tests/integ/modin/frame/test_add_prefix.py
index 8cf30f4913..5ac652ea92 100644
--- a/tests/integ/modin/frame/test_add_prefix.py
+++ b/tests/integ/modin/frame/test_add_prefix.py
@@ -46,7 +46,7 @@ def test_df_add_prefix_multiindex(prefix, native_df_with_multiindex_columns):
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("prefix", TEST_ADD_PREFIX_DATA)
 def test_df_add_prefix_time_column_df(
     prefix, time_column_snowpark_pandas_df, time_column_native_df
diff --git a/tests/integ/modin/frame/test_add_suffix.py b/tests/integ/modin/frame/test_add_suffix.py
index 0dceff54d7..4fbaf1e319 100644
--- a/tests/integ/modin/frame/test_add_suffix.py
+++ b/tests/integ/modin/frame/test_add_suffix.py
@@ -46,7 +46,7 @@ def test_df_add_suffix_multiindex(suffix, native_df_with_multiindex_columns):
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("suffix", TEST_ADD_SUFFIX_DATA)
 def test_df_add_suffix_time_column_df(
     suffix, time_column_snowpark_pandas_df, time_column_native_df
diff --git a/tests/integ/modin/frame/test_assign.py b/tests/integ/modin/frame/test_assign.py
index f60107057e..8f1e1294e2 100644
--- a/tests/integ/modin/frame/test_assign.py
+++ b/tests/integ/modin/frame/test_assign.py
@@ -36,7 +36,7 @@ def assign_func(df):
     eval_snowpark_pandas_result(snow_df, native_df, assign_func)
 
 
-@sql_count_checker(query_count=1, join_count=3)
+@sql_count_checker(query_count=1, join_count=2)
 @pytest.mark.parametrize(
     "index", [[2, 1, 0], [4, 5, 6]], ids=["reversed_index", "different_index"]
 )
@@ -136,7 +136,7 @@ def test_assign_short_series():
     assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(snow_df, native_df)
 
 
-@sql_count_checker(query_count=1, join_count=3)
+@sql_count_checker(query_count=1, join_count=2)
 @pytest.mark.parametrize(
     "index", [[1, 0], [4, 5]], ids=["reversed_index", "different_index"]
 )
diff --git a/tests/integ/modin/frame/test_astype.py b/tests/integ/modin/frame/test_astype.py
index dbd267b307..8007b264b4 100644
--- a/tests/integ/modin/frame/test_astype.py
+++ b/tests/integ/modin/frame/test_astype.py
@@ -35,7 +35,7 @@ def test_series_input():
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 def test_input_negative():
     df = pd.DataFrame({"a": [1, 2, 3], "b": [2.4, 2.5, 3.1]})
     with pytest.raises(KeyError, match="not found in columns"):
diff --git a/tests/integ/modin/frame/test_at.py b/tests/integ/modin/frame/test_at.py
index 9194416648..f43270ff53 100644
--- a/tests/integ/modin/frame/test_at.py
+++ b/tests/integ/modin/frame/test_at.py
@@ -20,7 +20,7 @@ def test_at_get_default_index_str_columns(
     )
 
 
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_at_set_default_index_str_columns(
     default_index_snowpark_pandas_df,
     default_index_native_df,
@@ -44,7 +44,7 @@ def test_at_get_str_index_str_columns(
     assert str_index_snowpark_pandas_df.at["b", "B"] == str_index_native_df.at["b", "B"]
 
 
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_at_set_str_index_str_columns(
     str_index_snowpark_pandas_df,
     str_index_native_df,
@@ -57,7 +57,7 @@ def at_set_helper(df):
     )
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_at_get_time_index_time_columns(
     time_index_snowpark_pandas_df,
     time_index_native_df,
@@ -68,7 +68,7 @@ def test_at_get_time_index_time_columns(
     )
 
 
-@sql_count_checker(query_count=1, join_count=3)
+@sql_count_checker(query_count=1, join_count=1)
 def test_at_set_time_index_time_columns(
     time_index_snowpark_pandas_df,
     time_index_native_df,
diff --git a/tests/integ/modin/frame/test_axis.py b/tests/integ/modin/frame/test_axis.py
index 0fb3fa2c5f..a6a156a05f 100644
--- a/tests/integ/modin/frame/test_axis.py
+++ b/tests/integ/modin/frame/test_axis.py
@@ -244,7 +244,7 @@ def test_set_columns_index_name(index_name):
     )
 
 
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1)
 def test_duplicate_labels_assignment():
     # Duplicate data labels
     snow_df = pd.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
diff --git a/tests/integ/modin/frame/test_copy.py b/tests/integ/modin/frame/test_copy.py
index 7844ca321a..b4c5f4f2a5 100644
--- a/tests/integ/modin/frame/test_copy.py
+++ b/tests/integ/modin/frame/test_copy.py
@@ -28,7 +28,7 @@ def native_df(snow_df):
 
 
 @pytest.mark.parametrize("deep", [None, True, False])
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 def test_copy(deep, snow_df, native_df):
     # Verify copy is same as original
     assert_snowpark_pandas_equal_to_pandas(snow_df.copy(deep=deep), native_df)
@@ -61,7 +61,7 @@ def test_copy_deep_false_column_names(snow_df):
         lambda df: df.rename(columns={"a": "new_a"}, inplace=True),
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 def test_copy_inplace_operations_on_deep_copy(snow_df, native_df, operation):
     snow_df_copy = snow_df.copy(deep=True)
     operation(snow_df_copy)
@@ -79,7 +79,7 @@ def test_copy_inplace_operations_on_deep_copy(snow_df, native_df, operation):
         lambda df: df.rename(columns={"a": "new_a"}, inplace=True),
     ],
 )
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_copy_inplace_operations_on_shallow_copy(snow_df, operation):
     snow_df_copy = snow_df.copy(deep=False)
     operation(snow_df_copy)
diff --git a/tests/integ/modin/frame/test_drop.py b/tests/integ/modin/frame/test_drop.py
index 4dcae76af7..cc1a1a203d 100644
--- a/tests/integ/modin/frame/test_drop.py
+++ b/tests/integ/modin/frame/test_drop.py
@@ -209,7 +209,7 @@ def test_drop_invalid_labels_axis0_negative(
         ([], None),  # empty labels
     ],
 )
-@sql_count_checker(query_count=2, join_count=4)
+@sql_count_checker(query_count=2)
 def test_drop_invalid_axis1_labels_errors_ignore(labels, level, multiindex_snow_df):
     result = multiindex_snow_df.drop(labels, level=level, axis=1, errors="ignore")
     assert_frame_equal(multiindex_snow_df, result)
@@ -231,7 +231,7 @@ def test_drop_invalid_axis1_labels_errors_ignore(labels, level, multiindex_snow_
         ([], None),  # empty labels
     ],
 )
-@sql_count_checker(query_count=2, join_count=4)
+@sql_count_checker(query_count=2)
 def test_drop_invalid_axis0_labels_errors_ignore(labels, level, multiindex_snow_df):
     result = multiindex_snow_df.drop(labels, level=level, errors="ignore")
     assert_frame_equal(multiindex_snow_df, result)
@@ -263,7 +263,7 @@ def test_empty_tuple_multiindex(multiindex_snow_df, axis):
             assert len(result.index) == 0
 
 
-@sql_count_checker(query_count=2, join_count=4)
+@sql_count_checker(query_count=2)
 def test_drop_preserve_index_names(multiindex_snow_df):
     df_dropped_e = multiindex_snow_df.drop("red", axis=1)
     df_inplace_e = multiindex_snow_df.copy()
diff --git a/tests/integ/modin/frame/test_dtypes.py b/tests/integ/modin/frame/test_dtypes.py
index 49d8abfe2a..c3773bdd6d 100644
--- a/tests/integ/modin/frame/test_dtypes.py
+++ b/tests/integ/modin/frame/test_dtypes.py
@@ -18,7 +18,7 @@
     StringType,
     VariantType,
 )
-from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
+from tests.integ.modin.sql_counter import sql_count_checker
 from tests.integ.modin.utils import (
     assert_frame_equal,
     assert_series_equal,
@@ -77,7 +77,7 @@ def validate_series_snowpark_dtype(series: pd.Series, snowpark_type: DataType) -
         ),
     ],
 )
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=2)
 def test_integer(dataframe_input, input_dtype, logical_dtype):
     expected = native_pd.Series(dataframe_input, dtype=input_dtype)
     created = pd.Series(dataframe_input, dtype=input_dtype)
@@ -218,7 +218,7 @@ def test_extended_float64_with_nan():
         ),
     ],
 )
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=2)
 def test_float(dataframe_input, input_dtype, expected_dtype, logical_dtype):
     expected = native_pd.Series(dataframe_input, dtype=input_dtype)
     created = pd.Series(dataframe_input, dtype=input_dtype)
@@ -256,7 +256,7 @@ def test_float(dataframe_input, input_dtype, expected_dtype, logical_dtype):
         ),
     ],
 )
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=2)
 def test_string(dataframe_input, input_dtype, index):
     expected = native_pd.Series(dataframe_input, dtype=input_dtype)
     created = pd.Series(dataframe_input)
@@ -305,7 +305,7 @@ def test_string_explicit(dataframe_input, input_dtype, index):
         (["level0"], ["col1", "col2", "col1"]),
     ],
 )
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1)
 def test_insert_multiindex_multi_label(label1, label2):
     arrays = [["apple", "apple", "banana", "banana"], [1, 2, 1, 2]]
     index = pd.MultiIndex.from_arrays(arrays, names=["first", "second"])
@@ -452,24 +452,24 @@ def test_empty(input_dtype, expected_dtype, snowpark_dtype, to_pandas_dtype):
 
 
 @pytest.mark.parametrize(
-    "index, expected_index_dtype, join_count",
+    "index, expected_index_dtype",
     [
-        (None, np.dtype("int64"), 0),
-        (native_pd.Index([]), np.dtype("object"), 1),
-        (native_pd.Index([], dtype="float64"), np.dtype("float64"), 1),
+        (None, np.dtype("int64")),
+        (native_pd.Index([]), np.dtype("object")),
+        (native_pd.Index([], dtype="float64"), np.dtype("float64")),
     ],
 )
-def test_empty_index(index, expected_index_dtype, join_count):
-    with SqlCounter(query_count=1, join_count=join_count):
-        expected = native_pd.Series(data=[], index=index)
-        assert expected.dtype == np.dtype("object")
-        assert expected.index.dtype == expected_index_dtype
-        created = pd.Series(data=[], index=index)
-        assert created.dtype == np.dtype("object")
-        assert created.index.dtype == expected_index_dtype
-        roundtripped = created.to_pandas()
-        assert roundtripped.dtype == np.dtype("object")
-        assert roundtripped.index.dtype == expected_index_dtype
+@sql_count_checker(query_count=1)
+def test_empty_index(index, expected_index_dtype):
+    expected = native_pd.Series(data=[], index=index)
+    assert expected.dtype == np.dtype("object")
+    assert expected.index.dtype == expected_index_dtype
+    created = pd.Series(data=[], index=index)
+    assert created.dtype == np.dtype("object")
+    assert created.index.dtype == expected_index_dtype
+    roundtripped = created.to_pandas()
+    assert roundtripped.dtype == np.dtype("object")
+    assert roundtripped.index.dtype == expected_index_dtype
 
 
 @pytest.mark.parametrize(
diff --git a/tests/integ/modin/frame/test_fillna.py b/tests/integ/modin/frame/test_fillna.py
index 6ae668d694..677c8d3ddc 100644
--- a/tests/integ/modin/frame/test_fillna.py
+++ b/tests/integ/modin/frame/test_fillna.py
@@ -426,7 +426,7 @@ def test_multiindex_df_values_dict_various_levels(test_fillna_multiindex_df):
     )
 
 
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=2)
 def test_multiindex_df_values_series(test_fillna_multiindex_df, test_fillna_multiindex):
     values = pd.Series([10, 1, 2, 3], index=test_fillna_multiindex)
     native_values = native_pd.Series([10, 1, 2, 3], index=test_fillna_multiindex)
diff --git a/tests/integ/modin/frame/test_getitem.py b/tests/integ/modin/frame/test_getitem.py
index e08e25513a..76a30f1e68 100644
--- a/tests/integ/modin/frame/test_getitem.py
+++ b/tests/integ/modin/frame/test_getitem.py
@@ -343,7 +343,7 @@ def test_df_getitem_with_slice(
         slice("z", "a", -1),
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 def test_df_getitem_with_non_int_slice(key):
     data = {"a": [1, 2, 3], "b": [4, 5, 6]}
     index = ["x", "y", "z"]
diff --git a/tests/integ/modin/frame/test_iat.py b/tests/integ/modin/frame/test_iat.py
index dbf3d50759..2191fb8db8 100644
--- a/tests/integ/modin/frame/test_iat.py
+++ b/tests/integ/modin/frame/test_iat.py
@@ -103,7 +103,7 @@ def iat_set_helper(df):
         (-7, -7),
     ],
 )
-@sql_count_checker(query_count=1, join_count=4)
+@sql_count_checker(query_count=1, join_count=2)
 def test_iat_get_time_index_time_columns(
     key,
     time_index_snowpark_pandas_df,
@@ -121,7 +121,7 @@ def test_iat_get_time_index_time_columns(
         (-7, -7),
     ],
 )
-@sql_count_checker(query_count=1, join_count=4)
+@sql_count_checker(query_count=1, join_count=2)
 def test_iat_set_time_index_time_columns(
     key,
     time_index_snowpark_pandas_df,
diff --git a/tests/integ/modin/frame/test_idxmax_idxmin.py b/tests/integ/modin/frame/test_idxmax_idxmin.py
index f9dc28bba9..56159484a2 100644
--- a/tests/integ/modin/frame/test_idxmax_idxmin.py
+++ b/tests/integ/modin/frame/test_idxmax_idxmin.py
@@ -13,6 +13,7 @@
 from tests.integ.modin.utils import create_test_dfs, eval_snowpark_pandas_result
 
 
+@sql_count_checker(query_count=1)
 @pytest.mark.parametrize(
     "data, index",
     [
@@ -73,20 +74,16 @@ def test_idxmax_idxmin_df(data, index, func, axis, skipna):
         pytest.xfail(
             "Snowpark pandas returns a Series with None whereas pandas throws a ValueError"
         )
-    with SqlCounter(
-        query_count=1,
-        join_count=0 if index is None or (data == {} and index == []) else 1,
-    ):
-        eval_snowpark_pandas_result(
-            *create_test_dfs(
-                data=data,
-                index=index,
-            ),
-            lambda df: getattr(df, func)(axis=axis, skipna=skipna),
-        )
+    eval_snowpark_pandas_result(
+        *create_test_dfs(
+            data=data,
+            index=index,
+        ),
+        lambda df: getattr(df, func)(axis=axis, skipna=skipna),
+    )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 @pytest.mark.parametrize(
     "data, index",
     [
@@ -217,7 +214,7 @@ def test_idxmax_idxmin_with_timedelta(func, axis):
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("func", ["idxmax", "idxmin"])
 @pytest.mark.parametrize("axis", [0, 1])
 def test_idxmax_idxmin_with_strings(func, axis):
diff --git a/tests/integ/modin/frame/test_iloc.py b/tests/integ/modin/frame/test_iloc.py
index d8b874b1cf..d9bb4c1bc8 100644
--- a/tests/integ/modin/frame/test_iloc.py
+++ b/tests/integ/modin/frame/test_iloc.py
@@ -118,7 +118,7 @@
     ("RangeIndex", 0),
     ("Index[bool]", 1),
     ("emptyFloatSeries", 2),
-    ("multi_index_Series", 6),
+    ("multi_index_Series", 2),
 ]
 
 # Snowflake type checking will fail if the item values aren't type compatible, so we normalize to int to stay compatible.
@@ -315,10 +315,7 @@ def eval_func(df):
     if key == "RangeIndex":
         expected_query_count = 1
 
-    with SqlCounter(
-        query_count=expected_query_count,
-        join_count=4 if key == "multi_index_Series" else 0,
-    ):
+    with SqlCounter(query_count=expected_query_count):
         eval_snowpark_pandas_result(
             default_index_snowpark_pandas_df, default_index_native_df, eval_func
         )
@@ -448,7 +445,7 @@ def test_df_iloc_get_diff2native(
     )
 
 
-@sql_count_checker(query_count=2, join_count=8)
+@sql_count_checker(query_count=2, join_count=4)
 def test_df_iloc_get_with_conflict():
     # index and data columns have conflict in get_by_col
     df = DataFrame({"A": [0, 1]}, index=native_pd.Index([2, 3], name="A")).rename(
@@ -2622,31 +2619,31 @@ def perform_iloc(df):
 
 
 @pytest.mark.parametrize(
-    "row_key, row_key_index, row_add_joins",
+    "row_key, row_key_index",
     [
-        [1, None, 0],
-        [[3, 0], None, 0],
-        [[1, 2], [("A",), ("B",)], 1],
-        [[2, 1], [("A", 1), ("B", 2)], 2],
+        [1, None],
+        [[3, 0], None],
+        [[1, 2], [("A",), ("B",)]],
+        [[2, 1], [("A", 1), ("B", 2)]],
     ],
 )
 @pytest.mark.parametrize(
-    "col_key, col_key_index, col_add_joins",
+    "col_key, col_key_index",
     [
-        [2, None, 0],
-        [[2, 1], None, 0],
-        [[1, 2], [("X",), ("Y",)], 1],
-        [[2, 1], [("X", 11), ("Y", 21)], 2],
+        [2, None],
+        [[2, 1], None],
+        [[1, 2], [("X",), ("Y",)]],
+        [[2, 1], [("X", 11), ("Y", 21)]],
     ],
 )
 @pytest.mark.parametrize(
     "item_values, item_index, item_columns, expected_join_count",
     [
-        [999, None, None, 6],
-        [TEST_ITEMS_DATA_2X2, None, None, 7],
-        [TEST_ITEMS_DATA_2X2, [("r", 20), ("s", 25)], None, 9],
-        [TEST_ITEMS_DATA_2X2, [("r", 20), ("s", 25)], [("e", 5), ("f", 6)], 9],
-        [TEST_ITEMS_DATA_2X2, None, [("e", 5), ("f", 6)], 7],
+        [999, None, None, 2],
+        [TEST_ITEMS_DATA_2X2, None, None, 3],
+        [TEST_ITEMS_DATA_2X2, [("r", 20), ("s", 25)], None, 5],
+        [TEST_ITEMS_DATA_2X2, [("r", 20), ("s", 25)], [("e", 5), ("f", 6)], 5],
+        [TEST_ITEMS_DATA_2X2, None, [("e", 5), ("f", 6)], 3],
     ],
 )
 def test_df_iloc_set_with_multiindex(
@@ -2658,8 +2655,6 @@ def test_df_iloc_set_with_multiindex(
     item_index,
     item_columns,
     expected_join_count,
-    row_add_joins,
-    col_add_joins,
 ):
     df_data = [
         [1, 2, 3, 4, 5],
@@ -2715,6 +2710,7 @@ def test_df_iloc_set_with_multiindex(
         native_items.columns = pd.MultiIndex.from_tuples(item_columns)
 
     if row_key_index:
+        # Using native pandas index since row_key[2] is a MultiIndex object.
         snow_row_key = pd.Series(row_key, index=native_pd.Index(row_key_index))
         native_row_key = native_pd.Series(row_key, index=native_pd.Index(row_key_index))
     else:
@@ -2722,6 +2718,7 @@ def test_df_iloc_set_with_multiindex(
         native_row_key = row_key
 
     if col_key_index:
+        # Using native pandas index since col_key[2] is a MultiIndex object.
         snow_col_key = pd.Series(col_key, index=native_pd.Index(col_key_index))
         native_col_key = native_pd.Series(col_key, index=native_pd.Index(col_key_index))
     else:
@@ -2738,7 +2735,6 @@ def helper_iloc(df):
     if isinstance(snow_col_key, pd.Series):
         expected_query_count += 1
 
-    expected_join_count += row_add_joins + col_add_joins
     with SqlCounter(query_count=expected_query_count, join_count=expected_join_count):
         eval_snowpark_pandas_result(snow_df, native_df, helper_iloc, inplace=True)
 
@@ -2814,7 +2810,7 @@ def iloc_helper(df: Union[pd.DataFrame, native_pd.DataFrame]) -> None:
 
     # For a Series row key, the key is joined with the df to derive the iloc results. For column keys, a select
     # statement is used instead of a join.
-    join_count = 4 if axis == "row" else 2
+    join_count = 2 if axis == "row" else 0
     query_count = 1 if axis == "row" else 2
 
     # Evaluate with MultiIndex created from tuples.
diff --git a/tests/integ/modin/frame/test_insert.py b/tests/integ/modin/frame/test_insert.py
index c7a1c980c9..414889d337 100644
--- a/tests/integ/modin/frame/test_insert.py
+++ b/tests/integ/modin/frame/test_insert.py
@@ -277,13 +277,13 @@ def test_insert_loc_negative(native_df, loc, expected_query_count):
 @pytest.mark.parametrize(
     "value, expected_query_count, expected_join_count",
     [
-        (np.array(["a", "b", "c", "d"]), 2, 5),  # numpy array of shape (N,)
-        (np.array([["a"], ["b"], ["c"], ["d"]]), 2, 5),  # numpy array of shape (N, 1)
-        (["a", "b", "c", "d"], 2, 5),  # python list
-        (("a", "b", "c", "d"), 2, 5),  # python tuple
-        ({(3, 1): 1}, 1, 3),  # python dict
-        ("abc", 1, 2),  # sting scalar
-        (1, 1, 2),  # int scalar
+        (np.array(["a", "b", "c", "d"]), 2, 1),  # numpy array of shape (N,)
+        (np.array([["a"], ["b"], ["c"], ["d"]]), 2, 1),  # numpy array of shape (N, 1)
+        (["a", "b", "c", "d"], 2, 1),  # python list
+        (("a", "b", "c", "d"), 2, 1),  # python tuple
+        ({(3, 1): 1}, 1, 1),  # python dict
+        ("abc", 1, 0),  # sting scalar
+        (1, 1, 0),  # int scalar
     ],
 )
 def test_insert_multiindex_array_like_and_scalar(
@@ -310,7 +310,7 @@ def test_insert_multiindex_array_like_and_scalar(
         ("a", "b", "c", "d"),  # python tuple
     ],
 )
-@sql_count_checker(query_count=2, join_count=5)
+@sql_count_checker(query_count=2, join_count=1)
 def test_insert_empty_multiindex_frame(value):
     mi = pd.MultiIndex.from_arrays([np.array([], dtype=int), np.array([], dtype=int)])
     snow_df = pd.DataFrame([], index=mi)
@@ -344,61 +344,55 @@ def test_insert_multiindex_dict_negative():
 
 
 @pytest.mark.parametrize(
-    "df_index, value_index, join_count",
+    "df_index, value_index",
     [
-        ([3, 0, 4], [1, 2, 3], 6),
-        ([(1, 0), (1, 2), (2, 2)], [(1, 1), (1, 2), (2, 2)], 11),
-        ([1.0, 2.5, 3.0], [1, 2, 3], 6),  # Long and Double can be joined
+        ([3, 0, 4], [1, 2, 3]),
+        ([(1, 0), (1, 2), (2, 2)], [(1, 1), (1, 2), (2, 2)]),
+        ([1.0, 2.5, 3.0], [1, 2, 3]),  # Long and Double can be joined
     ],
 )
-def test_insert_compatible_index(df_index, value_index, join_count):
+@sql_count_checker(query_count=4, join_count=1)
+def test_insert_compatible_index(df_index, value_index):
     snow_df = pd.DataFrame({"col1": ["p", "q", "r"]}, index=native_pd.Index(df_index))
     value = pd.DataFrame({"col2": ["x", "y", "z"]}, index=native_pd.Index(value_index))
-    with SqlCounter(query_count=4, join_count=join_count):
-        eval_snowpark_pandas_result(
-            snow_df,
-            snow_df.to_pandas(),
-            lambda df: df.insert(
-                0, "col3", value if isinstance(df, pd.DataFrame) else value.to_pandas()
-            ),
-            inplace=True,  # insert operation is always inplace
-        )
+    eval_snowpark_pandas_result(
+        snow_df,
+        snow_df.to_pandas(),
+        lambda df: df.insert(
+            0, "col3", value if isinstance(df, pd.DataFrame) else value.to_pandas()
+        ),
+        inplace=True,  # insert operation is always inplace
+    )
 
 
 @pytest.mark.parametrize(
-    "df_index, value_index, join_count",
+    "df_index, value_index",
     [
-        ([3, 2, 1], [(1, 0, 1), (1, 2, 3), (2, 1, 0)], 3),  # length mismatch 1 != 3
+        ([3, 2, 1], [(1, 0, 1), (1, 2, 3), (2, 1, 0)]),  # length mismatch 1 != 3
         (
             [(3, 1), (2, 1), (1, 2)],
             [(1, 0, 1), (1, 2, 3), (2, 1, 0)],
-            3,
         ),  # length mismatch 2 != 3
-        ([1, 2, 3], [(1, 0), (1, 2), (2, 2)], 2),  # 1 != 2
-        ([(1, 0), (1, 2), (2, 2)], [(1, 2, 3), (3, 4, 5), (6, 5, 4)], 3),  # 2 != 3
-        ([(1, 2, 3), (3, 4, 5), (6, 5, 4)], [3, 1, 2], 1),  # length mismatch 3 != 1
+        ([1, 2, 3], [(1, 0), (1, 2), (2, 2)]),  # 1 != 2
+        ([(1, 0), (1, 2), (2, 2)], [(1, 2, 3), (3, 4, 5), (6, 5, 4)]),  # 2 != 3
+        ([(1, 2, 3), (3, 4, 5), (6, 5, 4)], [3, 1, 2]),  # length mismatch 3 != 1
         (
             [(1, 1), (1, 2), (2, 2)],
             ["(1, 0)", "(1, 2)", "(2, 2)"],
-            1,
         ),  # length and type mismatch
     ],
 )
-def test_insert_index_num_levels_mismatch_negative(df_index, value_index, join_count):
-    with SqlCounter(query_count=1, join_count=join_count):
-        snow_df = pd.DataFrame(
-            {"col1": ["p", "q", "r"]}, index=native_pd.Index(df_index)
-        )
-        value = pd.DataFrame(
-            {"col2": ["w", "x", "y"]}, index=native_pd.Index(value_index)
-        )
-        # This is different behavior from native pandas. Native pandas in some cases
-        # insert new column with null values but in Snowpark pandas we always raise error.
-        with pytest.raises(
-            ValueError,
-            match="Number of index levels of inserted column are different from frame index",
-        ):
-            snow_df.insert(0, "col3", value)
+@sql_count_checker(query_count=1)
+def test_insert_index_num_levels_mismatch_negative(df_index, value_index):
+    snow_df = pd.DataFrame({"col1": ["p", "q", "r"]}, index=native_pd.Index(df_index))
+    value = pd.DataFrame({"col2": ["w", "x", "y"]}, index=native_pd.Index(value_index))
+    # This is different behavior from native pandas. Native pandas in some cases
+    # insert new column with null values but in Snowpark pandas we always raise error.
+    with pytest.raises(
+        ValueError,
+        match="Number of index levels of inserted column are different from frame index",
+    ):
+        snow_df.insert(0, "col3", value)
 
 
 @pytest.mark.parametrize(
@@ -413,7 +407,7 @@ def test_insert_index_num_levels_mismatch_negative(df_index, value_index, join_c
         ),  # type mismatch boolean != long
     ],
 )
-@sql_count_checker(query_count=2, join_count=4)
+@sql_count_checker(query_count=2, join_count=1)
 def test_insert_index_type_mismatch(df_index, value_index, expected_index):
     # Note: This is different behavior than native pandas. In native pandas when
     # index datatype mismatch new columns in inserted will all NULL values.
@@ -430,7 +424,7 @@ def test_insert_index_type_mismatch(df_index, value_index, expected_index):
     assert_snowpark_pandas_equal_to_pandas(snow_df, expected_df)
 
 
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_insert_with_null_index_values():
     snow_df = pd.DataFrame(
         {"A": ["p", "q", "r", "s"]}, native_pd.Index(["a", None, "b", None])
@@ -446,7 +440,7 @@ def test_insert_with_null_index_values():
     )
 
 
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_insert_multiple_null():
     snow_df = pd.DataFrame(
         {"A": ["p", "q", "r", "s"]}, native_pd.Index(["a", "b", "c", "d"])
@@ -471,8 +465,8 @@ def test_insert_multiple_null():
 @pytest.mark.parametrize(
     "index, value, expected_query_count, expected_join_count",
     [
-        ([1, 2], native_pd.Series([1, 2], index=[2, 3]), 1, 3),
-        ([1, 2], [3, 4], 2, 3),
+        ([1, 2], native_pd.Series([1, 2], index=[2, 3]), 1, 1),
+        ([1, 2], [3, 4], 2, 1),
     ],
 )
 def test_insert_into_empty_dataframe_with_index(
diff --git a/tests/integ/modin/frame/test_loc.py b/tests/integ/modin/frame/test_loc.py
index d75b16658d..33c1fb98e5 100644
--- a/tests/integ/modin/frame/test_loc.py
+++ b/tests/integ/modin/frame/test_loc.py
@@ -215,7 +215,7 @@ def test_df_loc_get_col_non_boolean_key(
     "key",
     boolean_indexer,
 )
-@sql_count_checker(query_count=3, join_count=1)
+@sql_count_checker(query_count=3)
 def test_df_loc_get_col_boolean_indexer(
     key, str_index_snowpark_pandas_df, str_index_native_df
 ):
@@ -243,7 +243,7 @@ def test_df_loc_get_col_boolean_indexer(
     "key",
     list_like_time_col_inputs,
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 def test_df_loc_get_col_time_df(
     key, time_column_snowpark_pandas_df, time_column_native_df
 ):
@@ -258,7 +258,7 @@ def test_df_loc_get_col_time_df(
     "key",
     snowpark_pandas_int_index_row_inputs,
 )
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_df_loc_get_int_index_row_snowpark_pandas_input(
     key,
     default_index_snowpark_pandas_df,
@@ -606,7 +606,7 @@ def test_mi_df_loc_get_non_boolean_list_tuple_key(mi_table_df, row, col):
             )
 
 
-@sql_count_checker(query_count=2, join_count=4)
+@sql_count_checker(query_count=2, join_count=2)
 def test_mi_df_loc_get_boolean_series_row_key(mi_table_df):
     df = pd.DataFrame(mi_table_df)
     bool_indexer = [False, True, True, False, False, True]
@@ -639,7 +639,7 @@ def test_mi_df_loc_get_boolean_series_row_key(mi_table_df):
     )
 
 
-@sql_count_checker(query_count=3, join_count=2)
+@sql_count_checker(query_count=3)
 def test_mi_df_loc_get_boolean_series_col_key(mi_table_df):
     df = pd.DataFrame(mi_table_df)
     bool_indexer = [False, True]
@@ -1448,9 +1448,11 @@ def helper(df):
                 snow_df.to_pandas()
     else:
         expected_query_count = 1
-        expected_join_count = 2
+        expected_join_count = 1
         if key == slice(None):
             expected_join_count = 0
+        elif isinstance(key, slice) and key.step == 2:
+            expected_join_count += 1
 
         with SqlCounter(
             query_count=expected_query_count, join_count=expected_join_count
@@ -1680,7 +1682,7 @@ def test_df_loc_get_key_bool_series_with_aligned_indices(key, use_default_index)
     native_df = native_pd.DataFrame(
         {"c1": [1, 2, 3, 4, 5], "c2": ["x", "y", "z", "d", "e"]}, index=index
     )
-    with SqlCounter(query_count=1, join_count=1 if use_default_index else 2):
+    with SqlCounter(query_count=1, join_count=1):
         snow_df = pd.DataFrame(native_df)
         eval_snowpark_pandas_result(
             snow_df,
@@ -1699,7 +1701,7 @@ def test_df_loc_get_key_bool_series_with_aligned_indices(key, use_default_index)
         [random.choice([True, False]) for _ in range(5)],
     ],
 )
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_df_loc_get_key_bool_series_with_unaligned_and_distinct_indices(
     key, use_default_index
 ):
@@ -1776,7 +1778,7 @@ def test_df_loc_get_key_bool_series_with_unaligned_and_duplicate_indices():
         ],  # larger length
     ],
 )
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_df_loc_get_key_bool_series_with_mismatch_index_len(key, use_default_index):
     if use_default_index:
         index = None
@@ -2404,7 +2406,7 @@ def loc_set_helper(df):
         ["a", "a", "c", "d"],
     ],
 )
-@sql_count_checker(query_count=1, join_count=3)
+@sql_count_checker(query_count=1, join_count=1)
 def test_df_loc_set_scalar_row_key_enlargement(
     row_key, col_key, item_values, data_index
 ):
@@ -2476,7 +2478,7 @@ def set_loc_helper(df):
         ["a", "a", "c", "d"],
     ],
 )
-@sql_count_checker(query_count=1, join_count=3)
+@sql_count_checker(query_count=1, join_count=1)
 def test_df_loc_set_scalar_row_key_enlargement_deviates_from_native_pandas(
     row_key, col_key, item_values, data_index
 ):
@@ -3201,7 +3203,7 @@ def test_df_loc_set_boolean_series_with_non_default_index_key_and_scalar_item():
         ["duplicate", [1, 1, 2, 3]],
     ],
 )
-@sql_count_checker(query_count=1, join_count=5)
+@sql_count_checker(query_count=1, join_count=4)
 def test_df_loc_set_duplicate_index(
     self_index_type, self_index_val, index, columns, item
 ):
@@ -3782,7 +3784,7 @@ def loc_set_helper(df):
         [2, "x"],
     ],
 )
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_df_setitem_boolean_key(key, index):
     item = 99
     num_columns = 3
@@ -3860,7 +3862,7 @@ def test_df_single_value_with_slice_key():
     eval_snowpark_pandas_result(snowpark_df, native_df, lambda df: df.loc[0:1])
 
 
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_df_loc_set_none():
     native_df = native_pd.DataFrame({"a": [1, 2, 3]})
 
@@ -3883,7 +3885,7 @@ def loc_set_helper(df):
     )
 
 
-@sql_count_checker(query_count=1, join_count=4)
+@sql_count_checker(query_count=1, join_count=3)
 def test_df_loc_set_with_index_and_column_labels():
     """
     Create a DataFrame using 3 Series objects and perform loc set with a scalar.
diff --git a/tests/integ/modin/frame/test_mask.py b/tests/integ/modin/frame/test_mask.py
index 7b47880557..53afbd7bf8 100644
--- a/tests/integ/modin/frame/test_mask.py
+++ b/tests/integ/modin/frame/test_mask.py
@@ -864,7 +864,7 @@ def perform_mask(df):
     )
 
 
-@sql_count_checker(query_count=2, join_count=4)
+@sql_count_checker(query_count=2, join_count=3)
 @pytest.mark.parametrize(
     "data",
     [[10], [10, 11, 12], [10, 11, 12, 13]],
@@ -909,7 +909,7 @@ def perform_mask(df):
     )
 
 
-@sql_count_checker(query_count=2, join_count=5, union_count=1)
+@sql_count_checker(query_count=2, join_count=3, union_count=1)
 @pytest.mark.parametrize(
     "data",
     [[10], [10, 11, 12], [10, 11, 12, 13]],
diff --git a/tests/integ/modin/frame/test_merge.py b/tests/integ/modin/frame/test_merge.py
index 15ad41a580..8b9b5472e3 100644
--- a/tests/integ/modin/frame/test_merge.py
+++ b/tests/integ/modin/frame/test_merge.py
@@ -302,7 +302,7 @@ def test_merge_on_index_columns(left_df, right_df, how, on, sort):
 
 @pytest.mark.parametrize("index1", [[3, 4], [1.5, 8.0], [None, None]])
 @pytest.mark.parametrize("index2", [[7, 8], [1.5, 3.0], [None, None]])
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_join_type_mismatch(index1, index2):
     df1 = pd.DataFrame({"A": [1, 2]}, index=index1)
     df2 = pd.DataFrame({"B": [3, 4]}, index=index2)
@@ -351,7 +351,7 @@ def test_join_type_mismatch_negative(index1, index2):
         ),
     ],
 )
-@sql_count_checker(query_count=1, join_count=3)
+@sql_count_checker(query_count=1, join_count=1)
 def test_join_type_mismatch_diff_with_native_pandas(index1, index2, expected_res):
     df1 = pd.DataFrame({"A": [1, 2]}, index=index1)
     df2 = pd.DataFrame({"B": [3, 4]}, index=index2)
@@ -960,7 +960,7 @@ def test_merge_no_join_keys_negative(left_name, right_name, left_df, right_df):
     )
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_merge_no_join_keys_common_index_negative(left_df, right_df):
     left_df = pd.DataFrame({"A": [1, 2, 3]}, native_pd.Index([7, 8, 9], name="KEY"))
     right_df = pd.DataFrame({"B": [1, 2, 3]}, native_pd.Index([7, 8, 9], name="KEY"))
diff --git a/tests/integ/modin/frame/test_nunique.py b/tests/integ/modin/frame/test_nunique.py
index 6fd1751e3a..d0cad8ec2a 100644
--- a/tests/integ/modin/frame/test_nunique.py
+++ b/tests/integ/modin/frame/test_nunique.py
@@ -85,12 +85,12 @@ def test_dataframe_nunique_no_columns(native_df):
         ),
     ],
 )
+@sql_count_checker(query_count=1)
 def test_dataframe_nunique_multiindex(index, columns):
-    with SqlCounter(query_count=1, join_count=0 if index is None else 2):
-        eval_snowpark_pandas_result(
-            *create_test_dfs(TEST_DATA, index=index, columns=columns),
-            lambda df: df.nunique(axis=0),
-        )
+    eval_snowpark_pandas_result(
+        *create_test_dfs(TEST_DATA, index=index, columns=columns),
+        lambda df: df.nunique(axis=0),
+    )
 
 
 @sql_count_checker(query_count=0)
diff --git a/tests/integ/modin/frame/test_rank.py b/tests/integ/modin/frame/test_rank.py
index 05fa47b99b..1687ce4905 100644
--- a/tests/integ/modin/frame/test_rank.py
+++ b/tests/integ/modin/frame/test_rank.py
@@ -7,7 +7,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
+from tests.integ.modin.sql_counter import sql_count_checker
 from tests.integ.modin.utils import (
     assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64,
     eval_snowpark_pandas_result,
@@ -40,6 +40,7 @@
 ]
 
 
+@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("data, index", TEST_RANK_DATA)
 @pytest.mark.parametrize(
     "method",
@@ -55,16 +56,13 @@
 )
 # test df.rank with all method, na_option, ascending parameter combinations
 def test_df_rank(data, index, method, ascending, na_option):
-    with SqlCounter(
-        query_count=1, join_count=2 if isinstance(index, native_pd.MultiIndex) else 0
-    ):
-        snow_df = pd.DataFrame(data, index=index)
-        native_df = native_pd.DataFrame(data, index=index)
-        eval_snowpark_pandas_result(
-            snow_df,
-            native_df,
-            lambda df: df.rank(method=method, na_option=na_option, ascending=ascending),
-        )
+    snow_df = pd.DataFrame(data, index=index)
+    native_df = native_pd.DataFrame(data, index=index)
+    eval_snowpark_pandas_result(
+        snow_df,
+        native_df,
+        lambda df: df.rank(method=method, na_option=na_option, ascending=ascending),
+    )
 
 
 @sql_count_checker(query_count=1)
@@ -120,6 +118,7 @@ def test_rank_unsupported_args_negative(method, ascending, na_option):
         snow_df.rank(axis=1, method=method, ascending=ascending, na_option=na_option)
 
 
+@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("data, index", TEST_RANK_DATA)
 @pytest.mark.parametrize(
     "method",
@@ -135,15 +134,10 @@ def test_rank_unsupported_args_negative(method, ascending, na_option):
 )
 # test df percentile rank
 def test_df_rank_pct(data, index, method, ascending, na_option):
-    with SqlCounter(
-        query_count=1, join_count=2 if isinstance(index, native_pd.MultiIndex) else 0
-    ):
-        snow_df = pd.DataFrame(data, index=index).rank(
-            method=method, ascending=ascending, na_option=na_option, pct=True
-        )
-        native_df = native_pd.DataFrame(data, index=index).rank(
-            method=method, ascending=ascending, na_option=na_option, pct=True
-        )
-        assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64(
-            snow_df, native_df
-        )
+    snow_df = pd.DataFrame(data, index=index).rank(
+        method=method, ascending=ascending, na_option=na_option, pct=True
+    )
+    native_df = native_pd.DataFrame(data, index=index).rank(
+        method=method, ascending=ascending, na_option=na_option, pct=True
+    )
+    assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64(snow_df, native_df)
diff --git a/tests/integ/modin/frame/test_reindex.py b/tests/integ/modin/frame/test_reindex.py
index 1f7a7e3966..98d0a41e7a 100644
--- a/tests/integ/modin/frame/test_reindex.py
+++ b/tests/integ/modin/frame/test_reindex.py
@@ -454,7 +454,7 @@ def test_reindex_columns_fill_method_with_old_na_values_negative(
                     lambda df: df.reindex(columns=list("CEBFGA"), method=method),
                 )
 
-    @sql_count_checker(query_count=5, join_count=1)
+    @sql_count_checker(query_count=5)
     @pytest.mark.parametrize("limit", [None, 1, 2, 100])
     @pytest.mark.parametrize("method", ["bfill", "backfill", "pad", "ffill"])
     def test_reindex_columns_datetime_with_fill(self, limit, method):
@@ -495,7 +495,7 @@ def test_reindex_columns_non_overlapping_columns(self):
             snow_df, native_df, lambda df: df.reindex(axis=1, labels=list("EFG"))
         )
 
-    @sql_count_checker(query_count=5, join_count=1)
+    @sql_count_checker(query_count=5)
     def test_reindex_columns_non_overlapping_datetime_columns(self):
         date_index = native_pd.date_range("1/1/2010", periods=6, freq="D")
         native_df = native_pd.DataFrame(
@@ -520,7 +520,7 @@ def perform_reindex(df):
             snow_df, native_df, perform_reindex, check_freq=False
         )
 
-    @sql_count_checker(query_count=2, join_count=1)
+    @sql_count_checker(query_count=2)
     def test_reindex_columns_non_overlapping_different_types_columns(self):
         date_index = native_pd.date_range("1/1/2010", periods=6, freq="D")
         native_df = native_pd.DataFrame(
diff --git a/tests/integ/modin/frame/test_rename.py b/tests/integ/modin/frame/test_rename.py
index 15351ec6fa..a5595ec716 100644
--- a/tests/integ/modin/frame/test_rename.py
+++ b/tests/integ/modin/frame/test_rename.py
@@ -294,7 +294,7 @@ def test_rename_objects(self, snow_float_string_frame):
         assert "FOO" in renamed
         assert "foo" not in renamed
 
-    @sql_count_checker(query_count=6, join_count=8)
+    @sql_count_checker(query_count=6, join_count=2)
     def test_rename_axis_style(self):
         # https://github.com/pandas-dev/pandas/issues/12392
         df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["X", "Y"])
@@ -420,7 +420,7 @@ def test_rename_mapper_and_positional_arguments_raises(self):
         with pytest.raises(TypeError, match=msg):
             df.rename({}, columns={}, index={})
 
-    @sql_count_checker(query_count=1, join_count=5)
+    @sql_count_checker(query_count=1, join_count=1)
     def test_rename_with_duplicate_columns(self):
         # GH#4403
         df4 = DataFrame(
diff --git a/tests/integ/modin/frame/test_repr.py b/tests/integ/modin/frame/test_repr.py
index f499146806..2109bdccb5 100644
--- a/tests/integ/modin/frame/test_repr.py
+++ b/tests/integ/modin/frame/test_repr.py
@@ -227,7 +227,7 @@ def test_repr_deviating_behavior():
     assert native_str[:N] == snow_str[:N]
 
 
-@sql_count_checker(query_count=2, union_count=1, join_count=6)
+@sql_count_checker(query_count=2, union_count=1)
 def test_repr_of_multiindex_df():
     tuples = [
         ("cobra", "mark i"),
diff --git a/tests/integ/modin/frame/test_setitem.py b/tests/integ/modin/frame/test_setitem.py
index 6bbdc30fa0..6152089f39 100644
--- a/tests/integ/modin/frame/test_setitem.py
+++ b/tests/integ/modin/frame/test_setitem.py
@@ -141,7 +141,7 @@ def setitem(df):
         else:
             df[key] = val
 
-    expected_join_count = 6 if isinstance(key.start, int) else 7
+    expected_join_count = 3 if isinstance(key.start, int) else 4
 
     with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(snow_df, native_df, setitem, inplace=True)
@@ -361,7 +361,9 @@ def func_insert_new_column(df, column):
         df[key] = column
 
     expected_join_count = 2
-    if isinstance(column, native_pd.Index) and not isinstance(
+    if isinstance(column, native_pd.Series):
+        expected_join_count = 1
+    elif isinstance(column, native_pd.Index) and not isinstance(
         column, native_pd.DatetimeIndex
     ):
         expected_join_count = 4
diff --git a/tests/integ/modin/frame/test_stack.py b/tests/integ/modin/frame/test_stack.py
index 80c437dea7..9b06c32ff0 100644
--- a/tests/integ/modin/frame/test_stack.py
+++ b/tests/integ/modin/frame/test_stack.py
@@ -20,7 +20,7 @@
 )
 @pytest.mark.parametrize("dropna", [True, False])
 @pytest.mark.parametrize("sort", [True, False])
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 def test_stack(data, index, columns, dropna, sort):
     eval_snowpark_pandas_result(
         *create_test_dfs(data=data, index=index, columns=columns),
diff --git a/tests/integ/modin/frame/test_transpose.py b/tests/integ/modin/frame/test_transpose.py
index 469a66dd51..894bbbbc1b 100644
--- a/tests/integ/modin/frame/test_transpose.py
+++ b/tests/integ/modin/frame/test_transpose.py
@@ -242,7 +242,7 @@ def test_dataframe_transpose_preserve_float_dtypes():
     assert all([dtype == "float64" for dtype in snow_df.T.dtypes])
 
 
-@sql_count_checker(query_count=1, union_count=1, join_count=2)
+@sql_count_checker(query_count=1, union_count=1)
 def test_dataframe_transpose_single_numeric_column():
     single_column_data = ({0: "A", 1: "B", 2: "C", 3: "D"},)
     native_df = native_pd.DataFrame(single_column_data, index=(0,))
diff --git a/tests/integ/modin/frame/test_where.py b/tests/integ/modin/frame/test_where.py
index 75a5d6db7a..006b7e76fb 100644
--- a/tests/integ/modin/frame/test_where.py
+++ b/tests/integ/modin/frame/test_where.py
@@ -902,7 +902,7 @@ def perform_where(df):
     )
 
 
-@sql_count_checker(query_count=2, join_count=4)
+@sql_count_checker(query_count=2, join_count=3)
 @pytest.mark.parametrize(
     "data",
     [[10], [10, 11, 12], [10, 11, 12, 13]],
@@ -947,7 +947,7 @@ def perform_where(df):
     )
 
 
-@sql_count_checker(query_count=2, join_count=5, union_count=1)
+@sql_count_checker(query_count=2, join_count=3, union_count=1)
 @pytest.mark.parametrize(
     "data",
     [[10], [10, 11, 12], [10, 11, 12, 13]],
diff --git a/tests/integ/modin/groupby/test_groupby_apply.py b/tests/integ/modin/groupby/test_groupby_apply.py
index 7c43b00a7b..e83fcbe00b 100644
--- a/tests/integ/modin/groupby/test_groupby_apply.py
+++ b/tests/integ/modin/groupby/test_groupby_apply.py
@@ -191,7 +191,7 @@ class TestFuncReturnsDataFrame:
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=3,
+        join_count=JOIN_COUNT,
     )
     def test_group_by_one_column_and_one_level_with_default_kwargs(
         self, grouping_dfs_with_multiindexes, func
@@ -206,7 +206,7 @@ def test_group_by_one_column_and_one_level_with_default_kwargs(
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=3,
+        join_count=JOIN_COUNT,
     )
     def test_df_with_default_index(self, grouping_dfs_with_multiindexes):
         eval_snowpark_pandas_result(
@@ -232,7 +232,7 @@ def test_func_returns_empty_frame(self):
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=3,
+        join_count=JOIN_COUNT,
     )
     def test_args_and_kwargs(self, grouping_dfs_with_multiindexes):
         def func(df, num1, str1):
@@ -258,7 +258,7 @@ def func(df, num1, str1):
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=3,
+        join_count=JOIN_COUNT,
     )
     def test_group_by_level(self, grouping_dfs_with_multiindexes, level):
         eval_snowpark_pandas_result(
@@ -281,7 +281,7 @@ def operation(df: native_pd.DataFrame) -> native_pd.DataFrame:
             # When dropna=False, we can skip the dropna query
             query_count=4,
             udtf_count=UDTF_COUNT,
-            join_count=3,
+            join_count=JOIN_COUNT,
         ):
             snow_result = operation(snow_df)
         pandas_result = operation(pandas_df)
@@ -332,7 +332,7 @@ def test_group_dataframe_with_column_of_all_nulls_snow_1233832(self, null_value)
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=3,
+        join_count=JOIN_COUNT,
     )
     @pytest.mark.parametrize(
         "by, expected_output",
@@ -417,7 +417,7 @@ def operation(df: native_pd.DataFrame) -> native_pd.DataFrame:
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=3,
+        join_count=JOIN_COUNT,
     )
     @pytest.mark.parametrize("by", ["level_0", ("a", "string_col_1")])
     @pytest.mark.parametrize(
@@ -444,7 +444,7 @@ def test_as_index_false(self, grouping_dfs_with_multiindexes, by, func):
         # transform because we only reindex to the original ordering if
         query_count=QUERY_COUNT_WITH_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=3,
+        join_count=JOIN_COUNT,
     )
     def test_group_keys_false(self, grouping_dfs_with_multiindexes, as_index):
         eval_snowpark_pandas_result(
@@ -598,7 +598,7 @@ def operation(df: native_pd.DataFrame) -> native_pd.DataFrame:
     @sql_count_checker(
         # we need a transform check because group_keys=False.
         query_count=QUERY_COUNT_WITH_TRANSFORM_CHECK,
-        join_count=3,
+        join_count=JOIN_COUNT,
         udtf_count=UDTF_COUNT,
     )
     def test_apply_transfform_to_subset(
@@ -631,7 +631,7 @@ def test_apply_transfform_to_subset(
     )
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
-        join_count=3,
+        join_count=JOIN_COUNT,
         udtf_count=UDTF_COUNT,
     )
     def test_numpy_ints_in_result(self, grouping_dfs_with_multiindexes, result):
@@ -800,7 +800,7 @@ def test_root_mean_squared_error(self):
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=3,
+        join_count=JOIN_COUNT,
     )
     def test_multiindex_df(self, grouping_dfs_with_multiindexes, by, sort, as_index):
         eval_snowpark_pandas_result(
@@ -836,7 +836,7 @@ def test_multiindex_df(self, grouping_dfs_with_multiindexes, by, sort, as_index)
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=3,
+        join_count=JOIN_COUNT,
     )
     def test_non_series_or_dataframe_return_types(
         self, return_value, grouping_dfs_with_multiindexes
@@ -918,7 +918,7 @@ class TestFuncReturnsSeries:
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=3,
+        join_count=JOIN_COUNT,
     )
     def test_return_series_with_two_columns(
         self, grouping_dfs_with_multiindexes, by, level, as_index, sort, group_keys
@@ -943,7 +943,7 @@ def test_return_series_with_two_columns(
     @sql_count_checker(
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=3,
+        join_count=JOIN_COUNT,
     )
     def test_args_and_kwargs(self, grouping_dfs_with_multiindexes):
         eval_snowpark_pandas_result(
diff --git a/tests/integ/modin/groupby/test_groupby_basic_agg.py b/tests/integ/modin/groupby/test_groupby_basic_agg.py
index f3002901d0..d4211f2a41 100644
--- a/tests/integ/modin/groupby/test_groupby_basic_agg.py
+++ b/tests/integ/modin/groupby/test_groupby_basic_agg.py
@@ -951,8 +951,8 @@ def test_groupby_with_level(df_multi, level):
     )
 
 
-@sql_count_checker(query_count=1, join_count=2)
-def test_groupby_with_hier_columns():
+@sql_count_checker(query_count=1)
+def test_groupby_with_higher_columns():
     tuples = list(
         zip(
             *[
diff --git a/tests/integ/modin/groupby/test_groupby_dataframe_rank.py b/tests/integ/modin/groupby/test_groupby_dataframe_rank.py
index 78443c3bbf..3bb4a4b455 100644
--- a/tests/integ/modin/groupby/test_groupby_dataframe_rank.py
+++ b/tests/integ/modin/groupby/test_groupby_dataframe_rank.py
@@ -7,7 +7,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
+from tests.integ.modin.sql_counter import sql_count_checker
 from tests.integ.modin.utils import (
     assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64,
     eval_snowpark_pandas_result,
@@ -211,6 +211,7 @@
 ]
 
 
+@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("data, index", TEST_RANK_DATA)
 @pytest.mark.parametrize(
     "method",
@@ -232,18 +233,16 @@
 def test_df_groupby_rank(data, index, method, ascending, na_option, dropna):
     snow_df = pd.DataFrame(data, index=index)
     native_df = native_pd.DataFrame(data, index=index)
-    with SqlCounter(
-        query_count=1, join_count=2 if isinstance(index, pd.MultiIndex) else 0
-    ):
-        eval_snowpark_pandas_result(
-            snow_df,
-            native_df,
-            lambda df: df.groupby("group", dropna=dropna).rank(
-                method=method, na_option=na_option, ascending=ascending
-            ),
-        )
+    eval_snowpark_pandas_result(
+        snow_df,
+        native_df,
+        lambda df: df.groupby("group", dropna=dropna).rank(
+            method=method, na_option=na_option, ascending=ascending
+        ),
+    )
 
 
+@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("data, index", TEST_RANK_DATA)
 @pytest.mark.parametrize(
     "method",
@@ -273,14 +272,10 @@ def test_df_rank_pct(data, index, method, ascending, na_option, dropna):
         .groupby("group", dropna=dropna)
         .rank(method=method, ascending=ascending, na_option=na_option, pct=True)
     )
-    with SqlCounter(
-        query_count=1, join_count=2 if isinstance(index, pd.MultiIndex) else 0
-    ):
-        assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64(
-            snow_df, native_df
-        )
+    assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64(snow_df, native_df)
 
 
+@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("data, index", TEST_RANK_DATA_MUL)
 @pytest.mark.parametrize(
     "method",
@@ -298,16 +293,13 @@ def test_df_rank_pct(data, index, method, ascending, na_option, dropna):
 def test_df_groupby_rank_by_list(data, index, method, ascending, na_option):
     snow_df = pd.DataFrame(data, index=index)
     native_df = native_pd.DataFrame(data, index=index)
-    with SqlCounter(
-        query_count=1, join_count=2 if isinstance(index, pd.MultiIndex) else 0
-    ):
-        eval_snowpark_pandas_result(
-            snow_df,
-            native_df,
-            lambda df: df.groupby(["group", "a"]).rank(
-                method=method, na_option=na_option, ascending=ascending
-            ),
-        )
+    eval_snowpark_pandas_result(
+        snow_df,
+        native_df,
+        lambda df: df.groupby(["group", "a"]).rank(
+            method=method, na_option=na_option, ascending=ascending
+        ),
+    )
 
 
 @pytest.mark.parametrize(
diff --git a/tests/integ/modin/groupby/test_groupby_default2pandas.py b/tests/integ/modin/groupby/test_groupby_default2pandas.py
index 74aac8f77c..49d45a1009 100644
--- a/tests/integ/modin/groupby/test_groupby_default2pandas.py
+++ b/tests/integ/modin/groupby/test_groupby_default2pandas.py
@@ -124,7 +124,7 @@ def test_groupby_with_numpy_array(basic_snowpark_pandas_df) -> None:
     "by_list",
     [[2, 1, 1, 2, 3, 3], [[2, 1, 1, 2, 3, 3], "a"]],
 )
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1)
 def test_groupby_series_with_numpy_array(series_multi_numeric, by_list) -> None:
     with pytest.raises(
         NotImplementedError, match=AGGREGATE_UNSUPPORTED_GROUPING_ERROR_PATTERN
diff --git a/tests/integ/modin/groupby/test_groupby_head_tail.py b/tests/integ/modin/groupby/test_groupby_head_tail.py
index d462b89150..90819ec2d6 100644
--- a/tests/integ/modin/groupby/test_groupby_head_tail.py
+++ b/tests/integ/modin/groupby/test_groupby_head_tail.py
@@ -45,7 +45,7 @@ class TestDataFrameGroupByHeadTail:
         ["lion", 1234, 456, 78, 9],
     ]
 
-    @sql_count_checker(query_count=1, join_count=1)
+    @sql_count_checker(query_count=1)
     def test_df_groupby_head_tail(self, op_type, n, dropna, as_index, sort, group_keys):
         """
         Test DataFrameGroupBy.head and DataFrameGroupBy.tail with a small df with no NA values.
@@ -66,7 +66,7 @@ def test_df_groupby_head_tail(self, op_type, n, dropna, as_index, sort, group_ke
             check_index_type=False,
         )
 
-    @sql_count_checker(query_count=6, join_count=1)
+    @sql_count_checker(query_count=6)
     def test_df_groupby_head_tail_large_data(
         self, op_type, n, dropna, as_index, sort, group_keys, large_df_with_na_values
     ):
diff --git a/tests/integ/modin/groupby/test_groupby_idxmax_idxmin.py b/tests/integ/modin/groupby/test_groupby_idxmax_idxmin.py
index e87b6327bc..ec1e36d1e3 100644
--- a/tests/integ/modin/groupby/test_groupby_idxmax_idxmin.py
+++ b/tests/integ/modin/groupby/test_groupby_idxmax_idxmin.py
@@ -20,7 +20,7 @@
 @pytest.mark.parametrize("grouping_columns", ["B", ["A", "B"]])
 @pytest.mark.parametrize("skipna", [False, True])
 @pytest.mark.parametrize("func", ["idxmax", "idxmin"])
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 def test_df_groupby_idxmax_idxmin_on_axis_0(
     df_with_multiple_columns, grouping_columns, skipna, func
 ):
@@ -73,7 +73,7 @@ def test_df_groupby_idxmax_idxmin_on_axis_1_negative(df_with_multiple_columns, f
 
 @pytest.mark.parametrize("func", ["idxmax", "idxmin"])
 @pytest.mark.parametrize("numeric_only", [True, False])
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 def test_groupby_idxmax_idxmin_with_different_column_dtypes_on_axis_0(
     func, numeric_only
 ):
diff --git a/tests/integ/modin/groupby/test_groupby_ngroups.py b/tests/integ/modin/groupby/test_groupby_ngroups.py
index 6216c4c223..332e4c88eb 100644
--- a/tests/integ/modin/groupby/test_groupby_ngroups.py
+++ b/tests/integ/modin/groupby/test_groupby_ngroups.py
@@ -17,7 +17,7 @@ def assert_ngroups_equal(snow_res, pd_res):
 
 
 @pytest.mark.parametrize("by", ["a", "b", ["a", "b"]])
-@sql_count_checker(query_count=2, join_count=4)
+@sql_count_checker(query_count=2)
 def test_groupby_sort_multiindex_series(series_multi_numeric, by):
 
     snow_ser = series_multi_numeric
diff --git a/tests/integ/modin/groupby/test_groupby_series.py b/tests/integ/modin/groupby/test_groupby_series.py
index 10dd08b6fd..7756f8b620 100644
--- a/tests/integ/modin/groupby/test_groupby_series.py
+++ b/tests/integ/modin/groupby/test_groupby_series.py
@@ -19,14 +19,14 @@
 
 
 @pytest.mark.parametrize("by", ["a", ["b"], ["a", "b"]])
-@sql_count_checker(query_count=2, join_count=4)
+@sql_count_checker(query_count=2)
 def test_groupby_sort_multiindex_series(series_multi_numeric, agg_method, by):
     native_mseries_group = series_multi_numeric.to_pandas().groupby(by=by, sort=True)
     mseries_group = series_multi_numeric.groupby(by=by, sort=True)
     eval_snowpark_pandas_result(mseries_group, native_mseries_group, agg_method)
 
 
-@sql_count_checker(query_count=3, join_count=6)
+@sql_count_checker(query_count=3)
 def test_groupby_sort_false_multiindex_series(series_multi_numeric):
     # it is known that groupby sort=False is buggy with multiIndex, it is always
     # sorting when only part of the level is used.
@@ -48,7 +48,7 @@ def test_groupby_sort_false_multiindex_series(series_multi_numeric):
     )
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_groupby_series_count_with_nan():
     index = native_pd.Index(["a", "b", "b", "a", "c"])
     index.names = ["grp_col"]
@@ -75,7 +75,7 @@ def test_groupby_series_count_with_nan():
     ],
 )
 @pytest.mark.parametrize("sort", [True, False])
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_groupby_agg_series(agg_func, sort):
     index = native_pd.Index(["a", "b", "b", "a", "c"])
     index.names = ["grp_col"]
@@ -113,7 +113,7 @@ def test_groupby_agg_series_dict_func_negative():
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 @pytest.mark.parametrize(
     "agg_func, type_str",
     [({"x": ("y", "sum")}, "tuple"), ({"x": pd.NamedAgg("y", "sum")}, "NamedAgg")],
@@ -139,7 +139,7 @@ def test_groupby_agg_series_raises_for_2_tuple_agg(agg_func, type_str):
 
 @pytest.mark.parametrize("sort", [True, False])
 @pytest.mark.parametrize("aggs", [{"minimum": min}, {"minimum": min, "maximum": max}])
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_groupby_agg_series_named_agg(aggs, sort):
     index = native_pd.Index(["a", "b", "b", "a", "c"])
     index.names = ["grp_col"]
@@ -153,7 +153,7 @@ def test_groupby_agg_series_named_agg(aggs, sort):
 
 
 @pytest.mark.parametrize("numeric_only", [False, None])
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_groupby_series_numeric_only(series_str, numeric_only):
     native_series = series_str.to_pandas()
     eval_snowpark_pandas_result(
@@ -164,7 +164,7 @@ def test_groupby_series_numeric_only(series_str, numeric_only):
 
 
 @pytest.mark.parametrize("level", [0, 1, [1, 0], "b", [1, 1], [0, "b"], [-1]])
-@sql_count_checker(query_count=2, join_count=4)
+@sql_count_checker(query_count=2)
 def test_groupby_sort_multiindex_series_level(series_multi_numeric, level):
     native_series = series_multi_numeric.to_pandas()
 
@@ -173,7 +173,7 @@ def test_groupby_sort_multiindex_series_level(series_multi_numeric, level):
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 def test_groupby_series_single_index():
     snow_ser = pd.Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0])
     native_ser = native_pd.Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0])
diff --git a/tests/integ/modin/groupby/test_groupby_transform.py b/tests/integ/modin/groupby/test_groupby_transform.py
index 46ef42f4f4..5f2339f2e4 100644
--- a/tests/integ/modin/groupby/test_groupby_transform.py
+++ b/tests/integ/modin/groupby/test_groupby_transform.py
@@ -39,7 +39,7 @@ def test_dataframe_groupby_transform(
     #   temporary function's resultant table.
     # - A second join is performed only when the groupby object specifies dropna=True.
     #   This is because a loc set operation is being performed to correctly set NA values.
-    with SqlCounter(query_count=6, join_count=2 + (2 if dropna else 0), udtf_count=1):
+    with SqlCounter(query_count=6, join_count=1 + (1 if dropna else 0), udtf_count=1):
         eval_snowpark_pandas_result(
             *df_with_multiple_columns,
             lambda df: df.groupby(
@@ -85,11 +85,11 @@ def test_dataframe_groupby_transform_with_func_args_and_kwargs(
     Test DataFrameGroupby.transform with functions that require *args and **kwargs.
     """
     # - A UDTF is created to run `groupby.transform(func)` on every group via `apply`.
-    # - Two joins always occurs when joining the original DataFrame's table with the
+    # - One join always occurs when joining the original DataFrame's table with the
     #   temporary function's resultant table.
-    # - Another two joins are performed only when the groupby object specifies dropna=True.
+    # - A second join is performed only when the groupby object specifies dropna=True.
     #   This is because a loc set operation is being performed to correctly set NA values.
-    with SqlCounter(query_count=6, join_count=2 + (2 if dropna else 0), udtf_count=1):
+    with SqlCounter(query_count=6, join_count=1 + (1 if dropna else 0), udtf_count=1):
         eval_snowpark_pandas_result(
             *df_with_multiple_columns,
             lambda df: df.groupby(
diff --git a/tests/integ/modin/index/test_datetime_index_methods.py b/tests/integ/modin/index/test_datetime_index_methods.py
index b727b4750e..56fd40a6cb 100644
--- a/tests/integ/modin/index/test_datetime_index_methods.py
+++ b/tests/integ/modin/index/test_datetime_index_methods.py
@@ -89,7 +89,7 @@ def test_non_default_args(kwargs):
         pd.DatetimeIndex(query_compiler=idx._query_compiler, **kwargs)
 
 
-@sql_count_checker(query_count=6, join_count=6)
+@sql_count_checker(query_count=6)
 def test_index_parent():
     """
     Check whether the parent field in Index is updated properly.
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
index 9a629101f3..1fd5701fda 100644
--- a/tests/integ/modin/index/test_df_series_creation_with_index.py
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -494,7 +494,7 @@ def test_create_df_with_dict_as_data_and_index_as_index():
     assert_frame_equal(snow_df, native_df)
 
 
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1)
 def test_create_series_with_list_of_lists_index():
     # When given a list of lists as the index, this index needs to be converted to a MultiIndex before processing.
     arrays = [
@@ -507,7 +507,7 @@ def test_create_series_with_list_of_lists_index():
     assert_series_equal(snow_series, native_series)
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 def test_create_series_with_none_data_and_non_empty_index():
     # When creating an empty Series with a non-empty index, the index should be used as the index of the Series.
     index = ["A", "B", "C", "D"]
diff --git a/tests/integ/modin/index/test_index_methods.py b/tests/integ/modin/index/test_index_methods.py
index d8c3646d97..8f6f5b9f59 100644
--- a/tests/integ/modin/index/test_index_methods.py
+++ b/tests/integ/modin/index/test_index_methods.py
@@ -359,7 +359,7 @@ def test_has_duplicates(index):
         assert index.has_duplicates == snow_index.has_duplicates
 
 
-@sql_count_checker(query_count=6, join_count=6)
+@sql_count_checker(query_count=6)
 def test_index_parent():
     """
     Check whether the parent field in Index is updated properly.
diff --git a/tests/integ/modin/resample/test_resample.py b/tests/integ/modin/resample/test_resample.py
index b4e8858273..af99185294 100644
--- a/tests/integ/modin/resample/test_resample.py
+++ b/tests/integ/modin/resample/test_resample.py
@@ -145,7 +145,7 @@ def test_resample_duplicated_timestamps():
 @freq
 @interval
 @agg_func
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=2, join_count=1)
 def test_resample_series(freq, interval, agg_func):
     rule = f"{interval}{freq}"
     eval_snowpark_pandas_result(
@@ -188,7 +188,7 @@ def test_resample_df_with_nan(agg_func):
 
 
 @agg_func
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=2, join_count=1)
 def test_resample_ser_with_nan(agg_func):
     # 1 resample bin of all NaN, 1 resample bin partially NaN, 1 resample bin no NaNs
     eval_snowpark_pandas_result(
@@ -242,7 +242,7 @@ def test_resample_df_getitem():
     )
 
 
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=2, join_count=1)
 def test_resample_ser_getitem():
     eval_snowpark_pandas_result(
         *create_test_series(
diff --git a/tests/integ/modin/resample/test_resample_fillna.py b/tests/integ/modin/resample/test_resample_fillna.py
index 96ad514a2b..53352fd4ef 100644
--- a/tests/integ/modin/resample/test_resample_fillna.py
+++ b/tests/integ/modin/resample/test_resample_fillna.py
@@ -44,7 +44,7 @@ def test_resample_fill(interval, agg_func):
 
 @interval
 @agg_func
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=2, join_count=1)
 def test_resample_fill_ser(interval, agg_func):
     datecol = native_pd.to_datetime(
         [
@@ -139,7 +139,7 @@ def test_resample_ffill_missing_in_middle(interval, agg_func):
 
 @interval
 @agg_func
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=2, join_count=1)
 def test_resample_ffill_ser_missing_in_middle(interval, agg_func):
     datecol = native_pd.to_datetime(
         [
diff --git a/tests/integ/modin/resample/test_resample_negative.py b/tests/integ/modin/resample/test_resample_negative.py
index 44319c120b..e20fc397ef 100644
--- a/tests/integ/modin/resample/test_resample_negative.py
+++ b/tests/integ/modin/resample/test_resample_negative.py
@@ -137,7 +137,7 @@ def test_resample_fillna_invalid_method():
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 def test_resample_tz_negative():
     snow_df = pd.DataFrame(
         {"a": range(3)},
diff --git a/tests/integ/modin/series/test_add_prefix.py b/tests/integ/modin/series/test_add_prefix.py
index 6bba930c43..4d05f78d94 100644
--- a/tests/integ/modin/series/test_add_prefix.py
+++ b/tests/integ/modin/series/test_add_prefix.py
@@ -46,7 +46,7 @@ def test_series_add_prefix_multiindex(prefix, multiindex_native_int_series):
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("prefix", TEST_ADD_PREFIX_DATA)
 def test_series_add_prefix_time_column_df(prefix, time_index_series_data):
     series_data, kwargs = time_index_series_data
diff --git a/tests/integ/modin/series/test_add_suffix.py b/tests/integ/modin/series/test_add_suffix.py
index f3329c6789..43a98ab951 100644
--- a/tests/integ/modin/series/test_add_suffix.py
+++ b/tests/integ/modin/series/test_add_suffix.py
@@ -46,7 +46,7 @@ def test_add_suffix_multiindex(suffix, multiindex_native_int_series):
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("suffix", TEST_ADD_SUFFIX_DATA)
 def test_add_suffix_time_column_df(suffix, time_index_series_data):
     series_data, kwargs = time_index_series_data
diff --git a/tests/integ/modin/series/test_all_any.py b/tests/integ/modin/series/test_all_any.py
index 517252e7af..0f78b320fe 100644
--- a/tests/integ/modin/series/test_all_any.py
+++ b/tests/integ/modin/series/test_all_any.py
@@ -65,7 +65,7 @@ def test_any_int(data):
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 def test_all_named_index():
     data = [1, 0, 3]
     index_name = ["a", "b", "c"]
@@ -77,7 +77,7 @@ def test_all_named_index():
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 def test_any_named_index():
     data = [1, 0, 3]
     index_name = ["a", "b", "c"]
diff --git a/tests/integ/modin/series/test_at.py b/tests/integ/modin/series/test_at.py
index 4533c20d35..9452a0d736 100644
--- a/tests/integ/modin/series/test_at.py
+++ b/tests/integ/modin/series/test_at.py
@@ -18,7 +18,7 @@ def test_at_get_default_index(
     )
 
 
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_at_set_default_index(
     default_index_snowpark_pandas_series,
     default_index_native_series,
@@ -42,7 +42,7 @@ def test_at_get_str_index(
     assert str_index_snowpark_pandas_series.at["b"] == str_index_native_series.at["b"]
 
 
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_at_set_str_index(
     str_index_snowpark_pandas_series,
     str_index_native_series,
@@ -58,7 +58,7 @@ def at_set_helper(series):
     )
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_at_get_time_index(
     time_index_snowpark_pandas_series,
     time_index_native_series,
@@ -69,7 +69,7 @@ def test_at_get_time_index(
     )
 
 
-@sql_count_checker(query_count=1, join_count=3)
+@sql_count_checker(query_count=1, join_count=1)
 def test_at_set_time_index(
     time_index_snowpark_pandas_series,
     time_index_native_series,
diff --git a/tests/integ/modin/series/test_bitwise_operators.py b/tests/integ/modin/series/test_bitwise_operators.py
index ad542fd223..eda9c536c9 100644
--- a/tests/integ/modin/series/test_bitwise_operators.py
+++ b/tests/integ/modin/series/test_bitwise_operators.py
@@ -11,7 +11,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
+from tests.integ.modin.sql_counter import sql_count_checker
 from tests.integ.modin.utils import (
     assert_snowpark_pandas_equals_to_pandas_without_dtypecheck,
     eval_snowpark_pandas_result,
@@ -47,16 +47,15 @@ def try_cast_to_snow_series(value: Any) -> Any:
 
 
 @pytest.mark.parametrize("value", BITWISE_TEST_DATA)
+@sql_count_checker(query_count=1)
 def test_bitwise_unary(value):
 
     # Note: In pandas, using NaN values without specfiying a null-compatible dtype will yield an error.
     # SnowPandas will allow this behavior.
     # Note: NaN values like pd.NA, pd.NaT, np.nan will raise a TypeError: boolean value of NA is ambiguous
-    with SqlCounter(
-        query_count=1, join_count=1 if isinstance(value, native_pd.Series) else 0
-    ):
-        snow_value = try_cast_to_snow_series(value)
-        eval_snowpark_pandas_result(snow_value, native_pd.Series(value), lambda s: ~s)
+    snow_value = try_cast_to_snow_series(value)
+
+    eval_snowpark_pandas_result(snow_value, native_pd.Series(value), lambda s: ~s)
 
 
 @pytest.mark.parametrize("series", SERIES_BITWISE_TEST_DATA)
@@ -122,6 +121,7 @@ def check_op(native_lhs, native_rhs, snow_lhs, snow_rhs):
 @pytest.mark.parametrize(
     "op", [operator.or_, operator.and_]
 )  # |, &.  ^ is not supported in Snowflake
+@sql_count_checker(query_count=2, join_count=2)
 def test_bitwise_binary_between_series(lhs, rhs, op):
     def check_op(native_lhs, native_rhs, snow_lhs, snow_rhs):
         snow_ans = op(snow_lhs, snow_rhs)
@@ -131,14 +131,10 @@ def check_op(native_lhs, native_rhs, snow_lhs, snow_rhs):
             snow_ans, native_ans, lambda s: s, check_index_type=False
         )
 
-    with SqlCounter(
-        query_count=2,
-        join_count=10 if isinstance(lhs.index, native_pd.MultiIndex) else 6,
-    ):
-        check_op(lhs, rhs, try_cast_to_snow_series(lhs), try_cast_to_snow_series(rhs))
+    check_op(lhs, rhs, try_cast_to_snow_series(lhs), try_cast_to_snow_series(rhs))
 
-        # commute series
-        check_op(rhs, lhs, try_cast_to_snow_series(rhs), try_cast_to_snow_series(lhs))
+    # commute series
+    check_op(rhs, lhs, try_cast_to_snow_series(rhs), try_cast_to_snow_series(lhs))
 
 
 # Due to differences in logical or/and in SQL and pandas' |,& implementation, behavior doesn't match here, in particular
@@ -234,21 +230,18 @@ def check_op(native_lhs, native_rhs, snow_lhs, snow_rhs):
         ),
     ],
 )
+@sql_count_checker(query_count=1, join_count=1)
 def test_bitwise_binary_between_series_with_deviating_behavior_or(
     lhs, rhs, expected_pandas, expected_snowpark_pandas
 ):
-    with SqlCounter(
-        query_count=1,
-        join_count=5 if isinstance(lhs.index, native_pd.MultiIndex) else 3,
-    ):
-        snow_ans = try_cast_to_snow_series(lhs) | try_cast_to_snow_series(rhs)
-        assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(
-            snow_ans, expected_snowpark_pandas
-        )
+    snow_ans = try_cast_to_snow_series(lhs) | try_cast_to_snow_series(rhs)
+    assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(
+        snow_ans, expected_snowpark_pandas
+    )
 
-        # test here pandas to track any version regressions
-        native_ans = lhs | rhs
-        tm.assert_series_equal(native_ans, expected_pandas, check_index_type=False)
+    # test here pandas to track any version regressions
+    native_ans = lhs | rhs
+    tm.assert_series_equal(native_ans, expected_pandas, check_index_type=False)
 
 
 @pytest.mark.parametrize(
@@ -322,19 +315,16 @@ def test_bitwise_binary_between_series_with_deviating_behavior_or(
         ),
     ],
 )
+@sql_count_checker(query_count=1, join_count=1)
 def test_bitwise_binary_between_series_with_deviating_behavior_and(
     lhs, rhs, expected_pandas, expected_snowpark_pandas
 ):
-    with SqlCounter(
-        query_count=1,
-        join_count=5 if isinstance(lhs.index, native_pd.MultiIndex) else 3,
-    ):
-        snow_ans = try_cast_to_snow_series(lhs) & try_cast_to_snow_series(rhs)
-        assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(
-            snow_ans, expected_snowpark_pandas
-        )
+    snow_ans = try_cast_to_snow_series(lhs) & try_cast_to_snow_series(rhs)
+    assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(
+        snow_ans, expected_snowpark_pandas
+    )
 
-        # test here pandas to track any version regressions
-        native_ans = lhs & rhs
-        print(native_ans.index)
-        tm.assert_series_equal(native_ans, expected_pandas, check_index_type=False)
+    # test here pandas to track any version regressions
+    native_ans = lhs & rhs
+    print(native_ans.index)
+    tm.assert_series_equal(native_ans, expected_pandas, check_index_type=False)
diff --git a/tests/integ/modin/series/test_compare.py b/tests/integ/modin/series/test_compare.py
index 8d60d7f75a..c5c927343e 100644
--- a/tests/integ/modin/series/test_compare.py
+++ b/tests/integ/modin/series/test_compare.py
@@ -50,7 +50,7 @@ class TestDefaultParameters:
         # copying the original series's index to the final resulting dataframe
         # adds 1 extra query to materialize the index.
         query_count=QUERY_COUNT + 1,
-        join_count=5,
+        join_count=JOIN_COUNT,
     )
     def test_no_diff(self, base_series):
         other_series = base_series.copy()
diff --git a/tests/integ/modin/series/test_describe.py b/tests/integ/modin/series/test_describe.py
index 32876f1608..9ecd2e33a3 100644
--- a/tests/integ/modin/series/test_describe.py
+++ b/tests/integ/modin/series/test_describe.py
@@ -8,7 +8,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
+from tests.integ.modin.sql_counter import sql_count_checker
 from tests.integ.modin.utils import (
     assert_series_equal,
     create_test_series,
@@ -129,18 +129,15 @@ def timestamp_describe_comparator(snow_res, native_res):
 
 
 @pytest.mark.parametrize(
-    "index, join_count",
+    "index",
     [
-        pytest.param(None, 0, id="default_index"),
-        pytest.param(
-            ["one", "two", "three", "four", "five", "six"], 6, id="flat_index"
-        ),
+        pytest.param(None, id="default_index"),
+        pytest.param(["one", "two", "three", "four", "five", "six"], id="flat_index"),
         pytest.param(
             [
                 np.array(["bar", "bar", "baz", "baz", "foo", "foo"]),
                 np.array(["one", "two", "one", "two", "one", "two"]),
             ],
-            12,
             id="2D_index",
         ),
     ],
@@ -154,10 +151,8 @@ def timestamp_describe_comparator(snow_res, native_res):
     ],
     ids=["ints", "floats", "objects"],
 )
-def test_describe_multiindex(data, index, join_count):
-    if isinstance(data[0], str) and index is not None:
-        join_count = 8 if len(index) == 2 else 4
-    with SqlCounter(query_count=1, union_count=5, join_count=join_count):
-        eval_snowpark_pandas_result(
-            *create_test_series(data, index=index), lambda ser: ser.describe()
-        )
+@sql_count_checker(query_count=1, union_count=5)
+def test_describe_multiindex(data, index):
+    eval_snowpark_pandas_result(
+        *create_test_series(data, index=index), lambda ser: ser.describe()
+    )
diff --git a/tests/integ/modin/series/test_empty.py b/tests/integ/modin/series/test_empty.py
index d53cd6e3d5..a30a69116c 100644
--- a/tests/integ/modin/series/test_empty.py
+++ b/tests/integ/modin/series/test_empty.py
@@ -9,7 +9,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
+from tests.integ.modin.sql_counter import sql_count_checker
 from tests.integ.modin.utils import eval_snowpark_pandas_result
 
 
@@ -34,17 +34,14 @@
         "empty series with only index",
     ],
 )
+@sql_count_checker(query_count=1)
 def test_series_empty(args, kwargs):
-    with SqlCounter(
-        query_count=1,
-        join_count=1 if (args == [] and kwargs.get("index", None) == []) else 0,
-    ):
-        eval_snowpark_pandas_result(
-            pd.Series(*args, **kwargs),
-            native_pd.Series(*args, **kwargs),
-            lambda df: df.empty,
-            comparator=lambda x, y: x == y,
-        )
+    eval_snowpark_pandas_result(
+        pd.Series(*args, **kwargs),
+        native_pd.Series(*args, **kwargs),
+        lambda df: df.empty,
+        comparator=lambda x, y: x == y,
+    )
 
 
 @sql_count_checker(query_count=5, join_count=2)
diff --git a/tests/integ/modin/series/test_iat.py b/tests/integ/modin/series/test_iat.py
index 7b9a4d4c06..b3e2255403 100644
--- a/tests/integ/modin/series/test_iat.py
+++ b/tests/integ/modin/series/test_iat.py
@@ -103,7 +103,7 @@ def iat_set_helper(series):
         (0,),
     ],
 )
-@sql_count_checker(query_count=1, join_count=4)
+@sql_count_checker(query_count=1, join_count=2)
 def test_iat_get_time_index(
     key,
     time_index_snowpark_pandas_series,
@@ -122,7 +122,7 @@ def test_iat_get_time_index(
         (0,),
     ],
 )
-@sql_count_checker(query_count=1, join_count=4)
+@sql_count_checker(query_count=1, join_count=2)
 def test_iat_set_time_index(
     key,
     time_index_snowpark_pandas_series,
diff --git a/tests/integ/modin/series/test_iloc.py b/tests/integ/modin/series/test_iloc.py
index eea764af40..7b6369934d 100644
--- a/tests/integ/modin/series/test_iloc.py
+++ b/tests/integ/modin/series/test_iloc.py
@@ -89,7 +89,7 @@ def operation(ser):
         # Based on snowflake type results, the result becomes 'str' type so we normalize to float for comparison.
         return ser.astype("float")
 
-    expected_join_count = 5 if isinstance(val, list) else 4
+    expected_join_count = 3 if isinstance(val, list) else 2
     with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(
             default_index_native_int_snowpark_pandas_series,
@@ -777,25 +777,25 @@ def perform_iloc(df):
 
 
 @pytest.mark.parametrize(
-    "row_key, row_key_index, add_joins",
+    "row_key, row_key_index",
     [
-        [1, None, 0],
-        [[3, 0], None, 0],
-        [[1, 2], [("A",), ("B",)], 1],
-        [[2, 1], [("A", 1), ("B", 2)], 2],
+        [1, None],
+        [[3, 0], None],
+        [[1, 2], [("A",), ("B",)]],
+        [[2, 1], [("A", 1), ("B", 2)]],
     ],
 )
 @pytest.mark.parametrize(
     "item_values, item_index, expected_join_count",
     [
-        [999, None, 6],
-        [TEST_ITEMS_DATA_2X1, None, 7],
-        [TEST_ITEMS_DATA_2X1, [("r",), ("s",)], 8],
-        [TEST_ITEMS_DATA_2X1, [("r", 20), ("s", 25)], 9],
+        [999, None, 2],
+        [TEST_ITEMS_DATA_2X1, None, 3],
+        [TEST_ITEMS_DATA_2X1, [("r",), ("s",)], 4],
+        [TEST_ITEMS_DATA_2X1, [("r", 20), ("s", 25)], 5],
     ],
 )
 def test_df_iloc_set_with_multiindex(
-    row_key, row_key_index, item_values, item_index, expected_join_count, add_joins
+    row_key, row_key_index, item_values, item_index, expected_join_count
 ):
     ser_data = [10, 11, 12, 13, 14]
     row_index = pd.MultiIndex.from_tuples(
@@ -835,7 +835,7 @@ def helper_iloc(ser):
         else:
             ser.iloc[snow_row_key] = snow_items
 
-    with SqlCounter(query_count=1, join_count=expected_join_count + add_joins):
+    with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(snow_ser, native_ser, helper_iloc, inplace=True)
 
 
@@ -851,7 +851,7 @@ def iloc_helper(series: Union[pd.Series, native_pd.Series]) -> None:
             )
 
         # test ser with default index
-        with SqlCounter(query_count=1, join_count=4):
+        with SqlCounter(query_count=1, join_count=2):
             eval_snowpark_pandas_result(
                 default_index_int_series,
                 default_index_native_int_series,
@@ -859,7 +859,7 @@ def iloc_helper(series: Union[pd.Series, native_pd.Series]) -> None:
             )
 
         # test ser with non default index
-        with SqlCounter(query_count=1, join_count=4):
+        with SqlCounter(query_count=1, join_count=2):
             eval_snowpark_pandas_result(
                 int_series_with_non_default_index,
                 native_int_series_with_non_default_index,
@@ -867,7 +867,7 @@ def iloc_helper(series: Union[pd.Series, native_pd.Series]) -> None:
             )
 
         # test ser with MultiIndex
-        with SqlCounter(query_count=1, join_count=4):
+        with SqlCounter(query_count=1, join_count=2):
             eval_snowpark_pandas_result(
                 int_series_with_multiindex,
                 multiindex_native_int_series,
diff --git a/tests/integ/modin/series/test_loc.py b/tests/integ/modin/series/test_loc.py
index da13247cd7..2603eaa61c 100644
--- a/tests/integ/modin/series/test_loc.py
+++ b/tests/integ/modin/series/test_loc.py
@@ -224,6 +224,7 @@ def apply_loc(df):
         [random.choice([True, False]) for _ in range(5)],
     ],
 )
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_loc_get_key_bool_series_with_aligned_indices(key, use_default_index):
     # aligned indices means both row_pos and index are exactly match
     if use_default_index:
@@ -233,14 +234,13 @@ def test_series_loc_get_key_bool_series_with_aligned_indices(key, use_default_in
         index = native_pd.Index(["a", "a", None, "b", "b"], name="index")
     native_series = native_pd.Series([1, 2, 3, 4, 5], index=index)
     snow_series = pd.Series(native_series)
-    with SqlCounter(query_count=1, join_count=1 if use_default_index else 2):
-        eval_snowpark_pandas_result(
-            snow_series,
-            native_series,
-            lambda s: s.loc[pd.Series(key, index=index, dtype="bool")]
-            if isinstance(s, pd.Series)
-            else s.loc[native_pd.Series(key, index=index, dtype="bool")],
-        )
+    eval_snowpark_pandas_result(
+        snow_series,
+        native_series,
+        lambda s: s.loc[pd.Series(key, index=index, dtype="bool")]
+        if isinstance(s, pd.Series)
+        else s.loc[native_pd.Series(key, index=index, dtype="bool")],
+    )
 
 
 @pytest.mark.parametrize(
@@ -861,7 +861,7 @@ def loc_set_helper(s):
         ["a", "a", "c", "d"],
     ],
 )
-@sql_count_checker(query_count=1, join_count=3)
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_loc_set_scalar_row_key_enlargement(row_key, item_values, ser_index):
     data = [1, 2, 3, 4]
 
@@ -1407,7 +1407,7 @@ def test_series_loc_set_slice_item_negative(key, default_index_native_series):
         [2, "x"],
     ],
 )
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_loc_set_boolean_key(key, index):
     # series.loc[True/False key] = scalar item
     # ----------------------------------------
@@ -1596,7 +1596,7 @@ def test_series_loc_set_with_scalar_key_and_list_like_item(
         assert_series_equal(snowpark_ser, native_ser)
 
 
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 @pytest.mark.parametrize("key", SCALAR_LIKE_VALUES)
 @pytest.mark.parametrize("item", SCALAR_LIKE_VALUES)
 def test_series_loc_set_with_scalar_key_and_scalar_item(
@@ -1776,7 +1776,7 @@ def test_series_partial_string_indexing_behavior_diff():
     assert len(series_minute["2022"]) == 0
 
 
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_loc_set_none():
     # Note that pandas does not support df.loc[None,:] like the series does here.
     native_s = native_pd.Series([1, 2, 3])
diff --git a/tests/integ/modin/series/test_mask.py b/tests/integ/modin/series/test_mask.py
index 0d3680cff4..baeaa37751 100644
--- a/tests/integ/modin/series/test_mask.py
+++ b/tests/integ/modin/series/test_mask.py
@@ -76,8 +76,8 @@ def test_series_mask_duplicate_labels():
     eval_snowpark_pandas_result(snow_ser, native_ser, lambda ser: ser.mask(ser > 3))
 
 
-@sql_count_checker(query_count=1, join_count=1)
-def test_series_mask_multi_index():
+@sql_count_checker(query_count=1)
+def test_series_mask_multiindex():
     data = [1, 2, 3, 4, 5]
     index = [("a", "x"), ("b", "y"), ("c", "z"), ("d", "u"), ("e", "v")]
 
@@ -233,7 +233,7 @@ def test_series_mask_with_scalar_cond(cond):
         )
 
 
-@sql_count_checker(query_count=1, join_count=3)
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_mask_series_cond_unmatched_index():
     data = [1, 2, 3, 4]
     index1 = [0, 1, 2, 3]
@@ -258,10 +258,9 @@ def perform_mask(series):
     )
 
 
-@pytest.mark.parametrize(
-    "index, join_count", [("matched_index", 1), ("unmatched_index", 2)]
-)
-def test_series_mask_short_series_cond(index, join_count):
+@sql_count_checker(query_count=1, join_count=1)
+@pytest.mark.parametrize("index", ["matched_index", "unmatched_index"])
+def test_series_mask_short_series_cond(index):
     data = [1, 2, 3, 4]
     if index != "matched_index":
         index = [7, 8, 9]
@@ -280,18 +279,16 @@ def perform_mask(series):
         else:
             return series.mask(native_cond, -1)
 
-    with SqlCounter(query_count=1, join_count=join_count):
-        eval_snowpark_pandas_result(
-            snow_ser,
-            native_ser,
-            perform_mask,
-        )
+    eval_snowpark_pandas_result(
+        snow_ser,
+        native_ser,
+        perform_mask,
+    )
 
 
-@pytest.mark.parametrize(
-    "index, join_count", [("matched_index", 1), ("unmatched_index", 2)]
-)
-def test_series_mask_long_series_cond(index, join_count):
+@sql_count_checker(query_count=1, join_count=1)
+@pytest.mark.parametrize("index", ["matched_index", "unmatched_index"])
+def test_series_mask_long_series_cond(index):
     data = [1, 2, 3, 4]
     if index != "matched_index":
         index = [7, 8, 9, 10, 11]
@@ -310,9 +307,8 @@ def perform_mask(series):
         else:
             return series.mask(native_cond, -1)
 
-    with SqlCounter(query_count=1, join_count=join_count):
-        eval_snowpark_pandas_result(
-            snow_ser,
-            native_ser,
-            perform_mask,
-        )
+    eval_snowpark_pandas_result(
+        snow_ser,
+        native_ser,
+        perform_mask,
+    )
diff --git a/tests/integ/modin/series/test_nlargest_nsmallest.py b/tests/integ/modin/series/test_nlargest_nsmallest.py
index 253230156b..a15cc5dfb2 100644
--- a/tests/integ/modin/series/test_nlargest_nsmallest.py
+++ b/tests/integ/modin/series/test_nlargest_nsmallest.py
@@ -88,7 +88,7 @@ def test_nlargest_nsmallest_non_numeric_types(method, data):
     assert_series_equal(getattr(snow_s, method)(n), expected_s)
 
 
-@sql_count_checker(query_count=3, join_count=2)
+@sql_count_checker(query_count=3)
 def test_nlargest_nsmallest_no_columns(method):
     snow_s = pd.Series(query_compiler=pd.DataFrame(index=[1, 2])._query_compiler)
     snow_s = snow_s
diff --git a/tests/integ/modin/series/test_nunique.py b/tests/integ/modin/series/test_nunique.py
index f2aba15ada..bb20e9e4a5 100644
--- a/tests/integ/modin/series/test_nunique.py
+++ b/tests/integ/modin/series/test_nunique.py
@@ -8,7 +8,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
+from tests.integ.modin.sql_counter import sql_count_checker
 from tests.integ.modin.utils import (
     assert_values_equal,
     create_test_series,
@@ -63,11 +63,11 @@ def test_series_nunique_deviating_nan_behavior(input_data, expected):
         ),
     ],
 )
+@sql_count_checker(query_count=1)
 def test_dataframe_nunique_multiindex(index):
     data = [0.1, 0.2, 0.1, 0]
-    with SqlCounter(query_count=1, join_count=0 if index is None else 2):
-        eval_snowpark_pandas_result(
-            *create_test_series(data, index=index),
-            lambda ser: ser.nunique(),
-            comparator=assert_values_equal,
-        )
+    eval_snowpark_pandas_result(
+        *create_test_series(data, index=index),
+        lambda ser: ser.nunique(),
+        comparator=assert_values_equal,
+    )
diff --git a/tests/integ/modin/series/test_rank.py b/tests/integ/modin/series/test_rank.py
index 24801b581f..2544f12e43 100644
--- a/tests/integ/modin/series/test_rank.py
+++ b/tests/integ/modin/series/test_rank.py
@@ -7,7 +7,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker
+from tests.integ.modin.sql_counter import sql_count_checker
 from tests.integ.modin.utils import (
     assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64,
     eval_snowpark_pandas_result,
@@ -83,6 +83,7 @@ def test_series_rank_numeric_only(method, ascending, na_option):
     )
 
 
+@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("data, index", TEST_RANK_DATA)
 @pytest.mark.parametrize(
     "method",
@@ -98,15 +99,10 @@ def test_series_rank_numeric_only(method, ascending, na_option):
 )
 # test Series percentile rank
 def test_df_rank_pct(data, index, method, ascending, na_option):
-    with SqlCounter(
-        query_count=1, join_count=2 if isinstance(index, native_pd.MultiIndex) else 0
-    ):
-        snow_df = pd.DataFrame(data, index=index).rank(
-            method=method, ascending=ascending, na_option=na_option, pct=True
-        )
-        native_df = native_pd.DataFrame(data, index=index).rank(
-            method=method, ascending=ascending, na_option=na_option, pct=True
-        )
-        assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64(
-            snow_df, native_df
-        )
+    snow_df = pd.DataFrame(data, index=index).rank(
+        method=method, ascending=ascending, na_option=na_option, pct=True
+    )
+    native_df = native_pd.DataFrame(data, index=index).rank(
+        method=method, ascending=ascending, na_option=na_option, pct=True
+    )
+    assert_snowpark_pandas_equals_to_pandas_with_coerce_to_float64(snow_df, native_df)
diff --git a/tests/integ/modin/series/test_rename.py b/tests/integ/modin/series/test_rename.py
index 53873e0b2f..4ccf29706f 100644
--- a/tests/integ/modin/series/test_rename.py
+++ b/tests/integ/modin/series/test_rename.py
@@ -45,7 +45,7 @@ def renamer(x):
             # values in the variant column will be quoted
             assert_index_equal(renamed.index, renamed2.index.str.replace('"', ""))
 
-    @sql_count_checker(query_count=1, join_count=2)
+    @sql_count_checker(query_count=1, join_count=1)
     def test_rename_partial_dict(self):
         # partial dict
         ser = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64")
@@ -63,7 +63,7 @@ def test_rename_retain_index_name(self):
         renamed = renamer.rename({})
         assert renamed.index.name == renamer.index.name
 
-    @sql_count_checker(query_count=2, join_count=2)
+    @sql_count_checker(query_count=2, join_count=1)
     def test_rename_by_series(self):
         ser = Series(range(5), name="foo")
         renamer = Series({1: 10, 2: 20})
@@ -80,7 +80,7 @@ def test_rename_set_name(self):
                 tm.assert_numpy_array_equal(result.index.values, ser.index.values)
                 assert ser.name is None
 
-    @sql_count_checker(query_count=5, join_count=5)
+    @sql_count_checker(query_count=5)
     def test_rename_set_name_inplace(self):
         ser = Series(range(3), index=list("abc"))
         for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
diff --git a/tests/integ/modin/series/test_setitem.py b/tests/integ/modin/series/test_setitem.py
index 929226bc89..50405643bc 100644
--- a/tests/integ/modin/series/test_setitem.py
+++ b/tests/integ/modin/series/test_setitem.py
@@ -175,7 +175,7 @@
         (None, 35),  # None scalar
     ],
 )
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_setitem_scalar_key_and_scalar_item(
     key, item, default_index_native_int_series
 ):
@@ -276,7 +276,7 @@ def test_series_setitem_none_key_and_scalar_item_mixed_type_series(
         (3.14, "a"),
     ],
 )
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_setitem_scalar_key_and_scalar_item_mixed_type_series_type_coercion(
     key, item, mixed_type_index_native_series_mixed_type_index
 ):
@@ -341,7 +341,7 @@ def test_series_setitem_scalar_key_and_scalar_item_mixed_type_series_type_coerci
 # TODO: SNOW-986548 fix where key is False, row is missed in this case
 @pytest.mark.parametrize("key", [True, False])
 @pytest.mark.parametrize("item", SCALAR_LIKE_VALUES)
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_setitem_boolean_key_and_scalar_item_label_updated(key, item):
     # series[scalar boolean key] = scalar item
     # ----------------------------------------
@@ -493,14 +493,14 @@ def test_series_setitem_boolean_key_and_scalar_item_case2_numeric_index(key, ite
 
     expected_ser = native_pd.Series(data=data, index=index)
 
-    with SqlCounter(query_count=1, join_count=2):
+    with SqlCounter(query_count=1, join_count=1):
         # verify that the result is correct
         assert_series_equal(snowpark_ser, expected_ser)
 
 
 @pytest.mark.parametrize("key", [True, False])
 @pytest.mark.parametrize("item", SCALAR_LIKE_VALUES)
-@sql_count_checker(query_count=1, join_count=3)
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_setitem_boolean_key_and_scalar_item_case2_non_numeric_index(key, item):
     # series[scalar boolean key] = scalar item
     # ----------------------------------------
@@ -559,7 +559,7 @@ def test_series_setitem_boolean_key_and_scalar_item_case2_non_numeric_index(key,
 
 @pytest.mark.parametrize("key", [0, 1])
 @pytest.mark.parametrize("item", SCALAR_LIKE_VALUES)
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_setitem_boolean_key_and_scalar_item_case3(
     key, item, native_series_with_duplicate_boolean_index
 ):
@@ -1835,7 +1835,7 @@ def test_series_setitem_check_type_behavior_with_string_key_and_number_scalar_it
                 assert_series_equal(snowpark_ser, native_ser, check_dtype=False)
     else:
         # All other cases match native pandas behavior
-        with SqlCounter(query_count=1, join_count=2):
+        with SqlCounter(query_count=1, join_count=1):
             assert_series_equal(snowpark_ser, native_ser, check_dtype=False)
 
 
@@ -1886,7 +1886,7 @@ def test_series_setitem_check_type_behavior_with_string_key_and_boolean_scalar_i
     # b    True
     # c    True
     # dtype: bool
-    with SqlCounter(query_count=1, join_count=2):
+    with SqlCounter(query_count=1, join_count=1):
         err_msg = "Series are different"
         with pytest.raises(AssertionError, match=err_msg):
             assert_series_equal(snowpark_ser, native_ser, check_dtype=False)
@@ -1997,7 +1997,7 @@ def test_series_setitem_check_type_behavior_with_string_key_and_string_scalar_it
 
         expected_data = [str(val) for val in native_ser]
         expected_ser = native_pd.Series(data=expected_data, index=index)
-        with SqlCounter(query_count=1, join_count=2):
+        with SqlCounter(query_count=1, join_count=1):
             assert_series_equal(snowpark_ser, expected_ser, check_dtype=False)
 
 
@@ -2093,7 +2093,7 @@ def set_loc_helper(ser):
         [2, "x"],
     ],
 )
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_df_setitem_boolean_key(key, index):
     item = 99
 
@@ -2435,7 +2435,7 @@ def test_behavior_table_is_up_to_date():
                         prev_err_msg = expected_err_msg
 
 
-@sql_count_checker(query_count=2, join_count=6)
+@sql_count_checker(query_count=2, join_count=2)
 def test_series_setitem_int_key():
     # pandas series setitem with int key is similar to loc set in most cases:
     # E.g., set index with label 3 to 100
diff --git a/tests/integ/modin/series/test_shape.py b/tests/integ/modin/series/test_shape.py
index ba62dfde67..7bbc1270a0 100644
--- a/tests/integ/modin/series/test_shape.py
+++ b/tests/integ/modin/series/test_shape.py
@@ -9,7 +9,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import SqlCounter
+from tests.integ.modin.sql_counter import sql_count_checker
 from tests.integ.modin.utils import eval_snowpark_pandas_result
 
 
@@ -34,13 +34,11 @@
         "empty series with only index",
     ],
 )
+@sql_count_checker(query_count=1)
 def test_series_shape(args, kwargs):
-    with SqlCounter(
-        query_count=1, join_count=1 if kwargs.get("index", None) == [] else 0
-    ):
-        eval_snowpark_pandas_result(
-            pd.Series(*args, **kwargs),
-            native_pd.Series(*args, **kwargs),
-            lambda df: df.shape,
-            comparator=lambda x, y: x == y,
-        )
+    eval_snowpark_pandas_result(
+        pd.Series(*args, **kwargs),
+        native_pd.Series(*args, **kwargs),
+        lambda df: df.shape,
+        comparator=lambda x, y: x == y,
+    )
diff --git a/tests/integ/modin/series/test_size.py b/tests/integ/modin/series/test_size.py
index 65730da0fd..4543525b2d 100644
--- a/tests/integ/modin/series/test_size.py
+++ b/tests/integ/modin/series/test_size.py
@@ -8,7 +8,7 @@
 import pytest
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
-from tests.integ.modin.sql_counter import SqlCounter
+from tests.integ.modin.sql_counter import sql_count_checker
 from tests.integ.modin.utils import eval_snowpark_pandas_result
 
 
@@ -36,16 +36,11 @@
         "multi index",
     ],
 )
+@sql_count_checker(query_count=1)
 def test_series_size(args, kwargs):
-    with SqlCounter(
-        query_count=1,
-        join_count=2
-        if isinstance(kwargs.get("index", None), native_pd.MultiIndex)
-        else 0,
-    ):
-        eval_snowpark_pandas_result(
-            pd.Series(*args, **kwargs),
-            native_pd.Series(*args, **kwargs),
-            lambda df: df.size,
-            comparator=lambda x, y: x == y,
-        )
+    eval_snowpark_pandas_result(
+        pd.Series(*args, **kwargs),
+        native_pd.Series(*args, **kwargs),
+        lambda df: df.size,
+        comparator=lambda x, y: x == y,
+    )
diff --git a/tests/integ/modin/series/test_take.py b/tests/integ/modin/series/test_take.py
index 2ba09be1b8..9eed1559a8 100644
--- a/tests/integ/modin/series/test_take.py
+++ b/tests/integ/modin/series/test_take.py
@@ -16,23 +16,23 @@ def test_series_take():
 
     actual = ser.take([1, 3, 4])
     expected = pd.Series([5, 2, 4], index=[1, 3, 4])
-    with SqlCounter(query_count=2, join_count=3):
+    with SqlCounter(query_count=2, join_count=2):
         assert_series_equal(actual, expected)
 
     actual = ser.take([-1, 3, 4])
     expected = pd.Series([4, 2, 4], index=[4, 3, 4])
-    with SqlCounter(query_count=2, join_count=3):
+    with SqlCounter(query_count=2, join_count=2):
         assert_series_equal(actual, expected)
 
     # Out-of-bounds testing - valid because .iloc is used in backend.
     actual = ser.take([1, 10])
     expected = pd.Series([5], index=[1])
-    with SqlCounter(query_count=2, join_count=3):
+    with SqlCounter(query_count=2, join_count=2):
         assert_series_equal(actual, expected)
 
     actual = ser.take([2, 5])
     expected = pd.Series([6], index=[2])
-    with SqlCounter(query_count=2, join_count=3):
+    with SqlCounter(query_count=2, join_count=2):
         assert_series_equal(actual, expected)
 
 
diff --git a/tests/integ/modin/series/test_to_snowflake.py b/tests/integ/modin/series/test_to_snowflake.py
index f542edfa17..92b428f70e 100644
--- a/tests/integ/modin/series/test_to_snowflake.py
+++ b/tests/integ/modin/series/test_to_snowflake.py
@@ -68,7 +68,7 @@ def test_to_snowflake_index_label_none_raises(test_table_name):
         snow_series.to_snowflake(test_table_name, if_exists="replace", index=True)
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_to_snowflake_multiindex(test_table_name, snow_series):
     index = native_pd.MultiIndex.from_arrays(
         [[1, 1, 2, 2], ["red", "blue", "red", "blue"]], names=("number", "color")
diff --git a/tests/integ/modin/series/test_transpose.py b/tests/integ/modin/series/test_transpose.py
index ae2a076171..53ef87bb15 100644
--- a/tests/integ/modin/series/test_transpose.py
+++ b/tests/integ/modin/series/test_transpose.py
@@ -51,8 +51,8 @@ def test_series_transpose_empty():
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
-def test_series_transpose_multi_index():
+@sql_count_checker(query_count=1)
+def test_series_transpose_multiindex():
     data = [1, 2, 3, 4, 5]
     index = [("a", "x"), ("b", "y"), ("c", "z"), ("d", "u"), ("e", "v")]
 
@@ -66,7 +66,7 @@ def test_series_transpose_multi_index():
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 def test_series_transpose_index_no_names():
     data = [1, 2, 3, 4, 5]
     index = [None, None, None, None, None]
diff --git a/tests/integ/modin/series/test_where.py b/tests/integ/modin/series/test_where.py
index cff58d4a82..8c5f8a27f0 100644
--- a/tests/integ/modin/series/test_where.py
+++ b/tests/integ/modin/series/test_where.py
@@ -76,8 +76,8 @@ def test_series_where_duplicate_labels():
     eval_snowpark_pandas_result(snow_ser, native_ser, lambda ser: ser.where(ser > 3))
 
 
-@sql_count_checker(query_count=1, join_count=1)
-def test_series_where_multi_index():
+@sql_count_checker(query_count=1)
+def test_series_where_multiindex():
     data = [1, 2, 3, 4, 5]
     index = [("a", "x"), ("b", "y"), ("c", "z"), ("d", "u"), ("e", "v")]
 
@@ -234,7 +234,7 @@ def test_series_where_with_scalar_cond(cond):
         )
 
 
-@sql_count_checker(query_count=1, join_count=3)
+@sql_count_checker(query_count=1, join_count=1)
 def test_series_where_series_cond_unmatched_index():
     data = [1, 2, 3, 4]
     index1 = [0, 1, 2, 3]
@@ -259,10 +259,9 @@ def perform_where(series):
     )
 
 
-@pytest.mark.parametrize(
-    "index, join_count", [("matched_index", 1), ("unmatched_index", 2)]
-)
-def test_series_where_short_series_cond(index, join_count):
+@sql_count_checker(query_count=1, join_count=1)
+@pytest.mark.parametrize("index", ["matched_index", "unmatched_index"])
+def test_series_where_short_series_cond(index):
     data = [1, 2, 3, 4]
     if index != "matched_index":
         index = [7, 8, 9]
@@ -281,18 +280,16 @@ def perform_where(series):
         else:
             return series.where(native_cond, -1)
 
-    with SqlCounter(query_count=1, join_count=join_count):
-        eval_snowpark_pandas_result(
-            snow_ser,
-            native_ser,
-            perform_where,
-        )
+    eval_snowpark_pandas_result(
+        snow_ser,
+        native_ser,
+        perform_where,
+    )
 
 
-@pytest.mark.parametrize(
-    "index, join_count", [("matched_index", 1), ("unmatched_index", 2)]
-)
-def test_series_where_long_series_cond(index, join_count):
+@sql_count_checker(query_count=1, join_count=1)
+@pytest.mark.parametrize("index", ["matched_index", "unmatched_index"])
+def test_series_where_long_series_cond(index):
     data = [1, 2, 3, 4]
     if index != "matched_index":
         index = [7, 8, 9, 10, 11]
@@ -311,9 +308,8 @@ def perform_where(series):
         else:
             return series.where(native_cond, -1)
 
-    with SqlCounter(query_count=1, join_count=join_count):
-        eval_snowpark_pandas_result(
-            snow_ser,
-            native_ser,
-            perform_where,
-        )
+    eval_snowpark_pandas_result(
+        snow_ser,
+        native_ser,
+        perform_where,
+    )
diff --git a/tests/integ/modin/test_concat.py b/tests/integ/modin/test_concat.py
index 5c236731a0..19693ad381 100644
--- a/tests/integ/modin/test_concat.py
+++ b/tests/integ/modin/test_concat.py
@@ -1058,7 +1058,7 @@ def test_concat_sorted_frames():
         ),  # duplicate in frame2
     ],
 )
-@sql_count_checker(query_count=2, union_count=1, join_count=1)
+@sql_count_checker(query_count=2, union_count=1)
 def test_concat_duplicate_columns(columns1, columns2, expected_rows, expected_cols):
     df1 = pd.DataFrame([[1, 2, 3]], columns=columns1)
     df2 = pd.DataFrame([[4, 5, 6]], columns=columns2)
@@ -1123,7 +1123,7 @@ def test_concat_from_file(resources_path):
     )
 
 
-@sql_count_checker(query_count=1, join_count=5)
+@sql_count_checker(query_count=1, join_count=2)
 def test_concat_keys():
     native_data = {
         "one": native_pd.Series([1, 2, 3], index=["a", "b", "c"]),
diff --git a/tests/integ/modin/test_numpy.py b/tests/integ/modin/test_numpy.py
index 43b9ef263f..cafbd08f36 100644
--- a/tests/integ/modin/test_numpy.py
+++ b/tests/integ/modin/test_numpy.py
@@ -113,7 +113,7 @@ def test_np_where_notimplemented():
             )
 
 
-@sql_count_checker(query_count=5, join_count=7)
+@sql_count_checker(query_count=5, join_count=4)
 def test_scalar():
     pdf_scalar = native_pd.DataFrame([[99, 99], [99, 99]])
     sdf_scalar = pd.DataFrame([[99, 99], [99, 99]])
@@ -172,7 +172,7 @@ def test_different_inputs(cond, x, y):
         assert_array_equal(sp_result, np_orig_result)
 
 
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=2, join_count=2)
 def test_broadcast_scalar_x_df():
     input_df = native_pd.DataFrame([[False, True], [False, True]])
     input_df2 = native_pd.DataFrame([[1, 0], [0, 1]])
@@ -183,7 +183,7 @@ def test_broadcast_scalar_x_df():
     assert_array_equal(snow_result, np_result)
 
 
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=2, join_count=2)
 def test_broadcast_scalar_x_ser():
     input_ser = native_pd.Series([False, True])
     input_ser2 = native_pd.Series([1, 0])
diff --git a/tests/integ/modin/types/test_timedelta_indexing.py b/tests/integ/modin/types/test_timedelta_indexing.py
index 62f98107b9..0d5cf838c6 100644
--- a/tests/integ/modin/types/test_timedelta_indexing.py
+++ b/tests/integ/modin/types/test_timedelta_indexing.py
@@ -264,7 +264,7 @@ def loc_set(key, item, df):
         df.loc[key] = item
         return df
 
-    with SqlCounter(query_count=1, join_count=2):
+    with SqlCounter(query_count=1, join_count=1):
         # single value
         key = (1, "a")
         run_test(key, item, api=loc_set)
@@ -304,9 +304,9 @@ def iloc_set(key, item, df):
         df.iloc[key] = item
         return df
 
-    def run_test(key, item, natvie_df=td, api=iloc_set):
+    def run_test(key, item, native_df=td, api=iloc_set):
         eval_snowpark_pandas_result(
-            snow_td.copy(), natvie_df.copy(), functools.partial(api, key, item)
+            snow_td.copy(), native_df.copy(), functools.partial(api, key, item)
         )
 
     item = "string"
@@ -325,7 +325,7 @@ def run_test(key, item, natvie_df=td, api=iloc_set):
         td_int = td.copy()
         td_int["b"] = td_int["b"].astype("int64")
         # timedelta type is not preserved in this case
-        run_test(key, item, natvie_df=td_int)
+        run_test(key, item, native_df=td_int)
 
     def df_set(key, item, df):
         df[key] = item
@@ -346,13 +346,13 @@ def loc_set(key, item, df):
             run_test(key, item, api=loc_set)
 
     item = 1000
-    with SqlCounter(query_count=1, join_count=2):
+    with SqlCounter(query_count=1, join_count=1):
         # single value
         key = (1, "b")
         td_int = td.copy()
         td_int["b"] = td_int["b"].astype("int64")
         # timedelta type is not preserved in this case
-        run_test(key, item, natvie_df=td_int, api=loc_set)
+        run_test(key, item, native_df=td_int, api=loc_set)
 
 
 @pytest.mark.parametrize("item", [None, pd.Timedelta("1 hour")])
@@ -383,7 +383,7 @@ def setitem_enlargement(key, item, df):
         )
 
     key = 10
-    with SqlCounter(query_count=1, join_count=2):
+    with SqlCounter(query_count=1, join_count=1):
         eval_snowpark_pandas_result(
             snow_td["a"].copy(),
             td["a"].copy(),
@@ -402,7 +402,7 @@ def loc_enlargement(key, item, df):
         )
 
     key = 10
-    with SqlCounter(query_count=1, join_count=2):
+    with SqlCounter(query_count=1, join_count=1):
         eval_snowpark_pandas_result(
             snow_td["a"].copy(),
             td["a"].copy(),
@@ -412,7 +412,7 @@ def loc_enlargement(key, item, df):
     # single row
     key = (10, slice(None, None, None))
 
-    with SqlCounter(query_count=1, join_count=2):
+    with SqlCounter(query_count=1, join_count=1):
         if pd.isna(item):
             eval_snowpark_pandas_result(
                 snow_td.copy(), td.copy(), functools.partial(loc_enlargement, key, item)
@@ -566,7 +566,7 @@ def setitem_enlargement(key, item, df):
         )
 
     key = native_pd.Timedelta("2 days 45 minutes")
-    with SqlCounter(query_count=1, join_count=3):
+    with SqlCounter(query_count=1, join_count=2):
         eval_snowpark_pandas_result(
             snow_df["a"].copy(),
             native_df["a"].copy(),
@@ -587,7 +587,7 @@ def loc_enlargement(key, item, df):
         )
 
     key = native_pd.Timedelta("2 days 25 minutes")
-    with SqlCounter(query_count=1, join_count=3):
+    with SqlCounter(query_count=1, join_count=2):
         eval_snowpark_pandas_result(
             snow_df["a"].copy(),
             native_df["a"].copy(),
@@ -597,7 +597,7 @@ def loc_enlargement(key, item, df):
     # single row
     key = (native_pd.Timedelta("2 days 45 minutes"), slice(None, None, None))
 
-    with SqlCounter(query_count=1, join_count=3):
+    with SqlCounter(query_count=1, join_count=2):
         eval_snowpark_pandas_result(
             snow_df.copy(),
             native_df.copy(),

From 2274d1e25b9970e2d940120b3e8c767ee4fd50d1 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Wed, 11 Sep 2024 14:39:29 -0700
Subject: [PATCH 31/42] remove print statements and unnecessary comments

---
 src/snowflake/snowpark/modin/pandas/dataframe.py              | 2 ++
 .../snowpark/modin/plugin/extensions/series_overrides.py      | 2 ++
 tests/integ/modin/frame/test_cache_result.py                  | 2 --
 tests/integ/modin/frame/test_loc.py                           | 4 ++--
 tests/integ/modin/frame/test_setitem.py                       | 1 -
 tests/integ/modin/frame/test_where.py                         | 3 +--
 tests/integ/modin/groupby/test_groupby_basic_agg.py           | 2 +-
 tests/integ/modin/series/test_iloc.py                         | 1 +
 8 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py
index e4474a53b3..003f1d56f2 100644
--- a/src/snowflake/snowpark/modin/pandas/dataframe.py
+++ b/src/snowflake/snowpark/modin/pandas/dataframe.py
@@ -308,6 +308,8 @@ def __init__(
                 )
             )._query_compiler
 
+        # The index is already set if the data is a non-Snowpark pandas object. If either the data or the index is
+        # a Snowpark pandas object, set the index here.
         if index is not None and (
             isinstance(index, (Index, Series))
             or isinstance(data, (Index, Series, type(self)))
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
index d9fbe613cf..c5435f139f 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
@@ -425,6 +425,8 @@ def __init__(
             )
         )._query_compiler
 
+    # The index is already set if the data is a non-Snowpark pandas object. If either the data or the index is
+    # a Snowpark pandas object, set the index here.
     if index is not None and (
         isinstance(index, (Index, type(self))) or isinstance(data, (Index, type(self)))
     ):
diff --git a/tests/integ/modin/frame/test_cache_result.py b/tests/integ/modin/frame/test_cache_result.py
index c26b28e4ab..c78cefaa3a 100644
--- a/tests/integ/modin/frame/test_cache_result.py
+++ b/tests/integ/modin/frame/test_cache_result.py
@@ -81,8 +81,6 @@ def perform_chained_operations(df, module):
 @pytest.mark.parametrize("inplace", [True, False])
 def test_cache_result_empty_dataframe(init_kwargs, inplace):
     snow_df, native_df = create_test_dfs(**init_kwargs)
-    print(snow_df)
-    print(native_df)
     snow_df_copy = snow_df.copy(deep=True)
     with SqlCounter(query_count=1):
         cached_snow_df = cache_and_return_df(snow_df, inplace)
diff --git a/tests/integ/modin/frame/test_loc.py b/tests/integ/modin/frame/test_loc.py
index 33c1fb98e5..73f06df4b1 100644
--- a/tests/integ/modin/frame/test_loc.py
+++ b/tests/integ/modin/frame/test_loc.py
@@ -3936,7 +3936,7 @@ def test_raise_set_cell_with_list_like_value_error():
             marks=pytest.mark.xfail(
                 reason="SNOW-1652608 result series name incorrectly set"
             ),
-        ),
+        ),  # 1 join from df creation, 1 join from squeeze, 2 joins from to_pandas during eval
         pytest.param(
             native_pd.to_timedelta("1 day"),
             2,
@@ -3944,7 +3944,7 @@ def test_raise_set_cell_with_list_like_value_error():
             marks=pytest.mark.xfail(
                 reason="SNOW-1652608 result series name incorrectly set"
             ),
-        ),
+        ),  # 1 join fron df creation, 1 join from squeeze, 2 joins from to_pandas during eval
         (["1 day", "3 days"], 1, 2),
         ([True, False, False], 1, 2),
         (slice(None, "4 days"), 1, 1),
diff --git a/tests/integ/modin/frame/test_setitem.py b/tests/integ/modin/frame/test_setitem.py
index 6152089f39..3d51277b2c 100644
--- a/tests/integ/modin/frame/test_setitem.py
+++ b/tests/integ/modin/frame/test_setitem.py
@@ -444,7 +444,6 @@ def setitem_helper(df):
         [["a", "b", "b", "d", "e"], ["x", "y", "z", "u", "u"], True],
     ],
 )
-# 2 extra queries to convert to native pandas when creating the two snowpark pandas dataframes
 @sql_count_checker(query_count=1, join_count=3)
 def test_df_setitem_with_unique_and_duplicate_index_values(
     index_values, other_index_values, expect_mismatch
diff --git a/tests/integ/modin/frame/test_where.py b/tests/integ/modin/frame/test_where.py
index 006b7e76fb..bd7a5b5808 100644
--- a/tests/integ/modin/frame/test_where.py
+++ b/tests/integ/modin/frame/test_where.py
@@ -628,7 +628,6 @@ def test_dataframe_where_with_dataframe_cond_single_index_different_names():
     )
 
 
-# one extra query to convert index to native pandas when creating the snowpark pandas dataframe
 @sql_count_checker(query_count=1, join_count=3)
 def test_dataframe_where_with_dataframe_cond_single_index_different_names_2():
     data = [1, 2, 3]
@@ -702,7 +701,7 @@ def test_dataframe_where_with_duplicated_index_aligned(cond_frame, other):
         )
 
 
-# 3 extra join queries to create the 3 snowpark pandas dataframe with non-Snowpark pandas data
+# 3 extra joins to create the 3 snowpark pandas dataframe with non-Snowpark pandas data
 # and a Snowpark pandas Index.
 @sql_count_checker(query_count=1, join_count=5)
 def test_dataframe_where_with_duplicated_index_unaligned():
diff --git a/tests/integ/modin/groupby/test_groupby_basic_agg.py b/tests/integ/modin/groupby/test_groupby_basic_agg.py
index d4211f2a41..09acd49bb2 100644
--- a/tests/integ/modin/groupby/test_groupby_basic_agg.py
+++ b/tests/integ/modin/groupby/test_groupby_basic_agg.py
@@ -952,7 +952,7 @@ def test_groupby_with_level(df_multi, level):
 
 
 @sql_count_checker(query_count=1)
-def test_groupby_with_higher_columns():
+def test_groupby_with_hier_columns():
     tuples = list(
         zip(
             *[
diff --git a/tests/integ/modin/series/test_iloc.py b/tests/integ/modin/series/test_iloc.py
index 7b6369934d..b5a0df3164 100644
--- a/tests/integ/modin/series/test_iloc.py
+++ b/tests/integ/modin/series/test_iloc.py
@@ -823,6 +823,7 @@ def test_df_iloc_set_with_multiindex(
         native_items.index = pd.MultiIndex.from_tuples(item_index)
 
     if row_key_index:
+        # Using native pandas index since row_key[2] is a MultiIndex object.
         snow_row_key = pd.Series(row_key, index=native_pd.Index(row_key_index))
         native_row_key = native_pd.Series(row_key, index=native_pd.Index(row_key_index))
     else:

From 9eef8d77449cff6a98a1d539740f38d4ce3ea7e4 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Wed, 11 Sep 2024 15:36:49 -0700
Subject: [PATCH 32/42] fix tests

---
 tests/integ/modin/groupby/test_groupby_series.py | 2 +-
 tests/integ/modin/test_from_pandas_to_pandas.py  | 4 ++--
 tests/integ/modin/test_internal_frame.py         | 2 +-
 tests/integ/modin/test_telemetry.py              | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/integ/modin/groupby/test_groupby_series.py b/tests/integ/modin/groupby/test_groupby_series.py
index 7756f8b620..ae8ae0926d 100644
--- a/tests/integ/modin/groupby/test_groupby_series.py
+++ b/tests/integ/modin/groupby/test_groupby_series.py
@@ -153,7 +153,7 @@ def test_groupby_agg_series_named_agg(aggs, sort):
 
 
 @pytest.mark.parametrize("numeric_only", [False, None])
-@sql_count_checker(query_count=2)
+@sql_count_checker(query_count=2, join_count=2)
 def test_groupby_series_numeric_only(series_str, numeric_only):
     native_series = series_str.to_pandas()
     eval_snowpark_pandas_result(
diff --git a/tests/integ/modin/test_from_pandas_to_pandas.py b/tests/integ/modin/test_from_pandas_to_pandas.py
index 28a6c54950..ceef588410 100644
--- a/tests/integ/modin/test_from_pandas_to_pandas.py
+++ b/tests/integ/modin/test_from_pandas_to_pandas.py
@@ -525,7 +525,7 @@ def test_from_pandas_series_with_tuple_name():
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 def test_series_to_pandas():
     array = ["a", "b", "c"]
     pandas_series = native_pd.Series(data=array, index=array)
@@ -585,7 +585,7 @@ def test_snowpark_pandas_statement_params():
         assert "efg" == mock_to_pandas.call_args.kwargs["statement_params"]["abc"]
 
 
-@sql_count_checker(query_count=1, join_count=5)
+@sql_count_checker(query_count=1, join_count=2)
 def test_create_df_from_series():
     native_data = {
         "one": native_pd.Series([1, 2, 3], index=["a", "b", "c"]),
diff --git a/tests/integ/modin/test_internal_frame.py b/tests/integ/modin/test_internal_frame.py
index c7a95fa601..da38322b9a 100644
--- a/tests/integ/modin/test_internal_frame.py
+++ b/tests/integ/modin/test_internal_frame.py
@@ -38,7 +38,7 @@ def test_strip_duplicates(input, expected):
     assert_frame_equal(result, pd.DataFrame(expected))
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2, join_count=1)
 def test_strip_duplicates_after_sort():
     df = pd.DataFrame({"A": [0, 1, 0, 1, 2], "B": [1, 2, 3, 4, 5]})
     df = df.sort_values(by="B", ascending=False)
diff --git a/tests/integ/modin/test_telemetry.py b/tests/integ/modin/test_telemetry.py
index faa012e144..ce9e1caf32 100644
--- a/tests/integ/modin/test_telemetry.py
+++ b/tests/integ/modin/test_telemetry.py
@@ -342,7 +342,7 @@ def test_telemetry_with_update_inplace():
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 def test_telemetry_with_resample():
     # verify api_calls have been collected correctly for Resample APIs
     index = pandas.date_range("1/1/2000", periods=9, freq="min")

From cc09403f06ecb20c88611fc9d377922b8155829a Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Wed, 11 Sep 2024 17:01:44 -0700
Subject: [PATCH 33/42] increase coverage

---
 .../snowpark/modin/pandas/dataframe.py        |  6 +++--
 .../plugin/extensions/series_overrides.py     |  8 +++---
 .../test_df_series_creation_with_index.py     | 26 +++++++++++++++++++
 3 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py
index 003f1d56f2..f619732f16 100644
--- a/src/snowflake/snowpark/modin/pandas/dataframe.py
+++ b/src/snowflake/snowpark/modin/pandas/dataframe.py
@@ -257,7 +257,7 @@ def __init__(
                     if index is not None:
                         if isinstance(index, Index):  # pragma: no cover
                             index = index.to_series()._query_compiler
-                        elif isinstance(index, Series):
+                        elif isinstance(index, Series):  # pragma: no cover
                             index = index._query_compiler
                         new_qc = new_qc.reindex(axis=0, labels=index)
                     if columns is not None:
@@ -354,7 +354,9 @@ def __init__(
                             ]
                         ]
                     else:
-                        index_qc_list = [Series(index)._query_compiler]
+                        index_qc_list = [
+                            Series(index)._query_compiler
+                        ]  # pragma: no cover
                 query_compiler = query_compiler.set_index(index_qc_list)
 
         if isinstance(data, DataFrame):
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
index c5435f139f..724b58838c 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
@@ -438,9 +438,9 @@ def __init__(
             labels = index
             if isinstance(labels, Index):
                 labels = labels.to_series()._query_compiler
-            elif isinstance(labels, Series):
-                labels = labels._query_compiler  # pragma: no cover
-            else:
+            elif isinstance(labels, Series):  # pragma: no cover
+                labels = labels._query_compiler
+            else:  # pragma: no cover
                 labels = Index(labels).to_series()._query_compiler
             query_compiler = query_compiler.reindex(axis=0, labels=labels)
 
@@ -470,7 +470,7 @@ def __init__(
                         ]
                     ]
                 else:
-                    index_qc_list = [Series(index)._query_compiler]
+                    index_qc_list = [Series(index)._query_compiler]  # pragma: no cover
             query_compiler = query_compiler.set_index(index_qc_list)
 
     # Set the query compiler and name fields.
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
index 1fd5701fda..920523ff3d 100644
--- a/tests/integ/modin/index/test_df_series_creation_with_index.py
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -507,6 +507,32 @@ def test_create_series_with_list_of_lists_index():
     assert_series_equal(snow_series, native_series)
 
 
+@sql_count_checker(query_count=1, join_count=2)
+def test_create_series_with_index_data_and_list_of_lists_index():
+    # When given a list of lists as the index, this index needs to be converted to a MultiIndex before processing.
+    arrays = [
+        ["qux", "qux", "foo", "foo", "baz", "baz", "bar", "bar"],
+        ["two", "one", "two", "one", "two", "one", "two", "one"],
+    ]
+    data = native_pd.Index([1, 2, 3, 4, 5, 6, 7, 8])
+    native_series = native_pd.Series(data, index=arrays)
+    snow_series = pd.Series(pd.Index(data), index=arrays)
+    assert_series_equal(snow_series, native_series)
+
+
+@sql_count_checker(query_count=1, join_count=2)
+def test_create_df_with_index_data_and_list_of_lists_index():
+    # When given a list of lists as the index, this index needs to be converted to a MultiIndex before processing.
+    arrays = [
+        ["qux", "qux", "foo", "foo", "baz", "baz", "bar", "bar"],
+        ["two", "one", "two", "one", "two", "one", "two", "one"],
+    ]
+    data = native_pd.Index([1, 2, 3, 4, 5, 6, 7, 8])
+    native_df = native_pd.DataFrame(data, index=arrays)
+    snow_df = pd.DataFrame(pd.Index(data), index=arrays)
+    assert_frame_equal(snow_df, native_df)
+
+
 @sql_count_checker(query_count=1)
 def test_create_series_with_none_data_and_non_empty_index():
     # When creating an empty Series with a non-empty index, the index should be used as the index of the Series.

From 10c395445575116af8a4de0c70430360c87b9c34 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Fri, 13 Sep 2024 17:45:01 -0700
Subject: [PATCH 34/42] try to move out common logic, add more tests

---
 .../snowpark/modin/pandas/dataframe.py        | 284 ++++++-------
 .../snowpark/modin/plugin/_internal/utils.py  |  66 +++
 .../plugin/extensions/series_overrides.py     | 106 ++---
 .../test_df_series_creation_with_index.py     | 381 +++++++++++++++++-
 4 files changed, 637 insertions(+), 200 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py
index f619732f16..28122944e1 100644
--- a/src/snowflake/snowpark/modin/pandas/dataframe.py
+++ b/src/snowflake/snowpark/modin/pandas/dataframe.py
@@ -89,8 +89,13 @@
     raise_if_native_pandas_objects,
     replace_external_data_keys_with_empty_pandas_series,
     replace_external_data_keys_with_query_compiler,
+    try_convert_index_to_native,
+)
+from snowflake.snowpark.modin.plugin._internal.utils import (
+    convert_index_to_list_of_qcs,
+    convert_index_to_qc,
+    is_repr_truncated,
 )
-from snowflake.snowpark.modin.plugin._internal.utils import is_repr_truncated
 from snowflake.snowpark.modin.plugin._typing import DropKeep, ListLike
 from snowflake.snowpark.modin.plugin.utils.error_message import (
     ErrorMessage,
@@ -151,28 +156,40 @@ def __init__(
         # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions
         # Siblings are other dataframes that share the same query compiler. We
         # use this list to update inplace when there is a shallow copy.
-        from snowflake.snowpark.modin.pandas.utils import try_convert_index_to_native
         from snowflake.snowpark.modin.plugin.extensions.index import Index
 
         self._siblings = []
 
-        if isinstance(index, DataFrame):  # pandas raises the same error
-            raise ValueError("Index data must be 1-dimensional")
-
+        # 0. Setting the query compiler
+        # -----------------------------
         if query_compiler is not None:
-            # CASE 1: query_compiler
-            # If a query_compiler is passed in, only use the query_compiler and name fields to create a new Series.
+            # CASE I: query_compiler
+            # If a query_compiler is passed in only use the query_compiler field to create a new DataFrame.
+            assert (
+                data is None
+            ), "Invalid DataFrame construction! Cannot pass both data and query_compiler."
+            assert (
+                index is None
+            ), "Invalid DataFrame construction! Cannot pass both index and query_compiler."
+            assert (
+                columns is None
+            ), "Invalid DataFrame construction! Cannot pass both columns and query_compiler."
             self._query_compiler = query_compiler
             return
 
+        if isinstance(index, DataFrame):  # pandas raises the same error
+            raise ValueError("Index data must be 1-dimensional")
+
         # The logic followed here is:
         # 1. Create a query_compiler from the provided data. If columns are provided, add/select the columns.
         # 2. If an index is provided, set the index through set_index or reindex.
         # 3. If the data is a DataFrame, perform loc to select the required index and columns from the DataFrame.
         # 4. The resultant query_compiler is then set as the query_compiler for the DataFrame.
 
+        # 1. Setting the data (and columns)
+        # ---------------------------------
         if isinstance(data, Index):
-            # CASE 2: data is a Snowpark pandas Index
+            # CASE II: data is a Snowpark pandas Index
             # If the data is an Index object, convert it to a DataFrame to make sure that the values are in the
             # correct format: the values are a data column, not an index column.
             if data.name is None:
@@ -182,26 +199,23 @@ def __init__(
             query_compiler = data.to_frame(index=False, name=new_name)._query_compiler
 
         elif isinstance(data, Series):
-            # CASE 3: data is a Snowpark pandas Series
+            # CASE III: data is a Snowpark pandas Series
             query_compiler = data._query_compiler.copy()
             # We set the column name if it is not in the provided Series `data`.
             if data.name is None:
-                query_compiler = query_compiler.set_columns(
-                    [0] if columns is None else columns
-                )
+                query_compiler = query_compiler.set_columns(columns or [0])
             if columns is not None and data.name not in columns:
                 # If the columns provided are not in the named Series, pandas clears
                 # the DataFrame and sets columns to the columns provided.
                 query_compiler = from_pandas(
-                    self.__constructor__(columns=columns)
-                )._query_compiler  # pragma: no cover
+                    pandas.DataFrame(columns=columns)
+                )._query_compiler
 
         elif isinstance(data, DataFrame):
-            # CASE 5: data is a Snowpark pandas DataFrame
+            # CASE IV: data is a Snowpark pandas DataFrame
             query_compiler = data._query_compiler.copy()
-
             if columns is None and index is None:
-                # If the new DataFrame has the same columns and index as the original DataFrame,
+                # Special case IV.a: if the new DataFrame has the same columns and index as the original DataFrame,
                 # the query compiler is shared and kept track of as a sibling.
                 self._query_compiler = query_compiler
                 data._add_sibling(self)
@@ -218,147 +232,139 @@ def __init__(
             )
 
         else:
-            # CASE 5: Non-Snowpark pandas data
-            dummy_index = None  # used in a special dict case
-            if isinstance(data, pandas.Index):
-                # CASE 5.B: data is a pandas Index
-                pass
-
-            elif is_list_like(data) and not is_dict_like(data):
-                # CASE 5.C: data is list-like
-                old_dtype = getattr(data, "dtype", None)
-                values = [
-                    obj._to_pandas() if isinstance(obj, Series) else obj for obj in data
-                ]
-                if isinstance(data, np.ndarray):
-                    data = np.array(values, dtype=old_dtype)
-                else:
-                    try:
-                        data = type(data)(values, dtype=old_dtype)
-                    except TypeError:
-                        data = values
-
-            elif is_dict_like(data) and not isinstance(
-                data, (pandas.Series, pandas.DataFrame)
-            ):
-                # CASE 5.D: data is dict-like
-                if columns is not None:
-                    data = {key: value for key, value in data.items() if key in columns}
-
-                if len(data) and all(isinstance(v, Series) for v in data.values()):
-                    # Special case: data is a dictionary where all the values are Snowpark pandas Series
-                    from .general import concat
-
-                    new_qc = concat(
-                        data.values(), axis=1, keys=data.keys()
-                    )._query_compiler
-                    if dtype is not None:
-                        new_qc = new_qc.astype({col: dtype for col in new_qc.columns})
-                    if index is not None:
-                        if isinstance(index, Index):  # pragma: no cover
-                            index = index.to_series()._query_compiler
-                        elif isinstance(index, Series):  # pragma: no cover
-                            index = index._query_compiler
-                        new_qc = new_qc.reindex(axis=0, labels=index)
+            # CASE V: Non-Snowpark pandas data
+            if not isinstance(
+                data, (pandas.Series, pandas.DataFrame, pandas.Index)
+            ) and is_list_like(data):
+                from .general import concat
+
+                if is_dict_like(data):
+                    # Setting up keys and values for processing if all the values are Snowpark pandas objects.
                     if columns is not None:
-                        new_qc = new_qc.reindex(
-                            axis=1, labels=try_convert_index_to_native(columns)
-                        )
-                    self._query_compiler = new_qc
-                    return
-
-                data = {
-                    k: v._to_pandas() if isinstance(v, Series) else v
-                    for k, v in data.items()
-                }
-
-                if (
-                    all(not is_scalar(v) and len(v) == 1 for v in data.values())
-                    and index is not None
-                ):
-                    # Special case: the values in the dictionary are all non-scalar objects of length 1
-                    # >>> DataFrame({"A": [1], "V": [2]}, native_pd.Index(["A", "B", "C"]), name="cake")
-                    #       A  V
-                    # cake
-                    # A     1  2
-                    # B     1  2  <--- the first row is copied into the rest of the rows.
-                    # C     1  2
-                    # Recreate a 2-d array with the first row copied into the rest of the rows.
-                    self._query_compiler = DataFrame(
-                        data=[[v[0] for v in data.values()]] * len(index),
-                        index=index,
-                        columns=list(data.keys()),
-                    )._query_compiler
-                    return
-
-                if all(is_scalar(k) and is_scalar(v) for k, v in data.items()):
-                    # Special case: All keys and values in the dict are all scalars, an index needs to be provided.
-                    # pd.DataFrame({'a': 1, 'b': 2}, index=[0])
-                    dummy_index = index
-
-            if not isinstance(index, (Index, type(self))):
-                dummy_index = index
+                        # Reduce the dictionary to only the relevant columns as the keys.
+                        data = {
+                            key: value for key, value in data.items() if key in columns
+                        }
+
+                    if len(data) and all(
+                        isinstance(v, (Index, BasePandasDataset)) for v in data.values()
+                    ):
+                        # Special case V.a: data is a list/dict where all the values are Snowpark pandas objects.
+                        # Concat can only be performed with BasePandasDataset objects.
+                        # If a value is an Index, convert it to a Series where the index is the index to be set
+                        # since these values are always present in the final DataFrame.
+                        values = [
+                            Series(v, index=index) if isinstance(v, Index) else v
+                            for v in data.values()
+                        ]
+                        new_qc = concat(
+                            values, axis=1, keys=data.keys()
+                        )._query_compiler
+                        if dtype is not None:
+                            new_qc = new_qc.astype(
+                                {col: dtype for col in new_qc.columns}
+                            )
+                        if index is not None:
+                            new_qc = new_qc.reindex(
+                                axis=0, labels=convert_index_to_qc(index)
+                            )
+                        if columns is not None:
+                            new_qc = new_qc.reindex(
+                                axis=1, labels=try_convert_index_to_native(columns)
+                            )
+                        self._query_compiler = new_qc
+                        return
+
+                    # If only some data is a Snowpark pandas object, convert it to pandas objects.
+                    res = {}
+                    index = try_convert_index_to_native(index)
+                    for k, v in data.items():
+                        if isinstance(v, (Index)):
+                            res[k] = v.to_pandas()
+                        elif isinstance(v, BasePandasDataset):
+                            # Need to perform reindex on the Series or DataFrame objects since only the data
+                            # whose index matches the given index is kept.
+                            res[k] = v.to_pandas().reindex(index=index)
+                        else:
+                            res[k] = v
+                    data = res
+
+                else:  # list-like but not dict-like data.
+                    if len(data) and all(
+                        isinstance(v, (Index, BasePandasDataset)) for v in data
+                    ):
+                        # Special case V.c: data is a list/dict where all the values are Snowpark pandas objects.
+                        # Concat can only be performed with BasePandasDataset objects.
+                        # If a value is an Index, convert it to a Series.
+                        values = [
+                            Series(v) if isinstance(v, Index) else v for v in data
+                        ]
+                        new_qc = concat(values, axis=1).T._query_compiler
+                        if dtype is not None:
+                            new_qc = new_qc.astype(
+                                {col: dtype for col in new_qc.columns}
+                            )
+                        if index is not None:
+                            new_qc = new_qc.set_index([convert_index_to_qc(index)])
+                        if columns is not None:
+                            if all(isinstance(v, Index) for v in data):
+                                # Special case: if all the values are Index objects, they are always present in the
+                                # final result with the provided column names. Therefore, rename the columns.
+                                new_qc = new_qc.set_columns(
+                                    try_convert_index_to_native(columns)
+                                )
+                            else:
+                                new_qc = new_qc.reindex(axis=1, labels=columns)
+                        self._query_compiler = new_qc
+                        return
+
+                    # If only some data is a Snowpark pandas object, convert it to pandas objects.
+                    res = []
+                    for v in data:
+                        if isinstance(v, (Index)):
+                            res.append(v.to_pandas())
+                        elif isinstance(v, BasePandasDataset):
+                            res.append(v.to_pandas())
+                        else:
+                            # Need to convert this is a native pandas object since native pandas incorrectly
+                            # tries to perform `get_indexer` on it.
+                            res.append(pandas.Index(v if is_list_like(v) else [v]))
+                    data = res
+
             query_compiler = from_pandas(
                 pandas.DataFrame(
                     data=data,
-                    index=dummy_index,
+                    # Handle setting the index, if it is a lazy index, outside this block.
+                    index=None if isinstance(index, (Index, Series)) else index,
                     columns=try_convert_index_to_native(columns),
                     dtype=dtype,
                     copy=copy,
                 )
             )._query_compiler
 
-        # The index is already set if the data is a non-Snowpark pandas object. If either the data or the index is
-        # a Snowpark pandas object, set the index here.
+        # 2. Setting the index
+        # --------------------
+        # The index is already set if the data is a non-Snowpark pandas object.
+        # If either the data or the index is a Snowpark pandas object, set the index here.
         if index is not None and (
             isinstance(index, (Index, Series))
-            or isinstance(data, (Index, Series, type(self)))
+            or isinstance(data, (Index, BasePandasDataset))
         ):
             if isinstance(data, (type(self), Series, type(None))):
                 # The `index` parameter is used to select the rows from `data` that will be in the resultant DataFrame.
                 # If a value in `index` is not present in `data`'s index, it will be filled with a NaN value.
                 # If data is None and an index is provided, set the index.
-                labels = index
-                if isinstance(labels, Index):
-                    labels = labels.to_series()._query_compiler
-                elif isinstance(labels, Series):
-                    labels = labels._query_compiler  # pragma: no cover
-                else:
-                    labels = Index(labels).to_series()._query_compiler
-                query_compiler = query_compiler.reindex(axis=0, labels=labels)
-
+                query_compiler = query_compiler.reindex(
+                    axis=0, labels=convert_index_to_qc(index)
+                )
             else:
                 # Performing set index to directly set the index column (joining on row-position instead of index).
-                if isinstance(index, Series):
-                    index_qc_list = [index._query_compiler]
-                elif isinstance(index, Index):
-                    index_qc_list = [index.to_series()._query_compiler]
-                else:
-                    if (
-                        not isinstance(index, pandas.MultiIndex)
-                        and is_list_like(index)
-                        and len(index) > 0
-                        and all(
-                            (not isinstance(i, tuple) and is_list_like(i))
-                            for i in index
-                        )
-                    ):
-                        # If given a list of lists, convert it to a MultiIndex.
-                        index = pandas.MultiIndex.from_arrays(index)
-                    if isinstance(index, pandas.MultiIndex):
-                        index_qc_list = [
-                            s._query_compiler
-                            for s in [
-                                pd.Series(index.get_level_values(level))
-                                for level in range(index.nlevels)
-                            ]
-                        ]
-                    else:
-                        index_qc_list = [
-                            Series(index)._query_compiler
-                        ]  # pragma: no cover
-                query_compiler = query_compiler.set_index(index_qc_list)
+                query_compiler = query_compiler.set_index(
+                    convert_index_to_list_of_qcs(index)
+                )
 
+        # 3. If data is a DataFrame, filter result
+        # ----------------------------------------
         if isinstance(data, DataFrame):
             # To select the required index and columns for the resultant DataFrame,
             # perform .loc[] on the created query compiler.
@@ -370,6 +376,8 @@ def __init__(
                 ._query_compiler
             )
 
+        # 4. Setting the query compiler
+        # -----------------------------
         self._query_compiler = query_compiler
 
     def __repr__(self):
diff --git a/src/snowflake/snowpark/modin/plugin/_internal/utils.py b/src/snowflake/snowpark/modin/plugin/_internal/utils.py
index 9f01954ab2..fdfc9b8da3 100644
--- a/src/snowflake/snowpark/modin/plugin/_internal/utils.py
+++ b/src/snowflake/snowpark/modin/plugin/_internal/utils.py
@@ -13,6 +13,7 @@
 import pandas as native_pd
 from pandas._typing import Scalar
 from pandas.core.dtypes.common import is_integer_dtype, is_object_dtype, is_scalar
+from pandas.core.dtypes.inference import is_list_like
 
 import snowflake.snowpark.modin.pandas as pd
 import snowflake.snowpark.modin.plugin._internal.statement_params_constants as STATEMENT_PARAMS
@@ -1995,3 +1996,68 @@ def create_frame_with_data_columns(
 def rindex(lst: list, value: int) -> int:
     """Find the last index in the list of item value."""
     return len(lst) - lst[::-1].index(value) - 1
+
+
+def convert_index_to_qc(index: Any) -> Any:
+    """
+    Method to convert an object representing an index into a query compiler for set_index or reindex.
+
+    Parameters
+    ----------
+    index: Any
+        The object to convert to a query compiler.
+
+    Returns
+    -------
+    SnowflakeQueryCompiler
+        The converted query compiler.
+    """
+    from modin.pandas import Series
+
+    from snowflake.snowpark.modin.plugin.extensions.index import Index
+
+    if isinstance(index, Index):
+        idx_qc = index.to_series()._query_compiler
+    elif isinstance(index, Series):
+        idx_qc = index._query_compiler
+    else:
+        idx_qc = Series(index)._query_compiler
+    return idx_qc
+
+
+def convert_index_to_list_of_qcs(index: Any) -> list:
+    """
+    Method to convert an object representing an index into a list of query compilers for set_index.
+
+    Parameters
+    ----------
+    index: Any
+        The object to convert to a list of query compilers.
+
+    Returns
+    -------
+    list
+        The list of query compilers.
+    """
+    from modin.pandas import Series
+
+    from snowflake.snowpark.modin.plugin.extensions.index import Index
+
+    if (
+        not isinstance(index, (native_pd.MultiIndex, Series, Index))
+        and is_list_like(index)
+        and len(index) > 0
+        and all((is_list_like(i) and not isinstance(i, tuple)) for i in index)
+    ):
+        # If given a list of lists, convert it to a MultiIndex.
+        index = native_pd.MultiIndex.from_arrays(index)
+    if isinstance(index, native_pd.MultiIndex):
+        index_qc_list = [
+            s._query_compiler
+            for s in [
+                Series(index.get_level_values(level)) for level in range(index.nlevels)
+            ]
+        ]
+    else:
+        index_qc_list = [convert_index_to_qc(index)]
+    return index_qc_list
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
index 724b58838c..7707ed82f9 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
@@ -51,6 +51,10 @@
 from snowflake.snowpark.modin import pandas as spd  # noqa: F401
 from snowflake.snowpark.modin.pandas.api.extensions import register_series_accessor
 from snowflake.snowpark.modin.pandas.utils import from_pandas, is_scalar
+from snowflake.snowpark.modin.plugin._internal.utils import (
+    convert_index_to_list_of_qcs,
+    convert_index_to_qc,
+)
 from snowflake.snowpark.modin.plugin._typing import DropKeep, ListLike
 from snowflake.snowpark.modin.plugin.utils.error_message import (
     ErrorMessage,
@@ -374,50 +378,80 @@ def __init__(
 
     from snowflake.snowpark.modin.plugin.extensions.index import Index
 
-    if query_compiler:
-        # CASE 1: query_compiler
+    # 0. Setting the query compiler
+    # -----------------------------
+    if query_compiler is not None:
+        # CASE I: query_compiler
         # If a query_compiler is passed in, only use the query_compiler and name fields to create a new Series.
+        assert (
+            data is None
+        ), "Invalid Series construction! Cannot pass both data and query_compiler."
+        assert (
+            index is None
+        ), "Invalid Series construction! Cannot pass both index and query_compiler."
         self._query_compiler = query_compiler.columnarize()
         if name is not None:
             self.name = name
         return
 
+    if isinstance(index, spd.DataFrame):  # pandas raises the same error
+        raise ValueError("Index data must be 1-dimensional")
+
+    if isinstance(data, spd.DataFrame):
+        # pandas raises an ambiguous error:
+        # ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
+        raise ValueError("Data cannot be a DataFrame")
+
     # The logic followed here is:
     # 1. Create a query_compiler from the provided data.
     # 2. If an index is provided, set the index. This is either through set_index or reindex.
     # 3. The resultant query_compiler is columnarized and set as the query_compiler for the Series.
     # 4. If a name is provided, set the name.
 
+    # 1. Setting the data
+    # -------------------
     if isinstance(data, Index):
-        # CASE 2: Index
+        # CASE II: Index
         # If the data is an Index object, convert it to a Series, and get the query_compiler.
         query_compiler = (
             data.to_series(index=None, name=name).reset_index(drop=True)._query_compiler
         )
 
     elif isinstance(data, type(self)):
-        # CASE 3: Series
+        # CASE III: Series
         # If the data is a Series object, copy the query_compiler.
         query_compiler = data._query_compiler.copy()
 
     else:
-        # CASE 4: Non-Snowpark pandas data
+        # CASE IV: Non-Snowpark pandas data
         # If the data is not a Snowpark pandas object, convert it to a query compiler.
-        name = MODIN_UNNAMED_SERIES_LABEL if name is None else name
-        dummy_index = None
-        if not isinstance(index, (Index, type(self))):
-            dummy_index = index
+        name = name or MODIN_UNNAMED_SERIES_LABEL
         if (
             isinstance(data, (native_pd.Series, native_pd.Index))
             and data.name is not None
         ):
             name = data.name
+        # If any of the values are Snowpark pandas objects, convert them to native pandas objects.
+        if not isinstance(
+            data, (native_pd.DataFrame, native_pd.Series, native_pd.Index)
+        ) and is_list_like(data):
+            if is_dict_like(data):
+                data = {
+                    k: v.to_list() if isinstance(v, (Index, BasePandasDataset)) else v
+                    for k, v in data.items()
+                }
+            else:
+                data = [
+                    v.to_list() if isinstance(v, (Index, BasePandasDataset)) else v
+                    for v in data
+                ]
         query_compiler = from_pandas(
             native_pd.DataFrame(
                 native_pd.Series(
                     data=data,
                     dtype=dtype,
-                    index=dummy_index,
+                    # Handle setting the index, if it is a lazy index, outside this block.
+                    index=None if isinstance(index, (Index, Series)) else index,
                     name=name,
                     copy=copy,
                     fastpath=fastpath,
@@ -425,8 +459,10 @@ def __init__(
             )
         )._query_compiler
 
-    # The index is already set if the data is a non-Snowpark pandas object. If either the data or the index is
-    # a Snowpark pandas object, set the index here.
+    # 2. Setting the index
+    # --------------------
+    # The index is already set if the data is a non-Snowpark pandas object.
+    # If either the data or the index is a Snowpark pandas object, set the index here.
     if index is not None and (
         isinstance(index, (Index, type(self))) or isinstance(data, (Index, type(self)))
     ):
@@ -435,45 +471,17 @@ def __init__(
             # If a value in `index` is not present in `data`'s index, it will be filled with a NaN value.
             # If data is None and an index is provided, all the values in the Series will be NaN and the index
             # will be the provided index.
-            labels = index
-            if isinstance(labels, Index):
-                labels = labels.to_series()._query_compiler
-            elif isinstance(labels, Series):  # pragma: no cover
-                labels = labels._query_compiler
-            else:  # pragma: no cover
-                labels = Index(labels).to_series()._query_compiler
-            query_compiler = query_compiler.reindex(axis=0, labels=labels)
-
+            query_compiler = query_compiler.reindex(
+                axis=0, labels=convert_index_to_qc(index)
+            )
         else:
             # Performing set index to directly set the index column (joining on row-position instead of index).
-            if isinstance(index, Series):
-                index_qc_list = [index._query_compiler]
-            elif isinstance(index, Index):
-                index_qc_list = [index.to_series()._query_compiler]
-            else:
-                if (
-                    not isinstance(index, native_pd.MultiIndex)
-                    and is_list_like(index)
-                    and len(index) > 0
-                    and all(
-                        (not isinstance(i, tuple) and is_list_like(i)) for i in index
-                    )
-                ):
-                    # If given a list of lists, convert it to a MultiIndex.
-                    index = native_pd.MultiIndex.from_arrays(index)
-                if isinstance(index, native_pd.MultiIndex):
-                    index_qc_list = [
-                        s._query_compiler
-                        for s in [
-                            pd.Series(index.get_level_values(level))
-                            for level in range(index.nlevels)
-                        ]
-                    ]
-                else:
-                    index_qc_list = [Series(index)._query_compiler]  # pragma: no cover
-            query_compiler = query_compiler.set_index(index_qc_list)
-
-    # Set the query compiler and name fields.
+            query_compiler = query_compiler.set_index(
+                convert_index_to_list_of_qcs(index)
+            )
+
+    # 3 and 4. Setting the query compiler and name
+    # --------------------------------------------
     self._query_compiler = query_compiler.columnarize()
     if name is not None:
         self.name = name
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
index 920523ff3d..e337e53d4b 100644
--- a/tests/integ/modin/index/test_df_series_creation_with_index.py
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -220,6 +220,7 @@ def test_create_with_series_as_data_and_index_as_index(
     assert_equal_func(
         snow_obj(data=snow_series, index=snow_index),
         native_obj(data=native_series, index=native_index),
+        check_dtype=False,
     )
 
 
@@ -440,8 +441,9 @@ def test_create_df_with_df_as_data_and_index_as_index_and_different_columns(
     )
     snow_index = pd.Index(native_index)
     qc = 1 if column_type == "list" else 2
+    qc += 1 if (isinstance(native_df, dict)) else 0
     qc += 1 if (isinstance(native_df, dict) and column_type == "index") else 0
-    jc = 2 if isinstance(native_df, native_pd.DataFrame) else 1
+    jc = 2 if isinstance(native_df, native_pd.DataFrame) else 0
     with SqlCounter(query_count=qc, join_count=jc):
         assert_frame_equal(
             pd.DataFrame(snow_df, index=snow_index, columns=native_columns),
@@ -464,18 +466,7 @@ def test_create_df_with_new_columns():
     )
 
 
-@sql_count_checker(query_count=0)
-def test_create_df_with_df_index_negative():
-    with pytest.raises(ValueError, match="Index data must be 1-dimensional"):
-        pd.DataFrame([1, 2, 3], index=pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
-    with pytest.raises(
-        ValueError,
-        match=re.escape("Shape of passed values is (3, 1), indices imply (2, 1)"),
-    ):
-        native_pd.DataFrame([1, 2, 3], index=[[1, 2], [3, 4], [5, 6]])
-
-
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=2)
 def test_create_df_with_dict_as_data_and_index_as_index():
     """
     Special case when creating:
@@ -540,3 +531,367 @@ def test_create_series_with_none_data_and_non_empty_index():
     native_series = native_pd.Series(None, index=index, dtype=object)
     snow_series = pd.Series(None, index=index, dtype=object)
     assert_series_equal(snow_series, native_series)
+
+
+@pytest.mark.parametrize(
+    "data1, data2", [("series", "series"), ("series", "index"), ("index", "index")]
+)
+def test_create_df_with_series_index_dict_data(data1, data2):
+    # Create the dict data.
+    native_data1 = (
+        native_pd.Series([1, 2, 3]) if data1 == "series" else native_pd.Index([1, 2, 3])
+    )
+    native_data2 = (
+        native_pd.Series([4, 5, 6]) if data2 == "series" else native_pd.Index([4, 5, 6])
+    )
+    snow_data1 = pd.Series([1, 2, 3]) if data1 == "series" else pd.Index([1, 2, 3])
+    snow_data2 = pd.Series([4, 5, 6]) if data2 == "series" else pd.Index([4, 5, 6])
+    native_data = {"A": native_data1, "B": native_data2}
+    snow_data = {"A": snow_data1, "B": snow_data2}
+
+    # Create DataFrame only with dict data.
+    native_df = native_pd.DataFrame(native_data)
+    snow_df = pd.DataFrame(snow_data)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+    # Create DataFrame with dict data and Series index.
+    native_ser_index = native_pd.Series([9, 2, 999])
+    snow_ser_index = pd.Series([9, 2, 999])
+    native_df = native_pd.DataFrame(native_data, index=native_ser_index)
+    snow_df = pd.DataFrame(snow_data, index=snow_ser_index)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+    # Create DataFrame with dict data and Index index.
+    native_index = native_pd.Index([9, 2, 999])
+    snow_index = pd.Index([9, 2, 999])
+    native_df = native_pd.DataFrame(native_data, index=native_index)
+    snow_df = pd.DataFrame(snow_data, index=snow_index)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+    # Create DataFrame with dict data, Series index, and columns.
+    columns = ["A", "B", "C"]
+    native_df = native_pd.DataFrame(
+        native_data, index=native_ser_index, columns=columns
+    )
+    snow_df = pd.DataFrame(snow_data, index=snow_ser_index, columns=columns)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+    # Create DataFrame with dict data, Index index, and Index columns.
+    native_columns = native_pd.Index(columns)
+    snow_columns = pd.Index(columns)
+    native_df = native_pd.DataFrame(
+        native_data, index=native_index, columns=native_columns
+    )
+    snow_df = pd.DataFrame(snow_data, index=snow_index, columns=snow_columns)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+
+@pytest.mark.parametrize(
+    "data1, data2", [("series", "series"), ("series", "index"), ("index", "index")]
+)
+def test_create_df_with_series_index_list_data(data1, data2):
+    # Create the list data.
+    native_data1 = (
+        native_pd.Series([11, 22, 33])
+        if data1 == "series"
+        else native_pd.Index([11, 22, 33])
+    )
+    native_data2 = (
+        native_pd.Series([44, 55, 66])
+        if data2 == "series"
+        else native_pd.Index([44, 55, 66])
+    )
+    snow_data1 = (
+        pd.Series([11, 22, 33]) if data1 == "series" else pd.Index([11, 22, 33])
+    )
+    snow_data2 = (
+        pd.Series([44, 55, 66]) if data2 == "series" else pd.Index([44, 55, 66])
+    )
+    native_data = [native_data1, native_data2]
+    snow_data = [snow_data1, snow_data2]
+
+    # Create DataFrame only with list data.
+    native_df = native_pd.DataFrame(native_data)
+    snow_df = pd.DataFrame(snow_data)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+    # Create DataFrame with list data and Series index.
+    native_ser_index = native_pd.Series([2, 11])
+    snow_ser_index = pd.Series([2, 11])
+    native_df = native_pd.DataFrame(native_data, index=native_ser_index)
+    snow_df = pd.DataFrame(snow_data, index=snow_ser_index)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df, check_dtype=False)
+
+    # Create DataFrame with list data and Index index.
+    native_index = native_pd.Index([22, 11])
+    snow_index = pd.Index([22, 11])
+    native_df = native_pd.DataFrame(native_data, index=native_index)
+    snow_df = pd.DataFrame(snow_data, index=snow_index)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df, check_dtype=False)
+
+    # Create DataFrame with list data, Series index, and columns.
+    columns = ["A", "B", "C"]
+    native_df = native_pd.DataFrame(
+        native_data, index=native_ser_index, columns=columns
+    )
+    snow_df = pd.DataFrame(snow_data, index=snow_ser_index, columns=columns)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df, check_dtype=False)
+
+    # Create DataFrame with list data, Index index, and Index columns.
+    native_columns = native_pd.Index(columns)
+    snow_columns = pd.Index(columns)
+    native_df = native_pd.DataFrame(
+        native_data, index=native_index, columns=native_columns
+    )
+    snow_df = pd.DataFrame(snow_data, index=snow_index, columns=snow_columns)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    "data1, data2", [("series", "series"), ("series", "index"), ("index", "index")]
+)
+def test_create_series_with_series_index_list_data(data1, data2):
+    # Create the list data.
+    native_data1 = (
+        native_pd.Series([11, 22, 33])
+        if data1 == "series"
+        else native_pd.Index([11, 22, 33])
+    )
+    native_data2 = (
+        native_pd.Series([44, 55, 66])
+        if data2 == "series"
+        else native_pd.Index([44, 55, 66])
+    )
+    snow_data1 = (
+        pd.Series([11, 22, 33]) if data1 == "series" else pd.Index([11, 22, 33])
+    )
+    snow_data2 = (
+        pd.Series([44, 55, 66]) if data2 == "series" else pd.Index([44, 55, 66])
+    )
+    native_data = [native_data1, native_data2]
+    snow_data = [snow_data1, snow_data2]
+
+    # Create Series only with list data.
+    native_df = native_pd.Series(native_data)
+    snow_df = pd.Series(snow_data)
+    with SqlCounter(query_count=1):
+        assert_series_equal(snow_df, native_df)
+
+    # Create Series with list data and Series index.
+    native_ser_index = native_pd.Series([2, 11])
+    snow_ser_index = pd.Series([2, 11])
+    native_df = native_pd.Series(native_data, index=native_ser_index)
+    snow_df = pd.Series(snow_data, index=snow_ser_index)
+    with SqlCounter(query_count=1):
+        assert_series_equal(snow_df, native_df, check_dtype=False)
+
+    # Create Series with list data and Index index.
+    native_index = native_pd.Index([22, 11])
+    snow_index = pd.Index([22, 11])
+    native_df = native_pd.Series(native_data, index=native_index)
+    snow_df = pd.Series(snow_data, index=snow_index)
+    with SqlCounter(query_count=1):
+        assert_series_equal(snow_df, native_df, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    "data1, data2", [("series", "series"), ("series", "index"), ("index", "index")]
+)
+def test_create_series_with_series_index_dict_data(data1, data2):
+    # Create the dict data.
+    native_data1 = (
+        native_pd.Series([1, 2, 3]) if data1 == "series" else native_pd.Index([1, 2, 3])
+    )
+    native_data2 = (
+        native_pd.Series([4, 5, 6]) if data2 == "series" else native_pd.Index([4, 5, 6])
+    )
+    snow_data1 = pd.Series([1, 2, 3]) if data1 == "series" else pd.Index([1, 2, 3])
+    snow_data2 = pd.Series([4, 5, 6]) if data2 == "series" else pd.Index([4, 5, 6])
+    native_data = {11: native_data1, 22: native_data2}
+    snow_data = {11: snow_data1, 22: snow_data2}
+
+    # Create DataFrame only with dict data.
+    native_df = native_pd.Series(native_data)
+    snow_df = pd.Series(snow_data)
+    with SqlCounter(query_count=1):
+        assert_series_equal(snow_df, native_df)
+
+    # Create DataFrame with dict data and Series index.
+    native_ser_index = native_pd.Series([9, 2, 999])
+    snow_ser_index = pd.Series([9, 2, 999])
+    native_df = native_pd.Series(native_data, index=native_ser_index)
+    snow_df = pd.Series(snow_data, index=snow_ser_index)
+    with SqlCounter(query_count=1):
+        assert_series_equal(snow_df, native_df)
+
+    # Create DataFrame with dict data and Index index.
+    native_index = native_pd.Index([9, 2, 999])
+    snow_index = pd.Index([9, 2, 999])
+    native_df = native_pd.Series(native_data, index=native_index)
+    snow_df = pd.Series(snow_data, index=snow_index)
+    with SqlCounter(query_count=1):
+        assert_series_equal(snow_df, native_df)
+
+
+def test_create_df_with_mixed_series_index_dict_data():
+    # Create the dict data.
+    native_data1 = native_pd.Series([1, 2, 3])
+    native_data2 = native_pd.Index([4, 5, 6])
+    data3 = [7, 8, 9]
+    snow_data1 = pd.Series([1, 2, 3])
+    snow_data2 = pd.Index([4, 5, 6])
+    native_data = {"A": native_data1, "B": native_data2, "C": data3}
+    snow_data = {"A": snow_data1, "B": snow_data2, "C": data3}
+
+    # Create DataFrame only with dict data.
+    native_df = native_pd.DataFrame(native_data)
+    snow_df = pd.DataFrame(snow_data)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+    # Create DataFrame with dict data and Series index.
+    native_ser_index = native_pd.Series([9, 2, 999])
+    snow_ser_index = pd.Series([9, 2, 999])
+    native_df = native_pd.DataFrame(native_data, index=native_ser_index)
+    snow_df = pd.DataFrame(snow_data, index=snow_ser_index)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+    # Create DataFrame with dict data and Index index.
+    native_index = native_pd.Index([9, 2, 999])
+    snow_index = pd.Index([9, 2, 999])
+    native_df = native_pd.DataFrame(native_data, index=native_index)
+    snow_df = pd.DataFrame(snow_data, index=snow_index)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+    # Create DataFrame with dict data, Series index, and columns.
+    columns = ["A", "B", "C"]
+    native_df = native_pd.DataFrame(
+        native_data, index=native_ser_index, columns=columns
+    )
+    snow_df = pd.DataFrame(snow_data, index=snow_ser_index, columns=columns)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+    # Create DataFrame with dict data, Index index, and Index columns.
+    native_columns = native_pd.Index(columns)
+    snow_columns = pd.Index(columns)
+    native_df = native_pd.DataFrame(
+        native_data, index=native_index, columns=native_columns
+    )
+    snow_df = pd.DataFrame(snow_data, index=snow_index, columns=snow_columns)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+
+def test_create_df_with_mixed_series_index_list_data():
+    # Create the list data.
+    native_data1 = native_pd.Series([1, 2, 3])
+    native_data2 = native_pd.Index([4, 5, 6])
+    data3 = [7, 8, 9]
+    snow_data1 = pd.Series([1, 2, 3])
+    snow_data2 = pd.Index([4, 5, 6])
+    # Need to convert data3 to an Index since native pandas tries to perform `get_indexer` on it.
+    native_data = [native_data1, native_data2, native_pd.Index(data3)]
+    snow_data = [snow_data1, snow_data2, data3]
+
+    # Create DataFrame only with list data.
+    native_df = native_pd.DataFrame(native_data)
+    snow_df = pd.DataFrame(snow_data)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+    # Create DataFrame with list data and Series index.
+    native_ser_index = native_pd.Series([2, 11, 0])
+    snow_ser_index = pd.Series([2, 11, 0])
+    native_df = native_pd.DataFrame(native_data, index=native_ser_index)
+    snow_df = pd.DataFrame(snow_data, index=snow_ser_index)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df, check_dtype=False)
+
+    # Create DataFrame with list data and Index index.
+    native_index = native_pd.Index([22, 11, 0])
+    snow_index = pd.Index([22, 11, 0])
+    native_df = native_pd.DataFrame(native_data, index=native_index)
+    snow_df = pd.DataFrame(snow_data, index=snow_index)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df, check_dtype=False)
+
+    # Create DataFrame with list data, Series index, and columns.
+    columns = ["A", "B", "C"]
+    native_df = native_pd.DataFrame(
+        native_data, index=native_ser_index, columns=columns
+    )
+    snow_df = pd.DataFrame(snow_data, index=snow_ser_index, columns=columns)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df, check_dtype=False)
+
+    # Create DataFrame with list data, Index index, and Index columns.
+    native_columns = native_pd.Index(columns)
+    snow_columns = pd.Index(columns)
+    native_df = native_pd.DataFrame(
+        native_data, index=native_index, columns=native_columns
+    )
+    snow_df = pd.DataFrame(snow_data, index=snow_index, columns=snow_columns)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df, check_dtype=False)
+
+
+@pytest.mark.xfail(
+    reason="SNOW-1638397 DataFrane creation fails: reindex does not work with string index"
+)
+def test_create_df_with_series_data_and_series_index():
+    # Create the data and index.
+    native_data = native_pd.Series([1, 2, 3])
+    native_index = native_pd.Series(["A", 0, "C"])
+    snow_data = pd.Series(native_data)
+    snow_index = pd.Series(native_index)
+
+    # Create DataFrame with Series data and Series index.
+    native_df = native_pd.DataFrame(native_data, index=native_index)
+    snow_df = pd.DataFrame(snow_data, index=snow_index)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+
+@sql_count_checker(query_count=0)
+def test_create_df_with_df_index_negative():
+    with pytest.raises(ValueError, match="Index data must be 1-dimensional"):
+        native_pd.DataFrame(
+            [1, 2, 3], index=native_pd.DataFrame([[1, 2], [3, 4], [5, 6]])
+        )
+    with pytest.raises(ValueError, match="Index data must be 1-dimensional"):
+        pd.DataFrame([1, 2, 3], index=pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
+
+
+@sql_count_checker(query_count=0)
+def test_create_series_with_df_index_negative():
+    with pytest.raises(ValueError, match="Index data must be 1-dimensional"):
+        native_pd.Series([1, 2, 3], index=native_pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
+    with pytest.raises(ValueError, match="Index data must be 1-dimensional"):
+        pd.Series([1, 2, 3], index=pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
+
+
+@sql_count_checker(query_count=0)
+def test_create_series_with_df_data_negative():
+    with pytest.raises(
+        ValueError,
+        match=re.escape(
+            "The truth value of a DataFrame is ambiguous. Use a.empty, a.bool()"
+            ", a.item(), a.any() or a.all()."
+        ),
+    ):
+        native_pd.Series(native_pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
+    with pytest.raises(ValueError, match="Index data must be 1-dimensional"):
+        pd.Series(pd.DataFrame([[1, 2], [3, 4], [5, 6]]))

From da56734f3a7b14cb8d6d26c1f20882d69a21e147 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Fri, 13 Sep 2024 18:09:11 -0700
Subject: [PATCH 35/42] update df init

---
 .../plugin/extensions/dataframe_overrides.py  | 295 ++++++++++++------
 1 file changed, 206 insertions(+), 89 deletions(-)

diff --git a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
index 62c9cab4dc..a37b252e20 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
@@ -74,17 +74,21 @@
 )
 from snowflake.snowpark.modin.pandas.utils import (
     create_empty_native_pandas_frame,
-    from_non_pandas,
     from_pandas,
     is_scalar,
     raise_if_native_pandas_objects,
     replace_external_data_keys_with_empty_pandas_series,
     replace_external_data_keys_with_query_compiler,
+    try_convert_index_to_native,
 )
 from snowflake.snowpark.modin.plugin._internal.aggregation_utils import (
     is_snowflake_agg_func,
 )
-from snowflake.snowpark.modin.plugin._internal.utils import is_repr_truncated
+from snowflake.snowpark.modin.plugin._internal.utils import (
+    convert_index_to_list_of_qcs,
+    convert_index_to_qc,
+    is_repr_truncated,
+)
 from snowflake.snowpark.modin.plugin._typing import ListLike
 from snowflake.snowpark.modin.plugin.utils.error_message import (
     ErrorMessage,
@@ -459,104 +463,217 @@ def __init__(
     # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions
     # Siblings are other dataframes that share the same query compiler. We
     # use this list to update inplace when there is a shallow copy.
-    from snowflake.snowpark.modin.pandas.utils import try_convert_index_to_native
+    from snowflake.snowpark.modin.plugin.extensions.index import Index
 
     self._siblings = []
 
-    # Engine.subscribe(_update_engine)
-    if isinstance(data, (DataFrame, Series)):
-        self._query_compiler = data._query_compiler.copy()
-        if index is not None and any(i not in data.index for i in index):
-            ErrorMessage.not_implemented(
-                "Passing non-existant columns or index values to constructor not"
-                + " yet implemented."
-            )  # pragma: no cover
-        if isinstance(data, Series):
-            # We set the column name if it is not in the provided Series
-            if data.name is None:
-                self.columns = [0] if columns is None else columns
+    # 0. Setting the query compiler
+    # -----------------------------
+    if query_compiler is not None:
+        # CASE I: query_compiler
+        # If a query_compiler is passed in only use the query_compiler field to create a new DataFrame.
+        assert (
+            data is None
+        ), "Invalid DataFrame construction! Cannot pass both data and query_compiler."
+        assert (
+            index is None
+        ), "Invalid DataFrame construction! Cannot pass both index and query_compiler."
+        assert (
+            columns is None
+        ), "Invalid DataFrame construction! Cannot pass both columns and query_compiler."
+        self._query_compiler = query_compiler
+        return
+
+    if isinstance(index, DataFrame):  # pandas raises the same error
+        raise ValueError("Index data must be 1-dimensional")
+
+    # The logic followed here is:
+    # 1. Create a query_compiler from the provided data. If columns are provided, add/select the columns.
+    # 2. If an index is provided, set the index through set_index or reindex.
+    # 3. If the data is a DataFrame, perform loc to select the required index and columns from the DataFrame.
+    # 4. The resultant query_compiler is then set as the query_compiler for the DataFrame.
+
+    # 1. Setting the data (and columns)
+    # ---------------------------------
+    if isinstance(data, Index):
+        # CASE II: data is a Snowpark pandas Index
+        # If the data is an Index object, convert it to a DataFrame to make sure that the values are in the
+        # correct format: the values are a data column, not an index column.
+        if data.name is None:
+            new_name = 0 if columns is None else columns[0]
+        else:
+            new_name = data.name
+        query_compiler = data.to_frame(index=False, name=new_name)._query_compiler
+
+    elif isinstance(data, Series):
+        # CASE III: data is a Snowpark pandas Series
+        query_compiler = data._query_compiler.copy()
+        # We set the column name if it is not in the provided Series `data`.
+        if data.name is None:
+            query_compiler = query_compiler.set_columns(columns or [0])
+        if columns is not None and data.name not in columns:
             # If the columns provided are not in the named Series, pandas clears
             # the DataFrame and sets columns to the columns provided.
-            elif columns is not None and data.name not in columns:
-                self._query_compiler = from_pandas(
-                    self.__constructor__(columns=columns)
-                )._query_compiler
-            if index is not None:
-                self._query_compiler = data.loc[index]._query_compiler
-        elif columns is None and index is None:
+            query_compiler = from_pandas(
+                native_pd.DataFrame(columns=columns)
+            )._query_compiler
+
+    elif isinstance(data, DataFrame):
+        # CASE IV: data is a Snowpark pandas DataFrame
+        query_compiler = data._query_compiler.copy()
+        if columns is None and index is None:
+            # Special case IV.a: if the new DataFrame has the same columns and index as the original DataFrame,
+            # the query compiler is shared and kept track of as a sibling.
+            self._query_compiler = query_compiler
             data._add_sibling(self)
-        else:
-            if columns is not None and any(i not in data.columns for i in columns):
-                ErrorMessage.not_implemented(
-                    "Passing non-existant columns or index values to constructor not"
-                    + " yet implemented."
-                )  # pragma: no cover
-            if index is None:
-                index = slice(None)
-            if columns is None:
-                columns = slice(None)
-            self._query_compiler = data.loc[index, columns]._query_compiler
-
-    # Check type of data and use appropriate constructor
-    elif query_compiler is None:
-        distributed_frame = from_non_pandas(data, index, columns, dtype)
-        if distributed_frame is not None:
-            self._query_compiler = distributed_frame._query_compiler
             return
+        # The `columns` parameter is used to select the columns from `data` that will be in the resultant
+        # DataFrame. If a value in `columns` is not present in `data`'s columns, it will be added as a
+        # new column filled with NaN values. These columns are tracked by the `extra_columns` variable.
+        if data.columns is not None and columns is not None:
+            extra_columns = [col for col in columns if col not in data.columns]
+        else:
+            extra_columns = []
+        query_compiler = data._query_compiler.create_qc_with_extra_columns(
+            extra_columns
+        )
 
-        if isinstance(data, native_pd.Index):
-            pass
-        elif is_list_like(data) and not is_dict_like(data):
-            old_dtype = getattr(data, "dtype", None)
-            values = [
-                obj._to_pandas() if isinstance(obj, Series) else obj for obj in data
-            ]
-            if isinstance(data, np.ndarray):
-                data = np.array(values, dtype=old_dtype)
-            else:
-                try:
-                    data = type(data)(values, dtype=old_dtype)
-                except TypeError:
-                    data = values
-        elif is_dict_like(data) and not isinstance(
-            data, (native_pd.Series, Series, native_pd.DataFrame, DataFrame)
-        ):
-            if columns is not None:
-                data = {key: value for key, value in data.items() if key in columns}
-
-            if len(data) and all(isinstance(v, Series) for v in data.values()):
-                from modin.pandas import concat
-
-                new_qc = concat(data.values(), axis=1, keys=data.keys())._query_compiler
-
-                if dtype is not None:
-                    new_qc = new_qc.astype({col: dtype for col in new_qc.columns})
-                if index is not None:
-                    new_qc = new_qc.reindex(
-                        axis=0, labels=try_convert_index_to_native(index)
-                    )
+    else:
+        # CASE V: Non-Snowpark pandas data
+        if not isinstance(
+            data, (native_pd.Series, native_pd.DataFrame, native_pd.Index)
+        ) and is_list_like(data):
+            from snowflake.snowpark.modin.pandas import concat
+
+            if is_dict_like(data):
+                # Setting up keys and values for processing if all the values are Snowpark pandas objects.
                 if columns is not None:
-                    new_qc = new_qc.reindex(
-                        axis=1, labels=try_convert_index_to_native(columns)
-                    )
+                    # Reduce the dictionary to only the relevant columns as the keys.
+                    data = {key: value for key, value in data.items() if key in columns}
+
+                if len(data) and all(
+                    isinstance(v, (Index, BasePandasDataset)) for v in data.values()
+                ):
+                    # Special case V.a: data is a list/dict where all the values are Snowpark pandas objects.
+                    # Concat can only be performed with BasePandasDataset objects.
+                    # If a value is an Index, convert it to a Series where the index is the index to be set
+                    # since these values are always present in the final DataFrame.
+                    values = [
+                        Series(v, index=index) if isinstance(v, Index) else v
+                        for v in data.values()
+                    ]
+                    new_qc = concat(values, axis=1, keys=data.keys())._query_compiler
+                    if dtype is not None:
+                        new_qc = new_qc.astype({col: dtype for col in new_qc.columns})
+                    if index is not None:
+                        new_qc = new_qc.reindex(
+                            axis=0, labels=convert_index_to_qc(index)
+                        )
+                    if columns is not None:
+                        new_qc = new_qc.reindex(
+                            axis=1, labels=try_convert_index_to_native(columns)
+                        )
+                    self._query_compiler = new_qc
+                    return
+
+                # If only some data is a Snowpark pandas object, convert it to pandas objects.
+                res = {}
+                index = try_convert_index_to_native(index)
+                for k, v in data.items():
+                    if isinstance(v, (Index)):
+                        res[k] = v.to_pandas()
+                    elif isinstance(v, BasePandasDataset):
+                        # Need to perform reindex on the Series or DataFrame objects since only the data
+                        # whose index matches the given index is kept.
+                        res[k] = v.to_pandas().reindex(index=index)
+                    else:
+                        res[k] = v
+                data = res
+
+            else:  # list-like but not dict-like data.
+                if len(data) and all(
+                    isinstance(v, (Index, BasePandasDataset)) for v in data
+                ):
+                    # Special case V.c: data is a list/dict where all the values are Snowpark pandas objects.
+                    # Concat can only be performed with BasePandasDataset objects.
+                    # If a value is an Index, convert it to a Series.
+                    values = [Series(v) if isinstance(v, Index) else v for v in data]
+                    new_qc = concat(values, axis=1).T._query_compiler
+                    if dtype is not None:
+                        new_qc = new_qc.astype({col: dtype for col in new_qc.columns})
+                    if index is not None:
+                        new_qc = new_qc.set_index([convert_index_to_qc(index)])
+                    if columns is not None:
+                        if all(isinstance(v, Index) for v in data):
+                            # Special case: if all the values are Index objects, they are always present in the
+                            # final result with the provided column names. Therefore, rename the columns.
+                            new_qc = new_qc.set_columns(
+                                try_convert_index_to_native(columns)
+                            )
+                        else:
+                            new_qc = new_qc.reindex(axis=1, labels=columns)
+                    self._query_compiler = new_qc
+                    return
+
+                # If only some data is a Snowpark pandas object, convert it to pandas objects.
+                res = []
+                for v in data:
+                    if isinstance(v, (Index)):
+                        res.append(v.to_pandas())
+                    elif isinstance(v, BasePandasDataset):
+                        res.append(v.to_pandas())
+                    else:
+                        # Need to convert this is a native pandas object since native pandas incorrectly
+                        # tries to perform `get_indexer` on it.
+                        res.append(native_pd.Index(v if is_list_like(v) else [v]))
+                data = res
+
+        query_compiler = from_pandas(
+            native_pd.DataFrame(
+                data=data,
+                # Handle setting the index, if it is a lazy index, outside this block.
+                index=None if isinstance(index, (Index, Series)) else index,
+                columns=try_convert_index_to_native(columns),
+                dtype=dtype,
+                copy=copy,
+            )
+        )._query_compiler
+
+    # 2. Setting the index
+    # --------------------
+    # The index is already set if the data is a non-Snowpark pandas object.
+    # If either the data or the index is a Snowpark pandas object, set the index here.
+    if index is not None and (
+        isinstance(index, (Index, Series))
+        or isinstance(data, (Index, BasePandasDataset))
+    ):
+        if isinstance(data, (type(self), Series, type(None))):
+            # The `index` parameter is used to select the rows from `data` that will be in the resultant DataFrame.
+            # If a value in `index` is not present in `data`'s index, it will be filled with a NaN value.
+            # If data is None and an index is provided, set the index.
+            query_compiler = query_compiler.reindex(
+                axis=0, labels=convert_index_to_qc(index)
+            )
+        else:
+            # Performing set index to directly set the index column (joining on row-position instead of index).
+            query_compiler = query_compiler.set_index(
+                convert_index_to_list_of_qcs(index)
+            )
 
-                self._query_compiler = new_qc
-                return
-
-            data = {
-                k: v._to_pandas() if isinstance(v, Series) else v
-                for k, v in data.items()
-            }
-        pandas_df = native_pd.DataFrame(
-            data=try_convert_index_to_native(data),
-            index=try_convert_index_to_native(index),
-            columns=try_convert_index_to_native(columns),
-            dtype=dtype,
-            copy=copy,
+    # 3. If data is a DataFrame, filter result
+    # ----------------------------------------
+    if isinstance(data, DataFrame):
+        # To select the required index and columns for the resultant DataFrame,
+        # perform .loc[] on the created query compiler.
+        index = slice(None) if index is None else index
+        columns = slice(None) if columns is None else columns
+        query_compiler = (
+            DataFrame(query_compiler=query_compiler).loc[index, columns]._query_compiler
         )
-        self._query_compiler = from_pandas(pandas_df)._query_compiler
-    else:
-        self._query_compiler = query_compiler
+
+    # 4. Setting the query compiler
+    # -----------------------------
+    self._query_compiler = query_compiler
 
 
 @register_dataframe_accessor("__dataframe__")

From 8b47e174092d2f623e5ade75e774fb4bb02055fd Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Mon, 16 Sep 2024 15:46:29 -0700
Subject: [PATCH 36/42] moved common logic out, fixed some tests

---
 .../snowpark/modin/plugin/_internal/utils.py  | 23 +++++
 .../plugin/extensions/dataframe_overrides.py  | 40 ++++----
 .../plugin/extensions/series_overrides.py     | 17 ++--
 tests/integ/modin/frame/test_dtypes.py        | 11 ++-
 tests/integ/modin/frame/test_idxmax_idxmin.py |  2 +-
 tests/integ/modin/frame/test_insert.py        |  6 +-
 tests/integ/modin/frame/test_loc.py           | 16 ++--
 tests/integ/modin/frame/test_mask.py          |  7 +-
 tests/integ/modin/frame/test_merge.py         | 96 +++++++++----------
 tests/integ/modin/frame/test_reindex.py       |  6 +-
 tests/integ/modin/frame/test_rename.py        |  2 +-
 tests/integ/modin/frame/test_setitem.py       |  2 +-
 tests/integ/modin/frame/test_to_snowflake.py  |  6 +-
 tests/integ/modin/frame/test_where.py         |  7 +-
 .../test_df_series_creation_with_index.py     | 67 ++++---------
 .../modin/pivot/test_pivot_table_single.py    |  2 +-
 tests/integ/modin/resample/test_resample.py   | 26 ++---
 .../modin/resample/test_resample_asfreq.py    |  4 +-
 .../modin/resample/test_resample_fillna.py    | 12 +--
 tests/integ/modin/series/test_loc.py          |  6 +-
 tests/integ/modin/series/test_sort_values.py  |  2 +-
 tests/integ/modin/test_concat.py              |  2 +-
 22 files changed, 175 insertions(+), 187 deletions(-)

diff --git a/src/snowflake/snowpark/modin/plugin/_internal/utils.py b/src/snowflake/snowpark/modin/plugin/_internal/utils.py
index 178111b0f3..1e53ee34f9 100644
--- a/src/snowflake/snowpark/modin/plugin/_internal/utils.py
+++ b/src/snowflake/snowpark/modin/plugin/_internal/utils.py
@@ -12,6 +12,7 @@
 import numpy as np
 import pandas as native_pd
 from pandas._typing import Scalar
+from pandas.core.dtypes.base import ExtensionDtype
 from pandas.core.dtypes.common import is_integer_dtype, is_object_dtype, is_scalar
 from pandas.core.dtypes.inference import is_list_like
 
@@ -1998,6 +1999,28 @@ def rindex(lst: list, value: int) -> int:
     return len(lst) - lst[::-1].index(value) - 1
 
 
+def error_checking_for_init(
+    index: Any, dtype: Union[str, np.dtype, ExtensionDtype]
+) -> None:
+    """
+    Common error messages for the Series and DataFrame constructors.
+
+    Parameters
+    ----------
+    index: Any
+        The index to check.
+    dtype: str, numpy.dtype, or ExtensionDtype
+        The dtype to check.
+    """
+    from modin.pandas import DataFrame
+
+    if isinstance(index, DataFrame):  # pandas raises the same error
+        raise ValueError("Index data must be 1-dimensional")
+
+    if dtype == "category":
+        raise NotImplementedError("pandas type category is not implemented")
+
+
 def convert_index_to_qc(index: Any) -> Any:
     """
     Method to convert an object representing an index into a query compiler for set_index or reindex.
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
index a37b252e20..890c486bb0 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
@@ -87,6 +87,7 @@
 from snowflake.snowpark.modin.plugin._internal.utils import (
     convert_index_to_list_of_qcs,
     convert_index_to_qc,
+    error_checking_for_init,
     is_repr_truncated,
 )
 from snowflake.snowpark.modin.plugin._typing import ListLike
@@ -484,8 +485,7 @@ def __init__(
         self._query_compiler = query_compiler
         return
 
-    if isinstance(index, DataFrame):  # pandas raises the same error
-        raise ValueError("Index data must be 1-dimensional")
+    error_checking_for_init(index, dtype)
 
     # The logic followed here is:
     # 1. Create a query_compiler from the provided data. If columns are provided, add/select the columns.
@@ -500,6 +500,7 @@ def __init__(
         # If the data is an Index object, convert it to a DataFrame to make sure that the values are in the
         # correct format: the values are a data column, not an index column.
         if data.name is None:
+            # If no name is provided, the default name is 0.
             new_name = 0 if columns is None else columns[0]
         else:
             new_name = data.name
@@ -510,6 +511,7 @@ def __init__(
         query_compiler = data._query_compiler.copy()
         # We set the column name if it is not in the provided Series `data`.
         if data.name is None:
+            # If no name is provided, the default name is 0.
             query_compiler = query_compiler.set_columns(columns or [0])
         if columns is not None and data.name not in columns:
             # If the columns provided are not in the named Series, pandas clears
@@ -607,9 +609,7 @@ def __init__(
                         if all(isinstance(v, Index) for v in data):
                             # Special case: if all the values are Index objects, they are always present in the
                             # final result with the provided column names. Therefore, rename the columns.
-                            new_qc = new_qc.set_columns(
-                                try_convert_index_to_native(columns)
-                            )
+                            new_qc = new_qc.set_columns(columns)
                         else:
                             new_qc = new_qc.reindex(axis=1, labels=columns)
                     self._query_compiler = new_qc
@@ -618,14 +618,16 @@ def __init__(
                 # If only some data is a Snowpark pandas object, convert it to pandas objects.
                 res = []
                 for v in data:
-                    if isinstance(v, (Index)):
-                        res.append(v.to_pandas())
-                    elif isinstance(v, BasePandasDataset):
+                    if isinstance(v, (Index, BasePandasDataset)):
                         res.append(v.to_pandas())
+                    # elif is_dict_like(v) or isinstance(v, (native_pd.Series, native_pd.DataFrame, native_pd.Index)):
+                    #     res.append(v)
                     else:
-                        # Need to convert this is a native pandas object since native pandas incorrectly
-                        # tries to perform `get_indexer` on it.
-                        res.append(native_pd.Index(v if is_list_like(v) else [v]))
+                        # # Need to convert this is a native pandas object since native pandas incorrectly
+                        # # tries to perform `get_indexer` on it. Specify dtype=object so that pandas does not
+                        # # cast the data provided. In some cases, None turns to NaN, which is not desired.
+                        # res.append(native_pd.Index(v, dtype=object) if is_list_like(v) else v)
+                        res.append(v)
                 data = res
 
         query_compiler = from_pandas(
@@ -662,13 +664,14 @@ def __init__(
 
     # 3. If data is a DataFrame, filter result
     # ----------------------------------------
-    if isinstance(data, DataFrame):
-        # To select the required index and columns for the resultant DataFrame,
-        # perform .loc[] on the created query compiler.
-        index = slice(None) if index is None else index
-        columns = slice(None) if columns is None else columns
+    if isinstance(data, DataFrame) and columns is not None:
+        # To select the columns for the resultant DataFrame, perform .loc[] on the created query compiler.
+        # This step is performed to ensure that the right columns are picked from the InternalFrame since we
+        # never explicitly drop the unwanted columns.
         query_compiler = (
-            DataFrame(query_compiler=query_compiler).loc[index, columns]._query_compiler
+            DataFrame(query_compiler=query_compiler)
+            .loc[slice(None), columns]
+            ._query_compiler
         )
 
     # 4. Setting the query compiler
@@ -1181,6 +1184,9 @@ def insert(
     # Dictionary keys are treated as index column and this should be joined with
     # index of target dataframe. This behavior is similar to 'value' being DataFrame
     # or Series, so we simply create Series from dict data here.
+    if isinstance(value, set):
+        raise TypeError(f"'{type(value).__name__}' type is unordered")
+
     if isinstance(value, dict):
         value = Series(value, name=column)
 
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
index 1719a9d167..bb3bb612b5 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
@@ -45,6 +45,7 @@
 from snowflake.snowpark.modin.plugin._internal.utils import (
     convert_index_to_list_of_qcs,
     convert_index_to_qc,
+    error_checking_for_init,
 )
 from snowflake.snowpark.modin.plugin._typing import DropKeep, ListLike
 from snowflake.snowpark.modin.plugin.utils.error_message import (
@@ -367,8 +368,7 @@ def __init__(
             self.name = name
         return
 
-    if isinstance(index, spd.DataFrame):  # pandas raises the same error
-        raise ValueError("Index data must be 1-dimensional")
+    error_checking_for_init(index, dtype)
 
     if isinstance(data, spd.DataFrame):
         # pandas raises an ambiguous error:
@@ -398,11 +398,12 @@ def __init__(
     else:
         # CASE IV: Non-Snowpark pandas data
         # If the data is not a Snowpark pandas object, convert it to a query compiler.
+        # The query compiler uses the '__reduced__' name internally as a column name to represent pandas
+        # Series objects that are not explicitly assigned a name.
+        # This helps to distinguish between an N-element Series and 1xN DataFrame.
         name = name or MODIN_UNNAMED_SERIES_LABEL
-        if (
-            isinstance(data, (native_pd.Series, native_pd.Index))
-            and data.name is not None
-        ):
+        if hasattr(data, "name") and data.name is not None:
+            # If data is an object that has a name field, use that as the name of the new Series.
             name = data.name
         # If any of the values are Snowpark pandas objects, convert them to native pandas objects.
         if not isinstance(
@@ -422,9 +423,9 @@ def __init__(
             native_pd.DataFrame(
                 native_pd.Series(
                     data=data,
-                    dtype=dtype,
-                    # Handle setting the index, if it is a lazy index, outside this block.
+                    # If the index is a lazy index, handle setting it outside this block.
                     index=None if isinstance(index, (Index, Series)) else index,
+                    dtype=dtype,
                     name=name,
                     copy=copy,
                     fastpath=fastpath,
diff --git a/tests/integ/modin/frame/test_dtypes.py b/tests/integ/modin/frame/test_dtypes.py
index b078b31f6c..d4cff60164 100644
--- a/tests/integ/modin/frame/test_dtypes.py
+++ b/tests/integ/modin/frame/test_dtypes.py
@@ -473,22 +473,23 @@ def test_empty_index(index, expected_index_dtype):
 
 
 @pytest.mark.parametrize(
-    "input_data, type_msg",
+    "input_data, dtype, type_msg",
     [
-        (native_pd.Categorical([1, 2, 3, 1, 2, 3]), "category"),
-        (native_pd.Categorical(["a", "b", "c", "a", "b", "c"]), "category"),
+        (native_pd.Categorical([1, 2, 3, 1, 2, 3]), "category", "category"),
+        (native_pd.Categorical(["a", "b", "c", "a", "b", "c"]), "category", "category"),
         (
             native_pd.period_range("2015-02-03 11:22:33.4567", periods=5, freq="s"),
+            None,
             r"period\[s\]",
         ),
     ],
 )
 @sql_count_checker(query_count=0)
-def test_unsupported_dtype_raises(input_data, type_msg) -> None:
+def test_unsupported_dtype_raises(input_data, dtype, type_msg) -> None:
     with pytest.raises(
         NotImplementedError, match=f"pandas type {type_msg} is not implemented"
     ):
-        pd.Series(input_data)
+        pd.Series(input_data, dtype=dtype)
 
 
 @pytest.mark.parametrize(
diff --git a/tests/integ/modin/frame/test_idxmax_idxmin.py b/tests/integ/modin/frame/test_idxmax_idxmin.py
index 1059abf9d8..87041060bd 100644
--- a/tests/integ/modin/frame/test_idxmax_idxmin.py
+++ b/tests/integ/modin/frame/test_idxmax_idxmin.py
@@ -194,7 +194,7 @@ def test_idxmax_idxmin_with_dates(func, axis):
     )
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 @pytest.mark.parametrize("func", ["idxmax", "idxmin"])
 @pytest.mark.parametrize(
     "axis",
diff --git a/tests/integ/modin/frame/test_insert.py b/tests/integ/modin/frame/test_insert.py
index 414889d337..5f995e24a7 100644
--- a/tests/integ/modin/frame/test_insert.py
+++ b/tests/integ/modin/frame/test_insert.py
@@ -212,7 +212,7 @@ def test_insert_dataframe_shape_negative(native_df):
         (np.ones((1, 1)), 1),
         ([1, 2], 1),  # len < number of rows
         ((6, 7, 8, 9), 1),  # len > number of rows
-        ({"a", "b", "c"}, 1),  # python set
+        ({"a", "b", "c"}, 0),  # python set
     ],
 )
 def test_insert_value_negative(native_df, value, expected_query_count):
@@ -725,12 +725,10 @@ def test_insert_multiindex_column_negative(snow_df, columns, insert_label):
         [["a", "b", "b", "d", "e"], ["x", "y", "z", "u", "u"], True],
     ],
 )
-@sql_count_checker(query_count=1, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_insert_with_unique_and_duplicate_index_values(
     index_values, other_index_values, expect_mismatch
 ):
-    # Two of the three joins come from creating the DataFrame with non-Snowpark pandas data
-    # and a Snowpark pandas Index. The third join is from the insert operation.
     data = list(range(5))
     data1 = {"foo": data}
     data2 = {"bar": [val * 10 for val in data]}
diff --git a/tests/integ/modin/frame/test_loc.py b/tests/integ/modin/frame/test_loc.py
index b1f89365eb..81afb9cfd8 100644
--- a/tests/integ/modin/frame/test_loc.py
+++ b/tests/integ/modin/frame/test_loc.py
@@ -3945,12 +3945,12 @@ def test_raise_set_cell_with_list_like_value_error():
                 reason="SNOW-1652608 result series name incorrectly set"
             ),
         ),  # 1 join fron df creation, 1 join from squeeze, 2 joins from to_pandas during eval
-        (["1 day", "3 days"], 1, 2),
-        ([True, False, False], 1, 2),
-        (slice(None, "4 days"), 1, 1),
-        (slice(None, "4 days", 2), 1, 1),
-        (slice("1 day", "2 days"), 1, 1),
-        (slice("1 day 1 hour", "2 days 2 hours", -1), 1, 1),
+        (["1 day", "3 days"], 1, 1),
+        ([True, False, False], 1, 1),
+        (slice(None, "4 days"), 1, 0),
+        (slice(None, "4 days", 2), 1, 0),
+        (slice("1 day", "2 days"), 1, 0),
+        (slice("1 day 1 hour", "2 days 2 hours", -1), 1, 0),
     ],
 )
 def test_df_loc_get_with_timedelta(key, query_count, join_count):
@@ -4017,7 +4017,7 @@ def test_df_loc_get_with_timedelta(key, query_count, join_count):
         ),
     ],
 )
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=2)
 def test_df_loc_get_with_timedelta_behavior_difference(key, expected_result):
     # In these test cases, native pandas raises a KeyError but Snowpark pandas works correctly.
     data = {
@@ -4037,7 +4037,7 @@ def test_df_loc_get_with_timedelta_behavior_difference(key, expected_result):
     assert_frame_equal(actual_result, expected_result)
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=3, join_count=1)
 def test_df_loc_get_with_timedeltaindex_key():
     data = {
         "A": [1, 2, 3],
diff --git a/tests/integ/modin/frame/test_mask.py b/tests/integ/modin/frame/test_mask.py
index 53afbd7bf8..c6b1290902 100644
--- a/tests/integ/modin/frame/test_mask.py
+++ b/tests/integ/modin/frame/test_mask.py
@@ -683,7 +683,7 @@ def test_dataframe_mask_with_duplicated_index_aligned(cond_frame, other):
         native_other = other
         snow_other = other
 
-    expected_join_count = 2 if isinstance(other, int) else 3
+    expected_join_count = 1 if isinstance(other, int) else 2
     with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(
             snow_df,
@@ -694,9 +694,8 @@ def test_dataframe_mask_with_duplicated_index_aligned(cond_frame, other):
         )
 
 
-# Three extra joins when creating the 3 snowpark pandas dataframes with non-Snowpark pandas
-# data and Snowpark pandas Index.
-@sql_count_checker(query_count=1, join_count=5)
+# Three extra queries to convert to native index for dataframe constructor when creating the 3 snowpark pandas dataframes
+@sql_count_checker(query_count=4, join_count=2)
 def test_dataframe_mask_with_duplicated_index_unaligned():
     data = [3, 4, 5, 2]
     df_index = pd.Index([2, 1, 2, 3], name="index")
diff --git a/tests/integ/modin/frame/test_merge.py b/tests/integ/modin/frame/test_merge.py
index 8b9b5472e3..04f0ca42fd 100644
--- a/tests/integ/modin/frame/test_merge.py
+++ b/tests/integ/modin/frame/test_merge.py
@@ -276,13 +276,13 @@ def _verify_merge(
 
 
 @pytest.mark.parametrize("on", ["A", "B", ["A", "B"], ("A", "B")])
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_on(left_df, right_df, on, how, sort):
     _verify_merge(left_df, right_df, how, on=on, sort=sort)
 
 
 @pytest.mark.parametrize("on", ["left_i", "right_i"])
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_on_index_columns(left_df, right_df, how, on, sort):
     # Change left_df to: columns=["right_i", "B", "left_c", "left_d"] index=["left_i"]
     left_df = left_df.rename(columns={"A": "right_i"})
@@ -361,7 +361,7 @@ def test_join_type_mismatch_diff_with_native_pandas(index1, index2, expected_res
 
 
 @pytest.mark.parametrize("on", ["A", "B", "C"])
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_on_index_columns_with_multiindex(left_df, right_df, how, on, sort):
     # Change left_df to: columns = ["C", "left_d"] index = ["A", "B"]
     left_df = left_df.rename(columns={"left_c": "C"}).set_index(["A", "B"])
@@ -370,7 +370,7 @@ def test_merge_on_index_columns_with_multiindex(left_df, right_df, how, on, sort
     _verify_merge(left_df, right_df, how, on=on, sort=sort)
 
 
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_on_multiindex_with_non_multiindex(left_df, right_df, how, sort):
     # Change left_df to: columns = ["A", "B"] index = ["left_c", "left_d"]
     left_df = left_df.set_index(["left_c", "left_d"])
@@ -392,29 +392,29 @@ def test_merge_on_multiindex_with_non_multiindex(left_df, right_df, how, sort):
         (["A", "left_i"], ["B", "right_i"]),  # Mix of index and data join keys
     ],
 )
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_left_on_right_on(left_df, right_df, how, left_on, right_on, sort):
     _verify_merge(left_df, right_df, how, left_on=left_on, right_on=right_on, sort=sort)
 
 
 @pytest.mark.parametrize("left_on", ["left_i", "A", "B"])
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_left_on_right_index(left_df, right_df, how, left_on, sort):
     _verify_merge(left_df, right_df, how, left_on=left_on, right_index=True, sort=sort)
 
 
 @pytest.mark.parametrize("right_on", ["right_i", "A", "B"])
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_left_index_right_on(left_df, right_df, how, right_on, sort):
     _verify_merge(left_df, right_df, how, left_index=True, right_on=right_on, sort=sort)
 
 
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_on_index_single_index(left_df, right_df, how, sort):
     _verify_merge(left_df, right_df, how, left_index=True, right_index=True, sort=sort)
 
 
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_on_index_multiindex_common_labels(left_df, right_df, how, sort):
     left_df = left_df.set_index("A", append=True)  # index columns ['left_i', 'A']
     right_df = right_df.set_index("A", append=True)  # index columns ['right_i', 'A']
@@ -444,7 +444,7 @@ def test_merge_on_index_multiindex_common_labels_with_none(
         )
 
 
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_on_index_multiindex_equal_labels(left_df, right_df, how, sort):
     # index columns ['A', 'B]
     left_df = left_df.set_index(["A", "B"])
@@ -463,7 +463,7 @@ def test_merge_left_index_right_index_single_to_multi(left_df, right_df, how, so
         if how == "inner" and sort is False:
             pytest.skip("pandas bug: https://github.com/pandas-dev/pandas/issues/55774")
         else:
-            with SqlCounter(query_count=3, join_count=5):
+            with SqlCounter(query_count=3, join_count=1):
                 _verify_merge(
                     left_df,
                     right_df,
@@ -489,7 +489,7 @@ def test_merge_left_index_right_index_single_to_multi(left_df, right_df, how, so
             .merge(right_df.to_pandas(), how=how, on="left_i", sort=sort)
             .reset_index(drop=True)
         )
-        with SqlCounter(query_count=1, join_count=3):
+        with SqlCounter(query_count=1, join_count=1):
             assert_snowpark_pandas_equal_to_pandas(
                 snow_res.reset_index(drop=True), native_res
             )
@@ -500,7 +500,7 @@ def test_merge_left_index_right_index_multi_to_single(left_df, right_df, how, so
         "right_i", append=True
     )  # index columns ['left_i', 'right_i']
     if how in ("left", "inner"):
-        with SqlCounter(query_count=3, join_count=5):
+        with SqlCounter(query_count=3, join_count=1):
             _verify_merge(
                 left_df, right_df, how=how, left_index=True, right_index=True, sort=sort
             )
@@ -519,13 +519,13 @@ def test_merge_left_index_right_index_multi_to_single(left_df, right_df, how, so
             .merge(right_df.to_pandas(), how=how, on="right_i", sort=sort)
             .reset_index(drop=True)
         )
-        with SqlCounter(query_count=1, join_count=3):
+        with SqlCounter(query_count=1, join_count=1):
             assert_snowpark_pandas_equal_to_pandas(
                 snow_res.reset_index(drop=True), native_res
             )
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_merge_left_index_right_index_no_common_names_negative(left_df, right_df):
     left_df = left_df.set_index("B", append=True)  # index columns ['left_i', 'B']
     right_df = right_df.set_index("A", append=True)  # index columns ['right_i', 'A']
@@ -543,7 +543,7 @@ def test_merge_left_index_right_index_no_common_names_negative(left_df, right_df
     )
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_merge_left_index_right_index_none_as_common_label_negative(left_df, right_df):
     # index columns [None, 'B']
     left_df = left_df.reset_index(drop=True).set_index("B", append=True)
@@ -563,7 +563,7 @@ def test_merge_left_index_right_index_none_as_common_label_negative(left_df, rig
     )
 
 
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_cross(left_df, right_df, sort):
     eval_snowpark_pandas_result(
         left_df,
@@ -587,7 +587,7 @@ def test_merge_cross(left_df, right_df, sort):
         {"left_index": True, "right_on": "A"},
     ],
 )
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=3)
 def test_merge_non_empty_with_empty(left_df, empty_df, how, kwargs, sort):
     _verify_merge(left_df, empty_df, how, sort=sort, **kwargs)
 
@@ -601,7 +601,7 @@ def test_merge_non_empty_with_empty(left_df, empty_df, how, kwargs, sort):
         {"left_index": True, "right_on": "A"},
     ],
 )
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=3)
 def test_merge_empty_with_non_empty(empty_df, right_df, how, kwargs, sort):
     # Native pandas returns incorrect column order when left frame is empty.
     # https://github.com/pandas-dev/pandas/issues/51929
@@ -637,7 +637,7 @@ def test_merge_empty_with_non_empty(empty_df, right_df, how, kwargs, sort):
         (None, None, ["A", "B"], True, False),  # left.num_index_levels != len(right_on)
     ],
 )
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_merge_mis_specified_negative(
     left_df, right_df, on, left_on, right_on, left_index, right_index
 ):
@@ -666,7 +666,7 @@ def test_merge_mis_specified_negative(
         (None, None, None, False, True),  # right_index is set to True
     ],
 )
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_merge_cross_mis_specified_negative(
     left_df, right_df, on, left_on, right_on, left_index, right_index
 ):
@@ -704,7 +704,7 @@ def test_merge_cross_mis_specified_negative(
         (0.0, 0.0, {"suffixes": ("_x", None)}),
     ],
 )
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_suffix(left_df, right_df, left_col, right_col, kwargs):
     left_df = left_df.rename(columns={"A": left_col})
     right_df = right_df.rename(columns={"A": right_col})
@@ -720,7 +720,7 @@ def test_merge_suffix(left_df, right_df, left_col, right_col, kwargs):
     )
 
 
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_duplicate_suffix(left_df, right_df):
     eval_snowpark_pandas_result(
         left_df,
@@ -734,7 +734,7 @@ def test_merge_duplicate_suffix(left_df, right_df):
     )
 
 
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_label_conflict_with_suffix(left_df, right_df):
     # Test the behavior when adding suffix crates a conflict with another label.
     # Note: This raises a warning in pandas 2.0 and will raise an error in future
@@ -758,7 +758,7 @@ def test_merge_label_conflict_with_suffix(left_df, right_df):
     )
 
 
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_non_str_suffix(left_df, right_df):
     eval_snowpark_pandas_result(
         left_df,
@@ -776,7 +776,7 @@ def test_merge_non_str_suffix(left_df, right_df):
     "suffixes",
     [(None, None), ("", None), (None, ""), ("", "")],
 )
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_merge_empty_suffix_negative(left_df, right_df, suffixes):
     eval_snowpark_pandas_result(
         left_df,
@@ -794,7 +794,7 @@ def test_merge_empty_suffix_negative(left_df, right_df, suffixes):
     "suffixes",
     [("a", "b", "c"), tuple("a")],
 )
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_merge_suffix_length_error_negative(left_df, right_df, suffixes):
     eval_snowpark_pandas_result(
         left_df,
@@ -808,7 +808,7 @@ def test_merge_suffix_length_error_negative(left_df, right_df, suffixes):
     )
 
 
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_duplicate_labels(left_df, right_df):
     # Change left_df columns to ["A", "B", "left_c", "left_c"]
     # 'left_c' is a duplicate label.
@@ -824,7 +824,7 @@ def test_merge_duplicate_labels(left_df, right_df):
     )
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_merge_duplicate_join_keys_negative(left_df, right_df):
     # Change left_df columns to ["A", "B", "left_c", "left_c"]
     # 'left_c' is a duplicate label. This can not be used as join key.
@@ -860,14 +860,14 @@ def test_merge_with_self():
 
 
 @pytest.mark.parametrize("on", ["A", "B"])
-@sql_count_checker(query_count=4, join_count=4)
+@sql_count_checker(query_count=4, join_count=1)
 def test_merge_with_series(left_df, right_df, how, on, sort):
     native_series = right_df.to_pandas()[on]
     snow_series = pd.Series(native_series)
     _verify_merge(left_df, snow_series, how=how, on=on, sort=sort)
 
 
-@sql_count_checker(query_count=1, join_count=1)
+@sql_count_checker(query_count=1)
 def test_merge_with_unnamed_series_negative(left_df):
     native_series = native_pd.Series([1, 2, 3])
     snow_series = pd.Series(native_series)
@@ -922,7 +922,7 @@ def test_merge_outer_with_nan(dtype):
     _verify_merge(right, left, "outer", on="key")
 
 
-@sql_count_checker(query_count=5, join_count=5)
+@sql_count_checker(query_count=5, join_count=1)
 def test_merge_different_index_names():
     left = pd.DataFrame({"a": [1]}, index=pd.Index([1], name="c"))
     right = pd.DataFrame({"a": [1]}, index=pd.Index([1], name="d"))
@@ -937,13 +937,13 @@ def test_merge_different_index_names():
     )
 
 
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_no_join_keys(left_df, right_df, how, sort):
     _verify_merge(left_df, right_df, how, sort=sort)
 
 
 @pytest.mark.parametrize("left_name, right_name", [("left_a", "right_a"), (1, "1")])
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_merge_no_join_keys_negative(left_name, right_name, left_df, right_df):
     left_df = left_df.rename(columns={"A": left_name, "B": "left_b"})
     right_df = right_df.rename(columns={"A": right_name, "B": "right_b"})
@@ -978,7 +978,7 @@ def test_merge_no_join_keys_common_index_negative(left_df, right_df):
     )
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_merge_no_join_keys_common_index_with_data_negative(left_df, right_df):
     left_df = left_df.rename(columns={"A": "left_a", "B": "left_b"})
     right_df = right_df.rename(columns={"A": "right_a", "B": "left_i"})
@@ -1002,16 +1002,16 @@ def test_merge_no_join_keys_common_index_with_data_negative(left_df, right_df):
 @pytest.mark.parametrize(
     "left_on, right_on, expected_query_count, expected_join_count",
     [
-        (np.array(["a", "b", "c", "x", "y"]), "right_d", 5, 7),
-        ([np.array(["a", "b", "c", "x", "y"]), "A"], ["right_d", "A"], 5, 7),
-        ("left_d", np.array(["a", "b", "c", "x", "y"]), 5, 7),
-        (["left_d", "A"], [np.array(["a", "b", "c", "x", "y"]), "A"], 5, 7),
-        (["left_d", "A"], (np.array(["a", "b", "c", "x", "y"]), "A"), 5, 7),  # tuple
+        (np.array(["a", "b", "c", "x", "y"]), "right_d", 5, 2),
+        ([np.array(["a", "b", "c", "x", "y"]), "A"], ["right_d", "A"], 5, 2),
+        ("left_d", np.array(["a", "b", "c", "x", "y"]), 5, 2),
+        (["left_d", "A"], [np.array(["a", "b", "c", "x", "y"]), "A"], 5, 2),
+        (["left_d", "A"], (np.array(["a", "b", "c", "x", "y"]), "A"), 5, 2),  # tuple
         (
             np.array(["a", "b", "c", "x", "y"]),
             np.array(["x", "y", "c", "a", "b"]),
             7,
-            9,
+            3,
         ),
     ],
 )
@@ -1022,7 +1022,7 @@ def test_merge_on_array_like_keys(
         _verify_merge(left_df, right_df, how=how, left_on=left_on, right_on=right_on)
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_merge_on_array_like_keys_conflict_negative(left_df, right_df):
     left_on = np.array(["a", "b", "c", "x", "y"])
     right_on = np.array(["x", "y", "c", "a", "b"])
@@ -1049,7 +1049,7 @@ def test_merge_on_array_like_keys_conflict_negative(left_df, right_df):
         np.array(["a", "b", "c", "a", "b", "c"]),  # too long
     ],
 )
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=2)
 def test_merge_on_array_like_keys_length_mismatch_negative(left_df, right_df, left_on):
     # Native pandas raises
     # ValueError: The truth value of an array with more than one element is ambiguous
@@ -1061,22 +1061,22 @@ def test_merge_on_array_like_keys_length_mismatch_negative(left_df, right_df, le
         left_df.merge(right_df, left_on=left_on, right_on="right_d")
 
 
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_with_indicator(left_df, right_df, how):
     _verify_merge(left_df, right_df, how, on="A", indicator=True)
 
 
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_with_indicator_cross_join(left_df, right_df):
     _verify_merge(left_df, right_df, how="cross", indicator=True)
 
 
-@sql_count_checker(query_count=3, join_count=5)
+@sql_count_checker(query_count=3, join_count=1)
 def test_merge_with_indicator_explicit_name(left_df, right_df):
     _verify_merge(left_df, right_df, "outer", on="A", indicator="indicator_col")
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_merge_with_invalid_indicator_type_negative(left_df, right_df):
     eval_snowpark_pandas_result(
         left_df,
@@ -1092,7 +1092,7 @@ def test_merge_with_invalid_indicator_type_negative(left_df, right_df):
     )
 
 
-@sql_count_checker(query_count=2, join_count=2)
+@sql_count_checker(query_count=2)
 def test_merge_with_indicator_explicit_name_negative(left_df, right_df):
     left_df = left_df.rename(columns={"left_c": "_merge"})
     eval_snowpark_pandas_result(
diff --git a/tests/integ/modin/frame/test_reindex.py b/tests/integ/modin/frame/test_reindex.py
index 98d0a41e7a..de1aacd786 100644
--- a/tests/integ/modin/frame/test_reindex.py
+++ b/tests/integ/modin/frame/test_reindex.py
@@ -209,7 +209,7 @@ def perform_reindex(df):
             perform_reindex,
         )
 
-    @sql_count_checker(query_count=1, join_count=2)
+    @sql_count_checker(query_count=2, join_count=1)
     @pytest.mark.parametrize("limit", [None, 1, 2, 100])
     @pytest.mark.parametrize("method", ["bfill", "backfill", "pad", "ffill"])
     def test_reindex_index_datetime_with_fill(self, limit, method):
@@ -248,7 +248,7 @@ def test_reindex_index_non_overlapping_index(self):
             snow_df, native_df, lambda df: df.reindex(axis=0, labels=list("EFG"))
         )
 
-    @sql_count_checker(query_count=1, join_count=2)
+    @sql_count_checker(query_count=2, join_count=1)
     def test_reindex_index_non_overlapping_datetime_index(self):
         date_index = native_pd.date_range("1/1/2010", periods=6, freq="D")
         native_df = native_pd.DataFrame(
@@ -273,7 +273,7 @@ def perform_reindex(df):
             snow_df, native_df, perform_reindex, check_freq=False
         )
 
-    @sql_count_checker(query_count=0)
+    @sql_count_checker(query_count=1)
     def test_reindex_index_non_overlapping_different_types_index_negative(self):
         date_index = pd.date_range("1/1/2010", periods=6, freq="D")
         snow_df = pd.DataFrame(
diff --git a/tests/integ/modin/frame/test_rename.py b/tests/integ/modin/frame/test_rename.py
index a5595ec716..1be8956a9d 100644
--- a/tests/integ/modin/frame/test_rename.py
+++ b/tests/integ/modin/frame/test_rename.py
@@ -104,7 +104,7 @@ def test_rename(self, snow_float_frame):
             assert_index_equal(renamed.index, native_pd.Index(["A", "B", "foo", "bar"]))
 
         # index with name
-        with SqlCounter(query_count=1, join_count=2):
+        with SqlCounter(query_count=2, join_count=1):
             index = Index(["foo", "bar"], name="name")
             renamer = DataFrame(data, index=index)
             renamed = renamer.rename(index={"foo": "bar", "bar": "foo"})
diff --git a/tests/integ/modin/frame/test_setitem.py b/tests/integ/modin/frame/test_setitem.py
index 3d51277b2c..bc4a1393b2 100644
--- a/tests/integ/modin/frame/test_setitem.py
+++ b/tests/integ/modin/frame/test_setitem.py
@@ -444,7 +444,7 @@ def setitem_helper(df):
         [["a", "b", "b", "d", "e"], ["x", "y", "z", "u", "u"], True],
     ],
 )
-@sql_count_checker(query_count=1, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_df_setitem_with_unique_and_duplicate_index_values(
     index_values, other_index_values, expect_mismatch
 ):
diff --git a/tests/integ/modin/frame/test_to_snowflake.py b/tests/integ/modin/frame/test_to_snowflake.py
index 253fa542c9..6ddcdaf0db 100644
--- a/tests/integ/modin/frame/test_to_snowflake.py
+++ b/tests/integ/modin/frame/test_to_snowflake.py
@@ -16,7 +16,7 @@
 @pytest.mark.parametrize("index", [True, False])
 @pytest.mark.parametrize("index_labels", [None, ["my_index"]])
 # one extra query to convert index to native pandas when creating the snowpark pandas dataframe
-@sql_count_checker(query_count=2, join_count=1)
+@sql_count_checker(query_count=3)
 def test_to_snowflake_index(test_table_name, index, index_labels):
     df = pd.DataFrame(
         {"a": [1, 2, 3], "b": [4, 5, 6]}, index=pd.Index([2, 3, 4], name="index")
@@ -180,7 +180,7 @@ def test_to_snowflake_column_with_quotes(session, test_table_name):
 
 
 # one extra query to convert index to native pandas when creating the snowpark pandas dataframe
-@sql_count_checker(query_count=0)
+@sql_count_checker(query_count=1)
 def test_to_snowflake_index_label_none_raises(test_table_name):
     df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
 
@@ -198,7 +198,7 @@ def test_to_snowflake_index_label_none_raises(test_table_name):
 
 
 # one extra query to convert index to native pandas when creating the snowpark pandas dataframe
-@sql_count_checker(query_count=0)
+@sql_count_checker(query_count=1)
 def test_to_snowflake_data_label_none_raises(test_table_name):
     df = pd.DataFrame(
         {"a": [1, 2, 3], "b": [4, 5, 6]}, index=pd.Index([2, 3, 4], name="index")
diff --git a/tests/integ/modin/frame/test_where.py b/tests/integ/modin/frame/test_where.py
index bd7a5b5808..59565b4fcb 100644
--- a/tests/integ/modin/frame/test_where.py
+++ b/tests/integ/modin/frame/test_where.py
@@ -690,7 +690,7 @@ def test_dataframe_where_with_duplicated_index_aligned(cond_frame, other):
         native_other = other
         snow_other = other
 
-    expected_join_count = 2 if isinstance(other, int) else 3
+    expected_join_count = 1 if isinstance(other, int) else 2
     with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(
             snow_df,
@@ -701,9 +701,8 @@ def test_dataframe_where_with_duplicated_index_aligned(cond_frame, other):
         )
 
 
-# 3 extra joins to create the 3 snowpark pandas dataframe with non-Snowpark pandas data
-# and a Snowpark pandas Index.
-@sql_count_checker(query_count=1, join_count=5)
+# 3 extra queries to convert index to native pandas when creating the 3 snowpark pandas dataframe
+@sql_count_checker(query_count=4, join_count=2)
 def test_dataframe_where_with_duplicated_index_unaligned():
     data = [3, 4, 5, 2]
     df_index = pd.Index([2, 1, 2, 3], name="index")
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
index e337e53d4b..ccf1753f18 100644
--- a/tests/integ/modin/index/test_df_series_creation_with_index.py
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -298,7 +298,7 @@ def test_create_with_series_as_data_and_index_as_index(
         ),  # no index values match
     ],
 )
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_create_df_with_df_as_data_and_index_as_index(native_df, native_index):
     """
     Creating a DataFrame where the data is a DataFrame and the index is an Index.
@@ -329,7 +329,7 @@ def test_create_df_with_df_as_data_and_index_as_index(native_df, native_index):
         ({}, native_pd.Index([10, 0, 1], name="non-empty index")),
     ],
 )
-@sql_count_checker(query_count=1, join_count=2)
+@sql_count_checker(query_count=1, join_count=1)
 def test_create_df_with_empty_df_as_data_and_index_as_index(native_df, native_index):
     """
     Creating a DataFrame where the data is an empty DataFrame and the index is an Index.
@@ -443,7 +443,7 @@ def test_create_df_with_df_as_data_and_index_as_index_and_different_columns(
     qc = 1 if column_type == "list" else 2
     qc += 1 if (isinstance(native_df, dict)) else 0
     qc += 1 if (isinstance(native_df, dict) and column_type == "index") else 0
-    jc = 2 if isinstance(native_df, native_pd.DataFrame) else 0
+    jc = 1 if isinstance(native_df, native_pd.DataFrame) else 0
     with SqlCounter(query_count=qc, join_count=jc):
         assert_frame_equal(
             pd.DataFrame(snow_df, index=snow_index, columns=native_columns),
@@ -795,57 +795,22 @@ def test_create_df_with_mixed_series_index_dict_data():
         assert_frame_equal(snow_df, native_df)
 
 
-def test_create_df_with_mixed_series_index_list_data():
+@sql_count_checker(query_count=2)
+def test_create_df_with_mixed_series_index_list_data_negative():
+    """
+    Since Snowpark pandas relies on native pandas for initialization a DataFrame with mixed data types,
+    they both raise the same error.
+    """
     # Create the list data.
-    native_data1 = native_pd.Series([1, 2, 3])
-    native_data2 = native_pd.Index([4, 5, 6])
+    data1 = native_pd.Series([1, 2, 3])
+    data2 = native_pd.Index([4, 5, 6])
     data3 = [7, 8, 9]
-    snow_data1 = pd.Series([1, 2, 3])
-    snow_data2 = pd.Index([4, 5, 6])
     # Need to convert data3 to an Index since native pandas tries to perform `get_indexer` on it.
-    native_data = [native_data1, native_data2, native_pd.Index(data3)]
-    snow_data = [snow_data1, snow_data2, data3]
-
-    # Create DataFrame only with list data.
-    native_df = native_pd.DataFrame(native_data)
-    snow_df = pd.DataFrame(snow_data)
-    with SqlCounter(query_count=1):
-        assert_frame_equal(snow_df, native_df)
-
-    # Create DataFrame with list data and Series index.
-    native_ser_index = native_pd.Series([2, 11, 0])
-    snow_ser_index = pd.Series([2, 11, 0])
-    native_df = native_pd.DataFrame(native_data, index=native_ser_index)
-    snow_df = pd.DataFrame(snow_data, index=snow_ser_index)
-    with SqlCounter(query_count=1):
-        assert_frame_equal(snow_df, native_df, check_dtype=False)
-
-    # Create DataFrame with list data and Index index.
-    native_index = native_pd.Index([22, 11, 0])
-    snow_index = pd.Index([22, 11, 0])
-    native_df = native_pd.DataFrame(native_data, index=native_index)
-    snow_df = pd.DataFrame(snow_data, index=snow_index)
-    with SqlCounter(query_count=1):
-        assert_frame_equal(snow_df, native_df, check_dtype=False)
-
-    # Create DataFrame with list data, Series index, and columns.
-    columns = ["A", "B", "C"]
-    native_df = native_pd.DataFrame(
-        native_data, index=native_ser_index, columns=columns
-    )
-    snow_df = pd.DataFrame(snow_data, index=snow_ser_index, columns=columns)
-    with SqlCounter(query_count=1):
-        assert_frame_equal(snow_df, native_df, check_dtype=False)
-
-    # Create DataFrame with list data, Index index, and Index columns.
-    native_columns = native_pd.Index(columns)
-    snow_columns = pd.Index(columns)
-    native_df = native_pd.DataFrame(
-        native_data, index=native_index, columns=native_columns
-    )
-    snow_df = pd.DataFrame(snow_data, index=snow_index, columns=snow_columns)
-    with SqlCounter(query_count=1):
-        assert_frame_equal(snow_df, native_df, check_dtype=False)
+    err_msg = "'builtin_function_or_method' object has no attribute 'get_indexer'"
+    with pytest.raises(AttributeError, match=err_msg):
+        native_pd.DataFrame([data1, data2, data3])
+    with pytest.raises(AttributeError, match=err_msg):
+        pd.DataFrame([pd.Series(data1), pd.Index(data2), data3])
 
 
 @pytest.mark.xfail(
diff --git a/tests/integ/modin/pivot/test_pivot_table_single.py b/tests/integ/modin/pivot/test_pivot_table_single.py
index e53b553090..9feab0c09f 100644
--- a/tests/integ/modin/pivot/test_pivot_table_single.py
+++ b/tests/integ/modin/pivot/test_pivot_table_single.py
@@ -226,7 +226,7 @@ def test_pivot_table_with_sum_and_count_null_and_empty_values_matching_behavior_
 
 
 # One extra query to convert to native pandas in dataframe constructor when creating snow_df
-@sql_count_checker(query_count=5, join_count=2)
+@sql_count_checker(query_count=6, join_count=1)
 def test_pivot_on_inline_data_using_temp_table():
     # Create a large dataframe of inlined data that will spill to a temporary table.
     snow_df = pd.DataFrame(
diff --git a/tests/integ/modin/resample/test_resample.py b/tests/integ/modin/resample/test_resample.py
index af99185294..5ade497c4d 100644
--- a/tests/integ/modin/resample/test_resample.py
+++ b/tests/integ/modin/resample/test_resample.py
@@ -32,7 +32,7 @@ def randomword(length):
 @freq
 @interval
 @agg_func
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_with_varying_freq_and_interval(freq, interval, agg_func):
     rule = f"{interval}{freq}"
     eval_snowpark_pandas_result(
@@ -45,7 +45,7 @@ def test_resample_with_varying_freq_and_interval(freq, interval, agg_func):
     )
 
 
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_date_before_snowflake_alignment_date():
     # Snowflake TIMESLICE alignment date is 1970-01-01 00:00:00
     date_data = native_pd.to_datetime(
@@ -66,7 +66,7 @@ def test_resample_date_before_snowflake_alignment_date():
 
 
 @interval
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_date_wraparound_snowflake_alignment_date(interval):
     # Snowflake TIMESLICE alignment date is 1970-01-01 00:00:00
     date_data = native_pd.to_datetime(
@@ -89,7 +89,7 @@ def test_resample_date_wraparound_snowflake_alignment_date(interval):
 
 @agg_func
 @freq
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_missing_data_upsample(agg_func, freq):
     # this tests to make sure that missing resample bins will be filled in.
     date_data = native_pd.date_range("2020-01-01", periods=13, freq=f"1{freq}").delete(
@@ -103,7 +103,7 @@ def test_resample_missing_data_upsample(agg_func, freq):
     )
 
 
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_duplicated_timestamps_downsample():
     date_data = native_pd.to_datetime(
         [
@@ -122,7 +122,7 @@ def test_resample_duplicated_timestamps_downsample():
     )
 
 
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_duplicated_timestamps():
     date_data = native_pd.to_datetime(
         [
@@ -161,7 +161,7 @@ def test_resample_series(freq, interval, agg_func):
 @pytest.mark.parametrize(
     "agg_func", ["max", "min", "mean", "median", "sum", "std", "var"]
 )
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_numeric_only(agg_func):
     eval_snowpark_pandas_result(
         *create_test_dfs(
@@ -174,7 +174,7 @@ def test_resample_numeric_only(agg_func):
 
 
 @agg_func
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_df_with_nan(agg_func):
     # resample bins of 'A' each have a NaN. 1 resample bin of 'B' is all NaN
     eval_snowpark_pandas_result(
@@ -202,7 +202,7 @@ def test_resample_ser_with_nan(agg_func):
 
 
 @agg_func
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_single_resample_bin(agg_func):
     eval_snowpark_pandas_result(
         *create_test_dfs(
@@ -215,7 +215,7 @@ def test_resample_single_resample_bin(agg_func):
 
 
 @agg_func
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_index_with_nan(agg_func):
     datecol = native_pd.to_datetime(
         ["2020-01-01", "2020-01-03", "2020-01-05", np.nan, "2020-01-09", np.nan]
@@ -230,7 +230,7 @@ def test_resample_index_with_nan(agg_func):
     )
 
 
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_df_getitem():
     eval_snowpark_pandas_result(
         *create_test_dfs(
@@ -253,7 +253,7 @@ def test_resample_ser_getitem():
     )
 
 
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_date_trunc_day():
     # resample bins of 'A' each have a NaN. 1 resample bin of 'B' is all NaN
     eval_snowpark_pandas_result(
@@ -266,7 +266,7 @@ def test_resample_date_trunc_day():
     )
 
 
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_date_trunc_hour():
     # resample bins of 'A' each have a NaN. 1 resample bin of 'B' is all NaN
     eval_snowpark_pandas_result(
diff --git a/tests/integ/modin/resample/test_resample_asfreq.py b/tests/integ/modin/resample/test_resample_asfreq.py
index fc60f62621..50e9646a4c 100644
--- a/tests/integ/modin/resample/test_resample_asfreq.py
+++ b/tests/integ/modin/resample/test_resample_asfreq.py
@@ -19,7 +19,7 @@
 
 @freq
 @interval
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_asfreq_no_method(freq, interval):
     rule = f"{interval}{freq}"
     eval_snowpark_pandas_result(
@@ -32,7 +32,7 @@ def test_asfreq_no_method(freq, interval):
     )
 
 
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_asfreq_ffill():
     eval_snowpark_pandas_result(
         *create_test_dfs(
diff --git a/tests/integ/modin/resample/test_resample_fillna.py b/tests/integ/modin/resample/test_resample_fillna.py
index 53352fd4ef..c15aef3fa9 100644
--- a/tests/integ/modin/resample/test_resample_fillna.py
+++ b/tests/integ/modin/resample/test_resample_fillna.py
@@ -17,7 +17,7 @@
 
 @interval
 @agg_func
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_fill(interval, agg_func):
     datecol = native_pd.to_datetime(
         [
@@ -69,7 +69,7 @@ def test_resample_fill_ser(interval, agg_func):
 
 @interval
 @agg_func
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_ffill_one_gap(interval, agg_func):
     datecol = native_pd.to_datetime(
         [
@@ -113,7 +113,7 @@ def resample_ffill_ser_one_gap(agg_func):
 
 @interval
 @agg_func
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_ffill_missing_in_middle(interval, agg_func):
     datecol = native_pd.to_datetime(
         [
@@ -163,7 +163,7 @@ def test_resample_ffill_ser_missing_in_middle(interval, agg_func):
 
 @interval
 @agg_func
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_ffill_ffilled_with_none(interval, agg_func):
     datecol = native_pd.to_datetime(
         [
@@ -187,7 +187,7 @@ def test_resample_ffill_ffilled_with_none(interval, agg_func):
 
 @interval
 @agg_func
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_ffill_large_gaps(interval, agg_func):
     datecol = native_pd.to_datetime(
         [
@@ -209,7 +209,7 @@ def test_resample_ffill_large_gaps(interval, agg_func):
 
 @interval
 @pytest.mark.parametrize("method", ["ffill", "pad", "backfill", "bfill"])
-@sql_count_checker(query_count=2, join_count=3)
+@sql_count_checker(query_count=3, join_count=1)
 def test_resample_fillna(interval, method):
     datecol = native_pd.to_datetime(
         [
diff --git a/tests/integ/modin/series/test_loc.py b/tests/integ/modin/series/test_loc.py
index 2603eaa61c..8d74fd856a 100644
--- a/tests/integ/modin/series/test_loc.py
+++ b/tests/integ/modin/series/test_loc.py
@@ -4,7 +4,6 @@
 import functools
 import numbers
 import random
-import re
 
 import modin.pandas as pd
 import numpy as np
@@ -1451,10 +1450,7 @@ def test_series_loc_set_df_key_negative(item, default_index_native_series):
         native_ser.loc[df_key] = item
 
     # Snowpark pandas error verification.
-    err_msg = re.escape(
-        "The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), "
-        "a.any() or a.all()."
-    )
+    err_msg = "Data cannot be a DataFrame"
     with pytest.raises(ValueError, match=err_msg):
         snowpark_ser.loc[pd.DataFrame(df_key)] = item
         assert_series_equal(snowpark_ser, native_ser)
diff --git a/tests/integ/modin/series/test_sort_values.py b/tests/integ/modin/series/test_sort_values.py
index b147377f75..e966409dfc 100644
--- a/tests/integ/modin/series/test_sort_values.py
+++ b/tests/integ/modin/series/test_sort_values.py
@@ -33,7 +33,7 @@ def snow_series(snow_df):
 
 @pytest.mark.parametrize("by", ["A", "B", "a", "b"])
 @pytest.mark.parametrize("ascending", [True, False])
-@sql_count_checker(query_count=3, join_count=3)
+@sql_count_checker(query_count=3)
 def test_sort_values(snow_df, by, ascending):
     snow_series = snow_df[by]
     native_series = snow_series.to_pandas()
diff --git a/tests/integ/modin/test_concat.py b/tests/integ/modin/test_concat.py
index 19693ad381..3170241be4 100644
--- a/tests/integ/modin/test_concat.py
+++ b/tests/integ/modin/test_concat.py
@@ -1063,7 +1063,7 @@ def test_concat_duplicate_columns(columns1, columns2, expected_rows, expected_co
     df1 = pd.DataFrame([[1, 2, 3]], columns=columns1)
     df2 = pd.DataFrame([[4, 5, 6]], columns=columns2)
     expected_df = pd.DataFrame(expected_rows, columns=expected_cols, index=[0, 0])
-    assert_frame_equal(pd.concat([df1, df2]), expected_df)
+    assert_frame_equal(pd.concat([df1, df2]), expected_df, check_dtype=False)
 
 
 @pytest.mark.parametrize("value1", [4, 1.5, True, "c", (1, 2), {"a": 1}])

From fa4eb09836460eb6a3bb9bb64c7e4a3622367087 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Mon, 16 Sep 2024 16:08:41 -0700
Subject: [PATCH 37/42] remove unnecessary diffs

---
 tests/integ/modin/frame/test_merge.py              |  1 +
 tests/integ/modin/frame/test_rename.py             |  1 +
 tests/integ/modin/frame/test_setitem.py            |  1 +
 tests/integ/modin/resample/test_resample.py        | 13 +++++++++++++
 tests/integ/modin/resample/test_resample_fillna.py |  1 +
 5 files changed, 17 insertions(+)

diff --git a/tests/integ/modin/frame/test_merge.py b/tests/integ/modin/frame/test_merge.py
index 04f0ca42fd..d006706418 100644
--- a/tests/integ/modin/frame/test_merge.py
+++ b/tests/integ/modin/frame/test_merge.py
@@ -922,6 +922,7 @@ def test_merge_outer_with_nan(dtype):
     _verify_merge(right, left, "outer", on="key")
 
 
+# Two extra queries to convert to native index for dataframe constructor when creating left and right
 @sql_count_checker(query_count=5, join_count=1)
 def test_merge_different_index_names():
     left = pd.DataFrame({"a": [1]}, index=pd.Index([1], name="c"))
diff --git a/tests/integ/modin/frame/test_rename.py b/tests/integ/modin/frame/test_rename.py
index 1be8956a9d..289fb6e159 100644
--- a/tests/integ/modin/frame/test_rename.py
+++ b/tests/integ/modin/frame/test_rename.py
@@ -104,6 +104,7 @@ def test_rename(self, snow_float_frame):
             assert_index_equal(renamed.index, native_pd.Index(["A", "B", "foo", "bar"]))
 
         # index with name
+        # Two extra queries, one for converting to native pandas in renamer Dataframe constructor, one to get the name
         with SqlCounter(query_count=2, join_count=1):
             index = Index(["foo", "bar"], name="name")
             renamer = DataFrame(data, index=index)
diff --git a/tests/integ/modin/frame/test_setitem.py b/tests/integ/modin/frame/test_setitem.py
index bc4a1393b2..cc5698b684 100644
--- a/tests/integ/modin/frame/test_setitem.py
+++ b/tests/integ/modin/frame/test_setitem.py
@@ -444,6 +444,7 @@ def setitem_helper(df):
         [["a", "b", "b", "d", "e"], ["x", "y", "z", "u", "u"], True],
     ],
 )
+# 2 extra queries to convert to native pandas when creating the two snowpark pandas dataframes
 @sql_count_checker(query_count=3, join_count=1)
 def test_df_setitem_with_unique_and_duplicate_index_values(
     index_values, other_index_values, expect_mismatch
diff --git a/tests/integ/modin/resample/test_resample.py b/tests/integ/modin/resample/test_resample.py
index 5ade497c4d..63c72452c1 100644
--- a/tests/integ/modin/resample/test_resample.py
+++ b/tests/integ/modin/resample/test_resample.py
@@ -32,6 +32,7 @@ def randomword(length):
 @freq
 @interval
 @agg_func
+# One extra query to convert index to native pandas for dataframe constructor
 @sql_count_checker(query_count=3, join_count=1)
 def test_resample_with_varying_freq_and_interval(freq, interval, agg_func):
     rule = f"{interval}{freq}"
@@ -45,6 +46,7 @@ def test_resample_with_varying_freq_and_interval(freq, interval, agg_func):
     )
 
 
+# One extra query to convert index to native pandas for dataframe constructor
 @sql_count_checker(query_count=3, join_count=1)
 def test_resample_date_before_snowflake_alignment_date():
     # Snowflake TIMESLICE alignment date is 1970-01-01 00:00:00
@@ -66,6 +68,7 @@ def test_resample_date_before_snowflake_alignment_date():
 
 
 @interval
+# One extra query to convert index to native pandas for dataframe constructor
 @sql_count_checker(query_count=3, join_count=1)
 def test_resample_date_wraparound_snowflake_alignment_date(interval):
     # Snowflake TIMESLICE alignment date is 1970-01-01 00:00:00
@@ -89,6 +92,7 @@ def test_resample_date_wraparound_snowflake_alignment_date(interval):
 
 @agg_func
 @freq
+# One extra query to convert index to native pandas for dataframe constructor
 @sql_count_checker(query_count=3, join_count=1)
 def test_resample_missing_data_upsample(agg_func, freq):
     # this tests to make sure that missing resample bins will be filled in.
@@ -103,6 +107,7 @@ def test_resample_missing_data_upsample(agg_func, freq):
     )
 
 
+# One extra query to convert index to native pandas for dataframe constructor
 @sql_count_checker(query_count=3, join_count=1)
 def test_resample_duplicated_timestamps_downsample():
     date_data = native_pd.to_datetime(
@@ -122,6 +127,7 @@ def test_resample_duplicated_timestamps_downsample():
     )
 
 
+# One extra query to convert index to native pandas for dataframe constructor
 @sql_count_checker(query_count=3, join_count=1)
 def test_resample_duplicated_timestamps():
     date_data = native_pd.to_datetime(
@@ -158,6 +164,7 @@ def test_resample_series(freq, interval, agg_func):
     )
 
 
+# One extra query to convert index to native pandas for dataframe constructor
 @pytest.mark.parametrize(
     "agg_func", ["max", "min", "mean", "median", "sum", "std", "var"]
 )
@@ -173,6 +180,7 @@ def test_resample_numeric_only(agg_func):
     )
 
 
+# One extra query to convert index to native pandas for dataframe constructor
 @agg_func
 @sql_count_checker(query_count=3, join_count=1)
 def test_resample_df_with_nan(agg_func):
@@ -202,6 +210,7 @@ def test_resample_ser_with_nan(agg_func):
 
 
 @agg_func
+# One extra query to convert index to native pandas for dataframe constructor
 @sql_count_checker(query_count=3, join_count=1)
 def test_resample_single_resample_bin(agg_func):
     eval_snowpark_pandas_result(
@@ -215,6 +224,7 @@ def test_resample_single_resample_bin(agg_func):
 
 
 @agg_func
+# One extra query to convert index to native pandas for dataframe constructor
 @sql_count_checker(query_count=3, join_count=1)
 def test_resample_index_with_nan(agg_func):
     datecol = native_pd.to_datetime(
@@ -230,6 +240,7 @@ def test_resample_index_with_nan(agg_func):
     )
 
 
+# One extra query to convert index to native pandas for dataframe constructor
 @sql_count_checker(query_count=3, join_count=1)
 def test_resample_df_getitem():
     eval_snowpark_pandas_result(
@@ -253,6 +264,7 @@ def test_resample_ser_getitem():
     )
 
 
+# One extra query to convert index to native pandas for dataframe constructor
 @sql_count_checker(query_count=3, join_count=1)
 def test_resample_date_trunc_day():
     # resample bins of 'A' each have a NaN. 1 resample bin of 'B' is all NaN
@@ -266,6 +278,7 @@ def test_resample_date_trunc_day():
     )
 
 
+# One extra query to convert index to native pandas for dataframe constructor
 @sql_count_checker(query_count=3, join_count=1)
 def test_resample_date_trunc_hour():
     # resample bins of 'A' each have a NaN. 1 resample bin of 'B' is all NaN
diff --git a/tests/integ/modin/resample/test_resample_fillna.py b/tests/integ/modin/resample/test_resample_fillna.py
index c15aef3fa9..3aad42dbc4 100644
--- a/tests/integ/modin/resample/test_resample_fillna.py
+++ b/tests/integ/modin/resample/test_resample_fillna.py
@@ -15,6 +15,7 @@
 agg_func = pytest.mark.parametrize("agg_func", ["ffill", "bfill"])
 
 
+# One extra query to convert index to native pandas for dataframe constructor
 @interval
 @agg_func
 @sql_count_checker(query_count=3, join_count=1)

From db2863067685e5504a40744ce802e011c6a8a9c2 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Mon, 16 Sep 2024 17:43:03 -0700
Subject: [PATCH 38/42] fix doctest and couple of tests

---
 .../plugin/extensions/dataframe_overrides.py  | 31 ++++++++++---------
 tests/integ/modin/frame/test_name.py          |  2 +-
 .../test_df_series_creation_with_index.py     |  2 +-
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
index 890c486bb0..a095d63232 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
@@ -615,20 +615,23 @@ def __init__(
                     self._query_compiler = new_qc
                     return
 
-                # If only some data is a Snowpark pandas object, convert it to pandas objects.
-                res = []
-                for v in data:
-                    if isinstance(v, (Index, BasePandasDataset)):
-                        res.append(v.to_pandas())
-                    # elif is_dict_like(v) or isinstance(v, (native_pd.Series, native_pd.DataFrame, native_pd.Index)):
-                    #     res.append(v)
-                    else:
-                        # # Need to convert this is a native pandas object since native pandas incorrectly
-                        # # tries to perform `get_indexer` on it. Specify dtype=object so that pandas does not
-                        # # cast the data provided. In some cases, None turns to NaN, which is not desired.
-                        # res.append(native_pd.Index(v, dtype=object) if is_list_like(v) else v)
-                        res.append(v)
-                data = res
+                if not isinstance(data, np.ndarray):
+                    # Sometimes the ndarray representation of a list is different from a regular list.
+                    # For instance, [(1, 2, 3), (4, 5, 6), (7, 8, 9)], dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")]
+                    # is different from np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)], dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")]).
+                    # The list has the shape (3, 3) while the ndarray has the shape (3,).
+                    # If only some data is a Snowpark pandas object, convert it to pandas objects.
+                    res = []
+                    for v in data:
+                        if isinstance(v, (Index, BasePandasDataset)):
+                            res.append(v.to_pandas())
+                        else:
+                            # # Need to convert this is a native pandas object since native pandas incorrectly
+                            # # tries to perform `get_indexer` on it. Specify dtype=object so that pandas does not
+                            # # cast the data provided. In some cases, None turns to NaN, which is not desired.
+                            # res.append(native_pd.Index(v, dtype=object) if is_list_like(v) else v)
+                            res.append(v)
+                    data = res
 
         query_compiler = from_pandas(
             native_pd.DataFrame(
diff --git a/tests/integ/modin/frame/test_name.py b/tests/integ/modin/frame/test_name.py
index b23a3b26f0..aa5d04ed03 100644
--- a/tests/integ/modin/frame/test_name.py
+++ b/tests/integ/modin/frame/test_name.py
@@ -39,7 +39,7 @@ def test_create_dataframe_from_object_with_name(sample):
     )
 
 
-@sql_count_checker(query_count=3)
+@sql_count_checker(query_count=1, join_count=2, union_count=1)
 def test_create_dataframe_from_snowpark_pandas_series():
     df = pd.DataFrame([[2, 3, 4], [5, 6, 7]], columns=["X", "Y", "Z"])
     df = pd.DataFrame([df.X, df.iloc[:, 2]])
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
index ccf1753f18..354bb2f11e 100644
--- a/tests/integ/modin/index/test_df_series_creation_with_index.py
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -858,5 +858,5 @@ def test_create_series_with_df_data_negative():
         ),
     ):
         native_pd.Series(native_pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
-    with pytest.raises(ValueError, match="Index data must be 1-dimensional"):
+    with pytest.raises(ValueError, match="Data cannot be a DataFrame"):
         pd.Series(pd.DataFrame([[1, 2], [3, 4], [5, 6]]))

From 17be4c30a705416cac656230235cc1f18331ef74 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Wed, 18 Sep 2024 13:53:46 -0700
Subject: [PATCH 39/42] apply feedback to simplify logic

---
 .../snowpark/modin/plugin/_internal/utils.py  |  58 ++++-
 .../compiler/snowflake_query_compiler.py      |  50 ----
 .../modin/plugin/docstrings/dataframe.py      |  15 +-
 .../plugin/extensions/dataframe_overrides.py  | 237 +++++++++---------
 .../plugin/extensions/series_overrides.py     |  47 ++--
 .../integ/modin/groupby/test_groupby_apply.py |   8 +-
 .../test_df_series_creation_with_index.py     |  96 ++++++-
 7 files changed, 310 insertions(+), 201 deletions(-)

diff --git a/src/snowflake/snowpark/modin/plugin/_internal/utils.py b/src/snowflake/snowpark/modin/plugin/_internal/utils.py
index 1e53ee34f9..8563997dfa 100644
--- a/src/snowflake/snowpark/modin/plugin/_internal/utils.py
+++ b/src/snowflake/snowpark/modin/plugin/_internal/utils.py
@@ -11,7 +11,7 @@
 
 import numpy as np
 import pandas as native_pd
-from pandas._typing import Scalar
+from pandas._typing import AnyArrayLike, Scalar
 from pandas.core.dtypes.base import ExtensionDtype
 from pandas.core.dtypes.common import is_integer_dtype, is_object_dtype, is_scalar
 from pandas.core.dtypes.inference import is_list_like
@@ -2021,6 +2021,20 @@ def error_checking_for_init(
         raise NotImplementedError("pandas type category is not implemented")
 
 
+def assert_fields_are_none(
+    class_name: str, data: Any, index: Any, columns: Any = None
+) -> None:
+    assert (
+        data is None
+    ), f"Invalid {class_name} construction! Cannot pass both data and query_compiler."
+    assert (
+        index is None
+    ), f"Invalid {class_name} construction! Cannot pass both index and query_compiler."
+    assert (
+        columns is None
+    ), f"Invalid {class_name} construction! Cannot pass both columns and query_compiler."
+
+
 def convert_index_to_qc(index: Any) -> Any:
     """
     Method to convert an object representing an index into a query compiler for set_index or reindex.
@@ -2084,3 +2098,45 @@ def convert_index_to_list_of_qcs(index: Any) -> list:
     else:
         index_qc_list = [convert_index_to_qc(index)]
     return index_qc_list
+
+
+def add_extra_columns_and_select_required_columns(
+    query_compiler: Any,
+    columns: Union[AnyArrayLike, list],
+    data_columns: Union[AnyArrayLike, list],
+) -> Any:
+    """
+    Method to add extra columns to and select the required columns from the provided query compiler.
+    This is used in DataFrame construction in the following cases:
+    - general case when data is a DataFrame
+    - data is a named Series, and this name is in `columns`
+
+    Parameters
+    ----------
+    query_compiler: Any
+        The query compiler to select columns from, i.e., data's query compiler.
+    columns: AnyArrayLike or list
+        The columns to select from the query compiler.
+    data_columns: AnyArrayLike or list
+        The columns in the data. This is data.columns if data is a DataFrame or data.name if data is a Series.
+
+    """
+    from modin.pandas import DataFrame
+
+    # The `columns` parameter is used to select the columns from `data` that will be in the resultant DataFrame.
+    # If a value in `columns` is not present in data's columns, it will be added as a new column filled with NaN values.
+    # These columns are tracked by the `extra_columns` variable.
+    if data_columns is not None and columns is not None:
+        extra_columns = [col for col in columns if col not in data_columns]
+        # To add these new columns to the DataFrame, perform `__getitem__` only with the extra columns
+        # and set them to None.
+        extra_columns_df = DataFrame(query_compiler=query_compiler)
+        extra_columns_df[extra_columns] = None
+        query_compiler = extra_columns_df._query_compiler
+
+    # To select the columns for the resultant DataFrame, perform `.loc[]` on the created query compiler.
+    # This step is performed to ensure that the right columns are picked from the InternalFrame since we
+    # never explicitly drop the unwanted columns. `.loc[]` also ensures that the columns in the resultant
+    # DataFrame are in the same order as the columns in the `columns` parameter.
+    columns = slice(None) if columns is None else columns
+    return DataFrame(query_compiler=query_compiler).loc[:, columns]._query_compiler
diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
index 256b0de6b8..6e706c133c 100644
--- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
+++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
@@ -18292,53 +18292,3 @@ def timedelta_property(
         return SnowflakeQueryCompiler(
             self._modin_frame.apply_snowpark_function_to_columns(func, include_index)
         )
-
-    def create_qc_with_extra_columns(
-        self, extra_columns: List[Hashable]
-    ) -> "SnowflakeQueryCompiler":
-        """
-        This is a helper function for creating a DataFrame where the data is a DataFrame object. Sometimes, columns
-        not present in the `data` DataFrame can be passed as arguments - these are added to the resultant DataFrame
-        as NaN columns.
-
-        Parameters
-        ----------
-        extra_columns : list of hashable, default None
-            New columns that are not a part of the original query compiler
-
-        Returns
-        -------
-        SnowflakeQueryCompiler
-            A new query compiler with the new columns.
-        """
-        self_frame = self._modin_frame
-
-        if not extra_columns or len(extra_columns) == 0:
-            return self.copy()
-
-        # Append the new columns to the data's internal frame.
-        new_snowflake_quoted_identifiers = (
-            self._modin_frame.ordered_dataframe.generate_snowflake_quoted_identifiers(
-                pandas_labels=extra_columns,
-                excluded=self_frame.data_column_snowflake_quoted_identifiers,
-            )
-        )
-        new_ordered_frame = append_columns(
-            self_frame.ordered_dataframe,
-            new_snowflake_quoted_identifiers,
-            [pandas_lit(np.nan)] * len(extra_columns),
-        )
-        new_internal_frame = InternalFrame.create(
-            ordered_dataframe=new_ordered_frame,
-            data_column_pandas_labels=self_frame.data_column_pandas_labels
-            + extra_columns,
-            data_column_snowflake_quoted_identifiers=self_frame.data_column_snowflake_quoted_identifiers
-            + new_snowflake_quoted_identifiers,
-            data_column_pandas_index_names=self_frame.data_column_pandas_index_names,
-            index_column_pandas_labels=self_frame.index_column_pandas_labels,
-            index_column_snowflake_quoted_identifiers=self_frame.index_column_snowflake_quoted_identifiers,
-            data_column_types=None,
-            index_column_types=None,
-        )
-
-        return SnowflakeQueryCompiler(new_internal_frame)
diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py b/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py
index f7e93e6c2d..c7b11c8fbe 100644
--- a/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py
+++ b/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py
@@ -82,16 +82,13 @@ class DataFrame(BasePandasDataset):
     Notes
     -----
     ``DataFrame`` can be created either from passed `data` or `query_compiler`. If both
-    parameters are provided, data source will be prioritized in the next order:
+    parameters are provided, an assertion error will be raised. `query_compiler` can only
+    be specified when the `data`, `index`, and `columns` are None.
 
-    1) Modin ``DataFrame`` or ``Series`` passed with `data` parameter.
-    2) Query compiler from the `query_compiler` parameter.
-    3) Various pandas/NumPy/Python data structures passed with `data` parameter.
-
-    The last option is less desirable since import of such data structures is very
-    inefficient, please use previously created Modin structures from the fist two
-    options or import data using highly efficient Modin IO tools (for example
-    ``pd.read_csv``).
+    Using pandas/NumPy/Python data structures as the `data` parameter is less desirable since
+    importing such data structures is very inefficient.
+    Please use previously created Modin structures or import data using highly efficient Modin IO
+    tools (for example ``pd.read_csv``).
 
     Examples
     --------
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
index a095d63232..a9a5ea511e 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
@@ -85,12 +85,15 @@
     is_snowflake_agg_func,
 )
 from snowflake.snowpark.modin.plugin._internal.utils import (
+    add_extra_columns_and_select_required_columns,
+    assert_fields_are_none,
     convert_index_to_list_of_qcs,
     convert_index_to_qc,
     error_checking_for_init,
     is_repr_truncated,
 )
 from snowflake.snowpark.modin.plugin._typing import ListLike
+from snowflake.snowpark.modin.plugin.extensions.index import Index
 from snowflake.snowpark.modin.plugin.utils.error_message import (
     ErrorMessage,
     dataframe_not_implemented,
@@ -464,43 +467,41 @@ def __init__(
     # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions
     # Siblings are other dataframes that share the same query compiler. We
     # use this list to update inplace when there is a shallow copy.
-    from snowflake.snowpark.modin.plugin.extensions.index import Index
-
     self._siblings = []
 
-    # 0. Setting the query compiler
-    # -----------------------------
+    # Setting the query compiler
+    # --------------------------
     if query_compiler is not None:
         # CASE I: query_compiler
         # If a query_compiler is passed in only use the query_compiler field to create a new DataFrame.
-        assert (
-            data is None
-        ), "Invalid DataFrame construction! Cannot pass both data and query_compiler."
-        assert (
-            index is None
-        ), "Invalid DataFrame construction! Cannot pass both index and query_compiler."
-        assert (
-            columns is None
-        ), "Invalid DataFrame construction! Cannot pass both columns and query_compiler."
+        # Verify that the data, index, and columns parameters are None.
+        assert_fields_are_none(
+            class_name="DataFrame", data=data, index=index, columns=columns
+        )
         self._query_compiler = query_compiler
         return
 
+    # A DataFrame cannot be used as an index and Snowpark pandas does not support the Categorical type yet.
+    # Check that index is not a DataFrame and dtype is not "category".
     error_checking_for_init(index, dtype)
 
+    # Convert columns to a local object if it is a lazy Index.
+    columns = try_convert_index_to_native(columns)
+
     # The logic followed here is:
-    # 1. Create a query_compiler from the provided data. If columns are provided, add/select the columns.
-    # 2. If an index is provided, set the index through set_index or reindex.
-    # 3. If the data is a DataFrame, perform loc to select the required index and columns from the DataFrame.
-    # 4. The resultant query_compiler is then set as the query_compiler for the DataFrame.
+    # STEP 1: Create a query_compiler from the provided data. If columns are provided, add/select the columns.
+    # STEP 2: If an index is provided, set the index through set_index or reindex.
+    # STEP 3: The resultant query_compiler is then set as the query_compiler for the DataFrame.
 
-    # 1. Setting the data (and columns)
-    # ---------------------------------
+    # STEP 1: Setting the data (and columns)
+    # --------------------------------------
     if isinstance(data, Index):
         # CASE II: data is a Snowpark pandas Index
         # If the data is an Index object, convert it to a DataFrame to make sure that the values are in the
-        # correct format: the values are a data column, not an index column.
+        # correct format: the values should be a data column, not an index column.
         if data.name is None:
-            # If no name is provided, the default name is 0.
+            # If no name is provided, the default name is 0. Otherwise, only use the first value in `columns` to
+            # set the column name; this is because the resultant DataFrame will have only one column.
             new_name = 0 if columns is None else columns[0]
         else:
             new_name = data.name
@@ -508,36 +509,38 @@ def __init__(
 
     elif isinstance(data, Series):
         # CASE III: data is a Snowpark pandas Series
-        query_compiler = data._query_compiler.copy()
-        # We set the column name if it is not in the provided Series `data`.
-        if data.name is None:
-            # If no name is provided, the default name is 0.
-            query_compiler = query_compiler.set_columns(columns or [0])
-        if columns is not None and data.name not in columns:
-            # If the columns provided are not in the named Series, pandas clears
-            # the DataFrame and sets columns to the columns provided.
+        # If the Series `data` has no name, the default name is 0.
+        name = [data.name] if data.name is not None else [0]
+        if columns is None:
+            # If no columns are provided, the resultant DataFrame has only one column.
+            # The column name is the Series' name.
+            query_compiler = data._query_compiler.set_columns(name)
+        elif data.name in columns:
+            # Treat any columns that are not data.name as extra columns. They will be appended as NaN columns.
+            # After this, select the required columns in the order provided by `columns`.
+            query_compiler = add_extra_columns_and_select_required_columns(
+                data._query_compiler, columns, name
+            )
+        else:
+            # If the columns provided are not in the named Series, pandas clears the DataFrame and sets columns.
             query_compiler = from_pandas(
                 native_pd.DataFrame(columns=columns)
             )._query_compiler
 
     elif isinstance(data, DataFrame):
         # CASE IV: data is a Snowpark pandas DataFrame
-        query_compiler = data._query_compiler.copy()
         if columns is None and index is None:
-            # Special case IV.a: if the new DataFrame has the same columns and index as the original DataFrame,
+            # Special case: if the new DataFrame has the same columns and index as the original DataFrame,
             # the query compiler is shared and kept track of as a sibling.
-            self._query_compiler = query_compiler
-            data._add_sibling(self)
+            self._query_compiler = data._query_compiler
+            if not copy:
+                # When copy is False, the DataFrame is a shallow copy of the original DataFrame.
+                data._add_sibling(self)
             return
-        # The `columns` parameter is used to select the columns from `data` that will be in the resultant
-        # DataFrame. If a value in `columns` is not present in `data`'s columns, it will be added as a
-        # new column filled with NaN values. These columns are tracked by the `extra_columns` variable.
-        if data.columns is not None and columns is not None:
-            extra_columns = [col for col in columns if col not in data.columns]
-        else:
-            extra_columns = []
-        query_compiler = data._query_compiler.create_qc_with_extra_columns(
-            extra_columns
+        # Treat any columns that are not in data.columns as extra columns. They will be appended as NaN columns.
+        # After this, select the required columns in the order provided by `columns`.
+        query_compiler = add_extra_columns_and_select_required_columns(
+            data._query_compiler, columns, data.columns
         )
 
     else:
@@ -545,8 +548,6 @@ def __init__(
         if not isinstance(
             data, (native_pd.Series, native_pd.DataFrame, native_pd.Index)
         ) and is_list_like(data):
-            from snowflake.snowpark.modin.pandas import concat
-
             if is_dict_like(data):
                 # Setting up keys and values for processing if all the values are Snowpark pandas objects.
                 if columns is not None:
@@ -556,33 +557,19 @@ def __init__(
                 if len(data) and all(
                     isinstance(v, (Index, BasePandasDataset)) for v in data.values()
                 ):
-                    # Special case V.a: data is a list/dict where all the values are Snowpark pandas objects.
-                    # Concat can only be performed with BasePandasDataset objects.
-                    # If a value is an Index, convert it to a Series where the index is the index to be set
-                    # since these values are always present in the final DataFrame.
-                    values = [
-                        Series(v, index=index) if isinstance(v, Index) else v
-                        for v in data.values()
-                    ]
-                    new_qc = concat(values, axis=1, keys=data.keys())._query_compiler
-                    if dtype is not None:
-                        new_qc = new_qc.astype({col: dtype for col in new_qc.columns})
-                    if index is not None:
-                        new_qc = new_qc.reindex(
-                            axis=0, labels=convert_index_to_qc(index)
+                    # Special case: data is a dict where all the values are Snowpark pandas objects.
+                    self._query_compiler = (
+                        _df_init_dict_data_with_snowpark_pandas_values(
+                            data, index, columns, dtype
                         )
-                    if columns is not None:
-                        new_qc = new_qc.reindex(
-                            axis=1, labels=try_convert_index_to_native(columns)
-                        )
-                    self._query_compiler = new_qc
+                    )
                     return
 
                 # If only some data is a Snowpark pandas object, convert it to pandas objects.
                 res = {}
                 index = try_convert_index_to_native(index)
                 for k, v in data.items():
-                    if isinstance(v, (Index)):
+                    if isinstance(v, Index):
                         res[k] = v.to_pandas()
                     elif isinstance(v, BasePandasDataset):
                         # Need to perform reindex on the Series or DataFrame objects since only the data
@@ -596,41 +583,27 @@ def __init__(
                 if len(data) and all(
                     isinstance(v, (Index, BasePandasDataset)) for v in data
                 ):
-                    # Special case V.c: data is a list/dict where all the values are Snowpark pandas objects.
-                    # Concat can only be performed with BasePandasDataset objects.
-                    # If a value is an Index, convert it to a Series.
-                    values = [Series(v) if isinstance(v, Index) else v for v in data]
-                    new_qc = concat(values, axis=1).T._query_compiler
-                    if dtype is not None:
-                        new_qc = new_qc.astype({col: dtype for col in new_qc.columns})
-                    if index is not None:
-                        new_qc = new_qc.set_index([convert_index_to_qc(index)])
-                    if columns is not None:
-                        if all(isinstance(v, Index) for v in data):
-                            # Special case: if all the values are Index objects, they are always present in the
-                            # final result with the provided column names. Therefore, rename the columns.
-                            new_qc = new_qc.set_columns(columns)
-                        else:
-                            new_qc = new_qc.reindex(axis=1, labels=columns)
-                    self._query_compiler = new_qc
+                    # Special case: data is a list/dict where all the values are Snowpark pandas objects.
+                    self._query_compiler = (
+                        _df_init_list_data_with_snowpark_pandas_values(
+                            data, index, columns, dtype
+                        )
+                    )
                     return
 
+                # Sometimes the ndarray representation of a list is different from a regular list.
+                # For instance, [(1, 2, 3), (4, 5, 6), (7, 8, 9)], dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")]
+                # is different from np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)], dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")]).
+                # The list has the shape (3, 3) while the ndarray has the shape (3,). Therefore, do not modify
+                # the ndarray data.
                 if not isinstance(data, np.ndarray):
-                    # Sometimes the ndarray representation of a list is different from a regular list.
-                    # For instance, [(1, 2, 3), (4, 5, 6), (7, 8, 9)], dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")]
-                    # is different from np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)], dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")]).
-                    # The list has the shape (3, 3) while the ndarray has the shape (3,).
                     # If only some data is a Snowpark pandas object, convert it to pandas objects.
-                    res = []
-                    for v in data:
-                        if isinstance(v, (Index, BasePandasDataset)):
-                            res.append(v.to_pandas())
-                        else:
-                            # # Need to convert this is a native pandas object since native pandas incorrectly
-                            # # tries to perform `get_indexer` on it. Specify dtype=object so that pandas does not
-                            # # cast the data provided. In some cases, None turns to NaN, which is not desired.
-                            # res.append(native_pd.Index(v, dtype=object) if is_list_like(v) else v)
-                            res.append(v)
+                    res = [
+                        v.to_pandas()
+                        if isinstance(v, (Index, BasePandasDataset))
+                        else v
+                        for v in data
+                    ]
                     data = res
 
         query_compiler = from_pandas(
@@ -638,14 +611,14 @@ def __init__(
                 data=data,
                 # Handle setting the index, if it is a lazy index, outside this block.
                 index=None if isinstance(index, (Index, Series)) else index,
-                columns=try_convert_index_to_native(columns),
+                columns=columns,
                 dtype=dtype,
                 copy=copy,
             )
         )._query_compiler
 
-    # 2. Setting the index
-    # --------------------
+    # STEP 2: Setting the index
+    # -------------------------
     # The index is already set if the data is a non-Snowpark pandas object.
     # If either the data or the index is a Snowpark pandas object, set the index here.
     if index is not None and (
@@ -665,23 +638,63 @@ def __init__(
                 convert_index_to_list_of_qcs(index)
             )
 
-    # 3. If data is a DataFrame, filter result
-    # ----------------------------------------
-    if isinstance(data, DataFrame) and columns is not None:
-        # To select the columns for the resultant DataFrame, perform .loc[] on the created query compiler.
-        # This step is performed to ensure that the right columns are picked from the InternalFrame since we
-        # never explicitly drop the unwanted columns.
-        query_compiler = (
-            DataFrame(query_compiler=query_compiler)
-            .loc[slice(None), columns]
-            ._query_compiler
-        )
-
-    # 4. Setting the query compiler
-    # -----------------------------
+    # STEP 3: Setting the query compiler
+    # ----------------------------------
     self._query_compiler = query_compiler
 
 
+def _df_init_dict_data_with_snowpark_pandas_values(
+    data: AnyArrayLike | list,
+    index: list | AnyArrayLike | Series | Index,
+    columns: list | AnyArrayLike | Series | Index,
+    dtype: str | np.dtype | native_pd.ExtensionDtype | None,
+):
+    # Special case: data is a dict where all the values are Snowpark pandas objects.
+    # Concat can only be performed with BasePandasDataset objects.
+    # If a value is an Index, convert it to a Series where the index is the index to be set since these values
+    # are always present in the final DataFrame.
+    from snowflake.snowpark.modin.pandas import concat
+
+    values = [
+        Series(v, index=index) if isinstance(v, Index) else v for v in data.values()
+    ]
+    new_qc = concat(values, axis=1, keys=data.keys())._query_compiler
+    if dtype is not None:
+        new_qc = new_qc.astype({col: dtype for col in new_qc.columns})
+    if index is not None:
+        new_qc = new_qc.reindex(axis=0, labels=convert_index_to_qc(index))
+    if columns is not None:
+        new_qc = new_qc.reindex(axis=1, labels=columns)
+    return new_qc
+
+
+def _df_init_list_data_with_snowpark_pandas_values(
+    data: AnyArrayLike | list,
+    index: list | AnyArrayLike | Series | Index,
+    columns: list | AnyArrayLike | Series | Index,
+    dtype: str | np.dtype | native_pd.ExtensionDtype | None,
+):
+    # Special case: data is a list/dict where all the values are Snowpark pandas objects.
+    # Concat can only be performed with BasePandasDataset objects.
+    # If a value is an Index, convert it to a Series.
+    from snowflake.snowpark.modin.pandas import concat
+
+    values = [Series(v) if isinstance(v, Index) else v for v in data]
+    new_qc = concat(values, axis=1).T._query_compiler
+    if dtype is not None:
+        new_qc = new_qc.astype({col: dtype for col in new_qc.columns})
+    if index is not None:
+        new_qc = new_qc.set_index([convert_index_to_qc(index)])
+    if columns is not None:
+        if all(isinstance(v, Index) for v in data):
+            # Special case: if all the values are Index objects, they are always present in the
+            # final result with the provided column names. Therefore, rename the columns.
+            new_qc = new_qc.set_columns(columns)
+        else:
+            new_qc = new_qc.reindex(axis=1, labels=columns)
+    return new_qc
+
+
 @register_dataframe_accessor("__dataframe__")
 def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True):
     """
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
index bb3bb612b5..46cf45041a 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
@@ -43,6 +43,7 @@
 from snowflake.snowpark.modin.pandas.api.extensions import register_series_accessor
 from snowflake.snowpark.modin.pandas.utils import from_pandas, is_scalar
 from snowflake.snowpark.modin.plugin._internal.utils import (
+    assert_fields_are_none,
     convert_index_to_list_of_qcs,
     convert_index_to_qc,
     error_checking_for_init,
@@ -352,37 +353,36 @@ def __init__(
 
     from snowflake.snowpark.modin.plugin.extensions.index import Index
 
-    # 0. Setting the query compiler
-    # -----------------------------
+    # Setting the query compiler
+    # --------------------------
     if query_compiler is not None:
         # CASE I: query_compiler
         # If a query_compiler is passed in, only use the query_compiler and name fields to create a new Series.
-        assert (
-            data is None
-        ), "Invalid Series construction! Cannot pass both data and query_compiler."
-        assert (
-            index is None
-        ), "Invalid Series construction! Cannot pass both index and query_compiler."
+        # Verify that the data and index parameters are None.
+        assert_fields_are_none(class_name="Series", data=data, index=index)
         self._query_compiler = query_compiler.columnarize()
         if name is not None:
             self.name = name
         return
 
+    # A DataFrame cannot be used as an index and Snowpark pandas does not support the Categorical type yet.
+    # Check that index is not a DataFrame and dtype is not "category".
     error_checking_for_init(index, dtype)
 
     if isinstance(data, spd.DataFrame):
+        # data cannot be a DataFrame, raise a clear error message.
         # pandas raises an ambiguous error:
         # ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
         raise ValueError("Data cannot be a DataFrame")
 
     # The logic followed here is:
-    # 1. Create a query_compiler from the provided data.
-    # 2. If an index is provided, set the index. This is either through set_index or reindex.
-    # 3. The resultant query_compiler is columnarized and set as the query_compiler for the Series.
-    # 4. If a name is provided, set the name.
+    # STEP 1: Create a query_compiler from the provided data.
+    # STEP 2: If an index is provided, set the index. This is either through set_index or reindex.
+    # STEP 3: The resultant query_compiler is columnarized and set as the query_compiler for the Series.
+    # STEP 4: If a name is provided, set the name.
 
-    # 1. Setting the data
-    # -------------------
+    # STEP 1: Setting the data
+    # ------------------------
     if isinstance(data, Index):
         # CASE II: Index
         # If the data is an Index object, convert it to a Series, and get the query_compiler.
@@ -390,10 +390,15 @@ def __init__(
             data.to_series(index=None, name=name).reset_index(drop=True)._query_compiler
         )
 
-    elif isinstance(data, type(self)):
+    elif isinstance(data, Series):
         # CASE III: Series
-        # If the data is a Series object, copy the query_compiler.
-        query_compiler = data._query_compiler.copy()
+        # If the data is a Series object, use its query_compiler.
+        query_compiler = data._query_compiler
+        if index is None and name is None and copy is False:
+            # When copy is False and no index and name are provided, the Series is a shallow copy of the original Series.
+            self._query_compiler = query_compiler
+            data._add_sibling(self)
+            return
 
     else:
         # CASE IV: Non-Snowpark pandas data
@@ -433,8 +438,8 @@ def __init__(
             )
         )._query_compiler
 
-    # 2. Setting the index
-    # --------------------
+    # STEP 2: Setting the index
+    # -------------------------
     # The index is already set if the data is a non-Snowpark pandas object.
     # If either the data or the index is a Snowpark pandas object, set the index here.
     if index is not None and (
@@ -454,8 +459,8 @@ def __init__(
                 convert_index_to_list_of_qcs(index)
             )
 
-    # 3 and 4. Setting the query compiler and name
-    # --------------------------------------------
+    # STEP 3 and STEP 4: Setting the query compiler and name
+    # ------------------------------------------------------
     self._query_compiler = query_compiler.columnarize()
     if name is not None:
         self.name = name
diff --git a/tests/integ/modin/groupby/test_groupby_apply.py b/tests/integ/modin/groupby/test_groupby_apply.py
index e83fcbe00b..04321efcc0 100644
--- a/tests/integ/modin/groupby/test_groupby_apply.py
+++ b/tests/integ/modin/groupby/test_groupby_apply.py
@@ -537,7 +537,7 @@ def operation(df: native_pd.DataFrame) -> native_pd.DataFrame:
                 if group_keys
                 else QUERY_COUNT_WITH_TRANSFORM_CHECK
             ),
-            join_count=2,
+            join_count=JOIN_COUNT + 1,
             udtf_count=UDTF_COUNT,
         ):
             snow_result = operation(mdf)
@@ -719,7 +719,7 @@ def groupby_apply_without_sort(df):
         with SqlCounter(
             query_count=QUERY_COUNT_WITH_TRANSFORM_CHECK,
             udtf_count=UDTF_COUNT,
-            join_count=2,
+            join_count=JOIN_COUNT + 1,
         ):
             assert_snowpark_pandas_equal_to_pandas(
                 groupby_apply_without_sort(snow_df).sort_values(),
@@ -969,7 +969,7 @@ def test_args_and_kwargs(self, grouping_dfs_with_multiindexes):
         # One extra query to convert index to native pandas in dataframe constructor to create test dataframes
         query_count=QUERY_COUNT_WITHOUT_TRANSFORM_CHECK,
         udtf_count=UDTF_COUNT,
-        join_count=2,
+        join_count=JOIN_COUNT + 1,
     )
     @pytest.mark.parametrize("index", [[2.0, np.nan, 2.0, 1.0], [np.nan] * 4])
     def test_dropna(self, dropna, index):
@@ -1089,7 +1089,7 @@ def test_dataframe_groupby_getitem(self, by, func, dropna, group_keys, sort):
         with SqlCounter(
             query_count=qc,
             udtf_count=UDTF_COUNT,
-            join_count=2,
+            join_count=JOIN_COUNT + 1,
         ):
             eval_snowpark_pandas_result(
                 *create_test_dfs(
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
index 354bb2f11e..e15acd03e8 100644
--- a/tests/integ/modin/index/test_df_series_creation_with_index.py
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -748,8 +748,8 @@ def test_create_df_with_mixed_series_index_dict_data():
     native_data1 = native_pd.Series([1, 2, 3])
     native_data2 = native_pd.Index([4, 5, 6])
     data3 = [7, 8, 9]
-    snow_data1 = pd.Series([1, 2, 3])
-    snow_data2 = pd.Index([4, 5, 6])
+    snow_data1 = pd.Series(native_data1)
+    snow_data2 = pd.Index(native_data2)
     native_data = {"A": native_data1, "B": native_data2, "C": data3}
     snow_data = {"A": snow_data1, "B": snow_data2, "C": data3}
 
@@ -761,7 +761,7 @@ def test_create_df_with_mixed_series_index_dict_data():
 
     # Create DataFrame with dict data and Series index.
     native_ser_index = native_pd.Series([9, 2, 999])
-    snow_ser_index = pd.Series([9, 2, 999])
+    snow_ser_index = pd.Series(native_ser_index)
     native_df = native_pd.DataFrame(native_data, index=native_ser_index)
     snow_df = pd.DataFrame(snow_data, index=snow_ser_index)
     with SqlCounter(query_count=1):
@@ -769,7 +769,7 @@ def test_create_df_with_mixed_series_index_dict_data():
 
     # Create DataFrame with dict data and Index index.
     native_index = native_pd.Index([9, 2, 999])
-    snow_index = pd.Index([9, 2, 999])
+    snow_index = pd.Index(native_index)
     native_df = native_pd.DataFrame(native_data, index=native_index)
     snow_df = pd.DataFrame(snow_data, index=snow_index)
     with SqlCounter(query_count=1):
@@ -860,3 +860,91 @@ def test_create_series_with_df_data_negative():
         native_pd.Series(native_pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
     with pytest.raises(ValueError, match="Data cannot be a DataFrame"):
         pd.Series(pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
+
+
+@sql_count_checker(query_count=1)
+def test_create_df_with_name_in_columns():
+    # Test DataFrame creation where the data is a named Series and its name is in the columns passed in.
+    # The column sharing the name with the Series takes on its values as the column values; the rest of the
+    # columns are filled with NaNs.
+    native_data = native_pd.Series([1, 2, 3], name="b")
+    snow_data = pd.Series(native_data)
+    columns = ["a", "b"]
+    native_df = native_pd.DataFrame(native_data, columns=columns)
+    snow_df = pd.DataFrame(snow_data, columns=columns)
+    assert_frame_equal(snow_df, native_df)
+
+
+@sql_count_checker(query_count=1, join_count=1)
+def test_create_df_with_name_not_in_columns_and_index():
+    # Test DataFrame creation where the data is a named Series and its name is not in the columns passed in.
+    # The resultant DataFrame is filled with NaNs; the index and columns are set to the values provided.
+    native_data = native_pd.Series([1, 2, 3], name="b")
+    snow_data = pd.Series(native_data)
+    native_idx = native_pd.Index([1, 2, 3, 4, 5])
+    snow_idx = pd.Index(native_idx)
+    columns = ["a", "c"]
+    native_df = native_pd.DataFrame(native_data, index=native_idx, columns=columns)
+    snow_df = pd.DataFrame(snow_data, index=snow_idx, columns=columns)
+    assert_frame_equal(snow_df, native_df)
+
+
+@sql_count_checker(query_count=1)
+def test_create_df_with_df_and_subset_of_columns():
+    # Test DataFrame creation where data is a DataFrame and only a subset of its columns are passed in.
+    # Only the columns passed in are used; the rest are ignored. In this case with end up with a single
+    # column DataFrame.
+    native_data = native_pd.DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]})
+    snow_data = pd.DataFrame(native_data)
+    columns = ["a"]
+    native_df = native_pd.DataFrame(native_data, columns=columns)
+    snow_df = pd.DataFrame(snow_data, columns=columns)
+    assert_frame_equal(snow_df, native_df)
+
+
+def test_create_df_with_copy():
+    # When copy is True, the data is copied into the DataFrame, and the new DataFrame and data do not share references.
+    data = pd.DataFrame([[1, 2], [3, 4], [5, 6]])
+    df_copy = pd.DataFrame(data, copy=True)
+    df_not_copy = pd.DataFrame(data, copy=False)
+
+    with SqlCounter(query_count=3):
+        # Changing data should also change df_not_copy. It does not change df_copy.
+        data.iloc[0, 0] = 100
+        assert data.iloc[0, 0] == df_not_copy.iloc[0, 0] == 100
+        assert df_copy.iloc[0, 0] == 1
+
+    with SqlCounter(query_count=3):
+        # Similarly, changing df_not_copy should also change data. It does not change df_copy.
+        df_not_copy.iloc[0, 0] = 99
+        assert data.iloc[0, 0] == df_not_copy.iloc[0, 0] == 99
+        assert df_copy.iloc[0, 0] == 1
+
+    with SqlCounter(query_count=2):
+        # Changing df_copy should not change data or df_not_copy.
+        df_copy.iloc[0, 0] = 1000
+        assert data.iloc[0, 0] == df_not_copy.iloc[0, 0] == 99
+
+
+def test_create_series_with_copy():
+    # When copy is True, the data is copied into the Series, and the new Series and data do not share references.
+    data = pd.Series([1, 2, 3, 4, 5])
+    series_copy = pd.Series(data, copy=True)
+    series_not_copy = pd.Series(data, copy=False)
+
+    with SqlCounter(query_count=3):
+        # Changing data should also change series_not_copy. It does not change series_copy.
+        data.iloc[0] = 100
+        assert data.iloc[0] == series_not_copy.iloc[0] == 100
+        assert series_copy.iloc[0] == 1
+
+    with SqlCounter(query_count=3):
+        # Similarly, changing series_not_copy should also change data. It does not change series_copy.
+        series_not_copy.iloc[0] = 99
+        assert data.iloc[0] == series_not_copy.iloc[0] == 99
+        assert series_copy.iloc[0] == 1
+
+    with SqlCounter(query_count=2):
+        # Changing series_copy should not change data or series_not_copy.
+        series_copy.iloc[0] = 1000
+        assert data.iloc[0] == series_not_copy.iloc[0] == 99

From 2eb14a7937a31bf75dd8f27f2afad35aed47a169 Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Wed, 18 Sep 2024 15:01:44 -0700
Subject: [PATCH 40/42] update query counts to use constants

---
 tests/integ/modin/groupby/test_groupby_apply.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integ/modin/groupby/test_groupby_apply.py b/tests/integ/modin/groupby/test_groupby_apply.py
index 04321efcc0..1ef80e33be 100644
--- a/tests/integ/modin/groupby/test_groupby_apply.py
+++ b/tests/integ/modin/groupby/test_groupby_apply.py
@@ -1074,9 +1074,9 @@ class TestSeriesGroupBy:
     def test_dataframe_groupby_getitem(self, by, func, dropna, group_keys, sort):
         """Test apply() on a SeriesGroupBy that we get by DataFrameGroupBy.__getitem__"""
         qc = (
-            6
+            QUERY_COUNT_WITH_TRANSFORM_CHECK
             if group_keys is False and not func == get_scalar_from_numeric_series
-            else 5
+            else QUERY_COUNT_WITHOUT_TRANSFORM_CHECK
         )
         if (
             func in (get_dataframe_from_numeric_series, get_series_from_numeric_series)

From d9bbd9b0cbf06cafec17d43e799acc5c3d71575a Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Wed, 18 Sep 2024 15:10:27 -0700
Subject: [PATCH 41/42] remove docstring update, add docstrings for helper
 functions

---
 src/snowflake/snowpark/modin/plugin/docstrings/series.py  | 2 +-
 .../modin/plugin/extensions/dataframe_overrides.py        | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py
index eb3f4e1135..9e4ebd4d25 100644
--- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py
+++ b/src/snowflake/snowpark/modin/plugin/docstrings/series.py
@@ -78,7 +78,7 @@ class Series(BasePandasDataset):
     c    3
     dtype: int64
 
-    The keys of the dictionary match with the Index values, hence the dictionary
+    The keys of the dictionary match with the Index values, hence the Index
     values have no effect.
 
     >>> d = {'a': 1, 'b': 2, 'c': 3}
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
index a9a5ea511e..d51a8d72db 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
@@ -649,6 +649,10 @@ def _df_init_dict_data_with_snowpark_pandas_values(
     columns: list | AnyArrayLike | Series | Index,
     dtype: str | np.dtype | native_pd.ExtensionDtype | None,
 ):
+    """
+    Helper function for initializing a DataFrame with a dictionary where all the values
+    are Snowpark pandas objects.
+    """
     # Special case: data is a dict where all the values are Snowpark pandas objects.
     # Concat can only be performed with BasePandasDataset objects.
     # If a value is an Index, convert it to a Series where the index is the index to be set since these values
@@ -674,6 +678,10 @@ def _df_init_list_data_with_snowpark_pandas_values(
     columns: list | AnyArrayLike | Series | Index,
     dtype: str | np.dtype | native_pd.ExtensionDtype | None,
 ):
+    """
+    Helper function for initializing a DataFrame with a list where all the values
+    are Snowpark pandas objects.
+    """
     # Special case: data is a list/dict where all the values are Snowpark pandas objects.
     # Concat can only be performed with BasePandasDataset objects.
     # If a value is an Index, convert it to a Series.

From f40c5b4a4a5f4d1230f13b4c2e15acc967909dbd Mon Sep 17 00:00:00 2001
From: Varnika Budati <varnika.budati@snowflake.com>
Date: Fri, 20 Sep 2024 11:28:42 -0700
Subject: [PATCH 42/42] try to break down df init into three steps: data,
 columns, and index

---
 .../snowpark/modin/plugin/_internal/utils.py  |  34 +-
 .../plugin/extensions/dataframe_overrides.py  | 118 +--
 .../plugin/extensions/series_overrides.py     |   7 +-
 .../test_df_series_creation_with_index.py     | 892 ++++++++++++------
 4 files changed, 678 insertions(+), 373 deletions(-)

diff --git a/src/snowflake/snowpark/modin/plugin/_internal/utils.py b/src/snowflake/snowpark/modin/plugin/_internal/utils.py
index 6d4e15fe09..a9a10a7ec2 100644
--- a/src/snowflake/snowpark/modin/plugin/_internal/utils.py
+++ b/src/snowflake/snowpark/modin/plugin/_internal/utils.py
@@ -2062,6 +2062,9 @@ def convert_index_to_qc(index: Any) -> Any:
     if isinstance(index, Index):
         idx_qc = index.to_series()._query_compiler
     elif isinstance(index, Series):
+        # The name of the index comes from the Series' name, not the index name. `reindex` does not handle this,
+        # so we need to set the name of the index to the name of the Series.
+        index.index.name = index.name
         idx_qc = index._query_compiler
     else:
         idx_qc = Series(index)._query_compiler
@@ -2109,7 +2112,6 @@ def convert_index_to_list_of_qcs(index: Any) -> list:
 def add_extra_columns_and_select_required_columns(
     query_compiler: Any,
     columns: Union[AnyArrayLike, list],
-    data_columns: Union[AnyArrayLike, list],
 ) -> Any:
     """
     Method to add extra columns to and select the required columns from the provided query compiler.
@@ -2123,26 +2125,32 @@ def add_extra_columns_and_select_required_columns(
         The query compiler to select columns from, i.e., data's query compiler.
     columns: AnyArrayLike or list
         The columns to select from the query compiler.
-    data_columns: AnyArrayLike or list
-        The columns in the data. This is data.columns if data is a DataFrame or data.name if data is a Series.
-
     """
     from modin.pandas import DataFrame
 
+    data_columns = query_compiler.get_columns().to_list()
     # The `columns` parameter is used to select the columns from `data` that will be in the resultant DataFrame.
     # If a value in `columns` is not present in data's columns, it will be added as a new column filled with NaN values.
     # These columns are tracked by the `extra_columns` variable.
     if data_columns is not None and columns is not None:
         extra_columns = [col for col in columns if col not in data_columns]
-        # To add these new columns to the DataFrame, perform `__getitem__` only with the extra columns
-        # and set them to None.
-        extra_columns_df = DataFrame(query_compiler=query_compiler)
-        extra_columns_df[extra_columns] = None
-        query_compiler = extra_columns_df._query_compiler
+        if extra_columns is not []:
+            # To add these new columns to the DataFrame, perform `__getitem__` only with the extra columns
+            # and set them to None.
+            extra_columns_df = DataFrame(query_compiler=query_compiler)
+            # In the case that the columns are MultiIndex but not all extra columns are tuples, we need to flatten the
+            # columns to ensure that the columns are a single-level index. If not, `__getitem__` will raise an error
+            # when trying to add new columns that are not in the expected tuple format.
+            if not all(isinstance(col, tuple) for col in extra_columns) and isinstance(
+                query_compiler.get_columns(), native_pd.MultiIndex
+            ):
+                flattened_columns = extra_columns_df.columns.to_flat_index()
+                extra_columns_df.columns = flattened_columns
+            extra_columns_df[extra_columns] = None
+            query_compiler = extra_columns_df._query_compiler
 
-    # To select the columns for the resultant DataFrame, perform `.loc[]` on the created query compiler.
+    # To select the columns for the resultant DataFrame, perform `__getitem__` on the created query compiler.
     # This step is performed to ensure that the right columns are picked from the InternalFrame since we
-    # never explicitly drop the unwanted columns. `.loc[]` also ensures that the columns in the resultant
+    # never explicitly drop the unwanted columns. `__getitem__` also ensures that the columns in the resultant
     # DataFrame are in the same order as the columns in the `columns` parameter.
-    columns = slice(None) if columns is None else columns
-    return DataFrame(query_compiler=query_compiler).loc[:, columns]._query_compiler
+    return DataFrame(query_compiler=query_compiler)[columns]._query_compiler
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
index d51a8d72db..65152181d3 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py
@@ -60,6 +60,7 @@
     is_numeric_dtype,
 )
 from pandas.core.dtypes.inference import is_hashable, is_integer
+from pandas.core.indexes.base import ensure_index
 from pandas.core.indexes.frozen import FrozenList
 from pandas.io.formats.printing import pprint_thing
 from pandas.util._validators import validate_bool_kwarg
@@ -472,7 +473,6 @@ def __init__(
     # Setting the query compiler
     # --------------------------
     if query_compiler is not None:
-        # CASE I: query_compiler
         # If a query_compiler is passed in only use the query_compiler field to create a new DataFrame.
         # Verify that the data, index, and columns parameters are None.
         assert_fields_are_none(
@@ -485,50 +485,38 @@ def __init__(
     # Check that index is not a DataFrame and dtype is not "category".
     error_checking_for_init(index, dtype)
 
-    # Convert columns to a local object if it is a lazy Index.
-    columns = try_convert_index_to_native(columns)
+    # Convert columns to a local object if it is lazy.
+    if columns is not None:
+        columns = (
+            columns.to_pandas()
+            if isinstance(columns, (Index, BasePandasDataset))
+            else columns
+        )
+        columns = ensure_index(columns)
 
     # The logic followed here is:
-    # STEP 1: Create a query_compiler from the provided data. If columns are provided, add/select the columns.
-    # STEP 2: If an index is provided, set the index through set_index or reindex.
-    # STEP 3: The resultant query_compiler is then set as the query_compiler for the DataFrame.
-
-    # STEP 1: Setting the data (and columns)
-    # --------------------------------------
+    # STEP 1: Obtain the query_compiler from the provided data if the data is lazy. If data is local, the query
+    #         compiler is None.
+    # STEP 2: If columns are provided, set the columns if data is lazy.
+    # STEP 3: If both the data and index are local (or index is None), create a query compiler from pandas.
+    # STEP 4: Otherwise, set the index through set_index or reindex.
+    # STEP 5: The resultant query_compiler is then set as the query_compiler for the DataFrame.
+
+    # STEP 1: Setting the data
+    # ------------------------
     if isinstance(data, Index):
-        # CASE II: data is a Snowpark pandas Index
         # If the data is an Index object, convert it to a DataFrame to make sure that the values are in the
         # correct format: the values should be a data column, not an index column.
-        if data.name is None:
-            # If no name is provided, the default name is 0. Otherwise, only use the first value in `columns` to
-            # set the column name; this is because the resultant DataFrame will have only one column.
-            new_name = 0 if columns is None else columns[0]
-        else:
-            new_name = data.name
-        query_compiler = data.to_frame(index=False, name=new_name)._query_compiler
-
+        # Converting the Index object to its DataFrame version sets the resultant DataFrame's column name correctly -
+        # it should be 0 if the name is None.
+        query_compiler = data.to_frame(index=False)._query_compiler
     elif isinstance(data, Series):
-        # CASE III: data is a Snowpark pandas Series
-        # If the Series `data` has no name, the default name is 0.
-        name = [data.name] if data.name is not None else [0]
-        if columns is None:
-            # If no columns are provided, the resultant DataFrame has only one column.
-            # The column name is the Series' name.
-            query_compiler = data._query_compiler.set_columns(name)
-        elif data.name in columns:
-            # Treat any columns that are not data.name as extra columns. They will be appended as NaN columns.
-            # After this, select the required columns in the order provided by `columns`.
-            query_compiler = add_extra_columns_and_select_required_columns(
-                data._query_compiler, columns, name
-            )
-        else:
-            # If the columns provided are not in the named Series, pandas clears the DataFrame and sets columns.
-            query_compiler = from_pandas(
-                native_pd.DataFrame(columns=columns)
-            )._query_compiler
-
+        # Rename the Series object to 0 if its name is None and grab its query compiler.
+        query_compiler = data.rename(
+            0 if data.name is None else data.name, inplace=False
+        )._query_compiler
     elif isinstance(data, DataFrame):
-        # CASE IV: data is a Snowpark pandas DataFrame
+        query_compiler = data._query_compiler
         if columns is None and index is None:
             # Special case: if the new DataFrame has the same columns and index as the original DataFrame,
             # the query compiler is shared and kept track of as a sibling.
@@ -537,19 +525,38 @@ def __init__(
                 # When copy is False, the DataFrame is a shallow copy of the original DataFrame.
                 data._add_sibling(self)
             return
-        # Treat any columns that are not in data.columns as extra columns. They will be appended as NaN columns.
-        # After this, select the required columns in the order provided by `columns`.
-        query_compiler = add_extra_columns_and_select_required_columns(
-            data._query_compiler, columns, data.columns
-        )
 
-    else:
-        # CASE V: Non-Snowpark pandas data
+    # STEP 2: Setting the columns if data is lazy
+    # -------------------------------------------
+    # When data is lazy, the query compiler is not None.
+    if query_compiler is not None:
+        if columns is not None:
+            if (
+                isinstance(data, (Index, Series))
+                and query_compiler.get_columns()[0] not in columns
+            ):
+                # If the name of the Series/Index is not in the columns, clear the DataFrame and set the columns.
+                query_compiler = from_pandas(
+                    native_pd.DataFrame(columns=columns)
+                )._query_compiler
+            else:
+                # Treat any columns not in data.columns (or data.name if data is a Series/Index) as extra columns.
+                # They will be appended as NaN columns. Then, select the required columns in the order provided by `columns`.
+                query_compiler = add_extra_columns_and_select_required_columns(
+                    query_compiler, columns
+                )
+
+    # STEP 3: Creating a query compiler from pandas
+    # ---------------------------------------------
+    else:  # When the data is local, the query compiler is None.
+        # If the data, columns, and index are local objects, the query compiler representation is created from pandas.
+        # However, when the data is a dict but the index is lazy, the index is converted to pandas and the query
+        # compiler is created from pandas.
         if not isinstance(
             data, (native_pd.Series, native_pd.DataFrame, native_pd.Index)
         ) and is_list_like(data):
+            # If data is a pandas object, directly handle it with the pandas constructor.
             if is_dict_like(data):
-                # Setting up keys and values for processing if all the values are Snowpark pandas objects.
                 if columns is not None:
                     # Reduce the dictionary to only the relevant columns as the keys.
                     data = {key: value for key, value in data.items() if key in columns}
@@ -565,18 +572,19 @@ def __init__(
                     )
                     return
 
-                # If only some data is a Snowpark pandas object, convert it to pandas objects.
+                # If only some data is a Snowpark pandas object, convert the lazy data to pandas objects.
                 res = {}
-                index = try_convert_index_to_native(index)
                 for k, v in data.items():
                     if isinstance(v, Index):
                         res[k] = v.to_pandas()
                     elif isinstance(v, BasePandasDataset):
                         # Need to perform reindex on the Series or DataFrame objects since only the data
                         # whose index matches the given index is kept.
-                        res[k] = v.to_pandas().reindex(index=index)
+                        res[k] = v.reindex(index=index).to_pandas()
                     else:
                         res[k] = v
+                # If the index is lazy, convert it to a pandas object so that the pandas constructor can handle it.
+                index = try_convert_index_to_native(index)
                 data = res
 
             else:  # list-like but not dict-like data.
@@ -594,8 +602,8 @@ def __init__(
                 # Sometimes the ndarray representation of a list is different from a regular list.
                 # For instance, [(1, 2, 3), (4, 5, 6), (7, 8, 9)], dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")]
                 # is different from np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)], dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")]).
-                # The list has the shape (3, 3) while the ndarray has the shape (3,). Therefore, do not modify
-                # the ndarray data.
+                # The list has the shape (3, 3) while the ndarray has the shape (3,).
+                # Therefore, do not modify the ndarray data.
                 if not isinstance(data, np.ndarray):
                     # If only some data is a Snowpark pandas object, convert it to pandas objects.
                     res = [
@@ -609,7 +617,7 @@ def __init__(
         query_compiler = from_pandas(
             native_pd.DataFrame(
                 data=data,
-                # Handle setting the index, if it is a lazy index, outside this block.
+                # Handle setting the index, if it is a lazy index, outside this block in STEP 4.
                 index=None if isinstance(index, (Index, Series)) else index,
                 columns=columns,
                 dtype=dtype,
@@ -617,9 +625,9 @@ def __init__(
             )
         )._query_compiler
 
-    # STEP 2: Setting the index
+    # STEP 4: Setting the index
     # -------------------------
-    # The index is already set if the data is a non-Snowpark pandas object.
+    # The index is already set if the data and index are non-Snowpark pandas objects.
     # If either the data or the index is a Snowpark pandas object, set the index here.
     if index is not None and (
         isinstance(index, (Index, Series))
@@ -638,7 +646,7 @@ def __init__(
                 convert_index_to_list_of_qcs(index)
             )
 
-    # STEP 3: Setting the query compiler
+    # STEP 5: Setting the query compiler
     # ----------------------------------
     self._query_compiler = query_compiler
 
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
index 34f9b9c690..e2ed2580cd 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
@@ -352,7 +352,6 @@ def __init__(
     # Setting the query compiler
     # --------------------------
     if query_compiler is not None:
-        # CASE I: query_compiler
         # If a query_compiler is passed in, only use the query_compiler and name fields to create a new Series.
         # Verify that the data and index parameters are None.
         assert_fields_are_none(class_name="Series", data=data, index=index)
@@ -380,14 +379,14 @@ def __init__(
     # STEP 1: Setting the data
     # ------------------------
     if isinstance(data, Index):
-        # CASE II: Index
+        # CASE I: Index
         # If the data is an Index object, convert it to a Series, and get the query_compiler.
         query_compiler = (
             data.to_series(index=None, name=name).reset_index(drop=True)._query_compiler
         )
 
     elif isinstance(data, Series):
-        # CASE III: Series
+        # CASE II: Series
         # If the data is a Series object, use its query_compiler.
         query_compiler = data._query_compiler
         if index is None and name is None and copy is False:
@@ -397,7 +396,7 @@ def __init__(
             return
 
     else:
-        # CASE IV: Non-Snowpark pandas data
+        # CASE III: Non-Snowpark pandas data
         # If the data is not a Snowpark pandas object, convert it to a query compiler.
         # The query compiler uses the '__reduced__' name internally as a column name to represent pandas
         # Series objects that are not explicitly assigned a name.
diff --git a/tests/integ/modin/index/test_df_series_creation_with_index.py b/tests/integ/modin/index/test_df_series_creation_with_index.py
index e15acd03e8..06ed621fc6 100644
--- a/tests/integ/modin/index/test_df_series_creation_with_index.py
+++ b/tests/integ/modin/index/test_df_series_creation_with_index.py
@@ -38,28 +38,187 @@ def obj_type_helper(obj_type: str) -> tuple:
     return assert_equal_func, snow_obj, native_obj, kwargs
 
 
+# DATA TESTS
+# ----------
 @pytest.mark.parametrize(
-    "native_idx",
+    "native_data",
     [
         native_pd.Index([1, 2, 3, 4], name="some name"),
-        native_pd.Index(list(range(250))),
+        native_pd.Index(list(range(200))),
         native_pd.Index(["A", None, 2.3, 1], name="AAAAA"),
         native_pd.Index([]),
+        native_pd.Series([1, 2, 3, 4], name="some name"),
+        native_pd.Series(list(range(100))),
+        native_pd.Series(["A", None, 2.3, 1], name="AAAAA"),
+        native_pd.Series([]),
+        [],
+        ["A", "B", "C"],
+        None,
     ],
 )
 @pytest.mark.parametrize("obj_type", ["series", "df"])
-@sql_count_checker(query_count=1, join_count=0)
-def test_create_with_index_as_data(native_idx, obj_type):
+@sql_count_checker(query_count=1)
+def test_create_with_data(native_data, obj_type):
     """
-    Creating a Series where the data is an Index.
+    Creating a DataFrame/Series where the data is an Index, Series, or list.
     """
-    snow_idx = pd.Index(native_idx)
+    if isinstance(native_data, native_pd.Series):
+        snow_data = pd.Series(native_data)
+    elif isinstance(native_data, native_pd.Index):
+        snow_data = pd.Index(native_data)
+    else:
+        snow_data = native_data
+    assert_equal_func, snow_obj, native_obj, kwargs = obj_type_helper(obj_type)
+    assert_equal_func(
+        snow_obj(snow_data),
+        native_obj(native_data),
+        check_dtype=False,
+        check_index_type=False,
+        **kwargs,
+    )
+
+
+# INDEX TESTS
+# -----------
+@pytest.mark.parametrize(
+    "index", [[1, 2, 3, 4], list(range(100)), ["A", None, 2.3, 1], []]
+)
+@pytest.mark.parametrize("index_type", ["series", "index", "list"])
+@pytest.mark.parametrize("obj_type", ["series", "df"])
+def test_create_with_index(index, index_type, obj_type):
+    with SqlCounter(query_count=1, join_count=0 if index_type == "list" else 1):
+        # When creating an empty Series with a non-empty index, the index should be used as the index of the Series.
+        if index_type == "series":
+            native_index, snow_index = native_pd.Series(index), pd.Series(index)
+        elif index_type == "index":
+            native_index, snow_index = native_pd.Index(index), pd.Index(index)
+        else:
+            native_index, snow_index = index, index
+        assert_equal_func, snow_obj, native_obj, kwargs = obj_type_helper(obj_type)
+        assert_equal_func(
+            snow_obj(index=snow_index, dtype=object),
+            native_obj(index=native_index, dtype=object),
+            check_index_type=False,
+            **kwargs,
+        )
+
+
+@pytest.mark.parametrize(
+    "index", [[1, 2, 3, 4], list(range(100)), ["A", None, 2.3, 1], []]
+)
+@pytest.mark.parametrize("index_type", ["series", "index"])
+@pytest.mark.parametrize("index_name", [None, "index name!", ("tuple", "name")])
+@pytest.mark.parametrize("obj_type", ["series", "df"])
+@sql_count_checker(query_count=1, join_count=1)
+def test_create_with_named_index(index, index_type, index_name, obj_type):
+    # When creating an empty Series with a non-empty index, the index should be used as the index of the Series.
+    if index_type == "series":
+        native_index, snow_index = native_pd.Series(index, name=index_name), pd.Series(
+            index, name=index_name
+        )
+    else:
+        native_index, snow_index = native_pd.Index(index, name=index_name), pd.Index(
+            index, name=index_name
+        )
     assert_equal_func, snow_obj, native_obj, kwargs = obj_type_helper(obj_type)
     assert_equal_func(
-        snow_obj(snow_idx), native_obj(native_idx), check_dtype=False, **kwargs
+        snow_obj(index=snow_index, dtype=object),
+        native_obj(index=native_index, dtype=object),
+        check_index_type=False,
+        **kwargs,
     )
 
 
+# COLUMN TESTS
+# ------------
+@pytest.mark.parametrize(
+    "columns",
+    [
+        ["A"],
+        ("A", "B"),
+        [("A", "B")],
+        ["A", "B", "C"],
+        ["A", ("B", "C")],
+        [("A", "B"), ("C", "D")],
+        native_pd.Index(["A", "B", "C"]),
+        np.array([("A", "B"), ("B", "C")]),
+    ],
+)
+@sql_count_checker(query_count=1)
+def test_create_df_with_columns(columns):
+    # Test DataFrame creation with only columns passed in.
+    native_df = native_pd.DataFrame(columns=columns)
+    snow_df = pd.DataFrame(columns=columns)
+    assert_frame_equal(snow_df, native_df)
+
+
+@pytest.mark.parametrize(
+    "columns",
+    [
+        ["A"],
+        ("A", "B"),
+        ["A", "B", "C"],
+        ["A", ("B", "C")],
+    ],
+)
+@pytest.mark.parametrize("column_type", ["index", "series"])
+@pytest.mark.parametrize("column_name", [None, "index name!", ("tuple", "name")])
+def test_create_df_with_lazy_columns(columns, column_type, column_name):
+    # Test DataFrame creation with only lazy columns passed in.
+    with SqlCounter(query_count=2 if column_type == "index" else 2):
+        if column_type == "index":
+            native_cols = native_pd.Index(data=columns, name=column_name)
+            snow_cols = pd.Index(data=columns, name=column_name)
+        else:
+            native_cols = native_pd.Series(data=columns, name=column_name)
+            snow_cols = pd.Series(data=columns, name=column_name)
+        native_df = native_pd.DataFrame(columns=native_cols)
+        snow_df = pd.DataFrame(columns=snow_cols)
+        # If the column name has a tuple in it, convert it to a list and check whether this matches the Snowpark pandas
+        # result. This is because any tuple values stored in Snowflake are converted to lists.
+        # Here, the column name is derived from the values of an Index/Series object stored in Snowflake.
+        if any(isinstance(col, tuple) for col in columns):
+            native_columns = [
+                list(col) if isinstance(col, tuple) else col
+                for col in native_df.columns
+            ]
+            assert native_columns == snow_df.columns.tolist()
+            # Set the Snowpark pandas DataFrame's columns to the native pandas DataFrame's columns to allow for
+            # easier comparison between the two objects
+            snow_df.columns = native_df.columns
+        assert_frame_equal(snow_df, native_df)
+
+
+@pytest.mark.parametrize("columns", [[("A", "B")], [("A", "B"), ("C", "D")]])
+@pytest.mark.parametrize("column_type", ["index", "series"])
+@pytest.mark.parametrize("column_name", [None, ("tuple", "name")])
+@sql_count_checker(query_count=2)
+def test_create_df_with_lazy_multiindex_columns(columns, column_type, column_name):
+    # Test DataFrame creation with only lazy columns passed in.
+    if column_type == "index":
+        native_cols = native_pd.Index(data=columns, name=column_name)
+        snow_cols = pd.Index(data=columns, name=column_name)
+    else:
+        native_cols = native_pd.Series(data=columns, name=column_name)
+        snow_cols = pd.Series(data=columns, name=column_name)
+    native_df = native_pd.DataFrame(columns=native_cols)
+    snow_df = pd.DataFrame(columns=snow_cols)
+    # If the column name has a tuple in it, convert it to a list and check whether this matches the Snowpark pandas
+    # result. This is because any tuple values stored in Snowflake are converted to lists.
+    # Here, the column name is derived from the values of an Index/Series object stored in Snowflake.
+    if column_type == "series" and any(isinstance(col, tuple) for col in columns):
+        native_columns = [
+            list(col) if isinstance(col, tuple) else col for col in native_df.columns
+        ]
+        assert native_columns == snow_df.columns.tolist()
+        # Set the Snowpark pandas DataFrame's columns to the native pandas DataFrame's columns to allow for
+        # easier comparison between the two objects
+        snow_df.columns = native_df.columns
+    assert_frame_equal(snow_df, native_df)
+
+
+# DATA AND INDEX TESTS
+# --------------------
 @pytest.mark.parametrize(
     "data, native_idx",
     [
@@ -345,127 +504,6 @@ def test_create_df_with_empty_df_as_data_and_index_as_index(native_df, native_in
     )
 
 
-@pytest.mark.parametrize(
-    "native_df, native_index, columns",
-    [
-        # Single column DataFrames.
-        (
-            native_pd.DataFrame(list(range(20))),
-            native_pd.Index(list(range(20))),
-            [1],
-        ),  # all index values match
-        (
-            native_pd.DataFrame(["A", "V", "D", "R"]),
-            native_pd.Index([10, 20, 30, 40], name="none"),
-            ["A"],
-        ),  # no index values match, column missing
-        # Multi-column DataFrames.
-        (
-            native_pd.DataFrame(
-                {"col1": ["A", "B", "C", "D"], "col2": ["B", "H", "T", "W"]},
-                index=[1.1, 2.2, 3, 4],
-            ),
-            native_pd.Index([1, 2, 3, 4], name="some name"),
-            ["col1"],
-        ),  # some index values are missing, subset of columns
-        (
-            native_pd.DataFrame(
-                [[10, 20, 30, 40], [2, 4, 6, 7], [-1, -2, -3, -4], [90, 50, 30, 10]],
-                index=native_pd.Index([None, "B", 0, 3.14], name="mixed"),
-                columns=["C", "L", "M", "W"],
-            ),
-            native_pd.Index(["B", 0, None, 3.14]),
-            [3, 1],
-        ),  # rearranged index and column values
-        (
-            native_pd.DataFrame(
-                [["A", "B", "C", "D", "E"], ["R", "S", "T", "U", "V"]],
-                columns=[1, 2, 3, 4, 5],
-            ),
-            native_pd.Index([3, 4], name="index"),
-            ["A", "V", "C"],
-        ),  # subset of index values
-        (
-            native_pd.DataFrame([list(range(20)), list(range(20))]),
-            native_pd.Index(list(range(20))),
-            [1],
-        ),  # all index values match
-        (
-            native_pd.DataFrame(
-                {
-                    "A": ["A", "V", "D", "R"],
-                    "V": ["V", "D", "R", "A"],
-                    "D": ["D", "R", "A", "V"],
-                    "R": ["R", "A", "V", "D"],
-                }
-            ),
-            native_pd.Index([10, 20, 30, 40], name="none"),
-            ["A", "X", "D", "R"],
-        ),  # no index values match
-        (
-            native_pd.DataFrame([]),
-            native_pd.Index([], name="empty index", dtype="int64"),
-            [],
-        ),  # empty data, index, and columns
-        (
-            native_pd.DataFrame([]),
-            native_pd.Index(["A", "V"], name="non-empty index"),
-            ["A", "V"],
-        ),  # empty data, non-empty index and columns
-        (
-            {
-                "A": [1, 2, 3],
-                "B": [4, 5, 6],
-            },  # dict data should behave similar to DataFrame data
-            native_pd.Index([10, 0, 1], name="non-empty index"),
-            ["A", "C"],
-        ),
-    ],
-)
-@pytest.mark.parametrize("column_type", ["list", "index"])
-def test_create_df_with_df_as_data_and_index_as_index_and_different_columns(
-    native_df, native_index, columns, column_type
-):
-    """
-    Creating a DataFrame where the data is a DataFrame, the index is an Index, and non-existent columns.
-    """
-    # Two joins are performed: one from joining the data and index parameters to have a query compiler whose
-    # index columns match the provided index, and one from performing .loc[] to filter the generated qc.
-    # One extra query is required to create the columns if it is an Index (column_type is "index").
-    native_columns = columns if column_type == "list" else native_pd.Index(columns)
-    snow_columns = columns if column_type == "list" else pd.Index(columns)
-    snow_df = (
-        pd.DataFrame(native_df)
-        if isinstance(native_df, native_pd.DataFrame)
-        else native_df
-    )
-    snow_index = pd.Index(native_index)
-    qc = 1 if column_type == "list" else 2
-    qc += 1 if (isinstance(native_df, dict)) else 0
-    qc += 1 if (isinstance(native_df, dict) and column_type == "index") else 0
-    jc = 1 if isinstance(native_df, native_pd.DataFrame) else 0
-    with SqlCounter(query_count=qc, join_count=jc):
-        assert_frame_equal(
-            pd.DataFrame(snow_df, index=snow_index, columns=native_columns),
-            native_pd.DataFrame(native_df, index=native_index, columns=snow_columns),
-            check_dtype=False,
-        )
-
-
-@sql_count_checker(query_count=1)
-def test_create_df_with_new_columns():
-    """
-    Creating a DataFrame with columns that don't exist in `data`.
-    """
-    native_df = native_pd.DataFrame(list(range(100)))
-    snow_df = pd.DataFrame(native_df)
-    assert_frame_equal(
-        pd.DataFrame(snow_df, columns=["new column"]),
-        native_pd.DataFrame(native_df, columns=["new column"]),
-        check_dtype=False,
-    )
-
-
 @sql_count_checker(query_count=2)
 def test_create_df_with_dict_as_data_and_index_as_index():
     """
@@ -485,54 +523,6 @@ def test_create_df_with_dict_as_data_and_index_as_index():
     assert_frame_equal(snow_df, native_df)
 
 
-@sql_count_checker(query_count=1)
-def test_create_series_with_list_of_lists_index():
-    # When given a list of lists as the index, this index needs to be converted to a MultiIndex before processing.
-    arrays = [
-        np.array(["qux", "qux", "foo", "foo", "baz", "baz", "bar", "bar"]),
-        np.array(["two", "one", "two", "one", "two", "one", "two", "one"]),
-    ]
-    data = [1, 2, 3, 4, 5, 6, 7, 8]
-    native_series = native_pd.Series(data, index=arrays)
-    snow_series = pd.Series(data, index=arrays)
-    assert_series_equal(snow_series, native_series)
-
-
-@sql_count_checker(query_count=1, join_count=2)
-def test_create_series_with_index_data_and_list_of_lists_index():
-    # When given a list of lists as the index, this index needs to be converted to a MultiIndex before processing.
-    arrays = [
-        ["qux", "qux", "foo", "foo", "baz", "baz", "bar", "bar"],
-        ["two", "one", "two", "one", "two", "one", "two", "one"],
-    ]
-    data = native_pd.Index([1, 2, 3, 4, 5, 6, 7, 8])
-    native_series = native_pd.Series(data, index=arrays)
-    snow_series = pd.Series(pd.Index(data), index=arrays)
-    assert_series_equal(snow_series, native_series)
-
-
-@sql_count_checker(query_count=1, join_count=2)
-def test_create_df_with_index_data_and_list_of_lists_index():
-    # When given a list of lists as the index, this index needs to be converted to a MultiIndex before processing.
-    arrays = [
-        ["qux", "qux", "foo", "foo", "baz", "baz", "bar", "bar"],
-        ["two", "one", "two", "one", "two", "one", "two", "one"],
-    ]
-    data = native_pd.Index([1, 2, 3, 4, 5, 6, 7, 8])
-    native_df = native_pd.DataFrame(data, index=arrays)
-    snow_df = pd.DataFrame(pd.Index(data), index=arrays)
-    assert_frame_equal(snow_df, native_df)
-
-
-@sql_count_checker(query_count=1)
-def test_create_series_with_none_data_and_non_empty_index():
-    # When creating an empty Series with a non-empty index, the index should be used as the index of the Series.
-    index = ["A", "B", "C", "D"]
-    native_series = native_pd.Series(None, index=index, dtype=object)
-    snow_series = pd.Series(None, index=index, dtype=object)
-    assert_series_equal(snow_series, native_series)
-
-
 @pytest.mark.parametrize(
     "data1, data2", [("series", "series"), ("series", "index"), ("index", "index")]
 )
@@ -682,26 +672,26 @@ def test_create_series_with_series_index_list_data(data1, data2):
     snow_data = [snow_data1, snow_data2]
 
     # Create Series only with list data.
-    native_df = native_pd.Series(native_data)
-    snow_df = pd.Series(snow_data)
+    native_ser = native_pd.Series(native_data)
+    snow_ser = pd.Series(snow_data)
     with SqlCounter(query_count=1):
-        assert_series_equal(snow_df, native_df)
+        assert_series_equal(snow_ser, native_ser)
 
     # Create Series with list data and Series index.
     native_ser_index = native_pd.Series([2, 11])
     snow_ser_index = pd.Series([2, 11])
-    native_df = native_pd.Series(native_data, index=native_ser_index)
-    snow_df = pd.Series(snow_data, index=snow_ser_index)
+    native_ser = native_pd.Series(native_data, index=native_ser_index)
+    snow_ser = pd.Series(snow_data, index=snow_ser_index)
     with SqlCounter(query_count=1):
-        assert_series_equal(snow_df, native_df, check_dtype=False)
+        assert_series_equal(snow_ser, native_ser, check_dtype=False)
 
     # Create Series with list data and Index index.
     native_index = native_pd.Index([22, 11])
     snow_index = pd.Index([22, 11])
-    native_df = native_pd.Series(native_data, index=native_index)
-    snow_df = pd.Series(snow_data, index=snow_index)
+    native_ser = native_pd.Series(native_data, index=native_index)
+    snow_ser = pd.Series(snow_data, index=snow_index)
     with SqlCounter(query_count=1):
-        assert_series_equal(snow_df, native_df, check_dtype=False)
+        assert_series_equal(snow_ser, native_ser, check_dtype=False)
 
 
 @pytest.mark.parametrize(
@@ -721,100 +711,69 @@ def test_create_series_with_series_index_dict_data(data1, data2):
     snow_data = {11: snow_data1, 22: snow_data2}
 
     # Create DataFrame only with dict data.
-    native_df = native_pd.Series(native_data)
-    snow_df = pd.Series(snow_data)
+    native_ser = native_pd.Series(native_data)
+    snow_ser = pd.Series(snow_data)
     with SqlCounter(query_count=1):
-        assert_series_equal(snow_df, native_df)
+        assert_series_equal(snow_ser, native_ser)
 
     # Create DataFrame with dict data and Series index.
     native_ser_index = native_pd.Series([9, 2, 999])
     snow_ser_index = pd.Series([9, 2, 999])
-    native_df = native_pd.Series(native_data, index=native_ser_index)
-    snow_df = pd.Series(snow_data, index=snow_ser_index)
+    native_ser = native_pd.Series(native_data, index=native_ser_index)
+    snow_ser = pd.Series(snow_data, index=snow_ser_index)
     with SqlCounter(query_count=1):
-        assert_series_equal(snow_df, native_df)
+        assert_series_equal(snow_ser, native_ser)
 
     # Create DataFrame with dict data and Index index.
     native_index = native_pd.Index([9, 2, 999])
     snow_index = pd.Index([9, 2, 999])
-    native_df = native_pd.Series(native_data, index=native_index)
-    snow_df = pd.Series(snow_data, index=snow_index)
+    native_ser = native_pd.Series(native_data, index=native_index)
+    snow_ser = pd.Series(snow_data, index=snow_index)
     with SqlCounter(query_count=1):
-        assert_series_equal(snow_df, native_df)
+        assert_series_equal(snow_ser, native_ser)
 
 
-def test_create_df_with_mixed_series_index_dict_data():
-    # Create the dict data.
-    native_data1 = native_pd.Series([1, 2, 3])
-    native_data2 = native_pd.Index([4, 5, 6])
-    data3 = [7, 8, 9]
-    snow_data1 = pd.Series(native_data1)
-    snow_data2 = pd.Index(native_data2)
-    native_data = {"A": native_data1, "B": native_data2, "C": data3}
-    snow_data = {"A": snow_data1, "B": snow_data2, "C": data3}
+@sql_count_checker(query_count=1)
+def test_create_series_with_list_data_and_list_of_lists_index():
+    # When given a list of lists as the index, this index needs to be converted to a MultiIndex before processing.
+    arrays = [
+        np.array(["qux", "qux", "foo", "foo", "baz", "baz", "bar", "bar"]),
+        np.array(["two", "one", "two", "one", "two", "one", "two", "one"]),
+    ]
+    data = [1, 2, 3, 4, 5, 6, 7, 8]
+    native_series = native_pd.Series(data, index=arrays)
+    snow_series = pd.Series(data, index=arrays)
+    assert_series_equal(snow_series, native_series)
 
-    # Create DataFrame only with dict data.
-    native_df = native_pd.DataFrame(native_data)
-    snow_df = pd.DataFrame(snow_data)
-    with SqlCounter(query_count=1):
-        assert_frame_equal(snow_df, native_df)
 
-    # Create DataFrame with dict data and Series index.
-    native_ser_index = native_pd.Series([9, 2, 999])
-    snow_ser_index = pd.Series(native_ser_index)
-    native_df = native_pd.DataFrame(native_data, index=native_ser_index)
-    snow_df = pd.DataFrame(snow_data, index=snow_ser_index)
-    with SqlCounter(query_count=1):
-        assert_frame_equal(snow_df, native_df)
+@sql_count_checker(query_count=1, join_count=2)
+def test_create_series_with_index_data_and_list_of_lists_index():
+    # When given a list of lists as the index, this index needs to be converted to a MultiIndex before processing.
+    arrays = [
+        ["qux", "qux", "foo", "foo", "baz", "baz", "bar", "bar"],
+        ["two", "one", "two", "one", "two", "one", "two", "one"],
+    ]
+    data = native_pd.Index([1, 2, 3, 4, 5, 6, 7, 8])
+    native_series = native_pd.Series(data, index=arrays)
+    snow_series = pd.Series(pd.Index(data), index=arrays)
+    assert_series_equal(snow_series, native_series)
 
-    # Create DataFrame with dict data and Index index.
-    native_index = native_pd.Index([9, 2, 999])
-    snow_index = pd.Index(native_index)
-    native_df = native_pd.DataFrame(native_data, index=native_index)
-    snow_df = pd.DataFrame(snow_data, index=snow_index)
-    with SqlCounter(query_count=1):
-        assert_frame_equal(snow_df, native_df)
-
-    # Create DataFrame with dict data, Series index, and columns.
-    columns = ["A", "B", "C"]
-    native_df = native_pd.DataFrame(
-        native_data, index=native_ser_index, columns=columns
-    )
-    snow_df = pd.DataFrame(snow_data, index=snow_ser_index, columns=columns)
-    with SqlCounter(query_count=1):
-        assert_frame_equal(snow_df, native_df)
 
-    # Create DataFrame with dict data, Index index, and Index columns.
-    native_columns = native_pd.Index(columns)
-    snow_columns = pd.Index(columns)
-    native_df = native_pd.DataFrame(
-        native_data, index=native_index, columns=native_columns
-    )
-    snow_df = pd.DataFrame(snow_data, index=snow_index, columns=snow_columns)
-    with SqlCounter(query_count=1):
-        assert_frame_equal(snow_df, native_df)
-
-
-@sql_count_checker(query_count=2)
-def test_create_df_with_mixed_series_index_list_data_negative():
-    """
-    Since Snowpark pandas relies on native pandas for initialization a DataFrame with mixed data types,
-    they both raise the same error.
-    """
-    # Create the list data.
-    data1 = native_pd.Series([1, 2, 3])
-    data2 = native_pd.Index([4, 5, 6])
-    data3 = [7, 8, 9]
-    # Need to convert data3 to an Index since native pandas tries to perform `get_indexer` on it.
-    err_msg = "'builtin_function_or_method' object has no attribute 'get_indexer'"
-    with pytest.raises(AttributeError, match=err_msg):
-        native_pd.DataFrame([data1, data2, data3])
-    with pytest.raises(AttributeError, match=err_msg):
-        pd.DataFrame([pd.Series(data1), pd.Index(data2), data3])
+@sql_count_checker(query_count=1, join_count=2)
+def test_create_df_with_index_data_and_list_of_lists_index():
+    # When given a list of lists as the index, this index needs to be converted to a MultiIndex before processing.
+    arrays = [
+        ["qux", "qux", "foo", "foo", "baz", "baz", "bar", "bar"],
+        ["two", "one", "two", "one", "two", "one", "two", "one"],
+    ]
+    data = native_pd.Index([1, 2, 3, 4, 5, 6, 7, 8])
+    native_df = native_pd.DataFrame(data, index=arrays)
+    snow_df = pd.DataFrame(pd.Index(data), index=arrays)
+    assert_frame_equal(snow_df, native_df)
 
 
 @pytest.mark.xfail(
-    reason="SNOW-1638397 DataFrane creation fails: reindex does not work with string index"
+    reason="SNOW-1638397 DataFrame creation fails: reindex does not work with string index"
 )
 def test_create_df_with_series_data_and_series_index():
     # Create the data and index.
@@ -830,40 +789,37 @@ def test_create_df_with_series_data_and_series_index():
         assert_frame_equal(snow_df, native_df)
 
 
-@sql_count_checker(query_count=0)
-def test_create_df_with_df_index_negative():
-    with pytest.raises(ValueError, match="Index data must be 1-dimensional"):
-        native_pd.DataFrame(
-            [1, 2, 3], index=native_pd.DataFrame([[1, 2], [3, 4], [5, 6]])
-        )
-    with pytest.raises(ValueError, match="Index data must be 1-dimensional"):
-        pd.DataFrame([1, 2, 3], index=pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
-
-
-@sql_count_checker(query_count=0)
-def test_create_series_with_df_index_negative():
-    with pytest.raises(ValueError, match="Index data must be 1-dimensional"):
-        native_pd.Series([1, 2, 3], index=native_pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
-    with pytest.raises(ValueError, match="Index data must be 1-dimensional"):
-        pd.Series([1, 2, 3], index=pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
+# DATA AND COLUMN TESTS
+# ----------------------
+@sql_count_checker(query_count=1)
+def test_create_df_with_df_data_and_subset_of_columns():
+    # Test DataFrame creation where data is a DataFrame and only a subset of its columns are passed in.
+    # Only the columns passed in are used; the rest are ignored. In this case with end up with a single
+    # column DataFrame.
+    native_data = native_pd.DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]})
+    snow_data = pd.DataFrame(native_data)
+    columns = ["a"]
+    native_df = native_pd.DataFrame(native_data, columns=columns)
+    snow_df = pd.DataFrame(snow_data, columns=columns)
+    assert_frame_equal(snow_df, native_df)
 
 
-@sql_count_checker(query_count=0)
-def test_create_series_with_df_data_negative():
-    with pytest.raises(
-        ValueError,
-        match=re.escape(
-            "The truth value of a DataFrame is ambiguous. Use a.empty, a.bool()"
-            ", a.item(), a.any() or a.all()."
-        ),
-    ):
-        native_pd.Series(native_pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
-    with pytest.raises(ValueError, match="Data cannot be a DataFrame"):
-        pd.Series(pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
+@sql_count_checker(query_count=1)
+def test_create_df_with_df_data_and_new_columns():
+    """
+    Creating a DataFrame with columns that don't exist in `data`.
+    """
+    native_df = native_pd.DataFrame(list(range(100)))
+    snow_df = pd.DataFrame(native_df)
+    assert_frame_equal(
+        pd.DataFrame(snow_df, columns=["new column"]),
+        native_pd.DataFrame(native_df, columns=["new column"]),
+        check_dtype=False,
+    )
 
 
 @sql_count_checker(query_count=1)
-def test_create_df_with_name_in_columns():
+def test_create_df_with_df_data_and_name_in_columns():
     # Test DataFrame creation where the data is a named Series and its name is in the columns passed in.
     # The column sharing the name with the Series takes on its values as the column values; the rest of the
     # columns are filled with NaNs.
@@ -875,6 +831,299 @@ def test_create_df_with_name_in_columns():
     assert_frame_equal(snow_df, native_df)
 
 
+@sql_count_checker(query_count=1)
+def test_create_df_with_df_data_and_name_not_in_columns():
+    # Test DataFrame creation where the data is a named Series and its name is not in the columns passed in.
+    # The result is an empty DataFrame with the columns set.
+    native_data = native_pd.Series([1, 2, 3], name="b")
+    snow_data = pd.Series(native_data)
+    columns = ["a", "c"]
+    native_df = native_pd.DataFrame(native_data, columns=columns)
+    snow_df = pd.DataFrame(snow_data, columns=columns)
+    assert_frame_equal(snow_df, native_df)
+
+
+@sql_count_checker(query_count=1)
+def test_create_df_with_df_data_and_tuple_name_in_columns():
+    # Test DataFrame creation where the data is a named Series and its name is in the columns passed in.
+    # The column sharing the name with the Series takes on its values as the column values; the rest of the
+    # columns are filled with NaNs.
+    native_data = native_pd.Series([1, 2, 3], name=("b", "a"))
+    snow_data = pd.Series(native_data)
+    columns = [("b", "a"), "b"]
+    native_df = native_pd.DataFrame(native_data, columns=columns)
+    snow_df = pd.DataFrame(snow_data, columns=columns)
+    assert_frame_equal(snow_df, native_df)
+
+
+@sql_count_checker(query_count=1)
+def test_create_df_with_df_data_and_tuple_name_not_in_columns():
+    # Test DataFrame creation where the data is a named Series and its name is not in the columns passed in.
+    # The result is an empty DataFrame with the columns set.
+    native_data = native_pd.Series([1, 2, 3], name=("b", "a"))
+    snow_data = pd.Series(native_data)
+    columns = [("b", "c"), "b"]
+    native_df = native_pd.DataFrame(native_data, columns=columns)
+    snow_df = pd.DataFrame(snow_data, columns=columns)
+    assert_frame_equal(snow_df, native_df)
+
+
+# INDEX AND COLUMN TESTS
+# ----------------------
+@pytest.mark.parametrize(
+    "index",
+    [
+        ["A", "B", "C"],
+        ("A", "B", "C"),
+        ["A", ("B", "C")],
+    ],
+)
+@pytest.mark.parametrize("index_type", ["index", "series"])
+@pytest.mark.parametrize("index_name", [None, "A", "index name!", ("A", "B")])
+@pytest.mark.parametrize(
+    "columns",
+    [
+        ["A"],
+        ("A", "B"),
+        [("A", "B")],
+        ["A", "B", "C"],
+        ["A", ("B", "C")],
+        [("A", "B"), ("C", "D")],
+        native_pd.Index(["A", "B", "C"]),
+        np.array([("A", "B"), ("B", "C")]),
+    ],
+)
+@sql_count_checker(query_count=1, join_count=1)
+def test_create_df_with_index_and_columns(index, index_type, index_name, columns):
+    # Test DataFrame creation with both index and columns passed in.
+    if index_type == "index":
+        native_index = native_pd.Index(data=index, name=index_name)
+        snow_index = pd.Index(data=index, name=index_name)
+    else:
+        native_index = native_pd.Series(data=index, name=index_name)
+        snow_index = pd.Series(data=index, name=index_name)
+    native_df = native_pd.DataFrame(index=native_index, columns=columns)
+    snow_df = pd.DataFrame(index=snow_index, columns=columns)
+    assert_frame_equal(snow_df, native_df)
+
+
+@pytest.mark.parametrize("index", [[("A", "B"), ("C", "D")]])
+@pytest.mark.parametrize(
+    "index_type",
+    [
+        "index",
+        pytest.param(
+            "series",
+            marks=pytest.mark.xfail(
+                reason="SNOW-1675191 reindex does not work with tuple series"
+            ),
+        ),
+    ],
+)
+@pytest.mark.parametrize("index_name", [None, ("A", "B")])
+@pytest.mark.parametrize(
+    "columns",
+    [
+        ["A"],
+        ("A", "B"),
+        [("A", "B")],
+        ["A", "B", "C"],
+        ["A", ("B", "C")],
+        [("A", "B"), ("C", "D")],
+        native_pd.Index(["A", "B", "C"]),
+        np.array([("A", "B"), ("B", "C")]),
+    ],
+)
+def test_create_df_with_multiindex_and_columns(index, index_type, index_name, columns):
+    # Test DataFrame creation with both index and columns passed in.
+    with SqlCounter(query_count=1, join_count=1 if index_type == "series" else 0):
+        if index_type == "index":
+            native_index = native_pd.Index(data=index, name=index_name)
+            snow_index = pd.MultiIndex.from_tuples(index, names=index_name)
+        else:
+            native_index = native_pd.Series(data=index, name=index_name)
+            snow_index = pd.Series(data=index, name=index_name)
+        native_df = native_pd.DataFrame(index=native_index, columns=columns)
+        snow_df = pd.DataFrame(index=snow_index, columns=columns)
+        assert_frame_equal(snow_df, native_df)
+
+
+@sql_count_checker(query_count=2)
+def test_create_df_with_index_and_columns_match():
+    # Test DataFrame creation with both index and columns passed in where index name is not in columns.
+    native_df = native_pd.DataFrame(native_pd.Index([1, 2, 3], name="b"), columns=["a"])
+    snow_df = pd.DataFrame(pd.Index([1, 2, 3], name="b"), columns=["a"])
+    assert_frame_equal(snow_df, native_df)
+
+    # Test DataFrame creation with both index and columns passed in where name is in columns.
+    native_df = native_pd.DataFrame(
+        native_pd.Index([1, 2, 3], name="b"), columns=["a", "b"]
+    )
+    snow_df = pd.DataFrame(pd.Index([1, 2, 3], name="b"), columns=["a", "b"])
+    assert_frame_equal(snow_df, native_df)
+
+
+# DATA, INDEX, AND COLUMN TESTS
+# -----------------------------
+def test_create_df_with_mixed_series_index_dict_data():
+    # Create the dict data.
+    native_data1 = native_pd.Series([1, 2, 3])
+    native_data2 = native_pd.Index([4, 5, 6])
+    data3 = [7, 8, 9]
+    snow_data1 = pd.Series(native_data1)
+    snow_data2 = pd.Index(native_data2)
+    native_data = {"A": native_data1, "B": native_data2, "C": data3}
+    snow_data = {"A": snow_data1, "B": snow_data2, "C": data3}
+
+    # Create DataFrame only with dict data.
+    native_df = native_pd.DataFrame(native_data)
+    snow_df = pd.DataFrame(snow_data)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+    # Create DataFrame with dict data and Series index.
+    native_ser_index = native_pd.Series([9, 2, 999])
+    snow_ser_index = pd.Series(native_ser_index)
+    native_df = native_pd.DataFrame(native_data, index=native_ser_index)
+    snow_df = pd.DataFrame(snow_data, index=snow_ser_index)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+    # Create DataFrame with dict data and Index index.
+    native_index = native_pd.Index([9, 2, 999])
+    snow_index = pd.Index(native_index)
+    native_df = native_pd.DataFrame(native_data, index=native_index)
+    snow_df = pd.DataFrame(snow_data, index=snow_index)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+    # Create DataFrame with dict data, Series index, and columns.
+    columns = ["A", "B", "C"]
+    native_df = native_pd.DataFrame(
+        native_data, index=native_ser_index, columns=columns
+    )
+    snow_df = pd.DataFrame(snow_data, index=snow_ser_index, columns=columns)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+    # Create DataFrame with dict data, Index index, and Index columns.
+    native_columns = native_pd.Index(columns)
+    snow_columns = pd.Index(columns)
+    native_df = native_pd.DataFrame(
+        native_data, index=native_index, columns=native_columns
+    )
+    snow_df = pd.DataFrame(snow_data, index=snow_index, columns=snow_columns)
+    with SqlCounter(query_count=1):
+        assert_frame_equal(snow_df, native_df)
+
+
+@pytest.mark.parametrize(
+    "native_df, native_index, columns",
+    [
+        # Single column DataFrames.
+        (
+            native_pd.DataFrame(list(range(20))),
+            native_pd.Index(list(range(20))),
+            [1],
+        ),  # all index values match
+        (
+            native_pd.DataFrame(["A", "V", "D", "R"]),
+            native_pd.Index([10, 20, 30, 40], name="none"),
+            ["A"],
+        ),  # no index values match, column missing
+        # Multi-column DataFrames.
+        (
+            native_pd.DataFrame(
+                {"col1": ["A", "B", "C", "D"], "col2": ["B", "H", "T", "W"]},
+                index=[1.1, 2.2, 3, 4],
+            ),
+            native_pd.Index([1, 2, 3, 4], name="some name"),
+            ["col1"],
+        ),  # some index values are missing, subset of columns
+        (
+            native_pd.DataFrame(
+                [[10, 20, 30, 40], [2, 4, 6, 7], [-1, -2, -3, -4], [90, 50, 30, 10]],
+                index=native_pd.Index([None, "B", 0, 3.14], name="mixed"),
+                columns=["C", "L", "M", "W"],
+            ),
+            native_pd.Index(["B", 0, None, 3.14]),
+            [3, 1],
+        ),  # rearranged index and column values
+        (
+            native_pd.DataFrame(
+                [["A", "B", "C", "D", "E"], ["R", "S", "T", "U", "V"]],
+                columns=[1, 2, 3, 4, 5],
+            ),
+            native_pd.Index([3, 4], name="index"),
+            ["A", "V", "C"],
+        ),  # subset of index values
+        (
+            native_pd.DataFrame([list(range(20)), list(range(20))]),
+            native_pd.Index(list(range(20))),
+            [1],
+        ),  # all index values match
+        (
+            native_pd.DataFrame(
+                {
+                    "A": ["A", "V", "D", "R"],
+                    "V": ["V", "D", "R", "A"],
+                    "D": ["D", "R", "A", "V"],
+                    "R": ["R", "A", "V", "D"],
+                }
+            ),
+            native_pd.Index([10, 20, 30, 40], name="none"),
+            ["A", "X", "D", "R"],
+        ),  # no index values match
+        (
+            native_pd.DataFrame([]),
+            native_pd.Index([], name="empty index", dtype="int64"),
+            [],
+        ),  # empty data, index, and columns
+        (
+            native_pd.DataFrame([]),
+            native_pd.Index(["A", "V"], name="non-empty index"),
+            ["A", "V"],
+        ),  # empty data, non-empty index and columns
+        (
+            {
+                "A": [1, 2, 3],
+                "B": [4, 5, 6],
+            },  # dict data should behave similar to DataFrame data
+            native_pd.Index([10, 0, 1], name="non-empty index"),
+            ["A", "C"],
+        ),
+    ],
+)
+@pytest.mark.parametrize("column_type", ["list", "index"])
+def test_create_df_with_df_as_data_and_index_as_index_and_different_columns(
+    native_df, native_index, columns, column_type
+):
+    """
+    Creating a DataFrame where the data is a DataFrame, the index is an Index, and non-existent columns.
+    """
+    # Two joins are performed: one from joining the data and index parameters to have a query compiler whose
+    # index columns match the provided index, and one from performing .loc[] to filter the generated qc.
+    # One extra query is required to create the columns if it is an Index (column_type is "index").
+    native_columns = columns if column_type == "list" else native_pd.Index(columns)
+    snow_columns = columns if column_type == "list" else pd.Index(columns)
+    snow_df = (
+        pd.DataFrame(native_df)
+        if isinstance(native_df, native_pd.DataFrame)
+        else native_df
+    )
+    snow_index = pd.Index(native_index)
+    qc = 1 if column_type == "list" else 2
+    qc += 1 if (isinstance(native_df, dict)) else 0
+    qc += 1 if (isinstance(native_df, dict) and column_type == "index") else 0
+    jc = 1 if isinstance(native_df, native_pd.DataFrame) else 0
+    with SqlCounter(query_count=qc, join_count=jc):
+        assert_frame_equal(
+            pd.DataFrame(snow_df, index=snow_index, columns=native_columns),
+            native_pd.DataFrame(native_df, index=native_index, columns=snow_columns),
+            check_dtype=False,
+        )
+
+
 @sql_count_checker(query_count=1, join_count=1)
 def test_create_df_with_name_not_in_columns_and_index():
     # Test DataFrame creation where the data is a named Series and its name is not in the columns passed in.
@@ -889,19 +1138,8 @@ def test_create_df_with_name_not_in_columns_and_index():
     assert_frame_equal(snow_df, native_df)
 
 
-@sql_count_checker(query_count=1)
-def test_create_df_with_df_and_subset_of_columns():
-    # Test DataFrame creation where data is a DataFrame and only a subset of its columns are passed in.
-    # Only the columns passed in are used; the rest are ignored. In this case with end up with a single
-    # column DataFrame.
-    native_data = native_pd.DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]})
-    snow_data = pd.DataFrame(native_data)
-    columns = ["a"]
-    native_df = native_pd.DataFrame(native_data, columns=columns)
-    snow_df = pd.DataFrame(snow_data, columns=columns)
-    assert_frame_equal(snow_df, native_df)
-
-
+# COPY TESTS
+# ----------
 def test_create_df_with_copy():
     # When copy is True, the data is copied into the DataFrame, and the new DataFrame and data do not share references.
     data = pd.DataFrame([[1, 2], [3, 4], [5, 6]])
@@ -948,3 +1186,55 @@ def test_create_series_with_copy():
         # Changing series_copy should not change data or series_not_copy.
         series_copy.iloc[0] = 1000
         assert data.iloc[0] == series_not_copy.iloc[0] == 99
+
+
+# NEGATIVE TESTS
+# --------------
+@sql_count_checker(query_count=0)
+def test_create_df_with_df_index_negative():
+    with pytest.raises(ValueError, match="Index data must be 1-dimensional"):
+        native_pd.DataFrame(
+            [1, 2, 3], index=native_pd.DataFrame([[1, 2], [3, 4], [5, 6]])
+        )
+    with pytest.raises(ValueError, match="Index data must be 1-dimensional"):
+        pd.DataFrame([1, 2, 3], index=pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
+
+
+@sql_count_checker(query_count=0)
+def test_create_series_with_df_index_negative():
+    with pytest.raises(ValueError, match="Index data must be 1-dimensional"):
+        native_pd.Series([1, 2, 3], index=native_pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
+    with pytest.raises(ValueError, match="Index data must be 1-dimensional"):
+        pd.Series([1, 2, 3], index=pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
+
+
+@sql_count_checker(query_count=0)
+def test_create_series_with_df_data_negative():
+    with pytest.raises(
+        ValueError,
+        match=re.escape(
+            "The truth value of a DataFrame is ambiguous. Use a.empty, a.bool()"
+            ", a.item(), a.any() or a.all()."
+        ),
+    ):
+        native_pd.Series(native_pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
+    with pytest.raises(ValueError, match="Data cannot be a DataFrame"):
+        pd.Series(pd.DataFrame([[1, 2], [3, 4], [5, 6]]))
+
+
+@sql_count_checker(query_count=2)
+def test_create_df_with_mixed_series_index_list_data_negative():
+    """
+    Since Snowpark pandas relies on native pandas for initialization a DataFrame with mixed data types,
+    they both raise the same error.
+    """
+    # Create the list data.
+    data1 = native_pd.Series([1, 2, 3])
+    data2 = native_pd.Index([4, 5, 6])
+    data3 = [7, 8, 9]
+    # Need to convert data3 to an Index since native pandas tries to perform `get_indexer` on it.
+    err_msg = "'builtin_function_or_method' object has no attribute 'get_indexer'"
+    with pytest.raises(AttributeError, match=err_msg):
+        native_pd.DataFrame([data1, data2, data3])
+    with pytest.raises(AttributeError, match=err_msg):
+        pd.DataFrame([pd.Series(data1), pd.Index(data2), data3])