diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 78928713166f4..9aea6ad4dbc54 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1228,7 +1228,7 @@ def rename_categories(self, new_categories) -> Self: cat = self.copy() cat._set_categories(new_categories) - return cat + return cat def reorder_categories(self, new_categories, ordered=None) -> Self: """ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 04ff8ec9f430d..ffdcc16be6ea1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6005,14 +6005,14 @@ def shift( if periods > 0: result = self.iloc[:, :-periods] - for col in range(min(ncols, abs(periods))): + for col in range(min(ncols, abs(periods))): # TODO(EA2D): doing this in a loop unnecessary with 2D EAs # Define filler inside loop so we get a copy filler = self.iloc[:, 0].shift(len(self)) result.insert(0, label, filler, allow_duplicates=True) else: result = self.iloc[:, -periods:] - for col in range(min(ncols, abs(periods))): + for col in range(min(ncols, abs(periods))): # Define filler inside loop so we get a copy filler = self.iloc[:, -1].shift(len(self)) result.insert( @@ -6031,7 +6031,7 @@ def shift( # GH#35488 we need to watch out for multi-block cases # We only get here with fill_value not-lib.no_default nper = abs(periods) - nper = min(nper, ncols) + nper = min(nper, ncols) if periods > 0: indexer = np.array( [-1] * nper + list(range(ncols - periods)), dtype=np.intp ) @@ -10054,12 +10054,12 @@ def explode( raise ValueError("column must be a scalar, tuple, or list thereof") df = self.reset_index(drop=True) - if len(columns) == 1: + if len(columns) == 1: result = df[columns[0]].explode() else: mylen = lambda x: len(x) if (is_list_like(x) and len(x) > 0) else 1 counts0 = self[columns[0]].apply(mylen) - for c in columns[1:]: + for c in columns[1:]: if not all(counts0 == self[c].apply(mylen)): raise ValueError("columns must have matching element counts") 
result = DataFrame({c: df[c].explode() for c in columns}) diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py index 806f34975b8a7..608f86bedf048 100644 --- a/pandas/core/groupby/categorical.py +++ b/pandas/core/groupby/categorical.py @@ -80,4 +80,4 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorica else: take_codes = unique_notnan_codes - return Categorical(c, c.categories.take(take_codes)) + return Categorical(c, c.categories.take(take_codes)) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ce347140edd04..87257463dfa41 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -655,7 +655,7 @@ def get_converter(s): else: return lambda key: key - if len(names) == 0: + if len(names) == 0: return [] if len(self.indices) > 0: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 016b30e215d02..dc3e968753a41 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1943,7 +1943,7 @@ def to_frame( if not is_list_like(name): raise TypeError("'name' must be a list / sequence of column names.") - if len(name) != len(self.levels): + if len(name) != len(self.levels): raise ValueError( "'name' should have same length as number of levels on index." 
) @@ -1951,7 +1951,7 @@ def to_frame( else: idx_names = self._get_level_names() - if not allow_duplicates and len(set(idx_names)) != len(idx_names): + if not allow_duplicates and len(set(idx_names)) != len(idx_names): raise ValueError( "Cannot create duplicate column labels if allow_duplicates is False" ) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 285256ac7b16a..382a666155911 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -146,14 +146,14 @@ def merge( left: DataFrame | Series, right: DataFrame | Series, - how: MergeHow = "inner", + how: MergeHow = "inner", on: IndexLabel | AnyArrayLike | None = None, left_on: IndexLabel | AnyArrayLike | None = None, right_on: IndexLabel | AnyArrayLike | None = None, left_index: bool = False, right_index: bool = False, sort: bool = False, - suffixes: Suffixes = ("_x", "_y"), + suffixes: Suffixes = ("_x", "_y"), copy: bool | lib.NoDefault = lib.no_default, indicator: str | bool = False, validate: str | None = None, @@ -370,10 +370,10 @@ def merge( left_df = _validate_operand(left) left._check_copy_deprecation(copy) right_df = _validate_operand(right) - if how == "cross": + if how == "cross": return _cross_merge( - left_df, + left_df, right_df, on=on, left_on=left_on, right_on=right_on, @@ -410,7 +410,7 @@ def _cross_merge( right_on: IndexLabel | AnyArrayLike | None = None, left_index: bool = False, right_index: bool = False, - sort: bool = False, + sort: bool = False, suffixes: Suffixes = ("_x", "_y"), indicator: str | bool = False, validate: str | None = None, @@ -421,7 +421,7 @@ def _cross_merge( if ( left_index - or right_index + or right_index or right_on is not None or left_on is not None or on is not None @@ -622,8 +622,8 @@ def merge_ordered( def _merger(x, y) -> DataFrame: # perform the ordered merge operation op = _OrderedMerge( - x, + x, y, on=on, left_on=left_on, right_on=right_on, @@ -965,7 +965,7 @@ def __init__( right_on: IndexLabel | AnyArrayLike | 
None = None, left_index: bool = False, right_index: bool = False, - sort: bool = True, + sort: bool = True, suffixes: Suffixes = ("_x", "_y"), indicator: str | bool = False, validate: str | None = None, @@ -981,8 +981,8 @@ def __init__( self.suffixes = suffixes self.sort = sort or how == "outer" - self.left_index = left_index - self.right_index = right_index + self.left_index = left_index + self.right_index = right_index self.indicator = indicator @@ -1007,18 +1007,18 @@ def __init__( self.left_on, self.right_on = self._validate_left_right_on(left_on, right_on) ( - self.left_join_keys, - self.right_join_keys, - self.join_names, - left_drop, - right_drop, - ) = self._get_merge_keys() - if left_drop: - self.left = self.left._drop_labels_or_levels(left_drop) - if right_drop: - self.right = self.right._drop_labels_or_levels(right_drop) + self.left_join_keys, + self.right_join_keys, + self.join_names, + left_drop, + right_drop, + ) = self._get_merge_keys() + if left_drop: + self.left = self.left._drop_labels_or_levels(left_drop) + if right_drop: + self.right = self.right._drop_labels_or_levels(right_drop) self._maybe_require_matching_dtypes(self.left_join_keys, self.right_join_keys) self._validate_tolerance(self.left_join_keys) @@ -1043,21 +1043,21 @@ def _validate_how( """ # GH 59435: raise when "how" is not a valid Merge type merge_type = { - "left", - "right", - "inner", - "outer", - "left_anti", - "right_anti", - "cross", - "asof", + "left", + "right", + "inner", + "outer", + "left_anti", + "right_anti", + "cross", + "asof", } if how not in merge_type: raise ValueError( f"'{how}' is not a valid Merge type: " f"left, right, inner, outer, left_anti, right_anti, cross, asof" ) - anti_join = False + anti_join = False if how in {"left_anti", "right_anti"}: how = how.split("_")[0] # type: ignore[assignment] anti_join = True @@ -1123,8 +1123,8 @@ def _reindex_and_concat( from pandas import concat - left.columns = llabels - right.columns = rlabels + left.columns = llabels + right.columns = rlabels result = concat([left, right], axis=1) return result @@ -1148,7 +1148,7 @@ def get_result(self) -> DataFrame: 
self._maybe_restore_index_levels(result) return result.__finalize__( - types.SimpleNamespace(input_objs=[self.left, self.right]), method="merge" + types.SimpleNamespace(input_objs=[self.left, self.right]), method="merge" ) @final @@ -1444,7 +1444,7 @@ def _get_join_info( join_index, left_indexer, right_indexer ) - return join_index, left_indexer, right_indexer + return join_index, left_indexer, right_indexer @final def _create_join_index( @@ -1478,13 +1478,13 @@ def _create_join_index( # to take the final value in target index. So, we set the last # element to be the desired fill value. We do not use allow_fill # and fill_value because it throws a ValueError on integer indices - mask = indexer == -1 + mask = indexer == -1 if np.any(mask): fill_value = na_value_for_dtype(index.dtype, compat=False) index = index.append(Index([fill_value])) if indexer is None: return index.copy() - return index.take(indexer) + return index.take(indexer) @final def _handle_anti_join( @@ -2245,9 +2245,9 @@ def get_result(self) -> DataFrame: else: raise ValueError("fill_method must be 'ffill' or None") - result = self._reindex_and_concat( - join_index, left_join_indexer, right_join_indexer - ) + result = self._reindex_and_concat( + join_index, left_join_indexer, right_join_indexer + ) self._maybe_add_join_keys(result, left_indexer, right_indexer) return result @@ -2390,7 +2390,7 @@ def _validate_left_right_on(self, left_on, right_on): left_on = self.left_by + list(left_on) right_on = self.right_by + list(right_on) - return left_on, right_on + return left_on, right_on def _maybe_require_matching_dtypes( self, left_join_keys: list[ArrayLike], right_join_keys: list[ArrayLike] @@ -2673,7 +2673,7 @@ def _left_join_on_index( left_ax: Index, right_ax: Index, join_keys: list[ArrayLike], sort: bool = False ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp]]: if isinstance(right_ax, MultiIndex): - lkey, rkey = _get_multiindex_indexer(join_keys, right_ax, sort=sort) + lkey, rkey = _get_multiindex_indexer(join_keys, right_ax, sort=sort) else: # error: Incompatible types in assignment (expression has type # "Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series]", @@ -2694,7 +2694,7 @@ def _left_join_on_index( return join_index, left_indexer, right_indexer # left frame preserves order & length of its index - return left_ax, None, right_indexer + return left_ax, None, right_indexer def _factorize_keys( @@ -2890,9 +2890,9 @@ def _factorize_keys( llab, rlab = _sort_labels(uniques, llab, rlab) # NA group - lmask = llab == -1 + lmask = llab == -1 lany = lmask.any() - rmask = rlab == -1 + rmask = rlab == -1 rany = rmask.any() if lany or rany: @@ -2937,7 +2937,7 @@ def _convert_arrays_and_get_rizer_klass( klass = libhashtable.ObjectFactorizer lk = ensure_object(lk) rk = ensure_object(rk) - return klass, lk, rk + return klass, lk, rk def _sort_labels( @@ -2949,7 +2949,7 @@ def _sort_labels( _, new_labels = algos.safe_sort(uniques, labels, use_na_sentinel=True) new_left, new_right = new_labels[:llength], new_labels[llength:] - return new_left, new_right + return new_left, new_right def _get_join_keys( @@ -3030,7 +3030,7 @@ def _items_overlap_with_suffix( ) to_rename = left.intersection(right) - if len(to_rename) == 0: + if len(to_rename) == 0: return left, right lsuffix, rsuffix = suffixes @@ -3080,4 +3080,4 @@ def renamer(x, suffix: str | None): "not allowed.", ) - return llabels, rlabels + return llabels, rlabels diff --git a/pandas/core/series.py b/pandas/core/series.py index 63c9963fb7eac..c2a02c31928a6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1824,15 +1824,15 @@ def to_frame(self, name: Hashable = lib.no_default) -> DataFrame: name = self.name if name is None: # default to [0], same as we would get with DataFrame(self) - columns = default_index(1) - else: + columns = default_index(1) + else: columns = Index([name]) else: columns = Index([name]) mgr = self._mgr.to_2d_mgr(columns) df = 
self._constructor_expanddim_from_mgr(mgr, axes=mgr.axes) - return df.__finalize__(self, method="to_frame") + return df.__finalize__(self, method="to_frame") def _set_name( self, name, inplace: bool = False, deep: bool | None = None