2424from ..core import DATAFRAME_TYPE , SERIES_TYPE , DataFrame
2525from ..initializer import DataFrame as asframe , Series as asseries
2626from ..operands import DataFrameOperand , DataFrameOperandMixin
27- from ..utils import parse_index
27+ from ..utils import parse_index , is_index_value_identical
2828
2929# in pandas 1.0.x, __setitem__ with a list containing missing items is not allowed
3030_allow_set_missing_list = pd_release_version [:2 ] >= (1 , 1 )
@@ -161,16 +161,7 @@ def tile(cls, op: "DataFrameSetitem"):
161161 rechunk_arg = {}
162162
163163 # check if all chunks' index_values are identical
164- target_chunk_index_values = [
165- c .index_value for c in target .chunks if c .index [1 ] == 0
166- ]
167- value_chunk_index_values = [v .index_value for v in value .chunks ]
168- is_identical = len (target_chunk_index_values ) == len (
169- value_chunk_index_values
170- ) and all (
171- c .key == v .key
172- for c , v in zip (target_chunk_index_values , value_chunk_index_values )
173- )
164+ is_identical = is_index_value_identical (target , value )
174165 if not is_identical :
175166 # do rechunk
176167 if any (np .isnan (s ) for s in target .nsplits [0 ]) or any (
@@ -202,8 +193,8 @@ def tile(cls, op: "DataFrameSetitem"):
202193
203194 out_chunks = []
204195 nsplits = [list (ns ) for ns in target .nsplits ]
205-
206196 nsplits [1 ][- 1 ] += len (append_cols )
197+ nsplits = tuple (tuple (ns ) for ns in nsplits )
207198
208199 column_chunk_shape = target .chunk_shape [1 ]
209200 for c in target .chunks :
@@ -239,26 +230,27 @@ def tile(cls, op: "DataFrameSetitem"):
239230
240231 chunk_inputs = [c , value_chunk ]
241232
242- dtypes , shape , columns_value = c .dtypes , c .shape , c .columns_value
243-
233+ shape = c .shape
244234 if append_cols and c .index [- 1 ] == column_chunk_shape - 1 :
245235 # some columns appended at the last column of chunks
246236 shape = (shape [0 ], shape [1 ] + len (append_cols ))
247- dtypes = pd .concat ([dtypes , out .dtypes .iloc [- len (append_cols ) :]])
248- columns_value = parse_index (dtypes .index , store_data = True )
249237
250238 result_chunk = chunk_op .new_chunk (
251239 chunk_inputs ,
252240 shape = shape ,
253- dtypes = dtypes ,
254- index_value = c .index_value ,
255- columns_value = columns_value ,
256241 index = c .index ,
257242 )
243+ result_chunk ._set_tileable_meta (
244+ tileable_key = out .key ,
245+ nsplits = nsplits ,
246+ index_value = out .index_value ,
247+ columns_value = out .columns_value ,
248+ dtypes = out .dtypes ,
249+ )
258250 out_chunks .append (result_chunk )
259251
260252 params = out .params
261- params ["nsplits" ] = tuple ( tuple ( ns ) for ns in nsplits )
253+ params ["nsplits" ] = nsplits
262254 params ["chunks" ] = out_chunks
263255 new_op = op .copy ()
264256 return new_op .new_tileables (op .inputs , kws = [params ])
@@ -270,10 +262,17 @@ def estimate_size(cls, ctx: dict, op: "DataFrameSetitem"):
270262
271263 @classmethod
272264 def execute (cls , ctx , op : "DataFrameSetitem" ):
273- target = ctx [op .target .key ].copy ()
265+ target = ctx [op .target .key ]
266+ # only deep copy when updating
267+ indexes = (
268+ (op .indexes ,)
269+ if not isinstance (op .indexes , (tuple , list , set ))
270+ else op .indexes
271+ )
272+ deep = bool (set (indexes ) & set (target .columns ))
273+ target = ctx [op .target .key ].copy (deep = deep )
274274 value = ctx [op .value .key ] if not np .isscalar (op .value ) else op .value
275275 try :
276-
277276 target [op .indexes ] = value
278277 except KeyError :
279278 if _allow_set_missing_list : # pragma: no cover
0 commit comments