@@ -256,6 +256,99 @@ SEXP selfrefokwrapper(SEXP x, SEXP verbose) {
256256 return ScalarInteger (_selfrefok (x ,FALSE,LOGICAL (verbose )[0 ]));
257257}
258258
259+ struct attrib_name_ctx {
260+ hashtab * indexNames ;
261+ SEXP index , assignedNames ;
262+ R_xlen_t len ;
263+ bool verbose ;
264+ };
265+ static SEXP getOneAttribName (SEXP key , SEXP val , void * ctx_ ) {
266+ (void )val ;
267+ struct attrib_name_ctx * ctx = ctx_ ;
268+ if (ctx -> indexNames )
269+ hash_set (ctx -> indexNames , PRINTNAME (key ), 1 );
270+ else
271+ ctx -> len ++ ;
272+ return NULL ;
273+ }
274+
275+ static SEXP fixIndexAttrib (SEXP tag , SEXP value , void * ctx_ ) {
276+ const struct attrib_name_ctx * ctx = ctx_ ;
277+
278+ hashtab * indexNames = ctx -> indexNames ;
279+ SEXP index = ctx -> index , assignedNames = ctx -> assignedNames ;
280+ R_xlen_t indexLength = xlength (value );
281+ bool verbose = ctx -> verbose ;
282+
283+ const char * tc1 , * c1 ;
284+ tc1 = c1 = CHAR (PRINTNAME (tag )); // the index name; e.g. "__col1__col2"
285+
286+ if (* tc1 != '_' || * (tc1 + 1 )!= '_' ) {
287+ // fix for #1396
288+ if (verbose ) {
289+ Rprintf (_ ("Dropping index '%s' as it doesn't have '__' at the beginning of its name. It was very likely created by v1.9.4 of data.table.\n" ), tc1 );
290+ }
291+ setAttrib (index , tag , R_NilValue );
292+ return NULL ;
293+ }
294+
295+ tc1 += 2 ; // tc1 always marks the start of a key column
296+ if (!* tc1 ) internal_error (__func__ , "index name ends with trailing __" ); // # nocov
297+
298+ void * vmax = vmaxget ();
299+ // check the position of the first appearance of an assigned column in the index.
300+ // the new index will be truncated to this position.
301+ size_t newKeyLength = strlen (c1 );
302+ char * s4 = R_alloc (newKeyLength + 3 , 1 );
303+ memcpy (s4 , c1 , newKeyLength );
304+ memcpy (s4 + newKeyLength , "__" , 3 );
305+
306+ for (int i = 0 ; i < xlength (assignedNames ); i ++ ){
307+ const char * tc2 = CHAR (STRING_ELT (assignedNames , i ));
308+ void * vmax2 = vmaxget ();
309+ size_t tc2_len = strlen (tc2 );
310+ char * s5 = R_alloc (tc2_len + 5 , 1 ); //4 * '_' + \0
311+ memcpy (s5 , "__" , 2 );
312+ memcpy (s5 + 2 , tc2 , tc2_len );
313+ memcpy (s5 + 2 + tc2_len , "__" , 3 );
314+ tc2 = strstr (s4 , s5 );
315+ if (tc2 && (tc2 - s4 < newKeyLength )){ // new column is part of key; match is before last match
316+ newKeyLength = tc2 - s4 ;
317+ }
318+ vmaxset (vmax2 );
319+ }
320+
321+ s4 [newKeyLength ] = '\0' ; // truncate the new key to the new length
322+ if (newKeyLength == 0 ){ // no valid key column remains. Drop the key
323+ setAttrib (index , tag , R_NilValue );
324+ hash_set (indexNames , PRINTNAME (tag ), 0 );
325+ if (verbose ) {
326+ Rprintf (_ ("Dropping index '%s' due to an update on a key column\n" ), c1 + 2 );
327+ }
328+ } else if (newKeyLength < strlen (c1 )) {
329+ SEXP s4Str = PROTECT (mkChar (s4 ));
330+ if (indexLength == 0 && // shortened index can be kept since it is just information on the order (see #2372)
331+ !hash_lookup (indexNames , s4Str , 0 )) { // index with shortened name not present yet
332+ setAttrib (index , installChar (s4Str ), value );
333+ hash_set (indexNames , PRINTNAME (tag ), 0 );
334+ setAttrib (index , tag , R_NilValue );
335+ hash_set (indexNames , s4Str , 1 );
336+ if (verbose )
337+ Rprintf (_ ("Shortening index '%s' to '%s' due to an update on a key column\n" ), c1 + 2 , s4 + 2 );
338+ } else { // indexLength > 0 || shortened name present already
339+ // indexLength > 0 indicates reordering. Drop it to avoid spurious reordering in non-indexed columns (#2372)
340+ // shortened name already present indicates that index needs to be dropped to avoid duplicate indices.
341+ setAttrib (index , tag , R_NilValue );
342+ hash_set (indexNames , tag , 0 );
343+ if (verbose )
344+ Rprintf (_ ("Dropping index '%s' due to an update on a key column\n" ), c1 + 2 );
345+ }
346+ UNPROTECT (1 ); // s4Str
347+ } //else: index is not affected by assign: nothing to be done
348+ vmaxset (vmax );
349+ return NULL ;
350+ }
351+
259352int * _Last_updated = NULL ;
260353
261354SEXP assign (SEXP dt , SEXP rows , SEXP cols , SEXP newcolnames , SEXP values )
@@ -265,11 +358,11 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
265358 // cols : column names or numbers corresponding to the values to set
266359 // rows : row numbers to assign
267360 R_len_t numToDo , targetlen , vlen , oldncol , oldtncol , coln , protecti = 0 , newcolnum , indexLength ;
268- SEXP targetcol , nullint , s , colnam , tmp , key , index , a , assignedNames , indexNames ;
361+ SEXP targetcol , nullint , s , colnam , tmp , key , index , a , assignedNames ;
269362 bool verbose = GetVerbose ();
270363 int ndelete = 0 ; // how many columns are being deleted
271364 const char * c1 , * tc1 , * tc2 ;
272- int * buf , indexNo ;
365+ int * buf ;
273366 if (isNull (dt )) error (_ ("assign has been passed a NULL dt" ));
274367 if (TYPEOF (dt ) != VECSXP ) error (_ ("dt passed to assign isn't type VECSXP" ));
275368 if (islocked (dt ))
@@ -549,93 +642,17 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
549642 }
550643 index = getAttrib (dt , install ("index" ));
551644 if (index != R_NilValue ) {
552- s = ATTRIB (index );
553- indexNo = 0 ;
554- // get a vector with all index names
555- PROTECT (indexNames = allocVector (STRSXP , xlength (s ))); protecti ++ ;
556- while (s != R_NilValue ){
557- SET_STRING_ELT (indexNames , indexNo , PRINTNAME (TAG (s )));
558- indexNo ++ ;
559- s = CDR (s );
560- }
561- s = ATTRIB (index ); // reset to first element
562- indexNo = 0 ;
563- while (s != R_NilValue ) {
564- a = TAG (s );
565- indexLength = xlength (CAR (s ));
566- tc1 = c1 = CHAR (PRINTNAME (a )); // the index name; e.g. "__col1__col2"
567- if (* tc1 != '_' || * (tc1 + 1 )!= '_' ) {
568- // fix for #1396
569- if (verbose ) {
570- Rprintf (_ ("Dropping index '%s' as it doesn't have '__' at the beginning of its name. It was very likely created by v1.9.4 of data.table.\n" ), tc1 );
571- }
572- setAttrib (index , a , R_NilValue );
573- indexNo ++ ;
574- s = CDR (s );
575- continue ; // with next index
576- }
577- tc1 += 2 ; // tc1 always marks the start of a key column
578- if (!* tc1 ) internal_error (__func__ , "index name ends with trailing __" ); // # nocov
579- // check the position of the first appearance of an assigned column in the index.
580- // the new index will be truncated to this position.
581- char * s4 = malloc (strlen (c1 ) + 3 );
582- if (!s4 ) {
583- internal_error (__func__ , "Couldn't allocate memory for s4" ); // # nocov
584- }
585- memcpy (s4 , c1 , strlen (c1 ));
586- memset (s4 + strlen (c1 ), '\0' , 1 );
587- strcat (s4 , "__" ); // add trailing '__' to newKey so we can search for pattern '__colName__' also at the end of the index.
588- int newKeyLength = strlen (c1 );
589- for (int i = 0 ; i < xlength (assignedNames ); i ++ ){
590- tc2 = CHAR (STRING_ELT (assignedNames , i ));
591- char * s5 = malloc (strlen (tc2 ) + 5 ); //4 * '_' + \0
592- if (!s5 ) {
593- free (s4 ); // # nocov
594- internal_error (__func__ , "Couldn't allocate memory for s5" ); // # nocov
595- }
596- memset (s5 , '_' , 2 );
597- memset (s5 + 2 , '\0' , 1 );
598- strcat (s5 , tc2 );
599- strcat (s5 , "__" );
600- tc2 = strstr (s4 , s5 );
601- if (tc2 == NULL ){ // column is not part of key
602- free (s5 );
603- continue ;
604- }
605- if (tc2 - s4 < newKeyLength ){ // new column match is before last match
606- newKeyLength = tc2 - s4 ;
607- }
608- free (s5 );
609- }
610- memset (s4 + newKeyLength , '\0' , 1 ); // truncate the new key to the new length
611- if (newKeyLength == 0 ){ // no valid key column remains. Drop the key
612- setAttrib (index , a , R_NilValue );
613- SET_STRING_ELT (indexNames , indexNo , NA_STRING );
614- if (verbose ) {
615- Rprintf (_ ("Dropping index '%s' due to an update on a key column\n" ), c1 + 2 );
616- }
617- } else if (newKeyLength < strlen (c1 )) {
618- SEXP s4Str = PROTECT (mkString (s4 ));
619- if (indexLength == 0 && // shortened index can be kept since it is just information on the order (see #2372)
620- LOGICAL (chin (s4Str , indexNames ))[0 ] == 0 ) {// index with shortened name not present yet
621- SET_TAG (s , install (s4 ));
622- SET_STRING_ELT (indexNames , indexNo , mkChar (s4 ));
623- if (verbose )
624- Rprintf (_ ("Shortening index '%s' to '%s' due to an update on a key column\n" ), c1 + 2 , s4 + 2 );
625- } else { // indexLength > 0 || shortened name present already
626- // indexLength > 0 indicates reordering. Drop it to avoid spurious reordering in non-indexed columns (#2372)
627- // shortened name already present indicates that index needs to be dropped to avoid duplicate indices.
628- setAttrib (index , a , R_NilValue );
629- SET_STRING_ELT (indexNames , indexNo , NA_STRING );
630- if (verbose )
631- Rprintf (_ ("Dropping index '%s' due to an update on a key column\n" ), c1 + 2 );
632- }
633- UNPROTECT (1 ); // s4Str
634- } //else: index is not affected by assign: nothing to be done
635- free (s4 );
636- indexNo ++ ;
637- s = CDR (s );
638- }
645+ struct attrib_name_ctx ctx = { 0 , };
646+ R_mapAttrib (index , getOneAttribName , & ctx ); // how many attributes?
647+ hashtab * h = hash_create (ctx .len );
648+ PROTECT (h -> prot );
649+ ctx .indexNames = h ;
650+ R_mapAttrib (index , getOneAttribName , & ctx ); // now remember the names
651+ ctx .index = index ;
652+ ctx .assignedNames = assignedNames ;
653+ ctx .verbose = verbose ;
654+ R_mapAttrib (index , fixIndexAttrib , & ctx ); // adjust indices as needed
655+ UNPROTECT (1 ); // h
639656 }
640657 if (ndelete ) {
641658 // delete any columns assigned NULL (there was a 'continue' earlier in loop above)
0 commit comments