@@ -348,16 +348,16 @@ func TestStringConvert(t *testing.T) {
348348 {MustCreateStringWithDefaults (sqltypes .Char , 20 ), JSONDocument {Val : map [string ]interface {}{"a" : 1 }}, `{"a": 1}` , false },
349349 {MustCreateStringWithDefaults (sqltypes .Char , 20 ), NewLazyJSONDocument ([]byte (`{"a":1}` )), `{"a": 1}` , false },
350350
351- {MustCreateStringWithDefaults (sqltypes .Char , 10 ), []byte {0x98 , 0x76 , 0x54 }, nil , true },
352- {MustCreateStringWithDefaults (sqltypes .VarChar , 10 ), []byte {0x98 , 0x76 , 0x54 }, nil , true },
353- {MustCreateStringWithDefaults (sqltypes .Text , 10 ), []byte {0x98 , 0x76 , 0x54 }, nil , true },
351+ {MustCreateStringWithDefaults (sqltypes .Char , 10 ), []byte {0x98 , 0x76 , 0x54 }, nil , false },
352+ {MustCreateStringWithDefaults (sqltypes .VarChar , 10 ), []byte {0x98 , 0x76 , 0x54 }, nil , false },
353+ {MustCreateStringWithDefaults (sqltypes .Text , 10 ), []byte {0x98 , 0x76 , 0x54 }, nil , false },
354354 {MustCreateBinary (sqltypes .Binary , 10 ), []byte {0x98 , 0x76 , 0x54 }, []byte {0x98 , 0x76 , 0x54 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }, false },
355355 {MustCreateBinary (sqltypes .VarBinary , 10 ), []byte {0x98 , 0x76 , 0x54 }, []byte {0x98 , 0x76 , 0x54 }, false },
356356 {MustCreateBinary (sqltypes .Blob , 10 ), []byte {0x98 , 0x76 , 0x54 }, []byte {0x98 , 0x76 , 0x54 }, false },
357357
358- {MustCreateStringWithDefaults (sqltypes .Char , 10 ), string ([]byte {0x98 , 0x76 , 0x54 }), nil , true },
359- {MustCreateStringWithDefaults (sqltypes .VarChar , 10 ), string ([]byte {0x98 , 0x76 , 0x54 }), nil , true },
360- {MustCreateStringWithDefaults (sqltypes .Text , 10 ), string ([]byte {0x98 , 0x76 , 0x54 }), nil , true },
358+ {MustCreateStringWithDefaults (sqltypes .Char , 10 ), string ([]byte {0x98 , 0x76 , 0x54 }), nil , false },
359+ {MustCreateStringWithDefaults (sqltypes .VarChar , 10 ), string ([]byte {0x98 , 0x76 , 0x54 }), nil , false },
360+ {MustCreateStringWithDefaults (sqltypes .Text , 10 ), string ([]byte {0x98 , 0x76 , 0x54 }), nil , false },
361361 {MustCreateBinary (sqltypes .Binary , 10 ), string ([]byte {0x98 , 0x76 , 0x54 }), []byte {0x98 , 0x76 , 0x54 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 , 0x0 }, false },
362362 {MustCreateBinary (sqltypes .VarBinary , 10 ), string ([]byte {0x98 , 0x76 , 0x54 }), []byte {0x98 , 0x76 , 0x54 }, false },
363363 {MustCreateBinary (sqltypes .Blob , 10 ), string ([]byte {0x98 , 0x76 , 0x54 }), []byte {0x98 , 0x76 , 0x54 }, false },
@@ -505,6 +505,183 @@ func TestStringSQL_StrictConvertValidation(t *testing.T) {
505505 })
506506}
507507
508+ // TestStringSQL_CustomerWorkflow_Issue8893 tests the specific customer scenarios
509+ // described in issue dolthub/dolt#8893 to ensure MySQL-compatible behavior.
510+ func TestStringSQL_CustomerWorkflow_Issue8893 (t * testing.T ) {
511+ ctx := sql .NewEmptyContext ()
512+
513+ // Customer's exact problematic data: "DoltLab®" with latin1 ® (0xAE)
514+ customerData := []byte {0x44 , 0x6F , 0x6C , 0x74 , 0x4C , 0x61 , 0x62 , 0xAE }
515+ textType := Text
516+
517+ t .Run ("Customer Scenario 1: Basic SELECT that was failing" , func (t * testing.T ) {
518+ // Customer reported: SELECT name FROM Products; threw "invalid string for charset utf8mb4"
519+ // After fix: Should return NULL instead of error
520+ result , err := textType .SQL (ctx , nil , customerData )
521+ require .NoError (t , err , "Customer's basic SELECT should not throw errors" )
522+ assert .True (t , result .IsNull (), "Should return NULL for invalid UTF-8 (matches MySQL)" )
523+ })
524+
525+ t .Run ("Customer Scenario 2: INSERT with problematic data" , func (t * testing.T ) {
526+ // Customer had existing data that was problematic
527+ // Our fix should allow INSERT operations to complete with NULL values
528+ convertResult , _ , err := textType .Convert (ctx , string (customerData ))
529+ require .NoError (t , err , "INSERT operations should not fail" )
530+ assert .Nil (t , convertResult , "Should insert NULL for invalid UTF-8 (matches MySQL)" )
531+ })
532+
533+ t .Run ("Customer Scenario 3: Data identification queries" , func (t * testing.T ) {
534+ // Customer needs to identify problematic records with WHERE clauses
535+ // Test that NULL values work properly in comparisons
536+ result , err := textType .SQL (ctx , nil , customerData )
537+ require .NoError (t , err )
538+
539+ // Simulate: SELECT * FROM Products WHERE name IS NULL;
540+ assert .True (t , result .IsNull (), "NULL values should be identifiable with IS NULL" )
541+
542+ // Simulate: SELECT * FROM Products WHERE name IS NOT NULL;
543+ validData := []byte ("ValidProduct" )
544+ validResult , err := textType .SQL (ctx , nil , validData )
545+ require .NoError (t , err )
546+ assert .False (t , validResult .IsNull (), "Valid data should not be NULL" )
547+ })
548+
549+ t .Run ("Customer Scenario 4: Mixed valid and invalid data" , func (t * testing.T ) {
550+ // Customer's table had mix of valid and invalid data
551+ testCases := []struct {
552+ name string
553+ data []byte
554+ isValid bool
555+ }{
556+ {"Valid product name" , []byte ("ValidProduct" ), true },
557+ {"Customer's problematic data" , customerData , false },
558+ {"Another valid name" , []byte ("AnotherProduct®" ), true }, // Proper UTF-8 ®
559+ {"Different invalid UTF-8" , []byte {0x48 , 0x65 , 0x6C , 0x6C , 0x6F , 0x98 }, false },
560+ }
561+
562+ for _ , tc := range testCases {
563+ t .Run (tc .name , func (t * testing.T ) {
564+ result , err := textType .SQL (ctx , nil , tc .data )
565+ require .NoError (t , err , "No queries should fail regardless of data validity" )
566+
567+ if tc .isValid {
568+ assert .False (t , result .IsNull (), "Valid UTF-8 should not return NULL" )
569+ assert .Equal (t , string (tc .data ), result .ToString ())
570+ } else {
571+ assert .True (t , result .IsNull (), "Invalid UTF-8 should return NULL" )
572+ }
573+ })
574+ }
575+ })
576+
577+ t .Run ("Customer Scenario 5: Export/cleanup operations" , func (t * testing.T ) {
578+ // Customer wanted to export data and re-import with proper encoding
579+ // All SELECT operations should work without throwing errors
580+
581+ // Simulate customer's export query that was failing
582+ problemData := [][]byte {
583+ customerData , // Original issue
584+ {0x48 , 0x65 , 0x6C , 0x6C , 0x6F , 0x98 , 0x76 , 0x54 }, // Other invalid UTF-8
585+ {0x54 , 0x65 , 0x73 , 0x74 , 0xAE , 0x98 }, // Multiple invalid bytes
586+ }
587+
588+ for i , data := range problemData {
589+ t .Run (fmt .Sprintf ("Export query %d" , i + 1 ), func (t * testing.T ) {
590+ result , err := textType .SQL (ctx , nil , data )
591+ require .NoError (t , err , "Export queries must not fail" )
592+
593+ // Customer can now identify records that need fixing
594+ if result .IsNull () {
595+ // This record needs attention in the re-import
596+ t .Logf ("Record %d identified as needing cleanup (NULL)" , i + 1 )
597+ }
598+ })
599+ }
600+ })
601+ }
602+
603+ // TestStringSQL_MySQLCompatibility_Issue8893 validates that our behavior exactly matches MySQL
604+ // for the specific scenarios in issue dolthub/dolt#8893.
605+ func TestStringSQL_MySQLCompatibility_Issue8893 (t * testing.T ) {
606+ ctx := sql .NewEmptyContext ()
607+
608+ t .Run ("MySQL VARBINARY behavior comparison" , func (t * testing.T ) {
609+ // Test data: 0x446F6C744C6162AE (DoltLab + latin1 ®)
610+ varbinaryData := []byte {0x44 , 0x6F , 0x6C , 0x74 , 0x4C , 0x61 , 0x62 , 0xAE }
611+
612+ // MySQL behavior for VARBINARY with invalid UTF-8:
613+ // - Basic SELECT: Shows "DoltLab�" (replacement character in display)
614+ // - This is handled at the display level, our SQL function should return the data
615+ binaryType := LongBlob // Binary type should pass through unchanged
616+ result , err := binaryType .SQL (ctx , nil , varbinaryData )
617+ require .NoError (t , err )
618+ assert .False (t , result .IsNull (), "Binary data should pass through unchanged" )
619+
620+ // The display shows replacement character, but the data itself is preserved
621+ resultBytes := []byte (result .ToString ())
622+ assert .Equal (t , varbinaryData , resultBytes , "Binary data should be preserved exactly" )
623+ })
624+
625+ t .Run ("MySQL TEXT behavior comparison" , func (t * testing.T ) {
626+ // Test data: 0x446F6C744C6162AE (DoltLab + latin1 ®)
627+ invalidUTF8Data := []byte {0x44 , 0x6F , 0x6C , 0x74 , 0x4C , 0x61 , 0x62 , 0xAE }
628+ textType := Text
629+
630+ // MySQL behavior for TEXT with invalid UTF-8:
631+ // - SELECT: Returns NULL
632+ // - INSERT: Accepts and stores NULL
633+ // - CAST to utf8mb4: Returns NULL
634+
635+ // Test SELECT behavior
636+ result , err := textType .SQL (ctx , nil , invalidUTF8Data )
637+ require .NoError (t , err , "SELECT should not error (MySQL compatibility)" )
638+ assert .True (t , result .IsNull (), "Should return NULL for invalid UTF-8 (matches MySQL)" )
639+
640+ // Test INSERT behavior (Convert function)
641+ convertResult , _ , err := textType .Convert (ctx , string (invalidUTF8Data ))
642+ require .NoError (t , err , "INSERT should not error (MySQL compatibility)" )
643+ assert .Nil (t , convertResult , "Should insert NULL for invalid UTF-8 (matches MySQL)" )
644+ })
645+
646+ t .Run ("MySQL CAST behavior comparison" , func (t * testing.T ) {
647+ // Test MySQL: SELECT CAST(0x446F6C744C6162AE AS CHAR CHARACTER SET utf8mb4);
648+ // Result: NULL
649+ invalidUTF8Data := []byte {0x44 , 0x6F , 0x6C , 0x74 , 0x4C , 0x61 , 0x62 , 0xAE }
650+
651+ // Test both SQL and Convert functions
652+ textType := Text
653+
654+ // SQL function (used in SELECT CAST(...))
655+ sqlResult , err := textType .SQL (ctx , nil , invalidUTF8Data )
656+ require .NoError (t , err )
657+ assert .True (t , sqlResult .IsNull (), "CAST in SELECT should return NULL (matches MySQL)" )
658+
659+ // Convert function (used in INSERT with CAST(...))
660+ convertResult , _ , err := textType .Convert (ctx , string (invalidUTF8Data ))
661+ require .NoError (t , err )
662+ assert .Nil (t , convertResult , "CAST in INSERT should return NULL (matches MySQL)" )
663+ })
664+
665+ t .Run ("Customer's exact error message scenario" , func (t * testing.T ) {
666+ // Customer reported: "invalid string for charset utf8mb4"
667+ // This should no longer occur with our fix
668+ customerData := []byte {0x44 , 0x6F , 0x6C , 0x74 , 0x4C , 0x61 , 0x62 , 0xAE }
669+ textType := Text
670+
671+ // Before fix: This would throw "invalid string for charset utf8mb4"
672+ // After fix: Should return NULL without any error
673+ result , err := textType .SQL (ctx , nil , customerData )
674+ require .NoError (t , err , "Should not throw 'invalid string for charset utf8mb4' error" )
675+ assert .True (t , result .IsNull (), "Should handle invalid UTF-8 gracefully with NULL" )
676+
677+ // Verify the error message pattern is not present
678+ if err != nil {
679+ assert .NotContains (t , err .Error (), "invalid string for charset utf8mb4" ,
680+ "Should not throw the specific error customer encountered" )
681+ }
682+ })
683+ }
684+
508685func TestStringString (t * testing.T ) {
509686 tests := []struct {
510687 typ sql.Type
0 commit comments