@@ -585,6 +585,59 @@ HRESULT Library_corlib_native_System_String::ToCharArray(CLR_RT_StackFrame &stac
585585 NANOCLR_NOCLEANUP ();
586586}
587587
588+ // Helper function for comparing UTF-8 substrings
589+ bool MatchString (CLR_RT_UnicodeHelper &inputIter, const char *searchStr, int searchCharLen)
590+ {
591+ // Create copies to preserve original iterator state
592+ CLR_RT_UnicodeHelper inputCopy = inputIter;
593+ CLR_RT_UnicodeHelper searchIter;
594+ searchIter.SetInputUTF8 (searchStr);
595+
596+ for (int i = 0 ; i < searchCharLen; i++)
597+ {
598+ CLR_UINT16 bufInput[3 ] = {0 };
599+ CLR_UINT16 bufSearch[3 ] = {0 };
600+
601+ // Set up buffers for character conversion
602+ inputCopy.m_outputUTF16 = bufInput;
603+ inputCopy.m_outputUTF16_size = MAXSTRLEN (bufInput);
604+ searchIter.m_outputUTF16 = bufSearch;
605+ searchIter.m_outputUTF16_size = MAXSTRLEN (bufSearch);
606+
607+ // Convert next character from input
608+ if (!inputCopy.ConvertFromUTF8 (1 , false ))
609+ {
610+ // Input ended prematurely
611+ return false ;
612+ }
613+
614+ // Convert next character from search string
615+ if (!searchIter.ConvertFromUTF8 (1 , false ))
616+ {
617+ // Shouldn't happen for valid search string
618+ return false ;
619+ }
620+
621+ // Compare first UTF-16 code unit
622+ if (bufInput[0 ] != bufSearch[0 ])
623+ {
624+ return false ;
625+ }
626+
627+ // Handle surrogate pairs (4-byte UTF-8 sequences)
628+ if (bufInput[0 ] >= 0xD800 && bufInput[0 ] <= 0xDBFF )
629+ {
630+ // High surrogate
631+ if (bufInput[1 ] != bufSearch[1 ])
632+ {
633+ // Low surrogate mismatch
634+ return false ;
635+ }
636+ }
637+ }
638+ return true ;
639+ }
640+
588641HRESULT Library_corlib_native_System_String::IndexOf (CLR_RT_StackFrame &stack, int mode)
589642{
590643 NATIVE_PROFILE_CLR_CORE ();
@@ -594,8 +647,8 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
594647 int startIndex;
595648 int count;
596649 int pos;
597- const char *pString;
598- const CLR_UINT16 *pChars;
650+ const char *pString = NULL ;
651+ const CLR_UINT16 *pChars = NULL ;
599652 int iChars = 0 ;
600653 CLR_RT_UnicodeHelper inputIterator;
601654 int inputLen;
@@ -605,8 +658,6 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
605658 if (!szText)
606659 szText = " " ;
607660 pos = -1 ;
608- pString = NULL ;
609- pChars = NULL ;
610661
611662 if (mode & c_IndexOf__SingleChar)
612663 {
@@ -617,23 +668,20 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
617668 {
618669 CLR_RT_HeapBlock_Array *array = stack.Arg1 ().DereferenceArray ();
619670 FAULT_ON_NULL (array);
620-
621671 pChars = (const CLR_UINT16 *)array->GetFirstElement ();
622672 iChars = array->m_numOfElements ;
623673 }
624674 else if (mode & c_IndexOf__String)
625675 {
626676 pString = stack.Arg1 ().RecoverString ();
627677 FAULT_ON_NULL (pString);
628- // how long is the search string?
629678 inputIterator.SetInputUTF8 (pString);
630679 searchLen = inputIterator.CountNumberOfCharacters ();
631680 }
632681
633- // calculate input string length
682+ // Calculate input length
634683 inputIterator.SetInputUTF8 (szText);
635684 inputLen = inputIterator.CountNumberOfCharacters ();
636-
637685 if (0 == inputLen)
638686 {
639687 pos = -1 ;
@@ -647,7 +695,6 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
647695 }
648696 else
649697 {
650- // for mode LastIndex... we are searching backwards toward the start of the string
651698 if (mode & c_IndexOf__Last)
652699 {
653700 startIndex = inputLen - 1 ;
@@ -663,49 +710,53 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
663710 NANOCLR_SET_AND_LEAVE (CLR_E_OUT_OF_RANGE);
664711
665712 // for mode LastIndex... with string we move the start index back by the search string length -1
713+ // if we search forward
666714 if ((mode & c_IndexOf__String_Last) == c_IndexOf__String_Last)
667715 {
668716 startIndex -= searchLen - 1 ;
669- // check the start index; if not in range skip the search
717+ // check the start index; if not in range, skip the search
670718 if (startIndex < 0 || startIndex > inputLen)
719+ {
671720 goto Exit;
721+ }
672722 }
673723
674724 // calculate the iteration count
675725 if (mode & c_IndexOf__Count)
676726 {
677- // count form parameter
727+ // count (from parameter)
678728 count = stack.Arg3 ().NumericByRefConst ().s4 ;
679729 }
680730 else
681731 {
682732 // for mode LastIndex... we are searching from start index backwards toward the start of the string
683733 if (mode & c_IndexOf__Last)
684734 {
685- // backward until the start of string
686- // one more time than the startIndex because we should iterate until zero
735+ // backwards until the start of the string
736+ // one position ahead of the startIndex because we should iterate until position zero
687737 count = startIndex + 1 ;
688738 }
689739 else
690740 {
691- // forward until the end of string
741+ // move forward until reaching the end of the string
692742 count = inputLen - startIndex;
693743 }
694744 }
695745
696- // for mode with string we reduce the count by the search string length -1
697- // if we search foreward
746+ // forward search with index of string mode: adjust the count by the search string length -1
698747 if ((mode & c_IndexOf__String_Last) == c_IndexOf__String)
699748 {
700749 count -= searchLen - 1 ;
701750 }
702751
703- // check the count
752+ // validate count
704753 if (mode & c_IndexOf__Last)
705754 {
706755 // check for backward mode; no exception; just exit
707756 if (count > startIndex + 1 )
757+ {
708758 goto Exit;
759+ }
709760 }
710761 else
711762 {
@@ -717,132 +768,87 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
717768 // First move to the character, then read it.
718769 if (inputIterator.ConvertFromUTF8 (startIndex, true ))
719770 {
720- // string mode?
771+ // String search mode
721772 if (pString)
722773 {
723- // iterate thru all positions
724774 while (count-- > 0 )
725775 {
726- CLR_RT_UnicodeHelper inputString;
727- inputString.SetInputUTF8 ((const char *)inputIterator.m_inputUTF8 );
728- CLR_RT_UnicodeHelper searchString;
729- searchString.SetInputUTF8 (pString);
730- bool finished = false ;
731-
732- while (true )
733- {
734- CLR_UINT16 bufInput[3 ];
735- CLR_UINT16 bufSearch[3 ];
736-
737- inputString.m_outputUTF16 = bufInput;
738- inputString.m_outputUTF16_size = MAXSTRLEN (bufInput);
739-
740- searchString.m_outputUTF16 = bufSearch;
741- searchString.m_outputUTF16_size = MAXSTRLEN (bufSearch);
742-
743- // read next char from search string; if no more chars to read (false)
744- // then we are done and found the search string in the input string
745- if (searchString.ConvertFromUTF8 (1 , false ) == false )
746- {
747- pos = startIndex;
748- finished = true ;
749- break ;
750- }
751-
752- // read the next char from the input string; if no more chars to read (false)
753- // we didn't found the search string in the input string; we abort the search now
754- if (inputString.ConvertFromUTF8 (1 , false ) == false )
755- {
756- finished = true ;
757- break ;
758- }
759-
760- // does the char from input not match the char from the search string
761- if (bufInput[0 ] != bufSearch[0 ])
762- {
763- // next iteration round but not finished
764- break ;
765- }
766- }
767-
768- // finished (with or without a found) then break
769- if (finished)
776+ // Use helper for proper UTF-8 comparison
777+ if (MatchString (inputIterator, pString, searchLen))
770778 {
779+ pos = startIndex;
771780 break ;
772781 }
773782
774- // reading forward or backward
783+ // Move to next candidate position (both forward or backward reading)
775784 if (mode & c_IndexOf__Last)
776785 {
777786 startIndex--;
778- // move one chars backward
779- if (inputIterator.MoveBackwardInUTF8 (szText, 1 ) == false )
787+ // move backwards one char
788+ if (! inputIterator.MoveBackwardInUTF8 (szText, 1 ))
780789 {
781790 break ;
782791 }
783792 }
784793 else
785794 {
786795 startIndex++;
787- // move to the next char
788- if (inputIterator.ConvertFromUTF8 (1 , true ) == false )
796+ // move forward to the next char
797+ if (! inputIterator.ConvertFromUTF8 (1 , true ))
789798 {
790799 break ;
791800 }
792801 }
793802 }
794803 }
795804
796- // char mode?
797- if (pChars)
805+ // Character search mode
806+ else if (pChars)
798807 {
799- // iterate thru all positions
808+ // iterate through all positions
800809 while (count-- > 0 )
801810 {
802- CLR_UINT16 buf[3 ];
811+ CLR_UINT16 buf[3 ] = { 0 } ;
803812
804813 inputIterator.m_outputUTF16 = buf;
805814 inputIterator.m_outputUTF16_size = MAXSTRLEN (buf);
806815
807816 // read the next char from the input string; if no more chars to read (false)
808- // we didn't found the search chars in the input string
809- if (inputIterator.ConvertFromUTF8 (1 , false ) == false )
817+ // the search chars weren't found in the input string
818+ if (! inputIterator.ConvertFromUTF8 (1 , false ))
810819 {
811820 break ;
812821 }
813822
814- // test each search char if it's a match
823+ // test each search char for a match
815824 for (int i = 0 ; i < iChars; i++)
816825 {
817- // match?
818826 if (buf[0 ] == pChars[i])
819827 {
820- // position found!
828+ // found position for next char
821829 pos = startIndex;
822830 break ;
823831 }
824832 }
825833
826- // found? => break
834+ // didn't find any, break
827835 if (pos != -1 )
828836 {
829837 break ;
830838 }
831839
832- // for mode LastIndex... we are searching from start index backwards toward the start of the string
840+ // for search mode LastIndex: we are searching from start index backwards toward the start of the string
833841 if (mode & c_IndexOf__Last)
834842 {
835- // in backward mode
843+ // backwards mode
836844 startIndex--;
837- // move two chars backward, because the current char is already read
838- if (inputIterator.MoveBackwardInUTF8 (szText, 2 ) == false )
839- {
845+ // have to move two chars backwards, because the current char is already read
846+ if (!inputIterator.MoveBackwardInUTF8 (szText, 2 ))
840847 break ;
841- }
842848 }
843849 else
844850 {
845- // forward mode; simple advance the start index
851+ // forward mode: just advance the start index
846852 startIndex++;
847853 }
848854 }
@@ -851,7 +857,6 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
851857
852858Exit:
853859 stack.SetResult_I4 (pos);
854-
855860 NANOCLR_NOCLEANUP ();
856861}
857862
@@ -889,10 +894,11 @@ HRESULT Library_corlib_native_System_String::ChangeCase(CLR_RT_StackFrame &stack
889894 *ptr++ = c;
890895 }
891896
892- NANOCLR_CHECK_HRESULT (CLR_RT_HeapBlock_String::CreateInstance (
893- stack.PushValue (),
894- (CLR_UINT16 *)arrayTmp->GetFirstElement (),
895- arrayTmp->m_numOfElements ));
897+ NANOCLR_CHECK_HRESULT (
898+ CLR_RT_HeapBlock_String::CreateInstance (
899+ stack.PushValue (),
900+ (CLR_UINT16 *)arrayTmp->GetFirstElement (),
901+ arrayTmp->m_numOfElements ));
896902
897903 NANOCLR_NOCLEANUP ();
898904}
@@ -923,10 +929,11 @@ HRESULT Library_corlib_native_System_String::Substring(CLR_RT_StackFrame &stack,
923929 NANOCLR_SET_AND_LEAVE (CLR_E_OUT_OF_RANGE);
924930 }
925931
926- NANOCLR_CHECK_HRESULT (CLR_RT_HeapBlock_String::CreateInstance (
927- stack.PushValue (),
928- (CLR_UINT16 *)arrayTmp->GetElement (startIndex),
929- length));
932+ NANOCLR_CHECK_HRESULT (
933+ CLR_RT_HeapBlock_String::CreateInstance (
934+ stack.PushValue (),
935+ (CLR_UINT16 *)arrayTmp->GetElement (startIndex),
936+ length));
930937
931938 NANOCLR_NOCLEANUP ();
932939}
@@ -1102,10 +1109,11 @@ HRESULT Library_corlib_native_System_String::Split(CLR_RT_StackFrame &stack, CLR
11021109 {
11031110 CLR_RT_HeapBlock *str = (CLR_RT_HeapBlock *)arrayDst->GetElement (count);
11041111
1105- NANOCLR_CHECK_HRESULT (CLR_RT_HeapBlock_String::CreateInstance (
1106- *str,
1107- pSrcStart,
1108- (CLR_UINT32)(pSrc - pSrcStart)));
1112+ NANOCLR_CHECK_HRESULT (
1113+ CLR_RT_HeapBlock_String::CreateInstance (
1114+ *str,
1115+ pSrcStart,
1116+ (CLR_UINT32)(pSrc - pSrcStart)));
11091117
11101118 pSrcStart = pSrc + 1 ;
11111119 }
0 commit comments