11/*
2- * Copyright (c) 2000, 2020 , Oracle and/or its affiliates.
2+ * Copyright (c) 2000, 2024 , Oracle and/or its affiliates.
33 *
44 * Licensed under the Universal Permissive License v 1.0 as shown at
55 * http://oss.oracle.com/licenses/upl.
66 */
7+
78using System ;
89using System . IO ;
910using System . Text ;
@@ -48,31 +49,6 @@ public DataWriter(Stream output) : base(output)
4849
4950 #endregion
5051
51- #region Properties
52-
53- /// <summary>
54- /// Obtain a temp buffer used to avoid allocations from
55- /// repeated calls to String APIs.
56- /// </summary>
57- /// <return>
58- /// a char buffer of CHAR_BUF_SIZE characters long
59- /// </return>
60- protected char [ ] CharBuf
61- {
62- get
63- {
64- // "partial" (i.e. windowed) char buffer just for formatUTF
65- char [ ] ach = m_achBuf ;
66- if ( ach == null )
67- {
68- m_achBuf = ach = new char [ CHAR_BUF_SIZE ] ;
69- }
70- return ach ;
71- }
72- }
73-
74- #endregion
75-
7652 #region Packed format writing
7753
7854 /// <summary>
@@ -403,247 +379,12 @@ public override void Write(string text)
403379 }
404380 else
405381 {
406- byte [ ] bytes = FormatUTF ( text ) ;
382+ byte [ ] bytes = Encoding . UTF8 . GetBytes ( text ) ;
407383 WritePackedInt32 ( bytes . Length ) ;
408384 Write ( bytes ) ;
409385 }
410386 }
411387
412388 #endregion
413-
414- #region UTF encoding functions
415-
416- /// <summary>
417- /// Figure out how many bytes it will take to hold the passed String.
418- /// </summary>
419- /// <remarks>
420- /// This method is tightly bound to formatUTF.
421- /// </remarks>
422- /// <param name="s">
423- /// the String
424- /// </param>
425- /// <return>
426- /// the binary UTF length
427- /// </return>
428- protected int CalcUTF ( String s )
429- {
430- int cch = s . Length ;
431- int cb = cch ;
432- char [ ] ach = CharBuf ;
433- bool fSmall = ( cch <= CHAR_BUF_SIZE ) ;
434- if ( fSmall )
435- {
436- var src = new StringBuilder ( s ) ;
437- src . CopyTo ( 0 , ach , 0 , cch ) ;
438- }
439-
440- for ( int ofch = 0 ; ofch < cch ; ++ ofch )
441- {
442- int ch ;
443- if ( fSmall )
444- {
445- ch = ach [ ofch ] ;
446- }
447- else
448- {
449- int ofBuf = ofch & CHAR_BUF_MASK ;
450- if ( ofBuf == 0 )
451- {
452- var src = new StringBuilder ( s ) ;
453- int len = Math . Min ( ofch + CHAR_BUF_SIZE , cch ) - ofch ;
454- src . CopyTo ( ofch , ach , 0 , len ) ;
455- }
456- ch = ach [ ofBuf ] ;
457- }
458-
459- if ( ch <= 0x007F )
460- {
461- // all bytes in this range use the 1-byte format
462- // except for 0
463- if ( ch == 0 )
464- {
465- ++ cb ;
466- }
467- }
468- else
469- {
470- // either a 2-byte format or a 3-byte format (if over
471- // 0x07FF)
472- cb += ( ch <= 0x07FF ? 1 : 2 ) ;
473- }
474- }
475-
476- return cb ;
477- }
478-
479- /// <summary>
480- /// Format the passed String as UTF into the passed byte array.
481- /// </summary>
482- /// <remarks>
483- /// This method is tightly bound to calcUTF.
484- /// </remarks>
485- /// <param name="s">
486- /// the string.
487- /// </param>
488- /// <returns>
489- /// The formated UTF byte array.
490- /// </returns>
491- public byte [ ] FormatUTF ( String s )
492- {
493- int cch = s . Length ;
494- int cb = CalcUTF ( s ) ;
495- int ofb = 0 ;
496- byte [ ] ab = new byte [ cb ] ;
497-
498- if ( cb == cch )
499- {
500- // ask the string to convert itself to ascii bytes
501- // straight into the WriteBuffer
502- Encoding . ASCII . GetBytes ( s , 0 , cch , ab , ofb ) ;
503- }
504- else
505- {
506- char [ ] ach = CharBuf ;
507- if ( cch <= CHAR_BUF_SIZE )
508- {
509- // The following is unnecessary, because it would already
510- // have been performed by calcUTF:
511- //
512- // if (fSmall)
513- // {
514- // s.getChars(0, cch, ach, 0);
515- // }
516- FormatUTF ( ab , ofb , ach , cch ) ;
517- }
518- else
519- {
520- for ( int ofch = 0 ; ofch < cch ; ofch += CHAR_BUF_SIZE )
521- {
522- int cchChunk = Math . Min ( CHAR_BUF_SIZE , cch - ofch ) ;
523- StringBuilder src = new StringBuilder ( s ) ;
524- src . CopyTo ( ofch , ach , 0 , cchChunk ) ;
525- ofb += FormatUTF ( ab , ofb , ach , cchChunk ) ;
526- }
527- }
528- }
529-
530- return ab ;
531- }
532-
533- /// <summary>
534- /// Format the passed characters as UTF into the passed byte array.
535- /// </summary>
536- /// <param name="ab">
537- /// The byte array to format into.
538- /// </param>
539- /// <param name="ofb">
540- /// The offset into the byte array to write the first byte.
541- /// </param>
542- /// <param name="ach">
543- /// The array of characters to format.
544- /// </param>
545- /// <param name="cch">
546- /// The number of characters to format.
547- /// </param>
548- /// <return>
549- /// The number of bytes written to the array.
550- /// </return>
551- protected int FormatUTF ( byte [ ] ab , int ofb , char [ ] ach , int cch )
552- {
553- int ofbOrig = ofb ;
554- for ( int ofch = 0 ; ofch < cch ; ++ ofch )
555- {
556- char ch = ach [ ofch ] ;
557- if ( ch >= 0x0001 && ch <= 0x007F )
558- {
559- // 1-byte format: 0xxx xxxx
560- ab [ ofb ++ ] = ( byte ) ch ;
561- }
562- else if ( ch <= 0x07FF )
563- {
564- // 2-byte format: 110x xxxx, 10xx xxxx
565- ab [ ofb ++ ] = ( byte ) ( 0xC0 | ( ( ch >> 6 ) & 0x1F ) ) ;
566- ab [ ofb ++ ] = ( byte ) ( 0x80 | ( ( ch ) & 0x3F ) ) ;
567- }
568- else
569- {
570- // 3-byte format: 1110 xxxx, 10xx xxxx, 10xx xxxx
571- ab [ ofb ++ ] = ( byte ) ( 0xE0 | ( ( ch >> 12 ) & 0x0F ) ) ;
572- ab [ ofb ++ ] = ( byte ) ( 0x80 | ( ( ch >> 6 ) & 0x3F ) ) ;
573- ab [ ofb ++ ] = ( byte ) ( 0x80 | ( ( ch ) & 0x3F ) ) ;
574- }
575- }
576- return ofb - ofbOrig ;
577- }
578-
579- ///<summary>
580- /// Get a buffer for formating data to bytes. Note that the resulting buffer
581- /// may be shorter than the requested size.
582- /// </summary>
583- /// <param name="cb">
584- /// the requested size for the buffer
585- /// </param>
586- /// <return>
587- /// A byte array that is at least <tt>cb</tt> bytes long, but not
588- /// shorter than <see cref="MIN_BUF"/> and (regardless of the value of
589- /// <tt>cb</tt>) not longer than <see cref="MAX_BUF"/>.
590- /// </return>
591- protected byte [ ] Tmpbuf ( int cb )
592- {
593- byte [ ] ab = m_abBuf ;
594- if ( ab == null || ab . Length < cb )
595- {
596- int cbOld = ab == null ? 0 : ab . Length ;
597- int cbNew = Math . Max ( MIN_BUF , Math . Min ( MAX_BUF , cb ) ) ;
598- if ( cbNew > cbOld )
599- {
600- m_abBuf = ab = new byte [ cbNew > ( ( uint ) MAX_BUF >> 1 ) ? MAX_BUF : cbNew ] ;
601- }
602- }
603- return ab ;
604- }
605-
606- #endregion
607-
608- #region Data Members
609-
610- /// <summary>
611- /// The minimum size of the temp buffer.
612- /// </summary>
613- private const int MIN_BUF = 0x40 ;
614-
615- /// <summary>
616- /// The maximum size of the temp buffer. The maximum size must be at least
617- /// <tt>(3 * CHAR_BUF_SIZE)</tt> to accomodate the worst-case UTF
618- /// formatting length.
619- /// </summary>
620- private const int MAX_BUF = 0x400 ;
621-
622- /// <summary>
623- /// Size of the temporary character buffer. Must be a power of 2.
624- /// Size is: 256 characters (.25 KB).
625- /// </summary>
626- protected const int CHAR_BUF_SIZE = 0x100 ;
627-
628- /// <summary>
629- /// Bitmask used against a raw offset to determine the offset within
630- /// the temporary character buffer.
631- /// </summary>
632- protected const int CHAR_BUF_MASK = ( CHAR_BUF_SIZE - 1 ) ;
633-
634- /// <summary>
635- /// A temp buffer to use for building the data to write.
636- /// </summary>
637- [ NonSerialized ]
638- private byte [ ] m_abBuf ;
639-
640- /// <summary>
641- /// A lazily instantiated temp buffer used to avoid allocations from
642- /// and repeated calls to String functions.
643- /// </summary>
644- [ NonSerialized ]
645- protected char [ ] m_achBuf ;
646-
647- #endregion
648389 }
649390}
0 commit comments