55import com .intellij .lang .ASTNode ;
66import com .intellij .psi .codeStyle .CodeStyleSettings ;
77import com .intellij .psi .formatter .common .AbstractBlock ;
8- import net .seesharpsoft .intellij .plugins .csv .settings .CsvCodeStyleSettings ;
98import net .seesharpsoft .intellij .plugins .csv .CsvColumnInfo ;
109import net .seesharpsoft .intellij .plugins .csv .CsvLanguage ;
1110import net .seesharpsoft .intellij .plugins .csv .psi .CsvElementType ;
1211import net .seesharpsoft .intellij .plugins .csv .psi .CsvTypes ;
12+ import net .seesharpsoft .intellij .plugins .csv .settings .CsvCodeStyleSettings ;
1313import org .jetbrains .annotations .Nullable ;
1414
15+ import java .io .BufferedReader ;
16+ import java .io .IOException ;
17+ import java .io .InputStream ;
18+ import java .io .InputStreamReader ;
19+ import java .util .ArrayList ;
1520import java .util .HashMap ;
1621import java .util .List ;
1722import java .util .Map ;
@@ -23,16 +28,144 @@ public final class CsvFormatHelper {
// Matches WHITE_SPACE_PATTERN_STRING anchored at the start of the input ("^" prefix).
// getTextLength uses it with replaceFirst("") to drop leading whitespace inside a quoted field.
// NOTE(review): WHITE_SPACE_PATTERN_STRING is declared outside this view - confirm what it matches.
2328 private static final Pattern BEGIN_WHITE_SPACE_PATTERN = Pattern .compile ("^" + WHITE_SPACE_PATTERN_STRING );
// Same pattern anchored at the end of the input ("$" suffix); used to drop trailing whitespace.
2429 private static final Pattern END_WHITE_SPACE_PATTERN = Pattern .compile (WHITE_SPACE_PATTERN_STRING + "$" );
2530
// Radix passed to Integer.parseInt when convertRangeTextToRangeArray parses range bounds.
31+ private static final int HEX_RADIX = 16 ;
// Ranges for which charWidth returns 2. Assigned in the static initializer from the lines of
// "misc/EastAsianDoubleWidth.csv" that end with "W" or "F".
32+ private static final int [][] WIDTH_DOUBLE_CHARCODE_RANGES ;
// Ranges for which charWidth returns 2 only when its ambiguousWide argument is true.
// Assigned in the static initializer from the resource lines that end with "A".
33+ private static final int [][] AMBIGUOUS_DOUBLE_CHARCODE_RANGES ;
// Ranges for which charWidth returns 0. Each entry is {first, last}, both bounds inclusive.
// The table is looked up with binarySearch, so entries must stay in ascending order and
// must not overlap.
34+ private static final int [][] WIDTH_ZERO_CHARCODE_RANGES = {
35+ {0x0300 , 0x036F }, {0x0483 , 0x0486 }, {0x0488 , 0x0489 },
36+ {0x0591 , 0x05BD }, {0x05BF , 0x05BF }, {0x05C1 , 0x05C2 },
37+ {0x05C4 , 0x05C5 }, {0x05C7 , 0x05C7 }, {0x0600 , 0x0603 },
38+ {0x0610 , 0x0615 }, {0x064B , 0x065E }, {0x0670 , 0x0670 },
39+ {0x06D6 , 0x06E4 }, {0x06E7 , 0x06E8 }, {0x06EA , 0x06ED },
40+ {0x070F , 0x070F }, {0x0711 , 0x0711 }, {0x0730 , 0x074A },
41+ {0x07A6 , 0x07B0 }, {0x07EB , 0x07F3 }, {0x0901 , 0x0902 },
42+ {0x093C , 0x093C }, {0x0941 , 0x0948 }, {0x094D , 0x094D },
43+ {0x0951 , 0x0954 }, {0x0962 , 0x0963 }, {0x0981 , 0x0981 },
44+ {0x09BC , 0x09BC }, {0x09C1 , 0x09C4 }, {0x09CD , 0x09CD },
45+ {0x09E2 , 0x09E3 }, {0x0A01 , 0x0A02 }, {0x0A3C , 0x0A3C },
46+ {0x0A41 , 0x0A42 }, {0x0A47 , 0x0A48 }, {0x0A4B , 0x0A4D },
47+ {0x0A70 , 0x0A71 }, {0x0A81 , 0x0A82 }, {0x0ABC , 0x0ABC },
48+ {0x0AC1 , 0x0AC5 }, {0x0AC7 , 0x0AC8 }, {0x0ACD , 0x0ACD },
49+ {0x0AE2 , 0x0AE3 }, {0x0B01 , 0x0B01 }, {0x0B3C , 0x0B3C },
50+ {0x0B3F , 0x0B3F }, {0x0B41 , 0x0B43 }, {0x0B4D , 0x0B4D },
51+ {0x0B56 , 0x0B56 }, {0x0B82 , 0x0B82 }, {0x0BC0 , 0x0BC0 },
52+ {0x0BCD , 0x0BCD }, {0x0C3E , 0x0C40 }, {0x0C46 , 0x0C48 },
53+ {0x0C4A , 0x0C4D }, {0x0C55 , 0x0C56 }, {0x0CBC , 0x0CBC },
54+ {0x0CBF , 0x0CBF }, {0x0CC6 , 0x0CC6 }, {0x0CCC , 0x0CCD },
55+ {0x0CE2 , 0x0CE3 }, {0x0D41 , 0x0D43 }, {0x0D4D , 0x0D4D },
56+ {0x0DCA , 0x0DCA }, {0x0DD2 , 0x0DD4 }, {0x0DD6 , 0x0DD6 },
57+ {0x0E31 , 0x0E31 }, {0x0E34 , 0x0E3A }, {0x0E47 , 0x0E4E },
58+ {0x0EB1 , 0x0EB1 }, {0x0EB4 , 0x0EB9 }, {0x0EBB , 0x0EBC },
59+ {0x0EC8 , 0x0ECD }, {0x0F18 , 0x0F19 }, {0x0F35 , 0x0F35 },
60+ {0x0F37 , 0x0F37 }, {0x0F39 , 0x0F39 }, {0x0F71 , 0x0F7E },
61+ {0x0F80 , 0x0F84 }, {0x0F86 , 0x0F87 }, {0x0F90 , 0x0F97 },
62+ {0x0F99 , 0x0FBC }, {0x0FC6 , 0x0FC6 }, {0x102D , 0x1030 },
63+ {0x1032 , 0x1032 }, {0x1036 , 0x1037 }, {0x1039 , 0x1039 },
64+ {0x1058 , 0x1059 }, {0x1160 , 0x11FF }, {0x135F , 0x135F },
65+ {0x1712 , 0x1714 }, {0x1732 , 0x1734 }, {0x1752 , 0x1753 },
66+ {0x1772 , 0x1773 }, {0x17B4 , 0x17B5 }, {0x17B7 , 0x17BD },
67+ {0x17C6 , 0x17C6 }, {0x17C9 , 0x17D3 }, {0x17DD , 0x17DD },
68+ {0x180B , 0x180D }, {0x18A9 , 0x18A9 }, {0x1920 , 0x1922 },
69+ {0x1927 , 0x1928 }, {0x1932 , 0x1932 }, {0x1939 , 0x193B },
70+ {0x1A17 , 0x1A18 }, {0x1B00 , 0x1B03 }, {0x1B34 , 0x1B34 },
71+ {0x1B36 , 0x1B3A }, {0x1B3C , 0x1B3C }, {0x1B42 , 0x1B42 },
72+ {0x1B6B , 0x1B73 }, {0x1DC0 , 0x1DCA }, {0x1DFE , 0x1DFF },
73+ {0x200B , 0x200F }, {0x202A , 0x202E }, {0x2060 , 0x2063 },
74+ {0x206A , 0x206F }, {0x20D0 , 0x20EF }, {0x302A , 0x302F },
75+ {0x3099 , 0x309A }, {0xA806 , 0xA806 }, {0xA80B , 0xA80B },
76+ {0xA825 , 0xA826 }, {0xFB1E , 0xFB1E }, {0xFE00 , 0xFE0F },
77+ {0xFE20 , 0xFE23 }, {0xFEFF , 0xFEFF }, {0xFFF9 , 0xFFFB },
78+ {0x10A01 , 0x10A03 }, {0x10A05 , 0x10A06 }, {0x10A0C , 0x10A0F },
79+ {0x10A38 , 0x10A3A }, {0x10A3F , 0x10A3F }, {0x1D167 , 0x1D169 },
80+ {0x1D173 , 0x1D182 }, {0x1D185 , 0x1D18B }, {0x1D1AA , 0x1D1AD },
81+ {0x1D242 , 0x1D244 }, {0xE0001 , 0xE0001 }, {0xE0020 , 0xE007F },
82+ {0xE0100 , 0xE01EF }
83+ };
84+
// Loads the double-width and ambiguous-width range tables from the bundled resource
// "misc/EastAsianDoubleWidth.csv". Lines ending in "W" or "F" feed WIDTH_DOUBLE_CHARCODE_RANGES,
// lines ending in "A" feed AMBIGUOUS_DOUBLE_CHARCODE_RANGES; all other lines are ignored.
// Loading is best effort: a missing or unreadable resource is reported and leaves the
// tables with whatever was read so far (possibly nothing) instead of failing class initialization.
static {
    final List<String> wideLines = new ArrayList<>();
    final List<String> ambiguousLines = new ArrayList<>();
    try (InputStream is = CsvFormatHelper.class.getClassLoader().getResourceAsStream("misc/EastAsianDoubleWidth.csv")) {
        if (is == null) {
            // getResourceAsStream signals "not found" with null; route it through the same
            // reporting path as a read failure rather than hitting a NullPointerException.
            throw new IOException("Resource not found: misc/EastAsianDoubleWidth.csv");
        }
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8))) {
            // readLine() reports failures as IOException, which the catch below handles;
            // BufferedReader.lines() would wrap them in UncheckedIOException and escape it.
            String line;
            while ((line = reader.readLine()) != null) {
                // The last two characters are dropped as "<separator><category>", so shorter
                // lines cannot carry a range and are skipped.
                if (line.length() <= 2) {
                    continue;
                }
                final String rangeText = line.substring(0, line.length() - 2);
                if (line.endsWith("W") || line.endsWith("F")) {
                    wideLines.add(rangeText);
                }
                if (line.endsWith("A")) {
                    ambiguousLines.add(rangeText);
                }
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    WIDTH_DOUBLE_CHARCODE_RANGES = convertRangeTextToRangeArray(wideLines);
    AMBIGUOUS_DOUBLE_CHARCODE_RANGES = convertRangeTextToRangeArray(ambiguousLines);
}
106+
107+ private static int [][] convertRangeTextToRangeArray (List <String > wideLines ) {
108+ int [][] targetArray = new int [wideLines .size ()][2 ];
109+ for (int i = 0 ; i < targetArray .length ; ++i ) {
110+ String [] split = wideLines .get (i ).split ("\\ .\\ ." );
111+ targetArray [i ][0 ] = Integer .parseInt (split [0 ], HEX_RADIX );
112+ targetArray [i ][1 ] = split .length == 1 ? targetArray [i ][0 ] : Integer .parseInt (split [1 ], HEX_RADIX );
113+ }
114+ return targetArray ;
115+ }
116+
117+ private static boolean binarySearch (int [][] ranges , int charCode ) {
118+ int min = 0 ;
119+ int mid ;
120+ int max = ranges .length - 1 ;
121+
122+ if (charCode < ranges [0 ][0 ] || charCode > ranges [max ][1 ]) {
123+ return false ;
124+ }
125+ while (max >= min ) {
126+ mid = (min + max ) / 2 ;
127+ if (charCode > ranges [mid ][1 ]) {
128+ min = mid + 1 ;
129+ } else if (charCode < ranges [mid ][0 ]) {
130+ max = mid - 1 ;
131+ } else {
132+ return true ;
133+ }
134+ }
135+
136+ return false ;
137+ }
138+
139+ public static int charWidth (int charCode , boolean ambiguousWide ) {
140+ if (charCode <= 0 || binarySearch (WIDTH_ZERO_CHARCODE_RANGES , charCode )) {
141+ return 0 ;
142+ }
143+ if (binarySearch (WIDTH_DOUBLE_CHARCODE_RANGES , charCode ) || (ambiguousWide && binarySearch (AMBIGUOUS_DOUBLE_CHARCODE_RANGES , charCode ))) {
144+ return 2 ;
145+ }
146+ return 1 ;
147+ }
148+
149+ public static int charWidth (CharSequence s , boolean ambiguousWide ) {
150+ int result = 0 ;
151+ for (int i = 0 ; i < s .length (); i ++) {
152+ result += charWidth (s .charAt (i ), ambiguousWide );
153+ }
154+ return result ;
155+ }
156+
26157 public static int getTextLength (ASTNode node , CodeStyleSettings codeStyleSettings ) {
27158 CsvCodeStyleSettings csvCodeStyleSettings = codeStyleSettings .getCustomSettings (CsvCodeStyleSettings .class );
28159 String text = node .getText ();
29- int length = node . getTextLength () ;
160+ int length = 0 ;
30161 if (csvCodeStyleSettings .TABULARIZE && !csvCodeStyleSettings .WHITE_SPACES_OUTSIDE_QUOTES && text .startsWith ("\" " )) {
31162 text = text .substring (1 , text .length () - 1 );
32163 text = BEGIN_WHITE_SPACE_PATTERN .matcher (text ).replaceFirst ("" );
33164 text = END_WHITE_SPACE_PATTERN .matcher (text ).replaceFirst ("" );
34- length = text . length () + 2 ;
165+ length += 2 ;
35166 }
167+ length += csvCodeStyleSettings .ENABLE_WIDE_CHARACTER_DETECTION ? charWidth (text , csvCodeStyleSettings .TREAT_AMBIGUOUS_CHARACTERS_AS_WIDE ) : text .length ();
168+
36169 return length ;
37170 }
38171
0 commit comments