|
| 1 | +package main |
| 2 | + |
| 3 | +import ( |
| 4 | + "flag" |
| 5 | + "fmt" |
| 6 | + "io/ioutil" |
| 7 | + "os" |
| 8 | + |
| 9 | + "github.com/axgle/mahonia" |
| 10 | +) |
| 11 | + |
// usage is the help text printed by usageExit. It lists every flag that is
// registered in the package-level flag block and shows one complete
// invocation. It deliberately ends with a newline so it can be written
// verbatim.
const usage = `converter convert files from one specify charset to another

Usage:

  converter [commands|flags]

The commands & flags are:

  -sc <charset>      source file charset
  -dc <charset>      destination file charset, if you not specify this, default will be UTF-8
  -s <file>          source file path
  -d <file>          destination file path
  -l                 list all supported charset
  -t                 test log with all charset

Examples:

  # convert a UTF-16 log file to UTF-8
  converter -sc UTF-16 -s /home/test/test.log -dc UTF-8 -d /home/test/test.utf-8.log

`
| 33 | + |
// Command-line flags. Each variable is the pointer returned by the flag
// package when the flag is registered; dereference it after flag.Parse.
var (
	// sp (-s) is the path of the file to read; empty by default.
	sp = flag.String("s", "", "source file path")
	// dp (-d) is the path the converted output is written to.
	dp = flag.String("d", "decoded.log", "destination file path")

	// sc (-sc) names the charset of the source file; empty by default.
	sc = flag.String("sc", "", "source file charset")
	// dc (-dc) names the charset of the destination file.
	dc = flag.String("dc", "UTF-8", "destination file charset")

	// list (-l) selects the mode that prints the supported charset names.
	list = flag.Bool("l", false, "list all supported charset")
	// test (-t) selects the mode that tries every charset on the source file.
	test = flag.Bool("t", false, "test log with all charset")
)
| 42 | + |
| 43 | +func usageExit(rc int) { |
| 44 | + fmt.Println(usage) |
| 45 | + os.Exit(rc) |
| 46 | +} |
| 47 | + |
| 48 | +func main() { |
| 49 | + flag.Usage = func() { usageExit(0) } |
| 50 | + flag.Parse() |
| 51 | + lists := []string{"UTF-8", "UTF-16", "US-ASCII", "ISO-8859-1", |
| 52 | + "GBK", "latin1", "GB18030", "EUC-JP", "UTF-16BE", "UTF-16LE", "Big5", "Shift_JIS", |
| 53 | + "ISO-8859-2", "ISO-8859-3", "ISO-8859-4", "ISO-8859-5", "ISO-8859-6", "ISO-8859-7", |
| 54 | + "ISO-8859-8", "ISO-8859-9", "ISO-8859-10", "ISO-8859-11", "ISO-8859-13", |
| 55 | + "ISO-8859-14", "ISO-8859-15", "ISO-8859-16", "macos-0_2-10.2", "macos-6_2-10.4", |
| 56 | + "macos-7_3-10.2", "macos-29-10.2", "macos-35-10.2", "windows-1250", "windows-1251", |
| 57 | + "windows-1252", "windows-1253", "windows-1254", "windows-1255", "windows-1256", |
| 58 | + "windows-1257", "windows-1258", "windows-874", "IBM037", "ibm-273_P100-1995", |
| 59 | + "ibm-277_P100-1995", "ibm-278_P100-1995", "ibm-280_P100-1995", "ibm-284_P100-1995", |
| 60 | + "ibm-285_P100-1995", "ibm-290_P100-1995", "ibm-297_P100-1995", "ibm-420_X120-1999", |
| 61 | + //此处省略大量IBM的字符集,太多,等用户需要再加 |
| 62 | + "KOI8-R", "KOI8-U", "ebcdic-xml-us"} |
| 63 | + if *list { |
| 64 | + fmt.Println("this tool is used to convert files from one specify charset to another, use -h to see how to use it\n all supported charsets are list as belows:") |
| 65 | + for _, v := range lists { |
| 66 | + fmt.Println(v) |
| 67 | + } |
| 68 | + return |
| 69 | + } |
| 70 | + |
| 71 | + if *sp == "" { |
| 72 | + fmt.Println("you must specify your log path to be converted, use -h to helps") |
| 73 | + return |
| 74 | + } |
| 75 | + datas, err := ioutil.ReadFile(*sp) |
| 76 | + if err != nil { |
| 77 | + fmt.Println("read file ", *sp, " err: ", err) |
| 78 | + return |
| 79 | + } |
| 80 | + if *test { |
| 81 | + if len(datas) > 1024 { |
| 82 | + datas = datas[:1024] |
| 83 | + } |
| 84 | + for _, v := range lists { |
| 85 | + fmt.Println(v) |
| 86 | + decoder := mahonia.NewDecoder(v) |
| 87 | + fmt.Println(decoder.ConvertString(string(datas))) |
| 88 | + } |
| 89 | + return |
| 90 | + } |
| 91 | + if *sc == "" { |
| 92 | + fmt.Println("you must specify your source file charset, use -l to see all supported charsets") |
| 93 | + return |
| 94 | + } |
| 95 | + decoder := mahonia.NewDecoder(*sc) |
| 96 | + ret := decoder.ConvertString(string(datas)) |
| 97 | + err = ioutil.WriteFile(*dp, []byte(ret), 0644) |
| 98 | + if err != nil { |
| 99 | + fmt.Println("write file ", *dp, " err: ", err) |
| 100 | + } |
| 101 | + return |
| 102 | +} |
0 commit comments