|
| 1 | +package ioc |
| 2 | + |
| 3 | +import "strings" |
| 4 | + |
// AlphabetType selects which character set GetAlphabet returns and,
// through it, which characters CalcIOC counts.
type AlphabetType int

const (
	// Latin is the standard 26-letter lowercase Latin alphabet (a-z).
	Latin AlphabetType = iota
	// Runeglish is a reduced, 22-letter lowercase Latin alphabet: a-z
	// without k, q, v and z. It contains no runic characters.
	Runeglish
	// Rune is a fixed set of 29 runic characters. It is not "every Unicode
	// rune": only the characters listed in GetAlphabet belong to it.
	Rune
)
| 16 | + |
| 17 | +// GetAlphabet returns the character set corresponding to the specified alphabet type |
| 18 | +func GetAlphabet(alphabetType AlphabetType) []string { |
| 19 | + var retval string |
| 20 | + |
| 21 | + switch alphabetType { |
| 22 | + case Latin: |
| 23 | + retval = "abcdefghijklmnopqrstuvwxyz" |
| 24 | + case Runeglish: |
| 25 | + retval = "abcdefghijlmnoprstuwxy" |
| 26 | + case Rune: |
| 27 | + retval = "ᛝᛟᛇᛡᛠᚫᚦᚠᚢᚩᚱᚳᚷᚹᚻᚾᛁᛄᛈᛉᛋᛏᛒᛖᛗᛚᛞᚪᚣ" |
| 28 | + default: |
| 29 | + retval = "abcdefghijklmnopqrstuvwxyz" |
| 30 | + } |
| 31 | + |
| 32 | + return strings.Split(retval, "") |
| 33 | +} |
| 34 | + |
| 35 | +// CalcIOC calculates the incidence of coincidence for the given text using the provided alphabet. |
| 36 | +// The incidence of coincidence is a measure used in cryptanalysis that |
| 37 | +// reflects the likelihood of randomly selecting the same letter twice from a text. |
| 38 | +// It returns a float64 value between 0 and 1. |
| 39 | +// If an empty alphabet is provided, the function defaults to the standard English alphabet. |
| 40 | +func CalcIOC(text string, alphabetType AlphabetType) float64 { |
| 41 | + // Get the alphabet |
| 42 | + alphabet := GetAlphabet(alphabetType) |
| 43 | + |
| 44 | + // Create a map for faster character lookup |
| 45 | + validChars := make(map[string]bool) |
| 46 | + for _, char := range alphabet { |
| 47 | + validChars[char] = true |
| 48 | + } |
| 49 | + |
| 50 | + // Create a map to count occurrences of each letter |
| 51 | + counts := make(map[string]int) |
| 52 | + |
| 53 | + // Count only characters in our alphabet |
| 54 | + totalLetters := 0 |
| 55 | + for _, char := range strings.ToLower(text) { |
| 56 | + if validChars[string(char)] { |
| 57 | + counts[string(char)]++ |
| 58 | + totalLetters++ |
| 59 | + } |
| 60 | + } |
| 61 | + |
| 62 | + // If there are fewer than 2 letters, return 0 |
| 63 | + if totalLetters <= 1 { |
| 64 | + return 0.0 |
| 65 | + } |
| 66 | + |
| 67 | + // Calculate the sum of frequencies squared |
| 68 | + var sum float64 = 0.0 |
| 69 | + for _, count := range counts { |
| 70 | + sum += float64(count) * float64(count) |
| 71 | + } |
| 72 | + |
| 73 | + // Calculate and return the IOC |
| 74 | + return sum / (float64(totalLetters) * float64(totalLetters-1)) |
| 75 | +} |
0 commit comments