// Convert the encrypted payload to a bit string so single bits can be consumed below
val cipherBitString = Format.asBitString(cipherBits)

// Cover text tokens collected so far (grows as sampling proceeds)
var coverTextTokens = intArrayOf()

// Loop state: index of the next cipher bit to encode, and termination flag
var i = 0
var isLastSentenceFinished = false

// llama.cpp batch needs the full context tokens on the first run, but only the
// last sampled token on subsequent runs
var isFirstRun = true

// Always overwritten with the last cover text token before it is read
var sampledToken = -1
// Sample tokens until all of bits of secret message are encoded and last sentence is finished
37
+
while (i < cipherBitString.length ||!isLastSentenceFinished) {
38
+
// Huffman sampling to encode bits of secret message into tokens
39
+
// Huffman sampling: encode bits of the secret message into the next token
if (i < cipherBitString.length) {
    // Ask llama.cpp for the logit matrix, similar to
    // https://github.com/ggerganov/llama.cpp/blob/master/examples/simple/simple.cpp:
    // only not-yet-processed tokens go into the batch (full context on the first
    // run, just the last sampled token afterwards; the rest is managed in ctx).
    // Only the last row is needed — it holds the logits for the final prompt token.
    val lastTokenLogits = LlamaCpp.getLogits(if (isFirstRun) contextTokens else intArrayOf(sampledToken)).last()

    // Top 2^bitsPerToken logits (= height of the Huffman tree)
    val topLogits = getTopLogits(lastTokenLogits)

    // Build a Huffman tree over the top logits
    val coding = HuffmanCoding()
    coding.buildHuffmanTree(topLogits)
    coding.mergeHuffmanNodes()

    // Traverse the tree following the cipher bits to pick the next token,
    // thereby encoding information in it
    var node = coding.generateHuffmanCodes()

    // Inner nodes (created during the merge step) carry no token
    while (node.token == null) {
        // The bounds check comes first because i can exceed cipherBitString.length
        // in the last outer iteration when (cipher length) % bitsPerToken != 0;
        // short-circuit evaluation then prevents an IndexOutOfBoundsException.
        // Asserting left/right non-null is safe: the tree is never traversed
        // deeper than bitsPerToken levels.
        node = if (i >= cipherBitString.length || cipherBitString[i] == '0') node.left!! else node.right!!

        // Each turn taken while traversing the tree encodes one more bit
        i++
    }

    // The leaf reached carries the token whose path spells the next bitsPerToken bits
    sampledToken = node.token!!

    // Subsequent runs only feed the last sampled token to llama.cpp
    isFirstRun = false
}
80
+
// Greedy sampling to pick most likely token until last sentence is finished
81
+
else {
82
+
// llama.cpp greedy sampler is used for efficiency instead of manually sorting logits descending and picking the first one
83
+
// Input is only the last sampled token, similar to the else case of the getLogits input above, as greedy sampling only ever gets called after Huffman sampling