diff --git a/lib/caotral/assembler.rb b/lib/caotral/assembler.rb index 89e0733..f889178 100644 --- a/lib/caotral/assembler.rb +++ b/lib/caotral/assembler.rb @@ -1,9 +1,9 @@ # frozen_string_literal: true -require_relative "assembler/elf" -require_relative "assembler/elf/utils" -require_relative "assembler/elf/header" -require_relative "assembler/elf/sections" -require_relative "assembler/elf/section_header" +require "caotral/binary/elf" + +require_relative "assembler/builder" +require_relative "assembler/reader" +require_relative "assembler/writer" class Caotral::Assembler GCC_ASSEMBLERS = ["gcc", "as"].freeze @@ -15,7 +15,7 @@ def self.assemble!(input:, output: File.basename(input, ".*") + ".o", assembler: def initialize(input:, output: File.basename(input, ".*") + ".o", assembler: "as", type: :relocatable, debug: false) @input, @output = input, output - @elf = ELF.new(type:, input:, output:, debug:) + @asm_reader = Caotral::Assembler::Reader.new(input:, debug:) @assembler = assembler @debug = debug end @@ -30,7 +30,10 @@ def assemble(assembler: @assembler, assembler_options: [], input: @input, output output end def obj_file = @output - def to_elf(input: @input, output: @output, debug: false) = @elf.build(input:, output:, debug:) + def to_elf(input: @input, output: @output, debug: false) + elf_obj = Caotral::Assembler::Builder.new(instructions:).build + Caotral::Assembler::Writer.new(elf_obj:, output:, debug:).write + end def command(asm) case asm @@ -44,6 +47,7 @@ def command(asm) end private + def instructions = @instructions ||= @asm_reader.read def gcc_assembler(assembler) case assembler when "as", "gcc" diff --git a/lib/caotral/assembler/builder.rb b/lib/caotral/assembler/builder.rb new file mode 100644 index 0000000..bffa179 --- /dev/null +++ b/lib/caotral/assembler/builder.rb @@ -0,0 +1,50 @@ +require "caotral/binary/elf" + +require_relative "builder/text" + +module Caotral + class Assembler + class Builder + def initialize(instructions:) = @instructions = instructions + + def build + elf = Caotral::Binary::ELF.new + elf.header = Caotral::Binary::ELF::Header.new + + sections = [] + sections << [nil, nil] + sections << [".text", assemble_text(@instructions)] + sections << [".strtab", Caotral::Binary::ELF::Section::Strtab.new] + sections << [".symtab", Caotral::Binary::ELF::Section::Symtab.new] + sections << [".shstrtab", Caotral::Binary::ELF::Section::Strtab.new] + sections.each do |(section_name, body)| + header = Caotral::Binary::ELF::SectionHeader.new + section = Caotral::Binary::ELF::Section.new(header:, body:, section_name:) + elf.sections << section + end + strtab = elf.find_by_name(".strtab") + symtab = elf.find_by_name(".symtab") + symtab.body = build_symtab(strtab.body) + elf + end + + private + def assemble_text(instructions) + text = Caotral::Assembler::Builder::Text.new(instructions:) + instructions.each do |label, lines| + text.entries << { label:, size: 0 } + lines.each { |line| text.assemble!(line) } + end + text.build + end + + def build_symtab(strtab) + entries = [] + entries << Caotral::Binary::ELF::Section::Symtab.new.set!(name: 0, info: 0, shndx: 0, value: 0, size: 0) + name = strtab.offset_of("main") + entries << Caotral::Binary::ELF::Section::Symtab.new.set!(name:, info: 0x12, other: 0, shndx: 1, value: 0, size: 0) + entries + end + end + end +end diff --git a/lib/caotral/assembler/builder/text.rb b/lib/caotral/assembler/builder/text.rb new file mode 100644 index 0000000..b4d0249 --- /dev/null +++ b/lib/caotral/assembler/builder/text.rb @@ -0,0 +1,275 @@ +require "caotral/binary/elf" +module Caotral + class Assembler + class Builder + class Text + PREFIX = { + REX_W: 0x48, + }.freeze + + REGISTER_CODE = { + RAX: 0b000, + RCX: 0b001, + RDX: 0b010, + RBX: 0b011, + RSP: 0b100, + RBP: 0b101, + RSI: 0b110, + RDI: 0b111, + }.freeze + + OPECODE = { + ADD: [0x01], + CMP: [0x39], + CQO: [0x99], + IDIV: [0xf7], + IMUL: [0x0f], + MOV: [0x89], + MOVR: [0x8B], + MOVXZ: [0x0f, 0xb7], + SUB: [0x83], + XOR: [0x31], + }.freeze + HEX_PATTERN = /\A0x[0-9a-fA-F]+\z/.freeze + attr_reader :entries + + def initialize(instructions:) + @instructions = instructions + @entries = [] + @label_positions = {} + end + + def assemble!(line) + line = line.strip + return if line.empty? + @entries << parse_line(line) + end + + def build + @label_positions.clear + offset = 0 + @entries.each do |entry| + if entry[:label] + @label_positions[entry[:label]] = offset + next + end + offset += entry[:size] + end + + @bytes = [] + offset = 0 + @entries.each do |entry| + next if entry[:label] + bytes = encode(entry, offset) + @bytes << bytes + offset += bytes.size + end + + @bytes.flatten.pack("C*") + end + + def size = build.bytesize + def align(val, bytes) = (val << [0] until build.bytesize % bytes == 0) + + private + + def encode(entry, offset) + opecode(entry[:op], offset, *entry[:operands]) + end + + def parse_line(line) + op, *operands = line.split(/\s+/).reject(&:empty?).map { it.gsub(/,/, "") } + size = instruction_size(op, *operands) + { op:, operands:, size: } + end + + def instruction_size(op, *operands) + case op + when "je", "jne" + 6 + when "jmp" + 5 + else + opecode(op, 0, *operands).size + end + end + + def opecode(op, offset, *operands) + case op + when "push" + push(*operands) + when "mov", "movzb" + [PREFIX[:REX_W], *mov(op, *operands)] + when "sub", "add", "imul", "cqo", "idiv" + [PREFIX[:REX_W], *calc(op, *operands)] + when "xor" + [PREFIX[:REX_W], *calc_bit(op, *operands)] + when "lea" + [PREFIX[:REX_W], *calc_addr(op, *operands)] + when "pop" + pop(*operands) + when "cmp" + [PREFIX[:REX_W], *cmp(op, *operands)] + when "sete", "setl" + sete(op, *operands) + when "je", "jmp", "jne" + jump(op, offset, *operands) + when "syscall" + [0x0f, 0x05] + when "ret" + [0xc3] + else + raise Caotral::Binary::ELF::Error, "yet implemented operations: #{op}" + end + end + + def jump(op, offset, *operands) + label = operands.first + target = @label_positions.fetch(label) do + raise Caotral::Binary::ELF::Error, "unknown label: #{label}" + end + size = instruction_size(op, label) + rel = Integer(target) - Integer(offset) - Integer(size) + displacement = [rel].pack("l<").bytes + case op + when "je" + [0x0f, 0x84, *displacement] + when "jmp" + [0xe9, *displacement] + when "jne" + [0x0f, 0x85, *displacement] + else + raise Caotral::Binary::ELF::Error, "unknown jump: #{op}" + end + end + + def mov(op, *operands) + reg = case operands + in ["rax", "rbp"] + [0xe8] + in ["rbp", "rsp"] + [0xe5] + in ["rsp", "rbp"] + [0xec] + in ["[rax]", "rdi"] + [0x38] + in ["rax", "al"] + op = "MOVXZ" + [0xc0] + in ["rax", "[rax]"] + op = "MOVR" + [0x00] + in ["rdi", "rax"] + [0xC7] + in ["rax", HEX_PATTERN] + return [0xC7, 0xC0, *immediate(operands[1])] + else + operands&.map { reg(_1) } + end # steep:ignore + [OPECODE[op.upcase.to_sym], reg].flatten + end + + def calc(op, *operands) + ope_code = OPECODE[op.upcase.to_sym] + case [op, *operands] + in ["sub", "rax", "rdi"] + [0x29, 0xf8] + in ["add", "rax", "rdi"] + [ope_code, 0xf8] + in ["imul", "rax", "rdi"] + [ope_code, 0xaf, 0xc7] + in ["idiv", "rdi"] + [ope_code, 0xff] + in ["sub", "rsp", *num] + [ope_code, 0xec, *num.map { |n| n.to_i(16) }] + in ["sub", "rax", *num] + [ope_code, 0xe8, *num.map { |n| n.to_i(16) }] + in ["cqo"] + [0x99] + end # steep:ignore + end + + def calc_bit(op, *operands) + case [op, *operands] + in ["xor", "rax", "rax"] + [0x31, 0xc0] + in ["xor", "rdi", "rdi"] + [0x31, 0xff] + end # steep:ignore + end + + def calc_addr(op, *operands) + case [op, *operands] + in ["lea", "rax", *addrs] + rm, disp = parse_addressing_mode(addrs.first) + [0x8D, *mod_rm(0b01, 0b000, rm), disp] + end # steep:ignore + end + + def cmp(op, *operands) + case operands + in ["rax", "rdi"] + [0x39, 0xf8] + in ["rax", "0"] + [0x83, 0xf8, 0x00] + end # steep:ignore + end + + def sete(op, *operands) + case [op, operands] + in ["sete", ["al"]] + [0x0f, 0x94, 0xc0] + in ["setl", ["al"]] + [0x0f, 0x9c, 0xc0] + end # steep:ignore + end + + def push(*operands) + case operands + in ["rbp"] | ["rdi"] + [0x55] + in ["rax"] + [0x50] + in [HEX_PATTERN] + [0x68, *immediate(operands.first)] + else + [0x6a, *operands.map { |o| reg(o) }] + end # steep:ignore + end + + def pop(*operands) + case operands + in ["rax"] | ["rdi"] + [0x58 + REGISTER_CODE[operands.first.upcase.to_sym]] + in ["rbp"] + [0x5d] + end # steep:ignore + end + + def reg(r) + case r + when "rsp" + 0xec + when "rbp" + 0x5e + when "rax" + 0x29 + when "rdi" + 0xf8 + when /\d+/ + r.to_i(16) + else + raise Caotral::Binary::ELF::Error, "yet implemented operand address: #{r}" + end + end + def immediate(operand) = [operand.to_i(16)].pack("L").unpack("C*") + def mod_rm(mod, reg, rm) = (mod << 6) | (reg << 3) | rm + def parse_addressing_mode(str) + m = str.match(/\[(?\w+)(?[\+\-]\d+)?\]/) + [REGISTER_CODE[m[:reg].upcase.to_sym], m[:disp].to_i & 0xff] + end + end + end + end +end + diff --git a/lib/caotral/assembler/elf.rb b/lib/caotral/assembler/elf.rb deleted file mode 100644 index d1f3411..0000000 --- a/lib/caotral/assembler/elf.rb +++ /dev/null @@ -1,77 +0,0 @@ -class Caotral::Assembler - class ELF - class Error < StandardError; end - class Section; end - class SectionHeader; end - module Utils; end - - def initialize(type:, input:, output:, debug:) - @input, @output = input, output - @header = Header.new(type:) - @sections = Sections.new - end - - def build(input: @input, output: @output, debug: false) - program_size = 0 - read!(input:) - init_assemble! - - offset = 0x40 - section_headers = [] - names = [] - bodies = { - null: nil, - text: nil, - data: nil, - bss: nil, - note: nil, - symtab: nil, - strtab: nil, - shstrtab: nil, - } - name_idx = 0 - padding = nil - @sections.each do |section| - name = section.name - names << name - section.body.set!(name: names.join) if name == "\0.shstrtab" - bin = section.body.build - size = bin.bytesize - bin << "\0" until (bin.bytesize % 8) == 0 if ["\0.text", "\0.shstrtab"].include?(name) - bin << "\0" until ((bin.bytesize + offset) % 8) == 0 if ["\0.shstrtab"].include?(name) - bodies[section.section_name.to_sym] = bin - header = section.header - if offset > 0x40 && size > 0 && padding&.>(0) - offset += padding - padding = nil - end - padding = bin.size - size if size > 0 - header.set!(name: name_idx, offset:, size:) unless name == "" - offset += size - section_headers << header.build - name_idx += name == "" ? 1 : name.size - end - @header.set!(shoffset: offset + padding) - w = File.open(output, "wb") - w.write([@header.build, *bodies.values, *section_headers].join) - w.close - [@header.build, *bodies.values, *section_headers] - end - - private - def init_assemble! = (note!; symtab!) - - def read!(input: @input, text: @sections.text.body) - read = { main: false } - File.open(input, "r") do |r| - r.each_line do |line| - read[:main] = line.match(/main:/) unless read[:main] - next unless read[:main] && !/main:/.match(line) - text.assemble!(line) - end - end - end - def note! = @sections.note.body.null! - def symtab! = @sections.symtab.body.set!(entsize: 0x18, name: 1, info: 0x10, other: 0, shndx: 1) - end -end diff --git a/lib/caotral/assembler/elf/header.rb b/lib/caotral/assembler/elf/header.rb deleted file mode 100644 index aefd134..0000000 --- a/lib/caotral/assembler/elf/header.rb +++ /dev/null @@ -1,62 +0,0 @@ -require_relative "../elf" - -class Caotral::Assembler::ELF::Header - include Caotral::Assembler::ELF::Utils - IDENT = [0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00].freeze - ELF_FILE_TYPE = { NONE: 0, REL: 1, EXEC: 2, DYN: 3, CORE: 4 }.freeze - - def initialize(endian: :littel, type: :rel, arc: :amd64) - @ident = IDENT - @type = num2bytes(ELF_FILE_TYPE[elf(type)], 2) - @arch = arch(arc) - @version = num2bytes(1, 4) - @entry = num2bytes(0x00, 8) - @phoffset = num2bytes(0x00, 8) - @shoffset = num2bytes(0x00, 8) - @flags = num2bytes(0x00, 4) - @ehsize = num2bytes(0x40, 2) - @phsize = num2bytes(0x00, 2) - @phnum = num2bytes(0x00, 2) - @shentsize = num2bytes(0x40, 2) - @shnum = num2bytes(0x08, 2) - @shstrndx = num2bytes(0x07, 2) - end - - def build = bytes.flatten.pack("C*") - - def set!(entry: nil, phoffset: nil, shoffset: nil, shnum: nil, shstrndx: nil) - @entry = num2bytes(entry, 8) if check(entry, 8) - @phoffset = num2bytes(phoffset, 8) if check(phoffset, 8) - @shoffset = num2bytes(shoffset, 8) if check(shoffset, 8) - @shnum = num2bytes(shnum, 4) if check(shnum, 4) - @shstrndx = num2bytes(shstrndx, 4) if check(shstrndx, 4) - end - - private - - def bytes = [ - @ident, @type, @arch, @version, @entry, @phoffset, - @shoffset, @flags, @ehsize, @phsize, @phnum, @shentsize, - @shnum, @shstrndx - ] - - def arch(machine) - case machine.to_s - in "amd64" | "x86_64" | "x64" - [0x3e, 0x00] - end - end - - def elf(type) - case type.to_s - in "relocatable" | "rel" - :REL - in "exe" | "ex" | "exec" - :EXEC - in "shared" | "share" | "dynamic" | "dyn" - :DYN - else - :NONE - end - end -end diff --git a/lib/caotral/assembler/elf/section.rb b/lib/caotral/assembler/elf/section.rb deleted file mode 100644 index a1080b5..0000000 --- a/lib/caotral/assembler/elf/section.rb +++ /dev/null @@ -1,25 +0,0 @@ -require_relative "section/text" -require_relative "section/bss" -require_relative "section/data" -require_relative "section/note" -require_relative "section/null" -require_relative "section/symtab" -require_relative "section/strtab" -require_relative "section/shstrtab" -require_relative "section_header" - -class Caotral::Assembler::ELF::Section - attr_reader :header, :body, :name, :section_name - def initialize(type:, options: {}) - type_string = type.to_s.capitalize - type_string = type_string.upcase if type_string == "Bss" - @section_name = type_string.downcase - @name = section_name == "null" ? "" : "\0.#{section_name}" - @header = Caotral::Assembler::ELF::SectionHeader.new.send("#{@section_name}!") - @body = Module.const_get("Caotral::Assembler::ELF::Section::#{type_string}").new(**options) - end - - def name=(name) - @name = name - end -end diff --git a/lib/caotral/assembler/elf/section/bss.rb b/lib/caotral/assembler/elf/section/bss.rb deleted file mode 100644 index 87da36a..0000000 --- a/lib/caotral/assembler/elf/section/bss.rb +++ /dev/null @@ -1,10 +0,0 @@ -class Caotral::Assembler::ELF::Section::BSS - include Caotral::Assembler::ELF::Utils - def initialize(**opts) = nil - def build = bytes.flatten.pack("C*") - def set! = self - - private - def bytes = [] - def check(val, bytes) = false -end diff --git a/lib/caotral/assembler/elf/section/data.rb b/lib/caotral/assembler/elf/section/data.rb deleted file mode 100644 index ecf8d9a..0000000 --- a/lib/caotral/assembler/elf/section/data.rb +++ /dev/null @@ -1,7 +0,0 @@ -class Caotral::Assembler::ELF::Section::Data - include Caotral::Assembler::ELF::Utils - def initialize(**opts) = nil - def build = bytes.flatten.pack("C*") - def set! = self - private def bytes = [] -end diff --git a/lib/caotral/assembler/elf/section/note.rb b/lib/caotral/assembler/elf/section/note.rb deleted file mode 100644 index ece321c..0000000 --- a/lib/caotral/assembler/elf/section/note.rb +++ /dev/null @@ -1,33 +0,0 @@ -class Caotral::Assembler::ELF::Section::Note - include Caotral::Assembler::ELF::Utils - - def self.gnu_property = new.gnu_property!.build - def self.null = new.null!.build - - def initialize(type: nil) - @nsize = nil - @dsize = nil - @type = nil - @name = nil - @desc = nil - gnu_property! if type == :gnu - end - - def set!(nsize: nil, dsize: nil, type: nil, name: nil, desc: nil) - @nsize = num2bytes(nsize, 4) if check(nsize, 4) - @dsize = num2bytes(dsize, 4) if check(dsize, 4) - @type = num2bytes(type, 4) if check(type, 4) - @name = name!(name) if name - @desc = desc!(desc) if desc - self - end - - def gnu_property! = set!(nsize: 0x04, dsize: 0x20, type: 0x05, name: "GNU", desc: %w(02 00 01 c0 04 00 00 00 00 00 00 00 00 00 00 00 01 00 01 c0 04 00 00 00 01 00 00 00 00 00 00 00).map { |val| val.to_i(16) }) - def null! = set!(nsize: 0, dsize: 0, type: 0, name: "NULL", desc: [0]) - - private - - def name!(name) = align(@name = name.bytes, 4) - def desc!(desc) = align(@desc = desc.is_a?(Array) ? desc : desc.bytes, 4) - def bytes = [@nsize, @dsize, @type, @name, @desc] -end diff --git a/lib/caotral/assembler/elf/section/null.rb b/lib/caotral/assembler/elf/section/null.rb deleted file mode 100644 index e5dcb37..0000000 --- a/lib/caotral/assembler/elf/section/null.rb +++ /dev/null @@ -1,7 +0,0 @@ -class Caotral::Assembler::ELF::Section::Null - include Caotral::Assembler::ELF::Utils - def initialize(**opts) = nil - def build = bytes.flatten.pack("C*") - def set! = self - private def bytes = [] -end diff --git a/lib/caotral/assembler/elf/section/shstrtab.rb b/lib/caotral/assembler/elf/section/shstrtab.rb deleted file mode 100644 index 493a6d0..0000000 --- a/lib/caotral/assembler/elf/section/shstrtab.rb +++ /dev/null @@ -1,22 +0,0 @@ -class Caotral::Assembler::ELF::Section::Shstrtab - include Caotral::Assembler::ELF::Utils - def initialize(**opts) = @name = [] - def build = bytes.flatten.pack("C*") - def set!(name:) = (@name << name!(name); self) - - private - def bytes = [@name, [0]] - def name!(name) - case name - when String - (name.match(/\A\0\..+\z/) ? name : "\0.#{name}").bytes - when Array - raise Caotral::Assembler::ELF::Error, "unaccepted type in Array" unless name.all? { |elem| elem.is_a?(Integer) } - n = name - n.unshift(0) && n.push(0) unless n.first == 0 && n.last == 0 - n - else - raise Caotral::Assembler::ELF::Error, "unsupported type" - end - end -end diff --git a/lib/caotral/assembler/elf/section/strtab.rb b/lib/caotral/assembler/elf/section/strtab.rb deleted file mode 100644 index 8984894..0000000 --- a/lib/caotral/assembler/elf/section/strtab.rb +++ /dev/null @@ -1,5 +0,0 @@ -class Caotral::Assembler::ELF::Section::Strtab - include Caotral::Assembler::ELF::Utils - def initialize(names = "\0main\0", **opts) = @names = names - def build = @names.bytes.pack("C*") -end diff --git a/lib/caotral/assembler/elf/section/symtab.rb b/lib/caotral/assembler/elf/section/symtab.rb deleted file mode 100644 index 215e8cc..0000000 --- a/lib/caotral/assembler/elf/section/symtab.rb +++ /dev/null @@ -1,37 +0,0 @@ -class Caotral::Assembler::ELF::Section::Symtab - include Caotral::Assembler::ELF::Utils - def initialize(**opts) - @entsize = [] - @name = num2bytes(0, 4) - @info = num2bytes(0, 1) - @other = num2bytes(0, 1) - @shndx = num2bytes(0, 2) - @value = num2bytes(0, 8) - @size = num2bytes(0, 8) - end - - def set!(entsize: nil, name: nil, info: nil, other: nil, shndx: nil, value: nil, size: nil) - @entsize = [0] * entsize unless entsize.nil? - @name = name2bytes(name, 4) if check(name, 4) - @info = num2bytes(info, 1) if check(info, 1) - @other = num2bytes(other, 1) if check(other, 1) - @shndx = num2bytes(shndx, 2) if check(shndx, 2) - @value = num2bytes(value, 8) if check(value, 8) - @size = num2bytes(size, 8) if check(size, 8) - end - - private - def bytes = [@entsize, @name, @info, @other, @shndx, @value, @size] - def name2bytes(name, bytes) - case name - when String - name.bytes.reverse - when Array - name[0..bytes] - when Integer - num2bytes(name, bytes) - else - [0] * bytes - end - end -end diff --git a/lib/caotral/assembler/elf/section/text.rb b/lib/caotral/assembler/elf/section/text.rb deleted file mode 100644 index c82b012..0000000 --- a/lib/caotral/assembler/elf/section/text.rb +++ /dev/null @@ -1,265 +0,0 @@ -class Caotral::Assembler::ELF::Section::Text - PREFIX = { - REX_W: 0x48, - }.freeze - - REGISTER_CODE = { - RAX: 0b000, - RCX: 0b001, - RDX: 0b010, - RBX: 0b011, - RSP: 0b100, - RBP: 0b101, - RSI: 0b110, - RDI: 0b111, - }.freeze - - OPECODE = { - ADD: [0x01], - CMP: [0x39], - CQO: [0x99], - IDIV: [0xf7], - IMUL: [0x0f], - MOV: [0x89], - MOVR: [0x8B], - MOVXZ: [0x0f, 0xb7], - SUB: [0x83], - XOR: [0x31], - }.freeze - HEX_PATTERN = /\A0x[0-9a-fA-F]+\z/.freeze - - def initialize(**opts) - @bytes = [] - @entries = [] - @label_positions = {} - end - - def assemble!(line) - line = line.strip - return if line.empty? - @entries << parse_line(line) - end - - def build - @label_positions.clear - offset = 0 - @entries.each do |entry| - if entry[:label] - @label_positions[entry[:label]] = offset - next - end - offset += entry[:size] - end - - @bytes = [] - offset = 0 - @entries.each do |entry| - next if entry[:label] - bytes = encode(entry, offset) - @bytes << bytes - offset += bytes.size - end - - @bytes.flatten.pack("C*") - end - - def size = build.bytesize - def align(val, bytes) = (val << [0] until build.bytesize % bytes == 0) - - private - - def encode(entry, offset) - opecode(entry[:op], offset, *entry[:operands]) - end - - def parse_line(line) - return { label: line.delete_suffix(":"), size: 0 } if line.end_with?(":") - op, *operands = line.split(/\s+/).reject(&:empty?).map { it.gsub(/,/, "") } - size = instruction_size(op, *operands) - { op:, operands:, size: } - end - - def instruction_size(op, *operands) - case op - when "je", "jne" - 6 - when "jmp" - 5 - else - opecode(op, 0, *operands).size - end - end - - def opecode(op, offset, *operands) - case op - when "push" - push(*operands) - when "mov", "movzb" - [PREFIX[:REX_W], *mov(op, *operands)] - when "sub", "add", "imul", "cqo", "idiv" - [PREFIX[:REX_W], *calc(op, *operands)] - when "xor" - [PREFIX[:REX_W], *calc_bit(op, *operands)] - when "lea" - [PREFIX[:REX_W], *calc_addr(op, *operands)] - when "pop" - pop(*operands) - when "cmp" - [PREFIX[:REX_W], *cmp(op, *operands)] - when "sete", "setl" - sete(op, *operands) - when "je", "jmp", "jne" - jump(op, offset, *operands) - when "syscall" - [0x0f, 0x05] - when "ret" - [0xc3] - else - raise Caotral::Assembler::ELF::Error, "yet implemented operations: #{op}" - end - end - - def jump(op, offset, *operands) - label = operands.first - target = @label_positions.fetch(label) do - raise Caotral::Compiler::Assembler::ELF::Error, "unknown label: #{label}" - end - size = instruction_size(op, label) - rel = target - (offset + size) - displacement = [rel].pack("l<").unpack("C*") - case op - when "je" - [0x0f, 0x84, *displacement] - when "jmp" - [0xe9, *displacement] - when "jne" - [0x0f, 0x85, *displacement] - end - end - - def mov(op, *operands) - reg = case operands - in ["rax", "rbp"] - [0xe8] - in ["rbp", "rsp"] - [0xe5] - in ["rsp", "rbp"] - [0xec] - in ["[rax]", "rdi"] - [0x38] - in ["rax", "al"] - op = "MOVXZ" - [0xc0] - in ["rax", "[rax]"] - op = "MOVR" - [0x00] - in ["rdi", "rax"] - [0xC7] - in ["rax", HEX_PATTERN] - return [0xC7, 0xC0, *immediate(operands[1])] - else - operands&.map { reg(_1) } - end # steep:ignore - [OPECODE[op.upcase.to_sym], reg].flatten - end - - def calc(op, *operands) - ope_code = OPECODE[op.upcase.to_sym] - case [op, *operands] - in ["sub", "rax", "rdi"] - [0x29, 0xf8] - in ["add", "rax", "rdi"] - [ope_code, 0xf8] - in ["imul", "rax", "rdi"] - [ope_code, 0xaf, 0xc7] - in ["idiv", "rdi"] - [ope_code, 0xff] - in ["sub", "rsp", *num] - [ope_code, 0xec, *num.map { |n| n.to_i(16) }] - in ["sub", "rax", *num] - [ope_code, 0xe8, *num.map { |n| n.to_i(16) }] - in ["cqo"] - [0x99] - end # steep:ignore - end - - def calc_bit(op, *operands) - case [op, *operands] - in ["xor", "rax", "rax"] - [0x31, 0xc0] - in ["xor", "rdi", "rdi"] - [0x31, 0xff] - end # steep:ignore - end - - def calc_addr(op, *operands) - case [op, *operands] - in ["lea", "rax", *addrs] - rm, disp = parse_addressing_mode(addrs.first) - [0x8D, *mod_rm(0b01, 0b000, rm), disp] - end # steep:ignore - end - - def cmp(op, *operands) - case operands - in ["rax", "rdi"] - [0x39, 0xf8] - in ["rax", "0"] - [0x83, 0xf8, 0x00] - end # steep:ignore - end - - def sete(op, *operands) - case [op, operands] - in ["sete", ["al"]] - [0x0f, 0x94, 0xc0] - in ["setl", ["al"]] - [0x0f, 0x9c, 0xc0] - end # steep:ignore - end - - def push(*operands) - case operands - in ["rbp"] | ["rdi"] - [0x55] - in ["rax"] - [0x50] - in [HEX_PATTERN] - [0x68, *immediate(operands.first)] - else - [0x6a, *operands.map { |o| reg(o) }] - end # steep:ignore - end - - def pop(*operands) - case operands - in ["rax"] | ["rdi"] - [0x58 + REGISTER_CODE[operands.first.upcase.to_sym]] - in ["rbp"] - [0x5d] - end # steep:ignore - end - - def reg(r) - case r - when "rsp" - 0xec - when "rbp" - 0x5e - when "rax" - 0x29 - when "rdi" - 0xf8 - when /\d+/ - r.to_i(16) - else - raise Caotral::Assembler::ELF::Error, "yet implemented operand address: #{r}" - end - end - def immediate(operand) = [operand.to_i(16)].pack("L").unpack("C*") - def mod_rm(mod, reg, rm) = (mod << 6) | (reg << 3) | rm - def parse_addressing_mode(str) - m = str.match(/\[(?\w+)(?[\+\-]\d+)?\]/) - [REGISTER_CODE[m[:reg].upcase.to_sym], m[:disp].to_i & 0xff] - end -end diff --git a/lib/caotral/assembler/elf/section_header.rb b/lib/caotral/assembler/elf/section_header.rb deleted file mode 100644 index a0a9e34..0000000 --- a/lib/caotral/assembler/elf/section_header.rb +++ /dev/null @@ -1,44 +0,0 @@ -class Caotral::Assembler::ELF::SectionHeader - include Caotral::Assembler::ELF::Utils - def initialize - @name = nil - @type = nil - @flags = nil - @addr = nil - @offset = nil - @size = nil - @link = nil - @info = nil - @addralign = nil - @entsize = nil - end - - def build = bytes.flatten.pack("C*") - - def set!(name: nil, type: nil, flags: nil, addr: nil, - offset: nil, size: nil, link: nil, info: nil, - addralign: nil, entsize: nil) - @name = num2bytes(name, 4) if check(name, 4) - @type = num2bytes(type, 4) if check(type, 4) - @flags = num2bytes(flags, 8) if check(flags, 8) - @addr = num2bytes(addr, 8) if check(addr, 8) - @offset = num2bytes(offset, 8) if check(offset, 8) - @size = num2bytes(size, 8) if check(size, 8) - @link = num2bytes(link, 4) if check(link, 4) - @info = num2bytes(info, 4) if check(info, 4) - @addralign = num2bytes(addralign, 8) if check(addralign, 8) - @entsize = num2bytes(entsize, 8) if check(entsize, 8) - self - end - - def null! = set!(name: 0, type: 0, flags: 0, addr: 0, offset: 0, size: 0, link: 0, info: 0, addralign: 0, entsize: 0) - def text! = set!(flags: 0x06, addralign: 0x01, addr: 0, type: 1, entsize: 0, link: 0, info: 0) - def data! = set!(type: 0x01, flags: 0x03, addralign: 1, addr: 0, info: 0, link: 0, entsize: 0) - def bss! = set!(type: 0x8, flags: 3, addralign: 1, addr: 0, info: 0, link: 0, entsize: 0) - def note! = set!(type: 0x07, flags: 0x02, size: 0x30, addralign: 0x08, addr: 0, link: 0, info: 0, entsize: 0) - def symtab! = set!(type: 2, info: 1, addr: 0, link: 6, entsize: 0x18, addralign: 8, flags: 0) - def strtab! = set!(type: 3, info: 0, addr: 0, link: 0, entsize: 0, addralign: 1, flags: 0) - def shstrtab! = set!(type: 3, info: 0, addr: 0, link: 0, entsize: 0, addralign: 1, flags: 0) - - private def bytes = [@name, @type, @flags, @addr, @offset, @size, @link, @info, @addralign, @entsize] -end diff --git a/lib/caotral/assembler/elf/sections.rb b/lib/caotral/assembler/elf/sections.rb deleted file mode 100644 index 5a99773..0000000 --- a/lib/caotral/assembler/elf/sections.rb +++ /dev/null @@ -1,19 +0,0 @@ -require_relative "section" - -class Caotral::Assembler::ELF::Sections - ATTRIBUTES = %i|null text data bss note symtab strtab shstrtab| - attr_reader *ATTRIBUTES - - def initialize - @null = Caotral::Assembler::ELF::Section.new(type: :null) - @text = Caotral::Assembler::ELF::Section.new(type: :text) - @data = Caotral::Assembler::ELF::Section.new(type: :data) - @bss = Caotral::Assembler::ELF::Section.new(type: :bss) - @note = Caotral::Assembler::ELF::Section.new(type: :note, options: {type: :gnu}) - @symtab = Caotral::Assembler::ELF::Section.new(type: :symtab) - @strtab = Caotral::Assembler::ELF::Section.new(type: :strtab) - @shstrtab = Caotral::Assembler::ELF::Section.new(type: :shstrtab) - end - - def each(&block) = ATTRIBUTES.each { |t| yield send(t) } -end diff --git a/lib/caotral/assembler/elf/utils.rb b/lib/caotral/assembler/elf/utils.rb deleted file mode 100644 index e10f46b..0000000 --- a/lib/caotral/assembler/elf/utils.rb +++ /dev/null @@ -1,23 +0,0 @@ -module Caotral::Assembler::ELF::Utils - def build = (build_errors; bytes.flatten.pack("C*")) - def size = build.bytesize - def set! = (raise Caotral::Assembler::ELF::Error, "should be implementing #{self.class}") - def empties = must_be_filled_section_fields - - private - def align(val, bytes) - val << 0 until val.size % bytes == 0 - val - end - def bytes = (raise Caotral::Assembler::ELF::Error, "should be implementing #{self.class}") - def must_be_filled_section_fields = instance_variables.reject { |i| instance_variable_get(i) } - def num2bytes(val, bytes) = hexas(val, bytes).reverse - def check(val, bytes) = ((val.is_a?(Array) && val.all? { |v| v.is_a?(Integer) } && val.size == bytes) || (val.is_a?(Integer) && (hexas(val, bytes).size == bytes))) - def hexas(val, hex) = ("%0#{hex*2}x" % val).scan(/.{1,2}/).map { |v| v.to_i(16) }.then { |list| list.unshift(0) until list.size >= hex; list } - def build_errors - return unless bytes.any?(&:nil?) - errors = [] - bytes.each_with_index { |v, idx| errors << instance_variables[idx] if v.nil? } - raise Caotral::Assembler::ELF::Error, "unaccepted types: #{errors.join(",")}" - end -end diff --git a/lib/caotral/assembler/reader.rb b/lib/caotral/assembler/reader.rb new file mode 100644 index 0000000..89a7a6c --- /dev/null +++ b/lib/caotral/assembler/reader.rb @@ -0,0 +1,29 @@ +require "caotral/binary/elf" + +module Caotral + class Assembler + class Reader + attr_reader :instructions + def initialize(input:, debug: false) + @input, @debug = input, debug + @instructions = Hash.new { |h, k| h[k] = [] } + end + + def read + File.open(@input, "r") do |reader| + current_label = nil + reader.each_line do |line| + if /^(?