diff --git a/.gitignore b/.gitignore index 4acafde1..fd5ceaf6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,9 @@ +# 2kodevs ignores +*src/compiled.asm +*src/code.cl +*src/code.s +*Stuff/ + # File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig # Created by https://www.gitignore.io/api/visualstudiocode,linux,latex,python @@ -408,3 +414,5 @@ dmypy.json # Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option) + +src/test.cl diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..cc67606f --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,4 @@ +{ + "python.linting.pylintEnabled": true, + "python.linting.enabled": true +} \ No newline at end of file diff --git a/compiled.asm b/compiled.asm new file mode 100644 index 00000000..66ddac15 --- /dev/null +++ b/compiled.asm @@ -0,0 +1,2204 @@ + .data +data_1: .asciiz "Object" +data_2: .asciiz "IO" +data_3: .asciiz "String" +data_4: .asciiz "Int" +data_5: .asciiz "A" +data_6: .asciiz "Main" +data_7: .asciiz "Program aborted" +data_8: .asciiz "Dispatch on void" +data_9: .asciiz "Case on void" +data_10: .asciiz "Execution of a case statement without a matching branch" +data_11: .asciiz "Division by zero" +data_12: .asciiz "Substring out of range" +data_13: .asciiz "n" + +type_name_table: + .word data_1 + .word data_2 + .word data_3 + .word data_4 + .word data_5 + .word data_6 + +proto_table: + .word type_1_proto + .word type_2_proto + .word type_3_proto + .word type_4_proto + .word type_5_proto + .word type_6_proto + +type_1_dispatch: + .word L_1 + .word L_2 + .word L_3 + .word L_4 + +type_1_proto: + .word 0 + .word 4 + .word type_1_dispatch + .word -1 + +type_2_dispatch: + .word L_5 + .word L_6 + .word L_7 + .word L_8 + .word L_9 + +type_2_proto: + .word 1 + .word 4 + .word type_2_dispatch + .word -1 + +type_3_dispatch: + .word L_10 + .word L_11 + .word L_12 + .word L_13 + 
+type_3_proto: + .word 2 + .word 6 + .word type_3_dispatch + .word 0 + .word 0 + .word -1 + +type_4_dispatch: + .word L_14 + +type_4_proto: + .word 3 + .word 5 + .word type_4_dispatch + .word 0 + .word -1 + +type_5_dispatch: + .word L_2 + .word L_3 + .word L_4 + .word L_15 + .word L_16 + .word L_17 + .word L_18 + .word L_19 + +type_5_proto: + .word 4 + .word 7 + .word type_5_dispatch + .word 0 + .word 0 + .word 0 + .word -1 + +type_6_dispatch: + .word L_2 + .word L_3 + .word L_4 + .word L_6 + .word L_7 + .word L_8 + .word L_9 + .word L_21 + +type_6_proto: + .word 5 + .word 4 + .word type_6_dispatch + .word -1 + .text + .globl main +main: + jal mem_manager_init + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -8 + li $t0, 5 + sll $t0 $t0 2 + la $t1, proto_table + addu $t1 $t1 $t0 + lw $t1, 0($t1) + lw $a0, 4($t1) + sll $a0 $a0 2 + jal malloc + move $a2 $a0 + move $a0 $t1 + move $a1 $v0 + jal copy + sw $v0, -12($fp) + lw $t0, -12($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + jal L_21 + sw $v0, -8($fp) + addi $sp, $sp, 4 + li $v0, 0 + addi $sp, $sp, 8 + lw $fp, 0($sp) + addi $sp, $sp, 4 + li $v0, 10 + syscall +L_1: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -4 + addi $sp, $sp, -4 + sw $a1, 0($sp) + addi $sp, $sp, -4 + sw $t1, 0($sp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + addi $sp, $sp, -4 + sw $a0, 0($sp) + addi $sp, $sp, -4 + sw $a2, 0($sp) + addi $sp, $sp, -4 + sw $ra, 0($sp) + li $t0, 0 + sll $t0 $t0 2 + la $t1, proto_table + addu $t1 $t1 $t0 + lw $t1, 0($t1) + lw $a0, 4($t1) + sll $a0 $a0 2 + jal malloc + move $a2 $a0 + move $a0 $t1 + move $a1 $v0 + jal copy + sw $v0, -8($fp) + lw $v0, -8($fp) + lw $ra, 0($sp) + addi $sp, $sp, 4 + lw $a2, 0($sp) + addi $sp, $sp, 4 + lw $a0, 0($sp) + addi $sp, $sp, 4 + lw $t0, 0($sp) + addi $sp, $sp, 4 + lw $t1, 0($sp) + addi $sp, $sp, 4 + lw $a1, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_2: + addi $sp, $sp, -4 + sw $fp, 0($sp) + 
addi $fp, $sp, 4 + addi $sp, $sp, -4 + addi $sp, $sp, -4 + sw $t0, 0($sp) + addi $sp, $sp, -4 + sw $a0, 0($sp) + la $t0, data_7 + 0 + sw $t0, -8($fp) + li $v0, 4 + lw $a0, -8($fp) + syscall + li $v0, 10 + syscall + lw $a0, 0($sp) + addi $sp, $sp, 4 + lw $t0, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_3: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -8 + addi $sp, $sp, -4 + sw $t0, 0($sp) + addi $sp, $sp, -4 + sw $ra, 0($sp) + addi $sp, $sp, -4 + sw $t1, 0($sp) + lw $t0, 0($fp) + lw $t0, 0($t0) + sll $t0 $t0 2 + la $t1, type_name_table + addu $t0 $t0 $t1 + lw $t0, 0($t0) + sw $t0, -8($fp) + lw $t0, -8($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + jal L_10 + sw $v0, -12($fp) + addi $sp, $sp, 4 + lw $v0, -12($fp) + lw $t1, 0($sp) + addi $sp, $sp, 4 + lw $ra, 0($sp) + addi $sp, $sp, 4 + lw $t0, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 8 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_4: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -4 + addi $sp, $sp, -4 + sw $a1, 0($sp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + addi $sp, $sp, -4 + sw $a0, 0($sp) + addi $sp, $sp, -4 + sw $a2, 0($sp) + addi $sp, $sp, -4 + sw $ra, 0($sp) + lw $t0, 0($fp) + lw $a0, 4($t0) + jal malloc + move $a2 $a0 + move $a0 $t0 + move $a1 $v0 + jal copy + sw $v0, -8($fp) + lw $v0, -8($fp) + lw $ra, 0($sp) + addi $sp, $sp, 4 + lw $a2, 0($sp) + addi $sp, $sp, 4 + lw $a0, 0($sp) + addi $sp, $sp, 4 + lw $t0, 0($sp) + addi $sp, $sp, 4 + lw $a1, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_5: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -4 + addi $sp, $sp, -4 + sw $a1, 0($sp) + addi $sp, $sp, -4 + sw $t1, 0($sp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + addi $sp, $sp, -4 + sw $a0, 0($sp) + addi $sp, $sp, -4 + sw $a2, 0($sp) + addi $sp, $sp, -4 + sw $ra, 0($sp) + li $t0, 0 + sll $t0 $t0 2 + la $t1, proto_table + addu $t1 $t1 $t0 + lw $t1, 
0($t1) + lw $a0, 4($t1) + sll $a0 $a0 2 + jal malloc + move $a2 $a0 + move $a0 $t1 + move $a1 $v0 + jal copy + sw $v0, -8($fp) + lw $v0, -8($fp) + lw $ra, 0($sp) + addi $sp, $sp, 4 + lw $a2, 0($sp) + addi $sp, $sp, 4 + lw $a0, 0($sp) + addi $sp, $sp, 4 + lw $t0, 0($sp) + addi $sp, $sp, 4 + lw $t1, 0($sp) + addi $sp, $sp, 4 + lw $a1, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_6: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -4 + addi $sp, $sp, -4 + sw $t0, 0($sp) + addi $sp, $sp, -4 + sw $a0, 0($sp) + lw $t0, 0($fp) + lw $t0, 12($t0) + sw $t0, -8($fp) + li $v0, 4 + lw $a0, -8($fp) + syscall + lw $v0, 4($fp) + lw $a0, 0($sp) + addi $sp, $sp, 4 + lw $t0, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_7: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -4 + addi $sp, $sp, -4 + sw $t0, 0($sp) + addi $sp, $sp, -4 + sw $a0, 0($sp) + lw $t0, 0($fp) + lw $t0, 12($t0) + sw $t0, -8($fp) + li $v0, 1 + lw $a0, -8($fp) + syscall + lw $v0, 4($fp) + lw $a0, 0($sp) + addi $sp, $sp, 4 + lw $t0, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_8: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -8 + addi $sp, $sp, -4 + sw $t0, 0($sp) + addi $sp, $sp, -4 + sw $ra, 0($sp) + jal read_str + sw $v0, -8($fp) + lw $t0, -8($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + jal L_10 + sw $v0, -12($fp) + addi $sp, $sp, 4 + lw $v0, -12($fp) + lw $ra, 0($sp) + addi $sp, $sp, 4 + lw $t0, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 8 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_9: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -8 + addi $sp, $sp, -4 + sw $t0, 0($sp) + addi $sp, $sp, -4 + sw $ra, 0($sp) + li $v0, 5 + syscall + sw $v0, -8($fp) + lw $t0, -8($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + jal L_14 + sw $v0, -12($fp) + addi $sp, $sp, 4 + lw $v0, -12($fp) + 
lw $ra, 0($sp) + addi $sp, $sp, 4 + lw $t0, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 8 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_10: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -12 + addi $sp, $sp, -4 + sw $a1, 0($sp) + addi $sp, $sp, -4 + sw $t1, 0($sp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + addi $sp, $sp, -4 + sw $a0, 0($sp) + addi $sp, $sp, -4 + sw $a2, 0($sp) + addi $sp, $sp, -4 + sw $ra, 0($sp) + li $t0, 2 + sll $t0 $t0 2 + la $t1, proto_table + addu $t1 $t1 $t0 + lw $t1, 0($t1) + lw $a0, 4($t1) + sll $a0 $a0 2 + jal malloc + move $a2 $a0 + move $a0 $t1 + move $a1 $v0 + jal copy + sw $v0, -8($fp) + lw $t1, -8($fp) + lw $t0, 0($fp) + sw $t0, 12($t1) + lw $a0, 0($fp) + jal len + sw $v0, -12($fp) + lw $t0, -12($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + jal L_14 + sw $v0, -16($fp) + addi $sp, $sp, 4 + lw $t1, -8($fp) + lw $t0, -16($fp) + sw $t0, 16($t1) + lw $v0, -8($fp) + lw $ra, 0($sp) + addi $sp, $sp, 4 + lw $a2, 0($sp) + addi $sp, $sp, 4 + lw $a0, 0($sp) + addi $sp, $sp, 4 + lw $t0, 0($sp) + addi $sp, $sp, 4 + lw $t1, 0($sp) + addi $sp, $sp, 4 + lw $a1, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 12 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_11: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -4 + addi $sp, $sp, -4 + sw $t0, 0($sp) + lw $t0, 0($fp) + lw $t0, 16($t0) + sw $t0, -8($fp) + lw $v0, -8($fp) + lw $t0, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_12: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -8 + addi $sp, $sp, -4 + sw $a1, 0($sp) + addi $sp, $sp, -4 + sw $ra, 0($sp) + addi $sp, $sp, -4 + sw $a0, 0($sp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + lw $a0, 4($fp) + lw $a1, 0($fp) + jal concat + sw $v0, -8($fp) + lw $t0, -8($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + jal L_10 + sw $v0, -12($fp) + addi $sp, $sp, 4 + lw $v0, -12($fp) + lw $t0, 0($sp) + addi $sp, $sp, 4 + lw $a0, 0($sp) + addi $sp, $sp, 4 + lw $ra, 
0($sp) + addi $sp, $sp, 4 + lw $a1, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 8 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_13: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -28 + addi $sp, $sp, -4 + sw $a1, 0($sp) + addi $sp, $sp, -4 + sw $t1, 0($sp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + addi $sp, $sp, -4 + sw $a0, 0($sp) + addi $sp, $sp, -4 + sw $a2, 0($sp) + addi $sp, $sp, -4 + sw $ra, 0($sp) + lw $t0, 4($fp) + lw $t0, 12($t0) + sw $t0, -12($fp) + lw $t0, 0($fp) + lw $t0, 12($t0) + sw $t0, -16($fp) + lw $t0, 8($fp) + lw $t0, 16($t0) + sw $t0, -20($fp) + lw $t0, -16($fp) + lw $t1, -12($fp) + add $t0 $t0 $t1 + sw $t0, -24($fp) + lw $a0, -20($fp) + lw $a1, -24($fp) + jal less + sw $v0, -28($fp) + lw $t0, -28($fp) + bne $t0 $zero L_23 + j L_24 + L_23: + li $v0, 4 + la $a0, data_12 + syscall + li $v0, 10 + syscall + L_24: + lw $a0, 8($fp) + lw $a1, -12($fp) + lw $a2, -16($fp) + jal substr + sw $v0, -8($fp) + lw $t0, -8($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + jal L_10 + sw $v0, -32($fp) + addi $sp, $sp, 4 + lw $v0, -32($fp) + lw $ra, 0($sp) + addi $sp, $sp, 4 + lw $a2, 0($sp) + addi $sp, $sp, 4 + lw $a0, 0($sp) + addi $sp, $sp, 4 + lw $t0, 0($sp) + addi $sp, $sp, 4 + lw $t1, 0($sp) + addi $sp, $sp, 4 + lw $a1, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 28 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_14: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -4 + addi $sp, $sp, -4 + sw $a1, 0($sp) + addi $sp, $sp, -4 + sw $t1, 0($sp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + addi $sp, $sp, -4 + sw $a0, 0($sp) + addi $sp, $sp, -4 + sw $a2, 0($sp) + addi $sp, $sp, -4 + sw $ra, 0($sp) + li $t0, 3 + sll $t0 $t0 2 + la $t1, proto_table + addu $t1 $t1 $t0 + lw $t1, 0($t1) + lw $a0, 4($t1) + sll $a0 $a0 2 + jal malloc + move $a2 $a0 + move $a0 $t1 + move $a1 $v0 + jal copy + sw $v0, -8($fp) + lw $t1, -8($fp) + lw $t0, 0($fp) + sw $t0, 12($t1) + lw $v0, -8($fp) + lw $ra, 0($sp) + addi $sp, $sp, 4 + lw $a2, 0($sp) + addi 
$sp, $sp, 4 + lw $a0, 0($sp) + addi $sp, $sp, 4 + lw $t0, 0($sp) + addi $sp, $sp, 4 + lw $t1, 0($sp) + addi $sp, $sp, 4 + lw $a1, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_15: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -4 + addi $sp, $sp, -4 + sw $t0, 0($sp) + lw $t0, 0($fp) + lw $t0, 12($t0) + sw $t0, -8($fp) + lw $v0, -8($fp) + lw $t0, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_16: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, 0 + addi $sp, $sp, -4 + sw $t0, 0($sp) + addi $sp, $sp, -4 + sw $t1, 0($sp) + lw $t1, 4($fp) + lw $t0, 0($fp) + sw $t0, 16($t1) + lw $v0, 0($fp) + lw $t1, 0($sp) + addi $sp, $sp, 4 + lw $t0, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 0 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_17: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -4 + addi $sp, $sp, -4 + sw $t0, 0($sp) + lw $t0, 0($fp) + lw $t0, 16($t0) + sw $t0, -8($fp) + lw $v0, -8($fp) + lw $t0, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_18: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, 0 + addi $sp, $sp, -4 + sw $t0, 0($sp) + addi $sp, $sp, -4 + sw $t1, 0($sp) + lw $t1, 4($fp) + lw $t0, 0($fp) + sw $t0, 20($t1) + lw $v0, 0($fp) + lw $t1, 0($sp) + addi $sp, $sp, 4 + lw $t0, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 0 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_19: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -4 + addi $sp, $sp, -4 + sw $t0, 0($sp) + lw $t0, 0($fp) + lw $t0, 20($t0) + sw $t0, -8($fp) + lw $v0, -8($fp) + lw $t0, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_20: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -8 + addi $sp, $sp, -4 + sw $a1, 0($sp) + addi $sp, $sp, -4 + sw $t1, 0($sp) + addi $sp, $sp, -4 + sw 
$t0, 0($sp) + addi $sp, $sp, -4 + sw $a0, 0($sp) + addi $sp, $sp, -4 + sw $a2, 0($sp) + addi $sp, $sp, -4 + sw $ra, 0($sp) + li $t0, 4 + sll $t0 $t0 2 + la $t1, proto_table + addu $t1 $t1 $t0 + lw $t1, 0($t1) + lw $a0, 4($t1) + sll $a0 $a0 2 + jal malloc + move $a2 $a0 + move $a0 $t1 + move $a1 $v0 + jal copy + sw $v0, -8($fp) + li $t0, 18 + addi $sp, $sp, -4 + sw $t0, 0($sp) + jal L_14 + sw $v0, -12($fp) + addi $sp, $sp, 4 + lw $t1, -8($fp) + lw $t0, -12($fp) + sw $t0, 12($t1) + lw $v0, -8($fp) + lw $ra, 0($sp) + addi $sp, $sp, 4 + lw $a2, 0($sp) + addi $sp, $sp, 4 + lw $a0, 0($sp) + addi $sp, $sp, 4 + lw $t0, 0($sp) + addi $sp, $sp, 4 + lw $t1, 0($sp) + addi $sp, $sp, 4 + lw $a1, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 8 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_21: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -128 + addi $sp, $sp, -4 + sw $a1, 0($sp) + addi $sp, $sp, -4 + sw $t1, 0($sp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + addi $sp, $sp, -4 + sw $a0, 0($sp) + addi $sp, $sp, -4 + sw $ra, 0($sp) + jal L_20 + sw $v0, -16($fp) + lw $t0, -16($fp) + sw $t0, -12($fp) + lw $t0, 0($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + jal L_8 + sw $v0, -24($fp) + addi $sp, $sp, 4 + lw $t0, -24($fp) + sw $t0, -20($fp) + lw $t0, -20($fp) + sw $t0, -28($fp) + lw $t0, -12($fp) + sw $t0, -36($fp) + lw $a0, -36($fp) + li $a1, 0 + jal equals + sw $v0, -40($fp) + lw $t0, -40($fp) + bne $t0 $zero L_25 + j L_26 + L_25: + li $v0, 4 + la $a0, data_8 + syscall + li $v0, 10 + syscall + L_26: + lw $t0, -36($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + lw $t0, -28($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + lw $t0, -36($fp) + lw $t0, 0($t0) + sw $t0, -44($fp) + la $t0, proto_table + lw $t1, -44($fp) + sll $t1 $t1 2 + addu $t0 $t0 $t1 + lw $t0, 0($t0) + lw $t0, 8($t0) + addiu $t0 $t0 16 + lw $t0, 0($t0) + jal $t0 + sw $v0, -32($fp) + addi $sp, $sp, 8 + lw $t0, -12($fp) + sw $t0, -56($fp) + lw $a0, -56($fp) + li $a1, 0 + jal equals + sw $v0, -60($fp) + lw $t0, 
-60($fp) + bne $t0 $zero L_27 + j L_28 + L_27: + li $v0, 4 + la $a0, data_8 + syscall + li $v0, 10 + syscall + L_28: + lw $t0, -56($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + lw $t0, -56($fp) + lw $t0, 0($t0) + sw $t0, -64($fp) + la $t0, proto_table + lw $t1, -64($fp) + sll $t1 $t1 2 + addu $t0 $t0 $t1 + lw $t0, 0($t0) + lw $t0, 8($t0) + addiu $t0 $t0 20 + lw $t0, 0($t0) + jal $t0 + sw $v0, -52($fp) + addi $sp, $sp, 4 + lw $t0, -52($fp) + sw $t0, -48($fp) + lw $t0, 0($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + lw $t0, -48($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + jal L_6 + sw $v0, -68($fp) + addi $sp, $sp, 8 + la $t0, data_13 + 0 + sw $t0, -76($fp) + lw $t0, -76($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + jal L_10 + sw $v0, -80($fp) + addi $sp, $sp, 4 + lw $t0, -80($fp) + sw $t0, -72($fp) + lw $t0, 0($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + lw $t0, -72($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + jal L_6 + sw $v0, -84($fp) + addi $sp, $sp, 8 + lw $t0, 0($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + jal L_9 + sw $v0, -92($fp) + addi $sp, $sp, 4 + lw $t0, -92($fp) + sw $t0, -88($fp) + lw $t0, -12($fp) + sw $t0, -100($fp) + lw $a0, -100($fp) + li $a1, 0 + jal equals + sw $v0, -104($fp) + lw $t0, -104($fp) + bne $t0 $zero L_29 + j L_30 + L_29: + li $v0, 4 + la $a0, data_8 + syscall + li $v0, 10 + syscall + L_30: + lw $t0, -100($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + lw $t0, -88($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + lw $t0, -100($fp) + lw $t0, 0($t0) + sw $t0, -108($fp) + la $t0, proto_table + lw $t1, -108($fp) + sll $t1 $t1 2 + addu $t0 $t0 $t1 + lw $t0, 0($t0) + lw $t0, 8($t0) + addiu $t0 $t0 24 + lw $t0, 0($t0) + jal $t0 + sw $v0, -96($fp) + addi $sp, $sp, 8 + lw $t0, -12($fp) + sw $t0, -120($fp) + lw $a0, -120($fp) + li $a1, 0 + jal equals + sw $v0, -124($fp) + lw $t0, -124($fp) + bne $t0 $zero L_31 + j L_32 + L_31: + li $v0, 4 + la $a0, data_8 + syscall + li $v0, 10 + syscall + L_32: + lw $t0, -120($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + lw $t0, 
-120($fp) + lw $t0, 0($t0) + sw $t0, -128($fp) + la $t0, proto_table + lw $t1, -128($fp) + sll $t1 $t1 2 + addu $t0 $t0 $t1 + lw $t0, 0($t0) + lw $t0, 8($t0) + addiu $t0 $t0 28 + lw $t0, 0($t0) + jal $t0 + sw $v0, -116($fp) + addi $sp, $sp, 4 + lw $t0, -116($fp) + sw $t0, -112($fp) + lw $t0, 0($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + lw $t0, -112($fp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + jal L_7 + sw $v0, -132($fp) + addi $sp, $sp, 8 + lw $t0, -132($fp) + sw $t0, -8($fp) + lw $v0, -8($fp) + lw $ra, 0($sp) + addi $sp, $sp, 4 + lw $a0, 0($sp) + addi $sp, $sp, 4 + lw $t0, 0($sp) + addi $sp, $sp, 4 + lw $t1, 0($sp) + addi $sp, $sp, 4 + lw $a1, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 128 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra +L_22: + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $fp, $sp, 4 + addi $sp, $sp, -4 + addi $sp, $sp, -4 + sw $a1, 0($sp) + addi $sp, $sp, -4 + sw $t1, 0($sp) + addi $sp, $sp, -4 + sw $t0, 0($sp) + addi $sp, $sp, -4 + sw $a0, 0($sp) + addi $sp, $sp, -4 + sw $a2, 0($sp) + addi $sp, $sp, -4 + sw $ra, 0($sp) + li $t0, 5 + sll $t0 $t0 2 + la $t1, proto_table + addu $t1 $t1 $t0 + lw $t1, 0($t1) + lw $a0, 4($t1) + sll $a0 $a0 2 + jal malloc + move $a2 $a0 + move $a0 $t1 + move $a1 $v0 + jal copy + sw $v0, -8($fp) + lw $v0, -8($fp) + lw $ra, 0($sp) + addi $sp, $sp, 4 + lw $a2, 0($sp) + addi $sp, $sp, 4 + lw $a0, 0($sp) + addi $sp, $sp, 4 + lw $t0, 0($sp) + addi $sp, $sp, 4 + lw $t1, 0($sp) + addi $sp, $sp, 4 + lw $a1, 0($sp) + addi $sp, $sp, 4 + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + jr $ra + +header_size = 12 #in bytes +header_size_slot = 0 +header_next_slot = 4 +header_reachable_slot = 8 +alloc_size = 2048 +total_alloc_size = 2060 #alloc_size + header_size +neg_header_size = -12 #-header_size +free_list = 0 +used_list = header_size +state_size = 4 +stack_base = -4 +init_alloc_size = 28 #(header_size*2) + state_size +object_mark = -1 +meta_data_object_size = 4 #in words +object_expanded = -2 +reachable = 1 +new_line = 10 
+str_size_treshold = 1024
+
+
+
+#####################################################################################################
+# Initialize memory manager #
+# Args: #
+# none #
+# Return: #
+# nothing; $v0, $a0, $a1 and $ra are saved on entry and restored before returning #
+# Summary: #
+# The initial (header-only) blocks for Free-List and Used-List are created. #
+# $gp is set to be used as the reference whenever the initial blocks or the values related #
+# to the memory manager state are needed: the free-list head lives at free_list($gp), the #
+# used-list head at used_list($gp) and the saved stack base at stack_base($gp). #
+# A block of size alloc_size is created and added to the Free-List. #
+#####################################################################################################
+mem_manager_init:
+
+    addiu $sp $sp -16
+    sw $v0 0($sp)
+    sw $a0 4($sp)
+    sw $a1 8($sp)
+    sw $ra 12($sp)
+
+
+    li $v0 9
+    li $a0 init_alloc_size
+    syscall # Reserve the state word plus the two header-only list heads (init_alloc_size bytes)
+    move $gp $v0
+    addiu $gp $gp state_size # Skip the state word: $gp now points at the free-list head
+
+    sw $zero header_size_slot($gp) # The free-list starts with a block without space, just a header, that will always be there.
+    sw $zero header_next_slot($gp)
+    sw $zero header_reachable_slot($gp)
+
+    move $a0 $gp # $a0 = last (and only) block of the free-list
+    li $a1 alloc_size
+    jal extend_heap # Append the first alloc_size-byte block; $a0 still holds the free-list head afterwards
+
+    addiu $a0 $a0 header_size # The used-list head sits right after the free-list head
+    sw $zero header_size_slot($a0) # The used-list starts with a block without space, just a header, that will always be there.
+    sw $zero header_next_slot($a0)
+    sw $zero header_reachable_slot($a0)
+
+
+
+    lw $v0 0($sp)
+    lw $a0 4($sp)
+    lw $a1 8($sp)
+    lw $ra 12($sp)
+    addiu $sp $sp 16
+
+    sw $sp stack_base($gp) # Remember the caller's $sp: gc_collect stops its stack scan at this address
+
+    jr $ra
+
+
+#####################################################################################################
+# Free a block previously allocated #
+# Args: #
+# $a0 Address of the header of the block to free #
+# Return: #
+# nothing; every register used here is restored #
+# Summary: #
+# Remove the block from the used-list and add it to the free-list, which is kept ordered by #
+# address, then try to merge it with its free-list neighbours. #
+# If the block is not in the used-list (null, foreign or already freed address) nothing is #
+# done. #
+#####################################################################################################
+free_block:
+    addiu $sp $sp -28
+    sw $t0 0($sp)
+    sw $t1 4($sp)
+    sw $t2 8($sp)
+    sw $a0 12($sp)
+    sw $ra 16($sp)
+    sw $t3 20($sp)
+    sw $t4 24($sp)
+
+    move $t0 $a0
+
+    addiu $t1 $gp free_list # Store in $t1 the initial block of the free-list
+
+    addiu $t3 $gp used_list # Store in $t3 the initial block of the used-list
+
+free_block_loop_used_list: # Iterate through the used-list until the block is found; $t3 ends as its predecessor
+    lw $t4 header_next_slot($t3)
+    beq $t4 $zero free_block_restore # End of the used-list reached: the block is not allocated, so there is nothing to free
+    beq $t4 $t0 free_block_loop_free_list
+    move $t3 $t4
+    j free_block_loop_used_list
+
+
+free_block_loop_free_list: # Iterate through the free-list to find the predecessor of the block in address order
+    lw $t2 header_next_slot($t1)
+    beq $t2 $zero free_block_founded_prev
+    bge $t2 $t0 free_block_founded_prev
+    move $t1 $t2
+    j free_block_loop_free_list
+
+free_block_founded_prev:
+    # Remove the block from the used-list
+    lw $t4 header_next_slot($t0)
+    sw $t4 header_next_slot($t3)
+
+    # Add the block to the free-list, between $t1 and $t2
+    sw $t2 header_next_slot($t0)
+    sw $t0 header_next_slot($t1)
+
+free_block_end:
+
+    # Try to merge around the place where the block was added: first the block with its
+    # successor, then the predecessor with the (possibly grown) block
+    move $a0 $t0
+    jal expand_block
+    move $a0 $t1
+    jal expand_block
+
+free_block_restore:
+    lw $t0 0($sp)
+    lw $t1 4($sp)
+    lw $t2 8($sp)
+    lw $a0 12($sp)
+    lw $ra 16($sp)
+    lw $t3 20($sp)
+    lw $t4 24($sp)
+    addiu $sp $sp 28
+
+    jr $ra
+
+
+#####################################################################################################
+# Coalesce a free block with the block that follows it in the free-list #
+# Args: #
+# $a0 First of the two blocks to merge #
+# Return: #
+# #
+# Summary: #
+# When the block in $a0 ends exactly where its free-list successor begins, both are fused #
+# into one block. The zero-sized head block of the free-list is never merged. #
+#####################################################################################################
+expand_block:
+    addiu $sp $sp -16
+    sw $t0 0($sp)
+    sw $t1 4($sp)
+    sw $t2 8($sp)
+    sw $t3 12($sp)
+
+    addiu $t0 $gp free_list # $t0 = head block of the free-list
+    beq $a0 $t0 expand_block_end # The head block must keep size 0, so it is left alone
+
+    lw $t3 header_size_slot($a0) # $t3 = payload size of the block
+    lw $t1 header_next_slot($a0) # $t1 = its successor in the free-list
+
+    # The block ends at $a0 + header_size + size; merge only if the successor starts right there
+    addiu $t2 $a0 header_size
+    addu $t2 $t2 $t3
+    bne $t2 $t1 expand_block_end
+
+expand_block_expand: # Absorb the successor: add its header and payload to the size and take over its link
+    lw $t2 header_size_slot($t1)
+    addi $t2 $t2 header_size
+    add $t2 $t2 $t3
+    sw $t2 header_size_slot($a0)
+    lw $t1 header_next_slot($t1)
+    sw $t1 header_next_slot($a0)
+
+expand_block_end:
+    lw $t0 0($sp)
+    lw $t1 4($sp)
+    lw $t2 8($sp)
+    lw $t3 12($sp)
+    addiu $sp $sp 16
+
+    jr $ra
+
+
+#####################################################################################################
+# Allocate more memory for the process and add it to the free-list #
+# Args: #
+# $a0 Last block of the free-list #
+# $a1 Memory amount to alloc #
+# Return: #
+# #
+# Summary: #
+# More memory is allocated and add it to the free-list as a block. 
#
+#####################################################################################################
+extend_heap:
+    # On return $v0 holds the address of the new block; $a0 and $a1 keep the caller's values.
+    addiu $sp $sp -8
+    sw $a0 0($sp)
+    sw $a1 4($sp)
+
+    # Request the payload plus room for the block header
+    addiu $a0 $a1 header_size
+    li $v0 9
+    syscall
+
+    # Fill in the header of the fresh block: requested size, no successor, not reachable
+    sw $a1 header_size_slot($v0)
+    sw $zero header_next_slot($v0)
+    sw $zero header_reachable_slot($v0)
+
+    # Hook the new block after the previous tail of the free-list
+    lw $a0 0($sp)
+    sw $v0 header_next_slot($a0)
+
+    lw $a1 4($sp)
+    addiu $sp $sp 8
+
+    jr $ra
+
+
+
+#####################################################################################################
+# Split a block into two blocks, one of the requested size and the other with the rest. #
+# Args: #
+# $a0 Address of the block to split #
+# $a1 Size requested for one block #
+# Return: #
+# #
+# Summary: #
+# The block is splitted into two blocks if the size allow it. 
#
+#####################################################################################################
+split_block:
+    # $v0 receives the address of the block, except when the block is smaller than the
+    # request: in that case $v0 is left untouched.
+    addiu $sp $sp -16
+    sw $t0 0($sp)
+    sw $t1 4($sp)
+    sw $a0 8($sp)
+    sw $a1 12($sp)
+
+    # A block smaller than the request cannot be split at all
+    lw $t0 header_size_slot($a0)
+    blt $t0 $a1 split_block_error_small
+
+    # Splitting only makes sense when the leftover can hold a header plus some payload
+    sub $t0 $t0 $a1 # $t0 = bytes left after the requested part
+    addi $t1 $t0 neg_header_size # $t1 = payload size the second block would get
+    blez $t1 split_block_same_size
+
+    # The second block starts right after the requested payload
+    addiu $t0 $a0 header_size
+    addu $t0 $t0 $a1
+
+    # Link it between the first block and the first block's old successor
+    lw $v0 header_next_slot($a0) # $v0 is only scratch here; it gets its final value below
+    sw $v0 header_next_slot($t0)
+    sw $t0 header_next_slot($a0)
+
+    # Record both sizes
+    sw $t1 header_size_slot($t0)
+    sw $a1 header_size_slot($a0)
+
+split_block_same_size: # Also reached by fall-through after a split
+    move $v0 $a0
+
+split_block_error_small:
+split_block_end:
+    lw $t0 0($sp)
+    lw $t1 4($sp)
+    lw $a0 8($sp)
+    lw $a1 12($sp)
+    addiu $sp $sp 16
+
+    jr $ra
+
+
+#####################################################################################################
+# Best Fit strategy is used to select the block #
+# Args: #
+# $a0 size to alloc #
+# Return: #
+# $v0 address of allocated block #
+# Summary: #
+# Actual block is store in $t0, the size block is checked to know if it is a #
+# valid block (a block is valid if its size is larger or equal than the required size), #
+# if the block is valid we compare it with the actual best block and keep the shorter block. 
#
+# If there is not a block with the required size, a new block of size #
+# max(alloc_size, size requested) is requested with sbrk and split if necessary. #
+# The requested size is rounded up to a multiple of 4 bytes so that the header of every block #
+# created by a split stays word aligned; $a0 itself is handed back unchanged to the caller. #
+# $v1 is used as scratch and is not preserved. #
+#####################################################################################################
+malloc:
+    move $v0 $zero # v0 = selected block address (0 = none selected yet)
+    addiu $sp $sp -32
+    sw $t1 0($sp)
+    sw $t0 4($sp)
+    sw $a0 8($sp) # caller's a0, restored on exit
+    sw $a1 12($sp)
+    sw $ra 16($sp)
+    sw $t2 20($sp)
+    sw $t3 24($sp)
+
+    addiu $a0 $a0 3 # a0 = requested size rounded up to the next multiple of 4
+    srl $a0 $a0 2
+    sll $a0 $a0 2
+    sw $a0 28($sp) # keep the rounded size for the split done in malloc_end
+
+    addiu $t0 $gp free_list
+    j malloc_loop
+
+malloc_end:
+
+    move $a0 $v0
+    lw $a1 28($sp) # a1 = requested block size (rounded)
+    jal split_block
+
+    lw $t1 header_next_slot($v0) # unlink the selected block from the free-list (t3 = its predecessor)
+    sw $t1 header_next_slot($t3)
+
+    addiu $t1 $gp used_list # insert the block right after the head of the used-list
+    lw $a0 header_next_slot($t1)
+
+    sw $a0 header_next_slot($v0)
+    sw $v0 header_next_slot($t1)
+
+    addiu $v0 $v0 header_size # return the address of the payload, not of the header
+
+    lw $t3 24($sp)
+    lw $t2 20($sp)
+    lw $ra 16($sp)
+    lw $a1 12($sp)
+    lw $a0 8($sp)
+    lw $t0 4($sp)
+    lw $t1 0($sp)
+    addiu $sp $sp 32
+
+    jr $ra
+#######################################################################
+# t0 = actual block address #
+#######################################################################
+malloc_loop:
+    move $t2 $t0 # save previous block in $t2 (this is useful when we need to alloc the new block)
+    lw $t0 header_next_slot($t0) # t0 = next block address
+    beq $t0 $zero malloc_search_end # if t0 == 0 we reached the end of the free-list
+    j malloc_check_valid_block
+
+#######################################################################
+# $v0 = actual selected block address #
+#######################################################################
+malloc_search_end:
+    beq $v0 $zero malloc_alloc_new_block # if v0 == 0 a valid block was not found
+    j malloc_end
+
+#######################################################################
+# t2 = last block of free list #
+# a0 = requested block size #
+#######################################################################
+malloc_alloc_new_block:
+    li $t1 alloc_size # t1 = standard alloc size
+    move $t3 $t2 # the new block will be appended after t2, so t2 is its predecessor
+    move $a1 $a0 # a1 = requested block size
+    move $a0 $t2 # a0 = last block of free list
+    bge $a1 $t1 malloc_big_block # if the requested size is at least the standard alloc size go to malloc_big_block
+    li $a1 alloc_size # a1 = standard alloc size
+    jal extend_heap
+
+    j malloc_end
+
+######################################################################
+# a1 = requested block size #
+######################################################################
+malloc_big_block:
+    #addiu $a1 $a1 header_size # Add header size to alloc size
+    jal extend_heap
+    j malloc_end
+
+
+
+########################################################################
+# t0 = actual block address #
+########################################################################
+malloc_check_valid_block:
+    lw $t1 header_size_slot($t0) # t1 = size of the actual block
+    bge $t1 $a0 malloc_valid_block # the actual block has the required size
+    j malloc_loop
+
+########################################################################
+# t0 = actual block address #
+# t1 = size actual block #
+# v0 = actual selected block address(0 if no one have been selected) #
+# v1 = actual selected block size #
+########################################################################
+malloc_valid_block:
+    beq $v0 $zero malloc_first_valid_block # this is the first valid block
+    bge $t1 $v1 malloc_loop # the selected block is not bigger than the actual block, keep it
+    move $v0 $t0 # selected block address = actual block address
+    move $v1 $t1 # selected block size = actual block size
+    move $t3 $t2 # t3 = predecessor of the selected block
+    j malloc_loop
+
+
+########################################################################
+# t0 = actual block address #
+# t1 = size actual block #
+# v0 = actual selected block address(0 if no one have been selected) #
+# v1 = actual selected block size #
+########################################################################
+malloc_first_valid_block:
+    move $v0 $t0 # selected block address = actual block address
+    move 
$v1 $t1 # selected block size = actual block size + move $t3 $t2 + j malloc_loop + + +#TODO Look for objects in registers +##################################################################################################### +# Remove from used-list the blocks that are not reachables, the root objects are in the stack and # +# registers # +# Args: # +# # +# Return: # +# # +# Summary: # +# First the objects in stack and registers are marked as reachables, after that the objects # +# that are reachables from them are marked as reachable too using a dfs algorithm. When all # +# reachables objects are marked the used-list is scanned and all the objects that are not # +# marked as reachables are released. # +##################################################################################################### + +gc_collect: + addiu $sp $sp -24 + sw $t0 0($sp) + sw $t1 4($sp) + sw $t2 8($sp) + sw $t3 12($sp) + sw $a0 16($sp) + sw $ra 20($sp) + + li $t3 reachable # $t3 = reachable value + addiu $t0 $sp 20 # $t0 = the start of the stack without count this function + lw $t1 stack_base($gp) # $t1 = the end of the stack + + li $t2 1 +# Go through the stack searching for objects +gc_collect_loop: + addiu $t0 $t0 4 + beq $t0 $t1 gc_collect_dfs # If the end of the stack was reached finish this loop + + lw $a0 0($t0) + jal check_if_is_object + + bne $v0 $t2 gc_collect_loop + + addiu $a0 $a0 neg_header_size + sw $t3 header_reachable_slot($a0) + + j gc_collect_loop + +gc_collect_dfs: + addiu $t1 $gp used_list + +# Go through the used-list and try to expand any reachable block +gc_collect_outer_loop: + lw $t1 header_next_slot($t1) + beq $t1 $zero gc_collect_free + lw $t2 header_reachable_slot($t1) + beq $t2 reachable gc_collect_expand + j gc_collect_outer_loop + +gc_collect_expand: + addiu $a0 $t1 header_size # expand an object not a block + jal gc_collect_recursive_expand + j gc_collect_outer_loop + +gc_collect_free: + addiu $t0 $gp used_list + lw $t0 header_next_slot($t0) + +# Go 
through the used-list and free any unreachable object and set the reachable and expanded field to their default values +gc_collect_free_loop: + beq $t0 $zero gc_collect_end + lw $t1 header_reachable_slot($t0) + bne $t1 reachable gc_collect_free_loop_free + sw $zero header_reachable_slot($t0) + addiu $a0 $t0 header_size # check the object stored in the block, not the block header + jal check_if_is_object + beq $v0 $zero gc_collect_free_loop_next # reachable non-object block: keep it and advance + li $t1 object_mark + addiu $t2 $t0 header_size + lw $t3 4($t2) + sll $t3 $t3 2 + addu $t2 $t2 $t3 + sw $t1 -4($t2) +gc_collect_free_loop_next: lw $t0 header_next_slot($t0) + j gc_collect_free_loop + +gc_collect_free_loop_free: + move $a0 $t0 + lw $t0 header_next_slot($t0) + jal free_block + j gc_collect_free_loop + + +gc_collect_end: + lw $t0 0($sp) + lw $t1 4($sp) + lw $t2 8($sp) + lw $t3 12($sp) + lw $a0 16($sp) + lw $ra 20($sp) + addiu $sp $sp 24 + + jr $ra + + + + +##################################################################################################### +# Mark the objects that are reachable from the attrs of one object in a recursive way. # +# Args: # +# $a0: Object to expand # +# Return: # +# # +# Summary: # +# The actual object is marked as reachable and expanded to avoid infinite cycles, and this # +# routine is called recursively to expand the objects in the attrs of the actual object. 
# +##################################################################################################### +gc_collect_recursive_expand: + addiu $sp $sp -16 + sw $a0 0($sp) + sw $t0 4($sp) + sw $t1 8($sp) + sw $ra 12($sp) + + jal check_if_is_object # If is not an object can not be expanded + beq $v0 $zero gc_collect_recursive_expand_end + + lw $t0 4($a0) + sll $t0 $t0 2 + addiu $t0 $t0 -4 + addu $t0 $a0 $t0 + lw $t1 0($t0) # Check if the object was ready expanded to avoid infinite cycles + beq $t1 object_expanded gc_collect_recursive_expand_end + + # Mark the block that contains the object as reachable + li $t1 reachable + addiu $a0 $a0 neg_header_size + sw $t1 header_reachable_slot($a0) + addiu $a0 $a0 header_size + + # Mark the object as expanded + li $t1 object_expanded + sw $t1 0($t0) + + lw $t0 0($a0) # $t0 = type of the object + + # int and string types are special cases + la $t1 int_type + lw $t1 0($t1) + beq $t0 $t1 gc_collect_recursive_expand_end + + la $t1 string_type + lw $t1 0($t1) + beq $t0 $t1 gc_collect_recursive_expand_string_object + + lw $t0 4($a0) + li $t1 meta_data_object_size + sub $t0 $t0 $t1 + + addiu $t1 $a0 12 + +# call this routine in every attr of the object +gc_collect_recursive_expand_attr_loop: + beq $t0 $zero gc_collect_recursive_expand_end + lw $a0 0($t1) + jal gc_collect_recursive_expand + addiu $t1 $t1 4 + sub $t0 $t0 1 + j gc_collect_recursive_expand_attr_loop + +# the value field of string object is not an object but it is a +# reference to the block where the string is saved, so that block +# needs to be marked as reachable +gc_collect_recursive_expand_string_object: + lw $t0 8($a0) + addiu $t0 $t0 neg_header_size + li $t1 reachable + sw $t1 header_reachable_slot($t0) + + +gc_collect_recursive_expand_end: + lw $a0 0($sp) + lw $t0 4($sp) + lw $t1 8($sp) + lw $ra 12($sp) + addiu $sp $sp 16 + + jr $ra + + + + + + + + +# $a0 address from +# $a1 address to +# $a2 size +copy: + addiu $sp $sp -16 + sw $a0 0($sp) + sw $a1 4($sp) + sw $a2 
8($sp) + sw $t0 12($sp) + +copy_loop: + beq $a2 $zero copy_end + lw $t0 0($a0) + sw $t0 0($a1) + addiu $a0 $a0 4 + addiu $a1 $a1 4 + addi $a2 $a2 -4 + j copy_loop + +copy_end: + lw $a0 0($sp) + lw $a1 4($sp) + lw $a2 8($sp) + lw $t0 12($sp) + addiu $sp $sp 16 + + jr $ra + + +##################################################################################################### +# Check if a value is a reference to an object # +# Args: # +# $a0: Value to check # +# Return: # +# $v0: 1 if is a reference to an object else 0 # +# Summary: # +# Check if a value is a valid heap address and if it is check if in that address there are # +# values that match with the object schema # +##################################################################################################### +check_if_is_object: + addiu $sp $sp -20 + sw $t0 0($sp) + sw $t1 4($sp) + sw $t2 8($sp) + sw $t3 12($sp) + sw $a0 16($sp) + + move $t0 $a0 + + li $v0 9 + move $a0 $zero + syscall + + addiu $t1 $v0 -4 # Last word of heap + + # Check that the first word is a type object + blt $t0 $gp check_if_is_object_not_object + bgt $t0 $t1 check_if_is_object_not_object + lw $t2 0($t0) + blt $t2 $zero check_if_is_object_not_object + la $t3 type_number + lw $t3 0($t3) + bge $t2 $t3 check_if_is_object_not_object + + addiu $t0 $t0 4 + blt $t0 $gp check_if_is_object_not_object + bgt $t0 $t1 check_if_is_object_not_object + lw $t2 0($t0) #Store size in $t2 + + addiu $t0 $t0 8 + + + li $t3 meta_data_object_size + sub $t2 $t2 $t3 + sll $t2 $t2 2 + addu $t0 $t0 $t2 + + # Check if the last word of the object is an object mark + blt $t0 $gp check_if_is_object_not_object + bgt $t0 $t1 check_if_is_object_not_object + lw $t2 0($t0) + beq $t2 object_mark check_if_is_object_is_object + beq $t2 object_expanded check_if_is_object_is_object + +check_if_is_object_not_object: + li $v0 0 + j check_if_is_object_end + + +check_if_is_object_is_object: + li $v0 1 + + +check_if_is_object_end: + lw $t0 0($sp) + lw $t1 4($sp) + lw $t2 
8($sp) + lw $t3 12($sp) + lw $a0 16($sp) + addiu $sp $sp 20 + + jr $ra + + +equals: + beq $a0 $a1 equals_equal + li $v0 0 + j equals_end + +equals_equal: + li $v0 1 + +equals_end: + jr $ra + + + +less_equal: + ble $a0 $a1 less_equal_true + li $v0 0 + j less_equal_end + +less_equal_true: + li $v0 1 + +less_equal_end: + jr $ra + + +less: + blt $a0 $a1 less_true + li $v0 0 + j less_end + +less_true: + li $v0 1 + +less_end: + jr $ra + + +len: + addiu $sp $sp -8 + sw $t0 0($sp) + sw $t1 4($sp) + + move $t0 $a0 + move $v0 $zero + +len_loop: + lb $t1 0($t0) + beq $t1 $zero len_end + addi $v0 $v0 1 + addiu $t0 $t0 1 + j len_loop + +len_end: + lw $t0 0($sp) + lw $t1 4($sp) + addiu $sp $sp 8 + + jr $ra + + +use_block: + addiu $sp $sp -12 + sw $t0 0($sp) + sw $t1 4($sp) + sw $t2 8($sp) + + addiu $t0 $gp free_list + +use_block_loop: + move $t1 $t0 + lw $t0 header_next_slot($t0) + beq $t0 $zero use_block_end + beq $t0 $a0 use_block_founded + j use_block_loop + +use_block_founded: + lw $t2 header_next_slot($t0) + sw $t2 header_next_slot($t1) + + addiu $t1 $gp used_list + lw $t2 header_next_slot($t1) + sw $t0 header_next_slot($t1) + sw $t2 header_next_slot($t0) + +use_block_end: + lw $t0 0($sp) + lw $t1 4($sp) + lw $t2 8($sp) + addiu $sp $sp 12 + + jr $ra + + + + +read_str: + addiu $sp $sp -36 + sw $t0 0($sp) + sw $t1 4($sp) + sw $t2 8($sp) + sw $t3 12($sp) + sw $t4 16($sp) + sw $t5 20($sp) + sw $a0 24($sp) + sw $a1 28($sp) + sw $ra 32($sp) + + addiu $t0 $gp free_list + move $t1 $zero + move $t2 $t0 + +read_str_larger_block_loop: + lw $t0 header_next_slot($t0) + beq $t0 $zero read_str_reading + lw $t3 header_size_slot($t0) + bge $t1 $t3 read_str_larger_block_loop + move $t1 $t3 + move $t2 $t0 + j read_str_larger_block_loop + +read_str_reading: + beq $t1 $zero read_str_new_block + move $a1 $t1 + li $v0 8 + addiu $a0 $t2 header_size + syscall + move $t0 $a0 + move $t1 $zero + +read_str_look_nl: + lb $t2 0($t0) + beq $t2 $zero read_str_no_nl + beq $t2 new_line read_str_nl_founded + 
addi $t1 $t1 1 + addi $t0 $t0 1 + j read_str_look_nl + +read_str_nl_founded: + sb $zero 0($t0) + addi $t1 $t1 1 + li $t2 4 + div $t1 $t2 + mfhi $t3 + beq $t3 $zero read_str_nl_founded_alligned + sub $t2 $t2 $t3 + add $t1 $t1 $t2 +read_str_nl_founded_alligned: + move $a1 $t1 + addiu $a0 $a0 neg_header_size + jal split_block + jal use_block + + addiu $v0 $a0 header_size + j read_str_end + + +read_str_no_nl: + addi $t1 $t1 1 + blt $t1 str_size_treshold read_str_dup + addi $t1 $t1 alloc_size + j read_str_extend_heap +read_str_dup: + sll $t1 $t1 1 +read_str_extend_heap: + move $a1 $t1 + move $t0 $a0 + addiu $a0 $gp free_list + +read_str_last_block_loop: + lw $t1 header_next_slot($a0) + beq $t1 $zero read_str_last_block_founded + lw $a0 header_next_slot($a0) + j read_str_last_block_loop + +read_str_last_block_founded: + jal extend_heap + jal expand_block + lw $t1 header_next_slot($a0) + bne $t1 $zero read_str_copy_prev + move $t1 $a0 + +read_str_copy_prev: + lw $t3 header_size_slot($t1) + move $t2 $zero + move $t5 $t1 + addiu $t1 $t1 header_size + +read_str_copy_loop: + lb $t4 0($t0) + beq $t4 $zero read_str_copy_end + sb $t4 0($t1) + addi $t2 $t2 1 + addi $t0 $t0 1 + addi $t1 $t1 1 + j read_str_copy_loop + +read_str_copy_end: + sub $t3 $t3 $t2 + move $a0 $t1 + move $a1 $t3 + li $v0 8 + syscall + move $t0 $a0 + move $t1 $t2 + addiu $a0 $t5 header_size + j read_str_look_nl + + +read_str_end: + lw $t0 0($sp) + lw $t1 4($sp) + lw $t2 8($sp) + lw $t3 12($sp) + lw $t4 16($sp) + lw $t5 20($sp) + lw $a0 24($sp) + lw $a1 28($sp) + lw $ra 32($sp) + addiu $sp $sp 36 + + jr $ra + + +read_str_new_block: + addiu $t0 $gp free_list + +read_str_new_block_search_last: + lw $t1 header_next_slot($t0) + beq $t1 $zero read_str_new_block_create + move $t0 $t1 + j read_str_new_block_search_last + +read_str_new_block_create: + move $a0 $t0 + li $a1 alloc_size + jal extend_heap + jal expand_block + lw $t2 header_next_slot($a0) + beq $t2 $zero read_str_new_block_expanded + lw $t1 
header_size_slot($t2) + j read_str_reading + +read_str_new_block_expanded: + move $t2 $a0 + lw $t1 header_size_slot($a0) + j read_str_reading + + + +concat: + addiu $sp $sp -24 + sw $t0 0($sp) + sw $t1 4($sp) + sw $t2 8($sp) + sw $a0 12($sp) + sw $a1 16($sp) + sw $ra 20($sp) + + move $t0 $a0 + move $t1 $a1 + addiu $a0 $a0 neg_header_size + addiu $a1 $a1 neg_header_size + + lw $a0 header_size_slot($a0) + lw $a1 header_size_slot($a1) + + add $a0 $a0 $a1 + jal malloc + move $t2 $v0 + +concat_copy_first_loop: + lb $a0 0($t0) + beq $a0 $zero concat_copy_second_loop + sb $a0 0($t2) + addiu $t0 $t0 1 + addiu $t2 $t2 1 + j concat_copy_first_loop + +concat_copy_second_loop: + lb $a0 0($t1) + beq $a0 $zero concat_end + sb $a0 0($t2) + addiu $t1 $t1 1 + addiu $t2 $t2 1 + j concat_copy_second_loop + +concat_end: + sb $zero 0($t2) + lw $t0 0($sp) + lw $t1 4($sp) + lw $t2 8($sp) + lw $a0 12($sp) + lw $a1 16($sp) + lw $ra 20($sp) + addiu $sp $sp 24 + + jr $ra + + +substr: + addiu $sp $sp -24 + sw $t0 0($sp) + sw $t1 4($sp) + sw $t2 8($sp) + sw $t3 12($sp) + sw $a0 16($sp) + sw $ra 20($sp) + + move $t0 $a0 + li $t1 4 + + div $a2 $t1 + + mfhi $t2 + bne $t2 $zero substr_allign_size + addiu $t1 $a2 4 # length already word-aligned: reserve one extra word for the NUL terminator + j substr_new_block + +substr_allign_size: + sub $t1 $t1 $t2 + add $t1 $t1 $a2 + +substr_new_block: + move $a0 $t1 + jal malloc + move $t3 $v0 + move $t1 $zero + addu $t0 $t0 $a1 + +substr_copy_loop: + beq $t1 $a2 substr_end + lb $t2 0($t0) + sb $t2 0($t3) + addiu $t0 $t0 1 + addiu $t3 $t3 1 + addiu $t1 $t1 1 + j substr_copy_loop + +substr_end: + sb $zero 0($t3) + lw $t0 0($sp) + lw $t1 4($sp) + lw $t2 8($sp) + lw $t3 12($sp) + lw $a0 16($sp) + lw $ra 20($sp) + addiu $sp $sp 24 + + jr $ra + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/Informe/Informe.md b/doc/Informe/Informe.md new file mode 100644 index 00000000..13da033e --- /dev/null +++ b/doc/Informe/Informe.md @@ -0,0 +1,232 @@ +# Informe de Complementos de Compilación +## Datos Generales +### Autores +- 
Miguel Tenorio Potrony +- Mauricio Lázaro Perdomo Cortés +- Lázaro Raúl Iglesias Vera + +### Sobre el proyecto +Para la implementación de este proyecto se tomaron como base los proyectos realizados durante 3er año, donde se desarrollaron las fases de chequeo e inferencia de tipos, además de parsing. El código de dichos proyectos conserva su estructura pero estuvo sujeto a cambios y mejoras. + +La mayoría de nuestras implementaciones siguen las ideas y utilizan las herramientas dadas en clase durante 3er año. + +Todas las fases del proceso de compilación y ejecución serán explicadas a continuación. + + +## Pipeline +Como se puede apreciar en [main.py](https://github.com/2kodevs/cool-compiler-2020/blob/master/src/main.py) el pipeline de nuestro proceso de compilación es: + +1. Lexer +2. Parsing +3. Recolección de tipos +4. Construcción de tipos +5. Chequeo/Inferencia de tipos +6. Verificación de tipos +7. Traducción de Cool a CIL +8. Traducción de CIL a MIPS + +Cada parte del proceso será discutida en detalle durante las siguientes secciones. + +Como se puede apreciar en la etapa #5 del proceso, el chequeo e inferencia de tipos se realizan al unísono, sin embargo cada parte se explicará en secciones separadas y se hará notar por qué se decidió realizarlas al mismo tiempo. + +## Lexer + +Para el proceso de lexer y tokenización se utilizó el paquete PLY. Se creó un lexer que consta de tres estados: + + - INITIAL + - comments + - strings + +Para cada uno de estos estados se definieron las expresiones regulares que representan cada uno de los tokens posibles, y se +manejan otras variables que conforman el estado del lexer, como la línea actual. + +## Parsing +Para el proceso de parsing se utilizó el parser LR1 y la gramática de Cool que fueron implementados para el proyecto de 3er año sobre chequeo de tipos. + +Fue necesario modificar la salida del Parser para poder devolver la información referente al token de error en caso de que alguna falla fuera detectada. 
+ +Dado que los proyectos llevados a cabo previamente fueron desarrollados para mini-Cool, se hizo necesario modificar la gramática, y se obtuvo como resultado: + +### Gramática de Cool +La gramática implementada es S-Atributada. Una descripción de los símbolos y producciones de la gramática se puede ver en [grammar](https://github.com/2kodevs/cool-compiler-2020/blob/master/src/doc/grammar.pdf) + +## Recolección de tipos +Durante la recolección de tipos se visitan todas las declaraciones de clases, se crean los tipos asociados a ellas y se valida la correctitud de las mismas. + +**Errores detectados**: +- Herencia cíclica +- Redefinición de clases +- Nombres de clase no válidos + +## Construcción de tipos +A los tipos creados en la fase anterior se les añaden todos sus atributos y métodos. Además se verifica que se cumplan los requerimientos de un programa válido de Cool, que son tener una clase `Main` con su método `main`. + +**Errores detectados**: +- Problemas de nombrado de atributos y métodos +- Redefinición de atributos +- Redefinición incorrecta de métodos +- Uso de tipos no definidos +- No definición de la clase `Main` o su método `main` +- Incorrecta definición del método `main` +- Mal uso de herencia + +## Chequeo de tipos +En esta fase se evalúa la correctitud de todas las expresiones del lenguaje y se decide el tipo estático de cada una de ellas según lo establecido en el manual de [Cool](https://github.com/2kodevs/cool-compiler-2020/blob/master/doc/cool-manual.pdf). + +**Errores detectados**: +- Incompatibilidad de tipos +- Uso de tipos no definidos +- Uso de variables, tipos y métodos no definidos +- Mal uso de `self` y `SELF_TYPE` +- Mal uso del `case` + +## Inferencia de tipos +Para la implementación de esta fase se expandió el comportamiento del visitor encargado del chequeo de tipos, razón por la cual ambos procesos se realizan en la misma fase. 
+ +Para lograr la inferencia de tipos, se realizó un algoritmo de punto fijo en el cual mediante repeticiones sucesivas del proceso de inferencia se van definiendo los tipos de aquellas variables declaradas como `AUTO_TYPE`. + +### Idea +Una variable en Cool dada su utilización puede definir dos conjuntos + +1. Tipos a los que se conforma (**Ancestros**) +2. Tipos que se conforman a ella (**Descendientes**) + +Dados los dos conjuntos anteriores se puede decidir si una variable `AUTO_TYPE` puede ser inferida correctamente o no. + +Ambos conjuntos recibieron un nombre intuitivo mencionado anteriormente en **negrita** para hacer referencia a su contenido. + +El tipo que se decida otorgar (inferir) a la variable en cuestión, llamémosle _**T**_, deberá conformarse a todos los tipos del conjunto 1. Al mismo tiempo todos los tipos del conjunto 2 deberán conformarse a él. + +Dicho lo anterior y dado el hecho de que un tipo *A* se conforma a un tipo *B* solamente si *B* es ancestro de *A*, podemos notar que: + +1. El tipo a seleccionar debe ser un ancestro del **Menor Ancestro Común** (**LCA** por sus siglas en inglés) a todos los nodos del conjunto 2, llamémosle *N*. En otras palabras el primer tipo que es ancestro de todos los tipos en el conjunto 2. +2. Como todos los tipos del conjunto 1 necesitan ser ancestros de _**T**_, todos pertenecerán al camino que se forma desde _**T**_ hasta *Object* en el árbol de tipos, por tanto _**T**_ necesita ser descendiente del primero que aparezca en el camino mencionado y pertenezca al conjunto 1, llamémosle *M*. +3. Tomando el operador **<=** para referirnos a la relación *conformarse a* (es decir, *A <= B* si *B* es ancestro de *A*), se puede afirmar que _**T**_ es de la forma _**N <= T <= M**_, o lo que es lo mismo _**T**_ podría ser cualquier tipo en el camino de *N* a *M*. + +> El nodo que representa el **LCA** siempre existe dado que el árbol de tipos es único, por tanto en caso extremo *Object* siempre será válido como ancestro a todos los tipos. 
+ +El algoritmo implementado tras cada recorrido del **AST** (Árbol de sintaxis abstracta) infiere el tipo de todas aquellas variables de las cuales se tenga información, seleccionando como tipo inferido siempre el que representa a *N*. + +Al ser este algoritmo una extensión del chequeo de tipos, mientras se van infiriendo los tipos se valida que los mismos no ocasionen error. +> En todo lo anterior se asume que todo tipo es ancestro y descendiente de sí mismo. + +**Errores detectados**: +- Mal uso de `AUTO_TYPE` en casos donde no se cumpla que _**N <= M**_ o todos los tipos en el conjunto 1 no se encuentren en un camino del árbol de tipos +- Todos los errores de chequeo semántico que existan en el código o surjan tras la inferencia de una o varias variables. + +## Verificación de tipos +Esta fase surge dado que tras el proceso de inferencia puede haber ocurrido un error que durante el chequeo semántico no se valida. Dado que permitimos *AUTO_TYPE* en los parámetros de las funciones, al terminar la inferencia pueden generarse conflictos de mala redefinición de métodos, los cuales son chequeados en la fase de Construcción de los tipos (etapa #4). Por tanto la única función de esta fase es verificar la correctitud de los tipos. + +**Errores detectados**: +- Mala redefinición de métodos ocasionada por la inferencia de tipos + +## Traducción a CIL +En esta etapa del proceso de compilación, requirió especial atención la generación de las expresiones *case*. Para ello se requiere ordenar las instrucciones de tal modo que se asegure el emparejamiento del tipo de la expresión principal con el tipo más específico declarado en las ramas del *case*. + +Primero por cada rama **b** se cuentan cuántos tipos declarados en las demás ramas se conforman a **b**, creando de este modo una tupla `(cantidad, tipo declarado en b)`. 
+Luego se ordenan todas estas tuplas por su primer elemento, obteniendo así una secuencia ordenada donde el primero elemento representa la rama cuyo tipo declarado se encuentra en el nivel más bajo en la jerarquía de tipos del programa. + +Luego por cada rama **b** de esta secuencia, se obtienen todos los tipos del programa que conforman a **b**, y por cada uno de estos que no haya sido tomado en cuenta en el procesamiento de ramas anteriores, se generan las instrucciones necesarias para comprobar si el tipo de la expresión principal del *case* coincide con él. En caso de coincidencia, se salta al bloque de las instrucciones generadas por el cuerpo de **b**; si no entonces se procede a comprobar con el tipo siguiente. Nótese que no se repiten comprobaciones. + +**Errores detectados**: +- Dispatch estático o dinámico desde un objeto void +- Expresión principal de un *case* tiene valor `void` +- Ejecución de un *case* sin que ocurra algún emparejamiento con alguna rama. +- División por cero +- Substring fuera de rango + +> Aunque estos errores realmente se detectan en ejecución, es en esta fase que se genera el código que permite detectarlos. + +## Traducción a MIPS +En la fase de generación de código `MIPS` se enfrentaron tres problemas fundamentales: + + - Estructura de los objetos en memoria. + - Definición de tipos en memoria. + - Elección de registros. + +### Estructura de los objetos en memoria. +Determinar el modelo que seguirían los objetos en la memoria fue un paso fundamental para la toma de múltiples decisiones tanto en la generación de código `CIL` como `MIPS`. Los objetos en memoria siguen el siguiente modelo: + +```| Tipo | Tamaño | Tabla de dispatch | -- Atributos -- | Marca de objeto |``` + - Tipo: Esta sección tiene tamaño 1 `palabra`, el valor aquí encontrado se interpreta como un entero e indica el tipo del objeto. 
+ - Tamaño: Esta sección tiene tamaño 1 `palabra`, el valor aquí encontrado se interpreta como un entero e indica el tamaño en `palabras` del objeto. + - Tabla de dispatch: Esta sección tiene tamaño 1 `palabra`, el valor aquí encontrado se interpreta como una dirección de memoria e indica el inicio de la tabla de dispatch del objeto. La tabla de dispatch del objeto es un segmento de la memoria donde interpretamos cada `palabra` como la dirección a uno de los métodos del objeto. + - Atributos: Esta sección tiene tamaño **N** `palabras` donde **N** es la cantidad de atributos que conforman el objeto, cada una de las `palabras` que conforman esta sección representa el valor de un atributo del objeto. + - Marca de objeto: Esta sección tiene tamaño 1 `palabra`, es un valor usado para marcar que esta zona de la memoria corresponde a un objeto, se añadió con el objetivo de hacer menos propenso a fallos la tarea de identificar objetos en memoria en el `Garbage Collector`. + +### Definición de tipos en memoria. +Un tipo está representado por tres estructuras en la memoria: + - Una dirección a una cadena alfanumérica que representa el nombre del tipo. + - Un prototipo que es una especie de plantilla que se utiliza en la creación de los objetos. Cuando se crea un objeto este prototipo es copiado al segmento de memoria asignado al objeto. Un prototipo es un objeto válido por lo que tiene exactamente la misma estructura explicada anteriormente. El prototipo es también la solución escogida para el problema de los valores por defecto de los objetos. + - Una tabla de dispatch que como se explicó anteriormente contiene las direcciones de los métodos del objeto. +Existe una tabla de prototipos (nombres) donde se puede encontrar el prototipo (nombre) de un tipo específico, utilizando como índice el valor que representa al tipo. + +### Elección de registros. 
+La elección de registros fue un proceso que se decidió optimizar para disminuir la utilización de las operaciones `lw` y `sw` en `MIPS` que como se sabe, añaden una demora considerable a nuestros programas por el tiempo que tarda en realizarse un operación de escritura o lectura en la memoria. +El proceso de elección de registros se realiza para cada función y consta de los siguientes pasos: + - Separación del código en bloques básicos: + + Para obtener los bloques básicos primero se hace un recorrido por las instrucciones de la función marcando los líderes. Son considerados líderes las instrucciones de tipo `Label` y las instrucciones que tengan como predecesor un instrucción de tipo `Goto` o `Goto if`. Luego de tener marcados los líderes, se obtienen los bloques que serán los conjuntos de instrucciones consecutivas que comienzan con un líder y terminan con la primera instrucción que sea predecesor de un líder (notar que un bloque puede estar formado por una sola instrucción). + + - Creación del grafo de flujo: + + Este es un grafo dirigido que indica los caminos posibles entre los bloques básicos su elaboración es bastante sencilla: si la última instrucción de un bloque es un `Goto`, entonces se añadirá una arista desde este bloque hacia el bloque iniciado por la instrucción `Label` a la que hace referencia el `Goto`; si la última instrucción es de tipo `Goto if`, entonces se añadirán dos aristas una hacia el bloque que comienza con la instrucción `Label` a la que se hace referencia, y otra hacia el bloque que comienza con la instrucción siguiente en la función; en el caso de que la última instrucción sea de cualquier otro tipo, se colocará una sola arista desde el bloque actual hacia el bloque que comienza con la instrucción siguiente en la función. + + - Análisis de vida de las variables: + + En este procedimiento se computan cinco conjuntos para cada instrucción **I**: `succ`, `gen`, `kill`, `in` y `out`. 
`succ` contiene las instrucciones que se pueden ejecutar inmediatamente después de la instrucción **I**; `gen` contiene las variables de las que se necesita el valor en la instrucción **I**; `kill` contiene las variables a las que se les asigna un valor en la instrucción **I**; `in` contiene las variables que pueden estar vivas al llegar a la instrucción **I**, y `out` contiene las variables que pueden estar vivas luego de ejecutada la instrucción **I**. + + - Creación del grafo de interferencia: + + Los vértices de este grafo serán las variables que se utilizan en la función y existirá una arista entre los vértices **x** y **y**, si las variables que representan esos nodos interfieren. Dos variables interfieren si existe alguna instrucción **I** tal que **x** pertenezca al `kill` de **I** y **y** pertenezca al `out` de **I**. + + - Asignación de registros: + + Contando con el grafo de interferencia, se asignan registros a las variables de forma tal que dos variables que interfieran no se les asigne el mismo registro, esto puede verse como el problema de colorear un grafo con **N** colores siendo **N** la cantidad de registros que se tienen. Es conocido que este problema es *NP* por lo que para asignar los registros se usa una heurística muy sencilla que consiste en lo siguiente: + + Primero se va eliminando del grafo y colocando en una pila cada nodo que tenga menos de N vecinos, se nota que todos estos elementos pueden ser coloreados sin problemas. Si en algún momento no existe algún nodo con menos de N vecinos, se tomará un nodo al azar; este proceso terminará cuando no queden nodos en el grafo. Luego se va sacando cada nodo de la pila y se le asigna un registro que no esté usado por alguno de los nodos que eran vecinos de este en el momento en que se eliminó del grafo, en el caso de que existan más de un nodo posible, se le asigna el menor, en caso de que no exista nodo posible la variable no tendrá registro y su valor permanecerá en la memoria. 
+ +**Errores detectados**: +- Heap overflow + +## Ejecución +Para ejecutar el proyecto se necesita tener instalado `Python` y el conjunto de dependencias listado en [requirements.txt](https://github.com/2kodevs/cool-compiler-2020/blob/master/requirements.txt). + +Para instalar las dependencias puede utilizar: +```bash +make install +``` +Una vez estén instaladas las dependencias, puede compilar y ejecutar cualquier archivo de código Cool utilizando el comando: +```bash +make main CODE=archivo.cl +``` +>Para usar `make` necesita estar en la dirección `/src` + +## Estructura +Los archivos del proyecto se encuentran modularizados de la siguiente manera: + +1. **core** + 1. **cmp** + 1. **cool** + 2. **parser** + 2. **lexer** + 3. **visitors** + 1. **type_check** + 2. **cil** + 3. **mips** + +**cmp** contiene todos los archivos heredados de las clases de 3er año y proyectos anteriores. + +**cool** contiene el *AST*, Gramática y Parser de Cool + +**parser** contiene la implementación del parser LR1 utilizada + +**lexer** todo lo referente a lexer y tokenización + +**visitors** contiene la implementación del patrón visitor + +**type_check** fases de la #3 a la #6 + +**cil** traducción a cil + +**mips** traducción a mips diff --git a/doc/Informe/Informe.pdf b/doc/Informe/Informe.pdf new file mode 100644 index 00000000..30f0f7cd Binary files /dev/null and b/doc/Informe/Informe.pdf differ diff --git a/doc/Informe/grammar.pdf b/doc/Informe/grammar.pdf new file mode 100644 index 00000000..bdf52cdc Binary files /dev/null and b/doc/Informe/grammar.pdf differ diff --git a/doc/Informe/grammar.tex b/doc/Informe/grammar.tex new file mode 100644 index 00000000..849ac7cc --- /dev/null +++ b/doc/Informe/grammar.tex @@ -0,0 +1,90 @@ +\documentclass{article} +\usepackage[utf8]{inputenc} +\usepackage{amsmath} + +\begin{document} + Terminals : class, type, inherits, id, let, in, isvoid, not, new, case, of, esac, if, then, else, fi, while, loop, pool + +\begin{eqnarray*} + program & \rightarrow & 
class\_list \\ + class\_list & \rightarrow & class\_def \\ + & \rightarrow & class\_def \ class\_list \\ + class\_def & \rightarrow & class \ type \ \{ \ feature\_list\ \} \ ;\\ + & \rightarrow & class \ type \ inherits \ type \ \{ \ feature\_list\ \} \ ;\\ + feature\_list & \rightarrow & feature \ feature\_list \\ + & \rightarrow & \epsilon \\ + feature & \rightarrow & param \ ; \\ + & \rightarrow & value\_param \ ;\\ + & \rightarrow & id \ ( \ ) \ : \ type \ \{ \ expression \ \} \ ; \\ + & \rightarrow & id \ ( \ param\_list \ ) \ : \ type\ \{ \ expression \ \} \ ; \\ + param\_list & \rightarrow & param \\ + & \rightarrow & param \ , \ param\_list \\ + param & \rightarrow & id \ : \ type \\ + value\_param & \rightarrow & param \ \leftarrow \ expression \\ + block & \rightarrow & expression \ ; \\ + & \rightarrow & expression \ ; \ block\\ + let\_list & \rightarrow & param\\ + & \rightarrow & param \ , \ let\_list\\ + & \rightarrow & value\_param\\ + & \rightarrow & value\_param \ , \ let\_list\\ + case\_list & \rightarrow & param \ \Rightarrow \ expression \ ; \\ + & \rightarrow & param \ \Rightarrow \ expression \ ; \ case\_list\\ + func\_call & \rightarrow & . \ id \ ( \ )\\ + & \rightarrow & @ \ type \ . \ id \ ( \ )\\ + & \rightarrow & . \ id \ ( \ arg\_list \ ) \\ + & \rightarrow & @ \ type \ . 
\ id \ ( \ arg\_list \ ) \\ + arg\_list & \rightarrow & expression \\ + & \rightarrow & expression \ , \ arg\_list \\ + member\_call & \rightarrow & id \ ( \ arg\_list \ ) \\ + & \rightarrow & id \ ( \ ) \\ + expression & \rightarrow & special\\ + & \rightarrow & comparison\_expr\\ +\end{eqnarray*} +\begin{eqnarray*} + special & \rightarrow & arith \ \le \ special\_arith \\ + & \rightarrow & arith \ < \ special\_arith \\ + & \rightarrow & arith \ = \ special\_arith \\ + & \rightarrow & special\_arith \\ + special\_arith & \rightarrow & arith \ + \ special\_term \\ + & \rightarrow & arith \ - \ special\_term \\ + & \rightarrow & special\_term \\ + special\_term & \rightarrow & term \ * \ special\_unary \\ + & \rightarrow & term \ / \ special\_unary \\ + & \rightarrow & special\_unary \\ + special\_unary & \rightarrow & isvoid \ special\_unary \\ + & \rightarrow & \sim \ special\_unary \\ + & \rightarrow & final\_expr \\ + final\_expr & \rightarrow & let \ let\_list \ in \ expression \\ + & \rightarrow & id \ \leftarrow \ expression \\ + & \rightarrow & not \ expression \\ + comparison\_expr & \rightarrow & arith \ \le \ arith \\ + & \rightarrow & arith \ < \ arith \\ + & \rightarrow & arith \ = \ arith \\ + & \rightarrow & arith \\ + arith & \rightarrow & arith \ + \ term \\ + & \rightarrow & arith \ - \ term \\ + & \rightarrow & term \\ + term & \rightarrow & term \ * \ unary \\ + & \rightarrow & term \ / \ unary \\ + & \rightarrow & unary \\ + unary & \rightarrow & isvoid \ unary \\ + & \rightarrow & \sim \ unary \\ + & \rightarrow & func\_expr \\ + func\_expr & \rightarrow & func\_expr \ func\_call \\ + & \rightarrow & atom \\ +\end{eqnarray*} +\begin{eqnarray*} + atom & \rightarrow & id \\ + & \rightarrow & bool \\ + & \rightarrow & string \\ + & \rightarrow & interger \\ + & \rightarrow & new \ type \\ + & \rightarrow & member\_call \\ + & \rightarrow & ( \ expression \ )\\ + & \rightarrow & \{ \ block \ \}\\ + & \rightarrow & if \ expression \ then \ 
expression \ else \ expression \ fi \\ + & \rightarrow & while \ expression \ loop \ expression \ pool\\ + & \rightarrow & case \ expression \ of \ case\_list \ esac \\ +\end{eqnarray*} + +\end{document} \ No newline at end of file diff --git a/doc/Readme.md b/doc/Readme.md index 402477c8..a0450f53 100644 --- a/doc/Readme.md +++ b/doc/Readme.md @@ -4,9 +4,9 @@ **Nombre** | **Grupo** | **Github** --|--|-- -Nombre1 Apellido1 Apellido2 | C4xx | [@github_user](https://github.com/) -Nombre2 Apellido1 Apellido2 | C4xx | [@github_user](https://github.com/) -Nombre3 Apellido1 Apellido2 | C4xx | [@github_user](https://github.com/) +Lázaro Raúl Iglesias Vera | C412 | [@stdevRulo](https://github.com/stdevRulo) +Miguel Tenorio Potrony | C412 | [@stdevAntiD2ta](https://github.com/stdevAntiD2ta) +Mauricio Lázaro Perdomo Cortés | C412 | [@stdevMauricio1802](https://github.com/stdevMauricio1802) ## Readme diff --git a/requirements.txt b/requirements.txt index 9eb0cad1..cba16ee2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ pytest pytest-ordering +ply diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 00000000..2f68d228 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,43 @@ +.DEFAULT_GOAL := help +.PHONY: main clean test info coolc install save help + +CODE := code.cl +FILE_NAME := $(shell echo $(CODE) | cut -d '.' 
-f 1) +COOLC_ASM := $(FILE_NAME).s +ASM := $(FILE_NAME).mips +ORG_NAME := 2kodevs +PROJECT_NAME := CoolCompiler +APP_VERSION := v0.1 +APP_DESCRIPTION := $(ORG_NAME) - $(PROJECT_NAME)$(APP_VERSION) +DEVELOPERS := Lázaro Raúl Iglesias Vera, Miguel Tenorio Potrony, Mauricio Lázaro Perdomo Cortés +COPYRIGHT := Copyright © 2020: $(DEVELOPERS) +TEST_DIR := core/cmp/Stuff/tests/ +TEST := + +main: ## Compiling the compiler :) + @./coolc.sh $(CODE) + @spim -file $(ASM) + +clean: ## Remove temporary files + @rm -rf build/* + +test: ## Run test suite with name TAG + pytest ../tests -v --tb=short -m=${TAG} + +info: ## Display project description + @echo "$(APP_DESCRIPTION)" + @echo "$(COPYRIGHT)" + +coolc: ## Run the code.cl file using coolc + @coolc $(CODE) + @coolc_spim $(COOLC_ASM) + +install: ## Install the project dependencies + pip install -r ../requirements.txt + +save: ## Save the code.cl as a test + @cat code.cl > $(TEST_DIR)$(TEST).cl + +help: ## Show this help + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' + diff --git a/src/coolc.sh b/src/coolc.sh index 3088de4f..9ef2fe5e 100755 --- a/src/coolc.sh +++ b/src/coolc.sh @@ -1,11 +1,14 @@ -# Incluya aquí las instrucciones necesarias para ejecutar su compilador +#!/bin/bash +# Execution details INPUT_FILE=$1 OUTPUT_FILE=${INPUT_FILE:0: -2}mips -# Si su compilador no lo hace ya, aquí puede imprimir la información de contacto -echo "LINEA_CON_NOMBRE_Y_VERSION_DEL_COMPILADOR" # TODO: Recuerde cambiar estas -echo "Copyright (c) 2019: Nombre1, Nombre2, Nombre3" # TODO: líneas a los valores correctos +# Display project description here +#make info # TODO: Ensure that this rule is executed a single time +echo "2kodevs - CoolCompilerv0.1" +echo "Copyright © 2020: Lázaro Raúl Iglesias Vera, Miguel Tenorio Potrony, Mauricio Lázaro Perdomo Cortés" -# Llamar al compilador -echo "Compiling $INPUT_FILE into $OUTPUT_FILE" +# Compile and Run +#echo 
"Compiling $INPUT_FILE into $OUTPUT_FILE" +python3 main.py -f $INPUT_FILE diff --git a/src/core/__init__.py b/src/core/__init__.py new file mode 100644 index 00000000..7ebcadac --- /dev/null +++ b/src/core/__init__.py @@ -0,0 +1,2 @@ +from .visitors import * +from .lexer import CoolLexer diff --git a/src/core/cmp/__init__.py b/src/core/cmp/__init__.py new file mode 100644 index 00000000..8ce243d5 --- /dev/null +++ b/src/core/cmp/__init__.py @@ -0,0 +1,8 @@ +from .evaluation import evaluate_reverse_parse +from .cool import ast as cool_ast +from .automata import State +from .pycompiler import * +from .semantic import * +from .parser import * +from .utils import * +from .cool import * diff --git a/src/core/cmp/automata.py b/src/core/cmp/automata.py new file mode 100644 index 00000000..02b23f80 --- /dev/null +++ b/src/core/cmp/automata.py @@ -0,0 +1,198 @@ +try: + import pydot +except: + pass + +class State: + def __init__(self, state, final=False, formatter=lambda x: str(x), shape='circle'): + self.state = state + self.final = final + self.transitions = {} + self.epsilon_transitions = set() + self.tag = None + self.formatter = formatter + self.shape = shape + + # The method name is set this way from compatibility issues. 
+ def set_formatter(self, value, attr='formatter', visited=None): + if visited is None: + visited = set() + elif self in visited: + return + + visited.add(self) + self.__setattr__(attr, value) + for destinations in self.transitions.values(): + for node in destinations: + node.set_formatter(value, attr, visited) + for node in self.epsilon_transitions: + node.set_formatter(value, attr, visited) + return self + + def has_transition(self, symbol): + return symbol in self.transitions + + def add_transition(self, symbol, state): + try: + self.transitions[symbol].append(state) + except: + self.transitions[symbol] = [state] + return self + + def add_epsilon_transition(self, state): + self.epsilon_transitions.add(state) + return self + + def recognize(self, string): + states = self.epsilon_closure + for symbol in string: + states = self.move_by_state(symbol, *states) + states = self.epsilon_closure_by_state(*states) + return any(s.final for s in states) + + def to_deterministic(self, formatter=lambda x: str(x)): + closure = self.epsilon_closure + start = State(tuple(closure), any(s.final for s in closure), formatter) + + closures = [ closure ] + states = [ start ] + pending = [ start ] + + while pending: + state = pending.pop() + symbols = { symbol for s in state.state for symbol in s.transitions } + + for symbol in symbols: + move = self.move_by_state(symbol, *state.state) + closure = self.epsilon_closure_by_state(*move) + + if closure not in closures: + new_state = State(tuple(closure), any(s.final for s in closure), formatter) + closures.append(closure) + states.append(new_state) + pending.append(new_state) + else: + index = closures.index(closure) + new_state = states[index] + + state.add_transition(symbol, new_state) + + return start + + @staticmethod + def from_nfa(nfa, get_states=False): + states = [] + for n in range(nfa.states): + state = State(n, n in nfa.finals) + states.append(state) + + for (origin, symbol), destinations in nfa.map.items(): + origin = 
states[origin] + origin[symbol] = [ states[d] for d in destinations ] + + if get_states: + return states[nfa.start], states + return states[nfa.start] + + @staticmethod + def move_by_state(symbol, *states): + return { s for state in states if state.has_transition(symbol) for s in state[symbol]} + + @staticmethod + def epsilon_closure_by_state(*states): + closure = { state for state in states } + + l = 0 + while l != len(closure): + l = len(closure) + tmp = [s for s in closure] + for s in tmp: + for epsilon_state in s.epsilon_transitions: + closure.add(epsilon_state) + return closure + + @property + def epsilon_closure(self): + return self.epsilon_closure_by_state(self) + + @property + def name(self): + return self.formatter(self.state) + + def get(self, symbol): + target = self.transitions[symbol] + assert len(target) == 1 + return target[0] + + def __getitem__(self, symbol): + if symbol == '': + return self.epsilon_transitions + try: + return self.transitions[symbol] + except KeyError: + return None + + def __setitem__(self, symbol, value): + if symbol == '': + self.epsilon_transitions = value + else: + self.transitions[symbol] = value + + def __repr__(self): + return str(self) + + def __str__(self): + return str(self.state) + + def __hash__(self): + return hash(self.state) + + def __iter__(self): + yield from self._visit() + + def _visit(self, visited=None): + if visited is None: + visited = set() + elif self in visited: + return + + visited.add(self) + yield self + + for destinations in self.transitions.values(): + for node in destinations: + yield from node._visit(visited) + for node in self.epsilon_transitions: + yield from node._visit(visited) + + def graph(self): + G = pydot.Dot(rankdir='LR', margin=0.1) + G.add_node(pydot.Node('start', shape='plaintext', label='', width=0, height=0)) + + visited = set() + def visit(start): + ids = id(start) + if ids not in visited: + visited.add(ids) + G.add_node(pydot.Node(ids, label=start.name, shape=self.shape, 
style='bold' if start.final else '')) + for tran, destinations in start.transitions.items(): + for end in destinations: + visit(end) + G.add_edge(pydot.Edge(ids, id(end), label=tran, labeldistance=2)) + for end in start.epsilon_transitions: + visit(end) + G.add_edge(pydot.Edge(ids, id(end), label='ε', labeldistance=2)) + + visit(self) + G.add_edge(pydot.Edge('start', id(self), label='', style='dashed')) + + return G + + def _repr_svg_(self): + try: + return self.graph().create_svg().decode('utf8') + except: + pass + + def write_to(self, fname): + return self.graph().write_svg(fname) diff --git a/src/core/cmp/cool/__init__.py b/src/core/cmp/cool/__init__.py new file mode 100644 index 00000000..4d235e06 --- /dev/null +++ b/src/core/cmp/cool/__init__.py @@ -0,0 +1,2 @@ +from .parser import CoolParser +from .grammar import CoolGrammar diff --git a/src/core/cmp/cool/ast.py b/src/core/cmp/cool/ast.py new file mode 100644 index 00000000..8dcfc41b --- /dev/null +++ b/src/core/cmp/cool/ast.py @@ -0,0 +1,178 @@ +from ..utils import Token, empty_token + +# AST Classes +class Node: + pass + +class ProgramNode(Node): + def __init__(self, declarations): + self.declarations = declarations + +class DeclarationNode(Node): + pass + +class ClassDeclarationNode(DeclarationNode): + def __init__(self, idx, features, parent=None): + self.id = idx.lex + self.tid = idx + self.features = features + if not parent: + parent = Token("Object", "type") + parent.row = idx.row + parent.column = idx.column + self.parent = parent.lex + self.tparent = parent + +class AttrDeclarationNode(DeclarationNode): + def __init__(self, idx, typex, expr=None, arrow=empty_token): + self.id = idx.lex + self.tid = idx + self.type = typex.lex + self.ttype = typex + self.arrow = arrow + self.expr = expr + +class FuncDeclarationNode(DeclarationNode): + def __init__(self, idx, params, return_type, body): + self.id = idx.lex + self.tid = idx + self.params = params + self.type = return_type.lex + self.ttype = return_type 
+ self.body = body + +class ExpressionNode(Node): + pass + +class IfThenElseNode(ExpressionNode): + def __init__(self, condition, if_body, if_token, else_body): + self.token = if_token + self.condition = condition + self.if_body = if_body + self.else_body = else_body + +class WhileLoopNode(ExpressionNode): + def __init__(self, condition, body, token): + self.token = token + self.condition = condition + self.body = body + +class BlockNode(ExpressionNode): + def __init__(self, exprs): + self.exprs = exprs + +class LetInNode(ExpressionNode): + def __init__(self, let_body, in_body): + self.let_body = let_body + self.in_body = in_body + +class CaseOfNode(ExpressionNode): + def __init__(self, expr, branches): + self.expr = expr + self.branches = branches + +class CaseExpressionNode(AttrDeclarationNode): + pass + +class LetAttributeNode(AttrDeclarationNode): + pass + +class AssignNode(ExpressionNode): + def __init__(self, idx, expr): + self.id = idx.lex + self.tid = idx + self.expr= expr + +class UnaryNode(ExpressionNode): + def __init__(self, expr, symbol): + self.symbol = symbol + self.expr = expr + +class NotNode(UnaryNode): + pass + +class BinaryNode(ExpressionNode): + def __init__(self, left, right, symbol): + self.symbol = symbol + self.left = left + self.right = right + +class ComparisonNode(BinaryNode): + pass + +class LessEqualNode(ComparisonNode): + pass + +class LessNode(ComparisonNode): + pass + +class EqualNode(ComparisonNode): + pass + +class ArithmeticNode(BinaryNode): + pass + +class PlusNode(ArithmeticNode): + pass + +class MinusNode(ArithmeticNode): + pass + +class StarNode(ArithmeticNode): + pass + +class DivNode(ArithmeticNode): + pass + +class IsVoidNode(UnaryNode): + pass + +class ComplementNode(UnaryNode): + pass + +class FunctionCallNode(ExpressionNode): + def __init__(self, obj, idx, args, typex=empty_token): + self.obj = obj + self.id = idx.lex + self.tid = idx + self.args = args + self.type = typex.lex + self.ttype = typex + +class 
MemberCallNode(ExpressionNode): + def __init__(self, idx, args): + self.id = idx.lex + self.tid = idx + self.args = args + +class NewNode(ExpressionNode): + def __init__(self, typex): + self.type = typex.lex + self.ttype = typex + +class AtomicNode(ExpressionNode): + def __init__(self, token): + self.lex = token.lex + self.token = token + +class IntegerNode(AtomicNode): + pass + +class IdNode(AtomicNode): + pass + +class StringNode(AtomicNode): + pass + +class BoolNode(AtomicNode): + pass + +class Param(Node): + def __init__(self, tid, ttype): + self.tid = tid + self.ttype = ttype + self.type = ttype.lex + + def __iter__(self): + yield self.tid.lex + yield self.type diff --git a/src/core/cmp/cool/grammar.py b/src/core/cmp/cool/grammar.py new file mode 100644 index 00000000..838c60e9 --- /dev/null +++ b/src/core/cmp/cool/grammar.py @@ -0,0 +1,158 @@ +from ..pycompiler import Grammar +from .ast import * + +# Grammar + +CoolGrammar = Grammar() + +# non-terminals +program = CoolGrammar.NonTerminal('', startSymbol=True) +class_list, def_class = CoolGrammar.NonTerminals(' ') +feature_list, feature = CoolGrammar.NonTerminals(' ') +param_list, param = CoolGrammar.NonTerminals(' ') +expr, member_call, expr_list, block, let_list, case_list = CoolGrammar.NonTerminals(' ') +arith, term, func_expr, void, compl_expr, cmp_expr, statement = CoolGrammar.NonTerminals(' ') +atom, func_call, arg_list = CoolGrammar.NonTerminals(' ') +final_expr, unary_expr = CoolGrammar.NonTerminals(' ') +special, special_arith, special_term = CoolGrammar.NonTerminals(' ') + +# terminals +classx, inherits = CoolGrammar.Terminals('class inherits') +ifx, then, elsex, fi = CoolGrammar.Terminals('if then else fi') +whilex, loop, pool = CoolGrammar.Terminals('while loop pool') +let, inx = CoolGrammar.Terminals('let in') +case, of, esac = CoolGrammar.Terminals('case of esac') +semi, colon, comma, dot, at, opar, cpar, ocur, ccur, larrow, rarrow = CoolGrammar.Terminals('; : , . 
@ ( ) { } <- =>') +plus, minus, star, div, isvoid, compl = CoolGrammar.Terminals('+ - * / isvoid ~') +notx, less, leq, equal = CoolGrammar.Terminals('not < <= =') +new, idx, typex, integer, string, boolx = CoolGrammar.Terminals('new id type integer string bool') +eof = CoolGrammar.EOF + +# productions +program %= class_list, lambda h, s: ProgramNode(s[1]) + +# +class_list %= def_class + class_list, lambda h, s: [s[1]] + s[2] +class_list %= def_class, lambda h, s: [s[1]] + +# +def_class %= classx + typex + ocur + feature_list + ccur + semi, lambda h, s: ClassDeclarationNode(s[2], s[4]) +def_class %= classx + typex + inherits + typex + ocur + feature_list + ccur + semi, lambda h, s: ClassDeclarationNode(s[2], s[6], s[4]) + +# +feature_list %= feature + feature_list, lambda h, s: [s[1]] + s[2] +feature_list %= CoolGrammar.Epsilon, lambda h, s: [] + +# +feature %= idx + colon + typex + semi, lambda h, s: AttrDeclarationNode(s[1], s[3]) +feature %= idx + colon + typex + larrow + expr + semi, lambda h, s: AttrDeclarationNode(s[1], s[3], s[5], s[4]) + +# +feature %= idx + opar + param_list + cpar + colon + typex + ocur + expr + ccur + semi, lambda h, s: FuncDeclarationNode(s[1], s[3], s[6], s[8]) +feature %= idx + opar + cpar + colon + typex + ocur + expr + ccur + semi, lambda h, s: FuncDeclarationNode(s[1], [], s[5], s[7]) + +# +param_list %= param, lambda h, s: [s[1]] +param_list %= param + comma + param_list, lambda h, s: [s[1]] + s[3] + +# +param %= idx + colon + typex, lambda h, s: Param(s[1], s[3]) + +# +block %= expr + semi, lambda h, s: [s[1]] +block %= expr + semi + block, lambda h, s: [s[1]] + s[3] + +# +let_list %= idx + colon + typex, lambda h, s: [LetAttributeNode(s[1], s[3])] +let_list %= idx + colon + typex + larrow + expr, lambda h, s: [LetAttributeNode(s[1], s[3], s[5], s[4])] +let_list %= idx + colon + typex + comma + let_list, lambda h, s: [LetAttributeNode(s[1], s[3])] + s[5] +let_list %= idx + colon + typex + larrow + expr + comma + let_list, lambda 
h, s: [LetAttributeNode(s[1], s[3], s[5], s[4])] + s[7] + +# +case_list %= idx + colon + typex + rarrow + expr + semi, lambda h, s: [CaseExpressionNode(s[1], s[3], s[5])] +case_list %= idx + colon + typex + rarrow + expr + semi + case_list, lambda h, s: [CaseExpressionNode(s[1], s[3], s[5])] + s[7] + +# +func_call %= at + typex + dot + idx + opar + arg_list + cpar, lambda h, s: (s[4], s[6], s[2]) +func_call %= at + typex + dot + idx + opar + cpar, lambda h, s: (s[4], [], s[2]) +func_call %= dot + idx + opar + arg_list + cpar, lambda h, s: (s[2], s[4]) +func_call %= dot + idx + opar + cpar, lambda h, s: (s[2], []) + +# +arg_list %= expr, lambda h, s: [s[1]] +arg_list %= expr + comma + arg_list, lambda h, s: [s[1]] + s[3] + +# +member_call %= idx + opar + arg_list + cpar, lambda h, s: MemberCallNode(s[1], s[3]) +member_call %= idx + opar + cpar, lambda h, s: MemberCallNode(s[1], []) + +# +expr %= special, lambda h, s: s[1] +expr %= cmp_expr, lambda h, s: s[1] + +# +special %= arith + leq + special_arith, lambda h, s: LessEqualNode(s[1], s[3], s[2]) +special %= arith + less + special_arith, lambda h, s: LessNode(s[1], s[3], s[2]) +special %= arith + equal + special_arith, lambda h, s: EqualNode(s[1], s[3], s[2]) +special %= special_arith, lambda h, s: s[1] + +# +special_arith %= arith + plus + special_term, lambda h, s: PlusNode(s[1], s[3], s[2]) +special_arith %= arith + minus + special_term, lambda h, s: MinusNode(s[1], s[3], s[2]) +special_arith %= special_term, lambda h, s: s[1] + +# +special_term %= term + star + unary_expr, lambda h, s: StarNode(s[1], s[3], s[2]) +special_term %= term + div + unary_expr, lambda h, s: DivNode(s[1], s[3], s[2]) +special_term %= unary_expr, lambda h, s: s[1] + +# +unary_expr %= isvoid + unary_expr, lambda h, s: IsVoidNode(s[2], s[1]) +unary_expr %= compl + unary_expr, lambda h, s: ComplementNode(s[2], s[1]) +unary_expr %= final_expr, lambda h, s: s[1] + +# +final_expr %= let + let_list + inx + expr, lambda h, s: LetInNode(s[2], 
s[4]) +final_expr %= idx + larrow + expr, lambda h, s: AssignNode(s[1], s[3]) +final_expr %= notx + expr, lambda h, s: NotNode(s[2], s[1]) + +# +cmp_expr %= arith + leq + arith, lambda h, s: LessEqualNode(s[1], s[3], s[2]) +cmp_expr %= arith + less + arith, lambda h, s: LessNode(s[1], s[3], s[2]) +cmp_expr %= arith + equal + arith, lambda h, s: EqualNode(s[1], s[3], s[2]) +cmp_expr %= arith, lambda h, s: s[1] + +# +arith %= arith + plus + term, lambda h, s: PlusNode(s[1], s[3], s[2]) +arith %= arith + minus + term, lambda h, s: MinusNode(s[1], s[3], s[2]) +arith %= term, lambda h, s: s[1] + +# +term %= term + star + void, lambda h, s: StarNode(s[1], s[3], s[2]) +term %= term + div + void, lambda h, s: DivNode(s[1], s[3], s[2]) +term %= void, lambda h, s: s[1] + +# +void %= isvoid + void, lambda h, s: IsVoidNode(s[2], s[1]) +void %= compl_expr, lambda h, s: s[1] + +# +compl_expr %= compl + void, lambda h, s: ComplementNode(s[2], s[1]) +compl_expr %= func_expr, lambda h, s: s[1] + +# +func_expr %= func_expr + func_call, lambda h, s: FunctionCallNode(s[1], *s[2]) +func_expr %= atom, lambda h, s: s[1] + +# +atom %= member_call, lambda h, s: s[1] +atom %= new + typex, lambda h, s: NewNode(s[2]) +atom %= opar + expr + cpar, lambda h, s: s[2] +atom %= idx, lambda h, s: IdNode(s[1]) +atom %= integer, lambda h, s: IntegerNode(s[1]) +atom %= string, lambda h, s: StringNode(s[1]) +atom %= boolx, lambda h, s: BoolNode(s[1]) +atom %= ocur + block + ccur, lambda h, s: BlockNode(s[2]) +atom %= ifx + expr + then + expr + elsex + expr + fi, lambda h, s: IfThenElseNode(s[2], s[4], s[1], s[6]) +atom %= whilex + expr + loop + expr + pool, lambda h, s: WhileLoopNode(s[2], s[4], s[1]) +atom %= case + expr + of + case_list + esac, lambda h, s: CaseOfNode(s[2], s[4]) diff --git a/src/core/cmp/cool/parser.py b/src/core/cmp/cool/parser.py new file mode 100644 index 00000000..d76ea4b4 --- /dev/null +++ b/src/core/cmp/cool/parser.py @@ -0,0 +1,4 @@ +from ..parser import LR1Parser +from 
.grammar import CoolGrammar + +CoolParser = LR1Parser(CoolGrammar) diff --git a/src/core/cmp/evaluation.py b/src/core/cmp/evaluation.py new file mode 100644 index 00000000..8866b381 --- /dev/null +++ b/src/core/cmp/evaluation.py @@ -0,0 +1,33 @@ +from .pycompiler import EOF +from .parser import ShiftReduceParser + +def evaluate_reverse_parse(right_parse, operations, tokens): + if not right_parse or not operations or not tokens: + return + + right_parse = iter(right_parse) + tokens = iter(tokens) + stack = [] + for operation in operations: + if operation == ShiftReduceParser.SHIFT: + token = next(tokens) + stack.append(token) + elif operation == ShiftReduceParser.REDUCE: + production = next(right_parse) + _, body = production + attributes = production.attributes + assert all(rule is None for rule in attributes[1:]), 'There must be only synteticed attributes.' + rule = attributes[0] + + if len(body): + synteticed = [None] + stack[-len(body):] + value = rule(None, synteticed) + stack[-len(body):] = [value] + else: + stack.append(rule(None, None)) + else: + raise Exception('Invalid action!!!') + + assert len(stack) == 1 + assert isinstance(next(tokens).token_type, EOF) + return stack[0] \ No newline at end of file diff --git a/src/core/cmp/parser/LR1.py b/src/core/cmp/parser/LR1.py new file mode 100644 index 00000000..d5b4078d --- /dev/null +++ b/src/core/cmp/parser/LR1.py @@ -0,0 +1,30 @@ +from .shift_reduce import ShiftReduceParser +from .utils import build_LR1_automaton, upd_table + +class LR1Parser(ShiftReduceParser): + def _build_parsing_table(self): + self.ok = True + G = self.Augmented = self.G.AugmentedGrammar(True) + + automaton = self.automaton = build_LR1_automaton(G) + for i, node in enumerate(automaton): + if self.verbose: print(i, '\t', '\n\t '.join(str(x) for x in node.state), '\n') + node.idx = i + node.tag = f'I{i}' + + for node in automaton: + idx = node.idx + for item in node.state: + if item.IsReduceItem: + prod = item.production + if prod.Left == 
G.startSymbol: + self.ok &= upd_table(self.action, idx, G.EOF, (ShiftReduceParser.OK, '')) + else: + for lookahead in item.lookaheads: + self.ok &= upd_table(self.action, idx, lookahead, (ShiftReduceParser.REDUCE, prod)) + else: + next_symbol = item.NextSymbol + if next_symbol.IsTerminal: + self.ok &= upd_table(self.action, idx, next_symbol, (ShiftReduceParser.SHIFT, node[next_symbol.Name][0].idx)) + else: + self.ok &= upd_table(self.goto, idx, next_symbol, node[next_symbol.Name][0].idx) diff --git a/src/core/cmp/parser/__init__.py b/src/core/cmp/parser/__init__.py new file mode 100644 index 00000000..0dd59a64 --- /dev/null +++ b/src/core/cmp/parser/__init__.py @@ -0,0 +1,2 @@ +from .LR1 import LR1Parser +from .shift_reduce import ShiftReduceParser diff --git a/src/core/cmp/parser/shift_reduce.py b/src/core/cmp/parser/shift_reduce.py new file mode 100644 index 00000000..03acaaaa --- /dev/null +++ b/src/core/cmp/parser/shift_reduce.py @@ -0,0 +1,49 @@ +class ShiftReduceParser: + SHIFT = 'SHIFT' + REDUCE = 'REDUCE' + OK = 'OK' + + def __init__(self, G, verbose=False): + self.G = G + self.verbose = verbose + self.action = {} + self.goto = {} + self._build_parsing_table() + + def _build_parsing_table(self): + raise NotImplementedError() + + def __call__(self, w, get_shift_reduce=False): + stack = [0] + cursor = 0 + output = [] + operations = [] + + while True: + state = stack[-1] + lookahead = w[cursor].token_type + if self.verbose: print(stack, w[cursor:]) + + # Your code here!!! (Detect error) + if state not in self.action or lookahead not in self.action[state]: + return None, (True, w[cursor]) #//TODO: Build the correct error using `w[cursor]` + + action, tag = list(self.action[state][lookahead])[0] + # Your code here!!! (Shift case) + if action is ShiftReduceParser.SHIFT: + operations.append(ShiftReduceParser.SHIFT) + stack.append(tag) + cursor += 1 + # Your code here!!! 
(Reduce case) + elif action is ShiftReduceParser.REDUCE: + operations.append(ShiftReduceParser.REDUCE) + if len(tag.Right): + stack = stack[:-len(tag.Right)] + stack.append(list(self.goto[stack[-1]][tag.Left])[0]) + output.append(tag) + # Your code here!!! (OK case) + elif action is ShiftReduceParser.OK: + return (output if not get_shift_reduce else(output,operations)), (False, None) + # Your code here!!! (Invalid case) + else: + raise ValueError diff --git a/src/core/cmp/parser/utils.py b/src/core/cmp/parser/utils.py new file mode 100644 index 00000000..ba414acc --- /dev/null +++ b/src/core/cmp/parser/utils.py @@ -0,0 +1,229 @@ +from ..pycompiler import Item +from ..automata import State +from ..utils import ContainerSet + +def compute_local_first(firsts, alpha): + first_alpha = ContainerSet() + + try: + alpha_is_epsilon = alpha.IsEpsilon + except: + alpha_is_epsilon = False + + if alpha_is_epsilon: + first_alpha.set_epsilon() + else: + for symbol in alpha: + first_alpha.update(firsts[symbol]) + if not firsts[symbol].contains_epsilon: + break + else: + first_alpha.set_epsilon() + return first_alpha + + +def compute_firsts(G): + firsts = {} + change = True + + for terminal in G.terminals: + firsts[terminal] = ContainerSet(terminal) + + for nonterminal in G.nonTerminals: + firsts[nonterminal] = ContainerSet() + + while change: + change = False + + for production in G.Productions: + X = production.Left + alpha = production.Right + + first_X = firsts[X] + + try: + first_alpha = firsts[alpha] + except: + first_alpha = firsts[alpha] = ContainerSet() + + local_first = compute_local_first(firsts, alpha) + + change |= first_alpha.hard_update(local_first) + change |= first_X.hard_update(local_first) + return firsts + + +def compute_follows(G, firsts): + follows = {} + change = True + + local_firsts = {} + + for nonterminal in G.nonTerminals: + follows[nonterminal] = ContainerSet() + follows[G.startSymbol] = ContainerSet(G.EOF) + + while change: + change = False + + for 
production in G.Productions: + X = production.Left + alpha = production.Right + + follow_X = follows[X] + + for i, Y in enumerate(alpha): + if Y.IsTerminal: + continue + beta = alpha[i + 1:] + try: + beta_f = local_firsts[beta] + except KeyError: + beta_f = local_firsts[beta] = compute_local_first(firsts, beta) + change |= follows[Y].update(beta_f) + if beta_f.contains_epsilon: + change |= follows[Y].update(follow_X) + + return follows + + +def upd_table(table, symbol, trans, val): + if symbol not in table: + table[symbol] = {} + if trans not in table[symbol]: + table[symbol][trans] = set() + table[symbol][trans].update([val]) + ans = (len(table[symbol][trans]) == 1) + return ans + + +def build_LR0_automaton(G): + assert len(G.startSymbol.productions) == 1, 'Grammar must be augmented' + + start_production = G.startSymbol.productions[0] + start_item = Item(start_production, 0) + + automaton = State(start_item, True) + + pending = [start_item] + visited = {start_item: automaton} + + while pending: + current_item = pending.pop() + if current_item.IsReduceItem: + continue + + # Your code here!!! (Decide which transitions to add) + transitions = [] + + next_item = current_item.NextItem() + if next_item not in visited: + visited[next_item] = State(next_item, True) + pending.append(next_item) + transitions.append(visited[next_item]) + + symbol = current_item.NextSymbol + if symbol.IsNonTerminal: + for prod in symbol.productions: + item = Item(prod, 0) + if item not in visited: + visited[item] = State(item, True) + pending.append(item) + transitions.append(visited[item]) + + current_state = visited[current_item] + # Your code here!!! 
(Add the decided transitions) + current_state.add_transition(current_item.NextSymbol.Name, transitions[0]) + for item in transitions[1:]: + current_state.add_epsilon_transition(item) + return automaton + + +def expand(item, firsts): + next_symbol = item.NextSymbol + if next_symbol is None or not next_symbol.IsNonTerminal: + return [] + + lookaheads = ContainerSet() + # Your code here!!! (Compute lookahead for child items) + for preview in item.Preview(): + lookaheads.hard_update(compute_local_first(firsts, preview)) + + assert not lookaheads.contains_epsilon + # Your code here!!! (Build and return child items) + return [Item(prod, 0, lookaheads) for prod in next_symbol.productions] + + +def compress(items): + centers = {} + + for item in items: + center = item.Center() + try: + lookaheads = centers[center] + except KeyError: + centers[center] = lookaheads = set() + lookaheads.update(item.lookaheads) + + return {Item(x.production, x.pos, set(lookahead)) for x, lookahead in centers.items()} + + +def closure_lr1(items, firsts): + closure = ContainerSet(*items) + + changed = True + while changed: + changed = False + + new_items = ContainerSet() + for item in closure: + new_items.extend(expand(item, firsts)) + + changed = closure.update(new_items) + + return compress(closure) + + +def goto_lr1(items, symbol, firsts=None, just_kernel=False): + assert just_kernel or firsts is not None, '`firsts` must be provided if `just_kernel=False`' + items = frozenset(item.NextItem() for item in items if item.NextSymbol == symbol) + return items if just_kernel else closure_lr1(items, firsts) + + +def build_LR1_automaton(G): + assert len(G.startSymbol.productions) == 1, 'Grammar must be augmented' + + firsts = compute_firsts(G) + firsts[G.EOF] = ContainerSet(G.EOF) + + start_production = G.startSymbol.productions[0] + start_item = Item(start_production, 0, lookaheads=(G.EOF,)) + start = frozenset([start_item]) + + closure = closure_lr1(start, firsts) + automaton = 
State(frozenset(closure), True) + + pending = [start] + visited = {start: automaton} + + while pending: + current = pending.pop() + current_state = visited[current] + + for symbol in G.terminals + G.nonTerminals: + # Your code here!!! (Get/Build `next_state`) + items = current_state.state + kernel = goto_lr1(items, symbol, just_kernel=True) + if not kernel: + continue + try: + next_state = visited[kernel] + except KeyError: + closure = goto_lr1(items, symbol, firsts) + next_state = visited[kernel] = State(frozenset(closure), True) + pending.append(kernel) + + current_state.add_transition(symbol.Name, next_state) + + automaton.set_formatter(lambda x: "") + return automaton diff --git a/src/core/cmp/pycompiler.py b/src/core/cmp/pycompiler.py new file mode 100644 index 00000000..8d885f12 --- /dev/null +++ b/src/core/cmp/pycompiler.py @@ -0,0 +1,511 @@ +import json + +class Symbol(object): + + def __init__(self, name, grammar): + self.Name = name + self.Grammar = grammar + + def __str__(self): + return self.Name + + def __repr__(self): + return repr(self.Name) + + def __add__(self, other): + if isinstance(other, Symbol): + return Sentence(self, other) + + raise TypeError(other) + + def __or__(self, other): + + if isinstance(other, (Sentence)): + return SentenceList(Sentence(self), other) + + raise TypeError(other) + + @property + def IsEpsilon(self): + return False + + def __len__(self): + return 1 + +class NonTerminal(Symbol): + + + def __init__(self, name, grammar): + super().__init__(name, grammar) + self.productions = [] + + + def __imod__(self, other): + + if isinstance(other, (Sentence)): + p = Production(self, other) + self.Grammar.Add_Production(p) + return self + + if isinstance(other, tuple): + assert len(other) > 1 + + if len(other) == 2: + other += (None,) * len(other[0]) + + assert len(other) == len(other[0]) + 2, "Debe definirse una, y solo una, regla por cada símbolo de la producción" + # assert len(other) == 2, "Tiene que ser una Tupla de 2 elementos 
class Sentence(object):
    """Immutable, hashable sequence of grammar symbols.

    Every positional argument must expose an ``IsEpsilon`` attribute; the
    ones flagged as epsilon are dropped, so a sentence built only from
    epsilon symbols has length zero.
    """

    def __init__(self, *args):
        # Epsilon contributes nothing to a sentence, so it is filtered out.
        self._symbols = tuple(x for x in args if not x.IsEpsilon)
        # The symbol tuple never changes afterwards: hash it once.
        self.hash = hash(self._symbols)

    def __len__(self):
        return len(self._symbols)

    def __add__(self, other):
        """Return the concatenation with a symbol or another sentence.

        Raises TypeError for any other operand.
        """
        if isinstance(other, Symbol):
            return Sentence(*(self._symbols + (other,)))

        if isinstance(other, Sentence):
            return Sentence(*(self._symbols + other._symbols))

        raise TypeError(other)

    def __or__(self, other):
        """Return a SentenceList holding this sentence and ``other``.

        Raises TypeError when ``other`` is neither a Sentence nor a Symbol.
        """
        if isinstance(other, Sentence):
            return SentenceList(self, other)

        if isinstance(other, Symbol):
            return SentenceList(self, Sentence(other))

        raise TypeError(other)

    def __repr__(self):
        return str(self)

    def __str__(self):
        # Symbols separated by single spaces; surrounding whitespace trimmed.
        return " ".join(str(symbol) for symbol in self._symbols).strip()

    def __iter__(self):
        return iter(self._symbols)

    def __getitem__(self, index):
        return self._symbols[index]

    def __eq__(self, other):
        # Comparing against an unrelated object must yield "not equal"
        # instead of blowing up on the missing ``_symbols`` attribute.
        if not isinstance(other, Sentence):
            return NotImplemented
        return self._symbols == other._symbols

    def __hash__(self):
        return self.hash

    @property
    def IsEpsilon(self):
        return False
-> %s' % (self.Left, self.Right) + + def __iter__(self): + yield self.Left + yield self.Right + + + @property + def IsEpsilon(self): + return self.Right.IsEpsilon + + # sintetizar en ingles??????, pending aggrement + def syntetice(self): + pass + +class Grammar(): + + def __init__(self): + + self.Productions = [] + self.nonTerminals = [] + self.terminals = [] + self.startSymbol = None + # production type + self.pType = None + self.Epsilon = Epsilon(self) + self.EOF = EOF(self) + + self.symbDict = { '$': self.EOF } + + def NonTerminal(self, name, startSymbol = False): + + name = name.strip() + if not name: + raise Exception("Empty name") + + term = NonTerminal(name,self) + + if startSymbol: + + if self.startSymbol is None: + self.startSymbol = term + else: + raise Exception("Cannot define more than one start symbol.") + + self.nonTerminals.append(term) + self.symbDict[name] = term + return term + + def NonTerminals(self, names): + + ans = tuple((self.NonTerminal(x) for x in names.strip().split())) + + return ans + + + def Add_Production(self, production): + + if len(self.Productions) == 0: + self.pType = type(production) + + assert type(production) == self.pType, "The Productions most be of only 1 type." 
def __str__(self):
    """Return a readable listing of non-terminals, terminals and productions.

    Symbols are rendered with ``str`` and separated by ", ". A grammar with
    no terminals or no non-terminals produces an empty entry for that
    section instead of failing.
    """
    # ``join`` copes with an empty symbol list; the previous '%s' template
    # always required at least one symbol.
    non_terminals = ', '.join(str(symbol) for symbol in self.nonTerminals)
    terminals = ', '.join(str(symbol) for symbol in self.terminals)

    return (
        'Non-Terminals:\n\t' + non_terminals + '\n'
        + 'Terminals:\n\t' + terminals + '\n'
        + 'Productions:\n\t' + str(self.Productions)
    )
class Item:
    """A production with a dot position and a set of lookahead symbols.

    ``production`` must expose ``Left`` and ``Right``; ``Right`` must support
    ``len``, iteration, indexing and slicing. Items are hashable, so the
    production has to be hashable as well.
    """

    def __init__(self, production, pos, lookaheads=()):
        self.production = production
        self.pos = pos
        # Frozen so the item can be hashed and stored in sets / dict keys.
        self.lookaheads = frozenset(lookaheads)

    def __str__(self):
        right = self.production.Right
        pieces = [str(symbol) for symbol in right]
        if not pieces:
            # Empty body: the dot is shown regardless of ``pos``.
            pieces = ["."]
        elif 0 <= self.pos <= len(pieces):
            # Dot goes right before the symbol at ``pos`` (or at the very end).
            pieces.insert(self.pos, ".")
        # str(frozenset) looks like "frozenset({...})"; the slice keeps only
        # the "{...}" part (and nothing at all for an empty set).
        lookaheads = str(self.lookaheads)[10:-1]
        return str(self.production.Left) + " -> " + "".join(pieces) + ", " + lookaheads

    def __repr__(self):
        return str(self)

    def __eq__(self, other):
        # Unrelated objects are simply "not equal" rather than an error.
        if not isinstance(other, Item):
            return NotImplemented
        return (
            self.pos == other.pos
            and self.production == other.production
            and self.lookaheads == other.lookaheads
        )

    def __hash__(self):
        return hash((self.production, self.pos, self.lookaheads))

    @property
    def IsReduceItem(self):
        """True when the dot sits after the last symbol of the body."""
        return len(self.production.Right) == self.pos

    @property
    def NextSymbol(self):
        """Symbol right after the dot, or None when the dot is at the end."""
        if self.pos < len(self.production.Right):
            return self.production.Right[self.pos]
        return None

    def NextItem(self):
        """Item with the dot advanced one position, or None at the end."""
        if self.pos < len(self.production.Right):
            return Item(self.production, self.pos + 1, self.lookaheads)
        return None

    def Preview(self, skip=1):
        """Return, per lookahead, the symbols ``skip`` past the dot plus it.

        The sliced remainder of the body is concatenated with a 1-tuple, so
        slicing ``production.Right`` is expected to yield a tuple.
        """
        unseen = self.production.Right[self.pos + skip:]
        return [unseen + (lookahead,) for lookahead in self.lookaheads]

    def Center(self):
        """Return the same item stripped of its lookaheads."""
        return Item(self.production, self.pos)
class Type:
    """A named type with its own attributes and methods and an optional parent.

    Attribute and method lookups that miss locally are delegated to the
    parent chain.
    """

    def __init__(self, name:str='Object'):
        self.name = name
        self.attributes = []   # locally declared attributes, in declaration order
        self.methods = {}      # locally declared methods, keyed by method name
        self.parent = None

    def set_parent(self, parent):
        """Attach ``parent``; raises SemanticError when one is already set."""
        if self.parent is None:
            self.parent = parent
            return
        raise SemanticError(f'Parent type is already set for {self.name}.')

    def get_attribute(self, name:str):
        """Return the attribute called ``name`` from this type or an ancestor.

        Raises AttributeError (naming this type) when nobody defines it.
        """
        for candidate in self.attributes:
            if candidate.name == name:
                return candidate

        if self.parent is None:
            raise AttributeError(f'Attribute "{name}" is not defined in {self.name}.')
        try:
            return self.parent.get_attribute(name)
        except AttributeError:
            # Re-raise so the message mentions the type that was queried.
            raise AttributeError(f'Attribute "{name}" is not defined in {self.name}.')

    def define_attribute(self, name:str, typex):
        """Declare a new attribute; raises SemanticError if the name is visible already."""
        try:
            self.get_attribute(name)
        except AttributeError:
            already_visible = False
        else:
            already_visible = True

        if already_visible:
            raise SemanticError(f'Attribute "{name}" is already defined in {self.name}.')

        created = Attribute(name, typex)
        self.attributes.append(created)
        return created

    def get_method(self, name:str):
        """Return the method called ``name`` from this type or an ancestor.

        Raises AttributeError (naming this type) when nobody defines it.
        """
        if name in self.methods:
            return self.methods[name]

        if self.parent is None:
            raise AttributeError(f'Method "{name}" is not defined in {self.name}.')
        try:
            return self.parent.get_method(name)
        except AttributeError:
            raise AttributeError(f'Method "{name}" is not defined in {self.name}.')

    def define_method(self, name:str, param_names:list, param_types:list, return_type):
        """Declare a method on this type and return it.

        Raises SemanticError when the method is already declared locally, or
        when an inherited method with the same name has another signature.
        """
        # //TODO: Remove the below if clause
        if name in self.methods:
            raise SemanticError(f'Method "{name}" already defined in {self.name}')

        try:
            inherited = self.get_method(name)
        except AttributeError:
            pass
        else:
            # Overriding is only allowed with an identical signature.
            signature_changed = (
                inherited.return_type != return_type
                or inherited.param_types != param_types
            )
            if signature_changed:
                raise SemanticError(f'Method "{name}" already defined in {self.name} with a different signature.')

        created = Method(name, param_names, param_types, return_type)
        self.methods[name] = created
        return created

    def all_attributes(self, clean=True):
        """Collect ``(attribute, owner)`` pairs, ancestors first.

        With ``clean`` the values view is returned, otherwise the ordered
        name -> pair mapping itself.
        """
        if self.parent is None:
            collected = OrderedDict()
        else:
            collected = self.parent.all_attributes(False)
        collected.update((attr.name, (attr, self)) for attr in self.attributes)
        if clean:
            return collected.values()
        return collected

    def all_methods(self, clean=True):
        """Collect ``(method, owner)`` pairs, ancestors first; local ones override."""
        if self.parent is None:
            collected = OrderedDict()
        else:
            collected = self.parent.all_methods(False)
        collected.update((method.name, (method, self)) for method in self.methods.values())
        if clean:
            return collected.values()
        return collected

    def conforms_to(self, other):
        """Tell whether this type can be used where ``other`` is expected."""
        accepted = other.bypass() or self == other
        if accepted:
            return accepted
        if self.parent is None:
            return False
        return self.parent.conforms_to(other)

    def bypass(self):
        """True when every type conforms to this one (only for 'Object' here)."""
        return self.name == 'Object'

    def __str__(self):
        has_attributes = bool(self.attributes)
        has_methods = bool(self.methods)

        pieces = [f'type {self.name}']
        if self.parent is not None:
            pieces.append(f' : {self.parent.name}')
        pieces.append(' {')
        if has_attributes or has_methods:
            pieces.append('\n\t')
        pieces.append('\n\t'.join(str(x) for x in self.attributes))
        if has_attributes:
            pieces.append('\n\t')
        pieces.append('\n\t'.join(str(x) for x in self.methods.values()))
        if has_methods:
            pieces.append('\n')
        pieces.append('}\n')
        return ''.join(pieces)

    def __repr__(self):
        return str(self)
class Context:
    """Registry of the known types, keyed by type name."""

    def __init__(self):
        self.types = {}

    def append_type(self, new_type):
        """Register ``new_type`` under its name and return it.

        Raises SemanticError when that name is already registered.
        """
        key = new_type.name
        if key in self.types:
            raise SemanticError(f'Type with the same name ({key}) already in context.')
        self.types[key] = new_type
        return new_type

    def create_type(self, name:str):
        """Create a plain Type called ``name`` and register it."""
        fresh = Type(name)
        return self.append_type(fresh)

    def get_type(self, name:str):
        """Return the registered type called ``name``; raises TypeError if unknown."""
        if name in self.types:
            return self.types[name]
        raise TypeError(f'Type "{name}" is not defined.')

    def __str__(self):
        # Every line of every type's rendering gets its own tab-indented row.
        rows = []
        for registered in self.types.values():
            rows.extend(str(registered).split('\n'))
        return '{\n\t' + '\n\t'.join(rows) + '\n}'

    def __repr__(self):
        return str(self)
class ContainerSet:
    """A set of values plus a separate "contains epsilon" flag.

    Every mutating method returns True when it actually changed the
    container, which lets callers iterate until nothing changes.
    """

    def __init__(self, *values, contains_epsilon=False):
        self.set = set(values)
        self.contains_epsilon = contains_epsilon

    def add(self, value):
        """Insert ``value``; return True when it was not present before."""
        size_before = len(self.set)
        self.set.add(value)
        return size_before != len(self.set)

    def extend(self, values):
        """Insert every value; return True when at least one was new."""
        change = False
        for value in values:
            change |= self.add(value)
        return change

    def set_epsilon(self, value=True):
        """Set the epsilon flag; return True when the flag changed."""
        previous = self.contains_epsilon
        self.contains_epsilon = value
        return previous != self.contains_epsilon

    def update(self, other):
        """Merge the values (not the flag) of ``other``; True when the set grew."""
        size_before = len(self.set)
        self.set.update(other.set)
        return size_before != len(self.set)

    def epsilon_update(self, other):
        """Turn the epsilon flag on if ``other`` has it; True when it changed."""
        return self.set_epsilon(self.contains_epsilon | other.contains_epsilon)

    def hard_update(self, other):
        """Merge both values and epsilon flag of ``other``; True on any change."""
        # Non short-circuit ``|`` on purpose: both updates must always run.
        return self.update(other) | self.epsilon_update(other)

    def find_match(self, match):
        """Return the stored element equal to ``match`` or None."""
        for item in self.set:
            if item == match:
                return item
        return None

    def __len__(self):
        # Epsilon counts as one extra element.
        return len(self.set) + int(self.contains_epsilon)

    def __str__(self):
        return '%s-%s' % (str(self.set), self.contains_epsilon)

    def __repr__(self):
        return str(self)

    def __iter__(self):
        return iter(self.set)

    def __bool__(self):
        # Python 3 truth hook; a container holding only epsilon is truthy.
        return len(self) > 0

    # Python 2 spelling kept so explicit calls to the old name keep working.
    __nonzero__ = __bool__

    def __eq__(self, other):
        if isinstance(other, set):
            # Against a plain set only the values are compared.
            return self.set == other
        return (
            isinstance(other, ContainerSet)
            and self.set == other.set
            and self.contains_epsilon == other.contains_epsilon
        )

    # Mutable container with value-based equality: explicitly unhashable.
    __hash__ = None
"while": "WHILE", + "case": "CASE", + "esac": "ESAC", + "new": "NEW", + "of": "OF", + "not": "NOT", + "true": "TRUE", + "false": "FALSE", + } + + tokenType = { + "CLASS": G.classx, + "ELSE": G.elsex, + "FI": G.fi, + "IF": G.ifx, + "IN": G.inx, + "INHERITS": G.inherits, + "ISVOID": G.isvoid, + "LET": G.let, + "LOOP": G.loop, + "POOL": G.pool, + "THEN": G.then, + "WHILE": G.whilex, + "CASE": G.case, + "ESAC": G.esac, + "NEW": G.new, + "OF": G.of, + "NOT": G.notx, + "OBJECTIDENTIFIER": G.idx, + "TYPEIDENTIFIER": G.typex, + "LCBRA": G.ocur, + "RCBRA": G.ccur, + "LPAREN": G.opar, + "RPAREN": G.cpar, + "COLON": G.colon, + "SEMICOLON": G.semi, + "NUMBER": G.integer, + "eof": G.eof, + "PLUS": G.plus, + "MINUS": G.minus, + "DIVIDE": G.div, + "TIMES": G.star, + "LESS": G.less, + "LESSEQ": G.leq, + "EQUALS": G.equal, + "TRUE": G.boolx, + "FALSE": G.boolx, + "COMPLEMENT": G.compl, + "RARROW": G.rarrow, + "LARROW": G.larrow, + "COMMA": G.comma, + "DOT": G.dot, + "AT": G.at, + "STRING": G.string, + } + + tokens = [ + 'NUMBER', + 'TYPEIDENTIFIER', + 'OBJECTIDENTIFIER', + 'EQUALS', + 'PLUS', + 'MINUS', + 'TIMES', + 'DIVIDE', + 'LPAREN', + 'RPAREN', + 'STRING', + 'LESS', + 'LESSEQ', + 'LCBRA', + 'RCBRA', + 'COLON', + 'SEMICOLON', + 'COMPLEMENT', + 'RARROW', + 'LARROW', + 'COMMA', + 'DOT', + 'AT', + 'ERROR' + ] + list(reserved.values()) + + t_ignore = ' \t\f\r\t\v' + t_comments_ignore = '' + + def __init__(self): + self.build() + + def build(self, **kwargs): + self.lexer = lex.lex(module=self, errorlog=lex.NullLogger(), **kwargs) + self.lexer.eof= (1,1) + self.comment_level = 0 + self.string = "" + + def t_comments_COMMENTOUT(self, t): + r'\*\)' + if self.comment_level == 0: + self.lexer.begin('INITIAL') + else: + self.comment_level -= 1 + + def t_STRINGIN(self, t): + r'"' + self.string = "" + t.lexer.begin('strings') + + def t_strings_NULL(self, t): + r'\0' + line = t.lexer.lineno + column = self.compute_column(t) + t.type = "ERROR" + t.value = f"({line},{column}) - 
def t_TYPEIDENTIFIER(self, t):
    r'[A-Z][a-zA-Z0-9_]*'
    # NOTE: the raw string above is the token regex, not documentation.
    # Identifiers are letters, digits and underscores only; a '|' inside the
    # character class would be matched literally, so it must not be there.
    keyword = t.value.lower()
    # "true"/"false" are only keywords when their first letter is lowercase;
    # a capitalised spelling stays a plain type identifier.
    capitalised_bool = keyword in ("false", "true") and t.value[0] not in ("f", "t")
    if not capitalised_bool:
        # Keywords are matched case-insensitively through the reserved table.
        t.type = self.reserved.get(keyword, "TYPEIDENTIFIER")
    self.add_line_column(t)
    return t

def t_OBJECTIDENTIFIER(self,t):
    r'[a-z][a-zA-Z0-9_]*'
    # NOTE: the raw string above is the token regex, not documentation.
    keyword = t.value.lower()
    capitalised_bool = keyword in ("false", "true") and t.value[0] not in ("f", "t")
    if not capitalised_bool:
        t.type = self.reserved.get(keyword, "OBJECTIDENTIFIER")
    self.add_line_column(t)
    return t
def compute_column(self, token):
    """Return the 1-based column of ``token`` inside ``self.text``."""
    offset = token.lexpos
    # Index of the newline that precedes the token, or -1 on the first line;
    # the distance to it is exactly the 1-based column.
    previous_newline = self.text.rfind('\n', 0, offset)
    return offset - previous_newline
def tokenize(self, text):
    """Lex ``text`` and return the resulting Token list, ending with EOF.

    Tokens whose lexer type is "ERROR" keep the string "ERROR" as their
    token type; every other token type is translated through
    ``self.tokenType``. Each token carries the ``row`` and ``column``
    recorded while lexing.
    """
    self.text = text

    # Start every run from a clean state so an instance can be reused:
    # feeding new input does not rewind the line counter, and a previous
    # input that ended inside a nested comment leaves a stale depth behind.
    self.comment_level = 0
    self.string = ""
    self.lexer.lineno = 1
    self.lexer.begin('INITIAL')
    self.lexer.input(text)

    tokens = []
    for raw in self.lexer:
        if raw.type == "ERROR":
            token = Token(raw.value, "ERROR")
        else:
            token = Token(raw.value, self.tokenType[raw.type])
        token.row = raw.row
        token.column = raw.column
        tokens.append(token)

    # The end-of-input position is stored on the lexer by the eof rule.
    eof_token = Token('$', G.eof)
    eof_token.row, eof_token.column = self.lexer.eof
    tokens.append(eof_token)
    return tokens
@visitor.when(FunctionNode)
def visit(self, node):
    # Renders a function as its name followed by three tab-indented
    # sections: parameters, local variables and instructions.
    params = '\n\t'.join(self.visit(x) for x in node.params)
    localvars = '\n\t'.join(self.visit(x) for x in node.localvars)

    # Visit every instruction exactly once; the rendered text is reused both
    # for the "skip empty results" filter and for the output itself.
    rendered = [self.visit(x) for x in node.instructions]
    instructions = '\n\t'.join(text for text in rendered if text != [])

    return f'function {node.name} {{\n\t{params}\n\n\t{localvars}\n\n\t{instructions}\n}}'
@visitor.when(GetAttribNode) + def visit(self, node): + return f'{node.dest} = GETATTR {node.obj} {node.attr}' + + @visitor.when(SetAttribNode) + def visit(self, node): + return f'SETATTR {node.obj} {node.attr} {node.value}' + + @visitor.when(AllocateNode) + def visit(self, node): + return f'{node.dest} = ALLOCATE {node.type}' + + @visitor.when(TypeOfNode) + def visit(self, node): + return f'{node.dest} = TYPEOF {node.obj}' + + @visitor.when(LabelNode) + def visit(self, node): + return f'LABEL {node.label}' + + @visitor.when(GotoNode) + def visit(self, node): + return f'GOTO {node.label}' + + @visitor.when(GotoIfNode) + def visit(self, node): + return f'IF {node.condition} GOTO {node.label}' + + @visitor.when(StaticCallNode) + def visit(self, node): + return f'{node.dest} = CALL {node.function}' + + @visitor.when(DynamicCallNode) + def visit(self, node): + return f'{node.dest} = VCALL {node.type} {node.method}' + + @visitor.when(ArgNode) + def visit(self, node): + return f'ARG {node.name}' + + @visitor.when(ReturnNode) + def visit(self, node): + return f'RETURN {node.value if node.value is not None else ""}' + + @visitor.when(LoadNode) + def visit(self, node): + return f'{node.dest} = Load {node.msg}' + + @visitor.when(ExitNode) + def visit(self, node): + return f'EXIT' + + @visitor.when(TypeNameNode) + def visit(self, node): + return f'{node.dest} = TYPENAME {node.source}' + + @visitor.when(NameNode) + def visit(self, node): + return f'{node.dest} = NAME {node.name}' + + @visitor.when(CopyNode) + def visit(self, node): + return f'{node.dest} = COPY {node.source}' + + @visitor.when(LengthNode) + def visit(self, node): + return f'{node.dest} = LENGTH {node.source}' + + @visitor.when(ConcatNode) + def visit(self, node): + return f'{node.dest} = CONCAT {node.prefix} {node.suffix}' + + @visitor.when(SubstringNode) + def visit(self, node): + return f'{node.dest} = SUBSTRING {node.index} {node.length}' + + @visitor.when(ReadStrNode) + def visit(self, node): + return 
f'{node.dest} = READSTR' + + @visitor.when(ReadIntNode) + def visit(self, node): + return f'{node.dest} = READINT' + + @visitor.when(PrintStrNode) + def visit(self, node): + return f'PRINT {node.value}' + + @visitor.when(PrintIntNode) + def visit(self, node): + return f'PRINT {node.value}' + + @visitor.when(ComplementNode) + def visit(self, node): + return f'{node.dest} = COMPL {node.obj}' + + @visitor.when(VoidNode) + def visit(self, node): + return 'VOID' + + @visitor.when(ErrorNode) + def visit(self, node): + return f'ERROR {node.data_node}' + + printer = PrintVisitor() + return (lambda ast: printer.visit(ast)) + \ No newline at end of file diff --git a/src/core/visitors/cil/cil.py b/src/core/visitors/cil/cil.py new file mode 100644 index 00000000..8c9cc2fa --- /dev/null +++ b/src/core/visitors/cil/cil.py @@ -0,0 +1,258 @@ +#AST +class Node: + pass + +class ProgramNode(Node): + def __init__(self, dottypes, dotdata, dotcode): + self.dottypes = dottypes + self.dotdata = dotdata + self.dotcode = dotcode + +class TypeNode(Node): + def __init__(self, name): + self.name = name + self.attributes = [] + self.methods = [] + +class DataNode(Node): + def __init__(self, vname, value): + self.name = vname + self.value = value + +class FunctionNode(Node): + def __init__(self, fname, params, localvars, instructions): + self.name = fname + self.params = params + self.localvars = localvars + self.instructions = instructions + self.ids = dict() + self.labels_count = 0 + +class ParamNode(Node): + def __init__(self, name): + self.name = name + +class LocalNode(Node): + def __init__(self, name): + self.name = name + +class InstructionNode(Node): + def __init__(self): + self.leader = False + +class AssignNode(InstructionNode): + def __init__(self, dest, source): + self.dest = dest + self.source = source + + def __repr__(self): + return f"{self.dest} = {self.source}" + +class ArithmeticNode(InstructionNode): + def __init__(self, dest, left, right): + self.dest = dest + self.left = 
left + self.right = right + +class PlusNode(ArithmeticNode): + pass + +class MinusNode(ArithmeticNode): + pass + +class StarNode(ArithmeticNode): + pass + +class DivNode(ArithmeticNode): + pass + +class LessEqualNode(ArithmeticNode): + pass + +class LessNode(ArithmeticNode): + pass + +class EqualNode(ArithmeticNode): + def __repr__(self): + return f"{self.dest} = {self.left} == {self.right}" + +class EqualStrNode(ArithmeticNode): + pass + +class GetAttribNode(InstructionNode): + def __init__(self, dest, obj, attr, computed_type): + self.dest = dest + self.obj = obj + self.attr = attr + self.computed_type = computed_type + + def __repr__(self): + return f"{self.dest} = GETATTR {self.obj} {self.attr}" + +class SetAttribNode(InstructionNode): + def __init__(self, obj, attr, value, computed_type): + self.obj = obj + self.attr = attr + self.value = value + self.computed_type = computed_type + +class GetIndexNode(InstructionNode): + pass + +class SetIndexNode(InstructionNode): + pass + +class AllocateNode(InstructionNode): + def __init__(self, itype, dest): + self.type = itype + self.dest = dest + +class ArrayNode(InstructionNode): + pass + +class TypeOfNode(InstructionNode): + def __init__(self, obj, dest): + self.obj = obj + self.dest = dest + + def __repr__(self): + return f"{self.dest} = TYPEOF {self.obj}" + +class LabelNode(InstructionNode): + def __init__(self, label): + self.label = label + + def __repr__(self): + return f"LABEL {self.label}:" + +class GotoNode(InstructionNode): + def __init__(self, label): + self.label = label + + def __repr__(self): + return f"GOTO {self.label}" + +class GotoIfNode(InstructionNode): + def __init__(self, condition, label): + self.condition = condition + self.label = label + + def __repr__(self): + return f"GOTO {self.label} if {self.condition}" + +class StaticCallNode(InstructionNode): + def __init__(self, function, dest): + self.function = function + self.dest = dest + + def __repr__(self): + return f"{self.dest} = CALL 
{self.function}" + +class DynamicCallNode(InstructionNode): + def __init__(self, xtype, method, dest, computed_type): + self.type = xtype + self.method = method + self.dest = dest + self.computed_type = computed_type + + def __repr__(self): + return f"{self.dest} = VCALL {self.type} {self.method}" + +class ArgNode(InstructionNode): + def __init__(self, name): + self.name = name + + def __repr__(self): + return f"ARG {self.name}" + +class ReturnNode(InstructionNode): + def __init__(self, value=None): + self.value = value + + def __repr__(self): + return f"RETURN {self.value}" + +class LoadNode(InstructionNode): + def __init__(self, dest, msg): + self.dest = dest + self.msg = msg + + def __repr__(self): + return f"{self.dest} LOAD {self.msg}" + +class ExitNode(InstructionNode): + pass + +class TypeNameNode(InstructionNode): + def __init__(self, dest, source): + self.dest = dest + self.source = source + + def __repr__(self): + return f"{self.dest} = TYPENAME {self.source}" + +class NameNode(InstructionNode): + def __init__(self, dest, name): + self.dest = dest + self.name = name + + def __repr__(self): + return f"{self.dest} = NAME {self.name}" + +class CopyNode(InstructionNode): + def __init__(self, dest, source): + self.dest = dest + self.source = source + +class LengthNode(InstructionNode): + def __init__(self, dest, source): + self.dest = dest + self.source = source + +class ConcatNode(InstructionNode): + def __init__(self, dest, prefix, suffix, length): + self.dest = dest + self.prefix = prefix + self.suffix = suffix + self.length = length + +class SubstringNode(InstructionNode): + def __init__(self, dest, str_value, index, length): + self.dest = dest + self.str_value = str_value + self.index = index + self.length = length + +class ReadStrNode(InstructionNode): + def __init__(self, dest): + self.dest = dest + +class ReadIntNode(InstructionNode): + def __init__(self, dest): + self.dest = dest + +class PrintStrNode(InstructionNode): + def __init__(self, value): + 
self.value = value + + def __repr__(self): + return f"PRINTSTR {self.value}" + +class PrintIntNode(InstructionNode): + def __init__(self, value): + self.value = value + +class ComplementNode(InstructionNode): + def __init__(self, dest, obj): + self.dest = dest + self.obj = obj + +class VoidNode(InstructionNode): + pass + +class ErrorNode(InstructionNode): + def __init__(self, data_node): + self.data_node = data_node + + def __repr__(self): + return f"ERROR {self.data_node}" diff --git a/src/core/visitors/cil/cool_to_cil.py b/src/core/visitors/cil/cool_to_cil.py new file mode 100644 index 00000000..7605f25a --- /dev/null +++ b/src/core/visitors/cil/cool_to_cil.py @@ -0,0 +1,1016 @@ +from ..cil import cil +from .utils import get_token +from ...visitors import visitor +from ...cmp import cool_ast as cool, VariableInfo + +class BaseCOOLToCILVisitor: + def __init__(self, context): + self.dottypes = [] + self.dotdata = [] + self.dotcode = [] + self.current_type = None + self.current_method = None + self.current_function = None + self.context = context + self.vself = VariableInfo('self', None) + self.value_types = ['String', 'Int', 'Bool'] + + @property + def params(self): + return self.current_function.params + + @property + def localvars(self): + return self.current_function.localvars + + @property + def ids(self): + return self.current_function.ids + + @property + def instructions(self): + return self.current_function.instructions + + def register_param(self, vinfo): + #'param_{self.current_function.name[9:]}_{vinfo.name}_{len(self.params)}' + vinfo.name = vinfo.name + param_node = cil.ParamNode(vinfo.name) + self.params.append(param_node) + return vinfo.name + + def register_local(self, vinfo, id=False): + new_vinfo = VariableInfo('', None) + if len(self.current_function.name) >= 8 and self.current_function.name[:8] == 'function': + new_vinfo.name = f'local_{self.current_function.name[9:]}_{vinfo.name}_{len(self.localvars)}' + else: + new_vinfo.name = 
f'local_{self.current_function.name[5:]}_{vinfo.name}_{len(self.localvars)}' + + local_node = cil.LocalNode(new_vinfo.name) + if id: + self.ids[vinfo.name] = new_vinfo.name + self.localvars.append(local_node) + return new_vinfo.name + + def define_internal_local(self): + vinfo = VariableInfo('internal', None) + return self.register_local(vinfo) + + def register_instruction(self, instruction): + self.instructions.append(instruction) + return instruction + ############################### + + def to_function_name(self, method_name, type_name): + return f'function_{method_name}_at_{type_name}' + + def init_name(self, type_name, attr=False): + if attr: + return f'init_attr_at_{type_name}' + return f'init_at_{type_name}' + + def register_function(self, function_name): + function_node = cil.FunctionNode(function_name, [], [], []) + self.dotcode.append(function_node) + return function_node + + def register_type(self, name): + type_node = cil.TypeNode(name) + self.dottypes.append(type_node) + return type_node + + def register_data(self, value): + vname = f'data_{len(self.dotdata)}' + data_node = cil.DataNode(vname, value) + self.dotdata.append(data_node) + return data_node + + def register_label(self, label): + lname = f'{label}_{self.current_function.labels_count}' + self.current_function.labels_count += 1 + return cil.LabelNode(lname) + + def register_built_in(self): + #Object + type_node = self.register_type('Object') + + self.current_function = self.register_function(self.init_name('Object')) + instance = self.define_internal_local() + self.register_instruction(cil.AllocateNode('Object', instance)) + self.register_instruction(cil.ReturnNode(instance)) + + self.current_function = self.register_function(self.to_function_name('abort', 'Object')) + self.register_param(self.vself) + vname = self.define_internal_local() + data_node = [dn for dn in self.dotdata if dn.value == 'Abort called from class '][0] + self.register_instruction(cil.LoadNode(vname, data_node)) + 
self.register_instruction(cil.PrintStrNode(vname)) + self.register_instruction(cil.TypeNameNode(vname, self.vself.name)) + self.register_instruction(cil.PrintStrNode(vname)) + data_node = self.register_data('\n') + self.register_instruction(cil.LoadNode(vname, data_node)) + self.register_instruction(cil.PrintStrNode(vname)) + self.register_instruction(cil.ExitNode()) + # No need for RETURN here right?? + + self.current_function = self.register_function(self.to_function_name('type_name', 'Object')) + self.register_param(self.vself) + result = self.define_internal_local() + self.register_instruction(cil.TypeNameNode(result, self.vself.name)) + instance = self.define_internal_local() + self.register_instruction(cil.ArgNode(result)) + self.register_instruction(cil.StaticCallNode(self.init_name('String'), instance)) + self.register_instruction(cil.ReturnNode(instance)) + + self.current_function = self.register_function(self.to_function_name('copy', 'Object')) + self.register_param(self.vself) + result = self.define_internal_local() + self.register_instruction(cil.CopyNode(result, self.vself.name)) + self.register_instruction(cil.ReturnNode(result)) + + type_node.methods = [(name, self.to_function_name(name, 'Object')) for name in ['abort', 'type_name', 'copy']] + type_node.methods += [('init', self.init_name('Object'))] + obj_methods = ['abort', 'type_name', 'copy'] + + #IO + type_node = self.register_type('IO') + + self.current_function = self.register_function(self.init_name('IO')) + instance = self.define_internal_local() + self.register_instruction(cil.AllocateNode('IO', instance)) + self.register_instruction(cil.ReturnNode(instance)) + + self.current_function = self.register_function(self.to_function_name('out_string', 'IO')) + self.register_param(self.vself) + self.register_param(VariableInfo('x', None)) + vname = self.define_internal_local() + self.register_instruction(cil.GetAttribNode(vname, 'x', 'value', 'String')) + 
self.register_instruction(cil.PrintStrNode(vname)) + self.register_instruction(cil.ReturnNode(self.vself.name)) + + self.current_function = self.register_function(self.to_function_name('out_int', 'IO')) + self.register_param(self.vself) + self.register_param(VariableInfo('x', None)) + vname = self.define_internal_local() + self.register_instruction(cil.GetAttribNode(vname, 'x', 'value', 'Int')) + self.register_instruction(cil.PrintIntNode(vname)) + self.register_instruction(cil.ReturnNode(self.vself.name)) + + self.current_function = self.register_function(self.to_function_name('in_string', 'IO')) + self.register_param(self.vself) + result = self.define_internal_local() + self.register_instruction(cil.ReadStrNode(result)) + instance = self.define_internal_local() + self.register_instruction(cil.ArgNode(result)) + self.register_instruction(cil.StaticCallNode(self.init_name('String'), instance)) + self.register_instruction(cil.ReturnNode(instance)) + + self.current_function = self.register_function(self.to_function_name('in_int', 'IO')) + self.register_param(self.vself) + result = self.define_internal_local() + self.register_instruction(cil.ReadIntNode(result)) + instance = self.define_internal_local() + self.register_instruction(cil.ArgNode(result)) + self.register_instruction(cil.StaticCallNode(self.init_name('Int'), instance)) + self.register_instruction(cil.ReturnNode(instance)) + + type_node.methods = [(method, self.to_function_name(method, 'Object')) for method in obj_methods] + type_node.methods += [(name, self.to_function_name(name, 'IO')) for name in ['out_string', 'out_int', 'in_string', 'in_int']] + type_node.methods += [('init', self.init_name('IO'))] + + #String + type_node = self.register_type('String') + type_node.attributes = ['value', 'length'] + + self.current_function = self.register_function(self.init_name('String')) + self.register_param(VariableInfo('val', None)) + instance = self.define_internal_local() + 
self.register_instruction(cil.AllocateNode('String', instance)) + self.register_instruction(cil.SetAttribNode(instance, 'value', 'val', 'String')) + result = self.define_internal_local() + self.register_instruction(cil.LengthNode(result, 'val')) + attr = self.define_internal_local() + self.register_instruction(cil.ArgNode(result)) + self.register_instruction(cil.StaticCallNode(self.init_name('Int'), attr)) + self.register_instruction(cil.SetAttribNode(instance, 'length', attr, 'String')) + self.register_instruction(cil.ReturnNode(instance)) + + self.current_function = self.register_function(self.to_function_name('length', 'String')) + self.register_param(self.vself) + result = self.define_internal_local() + self.register_instruction(cil.GetAttribNode(result, self.vself.name, 'length', 'String')) + self.register_instruction(cil.ReturnNode(result)) + + self.current_function = self.register_function(self.to_function_name('concat', 'String')) + self.register_param(self.vself) + self.register_param(VariableInfo('s', None)) + str_1 = self.define_internal_local() + str_2 = self.define_internal_local() + length_1 = self.define_internal_local() + length_2 = self.define_internal_local() + self.register_instruction(cil.GetAttribNode(str_1, self.vself.name, 'value', 'String')) + self.register_instruction(cil.GetAttribNode(str_2, 's', 'value', 'String')) + self.register_instruction(cil.GetAttribNode(length_1, self.vself.name, 'length', 'String')) + self.register_instruction(cil.GetAttribNode(length_2, 's', 'length', 'String')) + self.register_instruction(cil.GetAttribNode(length_1, length_1, 'value', 'Int')) + self.register_instruction(cil.GetAttribNode(length_2, length_2, 'value', 'Int')) + self.register_instruction(cil.PlusNode(length_1, length_1, length_2)) + + result = self.define_internal_local() + self.register_instruction(cil.ConcatNode(result, str_1, str_2, length_1)) + instance = self.define_internal_local() + self.register_instruction(cil.ArgNode(result)) + 
self.register_instruction(cil.StaticCallNode(self.init_name('String'), instance)) + self.register_instruction(cil.ReturnNode(instance)) + + self.current_function = self.register_function(self.to_function_name('substr', 'String')) + self.register_param(self.vself) + self.register_param(VariableInfo('i', None)) + self.register_param(VariableInfo('l', None)) + result = self.define_internal_local() + index_value = self.define_internal_local() + length_value = self.define_internal_local() + length_attr = self.define_internal_local() + length_substr = self.define_internal_local() + less_value = self.define_internal_local() + str_value = self.define_internal_local() + self.register_instruction(cil.GetAttribNode(str_value, self.vself.name, 'value', 'String')) + self.register_instruction(cil.GetAttribNode(index_value, 'i', 'value', 'Int')) + self.register_instruction(cil.GetAttribNode(length_value, 'l', 'value', 'Int')) + #Check Out of range error + self.register_instruction(cil.GetAttribNode(length_attr, self.vself.name, 'length', 'String')) + self.register_instruction(cil.PlusNode(length_substr, length_value, index_value)) + self.register_instruction(cil.LessNode(less_value, length_attr, length_substr)) + self.register_runtime_error(less_value, 'Substring out of range') + self.register_instruction(cil.SubstringNode(result, str_value, index_value, length_value)) + instance = self.define_internal_local() + self.register_instruction(cil.ArgNode(result)) + self.register_instruction(cil.StaticCallNode(self.init_name('String'), instance)) + self.register_instruction(cil.ReturnNode(instance)) + + type_node.methods = [(method, self.to_function_name(method, 'Object')) for method in obj_methods] + type_node.methods += [(name, self.to_function_name(name, 'String')) for name in ['length', 'concat', 'substr']] + type_node.methods += [('init', self.init_name('String'))] + + #Int + type_node = self.register_type('Int') + type_node.attributes = ['value'] + + self.current_function = 
self.register_function(self.init_name('Int')) + self.register_param(VariableInfo('val', None)) + instance = self.define_internal_local() + self.register_instruction(cil.AllocateNode('Int', instance)) + self.register_instruction(cil.SetAttribNode(instance, 'value', 'val', 'Int')) + self.register_instruction(cil.ReturnNode(instance)) + + type_node.methods = [(method, self.to_function_name(method, 'Object')) for method in obj_methods] + type_node.methods += [('init', self.init_name('Int'))] + + #Bool + type_node = self.register_type('Bool') + type_node.attributes = ['value'] + + self.current_function = self.register_function(self.init_name('Bool')) + self.register_param(VariableInfo('val', None)) + instance = self.define_internal_local() + self.register_instruction(cil.AllocateNode('Bool', instance)) + self.register_instruction(cil.SetAttribNode(instance, 'value', 'val', 'Bool')) + self.register_instruction(cil.ReturnNode(instance)) + + type_node.methods = [(method, self.to_function_name(method, 'Object')) for method in obj_methods] + type_node.methods += [('init', self.init_name('Bool'))] + + def register_runtime_error(self, condition, msg): + error_node = self.register_label('error_label') + continue_node = self.register_label('continue_label') + self.register_instruction(cil.GotoIfNode(condition, error_node.label)) + self.register_instruction(cil.GotoNode(continue_node.label)) + self.register_instruction(error_node) + data_node = self.register_data(msg) + self.register_instruction(cil.ErrorNode(data_node)) + + self.register_instruction(continue_node) + + +class COOLToCILVisitor(BaseCOOLToCILVisitor): + def __init__(self, context): + super().__init__(context) + + def buildHierarchy(self, t:str): + if t == 'Object': return None + return {x.name for x in self.context.types.values() if x.name != 'AUTO_TYPE' and x.conforms_to(self.context.get_type(t))} + + @visitor.on('node') + def visit(self, node): + pass + + @visitor.when(cool.ProgramNode) + def visit(self, node, 
scope): + ###################################################### + # node.declarations -> [ ClassDeclarationNode ... ] + ###################################################### + + self.current_function = self.register_function('entry') + result = self.define_internal_local() + instance = self.register_local(VariableInfo('instance', None)) + self.register_instruction(cil.StaticCallNode(self.init_name('Main'), instance)) + self.register_instruction(cil.ArgNode(instance)) + self.register_instruction(cil.StaticCallNode(self.to_function_name('main', 'Main'), result)) + self.register_instruction(cil.ReturnNode(0)) + # Error message raised by Object:abort() + self.register_data('Abort called from class ') + self.register_built_in() + self.current_function = None + + for declaration, child_scope in zip(node.declarations, scope.children): + self.visit(declaration, child_scope) + + return cil.ProgramNode(self.dottypes, self.dotdata, self.dotcode) + + @visitor.when(cool.ClassDeclarationNode) + def visit(self, node, scope): + #################################################################### + # node.id -> str + # node.parent -> str + # node.features -> [ FuncDeclarationNode/AttrDeclarationNode ... 
] + #################################################################### + + self.current_type = self.context.get_type(node.id) + + # (Handle all the .TYPE section) + type_node = self.register_type(node.id) + type_node.attributes = [attr.name for attr, _ in self.current_type.all_attributes()] + type_node.methods = [(method.name, self.to_function_name(method.name, xtype.name)) for method, xtype in self.current_type.all_methods()] + + func_declarations = (f for f in node.features if isinstance(f, cool.FuncDeclarationNode)) + for feature, child_scope in zip(func_declarations, scope.children): + self.visit(feature, child_scope) + + #init + self.current_function = self.register_function(self.init_name(node.id)) + #allocate + instance = self.register_local(VariableInfo('instance', None)) + self.register_instruction(cil.AllocateNode(node.id, instance)) + + func = self.current_function + vtemp = self.define_internal_local() + + #init_attr + self.current_function = self.register_function(self.init_name(node.id, attr=True)) + self.register_param(self.vself) + if node.parent != 'Object' and node.parent != 'IO': + self.register_instruction(cil.ArgNode(self.vself.name)) + self.register_instruction(cil.StaticCallNode(self.init_name(node.parent, attr=True), vtemp)) + attr_declarations = (f for f in node.features if isinstance(f, cool.AttrDeclarationNode)) + for feature in attr_declarations: + self.visit(feature, scope) + + self.current_function = func + self.register_instruction(cil.ArgNode(instance)) + self.register_instruction(cil.StaticCallNode(self.init_name(node.id, attr=True), vtemp)) + + self.register_instruction(cil.ReturnNode(instance)) + self.current_function = None + + self.current_type = None + + @visitor.when(cool.AttrDeclarationNode) + def visit(self, node, scope): + ############################### + # node.id -> str + # node.type -> str + # node.expr -> ExpressionNode + ############################### + if node.expr: + self.visit(node.expr, scope) + 
self.register_instruction(cil.SetAttribNode(self.vself.name, node.id, scope.ret_expr, self.current_type)) + elif node.type in self.value_types: + vtemp = self.define_internal_local() + self.register_instruction(cil.AllocateNode(node.type, vtemp)) + self.register_instruction(cil.SetAttribNode(self.vself.name, node.id, vtemp, self.current_type)) + + @visitor.when(cool.FuncDeclarationNode) + def visit(self, node, scope): + ##################################### + # node.id -> str + # node.params -> [ (str, str) ... ] + # node.type -> str + # node.body -> ExpressionNode + ##################################### + + self.current_method = self.current_type.get_method(node.id) + type_name = self.current_type.name + + self.current_function = self.register_function(self.to_function_name(self.current_method.name, type_name)) + + # (Handle PARAMS) + self.register_param(self.vself) + for param_name, _ in node.params: + self.register_param(VariableInfo(param_name, None)) + + scope.ret_expr = None + #//TODO: scope children used here ??? 
+ self.visit(node.body, scope) + # (Handle RETURN) + if scope.ret_expr is None: + self.register_instruction(cil.ReturnNode('')) + elif self.current_function.name == 'entry': + self.register_instruction(cil.ReturnNode(0)) + else: + self.register_instruction(cil.ReturnNode(scope.ret_expr)) + + self.current_method = None + + @visitor.when(cool.IfThenElseNode) + def visit(self, node, scope): + ################################### + # node.condition -> ExpressionNode + # node.if_body -> ExpressionNode + # node.else_body -> ExpressionNode + ################################## + vret = self.register_local(VariableInfo('if_then_else_value', None)) + vcondition = self.define_internal_local() + + then_label_node = self.register_label('then_label') + else_label_node = self.register_label('else_label') + continue_label_node = self.register_label('continue_label') + + #If condition GOTO then_label + self.visit(node.condition, scope) + self.register_instruction(cil.GetAttribNode(vcondition, scope.ret_expr, 'value', 'Bool')) + self.register_instruction(cil.GotoIfNode(vcondition, then_label_node.label)) + #GOTO else_label + self.register_instruction(cil.GotoNode(else_label_node.label)) + #Label then_label + self.register_instruction(then_label_node) + self.visit(node.if_body, scope) + self.register_instruction(cil.AssignNode(vret, scope.ret_expr)) + self.register_instruction(cil.GotoNode(continue_label_node.label)) + #Label else_label + self.register_instruction(else_label_node) + self.visit(node.else_body, scope) + self.register_instruction(cil.AssignNode(vret, scope.ret_expr)) + + self.register_instruction(continue_label_node) + scope.ret_expr = vret + + @visitor.when(cool.WhileLoopNode) + def visit(self, node, scope): + ################################### + # node.condition -> ExpressionNode + # node.body -> ExpressionNode + ################################### + + vcondition = self.define_internal_local() + while_label_node = self.register_label('while_label') + loop_label_node = 
self.register_label('loop_label') + pool_label_node = self.register_label('pool_label') + #Label while + self.register_instruction(while_label_node) + #If condition GOTO loop + self.visit(node.condition, scope) + self.register_instruction(cil.GetAttribNode(vcondition, scope.ret_expr, 'value', 'Bool')) + self.register_instruction(cil.GotoIfNode(vcondition, loop_label_node.label)) + #GOTO pool + self.register_instruction(cil.GotoNode(pool_label_node.label)) + #Label loop + self.register_instruction(loop_label_node) + self.visit(node.body, scope) + #GOTO while + self.register_instruction(cil.GotoNode(while_label_node.label)) + #Label pool + self.register_instruction(pool_label_node) + + #The result of a while loop is void + scope.ret_expr = cil.VoidNode() + + @visitor.when(cool.BlockNode) + def visit(self, node, scope): + ####################################### + # node.exprs -> [ ExpressionNode ... ] + ####################################### + for expr in node.exprs: + self.visit(expr, scope) + + @visitor.when(cool.LetInNode) + def visit(self, node, scope): + ############################################ + # node.let_body -> [ LetAttributeNode ... ] + # node.in_body -> ExpressionNode + ############################################ + vret = self.register_local(VariableInfo('let_in_value', None)) + + for let_attr_node in node.let_body: + self.visit(let_attr_node, scope) + self.visit(node.in_body, scope) + self.register_instruction(cil.AssignNode(vret, scope.ret_expr)) + scope.ret_expr = vret + + @visitor.when(cool.CaseOfNode) + def visit(self, node, scope): + ############################################## + # node.expr -> ExpressionNode + # node.branches -> [ CaseExpressionNode ... 
} + ############################################## + vexpr = self.register_local(VariableInfo('case_expr_value', None)) + vtype = self.register_local(VariableInfo('typeName_value', None)) + vcond = self.register_local(VariableInfo('equal_value', None)) + vret = self.register_local(VariableInfo('case_value', None)) + self.visit(node.expr, scope) + self.register_instruction(cil.AssignNode(vexpr, scope.ret_expr)) + self.register_instruction(cil.TypeNameNode(vtype, scope.ret_expr)) + + #Check if node.expr is void and raise proper error if vexpr value is void + void = cil.VoidNode() + equal_result = self.define_internal_local() + self.register_instruction(cil.EqualNode(equal_result, vexpr, void)) + + token = get_token(node.expr) + self.register_runtime_error(equal_result, f'({token.row},{token.column}) - RuntimeError: Case on void\n') + + end_label = self.register_label('end_label') + labels = [] + old = {} + + # sorting the branches + order = [] + for b in node.branches: + count = 0 + t1 = self.context.get_type(b.type) + for other in node.branches: + t2 = self.context.get_type(other.type) + count += t2.conforms_to(t1) + order.append((count, b)) + order.sort(key=lambda x: x[0]) + + for idx, (_, b) in enumerate(order): + labels.append(self.register_label(f'{idx}_label')) + h = self.buildHierarchy(b.type) + if not h: + self.register_instruction(cil.GotoNode(labels[-1].label)) + break + h.add(b.type) + for s in old: + h -= s + for t in h: + vbranch_type_name = self.register_local(VariableInfo('branch_type_name', None)) + self.register_instruction(cil.NameNode(vbranch_type_name, t)) + self.register_instruction(cil.EqualNode(vcond, vtype, vbranch_type_name)) + self.register_instruction(cil.GotoIfNode(vcond, labels[-1].label)) + + #Raise runtime error if no Goto was executed + data_node = self.register_data(f'({token.row + 1 + len(node.branches)},{token.column - 5}) - RuntimeError: Execution of a case statement without a matching branch\n') + 
# NOTE: the statements down to `self.register_instruction(end_label)` close a
# visit method whose beginning lies before this chunk; they are kept unchanged.
self.register_instruction(cil.ErrorNode(data_node))

for idx, l in enumerate(labels):
    self.register_instruction(l)
    vid = self.register_local(VariableInfo(order[idx][1].id, None), id=True)
    self.register_instruction(cil.AssignNode(vid, vexpr))
    self.visit(order[idx][1], scope)
    self.register_instruction(cil.AssignNode(vret, scope.ret_expr))
    self.register_instruction(cil.GotoNode(end_label.label))

scope.ret_expr = vret
self.register_instruction(end_label)

@visitor.when(cool.CaseExpressionNode)
def visit(self, node, scope):
    """Translate a case branch by visiting its body expression.

    node.id -> str, node.type -> str, node.expr -> ExpressionNode
    """
    self.visit(node.expr, scope)

@visitor.when(cool.LetAttributeNode)
def visit(self, node, scope):
    """Bind a ``let`` variable to a CIL local and emit its initialisation.

    node.id -> str, node.type -> str, node.expr -> ExpressionNode
    """
    if node.id in self.ids:
        vname = self.ids[node.id]
    else:
        vname = self.register_local(VariableInfo(node.id, node.type), id=True)
    if node.expr:
        self.visit(node.expr, scope)
        self.register_instruction(cil.AssignNode(vname, scope.ret_expr))
    elif node.type in self.value_types:
        # No initialiser: value types get a freshly allocated instance.
        self.register_instruction(cil.AllocateNode(node.type, vname))

@visitor.when(cool.AssignNode)
def visit(self, node, scope):
    """Emit CIL that stores the value of ``node.expr`` into ``node.id``.

    node.id -> str, node.expr -> ExpressionNode

    The target is resolved as an attribute of the current type first, then as
    a parameter of the current function, then as a local.
    """
    self.visit(node.expr, scope)

    try:
        self.current_type.get_attribute(node.id)
        self.register_instruction(cil.SetAttribNode(self.vself.name, node.id, scope.ret_expr, self.current_type.name))
    except AttributeError:
        param_names = [pn.name for pn in self.current_function.params]
        if node.id in param_names:
            # Exact match, as the IdNode visitor does. Matching on
            # "_"-separated fragments could pick another parameter
            # (e.g. `a` matching `a_b`).
            vname = node.id
        elif node.id in self.ids:
            # Same id -> local mapping the IdNode/LetAttributeNode visitors use.
            vname = self.ids[node.id]
        else:
            # Fallback kept from the previous implementation: first local whose
            # generated name contains the identifier as a "_"-separated part.
            vname = next(
                (lv.name for lv in self.current_function.localvars if node.id in lv.name.split("_")),
                None,
            )
        self.register_instruction(cil.AssignNode(vname, scope.ret_expr))

@visitor.when(cool.NotNode)
def visit(self, node, scope):
    """Emit ``1 - value`` on the operand's value and box the result as a Bool.

    node.expr -> ExpressionNode
    """
    vname = self.define_internal_local()
    value = self.define_internal_local()
    instance = self.define_internal_local()

    self.visit(node.expr, scope)
    self.register_instruction(cil.GetAttribNode(value, scope.ret_expr, 'value', 'Bool'))
    self.register_instruction(cil.MinusNode(vname, 1, value))

    self.register_instruction(cil.ArgNode(vname))
    self.register_instruction(cil.StaticCallNode(self.init_name('Bool'), instance))
    scope.ret_expr = instance

def _emit_int_comparison(self, node, scope, cil_op):
    """Emit ``cil_op`` over the unboxed operands and box the outcome as a Bool.

    ``cil_op`` is the CIL node class to instantiate (``cil.LessNode`` or
    ``cil.LessEqualNode``); it is called as ``cil_op(dest, left, right)``.
    """
    vname = self.define_internal_local()
    left_value = self.define_internal_local()
    right_value = self.define_internal_local()
    instance = self.define_internal_local()

    self.visit(node.left, scope)
    left = scope.ret_expr
    self.visit(node.right, scope)
    right = scope.ret_expr
    # The operands of `<` / `<=` are integers, so they are unboxed as 'Int'
    # like the arithmetic visitors do (previously labelled 'Bool').
    self.register_instruction(cil.GetAttribNode(left_value, left, 'value', 'Int'))
    self.register_instruction(cil.GetAttribNode(right_value, right, 'value', 'Int'))
    self.register_instruction(cil_op(vname, left_value, right_value))

    self.register_instruction(cil.ArgNode(vname))
    self.register_instruction(cil.StaticCallNode(self.init_name('Bool'), instance))
    scope.ret_expr = instance

@visitor.when(cool.LessEqualNode)
def visit(self, node, scope):
    """Translate ``left <= right``.

    node.left -> ExpressionNode, node.right -> ExpressionNode
    """
    self._emit_int_comparison(node, scope, cil.LessEqualNode)

@visitor.when(cool.LessNode)
def visit(self, node, scope):
    """Translate ``left < right``.

    node.left -> ExpressionNode, node.right -> ExpressionNode
    """
    self._emit_int_comparison(node, scope, cil.LessNode)

@visitor.when(cool.EqualNode)
def visit(self, node, scope):
    """Translate ``left = right``, choosing the comparison by the left type name.

    node.left -> ExpressionNode, node.right -> ExpressionNode

    Int and Bool compare their ``value`` attribute, String uses
    ``cil.EqualStrNode`` on the ``value`` attributes, and any other type
    compares the two operands directly.
    """
    vname = self.define_internal_local()
    type_left = self.define_internal_local()
    type_int = self.define_internal_local()
    type_bool = self.define_internal_local()
    type_string = self.define_internal_local()
    equal_result = self.define_internal_local()
    left_value = self.define_internal_local()
    right_value = self.define_internal_local()
    instance = self.define_internal_local()

    self.visit(node.left, scope)
    left = scope.ret_expr
    self.visit(node.right, scope)
    right = scope.ret_expr

    self.register_instruction(cil.TypeNameNode(type_left, left))
    self.register_instruction(cil.NameNode(type_int, 'Int'))
    self.register_instruction(cil.NameNode(type_bool, 'Bool'))
    self.register_instruction(cil.NameNode(type_string, 'String'))

    int_node = self.register_label('int_label')
    string_node = self.register_label('string_label')
    reference_node = self.register_label('reference_label')
    continue_node = self.register_label('continue_label')
    # Dispatch on the type name of the left operand.
    self.register_instruction(cil.EqualNode(equal_result, type_left, type_int))
    self.register_instruction(cil.GotoIfNode(equal_result, int_node.label))
    self.register_instruction(cil.EqualNode(equal_result, type_left, type_bool))
    self.register_instruction(cil.GotoIfNode(equal_result, int_node.label))
    self.register_instruction(cil.EqualNode(equal_result, type_left, type_string))
    self.register_instruction(cil.GotoIfNode(equal_result, string_node.label))
    self.register_instruction(cil.GotoNode(reference_node.label))

    self.register_instruction(int_node)
    self.register_instruction(cil.GetAttribNode(left_value, left, 'value', 'Int'))
    self.register_instruction(cil.GetAttribNode(right_value, right, 'value', 'Int'))
    self.register_instruction(cil.EqualNode(vname, left_value, right_value))
    self.register_instruction(cil.GotoNode(continue_node.label))

    self.register_instruction(string_node)
    self.register_instruction(cil.GetAttribNode(left_value, left, 'value', 'String'))
    self.register_instruction(cil.GetAttribNode(right_value, right, 'value', 'String'))
    self.register_instruction(cil.EqualStrNode(vname, left_value, right_value))
    self.register_instruction(cil.GotoNode(continue_node.label))

    self.register_instruction(reference_node)
    self.register_instruction(cil.EqualNode(vname, left, right))

    self.register_instruction(continue_node)
    self.register_instruction(cil.ArgNode(vname))
    self.register_instruction(cil.StaticCallNode(self.init_name('Bool'), instance))
    scope.ret_expr = instance

@visitor.when(cool.PlusNode)
def visit(self, node, scope):
    """Translate ``left + right`` and box the sum as an Int.

    node.left -> ExpressionNode, node.right -> ExpressionNode
    """
    vname = self.define_internal_local()
    vleft = self.define_internal_local()
    vright = self.define_internal_local()
    self.visit(node.left, scope)
    self.register_instruction(cil.GetAttribNode(vleft, scope.ret_expr, 'value', 'Int'))
    self.visit(node.right, scope)
    self.register_instruction(cil.GetAttribNode(vright, scope.ret_expr, 'value', 'Int'))
    self.register_instruction(cil.PlusNode(vname, vleft, vright))
    instance = self.define_internal_local()
    self.register_instruction(cil.ArgNode(vname))
    self.register_instruction(cil.StaticCallNode(self.init_name('Int'), instance))
    scope.ret_expr = instance

def _unbox_int_operands(self, node, scope):
    """Visit both operands of a binary node and unbox their Int values.

    Reserves the result local first, then one local per operand, and returns
    the three local names as ``(result, left, right)``.
    """
    result = self.define_internal_local()
    lhs = self.define_internal_local()
    rhs = self.define_internal_local()
    for operand, target in ((node.left, lhs), (node.right, rhs)):
        self.visit(operand, scope)
        self.register_instruction(cil.GetAttribNode(target, scope.ret_expr, 'value', 'Int'))
    return result, lhs, rhs

def _box_int(self, raw, scope):
    """Wrap the raw value ``raw`` in a new Int and publish it in ``scope.ret_expr``."""
    boxed = self.define_internal_local()
    self.register_instruction(cil.ArgNode(raw))
    self.register_instruction(cil.StaticCallNode(self.init_name('Int'), boxed))
    scope.ret_expr = boxed

@visitor.when(cool.MinusNode)
def visit(self, node, scope):
    """Translate ``left - right``.

    node.left -> ExpressionNode, node.right -> ExpressionNode
    """
    difference, minuend, subtrahend = self._unbox_int_operands(node, scope)
    self.register_instruction(cil.MinusNode(difference, minuend, subtrahend))
    self._box_int(difference, scope)

@visitor.when(cool.StarNode)
def visit(self, node, scope):
    """Translate ``left * right``.

    node.left -> ExpressionNode, node.right -> ExpressionNode
    """
    product, factor_a, factor_b = self._unbox_int_operands(node, scope)
    self.register_instruction(cil.StarNode(product, factor_a, factor_b))
    self._box_int(product, scope)

@visitor.when(cool.DivNode)
def visit(self, node, scope):
    """Translate ``left / right`` with a runtime check for a zero divisor.

    node.left -> ExpressionNode, node.right -> ExpressionNode
    """
    quotient, dividend, divisor = self._unbox_int_operands(node, scope)

    # Guard: report a runtime error located at the divisor's token.
    divisor_is_zero = self.define_internal_local()
    self.register_instruction(cil.EqualNode(divisor_is_zero, divisor, 0))
    where = get_token(node.right)
    self.register_runtime_error(divisor_is_zero, f'({where.row},{where.column}) - RuntimeError: Division by zero\n')

    self.register_instruction(cil.DivNode(quotient, dividend, divisor))
    self._box_int(quotient, scope)

@visitor.when(cool.IsVoidNode)
def visit(self, node, scope):
    """Translate ``isvoid expr`` into a comparison against a void value.

    node.expr -> ExpressionNode
    """
    void_marker = cil.VoidNode()
    subject = self.define_internal_local()
    self.visit(node.expr, scope)
    self.register_instruction(cil.AssignNode(subject, scope.ret_expr))
    # The same local first holds the raw comparison and then the boxed Bool.
    outcome = self.define_internal_local()
    self.register_instruction(cil.EqualNode(outcome, subject, void_marker))
    self.register_instruction(cil.ArgNode(outcome))
    self.register_instruction(cil.StaticCallNode(self.init_name("Bool"), outcome))
    scope.ret_expr = outcome

@visitor.when(cool.ComplementNode)
def visit(self, node, scope):
    """Translate ``~expr`` and box the result as an Int.

    node.expr -> ExpressionNode
    """
    complemented = self.define_internal_local()
    operand = self.define_internal_local()
    boxed = self.define_internal_local()
    self.visit(node.expr, scope)
    self.register_instruction(cil.GetAttribNode(operand, scope.ret_expr, 'value', 'Int'))
    self.register_instruction(cil.ComplementNode(complemented, operand))
    self.register_instruction(cil.ArgNode(complemented))
    self.register_instruction(cil.StaticCallNode(self.init_name('Int'), boxed))
    scope.ret_expr = boxed

@visitor.when(cool.FunctionCallNode) + def visit(self, node, scope): + ###################################### + # node.obj -> ExpressionNode + # node.id -> str + # node.args -> [ ExpressionNode ... ] + # node.type -> str + ##################################### + + args = [] + for arg in node.args: + vname = self.register_local(VariableInfo(f'{node.id}_arg', None), id=True) + self.visit(arg, scope) + self.register_instruction(cil.AssignNode(vname, scope.ret_expr)) + args.append(cil.ArgNode(vname)) + result = self.register_local(VariableInfo(f'return_value_of_{node.id}', None), id=True) + + vobj = self.define_internal_local() + self.visit(node.obj, scope) + self.register_instruction(cil.AssignNode(vobj, scope.ret_expr)) + + #Check if node.obj is void + void = cil.VoidNode() + equal_result = self.define_internal_local() + self.register_instruction(cil.EqualNode(equal_result, vobj, void)) + + token = get_token(node.obj) + self.register_runtime_error(equal_result, f'({token.row},{token.column}) - RuntimeError: Dispatch on void\n') + + #self + self.register_instruction(cil.ArgNode(vobj)) + for arg in args: + self.register_instruction(arg) + + if node.type: + #Call of type @.id(,...,) + self.register_instruction(cil.StaticCallNode(self.to_function_name(node.id, node.type), result)) + else: + #Call of type .(,...,) + type_of_node = self.register_local(VariableInfo(f'{node.id}_type', None), id=True) + self.register_instruction(cil.TypeOfNode(vobj, type_of_node)) + computed_type = node.obj.computed_type + if computed_type.name == 'SELF_TYPE': + computed_type = computed_type.fixed + self.register_instruction(cil.DynamicCallNode(type_of_node, node.id, result, computed_type.name)) + + scope.ret_expr = result + + @visitor.when(cool.MemberCallNode) + def visit(self, node, scope): + ###################################### + # node.id -> str + # node.args -> [ ExpressionNode ... 
] + ###################################### + #method = [self.to_function_name(method.name, xtype.name) for method, xtype in self.current_type.all_methods() if method.name == node.id][0] + + args = [] + for arg in node.args: + vname = self.register_local(VariableInfo(f'{node.id}_arg', None), id=True) + self.visit(arg, scope) + self.register_instruction(cil.AssignNode(vname, scope.ret_expr)) + args.append(cil.ArgNode(vname)) + result = self.register_local(VariableInfo(f'return_value_of_{node.id}', None), id=True) + + self.register_instruction(cil.ArgNode(self.vself.name)) + for arg in args: + self.register_instruction(arg) + + type_of_node = self.register_local(VariableInfo(f'{self.vself.name}_type', None)) + self.register_instruction(cil.TypeOfNode(self.vself.name, type_of_node)) + self.register_instruction(cil.DynamicCallNode(type_of_node, node.id, result, self.current_type.name)) + #self.register_instruction(cil.StaticCallNode(method, result)) + scope.ret_expr = result + + @visitor.when(cool.NewNode) + def visit(self, node, scope): + ############################### + # node.type -> str + ############################### + instance = self.define_internal_local() + + if node.type == 'SELF_TYPE': + vtype = self.define_internal_local() + self.register_instruction(cil.TypeOfNode(self.vself.name, vtype)) + self.register_instruction(cil.AllocateNode(vtype, instance)) + elif node.type == 'Int' or node.type == 'Bool': + self.register_instruction(cil.ArgNode(0)) + elif node.type == 'String': + data_node = [dn for dn in self.dotdata if dn.value == ''][0] + vmsg = self.register_local(VariableInfo('msg', None)) + self.register_instruction(cil.LoadNode(vmsg, data_node)) + self.register_instruction(cil.ArgNode(vmsg)) + + self.register_instruction(cil.StaticCallNode(self.init_name(node.type), instance)) + scope.ret_expr = instance + + @visitor.when(cool.IntegerNode) + def visit(self, node, scope): + ############################### + # node.lex -> str + 
############################### + instance = self.define_internal_local() + self.register_instruction(cil.ArgNode(int(node.lex))) + self.register_instruction(cil.StaticCallNode(self.init_name('Int'), instance)) + scope.ret_expr = instance + + @visitor.when(cool.IdNode) + def visit(self, node, scope): + ############################### + # node.lex -> str + ############################### + try: + self.current_type.get_attribute(node.lex) + attr = self.register_local(VariableInfo(node.lex, None), id=True) + self.register_instruction(cil.GetAttribNode(attr, self.vself.name, node.lex, self.current_type.name)) + scope.ret_expr = attr + except AttributeError: + param_names = [pn.name for pn in self.current_function.params] + if node.lex in param_names: + for n in param_names: + if node.lex == n: + scope.ret_expr = n + break + else: + scope.ret_expr = self.ids[node.lex] + + @visitor.when(cool.StringNode) + def visit(self, node, scope): + ############################### + # node.lex -> str + ############################### + try: + data_node = [dn for dn in self.dotdata if dn.value == node.lex][0] + except IndexError: + data_node = self.register_data(node.lex) + vmsg = self.register_local(VariableInfo('msg', None)) + instance = self.define_internal_local() + self.register_instruction(cil.LoadNode(vmsg, data_node)) + self.register_instruction(cil.ArgNode(vmsg)) + self.register_instruction(cil.StaticCallNode(self.init_name('String'), instance)) + scope.ret_expr = instance + + @visitor.when(cool.BoolNode) + def visit(self, node, scope): + ############################### + # node.lex -> str + ############################### + if node.lex == 'true': + scope.ret_expr = 1 + else: + scope.ret_expr = 0 + instance = self.define_internal_local() + self.register_instruction(cil.ArgNode(scope.ret_expr)) + self.register_instruction(cil.StaticCallNode(self.init_name('Bool'), instance)) + scope.ret_expr = instance diff --git a/src/core/visitors/cil/utils.py 
b/src/core/visitors/cil/utils.py new file mode 100644 index 00000000..09208171 --- /dev/null +++ b/src/core/visitors/cil/utils.py @@ -0,0 +1,5 @@ +def get_token(node): + for attr in ['tid', 'token', 'ttype', 'symbol']: + if hasattr(node, attr): + return getattr(node, attr) + raise Exception(f'{node} has no token') diff --git a/src/core/visitors/mips/__init__.py b/src/core/visitors/mips/__init__.py new file mode 100644 index 00000000..7ee6cc80 --- /dev/null +++ b/src/core/visitors/mips/__init__.py @@ -0,0 +1,2 @@ +from .ast_printer import PrintVisitor +from .cil_to_mips import CILToMIPSVisitor diff --git a/src/core/visitors/mips/ast_printer.py b/src/core/visitors/mips/ast_printer.py new file mode 100644 index 00000000..174ca704 --- /dev/null +++ b/src/core/visitors/mips/ast_printer.py @@ -0,0 +1,153 @@ +from .mips import * +from ...visitors import visitor + +class PrintVisitor: + + @visitor.on('node') + def visit(self, node): + pass + + @visitor.when(Register) + def visit(self, node): + return f'${node.name}' + + @visitor.when(int) + def visit(self, node): + return str(node) + + @visitor.when(str) + def visit(self, node): + return node + + @visitor.when(ProgramNode) + def visit(self, node): + data_section_header = "\t.data" + static_strings = '\n'.join([self.visit(string_const) for string_const in node.data]) + + names_table = f"{TYPENAMES_TABLE_LABEL}:\n" + "\n".join([f"\t.word\t{tp.string_name_label}" for tp in node.types]) + proto_table = f"{PROTO_TABLE_LABEL}:\n" + "\n".join([f"\t.word\t{tp.label}_proto" for tp in node.types]) + + types = "\n\n".join([self.visit(tp) for tp in node.types]) + + code = "\n".join([self.visit(func) for func in node.functions]) + return f'{data_section_header}\n{static_strings}\n\n{names_table}\n\n{proto_table}\n\n{types}\n\t.text\n\t.globl main\n{code}' + + @visitor.when(StringConst) + def visit(self, node): + return f'{node.label}: .asciiz "{node.string}"' + + @visitor.when(MIPSType) + def visit(self, node): + methods = 
"\n".join([f"\t.word\t {node.methods[k]}" for k in node.methods]) + dispatch_table = f"{node.label}_dispatch:\n{methods}" + proto_begin = f"{node.label}_proto:\n\t.word\t{node.index}\n\t.word\t{node.size}\n\t.word\t{node.label}_dispatch" + proto_attr = "\n".join([f'\t.word\t{node._default_attributes.get(attr, "0")}' for attr in node.attributes]) + proto_end = f"\t.word\t{OBJECT_MARK}" + proto = f"{proto_begin}\n{proto_attr}\n{proto_end}" if proto_attr != "" else f"{proto_begin}\n{proto_end}" + + return f'{dispatch_table}\n\n{proto}' + + @visitor.when(SyscallNode) + def visit(self, node): + return 'syscall' + + @visitor.when(LabelRelativeLocation) + def visit(self, node): + return f'{node.label} + {node.offset}' + + @visitor.when(RegisterRelativeLocation) + def visit(self, node): + return f'{node.offset}({self.visit(node.register)})' + + @visitor.when(FunctionNode) + def visit(self, node): + instr = [self.visit(instruction) for instruction in node.instructions] + #TODO la linea de abajo sobra, es necesaria mientras la traduccion del AST de CIL este incompleta + instr2 = [inst for inst in instr if type(inst) == str] + instructions = "\n\t".join(instr2) + return f'{node.label}:\n\t{instructions}' + + @visitor.when(AddInmediateNode) + def visit(self, node): + return f'addi {self.visit(node.dest)}, {self.visit(node.src)}, {self.visit(node.value)}' + + @visitor.when(StoreWordNode) + def visit(self, node): + return f'sw {self.visit(node.reg)}, {self.visit(node.addr)}' + + @visitor.when(LoadInmediateNode) + def visit(self, node): + return f'li {self.visit(node.reg)}, {self.visit(node.value)}' + + @visitor.when(JumpAndLinkNode) + def visit(self, node): + return f'jal {node.label}' + + @visitor.when(JumpRegister) + def visit(self, node): + return f'jr {self.visit(node.reg)}' + + @visitor.when(JumpRegisterAndLinkNode) + def visit(self, node): + return f'jal {self.visit(node.reg)}' + + @visitor.when(LoadWordNode) + def visit(self, node): + return f'lw {self.visit(node.reg)}, 
{self.visit(node.addr)}' + + @visitor.when(LoadAddressNode) + def visit(self, node): + return f'la {self.visit(node.reg)}, {self.visit(node.label)}' + + @visitor.when(MoveNode) + def visit(self, node): + return f'move {self.visit(node.reg1)} {self.visit(node.reg2 )}' + + @visitor.when(ShiftLeftLogicalNode) + def visit(self, node): + return f"sll {self.visit(node.dest)} {self.visit(node.src)} {node.bits}" + + @visitor.when(AddInmediateUnsignedNode) + def visit(self, node): + return f"addiu {self.visit(node.dest)} {self.visit(node.src)} {self.visit(node.value)}" + + @visitor.when(AddUnsignedNode) + def visit(self, node): + return f"addu {self.visit(node.dest)} {self.visit(node.sum1)} {self.visit(node.sum2)}" + + @visitor.when(LabelNode) + def visit(self, node): + return f"{node.name}:" + + @visitor.when(BranchOnNotEqualNode) + def visit(self, node): + return f"bne {self.visit(node.reg1)} {self.visit(node.reg2)} {node.label}" + + @visitor.when(JumpNode) + def visit(self, node): + return f"j {node.label}" + + @visitor.when(AddNode) + def visit(self, node): + return f"add {self.visit(node.reg1)} {self.visit(node.reg2)} {self.visit(node.reg3)}" + + @visitor.when(SubNode) + def visit(self, node): + return f"sub {self.visit(node.reg1)} {self.visit(node.reg2)} {self.visit(node.reg3)}" + + @visitor.when(MultiplyNode) + def visit(self, node): + return f"mul {self.visit(node.reg1)} {self.visit(node.reg2)} {self.visit(node.reg3)}" + + @visitor.when(DivideNode) + def visit(self, node): + return f"div {self.visit(node.reg1)} {self.visit(node.reg2)}" + + @visitor.when(ComplementNode) + def visit(self, node): + return f"not {self.visit(node.reg1)} {self.visit(node.reg2)}" + + @visitor.when(MoveFromLowNode) + def visit(self, node): + return f"mflo {self.visit(node.reg)}" + \ No newline at end of file diff --git a/src/core/visitors/mips/cil_to_mips.py b/src/core/visitors/mips/cil_to_mips.py new file mode 100644 index 00000000..47cd52d8 --- /dev/null +++ 
b/src/core/visitors/mips/cil_to_mips.py @@ -0,0 +1,1397 @@ +import itertools as itt + +from ...visitors import visitor +from ..cil import cil +from ..mips import mips +from random import choice +from collections import defaultdict + + +class MemoryManager: + def __init__(self, registers, function_for_assign): + self.registers = registers + self.func = function_for_assign + + def get_reg_for_var(self, var): + index = self.func(var) + if index == -1: + return None + return self.registers[index] + + def get_reg_unusued(self, used = []): + possibles = list(set(self.registers).difference(set(used))) + return choice(possibles) + + +class LabelGenerator: + def __init__(self): + self.data_count = 0 + self.type_count = 0 + self.code_count = 0 + + def generate_type_label(self): + self.type_count += 1 + return f'type_{self.type_count}' + + def generate_data_label(self): + self.data_count += 1 + return f'data_{self.data_count}' + + def generate_code_label(self): + self.code_count += 1 + return f'L_{self.code_count}' + + +class CILToMIPSVisitor: + def __init__(self, label_generator = LabelGenerator()): + self._label_generator = label_generator + self.memory_manager = None + self._types = {} + self._data_section = {} + self._functions = {} + self._actual_function = None + self._name_func_map = {} + self._pushed_args = 0 + self._labels_map = {} + + def generate_type_label(self): + return self._label_generator.generate_type_label() + + def generate_data_label(self): + return self._label_generator.generate_data_label() + + def generate_code_label(self): + return self._label_generator.generate_code_label() + + def get_var_location(self, name): + return self._actual_function.get_var_location(name) + + def register_function(self, name, function): + self._functions[name] = function + + def init_function(self, function): + self._actual_function = function + self._labels_map = {} + + def finish_functions(self): + self._actual_function = None + + def push_arg(self): + self._pushed_args += 
1 + + def clean_pushed_args(self): + self._pushed_args = 0 + + def get_free_reg(self): + return self._registers_manager.get_free_reg() + + def free_reg(self, reg): + self._registers_manager.free_reg(reg) + + def in_entry_function(self): + return self._actual_function.label == 'main' + + def register_label(self, cil_label, mips_label): + self._labels_map[cil_label] = mips_label + + def get_mips_label(self, label): + return self._labels_map[label] + + @visitor.on('node') + def collect_func_names(self, node): + pass + + @visitor.when(cil.ProgramNode) + def collect_func_names(self, node): + for func in node.dotcode: + self.collect_func_names(func) + + @visitor.when(cil.FunctionNode) + def collect_func_names(self, node): + if node.name == "entry": + self._name_func_map[node.name] = 'main' + else: + self._name_func_map[node.name] = self.generate_code_label() + + @visitor.on('node') + def collect_labels_in_func(self, node): + pass + + @visitor.when(cil.LabelNode) + def collect_labels_in_func(self, node): + mips_label = self.generate_code_label() + self.register_label(node.label, mips_label) + + + @visitor.on('node') + def visit(self, node): + pass + + @visitor.when(cil.InstructionNode) + def visit(self, node): + print(type(node)) + + @visitor.when(cil.ProgramNode) + def visit(self, node): + #Get functions names + self.collect_func_names(node) + + self._data_section["default_str"] = mips.StringConst("default_str", "") + #Convert CIL ProgramNode to MIPS ProgramNode + for tp in node.dottypes: + self.visit(tp) + + for data in node.dotdata: + self.visit(data) + + for func in node.dotcode: + self.visit(func) + + return mips.ProgramNode( [data for data in self._data_section.values()], [tp for tp in self._types.values()], [func for func in self._functions.values()]) + + @visitor.when(cil.TypeNode) + def visit(self, node): + name_label = self.generate_data_label() + self._data_section[node.name] = mips.StringConst(name_label, node.name) + + type_label = self.generate_type_label() 
+ methods = {key: self._name_func_map[value] for key, value in node.methods} + defaults = [] + if node.name == "String": + defaults = [('value', 'default_str'), ('length', 'type_4_proto')] + new_type = mips.MIPSType(type_label, name_label, node.attributes, methods, len(self._types), default=defaults) + + self._types[node.name] = new_type + + @visitor.when(cil.DataNode) + def visit(self, node): + label = self.generate_data_label() + self._data_section[node.name] = mips.StringConst(label, node.value) + + @visitor.when(cil.FunctionNode) + def visit(self, node): + used_regs_finder = UsedRegisterFinder() + + label = self._name_func_map[node.name] + params = [param.name for param in node.params] + localvars = [local.name for local in node.localvars] + size_for_locals = len(localvars) * mips.ATTR_SIZE + + new_func = mips.FunctionNode(label, params, localvars) + self.register_function(node.name, new_func) + self.init_function(new_func) + + ra = RegistersAllocator() + + if len(node.instructions): + reg_for_var = ra.get_registers_for_variables(node.instructions, node.params, len(mips.REGISTERS)) + self.memory_manager = MemoryManager(mips.REGISTERS, lambda x : reg_for_var[x]) + + for instruction in node.instructions: + self.collect_labels_in_func(instruction) + + initial_instructions = [] + if self.in_entry_function(): + initial_instructions.append(mips.JumpAndLinkNode("mem_manager_init")) + + initial_instructions.extend(mips.push_register(mips.FP_REG)) + initial_instructions.append(mips.AddInmediateNode(mips.FP_REG, mips.SP_REG, 4)) + initial_instructions.append(mips.AddInmediateNode(mips.SP_REG, mips.SP_REG, -size_for_locals)) + + code_instructions = [] + + code_instructions = list(itt.chain.from_iterable([self.visit(instruction) for instruction in node.instructions])) + + final_instructions = [] + + for param in params: + reg = self.memory_manager.get_reg_for_var(param) + if reg is not None: + code_instructions.insert(0,mips.LoadWordNode(reg, self.get_var_location(param))) 
+ + if not self.in_entry_function(): + used_regs = used_regs_finder.get_used_registers(code_instructions) + for reg in used_regs: + initial_instructions.extend(mips.push_register(reg)) + + for reg in used_regs[::-1]: + final_instructions.extend(mips.pop_register(reg)) + + final_instructions.append(mips.AddInmediateNode(mips.SP_REG, mips.SP_REG, size_for_locals)) + final_instructions.extend(mips.pop_register(mips.FP_REG)) + + if not self.in_entry_function(): + final_instructions.append(mips.JumpRegister(mips.RA_REG)) + else: + final_instructions.extend(mips.exit_program()) + + func_instructions = list(itt.chain(initial_instructions, code_instructions, final_instructions)) + new_func.add_instructions(func_instructions) + + self.finish_functions() + + @visitor.when(cil.ArgNode) + def visit(self, node): + self.push_arg() + instructions = [] + if type(node.name) == int: + instructions.append(mips.LoadInmediateNode(mips.ARG_REGISTERS[0], node.name)) + instructions.extend(mips.push_register(mips.ARG_REGISTERS[0])) + else: + reg = self.memory_manager.get_reg_for_var(node.name) + if reg is None: + reg = mips.ARG_REGISTERS[0] + instructions.append(mips.LoadWordNode(reg, self.get_var_location(node.name))) + instructions.extend(mips.push_register(reg)) + return instructions + + @visitor.when(cil.StaticCallNode) + def visit(self, node): + instructions = [] + label = self._name_func_map[node.function] + instructions.append(mips.JumpAndLinkNode(label)) + + reg = self.memory_manager.get_reg_for_var(node.dest) + if reg is None: + instructions.append(mips.StoreWordNode(mips.V0_REG, self.get_var_location(node.dest))) + else: + instructions.append(mips.MoveNode(reg, mips.V0_REG)) + + if self._pushed_args > 0: + instructions.append(mips.AddInmediateNode(mips.SP_REG, mips.SP_REG, self._pushed_args * mips.ATTR_SIZE)) + self.clean_pushed_args() + return instructions + + @visitor.when(cil.AssignNode) + def visit(self, node): + instructions = [] + + reg1 = None + if type(node.source) == 
cil.VoidNode: + reg1 = mips.ZERO_REG + elif node.source.isnumeric(): + reg1 = mips.ARG_REGISTERS[0] + instructions.append(mips.LoadInmediateNode(reg1, int(node.source))) + else: + reg1 = self.memory_manager.get_reg_for_var(node.source) + if reg1 is None: + reg1 = mips.ARG_REGISTERS[0] + instructions.append(mips.LoadWordNode(reg1, self.get_var_location(node.source))) + + reg2 = self.memory_manager.get_reg_for_var(node.dest) + if reg2 is None: + instructions.append(mips.StoreWordNode(reg1, self.get_var_location(node.dest))) + else: + instructions.append(mips.MoveNode(reg2, reg1)) + + return instructions + + @visitor.when(cil.AllocateNode) + def visit(self, node): + instructions = [] + + tp = 0 + if node.type.isnumeric(): + tp = node.type + else: + tp = self._types[node.type].index + + reg1 = self.memory_manager.get_reg_unusued() + reg2 = self.memory_manager.get_reg_unusued([reg1]) + instructions.extend(mips.push_register(reg1)) + instructions.extend(mips.push_register(reg2)) + + instructions.append(mips.LoadInmediateNode(reg1, tp)) + + instructions.extend(mips.create_object(reg1, reg2)) + + instructions.extend(mips.pop_register(reg2)) + instructions.extend(mips.pop_register(reg1)) + + reg3 = self.memory_manager.get_reg_for_var(node.dest) + if reg3 is None: + instructions.append(mips.StoreWordNode(mips.V0_REG, self.get_var_location(node.dest))) + else: + instructions.append(mips.MoveNode(reg3, mips.V0_REG)) + + return instructions + + @visitor.when(cil.ReturnNode) + def visit(self, node): + instructions = [] + + if node.value is None: + instructions.append(mips.LoadInmediateNode(mips.V0_REG, 0)) + elif isinstance(node.value, int): + instructions.append(mips.LoadInmediateNode(mips.V0_REG, node.value)) + elif isinstance(node.value, cil.VoidNode): + instructions.append(mips.LoadInmediateNode(mips.V0_REG, 0)) + else: + reg = self.memory_manager.get_reg_for_var(node.value) + if reg is None: + instructions.append(mips.LoadWordNode(mips.V0_REG, 
@visitor.when(cil.PrintStrNode)
def visit(self, node):
    """Translate a CIL ``PrintStrNode`` into MIPS instruction nodes.

    Loads service code 4 into ``$v0``, places the value of ``node.value``
    in ``$a0`` (moved from its assigned register, or loaded from the
    location reported by ``get_var_location`` when it has none) and issues
    a ``syscall``.

    Returns:
        The list of generated MIPS instruction nodes.
    """
    instructions = [mips.LoadInmediateNode(mips.V0_REG, 4)]

    reg = self.memory_manager.get_reg_for_var(node.value)
    if reg is None:
        # Fixed: the two LoadWordNode arguments were separated by '.' instead
        # of ',', which evaluated `ARG_REGISTERS[0].self` and raised
        # AttributeError whenever the value was not held in a register.
        instructions.append(
            mips.LoadWordNode(mips.ARG_REGISTERS[0], self.get_var_location(node.value))
        )
    else:
        instructions.append(mips.MoveNode(mips.ARG_REGISTERS[0], reg))

    # The syscall is emitted on both paths, as in the PrintIntNode visitor.
    instructions.append(mips.SyscallNode())

    return instructions
@visitor.when(cil.SetAttribNode)
def visit(self, node):
    """Translate a CIL ``SetAttribNode`` into MIPS instruction nodes.

    Stores ``node.value`` (an int literal or a variable name) into the
    attribute ``node.attr`` of the object referenced by ``node.obj``.
    ``node.obj`` and ``node.computed_type`` may be plain strings or objects
    exposing a ``name`` attribute.

    Returns:
        The list of generated MIPS instruction nodes.
    """
    instructions = []

    obj = node.obj if type(node.obj) == str else node.obj.name
    comp_type = node.computed_type if type(node.computed_type) == str else node.computed_type.name

    tp = self._types[comp_type]
    # NOTE(review): the +3 presumably skips the three header words that
    # precede the attributes in an instance (type index, size, dispatch
    # table pointer) -- confirm against the proto layout.
    offset = (tp.attributes.index(node.attr) + 3) * mips.ATTR_SIZE

    # Base address of the object: its assigned register, or $a0 as scratch.
    reg1 = self.memory_manager.get_reg_for_var(obj)
    if reg1 is None:
        reg1 = mips.ARG_REGISTERS[0]
        instructions.append(mips.LoadWordNode(reg1, self.get_var_location(obj)))

    if type(node.value) == int:
        # Fixed: the original bound reg2 to the result of list.append(),
        # i.e. None, so the store below was built with no source register.
        reg2 = mips.ARG_REGISTERS[1]
        instructions.append(mips.LoadInmediateNode(reg2, node.value))
    else:
        reg2 = self.memory_manager.get_reg_for_var(node.value)
        if reg2 is None:
            reg2 = mips.ARG_REGISTERS[1]
            instructions.append(mips.LoadWordNode(reg2, self.get_var_location(node.value)))

    instructions.append(mips.StoreWordNode(reg2, mips.RegisterRelativeLocation(reg1, offset)))

    return instructions
+ instructions.extend(mips.pop_register(reg)) + + reg = self.memory_manager.get_reg_for_var(node.dest) + if reg is None: + instructions.append(mips.StoreWordNode(mips.V0_REG, self.get_var_location(node.dest))) + else: + instructions.append(mips.MoveNode(reg, mips.V0_REG)) + + return instructions + + @visitor.when(cil.EqualNode) + def visit(self, node): + instructions = [] + + if type(node.left) == int: + instructions.append(mips.LoadInmediateNode(mips.ARG_REGISTERS[0], node.left)) + elif type(node.left) == cil.VoidNode: + instructions.append(mips.LoadInmediateNode(mips.ARG_REGISTERS[0], 0)) + else: + reg = self.memory_manager.get_reg_for_var(node.left) + if reg is None: + instructions.append(mips.LoadWordNode(mips.ARG_REGISTERS[0], self.get_var_location(node.left))) + else: + instructions.append(mips.MoveNode(mips.ARG_REGISTERS[0], reg)) + + if type(node.right) == int: + instructions.append(mips.LoadInmediateNode(mips.ARG_REGISTERS[1], node.right)) + elif type(node.right) == cil.VoidNode: + instructions.append(mips.LoadInmediateNode(mips.ARG_REGISTERS[1], 0)) + else: + reg = self.memory_manager.get_reg_for_var(node.right) + if reg is None: + instructions.append(mips.LoadWordNode(mips.ARG_REGISTERS[1], self.get_var_location(node.right))) + else: + instructions.append(mips.MoveNode(mips.ARG_REGISTERS[1], reg)) + + instructions.append(mips.JumpAndLinkNode("equals")) + + reg = self.memory_manager.get_reg_for_var(node.dest) + if reg is None: + instructions.append(mips.StoreWordNode(mips.V0_REG, self.get_var_location(node.dest))) + else: + instructions.append(mips.MoveNode(reg, mips.V0_REG)) + + return instructions + + @visitor.when(cil.EqualStrNode) + def visit(self, node): + instructions = [] + + reg = self.memory_manager.get_reg_for_var(node.left) + if reg is None: + instructions.append(mips.LoadWordNode(mips.ARG_REGISTERS[0], self.get_var_location(node.left))) + else: + instructions.append(mips.MoveNode(mips.ARG_REGISTERS[0], reg)) + + reg = 
self.memory_manager.get_reg_for_var(node.right) + if reg is None: + instructions.append(mips.LoadWordNode(mips.ARG_REGISTERS[1], self.get_var_location(node.right))) + else: + instructions.append(mips.MoveNode(mips.ARG_REGISTERS[1], reg)) + + instructions.append(mips.JumpAndLinkNode("equal_str")) + + reg = self.memory_manager.get_reg_for_var(node.dest) + if reg is None: + instructions.append(mips.StoreWordNode(mips.V0_REG, self.get_var_location(node.dest))) + else: + instructions.append(mips.MoveNode(reg, mips.V0_REG)) + + return instructions + + @visitor.when(cil.LabelNode) + def visit(self, node): + return [mips.LabelNode(self.get_mips_label(node.label))] + + @visitor.when(cil.GotoIfNode) + def visit(self, node): + instructions = [] + + mips_label = self.get_mips_label(node.label) + + reg = self.memory_manager.get_reg_for_var(node.condition) + if reg is None: + reg = mips.ARG_REGISTERS[0] + instructions.append(mips.LoadWordNode(mips.ARG_REGISTERS[0], self.get_var_location(node.condition))) + + instructions.append(mips.BranchOnNotEqualNode(reg, mips.ZERO_REG, mips_label)) + + return instructions + + @visitor.when(cil.GotoNode) + def visit(self, node): + mips_label = self.get_mips_label(node.label) + return [mips.JumpNode(mips_label)] + + @visitor.when(cil.TypeOfNode) + def visit(self, node): + instructions = [] + + reg1 = self.memory_manager.get_reg_for_var(node.obj) + if reg1 is None: + reg1 = mips.ARG_REGISTERS[0] + instructions.append(mips.LoadWordNode(reg1, self.get_var_location(node.obj))) + + reg2 = self.memory_manager.get_reg_for_var(node.dest) + if reg2 is None: + instructions.append(mips.LoadWordNode(mips.ARG_REGISTERS[1], mips.RegisterRelativeLocation(reg1, 0))) + instructions.append(mips.StoreWordNode(mips.ARG_REGISTERS[1], self.get_var_location(node.dest))) + else: + instructions.append(mips.LoadWordNode(reg2, mips.RegisterRelativeLocation(reg1, 0))) + + return instructions + + @visitor.when(cil.DynamicCallNode) + def visit(self, node): + instructions = 
[] + + comp_tp = self._types[node.computed_type] + method_index = list(comp_tp.methods).index(node.method) + reg = self.memory_manager.get_reg_for_var(node.type) + if reg is None: + reg = mips.ARG_REGISTERS[0] + instructions.append(mips.LoadWordNode(reg, self.get_var_location(node.type))) + + instructions.append(mips.LoadAddressNode(mips.ARG_REGISTERS[1], mips.PROTO_TABLE_LABEL)) + instructions.append(mips.ShiftLeftLogicalNode(mips.ARG_REGISTERS[2], reg, 2)) + instructions.append(mips.AddUnsignedNode(mips.ARG_REGISTERS[1], mips.ARG_REGISTERS[1], mips.ARG_REGISTERS[2])) + instructions.append(mips.LoadWordNode(mips.ARG_REGISTERS[1], mips.RegisterRelativeLocation(mips.ARG_REGISTERS[1], 0))) + instructions.append(mips.LoadWordNode(mips.ARG_REGISTERS[1], mips.RegisterRelativeLocation(mips.ARG_REGISTERS[1], 8))) + instructions.append(mips.AddInmediateUnsignedNode(mips.ARG_REGISTERS[1], mips.ARG_REGISTERS[1], method_index * 4)) + instructions.append(mips.LoadWordNode(mips.ARG_REGISTERS[1], mips.RegisterRelativeLocation(mips.ARG_REGISTERS[1], 0))) + instructions.append(mips.JumpRegisterAndLinkNode(mips.ARG_REGISTERS[1])) + + reg = self.memory_manager.get_reg_for_var(node.dest) + if reg is None: + instructions.append(mips.StoreWordNode(mips.V0_REG, self.get_var_location(node.dest))) + else: + instructions.append(mips.MoveNode(reg, mips.V0_REG)) + + if self._pushed_args > 0: + instructions.append(mips.AddInmediateNode(mips.SP_REG, mips.SP_REG, self._pushed_args * mips.ATTR_SIZE)) + self.clean_pushed_args() + + return instructions + + @visitor.when(cil.ErrorNode) + def visit(self, node): + instructions = [] + + mips_label = self._data_section[node.data_node.name].label + + instructions.append(mips.LoadInmediateNode(mips.V0_REG, 4)) + instructions.append(mips.LoadAddressNode(mips.ARG_REGISTERS[0], mips_label)) + instructions.append(mips.SyscallNode()) + instructions.append(mips.LoadInmediateNode(mips.V0_REG, 10)) + instructions.append(mips.SyscallNode()) + + return 
@visitor.when(cil.MinusNode)
def visit(self, node):
    """Translate a CIL ``MinusNode`` (``dest = left - right``) into MIPS.

    Each operand is either an int literal, loaded into a scratch argument
    register, or a variable, taken from its assigned register or loaded
    from the location reported by ``get_var_location``. The difference is
    written to the register assigned to ``node.dest``, or stored to its
    location through ``$a0`` when it has no register.

    Returns:
        The list of generated MIPS instruction nodes.
    """
    instructions = []

    if type(node.left) == int:
        reg1 = mips.ARG_REGISTERS[0]
        instructions.append(mips.LoadInmediateNode(reg1, node.left))
    else:
        reg1 = self.memory_manager.get_reg_for_var(node.left)
        if reg1 is None:
            reg1 = mips.ARG_REGISTERS[0]
            instructions.append(mips.LoadWordNode(reg1, self.get_var_location(node.left)))

    if type(node.right) == int:
        # Fixed: reg2 was left as None here, so both the immediate load and
        # the subtraction were built without a register. Plus/Star/Div use
        # $a1 for an immediate right operand; do the same.
        reg2 = mips.ARG_REGISTERS[1]
        instructions.append(mips.LoadInmediateNode(reg2, node.right))
    else:
        reg2 = self.memory_manager.get_reg_for_var(node.right)
        if reg2 is None:
            reg2 = mips.ARG_REGISTERS[1]
            instructions.append(mips.LoadWordNode(reg2, self.get_var_location(node.right)))

    reg3 = self.memory_manager.get_reg_for_var(node.dest)
    if reg3 is None:
        # No register for dest: compute into $a0 and write it back.
        instructions.append(mips.SubNode(mips.ARG_REGISTERS[0], reg1, reg2))
        instructions.append(mips.StoreWordNode(mips.ARG_REGISTERS[0], self.get_var_location(node.dest)))
    else:
        instructions.append(mips.SubNode(reg3, reg1, reg2))

    return instructions
mips.ARG_REGISTERS[0] + instructions.append(mips.LoadInmediateNode(reg1, node.left)) + else: + reg1 = self.memory_manager.get_reg_for_var(node.left) + if reg1 is None: + reg1 = mips.ARG_REGISTERS[0] + instructions.append(mips.LoadWordNode(reg1, self.get_var_location(node.left))) + + if type(node.right) == int: + reg2 = mips.ARG_REGISTERS[1] + instructions.append(mips.LoadInmediateNode(reg2, node.right)) + else: + reg2 = self.memory_manager.get_reg_for_var(node.right) + if reg2 is None: + reg2 = mips.ARG_REGISTERS[1] + instructions.append(mips.LoadWordNode(reg2, self.get_var_location(node.right))) + + instructions.append(mips.DivideNode(reg1, reg2)) + reg3 = self.memory_manager.get_reg_for_var(node.dest) + if reg3 is None: + instructions.append(mips.MoveFromLowNode(mips.ARG_REGISTERS[0])) + instructions.append(mips.StoreWordNode(mips.ARG_REGISTERS[0], self.get_var_location(node.dest))) + else: + instructions.append(mips.MoveFromLowNode(reg3)) + + return instructions + + @visitor.when(cil.ComplementNode) + def visit(self, node): + instructions = [] + + reg1 = None + + if type(node.obj) == int: + reg1 = mips.ARG_REGISTERS[0] + instructions.append(mips.LoadInmediateNode(reg1, node.obj)) + else: + reg1 = self.memory_manager.get_reg_for_var(node.obj) + if reg1 is None: + reg1 = mips.ARG_REGISTERS[0] + instructions.append(mips.LoadWordNode(reg1, self.get_var_location(node.obj))) + + reg2 = self.memory_manager.get_reg_for_var(node.dest) + if reg2 is None: + reg2 = mips.ARG_REGISTERS[1] + instructions.append(mips.ComplementNode(reg2, reg1)) + instructions.append(mips.AddInmediateNode(reg2, reg2, 1)) + instructions.append(mips.StoreWordNode(reg2, self.get_var_location(node.dest))) + else: + instructions.append(mips.ComplementNode(reg2, reg1)) + instructions.append(mips.AddInmediateNode(reg2, reg2, 1)) + + return instructions + + @visitor.when(cil.LessEqualNode) + def visit(self, node): + instructions = [] + #Save $a0, $a1, $v0 + + if type(node.left) == int: + 
instructions.append(mips.LoadInmediateNode(mips.ARG_REGISTERS[0], node.left)) + else: + reg = self.memory_manager.get_reg_for_var(node.left) + if reg is None: + instructions.append(mips.LoadWordNode(mips.ARG_REGISTERS[0], self.get_var_location(node.left))) + else: + instructions.append(mips.MoveNode(mips.ARG_REGISTERS[0], reg)) + + if type(node.right) == int: + instructions.append(mips.LoadInmediateNode(mips.ARG_REGISTERS[1], node.right)) + else: + reg = self.memory_manager.get_reg_for_var(node.right) + if reg is None: + instructions.append(mips.LoadWordNode(mips.ARG_REGISTERS[1], self.get_var_location(node.right))) + else: + instructions.append(mips.MoveNode(mips.ARG_REGISTERS[1], reg)) + + instructions.append(mips.JumpAndLinkNode('less_equal')) + reg = self.memory_manager.get_reg_for_var(node.dest) + if reg is None: + instructions.append(mips.StoreWordNode(mips.V0_REG, self.get_var_location(node.dest))) + else: + instructions.append(mips.MoveNode(reg, mips.V0_REG)) + + return instructions + + @visitor.when(cil.LessNode) + def visit(self, node): + instructions = [] + + if type(node.left) == int: + instructions.append(mips.LoadInmediateNode(mips.ARG_REGISTERS[0], node.left)) + else: + reg = self.memory_manager.get_reg_for_var(node.left) + if reg is None: + instructions.append(mips.LoadWordNode(mips.ARG_REGISTERS[0], self.get_var_location(node.left))) + else: + instructions.append(mips.MoveNode(mips.ARG_REGISTERS[0], reg)) + + if type(node.right) == int: + instructions.append(mips.LoadInmediateNode(mips.ARG_REGISTERS[1], node.right)) + else: + reg = self.memory_manager.get_reg_for_var(node.right) + if reg is None: + instructions.append(mips.LoadWordNode(mips.ARG_REGISTERS[1], self.get_var_location(node.right))) + else: + instructions.append(mips.MoveNode(mips.ARG_REGISTERS[1], reg)) + + instructions.append(mips.JumpAndLinkNode('less')) + reg = self.memory_manager.get_reg_for_var(node.dest) + if reg is None: + instructions.append(mips.StoreWordNode(mips.V0_REG, 
@visitor.when(cil.ConcatNode)
def visit(self, node):
    """Translate a CIL ``ConcatNode`` into a call to the ``concat`` routine.

    ``node.prefix``, ``node.suffix`` and ``node.length`` are placed in
    ``$a0``, ``$a1`` and ``$a2`` respectively (moved from their assigned
    register, or loaded from the location reported by
    ``get_var_location``), ``concat`` is called with ``jal`` and the value
    left in ``$v0`` is written to ``node.dest``.

    Returns:
        The list of generated MIPS instruction nodes.
    """
    instructions = []

    # Fixed: the load for the third operand read `node.lenght`, raising
    # AttributeError whenever the length was not held in a register. All
    # three operands now go through the same path.
    operands = (node.prefix, node.suffix, node.length)
    for operand, target in zip(operands, mips.ARG_REGISTERS):
        reg = self.memory_manager.get_reg_for_var(operand)
        if reg is None:
            instructions.append(mips.LoadWordNode(target, self.get_var_location(operand)))
        else:
            instructions.append(mips.MoveNode(target, reg))

    instructions.append(mips.JumpAndLinkNode("concat"))

    reg = self.memory_manager.get_reg_for_var(node.dest)
    if reg is None:
        instructions.append(mips.StoreWordNode(mips.V0_REG, self.get_var_location(node.dest)))
    else:
        instructions.append(mips.MoveNode(reg, mips.V0_REG))

    return instructions
class UsedRegisterFinder:
    """Collect the registers written by a sequence of MIPS instruction nodes.

    Each ``visit`` overload records the register an instruction writes
    (``$ra`` for the two jump-and-link forms). Node types without an
    overload fall through to the empty base ``visit``.
    """

    def __init__(self):
        self.used_registers = set()

    def get_used_registers(self, instructions):
        """Return the registers written by ``instructions`` as a list.

        ``$sp``, ``$fp`` and ``$v0`` are always excluded from the result.
        The internal set is reset on every call.
        """
        self.used_registers = set()

        for instruction in instructions:
            self.visit(instruction)

        excluded = set([mips.SP_REG, mips.FP_REG, mips.V0_REG])
        self.used_registers = self.used_registers.difference(excluded)
        return list(self.used_registers)

    @visitor.on('node')
    def visit(self, node):
        pass

    @visitor.when(mips.LoadInmediateNode)
    def visit(self, node):
        self.used_registers.add(node.reg)

    @visitor.when(mips.LoadAddressNode)
    def visit(self, node):
        self.used_registers.add(node.reg)

    @visitor.when(mips.AddInmediateNode)
    def visit(self, node):
        self.used_registers.add(node.dest)

    @visitor.when(mips.MoveNode)
    def visit(self, node):
        # reg1 is the first MoveNode constructor argument; call sites in
        # this file pass the destination register first.
        self.used_registers.add(node.reg1)

    @visitor.when(mips.LoadWordNode)
    def visit(self, node):
        self.used_registers.add(node.reg)

    @visitor.when(mips.JumpAndLinkNode)
    def visit(self, node):
        self.used_registers.add(mips.RA_REG)

    @visitor.when(mips.JumpRegisterAndLinkNode)
    def visit(self, node):
        self.used_registers.add(mips.RA_REG)

    @visitor.when(mips.AddUnsignedNode)
    def visit(self, node):
        self.used_registers.add(node.dest)

    @visitor.when(mips.ShiftLeftLogicalNode)
    def visit(self, node):
        self.used_registers.add(node.dest)

    @visitor.when(mips.AddNode)
    def visit(self, node):
        self.used_registers.add(node.reg1)

    @visitor.when(mips.SubNode)
    def visit(self, node):
        self.used_registers.add(node.reg1)

    @visitor.when(mips.MultiplyNode)
    def visit(self, node):
        self.used_registers.add(node.reg1)

    @visitor.when(mips.ComplementNode)
    def visit(self, node):
        self.used_registers.add(node.reg1)

    @visitor.when(mips.MoveFromLowNode)
    def visit(self, node):
        self.used_registers.add(node.reg)
@staticmethod
def interference_compute(gk, in_out):
    """Build the interference graph from gen/kill and in/out information.

    Args:
        gk: sequence of ``(gen, kill)`` pairs, one per program point.
        in_out: sequence of ``(in, out)`` pairs aligned with ``gk``; only
            the ``out`` set (index 1) is read.

    Returns:
        A dict mapping every variable that appears in a gen or kill list
        to the list of variables it interferes with. A variable killed at
        a point interferes with everything in that point's ``out`` set;
        edges are made symmetric and self-edges are dropped.
    """
    graph = {}
    for generated, killed in gk:
        for group in (generated, killed):
            for var in group:
                graph[var] = set()

    # A definition interferes with everything live right after it.
    for position, pair in enumerate(gk):
        live_after = in_out[position][1]
        for var in pair[1]:
            graph[var].update(live_after)

    # Mirror every edge so the relation is symmetric.
    for var, adjacent in graph.items():
        for other in adjacent:
            graph[other].add(var)

    return {var: list(adjacent.difference([var])) for var, adjacent in graph.items()}
@staticmethod
def out_in_compute(suc, gk):
    """Compute live-in / live-out sets by iterating to a fixed point.

    Args:
        suc: ``suc[i]`` is the list of successor instruction indices of
            instruction ``i``.
        gk: ``gk[i]`` is the ``(gen, kill)`` pair of instruction ``i``.

    Returns:
        A list with one ``[in_set, out_set]`` entry per instruction, where
        ``out`` is the union of the successors' ``in`` sets and
        ``in = gen | (out - kill)``.
    """
    total = len(gk)
    previous = [[set(), set()] for _ in range(total)]
    current = [[set(), set()] for _ in range(total)]

    dirty = True
    while dirty:
        dirty = False
        # Walk backwards so forward successors are already up to date.
        for idx in reversed(range(total)):
            entry = current[idx]
            for nxt in suc[idx]:
                # Back edges read the table from the previous sweep.
                table = previous if nxt < idx else current
                incoming = table[nxt][0]
                if not incoming <= entry[1]:
                    dirty = True
                entry[1] = entry[1] | incoming

            uses = set(gk[idx][0])
            defs = set(gk[idx][1])
            candidate = uses | (entry[1] - defs)
            if not candidate <= entry[0]:
                dirty = True
            entry[0] = entry[0] | candidate

        previous = current

    return previous
{b[0].label : i for i, b in enumerate(blocks) if isinstance(b[0], cil.LabelNode)} + + for i, block in enumerate(blocks): + if isinstance(block[-1], cil.GotoNode): + graph[i][labels[block[-1].label]] = 1 + elif isinstance(block[-1], cil.GotoIfNode): + graph[i][labels[block[-1].label]] = 1 + graph[i][i + 1] = 1 if i + 1 < len(blocks) else -1 + elif i != len(blocks) - 1: + graph[i][i + 1] = 1 + + return graph + + @staticmethod + def numbered_instructions(instructions): + for i, instr in enumerate(instructions): + instr.number = i + + @visitor.on('instruction') + def gen_kill(self, instruction): + pass + + @visitor.when(cil.ArgNode) + def gen_kill(self, instruction): + if isinstance(instruction.name, int): + return ([], []) + return ([instruction.name], []) + + @visitor.when(cil.StaticCallNode) + def gen_kill(self, instruction): + return ([], [instruction.dest]) + + @visitor.when(cil.AssignNode) + def gen_kill(self, instruction): + gen = [] + if isinstance(instruction.source, str): + if not instruction.source.isnumeric(): + gen = [ instruction.source ] + return (gen, [ instruction.dest ]) + + @visitor.when(cil.AllocateNode) + def gen_kill(self, instruction): + return ([], [ instruction.dest ]) + + @visitor.when(cil.ReturnNode) + def gen_kill(self, instruction): + gen = [ instruction.value ] if isinstance(instruction.value, str) else [] + return (gen, []) + + @visitor.when(cil.LoadNode) + def gen_kill(self, instruction): + return ([], [instruction.dest]) + + @visitor.when(cil.PrintIntNode) + def gen_kill(self, instruction): + return ([ instruction.value ], []) + + @visitor.when(cil.PrintStrNode) + def gen_kill(self, instruction): + return ([ instruction.value ], []) + + @visitor.when(cil.TypeNameNode) + def gen_kill(self, instruction): + return ([ instruction.source ], [ instruction.dest ]) + + @visitor.when(cil.ExitNode) + def gen_kill(self, instruction): + return ( [], []) + + @visitor.when(cil.GetAttribNode) + def gen_kill(self, instruction): + return ([ 
@visitor.when(cil.DynamicCallNode)
def gen_kill(self, instruction):
    """Return the ``(gen, kill)`` pair for a dynamic call.

    The call defines ``instruction.dest``. It also uses the variable named
    by ``instruction.type``: the DynamicCallNode code generator in this
    file fetches that variable to index the proto table.
    """
    # Fixed: gen used to be empty, so the type variable was treated as dead
    # at the call and its register could be handed to another variable.
    gen = [instruction.type] if isinstance(instruction.type, str) else []
    return (gen, [instruction.dest])
"""Node classes, register constants and instruction-list snippets describing a MIPS program.

The module is pure data modelling: nothing here emits text. The ``push_register`` /
``create_object`` style helpers return lists of instruction nodes for a caller to
append to a ``FunctionNode``.
"""
from itertools import chain

ATTR_SIZE = 4                # bytes used per stack slot when computing frame offsets
RESGISTER_SIZE = 4           # bytes per register; misspelled name kept because it is public
REGISTER_NAMES = ['t0', 't1', 't2', 't3', 't4', 't5', 't6', 't7', 't8', 't9']
ARG_REGISTERS_NAMES = ['a0', 'a1', 'a2', 'a3']
OBJECT_MARK = -1

# Number of words added to the attribute count by MIPSType.size.
INSTANCE_METADATA_SIZE = 4

TYPENAMES_TABLE_LABEL = "type_name_table"
PROTO_TABLE_LABEL = "proto_table"


class Register():
    """A machine register identified by its name without the ``$`` sigil."""

    def __init__(self, name):
        self.name = name


REGISTERS = [Register(name) for name in REGISTER_NAMES]
ARG_REGISTERS = [Register(name) for name in ARG_REGISTERS_NAMES]
FP_REG = Register('fp')
SP_REG = Register('sp')
RA_REG = Register('ra')
V0_REG = Register('v0')
V1_REG = Register('v1')
ZERO_REG = Register('zero')
LOW_REG = Register('low')


class Node:
    """Base class of every node in this module."""
    pass


class ProgramNode(Node):
    """Root node: the data section, the types and the functions of a program."""

    def __init__(self, data, types, functions):
        self._data = data
        self._types = types
        self._functions = functions

    @property
    def data(self):
        return self._data

    @property
    def types(self):
        return self._types

    @property
    def functions(self):
        return self._functions


class FunctionNode(Node):
    """A labelled function with its parameter names, local names and instructions."""

    def __init__(self, label, params, localvars):
        self._label = label
        self._instructions = []
        self._params = params
        self._localvars = localvars

    @property
    def label(self):
        return self._label

    @property
    def instructions(self):
        return self._instructions

    def add_instructions(self, instructions):
        """Append every instruction of the iterable *instructions* to this function."""
        self._instructions.extend(instructions)

    def get_param_stack_location(self, name):
        """Return the $fp-relative location of parameter *name*.

        The last parameter sits at offset 0 and earlier ones at increasing
        positive offsets. Raises ValueError when *name* is not a parameter.
        """
        # TODO: take into account that the first arguments are stored in the
        # argument registers.
        index = self._params.index(name)
        offset = ((len(self._params) - 1) - index) * ATTR_SIZE
        return RegisterRelativeLocation(FP_REG, offset)

    def get_local_stack_location(self, name):
        """Return the $fp-relative location of local *name*.

        The first local sits at -2 * ATTR_SIZE and later ones further down.
        Raises ValueError when *name* is not a local.
        """
        index = self._localvars.index(name)
        offset = (index + 2) * -ATTR_SIZE
        return RegisterRelativeLocation(FP_REG, offset)

    def get_var_location(self, name):
        """Return the location of *name*, looking at parameters first, then locals.

        Raises ValueError when *name* is neither a parameter nor a local.
        """
        try:
            return self.get_param_stack_location(name)
        except ValueError:
            return self.get_local_stack_location(name)


class DataNode(Node):
    """A labelled entry of the data section."""

    def __init__(self, label):
        self._label = label

    @property
    def label(self):
        return self._label


class StringConst(DataNode):
    """A labelled string constant."""

    def __init__(self, label, string):
        super().__init__(label)
        self._string = string

    @property
    def string(self):
        return self._string


class InstructionNode(Node):
    """Base class of every instruction node."""
    pass


class LabelNode(InstructionNode):
    def __init__(self, name):
        self.name = name


class MoveNode(InstructionNode):
    def __init__(self, reg1, reg2):
        self.reg1 = reg1
        self.reg2 = reg2


class LoadInmediateNode(InstructionNode):
    def __init__(self, reg, value):
        self.reg = reg
        self.value = value


class LoadWordNode(InstructionNode):
    def __init__(self, reg, addr):
        self.reg = reg
        self.addr = addr


class SyscallNode(InstructionNode):
    pass


class LoadAddressNode(InstructionNode):
    def __init__(self, reg, label):
        self.reg = reg
        self.label = label


class StoreWordNode(InstructionNode):
    def __init__(self, reg, addr):
        self.reg = reg
        self.addr = addr


class JumpAndLinkNode(InstructionNode):
    def __init__(self, label):
        self.label = label


class JumpRegisterAndLinkNode(InstructionNode):
    def __init__(self, reg):
        self.reg = reg


class JumpRegister(InstructionNode):
    def __init__(self, reg):
        self.reg = reg


class AddInmediateNode(InstructionNode):
    def __init__(self, dest, src, value):
        self.dest = dest
        self.src = src
        self.value = value


class AddInmediateUnsignedNode(InstructionNode):
    def __init__(self, dest, src, value):
        self.dest = dest
        self.src = src
        self.value = value


class AddUnsignedNode(InstructionNode):
    def __init__(self, dest, sum1, sum2):
        self.dest = dest
        self.sum1 = sum1
        self.sum2 = sum2


class ShiftLeftLogicalNode(InstructionNode):
    def __init__(self, dest, src, bits):
        self.dest = dest
        self.src = src
        self.bits = bits


class BranchOnNotEqualNode(InstructionNode):
    def __init__(self, reg1, reg2, label):
        self.reg1 = reg1
        self.reg2 = reg2
        self.label = label


class JumpNode(InstructionNode):
    def __init__(self, label):
        self.label = label


class AddNode(InstructionNode):
    def __init__(self, reg1, reg2, reg3):
        self.reg1 = reg1
        self.reg2 = reg2
        self.reg3 = reg3


class SubNode(InstructionNode):
    def __init__(self, reg1, reg2, reg3):
        self.reg1 = reg1
        self.reg2 = reg2
        self.reg3 = reg3


class MultiplyNode(InstructionNode):
    def __init__(self, reg1, reg2, reg3):
        self.reg1 = reg1
        self.reg2 = reg2
        self.reg3 = reg3


class DivideNode(InstructionNode):
    def __init__(self, reg1, reg2):
        self.reg1 = reg1
        self.reg2 = reg2


class ComplementNode(InstructionNode):
    def __init__(self, reg1, reg2):
        self.reg1 = reg1
        self.reg2 = reg2


class MoveFromLowNode(InstructionNode):
    def __init__(self, reg):
        self.reg = reg


class MIPSType:
    """Description of a type: its label, name label, attributes, methods and index."""

    def __init__(self, label, name_addr, attributes, methods, index, default=()):
        """Store the type description.

        *default* is anything ``dict()`` accepts (pairs or a mapping); it is
        copied, so the caller's object is never shared. The default is an
        immutable empty tuple rather than a shared mutable list.
        """
        self._label = label
        self._name = name_addr
        self._attributes = attributes
        self._default_attributes = dict(default)
        self._methods = methods
        self._index = index

    @property
    def size(self):
        """Number of attributes plus INSTANCE_METADATA_SIZE."""
        return len(self.attributes) + INSTANCE_METADATA_SIZE

    @property
    def label(self):
        return self._label

    @property
    def string_name_label(self):
        return self._name

    @property
    def methods(self):
        return self._methods

    @property
    def attributes(self):
        return self._attributes

    @property
    def default_attributes(self):
        """Mapping built from the *default* constructor argument."""
        return self._default_attributes

    @property
    def index(self):
        return self._index


class MemoryLocation:
    """Base class of addressable memory locations."""
    pass


class RegisterRelativeLocation(MemoryLocation):
    """A location expressed as ``offset(register)``."""

    def __init__(self, register, offset):
        self._register = register
        self._offset = offset

    @property
    def register(self):
        return self._register

    @property
    def offset(self):
        return self._offset


class LabelRelativeLocation(MemoryLocation):
    """A location expressed as an offset from a label."""

    def __init__(self, label, offset):
        self._label = label
        self._offset = offset

    @property
    def label(self):
        return self._label

    @property
    def offset(self):
        return self._offset


## Snippets

def push_register(reg):
    """Return the instructions that grow the stack by one slot and store *reg* at 0($sp)."""
    move_stack = AddInmediateNode(SP_REG, SP_REG, -RESGISTER_SIZE)
    save_location = RegisterRelativeLocation(SP_REG, 0)
    save_register = StoreWordNode(reg, save_location)
    return [move_stack, save_register]


def pop_register(reg):
    """Return the instructions that load *reg* from 0($sp) and shrink the stack by one slot."""
    load_value = LoadWordNode(reg, RegisterRelativeLocation(SP_REG, 0))
    move_stack = AddInmediateNode(SP_REG, SP_REG, RESGISTER_SIZE)
    return [load_value, move_stack]


def alloc_memory(size):
    """Return the instructions for syscall 9 with *size* loaded into $a0."""
    return [
        LoadInmediateNode(V0_REG, 9),
        LoadInmediateNode(ARG_REGISTERS[0], size),
        SyscallNode(),
    ]


def exit_program():
    """Return the instructions for syscall 10."""
    return [
        LoadInmediateNode(V0_REG, 10),
        SyscallNode(),
    ]


def create_object(reg1, reg2):
    """Return the instructions that instantiate the type whose index is held in *reg1*.

    *reg1* is overwritten with the index scaled to a table offset and *reg2*
    ends up holding the entry read from the prototype table. $a0-$a2 are used
    for the calls to ``malloc`` and ``copy``.
    """
    return [
        # Scale the index to a word offset and fetch the prototype table entry.
        ShiftLeftLogicalNode(reg1, reg1, 2),
        LoadAddressNode(reg2, PROTO_TABLE_LABEL),
        AddUnsignedNode(reg2, reg2, reg1),
        LoadWordNode(reg2, RegisterRelativeLocation(reg2, 0)),
        # Word at offset 4 of the prototype, multiplied by 4, is the malloc argument.
        LoadWordNode(ARG_REGISTERS[0], RegisterRelativeLocation(reg2, 4)),
        ShiftLeftLogicalNode(ARG_REGISTERS[0], ARG_REGISTERS[0], 2),
        JumpAndLinkNode("malloc"),
        # copy is called with $a0 = prototype, $a1 = $v0 from malloc, $a2 = byte size.
        MoveNode(ARG_REGISTERS[2], ARG_REGISTERS[0]),
        MoveNode(ARG_REGISTERS[0], reg2),
        MoveNode(ARG_REGISTERS[1], V0_REG),
        JumpAndLinkNode("copy"),
    ]


def copy_object(reg1, reg2):
    """Return the instructions that duplicate the object whose address is in *reg1*.

    *reg2* is accepted for signature symmetry with ``create_object`` but is
    not used by the emitted instructions.
    """
    return [
        # Word at offset 4 of the object, multiplied by 4, is the malloc argument.
        LoadWordNode(ARG_REGISTERS[0], RegisterRelativeLocation(reg1, 4)),
        ShiftLeftLogicalNode(ARG_REGISTERS[0], ARG_REGISTERS[0], 2),
        JumpAndLinkNode("malloc"),
        # copy is called with $a0 = source object, $a1 = $v0 from malloc, $a2 = byte size.
        MoveNode(ARG_REGISTERS[2], ARG_REGISTERS[0]),
        MoveNode(ARG_REGISTERS[0], reg1),
        MoveNode(ARG_REGISTERS[1], V0_REG),
        JumpAndLinkNode("copy"),
    ]
#####################################################################################################
# Initialize memory manager
# Args:
#   (none)
# Return:
#   (none) -- $v0, $a0, $a1 and $ra are saved on entry and restored before returning
# Summary:
#   The initial (sentinel) blocks for the free-list and the used-list are created.
#   $gp is set so that it can be used as the reference whenever the sentinel blocks or the
#   memory-manager state are needed: the free-list sentinel is at free_list($gp), the used-list
#   sentinel at used_list($gp) and the saved stack base at stack_base($gp).
#   A block of size alloc_size is created and added to the free-list.
#####################################################################################################
mem_manager_init:

    addiu $sp $sp -16
    sw $v0 0($sp)
    sw $a0 4($sp)
    sw $a1 8($sp)
    sw $ra 12($sp)

    li $v0 9
    li $a0 init_alloc_size
    syscall                                 # Allocate the state word plus the two sentinel headers
    move $gp $v0
    addiu $gp $gp state_size                # $gp points just past the state word, at the free-list sentinel

    sw $zero header_size_slot($gp)          # The free-list starts with a block without space, just a header, that will always be there.
    sw $zero header_next_slot($gp)
    sw $zero header_reachable_slot($gp)

    move $a0 $gp
    li $a1 alloc_size
    jal extend_heap                         # Link a first block of alloc_size bytes after the sentinel

    addiu $a0 $a0 header_size               # extend_heap leaves $a0 = the block it was given, so this is used_list($gp)
    sw $zero header_size_slot($a0)          # The used-list starts with a block without space, just a header, that will always be there.
    sw $zero header_next_slot($a0)
    sw $zero header_reachable_slot($a0)

    lw $v0 0($sp)
    lw $a0 4($sp)
    lw $a1 8($sp)
    lw $ra 12($sp)
    addiu $sp $sp 16

    sw $sp stack_base($gp)                  # Record the caller's $sp (after this frame is popped) as the stack base

    jr $ra


#####################################################################################################
# Free a block previously allocated
# Args:
#   $a0 Address of the block (header) to free
# Return:
#   (none) -- every register used is saved on entry and restored before returning
# Summary:
#   Remove the block from the used-list and add it to the free-list, which is kept ordered by
#   address, then try to merge it with its neighbours in the free-list.
#   If the address is not found in the used-list (including a null address) nothing is changed.
#####################################################################################################
free_block:
    addiu $sp $sp -28
    sw $t0 0($sp)
    sw $t1 4($sp)
    sw $t2 8($sp)
    sw $a0 12($sp)
    sw $ra 16($sp)
    sw $t3 20($sp)
    sw $t4 24($sp)

    move $t0 $a0

    addiu $t1 $gp free_list                 # Store in $t1 the initial block of the free-list

    addiu $t3 $gp used_list                 # Store in $t3 the initial block of the used-list

free_block_loop_used_list:                  # Iterate through the used-list until the block is found
    lw $t4 header_next_slot($t3)
    beq $t4 $zero free_block_restore        # End of the used-list reached: the block is not there, so do nothing
    beq $t4 $t0 free_block_loop_free_list
    move $t3 $t4
    j free_block_loop_used_list


free_block_loop_free_list:                  # Iterate through the free-list to find the predecessor of the block
    lw $t2 header_next_slot($t1)
    beq $t2 $zero free_block_founded_prev
    bge $t2 $t0 free_block_founded_prev
    move $t1 $t2
    j free_block_loop_free_list

free_block_founded_prev:
    # Remove the block from the used-list ($t3 is its predecessor there)
    lw $t4 header_next_slot($t0)
    sw $t4 header_next_slot($t3)

    # Add the block to the free-list between $t1 and $t2
    sw $t2 header_next_slot($t0)
    sw $t0 header_next_slot($t1)

free_block_end:

    # Try to merge the block with its successor, then its predecessor with it
    move $a0 $t0
    jal expand_block
    move $a0 $t1
    jal expand_block

free_block_restore:
    lw $t0 0($sp)
    lw $t1 4($sp)
    lw $t2 8($sp)
    lw $a0 12($sp)
    lw $ra 16($sp)
    lw $t3 20($sp)
    lw $t4 24($sp)
    addiu $sp $sp 28

    jr $ra

+##################################################################################################### +# Merge two continuos blocks of the free-list # +# Args: # +# $a0 First of the two blocks to merge # +# Return: # +# # +# Summary: # +# Check if a block can be merged with its sucesor in the free list # +##################################################################################################### +expand_block: + addiu $sp $sp -16 + sw $t0 0($sp) + sw $t1 4($sp) + sw $t2 8($sp) + sw $t3 12($sp) + + + addiu $t0 $gp free_list # $t0 = the initial block of the free-list + + beq $t0 $a0 expand_block_end # The initial block can't be expanded, the initial block always will have size 0 + + move $t0 $a0 + + # Check if the block and its sucesor in the free list are contiguous in memory + lw $t1 header_next_slot($t0) + lw $t2 header_size_slot($t0) + move $t3 $t2 + addiu $t2 $t2 header_size + addu $t2 $t2 $t0 + beq $t2 $t1 expand_block_expand + j expand_block_end + +expand_block_expand: #Increment the size of the first block and update next field + lw $t2 header_size_slot($t1) + addi $t2 $t2 header_size + add $t2 $t2 $t3 + sw $t2 header_size_slot($t0) + lw $t1 header_next_slot($t1) + sw $t1 header_next_slot($t0) + +expand_block_end: + lw $t0 0($sp) + lw $t1 4($sp) + lw $t2 8($sp) + lw $t3 12($sp) + addiu $sp $sp 16 + + jr $ra + + +##################################################################################################### +# Allocate more memory for the process and add it to the free-list # +# Args: # +# $a0 Last block of the free-list # +# $a1 Memory amount to alloc # +# Return: # +# # +# Summary: # +# More memory is allocated and add it to the free-list as a block. 
# +##################################################################################################### +extend_heap: + addiu $sp $sp -12 + sw $a0 0($sp) + sw $a1 4($sp) + sw $t0 8($sp) + + # Increase the amount of memory by header_size to create a block with that size + li $v0 9 + addiu $a0 $a1 header_size + syscall + + # Set values of the block_header + move $t0 $a1 + sw $t0 header_size_slot($v0) + sw $zero header_next_slot($v0) + sw $zero header_reachable_slot($v0) + + # Add block to the end of the free-list + lw $t0, 0($sp) + sw $v0 header_next_slot($t0) + + move $a0 $t0 + lw $a1 4($sp) + lw $t0 8($sp) + addiu $sp $sp 12 + + jr $ra + + + +##################################################################################################### +# Split a block into two blocks, one of the requested size and the other with the rest. # +# Args: # +# $a0 Address of the block to split # +# $a1 Size requested for one block # +# Return: # +# # +# Summary: # +# The block is splitted into two blocks if the size allow it. 
# +##################################################################################################### +split_block: + addiu $sp $sp -16 + sw $t0 0($sp) + sw $t1 4($sp) + sw $a0 8($sp) + sw $a1 12($sp) + + # Check if the block can be splitted in two blocks, one of the requested size + lw $t0 header_size_slot($a0) + bgt $a1 $t0 split_block_error_small + + # Check if after a split the block there is enough space to create another block, if there is not do not split + sub $t0 $t0 $a1 + li $t1 header_size + ble $t0 $t1 split_block_same_size + + # Compute the address of the second block + addu $t0 $a0 $a1 + addiu $t0 $t0 header_size + + #Update headers of the two blocks + lw $t1 header_next_slot($a0) + sw $t1 header_next_slot($t0) + sw $t0 header_next_slot($a0) + + lw $t1 header_size_slot($a0) #update sizes + sub $t1 $t1 $a1 + + addi $t1 $t1 neg_header_size + sw $t1 header_size_slot($t0) + sw $a1 header_size_slot($a0) + move $v0 $a0 + j split_block_end + +split_block_same_size: + move $v0 $a0 + j split_block_end + +split_block_error_small: + j split_block_end + +split_block_end: + lw $t0 0($sp) + lw $t1 4($sp) + lw $a0 8($sp) + lw $a1 12($sp) + addiu $sp $sp 16 + + jr $ra + + +##################################################################################################### +# Best Fit strategy is used to select the block # +# Args: # +# $a0 size to alloc # +# Return: # +# $v0 address of allocated block # +# Summary: # +# Actual block is store in $t0, the size block is checked to know if it is a # +# valid block (a block is valid if its size is larger or equal than the required size), # +# if the block is valid we compare it with the actual best block and keep the shorter block. 
# +# If there is not a block with the required size, a new block of size # +# max(total_alloc_size, size requested) is requested with sbrk and splitted if necessary # +##################################################################################################### +malloc: + move $v0 $zero + addiu $sp $sp -28 + sw $t1 0($sp) + sw $t0 4($sp) + sw $a0 8($sp) + sw $a1 12($sp) + sw $ra 16($sp) + sw $t2 20($sp) + sw $t3 24($sp) + + addiu $t0 $gp free_list + j malloc_loop + +malloc_end: + + move $a0 $v0 + lw $a1 8($sp) # a1 = requested block size + jal split_block + + lw $t1 header_next_slot($v0) + sw $t1 header_next_slot($t3) + + addiu $t1 $gp used_list + lw $a0 header_next_slot($t1) + + sw $a0 header_next_slot($v0) + sw $v0 header_next_slot($t1) + + addiu $v0 $v0 header_size + + lw $t3 24($sp) + lw $t2 20($sp) + lw $ra 16($sp) + lw $a1 12($sp) + lw $a0 8($sp) + lw $t0 4($sp) + lw $t1 0($sp) + addiu $sp $sp 28 + + jr $ra +####################################################################### +# t0 = actual block address # +####################################################################### +malloc_loop: + move $t2 $t0 # save previous block in $t2 (this is usefull when we lw $t3 24($sp)need to alloc the new block) + lw $t0 header_next_slot($t0) # t0 = next block address + beq $t0 $zero malloc_search_end # if t0 == 0 we reach to the free-list end + j malloc_check_valid_block + +####################################################################### +# $v0 = actual selected block address # +####################################################################### +malloc_search_end: + beq $v0 $zero malloc_alloc_new_block # if v0 == 0 a valid block was not found + j malloc_end + +####################################################################### +# t2 = last block of free list # +# a0 = requested block size # +####################################################################### +malloc_alloc_new_block: + li $t1 alloc_size # t1 = standard alloc size + move 
$t3 $t2 + move $a1 $a0 # a1 = requested block size + move $a0 $t2 # a0 = last block of free list + bge $a1 $t1 malloc_big_block # if the requested size is bigger than the standar alloc size go to malloc_big_block + li $a1 alloc_size # a1 = standard alloc size + jal extend_heap + + j malloc_end + +###################################################################### +# a1 = requested block size # +###################################################################### +malloc_big_block: + #addiu $a1 $a1 header_size # Add header size to alloc size + jal extend_heap + j malloc_end + + + +######################################################################## +# t0 = actual block address # +######################################################################## +malloc_check_valid_block: + lw $t1 header_size_slot($t0) # t1 = size new block + bge $t1 $a0 malloc_valid_block # the actual block have the required size + j malloc_loop + +######################################################################## +# t0 = actual block address # +# t1 = size actual block # +# v0 = actual selected block address(0 if no one have been selected) # +# v1 = actual selected block size # +######################################################################## +malloc_valid_block: + beq $v0 $zero malloc_first_valid_block # this is the first valid block + bge $t1 $v1 malloc_loop # the selected block is smaller than actual block + move $v0 $t0 # selected block address = actual block address + move $v1 $t1 # selected block size = actual block size + move $t3 $t2 + j malloc_loop + + +######################################################################## +# t0 = actual block address # +# t1 = size actual block # +# v0 = actual selected block address(0 if no one have been selected) # +# v1 = actual selected block size # +######################################################################## +malloc_first_valid_block: + move $v0 $t0 # selected block address = actual block address + move 
$v1 $t1 # selected block size = actual block size + move $t3 $t2 + j malloc_loop + + +#TODO Look for objects in registers +##################################################################################################### +# Remove from used-list the blocks that are not reachables, the root objects are in the stack and # +# registers # +# Args: # +# # +# Return: # +# # +# Summary: # +# First the objects in stack and registers are marked as reachables, after that the objects # +# that are reachables from them are marked as reachable too using a dfs algorithm. When all # +# reachables objects are marked the used-list is scanned and all the objects that are not # +# marked as reachables are released. # +##################################################################################################### + +gc_collect: + addiu $sp $sp -24 + sw $t0 0($sp) + sw $t1 4($sp) + sw $t2 8($sp) + sw $t3 12($sp) + sw $a0 16($sp) + sw $ra 20($sp) + + li $t3 reachable # $t3 = reachable value + addiu $t0 $sp 20 # $t0 = the start of the stack without count this function + lw $t1 stack_base($gp) # $t1 = the end of the stack + + li $t2 1 +# Go through the stack searching for objects +gc_collect_loop: + addiu $t0 $t0 4 + beq $t0 $t1 gc_collect_dfs # If the end of the stack was reached finish this loop + + lw $a0 0($t0) + jal check_if_is_object + + bne $v0 $t2 gc_collect_loop + + addiu $a0 $a0 neg_header_size + sw $t3 header_reachable_slot($a0) + + j gc_collect_loop + +gc_collect_dfs: + addiu $t1 $gp used_list + +# Go through the used-list and try to expand any reachable block +gc_collect_outer_loop: + lw $t1 header_next_slot($t1) + beq $t1 $zero gc_collect_free + lw $t2 header_reachable_slot($t1) + beq $t2 reachable gc_collect_expand + j gc_collect_outer_loop + +gc_collect_expand: + addiu $a0 $t1 header_size # expand an object not a block + jal gc_collect_recursive_expand + j gc_collect_outer_loop + +gc_collect_free: + addiu $t0 $gp used_list + lw $t0 header_next_slot($t0) + +# Go 
through the used-list and free any unreachable object and set the reachable and expanded field to their default values +gc_collect_free_loop: + beq $t0 $zero gc_collect_end + lw $t1 header_reachable_slot($t0) + bne $t1 reachable gc_collect_free_loop_free + sw $zero header_reachable_slot($t0) + move $a0 $t0 + jal check_if_is_object + beq $v0 $zero gc_collect_free_loop + li $t1 object_mark + addiu $t2 $t0 header_size + lw $t3 4($t2) + sll $t3 $t3 2 + addu $t2 $t2 $t3 + sw $t1 -4($t2) + lw $t0 header_next_slot($t0) + j gc_collect_free_loop + +gc_collect_free_loop_free: + move $a0 $t0 + lw $t0 header_next_slot($t0) + jal free_block + j gc_collect_free_loop + + +gc_collect_end: + lw $t0 0($sp) + lw $t1 4($sp) + lw $t2 8($sp) + lw $t3 12($sp) + lw $a0 16($sp) + lw $ra 20($sp) + addiu $sp $sp 24 + + jr $ra + + + + +##################################################################################################### +# Mark the objects that are reachable from the attrs of one object in a recursive way. # +# Args: # +# $a0: Object to expand # +# Return: # +# # +# Summary: # +# The actual object is marked as reachable and expanded to avoid infinite cycles, and this # +# routine is called recursively to expand the objects in the attrs of the actual object. 
# +##################################################################################################### +gc_collect_recursive_expand: + addiu $sp $sp -16 + sw $a0 0($sp) + sw $t0 4($sp) + sw $t1 8($sp) + sw $ra 12($sp) + + jal check_if_is_object # If is not an object can not be expanded + beq $v0 $zero gc_collect_recursive_expand_end + + lw $t0 4($a0) + sll $t0 $t0 2 + addiu $t0 $t0 -4 + addu $t0 $a0 $t0 + lw $t1 0($t0) # Check if the object was ready expanded to avoid infinite cycles + beq $t1 object_expanded gc_collect_recursive_expand_end + + # Mark the block that contains the object as reachable + li $t1 reachable + addiu $a0 $a0 neg_header_size + sw $t1 header_reachable_slot($a0) + addiu $a0 $a0 header_size + + # Mark the object as expanded + li $t1 object_expanded + sw $t1 0($t0) + + lw $t0 0($a0) # $t0 = type of the object + + # int and string types are special cases + la $t1 int_type + lw $t1 0($t1) + beq $t0 $t1 gc_collect_recursive_expand_end + + la $t1 string_type + lw $t1 0($t1) + beq $t0 $t1 gc_collect_recursive_expand_string_object + + lw $t0 4($a0) + li $t1 meta_data_object_size + sub $t0 $t0 $t1 + + addiu $t1 $a0 12 + +# call this routine in every attr of the object +gc_collect_recursive_expand_attr_loop: + beq $t0 $zero gc_collect_recursive_expand_end + lw $a0 0($t1) + jal gc_collect_recursive_expand + addiu $t1 $t1 4 + sub $t0 $t0 1 + j gc_collect_recursive_expand_attr_loop + +# the value field of string object is not an object but it is a +# reference to the block where the string is saved, so that block +# needs to be marked as reachable +gc_collect_recursive_expand_string_object: + lw $t0 8($a0) + addiu $t0 $t0 neg_header_size + li $t1 reachable + sw $t1 header_reachable_slot($t0) + + +gc_collect_recursive_expand_end: + lw $a0 0($sp) + lw $t0 4($sp) + lw $t1 8($sp) + lw $ra 12($sp) + addiu $sp $sp 16 + + jr $ra + + + + + + + + +# $a0 address from +# $a1 address to +# $a2 size +copy: + addiu $sp $sp -16 + sw $a0 0($sp) + sw $a1 4($sp) + sw $a2 
8($sp) + sw $t0 12($sp) + +copy_loop: + beq $a2 $zero copy_end + lw $t0 0($a0) + sw $t0 0($a1) + addiu $a0 $a0 4 + addiu $a1 $a1 4 + addi $a2 $a2 -4 + j copy_loop + +copy_end: + lw $a0 0($sp) + lw $a1 4($sp) + lw $a2 8($sp) + lw $t0 12($sp) + addiu $sp $sp 16 + + jr $ra + + +##################################################################################################### +# Check if a value is a reference to an object # +# Args: # +# $a0: Value to check # +# Return: # +# $v0: 1 if is a reference to an object else 0 # +# Summary: # +# Check if a value is a valid heap address and if it is check if in that address there are # +# values that match with the object schema # +##################################################################################################### +check_if_is_object: + addiu $sp $sp -20 + sw $t0 0($sp) + sw $t1 4($sp) + sw $t2 8($sp) + sw $t3 12($sp) + sw $a0 16($sp) + + move $t0 $a0 + + li $v0 9 + move $a0 $zero + syscall + + addiu $t1 $v0 -4 # Last word of heap + + # Check that the first word is a type object + blt $t0 $gp check_if_is_object_not_object + bgt $t0 $t1 check_if_is_object_not_object + lw $t2 0($t0) + blt $t2 $zero check_if_is_object_not_object + la $t3 type_number + lw $t3 0($t3) + bge $t2 $t3 check_if_is_object_not_object + + addiu $t0 $t0 4 + blt $t0 $gp check_if_is_object_not_object + bgt $t0 $t1 check_if_is_object_not_object + lw $t2 0($t0) #Store size in $t2 + + addiu $t0 $t0 8 + + + li $t3 meta_data_object_size + sub $t2 $t2 $t3 + sll $t2 $t2 2 + addu $t0 $t0 $t2 + + # Check if the last word of the object is an object mark + blt $t0 $gp check_if_is_object_not_object + bgt $t0 $t1 check_if_is_object_not_object + lw $t2 0($t0) + beq $t2 object_mark check_if_is_object_is_object + beq $t2 object_expanded check_if_is_object_is_object + +check_if_is_object_not_object: + li $v0 0 + j check_if_is_object_end + + +check_if_is_object_is_object: + li $v0 1 + + +check_if_is_object_end: + lw $t0 0($sp) + lw $t1 4($sp) + lw $t2 
8($sp) + lw $t3 12($sp) + lw $a0 16($sp) + addiu $sp $sp 20 + + jr $ra + + +equals: + beq $a0 $a1 equals_equal + li $v0 0 + j equals_end + +equals_equal: + li $v0 1 + +equals_end: + jr $ra + + + +less_equal: + ble $a0 $a1 less_equal_true + li $v0 0 + j less_equal_end + +less_equal_true: + li $v0 1 + +less_equal_end: + jr $ra + + +less: + blt $a0 $a1 less_true + li $v0 0 + j less_end + +less_true: + li $v0 1 + +less_end: + jr $ra + + +len: + addiu $sp $sp -8 + sw $t0 0($sp) + sw $t1 4($sp) + + move $t0 $a0 + move $v0 $zero + +len_loop: + lb $t1 0($t0) + beq $t1 $zero len_end + addi $v0 $v0 1 + addiu $t0 $t0 1 + j len_loop + +len_end: + lw $t0 0($sp) + lw $t1 4($sp) + addiu $sp $sp 8 + + jr $ra + + +use_block: + addiu $sp $sp -12 + sw $t0 0($sp) + sw $t1 4($sp) + sw $t2 8($sp) + + addiu $t0 $gp free_list + +use_block_loop: + move $t1 $t0 + lw $t0 header_next_slot($t0) + beq $t0 $zero use_block_end + beq $t0 $a0 use_block_founded + j use_block_loop + +use_block_founded: + lw $t2 header_next_slot($t0) + sw $t2 header_next_slot($t1) + + addiu $t1 $gp used_list + lw $t2 header_next_slot($t1) + sw $t0 header_next_slot($t1) + sw $t2 header_next_slot($t0) + +use_block_end: + lw $t0 0($sp) + lw $t1 4($sp) + lw $t2 8($sp) + addiu $sp $sp 12 + + jr $ra + + + + +read_str: + addiu $sp $sp -36 + sw $t0 0($sp) + sw $t1 4($sp) + sw $t2 8($sp) + sw $t3 12($sp) + sw $t4 16($sp) + sw $t5 20($sp) + sw $a0 24($sp) + sw $a1 28($sp) + sw $ra 32($sp) + + addiu $t0 $gp free_list + move $t1 $zero + move $t2 $t0 + +read_str_larger_block_loop: + lw $t0 header_next_slot($t0) + beq $t0 $zero read_str_reading + lw $t3 header_size_slot($t0) + bge $t1 $t3 read_str_larger_block_loop + move $t1 $t3 + move $t2 $t0 + j read_str_larger_block_loop + +read_str_reading: + beq $t1 $zero read_str_new_block + move $a1 $t1 + li $v0 8 + addiu $a0 $t2 header_size + syscall + move $t0 $a0 + move $t1 $zero + +read_str_look_nl: + lb $t2 0($t0) + beq $t2 new_line read_str_nl_founded + beq $t2 $zero 
read_str_zero_founded#read_str_no_nl + addi $t1 $t1 1 + addi $t0 $t0 1 + j read_str_look_nl + +read_str_zero_founded: + blt $t1 $t3 read_str_nl_founded + j read_str_no_nl + +read_str_nl_founded: + sb $zero 0($t0) + addi $t1 $t1 1 + li $t2 4 + div $t1 $t2 + mfhi $t3 + beq $t3 $zero read_str_nl_founded_alligned + sub $t2 $t2 $t3 + add $t1 $t1 $t2 +read_str_nl_founded_alligned: + move $a1 $t1 + addiu $a0 $a0 neg_header_size + jal split_block + jal use_block + + addiu $v0 $a0 header_size + j read_str_end + + +read_str_no_nl: + addi $t1 $t1 1 + blt $t1 str_size_treshold read_str_dup + addi $t1 $t1 alloc_size + j read_str_extend_heap +read_str_dup: + sll $t1 $t1 1 +read_str_extend_heap: + move $a1 $t1 + move $t0 $a0 + addiu $a0 $gp free_list + +read_str_last_block_loop: + lw $t1 header_next_slot($a0) + beq $t1 $zero read_str_last_block_founded + lw $a0 header_next_slot($a0) + j read_str_last_block_loop + +read_str_last_block_founded: + jal extend_heap + jal expand_block + lw $t1 header_next_slot($a0) + bne $t1 $zero read_str_copy_prev + move $t1 $a0 + +read_str_copy_prev: + lw $t3 header_size_slot($t1) + move $t2 $zero + move $t5 $t1 + addiu $t1 $t1 header_size + +read_str_copy_loop: + lb $t4 0($t0) + beq $t4 $zero read_str_copy_end + sb $t4 0($t1) + addi $t2 $t2 1 + addi $t0 $t0 1 + addi $t1 $t1 1 + j read_str_copy_loop + +read_str_copy_end: + sub $t3 $t3 $t2 + move $a0 $t1 + move $a1 $t3 + li $v0 8 + syscall + move $t0 $a0 + move $t1 $t2 + addiu $a0 $t5 header_size + j read_str_look_nl + + +read_str_end: + lw $t0 0($sp) + lw $t1 4($sp) + lw $t2 8($sp) + lw $t3 12($sp) + lw $t4 16($sp) + lw $t5 20($sp) + lw $a0 24($sp) + lw $a1 28($sp) + lw $ra 32($sp) + addiu $sp $sp 36 + + jr $ra + + +read_str_new_block: + addiu $t0 $gp free_list + +read_str_new_block_search_last: + lw $t1 header_next_slot($t0) + beq $t1 $zero read_str_new_block_create + move $t0 $t1 + j read_str_new_block_search_last + +read_str_new_block_create: + move $a0 $t0 + li $a1 alloc_size + jal extend_heap + 
jal expand_block + lw $t2 header_next_slot($a0) + beq $t2 $zero read_str_new_block_expanded + lw $t1 header_size_slot($t2) + j read_str_reading + +read_str_new_block_expanded: + move $t2 $a0 + lw $t1 header_size_slot($a0) + j read_str_reading + + + +concat: + addiu $sp $sp -24 + sw $t0 0($sp) + sw $t1 4($sp) + sw $t2 8($sp) + sw $a0 12($sp) + sw $a1 16($sp) + sw $ra 20($sp) + + move $t0 $a0 + move $t1 $a1 + + + addiu $a0 $a2 1 + li $t2 4 + div $a0 $t2 + mfhi $a0 + bne $a0 $zero concat_allign_size + addiu $a0 $a2 1 + +concat_size_alligned: + jal malloc + move $t2 $v0 + j concat_copy_first_loop + +concat_allign_size: + sub $t2 $t2 $a0 + add $a0 $a2 $t2 + addiu $a0 $a0 1 + j concat_size_alligned + +concat_copy_first_loop: + lb $a0 0($t0) + beq $a0 $zero concat_copy_second_loop + sb $a0 0($t2) + addiu $t0 $t0 1 + addiu $t2 $t2 1 + j concat_copy_first_loop + +concat_copy_second_loop: + lb $a0 0($t1) + beq $a0 $zero concat_end + sb $a0 0($t2) + addiu $t1 $t1 1 + addiu $t2 $t2 1 + j concat_copy_second_loop + +concat_end: + sb $zero 0($t2) + lw $t0 0($sp) + lw $t1 4($sp) + lw $t2 8($sp) + lw $a0 12($sp) + lw $a1 16($sp) + lw $ra 20($sp) + addiu $sp $sp 24 + + jr $ra + + +substr: + addiu $sp $sp -24 + sw $t0 0($sp) + sw $t1 4($sp) + sw $t2 8($sp) + sw $t3 12($sp) + sw $a0 16($sp) + sw $ra 20($sp) + + move $t0 $a0 + li $t1 4 + addiu $t3 $a2 1 + div $t3 $t1 + + mfhi $t2 + bne $t2 $zero substr_allign_size + move $t1 $t3 + j substr_new_block + +substr_allign_size: + sub $t1 $t1 $t2 + add $t1 $t1 $t3 + +substr_new_block: + move $a0 $t1 + jal malloc + move $t3 $v0 + move $t1 $zero + addu $t0 $t0 $a1 + +substr_copy_loop: + beq $t1 $a2 substr_end + lb $t2 0($t0) + sb $t2 0($t3) + addiu $t0 $t0 1 + addiu $t3 $t3 1 + addiu $t1 $t1 1 + j substr_copy_loop + +substr_end: + sb $zero 0($t3) + lw $t0 0($sp) + lw $t1 4($sp) + lw $t2 8($sp) + lw $t3 12($sp) + lw $a0 16($sp) + lw $ra 20($sp) + addiu $sp $sp 24 + + jr $ra + + +equal_str: + addiu $sp $sp -16 + sw $t0 0($sp) + sw $t1 4($sp) + sw 
$t2 8($sp) + sw $t3 12($sp) + + move $t0 $a0 + move $t1 $a1 + +equal_str_loop: + lb $t2 0($t0) + lb $t3 0($t1) + bne $t2 $t3 equal_str_not_equal + beq $t2 $zero equal_str_equal + + addiu $t0 $t0 1 + addiu $t1 $t1 1 + j equal_str_loop + +equal_str_not_equal: + move $v0 $zero + j equal_str_end + +equal_str_equal: + li $v0 1 + +equal_str_end: + lw $t0 0($sp) + lw $t1 4($sp) + lw $t2 8($sp) + lw $t3 12($sp) + addiu $sp $sp 16 + + jr $ra + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/core/visitors/mips/test.py b/src/core/visitors/mips/test.py new file mode 100644 index 00000000..da7d4f04 --- /dev/null +++ b/src/core/visitors/mips/test.py @@ -0,0 +1,20 @@ +from ..cil import cil +from .cil_to_mips import CILToMIPSVisitor + +#TEST +CIL_TYPE_1 = cil.TypeNode("myType") +CIL_TYPE_1.attributes = ["attr1", "attr2", "attr3"] +CIL_TYPE_1.methods = [("method1", "func1"), ("method2", "func2"), ("method3", "func3"), ("method4", "func4")] +CIL_TYPE_2 = cil.TypeNode("myType2") +CIL_TYPE_2.attributes = ["attr1", "attr2"] +CIL_TYPE_2.methods = [("method1", "func5"), ("method2", "func2"), ("method3", "func6"), ("method4", "func7")] +CIL_AST_TEST = cil.ProgramNode([],[],[]) +CIL_AST_TEST.dottypes = [CIL_TYPE_1, CIL_TYPE_2] + +# if __name__ == '__main__': +def test(): + conv = CILToMIPSVisitor() + conv.visit(CIL_AST_TEST) + for d in conv.dotdata: + print(d) + \ No newline at end of file diff --git a/src/core/visitors/type_check/__init__.py b/src/core/visitors/type_check/__init__.py new file mode 100644 index 00000000..2dea8414 --- /dev/null +++ b/src/core/visitors/type_check/__init__.py @@ -0,0 +1,6 @@ +from .checker import TypeChecker +from .builder import TypeBuilder +from .verifier import TypeVerifier +from .collector import TypeCollector +from .inferencer import InferenceVisitor +from .ast_printer import FormatVisitor diff --git a/src/core/visitors/type_check/ast_printer.py b/src/core/visitors/type_check/ast_printer.py new file mode 100644 index 
00000000..14fd8444 --- /dev/null +++ b/src/core/visitors/type_check/ast_printer.py @@ -0,0 +1,122 @@ +from ...visitors import visitor +from ...cmp import cool_ast as cool + +#AST Printer +class FormatVisitor: + @visitor.on('node') + def visit(self, node, tabs): + pass + + @visitor.when(cool.ProgramNode) + def visit(self, node, tabs=0): + ans = '\t' * tabs + f'\\__ProgramNode [ ... ]' + statements = '\n'.join(self.visit(child, tabs + 1) for child in node.declarations) + return f'{ans}\n{statements}' + + @visitor.when(cool.ClassDeclarationNode) + def visit(self, node, tabs=0): + parent = '' if node.parent is None else f"inherits {node.parent}" + ans = '\t' * tabs + f'\\__ClassDeclarationNode: class {node.id} {parent} {{ ... }}' + features = '\n'.join(self.visit(child, tabs + 1) for child in node.features) + return f'{ans}\n{features}' + + @visitor.when(cool.AttrDeclarationNode) + def visit(self, node, tabs=0): + sons = [node.expr] if node.expr else [] + text = '<- ' if node.expr else '' + ans = '\t' * tabs + f'\\__{node.__class__.__name__}: {node.id} : {node.type} {text}' + body = '\n'.join(self.visit(child, tabs + 1) for child in sons) + return f'{ans}\n{body}' if body else f'{ans}' + + @visitor.when(cool.FuncDeclarationNode) + def visit(self, node, tabs=0): + params = ', '.join(' : '.join(param) for param in node.params) + ans = '\t' * tabs + f'\\__FuncDeclarationNode: {node.id}({params}) : {node.type} {{}}' + body = self.visit(node.body, tabs + 1) + return f'{ans}\n{body}' + + @visitor.when(cool.IfThenElseNode) + def visit(self, node, tabs=0): + sons = [node.condition, node.if_body, node.else_body] + ans = '\t' * tabs + f'\\__IfThenElseNode: if then else fi' + body = '\n'.join(self.visit(child, tabs + 1) for child in sons) + return f'{ans}\n{body}' + + @visitor.when(cool.WhileLoopNode) + def visit(self, node, tabs=0): + sons = [node.condition, node.body] + ans = '\t' * tabs + f'\\__WhileLoopNode: while loop pool' + body = '\n'.join(self.visit(child, tabs + 1) for 
child in sons) + return f'{ans}\n{body}' + + @visitor.when(cool.BlockNode) + def visit(self, node, tabs=0): + sons = node.exprs + ans = '\t' * tabs + f'\\__BlockNode: {{ ... }}' + body = '\n'.join(self.visit(child, tabs + 1) for child in sons) + return f'{ans}\n{body}' + + @visitor.when(cool.LetInNode) + def visit(self, node, tabs=0): + sons = node.let_body + [node.in_body] + ans = '\t' * tabs + f'\\__LetInNode: let {{ ... }} in ' + body = '\n'.join(self.visit(child, tabs + 1) for child in sons) + return f'{ans}\n{body}' + + @visitor.when(cool.CaseOfNode) + def visit(self, node, tabs=0): + sons = [node.expr] + node.branches + ans = '\t' * tabs + f'\\__CaseOfNode: case of {{ ... }} esac' + body = '\n'.join(self.visit(child, tabs + 1) for child in sons) + return f'{ans}\n{body}' + + @visitor.when(cool.CaseExpressionNode) + def visit(self, node, tabs=0): + sons = [node.expr] + ans = '\t' * tabs + f'\\__CaseExpressionNode: {node.id} : {node.type} => ' + body = '\n'.join(self.visit(child, tabs + 1) for child in sons) + return f'{ans}\n{body}' + + @visitor.when(cool.AssignNode) + def visit(self, node, tabs=0): + sons = [node.expr] + ans = '\t' * tabs + f'\\__AssignNode: {node.id} = ' + body = '\n'.join(self.visit(child, tabs + 1) for child in sons) + return f'{ans}\n{body}' + + @visitor.when(cool.UnaryNode) + def visit(self, node, tabs=0): + ans = '\t' * tabs + f'\\__{node.__class__.__name__}: {node.symbol.lex} ' + right = self.visit(node.expr, tabs + 1) + return f'{ans}\n{right}' + + @visitor.when(cool.BinaryNode) + def visit(self, node, tabs=0): + ans = '\t' * tabs + f'\\__{node.__class__.__name__}: {node.symbol.lex} ' + left = self.visit(node.left, tabs + 1) + right = self.visit(node.right, tabs + 1) + return f'{ans}\n{left}\n{right}' + + @visitor.when(cool.AtomicNode) + def visit(self, node, tabs=0): + return '\t' * tabs + f'\\__ {node.__class__.__name__}: {node.lex}' + + @visitor.when(cool.FunctionCallNode) + def visit(self, node, tabs=0): + obj = 
self.visit(node.obj, tabs + 1) + ans = '\t' * tabs + f'\\__FunctionCallNode: .{node.id}(, ..., )' + args = '\n'.join(self.visit(arg, tabs + 1) for arg in node.args) + ans = f'{ans}\n{obj}' + if args: ans += f'\n{args}' + return ans + + @visitor.when(cool.MemberCallNode) + def visit(self, node, tabs=0): + ans = '\t' * tabs + f'\\__MemberCallNode: {node.id}(, ..., )' + args = '\n'.join(self.visit(arg, tabs + 1) for arg in node.args) + if args: ans += f'\n{args}' + return ans + + @visitor.when(cool.NewNode) + def visit(self, node, tabs=0): + return '\t' * tabs + f'\\__NewNode: new {node.type}()' diff --git a/src/core/visitors/type_check/builder.py b/src/core/visitors/type_check/builder.py new file mode 100644 index 00000000..034b4321 --- /dev/null +++ b/src/core/visitors/type_check/builder.py @@ -0,0 +1,119 @@ +from .utils import * +from ...visitors import visitor +from ...cmp import cool_ast as cool, SemanticError, empty_token, ErrorType + +# Type Builder +class TypeBuilder: + def __init__(self, context, errors=[]): + self.context = context + self.current_type = None + self.errors = errors + self.methods = {} + + @visitor.on('node') + def visit(self, node): + pass + + @visitor.when(cool.ProgramNode) + def visit(self, node): + main_token = None + for def_class in node.declarations: + self.visit(def_class) + if def_class.id == 'Main': + main_token = def_class.tid + + try: + main = self.context.get_type('Main') + method = main.methods['main'] + tmethod = self.methods['Main']['main'] + if method.param_names: + self.errors.append((SemanticError('Method "main" must takes no formal parameters'), tmethod)) + except TypeError: + self.errors.append((SemanticError('No definition for class "Main"'), empty_token)) + except KeyError: + self.errors.append((SemanticError('Class "Main" must have a method "main"'), main_token)) + + @visitor.when(cool.ClassDeclarationNode) + def visit(self, node): + self.current_type = self.context.get_type(node.id) + + if node.parent: + if node.parent 
in sealed: + self.errors.append((SemanticError(f'Is not possible to inherits from "{node.parent}"'), node.tparent)) + node.parent = 'Object' + try: + parent_type = self.context.get_type(node.parent) + self.current_type.set_parent(parent_type) + except TypeError as ex: + self.errors.append((ex, node.tparent)) + + for feature in node.features: + self.visit(feature) + + @visitor.when(cool.AttrDeclarationNode) + def visit(self, node): + try: + attr_type = self.context.get_type(node.type) + except TypeError as ex: + self.errors.append((ex, node.ttype)) + attr_type = ErrorType() + node.attr_type = attr_type + + try: + if node.id == 'self': + raise SemanticError(SELF_IS_READONLY) + attr = self.current_type.define_attribute(node.id, attr_type) + attr.node = node + node.attr = attr + except SemanticError as ex: + self.errors.append((ex, node.tid)) + + @visitor.when(cool.FuncDeclarationNode) + def visit(self, node): + arg_names, arg_types, arg_nodes = [], [], [] + for i, arg in enumerate(node.params): + idx, typex = arg + try: + assert typex != ST + arg_type = self.context.get_type(typex) + except TypeError as ex: + self.errors.append((ex, node.params[i].ttype)) + arg_type = ErrorType() + except AssertionError: + self.errors.append((SemanticError(INVALID_PARAMETER % (idx)), node.params[i].ttype)) + arg_type = ErrorType() + + if idx == 'self': + self.errors.append((SemanticError('"self" cannot be the name of a formal parameter'), node.params[i].ttype)) + if idx in arg_names: + self.errors.append((SemanticError(f'Formal parameter {idx} redefined'), node.params[i].ttype)) + arg_names.append(idx) + arg_types.append(arg_type) + arg_nodes.append(arg) + arg.idx = i + arg.method_types = arg_types + + try: + ret_type = self.context.get_type(node.type) + except TypeError as ex: + self.errors.append((ex, node.ttype)) + ret_type = ErrorType() + node.ret_type = ret_type + node.arg_types = arg_types + node.arg_names = arg_names + node.arg_nodes = arg_nodes + + try: + if node.id == 'self': 
+ raise SemanticError('"self" is an invalid method name') + method = self.current_type.define_method(node.id, arg_names, arg_types, ret_type) + method.nodes = arg_nodes + method.ret_node = node + node.method = method + for arg in node.params: + arg.method = method + if not self.current_type.name in self.methods: + self.methods[self.current_type.name] = {} + self.methods[self.current_type.name][node.id] = node.tid + except SemanticError as ex: + self.errors.append((ex, node.tid)) diff --git a/src/core/visitors/type_check/checker.py b/src/core/visitors/type_check/checker.py new file mode 100644 index 00000000..3d8fe285 --- /dev/null +++ b/src/core/visitors/type_check/checker.py @@ -0,0 +1,409 @@ +from .utils import * +from ...visitors import visitor +from ...cmp import cool_ast as cool +from ...cmp import IntType, BoolType, StringType, SemanticError, ErrorType, SelfType, Scope + +# Type Checker +class TypeChecker: + def __init__(self, context, errors=[]): + self.context = context + self.current_type = None + self.current_method = None + self.errors = errors + + @visitor.on('node') + def visit(self, node, scope): + pass + + @visitor.when(cool.ProgramNode) + def visit(self, node, scope=None): + scope = Scope() + for declaration in node.declarations: + self.visit(declaration, scope.create_child()) + return scope + + @visitor.when(cool.ClassDeclarationNode) + def visit(self, node, scope): + self.current_type = self.context.get_type(node.id) + + scope.define_variable('self', SelfType(self.current_type)) + cur_type = self.current_type + while True: + for attr in cur_type.attributes: + vtype = attr.type + if vtype.name == ST: + vtype = SelfType(self.current_type) + var = scope.define_variable(attr.name, vtype) + var.node = attr.node + if not cur_type.parent: + break + cur_type = cur_type.parent + + cur_type = self.current_type + pending, count = [], 0 + for feature in node.features: + if isinstance(feature, cool.AttrDeclarationNode): + self.visit(feature, scope) + if not 
scope.is_defined(feature.id): + vtype = cur_type.attributes[count].type + if vtype.name == ST: + vtype = SelfType(self.current_type) + var = scope.define_variable(feature.id, vtype) + var.node = cur_type.attributes[count].node + count += 1 + else: + pending.append(feature) + + for feature in pending: + self.visit(feature, scope.create_child()) + + @visitor.when(cool.AttrDeclarationNode) + def visit(self, node, scope): + if not node.expr: + return + + self.visit(node.expr, scope) + expr_type = node.expr.computed_type + real_type = node.attr_type + node.info = [expr_type, real_type] + + if not expr_type.conforms_to(real_type): + self.errors.append((TypeError(INCOMPATIBLE_TYPES % (expr_type.name, real_type.name)), node.arrow)) + + @visitor.when(cool.FuncDeclarationNode) + def visit(self, node, scope): + self.current_method = node.id + + for pname, ptype, pnode in zip(node.arg_names, node.arg_types, node.arg_nodes): + var = scope.define_variable(pname, ptype) + var.node = pnode + + self.visit(node.body, scope) + + body_type = node.body.computed_type + method_rtn_type = node.ret_type + node.info = [body_type, method_rtn_type] + + if not body_type.conforms_to(method_rtn_type): + self.errors.append((TypeError(INCOMPATIBLE_TYPES % (body_type.name, method_rtn_type.name)), node.ttype)) + + @visitor.when(cool.AssignNode) + def visit(self, node, scope): + self.visit(node.expr, scope) + node_type = node.expr.computed_type + var_type = None + + try: + if not scope.is_defined(node.id): + scope.define_variable(node.id, ErrorType()) + raise NameError(VARIABLE_NOT_DEFINED % (node.id)) + var = scope.find_variable(node.id) + var_type = var.type + if var.name == 'self': + raise SemanticError(SELF_IS_READONLY) + if not node_type.conforms_to(var.type): + raise TypeError(INCOMPATIBLE_TYPES % (node_type.name, var.type.name)) + except Exception as ex: + self.errors.append((ex, node.tid)) + node_type = ErrorType() + + node.info = [node_type, var_type] + node.computed_type = node_type + + 
@visitor.when(cool.CaseOfNode) + def visit(self, node, scope): + self.visit(node.expr, scope) + + types_list = [] + branches = set() + for case in node.branches: + if case.type in branches: + # //TODO: Check this again after the Inference process + self.errors.append((SemanticError(DUPLICATED_BRANCH % (case.type)), case.ttype)) + branches.add(case.type) + self.visit(case, scope.create_child()) + types_list.append(case.computed_type) + + node.computed_type = LCA(types_list) + + @visitor.when(cool.CaseExpressionNode) + def visit(self, node, scope): + node.scope = scope + try: + assert node.type != ST + branch_type = self.context.get_type(node.type) + except TypeError as ex: + self.errors.append((ex, node.ttype)) + branch_type = ErrorType() + except AssertionError: + self.errors.append((SemanticError(INVALID_BRANCH % node.id), node.ttype)) + branch_type = ErrorType() + node.branch_type = branch_type + + if node.id == 'self': + self.errors.append((SemanticError(SELF_IS_READONLY), node.id)) + else: + var = scope.define_variable(node.id, branch_type) + var.node = node + self.visit(node.expr, scope) + node.computed_type = node.expr.computed_type + + @visitor.when(cool.LetInNode) + def visit(self, node, scope): + node.scope = scope + + for expr in node.let_body: + node.scope = node.scope.create_child() + self.visit(expr, node.scope) + + self.visit(node.in_body, node.scope) + node.computed_type = node.in_body.computed_type + + @visitor.when(cool.LetAttributeNode) + def visit(self, node, scope): + try: + node_type = self.context.get_type(node.type) + if node_type.name == ST: + node_type = SelfType(self.current_type) + except TypeError as ex: + self.errors.append((ex, node.ttype)) + node_type = ErrorType() + node.attr_type = node_type + node.scope = None + + if node.expr: + self.visit(node.expr, scope) + expr_type = node.expr.computed_type + node.info = [expr_type, node_type] + + if not expr_type.conforms_to(node_type): + self.errors.append((TypeError(INCOMPATIBLE_TYPES % 
(expr_type.name, node_type.name)), node.arrow)) + if node.id == 'self': + self.errors.append((SemanticError(SELF_IS_READONLY), node.tid)) + else: + var = scope.define_variable(node.id, node_type) + var.node = node + + @visitor.when(cool.IfThenElseNode) + def visit(self, node, scope): + self.visit(node.condition, scope) + node.cond_type = node.condition.computed_type + + BOOL = self.context.get_type('Bool') + if not node.cond_type.conforms_to(BOOL): + self.errors.append((TypeError(CONDITION_NOT_BOOL % ('If', node.cond_type.name)), node.token)) + + self.visit(node.if_body, scope) + if_type = node.if_body.computed_type + + self.visit(node.else_body, scope) + else_type = node.else_body.computed_type + node.computed_type = LCA([if_type, else_type]) + + @visitor.when(cool.BlockNode) + def visit(self, node, scope): + for expr in node.exprs: + self.visit(expr, scope) + + last_expr = node.exprs[-1] + node.computed_type = last_expr.computed_type + + @visitor.when(cool.WhileLoopNode) + def visit(self, node, scope): + self.visit(node.condition, scope) + node.cond_type = node.condition.computed_type + + BOOL = self.context.get_type('Bool') + if not node.cond_type.conforms_to(BOOL): + self.errors.append((TypeError(CONDITION_NOT_BOOL % ('While', node.cond_type.name)), node.token)) + + self.visit(node.body, scope) + node.computed_type = self.context.get_type('Object') + + @visitor.when(cool.FunctionCallNode) + def visit(self, node, scope): + self.visit(node.obj, scope) + obj_type = node.obj.computed_type + + error = False + + arg_types, real_types = [], [] + for arg in node.args: + self.visit(arg, scope) + arg_types.append(arg.computed_type) + + try: + if node.type: + token = node.ttype + cast_type = self.context.get_type(node.type) + if cast_type.name == ST: + raise SemanticError("Invalid use of SELF_TYPE") + if cast_type.name == AT: + raise SemanticError('Is not possible to use AUTO_TYPE in a cast') + if not obj_type.conforms_to(cast_type): + raise TypeError(INCOMPATIBLE_TYPES % 
(obj_type.name, node.type)) + obj_type = cast_type + + assert obj_type + token = node.tid + obj_method = obj_type.get_method(node.id) + node.obj_method = obj_method + if len(node.args) == len(obj_method.param_types): + for idx, (arg, param_type) in enumerate(zip(arg_types, obj_method.param_types)): + real_types.append(param_type) + + if not arg.conforms_to(param_type): + self.errors.append((TypeError(INCOMPATIBLE_TYPES % (arg.name, param_type.name + f" in the argument #{idx} of {node.id}")), token)) + error = True + else: + raise SemanticError(f'Method "{obj_method.name}" of "{obj_type.name}" only accepts {len(obj_method.param_types)} argument(s)') + assert not error + node_type = obj_method.return_type + if node_type.name == ST: + node_type = obj_type + except AssertionError: + node_type = ErrorType() + except Exception as ex: + self.errors.append((ex, token)) + node_type = ErrorType() + + node.info = [arg_types, real_types] + node.computed_type = node_type + + @visitor.when(cool.MemberCallNode) + def visit(self, node, scope): + obj_type = SelfType(self.current_type) + + error = False + + arg_types, real_types = [], [] + for arg in node.args: + self.visit(arg, scope) + arg_types.append(arg.computed_type) + + try: + token = node.tid + obj_method = obj_type.get_method(node.id) + node.obj_method = obj_method + if len(node.args) == len(obj_method.param_types): + for arg, param_type in zip(arg_types, obj_method.param_types): + real_types.append(param_type) + + if not arg.conforms_to(param_type): + self.errors.append((TypeError(INCOMPATIBLE_TYPES % (arg.name, param_type.name + f" in the argument #{idx} of {node.id}")), token)) + error = True + else: + raise SemanticError(f'Method "{obj_method.name}" of "{obj_type.name}" only accepts {len(obj_method.param_types)} argument(s)') + assert not error + node_type = obj_method.return_type + if node_type.name == ST: + node_type = obj_type + except AssertionError: + node_type = ErrorType() + except Exception as ex: + 
self.errors.append((ex, token)) + node_type = ErrorType() + + node.info = [arg_types, real_types] + node.computed_type = node_type + + @visitor.when(cool.BinaryNode) + def visit(self, node, scope): + self.visit(node.left, scope) + left_type = node.left.computed_type + + self.visit(node.right, scope) + right_type = node.right.computed_type + node.info = [left_type, right_type] + + INT = self.context.get_type('Int') + BOOL = self.context.get_type('Bool') + if not (right_type.conforms_to(INT) and left_type.conforms_to(INT)): + self.errors.append((TypeError(INVALID_OPERATION % (left_type.name, right_type.name)), node.symbol)) + + node.computed_type = [BOOL, INT][isinstance(node, cool.ArithmeticNode)] + + @visitor.when(cool.IntegerNode) + def visit(self, node, scope): + node.computed_type = self.context.get_type('Int') + + @visitor.when(cool.StringNode) + def visit(self, node, scope): + node.computed_type = self.context.get_type('String') + + @visitor.when(cool.BoolNode) + def visit(self, node, scope): + node.computed_type = self.context.get_type('Bool') + + @visitor.when(cool.IdNode) + def visit(self, node, scope): + if scope.is_defined(node.lex): + node_type = scope.find_variable(node.lex).type + else: + scope.define_variable(node.lex, ErrorType()) + self.errors.append((NameError(VARIABLE_NOT_DEFINED % (node.lex)), node.token)) + node_type = ErrorType() + + node.computed_type = node_type + + @visitor.when(cool.NewNode) + def visit(self, node, scope): + try: + node_type = self.context.get_type(node.type) + if node.type == ST: + node_type = SelfType(self.current_type) + except TypeError as ex: + self.errors.append((ex, node.ttype)) + node_type = ErrorType() + + node.computed_type = node_type + + @visitor.when(cool.IsVoidNode) + def visit(self, node, scope): + self.visit(node.expr, scope) + node.computed_type = self.context.get_type('Bool') + + @visitor.when(cool.ComplementNode) + def visit(self, node, scope): + self.visit(node.expr, scope) + expr_type = 
node.expr.computed_type + node.expr_type = expr_type + + INT = self.context.get_type('Int') + if not expr_type.conforms_to(INT): + self.errors.append((TypeError("Complement works only for Int"), node.symbol)) + node.computed_type = INT + + @visitor.when(cool.NotNode) + def visit(self, node, scope): + self.visit(node.expr, scope) + expr_type = node.expr.computed_type + node.expr_type = expr_type + + BOOL = self.context.get_type('Bool') + if not expr_type.conforms_to(BOOL): + self.errors.append((TypeError("Not operator works only for Bool"), node.symbol)) + node.computed_type = BOOL + + @visitor.when(cool.EqualNode) + def visit(self, node, scope): + self.visit(node.left, scope) + left_type = node.left.computed_type + + self.visit(node.right, scope) + right_type = node.right.computed_type + node.info = [left_type, right_type] + + valid_types = [IntType(), BoolType(), StringType()] + try: + cur_types = [right_type, left_type] + for op_type in valid_types: + try: + cur_types.remove(op_type) + assert cur_types[0].conforms_to(op_type) + break + except ValueError: pass + except AssertionError: + self.errors.append((TypeError(INVALID_OPERATION % (left_type.name, right_type.name)), node.symbol)) + + node.computed_type = self.context.get_type('Bool') diff --git a/src/core/visitors/type_check/collector.py b/src/core/visitors/type_check/collector.py new file mode 100644 index 00000000..de6e0cf3 --- /dev/null +++ b/src/core/visitors/type_check/collector.py @@ -0,0 +1,93 @@ +from .utils import * +from ...visitors import visitor +from ...cmp import cool_ast as cool, empty_token, Context, SemanticError +from ...cmp import IntType, StringType, BoolType, IOType, VoidType, AutoType, SelfType + +def define_built_in_types(context): + obj = context.create_type('Object') + i = context.append_type(IntType()) + i.set_parent(obj) + s = context.append_type(StringType()) + s.set_parent(obj) + b = context.append_type(BoolType()) + b.set_parent(obj) + io = context.append_type(IOType()) + 
io.set_parent(obj) + st = context.append_type(SelfType()) + context.append_type(AutoType()) + + obj.define_method('abort', [], [], obj) + obj.define_method('type_name', [], [], s) + obj.define_method('copy', [], [], st) + + io.define_method('out_string', ['x'], [s], st) + io.define_method('out_int', ['x'], [i], st) + io.define_method('in_string', [], [], s) + io.define_method('in_int', [], [], i) + + s.define_method('length', [], [], i) + s.define_method('concat', ['s'], [s], s) + s.define_method('substr', ['i', 'l'], [i, i], s) + +# Type Collector +class TypeCollector: + def __init__(self, errors=[]): + self.context = None + self.errors = errors + self.type_level = {} + self.parent = {} + + @visitor.on('node') + def visit(self, node): + pass + + @visitor.when(cool.ProgramNode) + def visit(self, node): + self.context = Context() + define_built_in_types(self.context) + + for def_class in node.declarations: + self.visit(def_class) + + # comparison for sort node.declarations + def get_type_level(typex, error_token=empty_token): + try: + parent = self.type_level[typex] + except KeyError: + return 0 + + if parent == 0: + node = self.parent[typex] + node.parent = "Object" + self.errors.append((SemanticError('Cyclic heritage.'), error_token)) + elif type(parent) is not int: + self.type_level[typex] = 0 if parent else 1 + if type(parent) is str: + self.type_level[typex] = get_type_level(parent, self.parent[typex].tid) + 1 + + return self.type_level[typex] + + node.declarations.sort(key = lambda node: get_type_level(node.id)) + + @visitor.when(cool.ClassDeclarationNode) + def visit(self, node): + def new_type(): + self.context.create_type(node.id) + self.type_level[node.id] = node.parent + self.parent[node.id] = node + + def make_a_duplicate(): + while True: + node.id = '1' + node.id + try: new_type() + except SemanticError: pass + else: break + + if node.id not in built_in_types: + try: new_type() + except SemanticError as ex: + self.errors.append((ex, node.tid)) + 
make_a_duplicate() + else: + self.errors.append((SemanticError(f'{node.id} is an invalid class name'), node.tid)) + make_a_duplicate() diff --git a/src/core/visitors/type_check/inferencer.py b/src/core/visitors/type_check/inferencer.py new file mode 100644 index 00000000..23b97ea7 --- /dev/null +++ b/src/core/visitors/type_check/inferencer.py @@ -0,0 +1,277 @@ +from .utils import * +from ...visitors import visitor +from .checker import TypeChecker +from ...cmp import cool_ast as cool +from ...cmp import SemanticError, AutoType, SelfType, InferenceSets + +# Type Inference Visitor +class InferenceVisitor(TypeChecker): + def __init__(self, context, errors=[]): + super().__init__(context, errors) + self.variable = {} + + def inference(self, node, ntype, conforms=True): + try: + self.variable[node].add(ntype, conforms) + except KeyError: + self.variable[node] = InferenceSets().add(ntype, conforms) + + @visitor.on('node') + def context_update(self, node, ntype): + pass + + @visitor.when(cool.Node) + def context_update(self, node, ntype): + pass + + @visitor.when(cool.Param) + def context_update(self, node, ntype): + node.method_types[node.idx] = ntype + try: node.method.param_types[node.idx] = ntype + except AttributeError: pass + + @visitor.when(cool.AttrDeclarationNode) + def context_update(self, node, ntype): + try: node.attr_type = ntype + except AttributeError: pass + try: node.branch_type = ntype + except AttributeError: pass + try: node.attr.type = ntype + except AttributeError: pass + + @visitor.when(cool.FuncDeclarationNode) + def context_update(self, node, ntype): + node.ret_type = ntype + try: node.method.return_type = ntype + except AttributeError: pass + + @visitor.on('node') + def update(self, node, scope, ntype): + pass + + @visitor.when(cool.AssignNode) + def update(self, node, scope, ntype): + self.update(node.expr, scope, ntype) + + @visitor.when(cool.CaseOfNode) + def update(self, node, scope, ntype): + for branch in node.branches: + if 
isinstance(branch.computed_type, AutoType): + self.update(branch, scope, ntype) + + @visitor.when(cool.CaseExpressionNode) + def update(self, node, scope, ntype): + self.update(node.expr, node.scope, ntype) + + @visitor.when(cool.LetInNode) + def update(self, node, scope, ntype): + self.update(node.in_body, node.scope, ntype) + + @visitor.when(cool.IfThenElseNode) + def update(self, node, scope, ntype): + if isinstance(node.if_body.computed_type, AutoType): + self.update(node.if_body, scope, ntype) + if isinstance(node.else_body.computed_type, AutoType): + self.update(node.else_body, scope, ntype) + + @visitor.when(cool.BlockNode) + def update(self, node, scope, ntype): + self.update(node.exprs[-1], scope, ntype) + + @visitor.when(cool.FunctionCallNode) + def update(self, node, scope, ntype): + self.inference(node.obj_method.ret_node, ntype) + + @visitor.when(cool.MemberCallNode) + def update(self, node, scope, ntype): + self.inference(node.obj_method.ret_node, ntype) + + @visitor.when(cool.IdNode) + def update(self, node, scope, ntype): + self.inference(scope.find_variable(node.lex).node, ntype) + + # Visit + @visitor.on('node') + def visit(self, node, scope): + pass + + @visitor.when(cool.Node) + def visit(self, node, scope): + if not issubclass(node.__class__, cool.BinaryNode): + super().visit(node, scope) + + @visitor.when(cool.ProgramNode) + def visit(self, node, scope=None): + scope = super().visit(node, scope) + + infered = 0 + pending = [] + OBJ = self.context.get_type('Object') + for (auto, sets) in self.variable.items(): + try: + if (len(sets.D) + len(sets.S) == 1): + pending.append(auto) + continue + ok, D1 = check_path(sets.D, OBJ) + assert ok + if len(sets.S) and not isinstance(D1, SelfType): + candidate = LCA(sets.S) + assert LCA([candidate, D1]) == D1 + D1 = candidate + auto.type = D1.name + self.context_update(auto, D1) + infered += 1 + except AssertionError: + self.errors.append((SemanticError(f'Bad use of AUTO_TYPE detected'), auto.ttype)) + if 
not infered: + for auto in pending: + auto.type = OBJ.name + self.context_update(auto, OBJ) + self.variable.clear() + return infered, scope + + @visitor.when(cool.AttrDeclarationNode) + def visit(self, node, scope): + super().visit(node, scope) + + if isinstance(node.attr_type, AutoType): + self.inference(node, self.context.get_type('Object')) + + if not node.expr: + return + + expr, rtype = node.info + if update_condition(rtype, expr): + self.inference(node, expr, False) + if update_condition(expr, rtype): + self.update(node.expr, scope, rtype) + + @visitor.when(cool.FuncDeclarationNode) + def visit(self, node, scope): + super().visit(node, scope) + + body, rtn = node.info + OBJ = self.context.get_type('Object') + if isinstance(rtn, AutoType): + self.inference(node, OBJ) + for ptype, pnode in zip(node.arg_types, node.arg_nodes): + if isinstance(ptype, AutoType): + self.inference(pnode, OBJ) + if update_condition(rtn, body): + self.inference(node, body, False) + if update_condition(body, rtn): + self.update(node.body, scope, rtn) + + @visitor.when(cool.AssignNode) + def visit(self, node, scope): + super().visit(node, scope) + + node_type, var = node.info + if update_condition(var, node_type): + self.inference(scope.find_variable(node.id).node, node_type, False) + if update_condition(node_type, var): + self.update(node.expr, scope, var) + + @visitor.when(cool.CaseExpressionNode) + def visit(self, node, scope): + super().visit(node, scope) + + if isinstance(node.branch_type, AutoType): + self.inference(node, self.context.get_type('Object')) + + @visitor.when(cool.LetAttributeNode) + def visit(self, node, scope): + super().visit(node, scope) + + if isinstance(node.attr_type, AutoType): + self.inference(node, self.context.get_type('Object')) + + if not node.expr: + return + + expr, rtype = node.info + if update_condition(rtype, expr): + self.inference(scope.find_variable(node.id).node, expr, False) + if update_condition(expr, rtype): + self.update(node.expr, scope, 
rtype) + + @visitor.when(cool.IfThenElseNode) + def visit(self, node, scope): + super().visit(node, scope) + + if isinstance(node.cond_type, AutoType): + self.update(node.condition, scope, OBJ = self.context.get_type('Bool')) + + @visitor.when(cool.WhileLoopNode) + def visit(self, node, scope): + super().visit(node, scope) + + if isinstance(node.cond_type, AutoType): + self.update(node.condition, scope, self.context.get_type('Bool')) + + @visitor.when(cool.FunctionCallNode) + def visit(self, node, scope): + super().visit(node, scope) + + args, real = node.info + if not real: + return + + for idx, (atype, rtype) in enumerate(zip(args, real)): + if update_condition(rtype, atype): + self.inference(node.obj_method.nodes[idx], atype, False) + if update_condition(atype, rtype): + self.update(node.args[idx], scope, rtype) + + @visitor.when(cool.MemberCallNode) + def visit(self, node, scope): + super().visit(node, scope) + + args, real = node.info + if not real: + return + + for idx, (atype, rtype) in enumerate(zip(args, real)): + if update_condition(rtype, atype): + self.inference(node.obj_method.nodes[idx], atype, False) + if update_condition(atype, rtype): + self.update(node.args[idx], scope, rtype) + + @visitor.when(cool.BinaryNode) + def visit(self, node, scope): + super().visit(node, scope) + + left, right = node.info + INT = self.context.get_type('Int') + if isinstance(left, AutoType): + self.update(node.left, scope, INT) + if isinstance(right, AutoType): + self.update(node.right, scope, INT) + + @visitor.when(cool.ComplementNode) + def visit(self, node, scope): + super().visit(node, scope) + + if isinstance(node.expr_type, AutoType): + self.update(node.expr, scope, self.context.get_type('Int')) + + @visitor.when(cool.NotNode) + def visit(self, node, scope): + super().visit(node, scope) + + if isinstance(node.expr_type, AutoType): + self.update(node.expr, scope, self.context.get_type('Bool')) + + @visitor.when(cool.EqualNode) + def visit(self, node, scope): + 
super().visit(node, scope) + + left, right = node.info + INT = self.context.get_type('Int') + BOOL = self.context.get_type('Bool') + STRING = self.context.get_type('String') + if update_condition(left, right) and right in [INT, BOOL, STRING]: + self.update(node.left, scope, right) + if update_condition(right, left) and left in [INT, BOOL, STRING]: + self.update(node.right, scope, left) + \ No newline at end of file diff --git a/src/core/visitors/type_check/utils.py b/src/core/visitors/type_check/utils.py new file mode 100644 index 00000000..ed13b906 --- /dev/null +++ b/src/core/visitors/type_check/utils.py @@ -0,0 +1,65 @@ +from ...cmp import AutoType, SelfType, ErrorType + +WRONG_SIGNATURE = 'Method "%s" already defined in "%s" with a different signature.' +SELF_IS_READONLY = 'Variable "self" is read-only.' +LOCAL_ALREADY_DEFINED = 'Variable "%s" is already defined in method "%s".' +INCOMPATIBLE_TYPES = 'Cannot convert "%s" into "%s".' +VARIABLE_NOT_DEFINED = 'Variable "%s" is not defined.' +INVALID_OPERATION = 'Operation is not defined between "%s" and "%s".' +CONDITION_NOT_BOOL = '"%s" conditions return type must be Bool not "%s".' +INVALID_PARAMETER = 'Formal parameter "%s" cannot have type SELF_TYPE.' +INVALID_BRANCH = 'Identifier "%s" declared with type SELF_TYPE in case branch.' +DUPLICATED_BRANCH = 'Duplicate branch "%s" in case statement.' 
+ +ST, AT = ['SELF_TYPE', 'AUTO_TYPE'] +sealed = ['Int', 'String', 'Bool', 'SELF_TYPE', 'AUTO_TYPE'] +built_in_types = [ 'Int', 'String', 'Bool', 'Object', 'IO', 'SELF_TYPE', 'AUTO_TYPE'] + +def fixed_type(cur_type): + try: return cur_type.fixed + except AttributeError: return cur_type + +def update_condition(target, value): + c1 = isinstance(target, AutoType) + c2 = (not isinstance(value, AutoType)) and value + return c1 and c2 + +# Compute the Lowest Common Ancestor in +# the type hierarchy tree +def LCA(type_list): + counter = {} + + def check(target): + return [isinstance(t, target) for t in type_list] + + if all(check(SelfType)): + return SelfType(type_list[0].fixed) + if any(check(AutoType)): + return AutoType() + if any(check(ErrorType)): + return ErrorType() + type_list = [fixed_type(t) for t in type_list] + for typex in type_list: + node = typex + while True: + try: + counter[node.name] += 1 + except KeyError: + counter[node.name] = 1 + if counter[node.name] == len(type_list): + return node + if not node.parent: + break + node = node.parent + +def check_path(D, ans): + if any([(t.name == ST) for t in D]): + return True, SelfType() + for t in D: + l = [ans, t] + lca = LCA(l) + try: l.remove(lca) + except ValueError: + return False, None + ans = l[0] + return True, ans diff --git a/src/core/visitors/type_check/verifier.py b/src/core/visitors/type_check/verifier.py new file mode 100644 index 00000000..c63bd884 --- /dev/null +++ b/src/core/visitors/type_check/verifier.py @@ -0,0 +1,44 @@ +from .utils import * +from ...visitors import visitor +from ...cmp import cool_ast as cool, SemanticError + +# Type Verifier +class TypeVerifier: + def __init__(self, context, errors=None): + self.context = context + self.current_type = None + self.errors = [] if errors is None else errors # fresh list per instance; a caller-supplied list is still shared on purpose + + @visitor.on('node') + def visit(self, node): + pass + + @visitor.when(cool.ProgramNode) + def visit(self, node): + for def_class in node.declarations: + self.visit(def_class) + +
@visitor.when(cool.ClassDeclarationNode) + def visit(self, node): + self.current_type = self.context.get_type(node.id) + + for feature in node.features: + self.visit(feature) + + @visitor.when(cool.AttrDeclarationNode) + def visit(self, node): + pass + + @visitor.when(cool.FuncDeclarationNode) + def visit(self, node): + try: + m1 = node.method + m2 = self.current_type.parent.get_method(m1.name) + assert m1.return_type == m2.return_type and m1.param_types == m2.param_types + except AttributeError: + pass + except SemanticError: + pass + except AssertionError: + self.errors.append((SemanticError(f'Method "{m1.name}" already defined in {self.current_type.name} with a different signature.'), node.tid)) + \ No newline at end of file diff --git a/src/core/visitors/visitor.py b/src/core/visitors/visitor.py new file mode 100644 index 00000000..96484283 --- /dev/null +++ b/src/core/visitors/visitor.py @@ -0,0 +1,80 @@ +# The MIT License (MIT) +# +# Copyright (c) 2013 Curtis Schlak +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +import inspect + +__all__ = ['on', 'when'] + +def on(param_name): + def f(fn): + dispatcher = Dispatcher(param_name, fn) + return dispatcher + return f + + +def when(param_type): + def f(fn): + frame = inspect.currentframe().f_back + func_name = fn.func_name if 'func_name' in dir(fn) else fn.__name__ + dispatcher = frame.f_locals[func_name] + if not isinstance(dispatcher, Dispatcher): + dispatcher = dispatcher.dispatcher + dispatcher.add_target(param_type, fn) + def ff(*args, **kw): + return dispatcher(*args, **kw) + ff.dispatcher = dispatcher + return ff + return f + + +class Dispatcher(object): + def __init__(self, param_name, fn): + frame = inspect.currentframe().f_back.f_back + top_level = frame.f_locals == frame.f_globals + self.param_index = self.__argspec(fn).args.index(param_name) + self.param_name = param_name + self.targets = {} + + def __call__(self, *args, **kw): + typ = args[self.param_index].__class__ + d = self.targets.get(typ) + if d is not None: + return d(*args, **kw) + else: + issub = issubclass + t = self.targets + ks = t.keys() + ans = [t[k](*args, **kw) for k in ks if issub(typ, k)] + if len(ans) == 1: + return ans.pop() + return ans + + def add_target(self, typ, target): + self.targets[typ] = target + + @staticmethod + def __argspec(fn): + # Support for Python 3 type hints requires inspect.getfullargspec + if hasattr(inspect, 'getfullargspec'): + return inspect.getfullargspec(fn) + else: + return inspect.getargspec(fn) diff --git a/src/doc/Informe.md b/src/doc/Informe.md new file mode 100644 index 00000000..13da033e --- /dev/null +++ b/src/doc/Informe.md @@ -0,0 +1,232 @@ +# Informe de Complementos de Compilación +## Datos Generales +### Autores +- Miguel Tenorio Potrony +- 
Mauricio Lázaro Perdomo Cortés +- Lázaro Raúl Iglesias Vera + +### Sobre el proyecto +Para la implementación de este proyecto se tomaron como base los proyectos realizados durante 3er año, donde se desarrollaron las fases de chequeo e inferencia de tipos, además de parsing. El código de dichos proyectos conserva su estructura pero estuvo sujeto a cambios y mejoras. + +La mayoría de nuestras implementaciones siguen las ideas y utilizan las herramientas dadas en clase durante 3er año. + +Todas las fases del proceso de compilación y ejecución serán explicadas a continuación. + + +## Pipeline +Como se puede apreciar en [main.py](https://github.com/2kodevs/cool-compiler-2020/blob/master/src/main.py) el pipeline de nuestro proceso de compilación es: + +1. Lexer +2. Parsing +3. Recolección de tipos +4. Construcción de tipos +5. Chequeo/Inferencia de tipos +6. Verificación de tipos +7. Traducción de Cool a CIL +8. Traducción de CIL a MIPS + +Cada parte del proceso será discutida en detalle durante las siguientes secciones. + +Como se puede apreciar en la etapa #5 del proceso, el chequeo e inferencia de tipos se realizan al unísono, sin embargo cada parte se explicará en secciones separadas y se hará notar por qué se decidió realizarlas al mismo tiempo. + +## Lexer + +Para el proceso de lexer y tokenización se utilizó el paquete PLY. Se creó un lexer que consta de tres estados: + + - INITIAL + - comments + - strings + +Para cada uno de estos estados se definieron las expresiones regulares que representan cada uno de los tokens posibles, y se +manejan otras variables que conforman el estado del lexer, como la línea actual. + +## Parsing +Para el proceso de parsing se utilizó el parser LR1 y la gramática de Cool que fueron implementados para el proyecto de 3er año sobre chequeo de tipos. + +Fue necesario modificar la salida del Parser para poder devolver la información referente al token de error en caso de que alguna falla fuera detectada.
+ +Dado que los proyectos llevados a cabo previamente fueron desarrollados para mini-Cool, se hizo necesario modificar la gramática, y se obtuvo como resultado: + +### Gramática de Cool +La gramática implementada es S-Atributada. Una descripción de los símbolos y producciones de la gramática se puede ver en [grammar](https://github.com/2kodevs/cool-compiler-2020/blob/master/src/doc/grammar.pdf) + +## Recolección de tipos +Durante la recolección de tipos se visitan todas las declaraciones de clases, se crean los tipos asociados a ellas y se valida la correctitud de las mismas. + +**Errores detectados**: +- Herencia cíclica +- Redefinición de clases +- Nombres de clase no válidos + +## Construcción de tipos +A los tipos creados en la fase anterior se les añaden todos sus atributos y métodos. Además se verifica que se cumplan los requerimientos de un programa válido de Cool, que son tener una clase `Main` con su método `main`. + +**Errores detectados**: +- Problemas de nombrado de atributos y métodos +- Redefinición de atributos +- Redefinición incorrecta de métodos +- Uso de tipos no definidos +- No definición de la clase `Main` o su método `main` +- Incorrecta definición del método `main` +- Mal uso de herencia + +## Chequeo de tipos +En esta fase se evalúa la correctitud de todas las expresiones del lenguaje y se decide el tipo estático de cada una de ellas según lo establecido en el manual de [Cool](https://github.com/2kodevs/cool-compiler-2020/blob/master/doc/cool-manual.pdf). + +**Errores detectados**: +- Incompatibilidad de tipos +- Uso de tipos no definidos +- Uso de variables, tipos y métodos no definidos +- Mal uso de `self` y `SELF_TYPE` +- Mal uso del `case` + +## Inferencia de tipos +Para la implementación de esta fase se expandió el comportamiento del visitor encargado del chequeo de tipos, razón por la cual ambos procesos se realizan en la misma fase.
+ +Para lograr la inferencia de tipos, se realizó un algoritmo de punto fijo en el cual mediante repeticiones sucesivas del proceso de inferencia se van definiendo los tipos de aquellas variables declaradas como `AUTO_TYPE`. + +### Idea +Una variable en Cool dada su utilización puede definir dos conjuntos + +1. Tipos a los que se conforma (**Ancestros**) +2. Tipos que se conforman a ella (**Descendientes**) + +Dados los dos conjuntos anteriores se puede decidir si una variable `AUTO_TYPE` puede ser inferida correctamente o no. + +Ambos conjuntos recibieron un nombre intuitivo mencionado anteriormente en **negrita** para hacer referencia a su contenido. + +El tipo que se decida otorgar(inferir) a la variable en cuestión, llamémosle _**T**_, deberá conformarse a todos los tipos del conjunto 1. Al mismo tiempo todos los tipos del conjunto 2 deberán conformarse a él. + +Dicho lo anterior y dado el hecho de que un tipo *A* se conforma a un tipo *B* solamente si *B* es ancestro de *A*, podemos notar que: + +1. El tipo a seleccionar debe ser un ancestro del **Menor Ancestro Común** (**LCA** por sus siglas en inglés) a todos los nodos del conjunto 2, llamémosle *N*. En otras palabras el primer tipo que es ancestro de todos los tipos en el conjunto 2. +2. Como todos los tipos del conjunto 1 necesitan ser ancestros de _**T**_, todos pertenecerán al camino que se forma desde _**T**_ hasta *Object* en el árbol de tipos, por tanto _**T**_ necesita ser descendiente del primero que aparezca en el camino mencionado y pertenezca al conjunto 1, llamémosle *M*. +3. Tomando el operador **<=** para referirnos a la relación *ser ancestro de*, se puede afirmar que _**T**_ es de la forma _**N <= T <= M**_, o lo que es lo mismo _**T**_ podría ser cualquier tipo en el camino de *N* a *M*. + +> El nodo que representa el **LCA** siempre existe dado que el árbol de tipos es único, por tanto en caso extremo *Object* siempre será válido como ancestro a todos los tipos. 
+ +El algoritmo implementado tras cada recorrido del **AST** (Árbol de sintaxis abstracta) infiere el tipo de todas aquellas variables de las cuales se tenga información, seleccionando como tipo inferido siempre el que representa a *N*. + +Al ser este algoritmo una extensión del chequeo de tipos, mientras se van infiriendo los tipos se valida que los mismos no ocasionen error. +> En todo lo anterior se asume que todo tipo es ancestro y descendiente de sí mismo. + +**Errores detectados**: +- Mal uso de `AUTO_TYPE` en casos donde no se cumpla que _**N <= M**_ o todos los tipos en el conjunto 1 no se encuentren en un camino del árbol de tipos +- Todos los errores de chequeo semántico que existan en el código o surjan tras la inferencia de una o varias variables. + +## Verificación de tipos +Esta fase surge dado que tras el proceso de inferencia puede haber ocurrido un error que durante el chequeo semántico no se valida. Dado que permitimos *AUTO_TYPE* en los parámetros de las funciones, al terminar la inferencia pueden generarse conflictos de mala redefinición de métodos, los cuales son chequeados en la fase de Construcción de los tipos (etapa #4). Por tanto la única función de esta fase es verificar la correctitud de los tipos. + +**Errores detectados**: +- Mala redefinición de métodos ocasionada por la inferencia de tipos + +## Traducción a CIL +En esta etapa del proceso de compilación, requirió especial atención la generación de las expresiones *case*. Para ello se requiere ordenar las instrucciones de tal modo que se asegure el emparejamiento del tipo de la expresión principal con el tipo más específico declarado en las ramas del *case*. + +Primero por cada rama **b** se cuentan cuántos tipos declarados en las demás ramas se conforman a **b**, creando de este modo una tupla `(cantidad, tipo declarado en b)`.
+Luego se ordenan todas estas tuplas por su primer elemento, obteniendo así una secuencia ordenada donde el primer elemento representa la rama cuyo tipo declarado se encuentra en el nivel más bajo en la jerarquía de tipos del programa. + +Luego por cada rama **b** de esta secuencia, se obtienen todos los tipos del programa que se conforman a **b**, y por cada uno de estos que no haya sido tomado en cuenta en el procesamiento de ramas anteriores, se generan las instrucciones necesarias para comprobar si el tipo de la expresión principal del *case* coincide con él. En caso de coincidencia, se salta al bloque de las instrucciones generadas por el cuerpo de **b**; si no, entonces se procede a comprobar con el tipo siguiente. Nótese que no se repiten comprobaciones. + +**Errores detectados**: +- Dispatch estático o dinámico desde un objeto void +- Expresión principal de un *case* tiene valor `void` +- Ejecución de un *case* sin que ocurra algún emparejamiento con alguna rama. +- División por cero +- Substring fuera de rango + +> Aunque estos errores realmente se detectan en ejecución, es en esta fase que se genera el código que permite detectarlos. + +## Traducción a MIPS +En la fase de generación de código `MIPS` se enfrentaron tres problemas fundamentales: + + - Estructura de los objetos en memoria. + - Definición de tipos en memoria. + - Elección de registros. + +### Estructura de los objetos en memoria. +Determinar el modelo que seguirían los objetos en la memoria fue un paso fundamental para la toma de múltiples decisiones tanto en la generación de código `CIL` como `MIPS`. Los objetos en memoria siguen el siguiente modelo: + +```| Tipo | Tamaño | Tabla de dispatch | -- Atributos -- | Marca de objeto |``` + - Tipo: Esta sección tiene tamaño 1 `palabra`, el valor aquí encontrado se interpreta como un entero e indica el tipo del objeto.
+ - Tamaño: Esta sección tiene tamaño 1 `palabra`, el valor aquí encontrado se interpreta como un entero e indica el tamaño en `palabras` del objeto. + - Tabla de dispatch: Esta sección tiene tamaño 1 `palabra`, el valor aquí encontrado se interpreta como una dirección de memoria e indica el inicio de la tabla de dispatch del objeto. La tabla de dispatch del objeto es un segmento de la memoria donde interpretamos cada `palabra` como la dirección a uno de los métodos del objeto. + - Atributos: Esta sección tiene tamaño **N** `palabras` donde **N** es la cantidad de atributos que conforman el objeto, cada una de las `palabras` que conforman esta sección representa el valor de un atributo del objeto. + - Marca de objeto: Esta sección tiene tamaño 1 `palabra`, es un valor usado para marcar que esta zona de la memoria corresponde a un objeto, se añadió con el objetivo de hacer menos propenso a fallos la tarea de identificar objetos en memoria en el `Garbage Collector`. + +### Definición de tipos en memoria. +Un tipo está representado por tres estructuras en la memoria: + - Una dirección a una cadena alfanumérica que representa el nombre del tipo. + - Un prototipo que es una especie de plantilla que se utiliza en la creación de los objetos. Cuando se crea un objeto este prototipo es copiado al segmento de memoria asignado al objeto. Un prototipo es un objeto válido por lo que tiene exactamente la misma estructura explicada anteriormente. El prototipo es también la solución escogida para el problema de los valores por defecto de los objetos. + - Una tabla de dispatch que como se explicó anteriormente contiene las direcciones de los métodos del objeto. +Existe una tabla de prototipos (nombres) donde se puede encontrar el prototipo (nombre) de un tipo específico, utilizando como índice el valor que representa al tipo. + +### Elección de registros. 
+La elección de registros fue un proceso que se decidió optimizar para disminuir la utilización de las operaciones `lw` y `sw` en `MIPS` que, como se sabe, añaden una demora considerable a nuestros programas por el tiempo que tarda en realizarse una operación de escritura o lectura en la memoria. +El proceso de elección de registros se realiza para cada función y consta de los siguientes pasos: + - Separación del código en bloques básicos: + + Para obtener los bloques básicos primero se hace un recorrido por las instrucciones de la función marcando los líderes. Son considerados líderes las instrucciones de tipo `Label` y las instrucciones que tengan como predecesor una instrucción de tipo `Goto` o `Goto if`. Luego de tener marcados los líderes, se obtienen los bloques que serán los conjuntos de instrucciones consecutivas que comienzan con un líder y terminan con la primera instrucción que sea predecesor de un líder (notar que un bloque puede estar formado por una sola instrucción). + + - Creación del grafo de flujo: + + Este es un grafo dirigido que indica los caminos posibles entre los bloques básicos; su elaboración es bastante sencilla: si la última instrucción de un bloque es un `Goto`, entonces se añadirá una arista desde este bloque hacia el bloque iniciado por la instrucción `Label` a la que hace referencia el `Goto`; si la última instrucción es de tipo `Goto if`, entonces se añadirán dos aristas: una hacia el bloque que comienza con la instrucción `Label` a la que se hace referencia, y otra hacia el bloque que comienza con la instrucción siguiente en la función; en el caso de que la última instrucción sea de cualquier otro tipo, se colocará una sola arista desde el bloque actual hacia el bloque que comienza con la instrucción siguiente en la función. + + - Análisis de vida de las variables: + + En este procedimiento se computan cinco conjuntos para cada instrucción **I**: `succ`, `gen`, `kill`, `in` y `out`.
`succ` contiene las instrucciones que se pueden ejecutar inmediatamente después de la instrucción **I**; `gen` contiene las variables de las que se necesita el valor en la instrucción **I**; `kill` contiene las variables a las que se les asigna un valor en la instrucción **I**; `in` contiene las variables que pueden estar vivas al llegar a la instrucción **I**, y `out` contiene las variables que pueden estar vivas luego de ejecutada la instrucción **I**. + + - Creación del grafo de interferencia: + + Los vértices de este grafo serán las variables que se utilizan en la función y existirá una arista entre los vértices **x** y **y**, si las variables que representan esos nodos interfieren. Dos variables interfieren si existe alguna instrucción **I** tal que **x** pertenezca al `kill` de **I** y **y** pertenezca al `out` de **I**. + + - Asignación de registros: + + Contando con el grafo de interferencia, se asignan registros a las variables de forma tal que dos variables que interfieran no se les asigne el mismo registro, esto puede verse como el problema de colorear un grafo con **N** colores siendo **N** la cantidad de registros que se tienen. Es conocido que este problema es *NP* por lo que para asignar los registros se usa una heurística muy sencilla que consiste en lo siguiente: + + Primero se va eliminando del grafo y colocando en una pila cada nodo que tenga menos de N vecinos, se nota que todos estos elementos pueden ser coloreados sin problemas. Si en algún momento no existe algún nodo con menos de N vecinos, se tomará un nodo al azar; este proceso terminará cuando no queden nodos en el grafo. Luego se va sacando cada nodo de la pila y se le asigna un registro que no esté usado por alguno de los nodos que eran vecinos de este en el momento en que se eliminó del grafo, en el caso de que existan más de un nodo posible, se le asigna el menor, en caso de que no exista nodo posible la variable no tendrá registro y su valor permanecerá en la memoria. 
+ +**Errores detectados**: +- Heap overflow + +## Ejecución +Para ejecutar el proyecto se necesita tener instalado `Python` y el conjunto de dependencias listado en [requirements.txt](https://github.com/2kodevs/cool-compiler-2020/blob/master/requirements.txt). + +Para instalar las dependencias puede utilizar: +```bash +make install +``` +Una vez estén instaladas las dependencias, puede compilar y ejecutar cualquier archivo de código cool utilizando el comando: +```bash +make main CODE=.cl +``` +>Para usar `make` necesita estar en la dirección `/src` + +## Estructura +Los archivos del proyecto se encuentran modularizados de la siguiente manera: + +1. **core** + 1. **cmp** + 1. **cool** + 2. **parser** + 2. **lexer** + 3. **visitors** + 1. **type_check** + 2. **cil** + 3. **mips** + +**cmp** contiene todos los archivos heredados de las clases de 3er año y proyectos anteriores. + +**cool** contiene el *AST*, Gramática y Parser de Cool + +**parser** contiene la implementación parser LR1 utilizada + +**lexer** todo lo referente a lexer y tokenización + +**visitor** contiene la implementación del patrón visitor + +**type_checking** fases de la #3 a la #6 + +**cil** traducción a cil + +**mips** traducción a mips diff --git a/src/doc/Informe.pdf b/src/doc/Informe.pdf new file mode 100644 index 00000000..30f0f7cd Binary files /dev/null and b/src/doc/Informe.pdf differ diff --git a/src/doc/grammar.pdf b/src/doc/grammar.pdf new file mode 100644 index 00000000..bdf52cdc Binary files /dev/null and b/src/doc/grammar.pdf differ diff --git a/src/doc/grammar.tex b/src/doc/grammar.tex new file mode 100644 index 00000000..849ac7cc --- /dev/null +++ b/src/doc/grammar.tex @@ -0,0 +1,90 @@ +\documentclass{article} +\usepackage[utf8]{inputenc} +\usepackage{amsmath} + +\begin{document} + Terminals : class, type, inherits, id, let, in, isvoid, not, new, case, of, esac, if, then, else, fi, while, loop, pool + +\begin{eqnarray*} + program & \rightarrow & class\_list \\ + class\_list & 
\rightarrow & class\_def \\ + & \rightarrow & class\_def \ class\_list \\ + class\_def & \rightarrow & class \ type \ \{ \ feature\_list\ \} \ ;\\ + & \rightarrow & class \ type \ inherits \ type \ \{ \ feature\_list\ \} \ ;\\ + feature\_list & \rightarrow & feature \ feature\_list \\ + & \rightarrow & \epsilon \\ + feature & \rightarrow & param \ ; \\ + & \rightarrow & value\_param \ ;\\ + & \rightarrow & id \ ( \ ) \ : \ type \ \{ \ expression \ \} \ ; \\ + & \rightarrow & id \ ( \ param\_list \ ) \ : \ type\ \{ \ expression \ \} \ ; \\ + param\_list & \rightarrow & param \\ + & \rightarrow & param \ , \ param\_list \\ + param & \rightarrow & id \ : \ type \\ + value\_param & \rightarrow & param \ \leftarrow \ expression \\ + block & \rightarrow & expression \ ; \\ + & \rightarrow & expression \ ; \ block\\ + let\_list & \rightarrow & param\\ + & \rightarrow & param \ , \ let\_list\\ + & \rightarrow & value\_param\\ + & \rightarrow & value\_param \ , \ let\_list\\ + case\_list & \rightarrow & param \ \Rightarrow \ expression \ ; \\ + & \rightarrow & param \ \Rightarrow \ expression \ ; \ case\_list\\ + func\_call & \rightarrow & . \ id \ ( \ )\\ + & \rightarrow & @ \ type \ . \ id \ ( \ )\\ + & \rightarrow & . \ id \ ( \ arg\_list \ ) \\ + & \rightarrow & @ \ type \ . 
\ id \ ( \ arg\_list \ ) \\ + arg\_list & \rightarrow & expression \\ + & \rightarrow & expression \ , \ arg\_list \\ + member\_call & \rightarrow & id \ ( \ arg\_list \ ) \\ + & \rightarrow & id \ ( \ ) \\ + expression & \rightarrow & special\\ + & \rightarrow & comparison\_expr\\ +\end{eqnarray*} +\begin{eqnarray*} + special & \rightarrow & arith \ \le \ special\_arith \\ + & \rightarrow & arith \ < \ special\_arith \\ + & \rightarrow & arith \ = \ special\_arith \\ + & \rightarrow & special\_arith \\ + special\_arith & \rightarrow & arith \ + \ special\_term \\ + & \rightarrow & arith \ - \ special\_term \\ + & \rightarrow & special\_term \\ + special\_term & \rightarrow & term \ * \ special\_unary \\ + & \rightarrow & term \ / \ special\_unary \\ + & \rightarrow & special\_unary \\ + special\_unary & \rightarrow & isvoid \ special\_unary \\ + & \rightarrow & \sim \ special\_unary \\ + & \rightarrow & final\_expr \\ + final\_expr & \rightarrow & let \ let\_list \ in \ expression \\ + & \rightarrow & id \ \leftarrow \ expression \\ + & \rightarrow & not \ expression \\ + comparison\_expr & \rightarrow & arith \ \le \ arith \\ + & \rightarrow & arith \ < \ arith \\ + & \rightarrow & arith \ = \ arith \\ + & \rightarrow & arith \\ + arith & \rightarrow & arith \ + \ term \\ + & \rightarrow & arith \ - \ term \\ + & \rightarrow & term \\ + term & \rightarrow & term \ * \ unary \\ + & \rightarrow & term \ / \ unary \\ + & \rightarrow & unary \\ + unary & \rightarrow & isvoid \ unary \\ + & \rightarrow & \sim \ unary \\ + & \rightarrow & func\_expr \\ + func\_expr & \rightarrow & func\_expr \ func\_call \\ + & \rightarrow & atom \\ +\end{eqnarray*} +\begin{eqnarray*} + atom & \rightarrow & id \\ + & \rightarrow & bool \\ + & \rightarrow & string \\ + & \rightarrow & interger \\ + & \rightarrow & new \ type \\ + & \rightarrow & member\_call \\ + & \rightarrow & ( \ expression \ )\\ + & \rightarrow & \{ \ block \ \}\\ + & \rightarrow & if \ expression \ then \ 
expression \ else \ expression \ fi \\ + & \rightarrow & while \ expression \ loop \ expression \ pool\\ + & \rightarrow & case \ expression \ of \ case\_list \ esac \\ +\end{eqnarray*} + +\end{document} \ No newline at end of file diff --git a/src/main.py b/src/main.py new file mode 100644 index 00000000..dd8d3140 --- /dev/null +++ b/src/main.py @@ -0,0 +1,113 @@ +from sys import exit + +from core.cmp import evaluate_reverse_parse, CoolParser + +from core import CoolLexer +from core import TypeBuilder, TypeCollector, TypeVerifier, InferenceVisitor, COOLToCILVisitor, CILToMIPSVisitor +from core import PrintVisitor, FormatVisitor, get_formatter + +def main(args): + # Read code + try: + with open(args.file, 'r') as fd: + code = fd.read() + except: + print(f"(0,0) - CompilerError: file {args.file} not found") + exit(1) + + # Lexer + lexer = CoolLexer() + + # Tokenize + tokens = lexer.tokenize(code) + + if len(tokens) == 1 and tokens[0].lex == '$': + print("(0, 0) - SyntacticError: Unexpected token EOF") + exit(1) + + lexer_err = False + for token in tokens: + if token.token_type == "ERROR": + lexer_err = True + print(token.lex) + + if lexer_err: + exit(1) + + # Parse + parsedData, (failure, token) = CoolParser(tokens, get_shift_reduce=True) + + if failure: + print(f"({token.row},{token.column}) - SyntacticError: Unexpected token {token.lex}") + exit(1) + + # AST + parse, operations = parsedData + ast = evaluate_reverse_parse(parse, operations, tokens) + errors = [] + + # Collect user types + collector = TypeCollector() + collector.visit(ast) + context = collector.context + errors.extend(collector.errors) + + # Building types + builder = TypeBuilder(context) + builder.visit(ast) + errors.extend(builder.errors) + + # Checking types + inferencer = InferenceVisitor(context) + while inferencer.visit(ast)[0]: pass + inferencer.errors.clear() + _, scope = inferencer.visit(ast) + errors.extend(inferencer.errors) + + verifier = TypeVerifier(context) + verifier.visit(ast) + for 
e in verifier.errors: + if not e in errors: + errors.append(e) + + if errors: + for (ex, token) in errors: + print(f"({token.row},{token.column}) - {type(ex).__name__}: {str(ex)}") + exit(1) + # else: + # print(FormatVisitor().visit(ast)) + + #CIL Transformation + cool_to_cil = COOLToCILVisitor(context) + cil_ast = cool_to_cil.visit(ast, scope) + # formatter = get_formatter() + # ast_cil = formatter(cil_ast) + # print(ast_cil) + + cil_to_mips = CILToMIPSVisitor() + mips_ast = cil_to_mips.visit(cil_ast) + printer = PrintVisitor() + mips_code = printer.visit(mips_ast) + + out_file = args.file.split(".") + out_file[-1] = "mips" + out_file = ".".join(out_file) + + with open(out_file, 'w') as f: + f.write(mips_code) + with open("./core/visitors/mips/mips_lib.asm") as f2: + f.write("".join(f2.readlines())) + + exit(0) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description='CoolCompiler pipeline') + parser.add_argument('-f', '--file', type=str, default='code.cl', help='file to read') + + args = parser.parse_args() + main(args) + + # test() diff --git a/src/makefile b/src/makefile deleted file mode 100644 index 30df993f..00000000 --- a/src/makefile +++ /dev/null @@ -1,12 +0,0 @@ -.PHONY: clean - -main: - # Compiling the compiler :) - -clean: - rm -rf build/* - rm -rf ../tests/*/*.mips - -test: - pytest ../tests -v --tb=short -m=${TAG} -