写汇编写麻了,从年前断断续续一直写到现在。我把自己写的 read_matrix 和网上的实现对照了一遍又一遍,又放到 Venus 上查看内存,结果内存里存的内容是对的,但 unittest 就是过不了,最后只好把 read_matrix 换成了网上别人的实现。写 project2 的时候参考了这个仓库,以下为作业代码:
Part A:
ReLU:
relu:
    # ------------------------------------------------------------------
    # relu: clamp every negative word of an integer array to 0, in place.
    #   a0 = address of the first element
    #   a1 = number of elements
    # No value is returned; a0 is left untouched.
    # If a1 < 1 the program is terminated through ecall 17 with code 78.
    # Only temporaries are used, so no stack frame is required.
    # ------------------------------------------------------------------
    bgt a1, zero, relu_setup
    li a0, 17
    li a1, 78
    ecall
relu_setup:
    mv t0, a0                   # t0 = cursor over the array
    slli t1, a1, 2              # byte size of the array
    add t1, a0, t1              # t1 = address one past the last element
relu_scan:
    lw t2, 0(t0)
    bge t2, zero, relu_advance  # non-negative values stay as they are
    sw zero, 0(t0)
relu_advance:
    addi t0, t0, 4
    bltu t0, t1, relu_scan      # length >= 1, so a do-while shape is safe
    ret
ArgMax:
argmax:
    # ------------------------------------------------------------------
    # argmax: index of the largest word in an integer array.
    #   a0 = address of the first element
    #   a1 = number of elements
    # Returns:
    #   a0 = index of the largest element; when several elements share the
    #        maximum value, the smallest such index is returned.
    # If a1 < 1 the program is terminated through ecall 17 with code 77.
    #
    # The array is scanned front to back and the running best is replaced
    # only on a strictly greater value, which is what makes the earliest
    # maximum win. Only temporaries are used, so no stack frame is needed.
    # ------------------------------------------------------------------
    bgt a1, zero, argmax_setup
    li a0, 17
    li a1, 77
    ecall
argmax_setup:
    lw t0, 0(a0)                # t0 = best value so far (element 0)
    li t1, 0                    # t1 = index of the best value
    li t2, 1                    # t2 = index currently examined
    addi t3, a0, 4              # t3 = address of element t2
argmax_scan:
    bge t2, a1, argmax_done
    lw t4, 0(t3)
    ble t4, t0, argmax_advance  # ties keep the earlier index
    mv t0, t4
    mv t1, t2
argmax_advance:
    addi t2, t2, 1
    addi t3, t3, 4
    j argmax_scan
argmax_done:
    mv a0, t1
    ret
Dot Product:
dot:
    # ------------------------------------------------------------------
    # dot: strided dot product of two integer vectors.
    #   a0 = address of the first vector
    #   a1 = address of the second vector
    #   a2 = number of element pairs to multiply
    #   a3 = stride (in elements) of the first vector
    #   a4 = stride (in elements) of the second vector
    # Returns:
    #   a0 = sum over i of v0[i * a3] * v1[i * a4]
    # Terminates the program through ecall 17 with
    #   code 75 if a2 < 1,
    #   code 76 if a3 < 1 or a4 < 1.
    # Only temporaries and argument registers are modified.
    # ------------------------------------------------------------------
    ble a2, zero, dot_bad_length
    ble a3, zero, dot_bad_stride
    ble a4, zero, dot_bad_stride
    slli t0, a3, 2              # byte step between used elements of v0
    slli t1, a4, 2              # byte step between used elements of v1
    li t2, 0                    # running sum
dot_accumulate:
    lw t3, 0(a0)
    lw t4, 0(a1)
    mul t3, t3, t4
    add t2, t2, t3
    add a0, a0, t0
    add a1, a1, t1
    addi a2, a2, -1
    bnez a2, dot_accumulate     # length >= 1, so a do-while shape is safe
    mv a0, t2
    ret
dot_bad_length:
    li a0, 17
    li a1, 75
    ecall
dot_bad_stride:
    li a0, 17
    li a1, 76
    ecall
Matrix Multiplication:
matmul:
    # ------------------------------------------------------------------
    # matmul: d = m0 * m1 for row-major integer matrices.
    #   a0 = address of m0        a1 = rows of m0    a2 = columns of m0
    #   a3 = address of m1        a4 = rows of m1    a5 = columns of m1
    #   a6 = address of the output buffer (a1 * a5 words are written)
    # Terminates the program through ecall 17 with
    #   code 72 if a1 or a2 is not positive,
    #   code 73 if a4 or a5 is not positive,
    #   code 74 if a2 != a4.
    # Every output element is produced by one call to `dot`.
    # ------------------------------------------------------------------
    ble a1, zero, matmul_bad_m0
    ble a2, zero, matmul_bad_m0
    ble a4, zero, matmul_bad_m1
    ble a5, zero, matmul_bad_m1
    bne a2, a4, matmul_mismatch

    # Frame: ra plus the eight saved registers that must survive `dot`.
    addi sp, sp, -36
    sw ra, 0(sp)
    sw s0, 4(sp)
    sw s1, 8(sp)
    sw s2, 12(sp)
    sw s3, 16(sp)
    sw s4, 20(sp)
    sw s5, 24(sp)
    sw s6, 28(sp)
    sw s7, 32(sp)

    mv s0, a0                   # s0 = address of the current row of m0
    mv s1, a1                   # s1 = rows of m0 still to process
    mv s2, a2                   # s2 = shared dimension (length of each dot)
    mv s3, a3                   # s3 = base address of m1
    mv s4, a5                   # s4 = columns of m1
    mv s5, a6                   # s5 = cursor into the output, row-major
    slli s7, a2, 2              # s7 = size in bytes of one row of m0

matmul_next_row:
    li s6, 0                    # s6 = current column of m1
matmul_next_col:
    mv a0, s0                   # row of m0, walked with stride 1
    li a3, 1
    slli t0, s6, 2
    add a1, s3, t0              # top of column s6 of m1 ...
    mv a4, s4                   # ... walked with stride = columns of m1
    mv a2, s2
    jal dot
    sw a0, 0(s5)
    addi s5, s5, 4
    addi s6, s6, 1
    blt s6, s4, matmul_next_col
    add s0, s0, s7
    addi s1, s1, -1
    bnez s1, matmul_next_row    # all dimensions are >= 1 here

    lw ra, 0(sp)
    lw s0, 4(sp)
    lw s1, 8(sp)
    lw s2, 12(sp)
    lw s3, 16(sp)
    lw s4, 20(sp)
    lw s5, 24(sp)
    lw s6, 28(sp)
    lw s7, 32(sp)
    addi sp, sp, 36
    ret

matmul_bad_m0:
    li a0, 17
    li a1, 72
    ecall
matmul_bad_m1:
    li a0, 17
    li a1, 73
    ecall
matmul_mismatch:
    li a0, 17
    li a1, 74
    ecall
Part B:
Read Matrix:
(这个是抄上面那个仓库里头的)
read_matrix:
    # ------------------------------------------------------------------
    # read_matrix: load a binary matrix file into freshly allocated memory.
    #   a0 = address of the file name string
    #   a1 = address of a word that receives the row count
    #   a2 = address of a word that receives the column count
    # Returns:
    #   a0 = address of a malloc'ed buffer holding rows * cols words
    # The file is read as: one word of rows, one word of columns, then the
    # matrix data.
    # Terminates the program through ecall 17 with
    #   code 88 if malloc returns 0,
    #   code 90 if fopen returns -1,
    #   code 91 if any fread returns fewer bytes than requested,
    #   code 92 if fclose returns -1.
    #
    # t0/t1 are (re)loaded after every call because temporaries are not
    # guaranteed to survive the callee; t1 carries the exit code to `fail`.
    # ------------------------------------------------------------------
    # Prologue
    addi sp, sp, -16
    sw s0, 0(sp)
    sw s1, 4(sp)
    sw s2, 8(sp)
    sw ra, 12(sp)

    mv s0, a0                   # s0 = file name (replaced by the descriptor below)
    mv s1, a1                   # s1 = rows output pointer
    mv s2, a2                   # s2 = columns output pointer

    # Open the file (a2 = 0 is the mode this routine uses for reading).
    mv a1, a0
    li a2, 0
    jal fopen
    li t0, -1
    li t1, 90
    beq a0, t0, fail
    mv s0, a0                   # s0 = file descriptor

    # Read the row count straight into the caller's word.
    mv a1, s0
    mv a2, s1
    li a3, 4
    jal fread
    li t0, 4
    li t1, 91
    bne a0, t0, fail

    # Read the column count straight into the caller's word.
    mv a1, s0
    mv a2, s2
    li a3, 4
    jal fread
    li t0, 4
    li t1, 91
    bne a0, t0, fail

    # Allocate rows * cols * 4 bytes for the matrix data.
    lw t0, 0(s1)                # rows
    lw t1, 0(s2)                # columns
    mul a0, t0, t1
    slli a0, a0, 2
    mv s1, a0                   # s1 = size of the matrix in bytes
    jal malloc
    li t1, 88
    beqz a0, fail
    mv s2, a0                   # s2 = matrix buffer

    # Read the matrix data.
    mv a1, s0
    mv a2, s2
    mv a3, s1
    jal fread
    li t1, 91                   # a short body read is an fread failure, not a malloc one
    bne a0, s1, fail

    # Close the file.
    mv a1, s0
    jal fclose
    li t0, -1
    li t1, 92
    beq a0, t0, fail

    # Epilogue
    mv a0, s2
    lw s0, 0(sp)
    lw s1, 4(sp)
    lw s2, 8(sp)
    lw ra, 12(sp)
    addi sp, sp, 16
    ret

fail:
    # t1 holds the exit code chosen at the failing check.
    li a0, 17
    mv a1, t1
    ecall
Write Matrix:
write_matrix:
    # ------------------------------------------------------------------
    # write_matrix: store an integer matrix in a binary file.
    #   a0 = address of the file name string
    #   a1 = address of the matrix data (row-major words)
    #   a2 = number of rows
    #   a3 = number of columns
    # The file receives: one word of rows, one word of columns, then
    # rows * cols words of data, written with three fwrite calls.
    # Terminates the program through ecall 17 with
    #   code 93 if fopen returns -1,
    #   code 94 if any fwrite reports a count different from the request,
    #   code 95 if fclose returns -1.
    # ------------------------------------------------------------------
    # Frame layout: 0 ra | 4 s0 | 8 s1 | 12 s2 | 16 s3 | 20 header scratch word
    addi sp, sp, -24
    sw ra, 0(sp)
    sw s0, 4(sp)
    sw s1, 8(sp)
    sw s2, 12(sp)
    sw s3, 16(sp)

    mv s1, a1                   # s1 = matrix data
    mv s2, a2                   # s2 = rows
    mv s3, a3                   # s3 = columns

    # Open the file (a2 = 1 is the mode this routine uses for writing).
    mv a1, a0
    li a2, 1
    jal fopen
    li t0, -1
    beq a0, t0, write_matrix_open_failed
    mv s0, a0                   # s0 = file descriptor

    # Header word 1: row count, staged in the scratch word.
    sw s2, 20(sp)
    mv a1, s0
    addi a2, sp, 20
    li a3, 1
    li a4, 4
    jal fwrite
    li t0, 1
    bne a0, t0, write_matrix_write_failed

    # Header word 2: column count, reusing the same scratch word.
    sw s3, 20(sp)
    mv a1, s0
    addi a2, sp, 20
    li a3, 1
    li a4, 4
    jal fwrite
    li t0, 1
    bne a0, t0, write_matrix_write_failed

    # Matrix body: rows * cols elements of 4 bytes each.
    mv a1, s0
    mv a2, s1
    mul a3, s2, s3
    li a4, 4
    jal fwrite
    mul t0, s2, s3              # recomputed: temporaries may not survive the call
    bne a0, t0, write_matrix_write_failed

    # Close the file.
    mv a1, s0
    jal fclose
    li t0, -1
    beq a0, t0, write_matrix_close_failed

    lw ra, 0(sp)
    lw s0, 4(sp)
    lw s1, 8(sp)
    lw s2, 12(sp)
    lw s3, 16(sp)
    addi sp, sp, 24
    ret

write_matrix_open_failed:
    li a0, 17
    li a1, 93
    ecall
write_matrix_write_failed:
    li a0, 17
    li a1, 94
    ecall
write_matrix_close_failed:
    li a0, 17
    li a1, 95
    ecall
Classify:
classify:
    # =====================================
    # COMMAND LINE ARGUMENTS
    # =====================================
    # Args:
    #   a0 (int)    argc
    #   a1 (char**) argv
    #   a2 (int)    print_classification, if this is zero,
    #               you should print the classification. Otherwise,
    #               this function should not print ANYTHING.
    # Returns:
    #   a0 (int)    Classification
    # Exceptions:
    #   - If there are an incorrect number of command line args,
    #     this function terminates the program with exit code 89.
    #   - If malloc fails, this function terminates the program with exit code 88.
    #
    # Usage:
    #   main.s <M0_PATH> <M1_PATH> <INPUT_PATH> <OUTPUT_PATH>
    #
    # Register plan (all saved registers are restored before returning):
    #   s0  argv, later the classification result
    #   s1  print_classification flag
    #   s2  current file path while loading, later the hidden layer buffer
    #   s3/s4/s5    m0: rows pointer, cols pointer, data (s5 later = output scores)
    #   s6/s7/s8    m1: rows pointer, cols pointer, data
    #   s9/s10/s11  input: rows pointer, cols pointer, data

    # Check the argument count before touching the stack.
    li t0, 5
    bne a0, t0, fail_89

    # Prologue
    addi sp, sp, -52
    sw ra, 0(sp)
    sw s0, 4(sp)
    sw s1, 8(sp)
    sw s2, 12(sp)
    sw s3, 16(sp)
    sw s4, 20(sp)
    sw s5, 24(sp)
    sw s6, 28(sp)
    sw s7, 32(sp)
    sw s8, 36(sp)
    sw s9, 40(sp)
    sw s10, 44(sp)
    sw s11, 48(sp)

    # =====================================
    # LOAD MATRICES
    # =====================================
    mv s0, a1
    mv s1, a2

    # Load pretrained m0 (path = argv[1])
    lw s2, 4(s0)
    jal malloc_four_byte_with_error_handled
    mv s3, a0
    jal malloc_four_byte_with_error_handled
    mv s4, a0
    mv a0, s2
    mv a1, s3
    mv a2, s4
    jal read_matrix
    mv s5, a0

    # Load pretrained m1 (path = argv[2])
    lw s2, 8(s0)
    jal malloc_four_byte_with_error_handled
    mv s6, a0
    jal malloc_four_byte_with_error_handled
    mv s7, a0
    mv a0, s2
    mv a1, s6
    mv a2, s7
    jal read_matrix
    mv s8, a0

    # Load input matrix (path = argv[3])
    lw s2, 12(s0)
    jal malloc_four_byte_with_error_handled
    mv s9, a0
    jal malloc_four_byte_with_error_handled
    mv s10, a0
    mv a0, s2
    mv a1, s9
    mv a2, s10
    jal read_matrix
    mv s11, a0

    # =====================================
    # RUN LAYERS
    # =====================================
    # 1. LINEAR LAYER:    m0 * input
    # 2. NONLINEAR LAYER: ReLU(m0 * input)
    # 3. LINEAR LAYER:    m1 * ReLU(m0 * input)

    # Allocate the hidden layer: m0.rows * input.cols words.
    lw t0, 0(s3)
    lw t1, 0(s10)
    mul t0, t0, t1
    slli t0, t0, 2
    mv a0, t0
    jal malloc_with_error_handled
    mv s2, a0

    # hidden = m0 * input
    mv a0, s5
    lw a1, 0(s3)
    lw a2, 0(s4)
    mv a3, s11
    lw a4, 0(s9)
    lw a5, 0(s10)
    mv a6, s2
    jal matmul

    # hidden = ReLU(hidden)
    lw t0, 0(s3)
    lw t1, 0(s10)
    mul t0, t0, t1
    mv a1, t0
    mv a0, s2
    jal relu

    # m0's data is no longer needed; release it so s5 can be reused.
    mv a0, s5
    jal free

    # Allocate the output scores: m1.rows * input.cols words.
    lw t0, 0(s6)
    lw t1, 0(s10)
    mul t0, t0, t1
    slli t0, t0, 2
    mv a0, t0
    jal malloc_with_error_handled
    mv s5, a0

    # scores = m1 * hidden (hidden has m0.rows rows and input.cols columns)
    mv a0, s8
    lw a1, 0(s6)
    lw a2, 0(s7)
    mv a3, s2
    lw a4, 0(s3)
    lw a5, 0(s10)
    mv a6, s5
    jal matmul

    # =====================================
    # WRITE OUTPUT
    # =====================================
    # Write the scores to argv[4].
    lw a0, 16(s0)
    mv a1, s5
    lw a2, 0(s6)
    lw a3, 0(s10)
    jal write_matrix

    # =====================================
    # CALCULATE CLASSIFICATION/LABEL
    # =====================================
    lw t0, 0(s6)
    lw t1, 0(s10)
    mul t0, t0, t1
    mv a1, t0
    mv a0, s5
    jal argmax
    mv s0, a0                   # argv is no longer needed; keep the label in s0

    bne s1, zero, classify_cleanup
    # Print classification
    mv a1, s0
    jal print_int
    # Print newline afterwards for clarity
    li a1, '\n'
    jal print_char

classify_cleanup:
    # Release every buffer still owned by this function.
    mv a0, s2                   # hidden layer buffer
    jal free
    mv a0, s3
    jal free
    mv a0, s4
    jal free
    mv a0, s5                   # output scores
    jal free
    mv a0, s6
    jal free
    mv a0, s7
    jal free
    mv a0, s8
    jal free
    mv a0, s9
    jal free
    mv a0, s10
    jal free
    mv a0, s11
    jal free

    # Hand the classification back before s0 is restored.
    mv a0, s0

    # Epilogue
    lw ra, 0(sp)
    lw s0, 4(sp)
    lw s1, 8(sp)
    lw s2, 12(sp)
    lw s3, 16(sp)
    lw s4, 20(sp)
    lw s5, 24(sp)
    lw s6, 28(sp)
    lw s7, 32(sp)
    lw s8, 36(sp)
    lw s9, 40(sp)
    lw s10, 44(sp)
    lw s11, 48(sp)
    addi sp, sp, 52
    ret

fail_89:
    li a0, 17
    li a1, 89
    ecall
malloc_four_byte_with_error_handled:
    # Allocate a single 4-byte word.
    # Returns a0 = address of the word; the program exits with code 88
    # when malloc returns 0.
    # Implemented as a tail jump: ra is left untouched, so the shared
    # helper returns directly to this routine's caller.
    li a0, 4
    j malloc_with_error_handled
malloc_with_error_handled:
    # Allocate a0 bytes with malloc.
    # Returns a0 = address of the allocation; the program exits through
    # ecall 17 with code 88 when malloc returns 0.
    addi sp, sp, -4
    sw ra, 0(sp)
    jal malloc
    bnez a0, malloc_succeeded
fail_88:
    # Also reachable by a direct jump from other allocation checks.
    li a0, 17
    li a1, 88
    ecall
malloc_succeeded:
    lw ra, 0(sp)
    addi sp, sp, 4
    ret
Unittest:
from unittest import TestCase
from framework import AssemblyTest, print_coverage
class TestAbs(TestCase):
    """Tests for the `abs` routine in abs.s."""

    def _assert_abs(self, value, expected):
        """Call `abs` with `value` in a0 and require `expected` in a0 afterwards."""
        t = AssemblyTest(self, "abs.s")
        # load the input into register a0
        t.input_scalar("a0", value)
        # call the abs function
        t.call("abs")
        # a0 must hold the absolute value once the call returns
        t.check_scalar("a0", expected)
        # generate the assembly test file and run it through venus
        t.execute()

    def test_zero(self):
        self._assert_abs(0, 0)

    def test_one(self):
        self._assert_abs(1, 1)

    def test_minus_one(self):
        self._assert_abs(-1, 1)

    @classmethod
    def tearDownClass(cls):
        print_coverage("abs.s", verbose=False)
class TestRelu(TestCase):
    """Tests for the `relu` routine in relu.s."""

    def test_simple(self):
        """Negative entries are replaced by 0, the others are untouched."""
        t = AssemblyTest(self, "relu.s")
        # the array lives in the data section; a0 gets its address, a1 its length
        values = t.array([1, -2, 3, -4, 5, -6, 7, -8, 9])
        t.input_array("a0", values)
        t.input_scalar("a1", len(values))
        t.call("relu")
        # relu works in place, so the same array is inspected afterwards
        t.check_array(values, [1, 0, 3, 0, 5, 0, 7, 0, 9])
        t.execute()

    def test_exception(self):
        """An empty array makes the program exit with code 78."""
        t = AssemblyTest(self, "relu.s")
        empty = t.array([])
        t.input_array("a0", empty)
        t.input_scalar("a1", len(empty))
        t.call("relu")
        t.execute(code=78)

    @classmethod
    def tearDownClass(cls):
        print_coverage("relu.s", verbose=False)
class TestArgmax(TestCase):
    """Tests for the `argmax` routine in argmax.s."""

    def test_simple(self):
        """The largest value (9) sits at index 4."""
        t = AssemblyTest(self, "argmax.s")
        # the array lives in the data section; a0 gets its address, a1 its length
        values = t.array([1, 3, 5, 7, 9, 8, 6, 4, 2])
        t.input_array("a0", values)
        t.input_scalar("a1", len(values))
        t.call("argmax")
        # the index of the maximum is returned in a0
        t.check_scalar("a0", 4)
        t.execute()

    def test_exception(self):
        """An empty array makes the program exit with code 77."""
        t = AssemblyTest(self, "argmax.s")
        empty = t.array([])
        t.input_array("a0", empty)
        t.input_scalar("a1", len(empty))
        t.call("argmax")
        t.execute(code=77)

    @classmethod
    def tearDownClass(cls):
        print_coverage("argmax.s", verbose=False)
class TestDot(TestCase):
    """Tests for the `dot` routine in dot.s."""

    def _expect_exit(self, length, stride0, stride1, code):
        """Call `dot` on two one-element vectors and require exit code `code`.

        No register check is registered here: the routine terminates the
        program, so nothing after the call would ever run.
        """
        t = AssemblyTest(self, "dot.s")
        array0 = t.array([1])
        array1 = t.array([1])
        t.input_array("a0", array0)
        t.input_array("a1", array1)
        t.input_scalar("a2", length)
        t.input_scalar("a3", stride0)
        t.input_scalar("a4", stride1)
        t.call("dot")
        t.execute(code=code)

    def test_simple(self):
        """1^2 + 2^2 + ... + 9^2 = 285 with unit strides."""
        t = AssemblyTest(self, "dot.s")
        # create arrays in the data section
        array0 = t.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
        array1 = t.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
        # load array addresses into argument registers
        t.input_array("a0", array0)
        t.input_array("a1", array1)
        # load length and strides into argument registers
        t.input_scalar("a2", 9)
        t.input_scalar("a3", 1)
        t.input_scalar("a4", 1)
        # call the `dot` function
        t.call("dot")
        # check the return value
        t.check_scalar("a0", 285)
        t.execute()

    def test_exception1(self):
        """A length of 0 exits with code 75."""
        self._expect_exit(0, 1, 1, 75)

    def test_exception2(self):
        """A first stride of 0 exits with code 76."""
        self._expect_exit(1, 0, 1, 76)

    def test_exception3(self):
        """A second stride of 0 exits with code 76."""
        self._expect_exit(1, 1, 0, 76)

    @classmethod
    def tearDownClass(cls):
        print_coverage("dot.s", verbose=False)
class TestMatmul(TestCase):
    """Tests for the `matmul` routine in matmul.s."""

    def do_matmul(self, m0, m0_rows, m0_cols, m1, m1_rows, m1_cols, result, code=0):
        """Multiply m0 by m1 through `matmul`, expecting `result` and exit code `code`."""
        t = AssemblyTest(self, "matmul.s")
        # matmul.s calls into dot.s, so it has to be included as well
        t.include("dot.s")

        # operands plus a zero-filled buffer that receives the product
        lhs = t.array(m0)
        rhs = t.array(m1)
        product = t.array([0] * len(result))

        # left operand and its dimensions
        t.input_array("a0", lhs)
        t.input_scalar("a1", m0_rows)
        t.input_scalar("a2", m0_cols)
        # right operand and its dimensions
        t.input_array("a3", rhs)
        t.input_scalar("a4", m1_rows)
        t.input_scalar("a5", m1_cols)
        # output buffer
        t.input_array("a6", product)

        t.call("matmul")
        t.check_array(product, result)
        # the simulation is expected to exit with `code`
        t.execute(code=code)

    def test_simple(self):
        self.do_matmul(
            [1, 2, 3, 4, 5, 6, 7, 8, 9], 3, 3,
            [1, 2, 3, 4, 5, 6, 7, 8, 9], 3, 3,
            [30, 36, 42, 66, 81, 96, 102, 126, 150],
        )

    def test_exception1(self):
        # invalid dimensions for the first matrix
        self.do_matmul([1], 0, 0, [1], 1, 1, [1], 72)

    def test_exception2(self):
        # invalid dimensions for the second matrix
        self.do_matmul([1], 1, 1, [1], 0, 0, [1], 73)

    def test_exception3(self):
        # inner dimensions do not match
        self.do_matmul([1], 1, 1, [2, 2, 2, 2], 2, 2, [1], 74)

    @classmethod
    def tearDownClass(cls):
        print_coverage("matmul.s", verbose=False)
class TestReadMatrix(TestCase):
    """Tests for the `read_matrix` routine in read_matrix.s."""

    def do_read_matrix(self, fail='', code=0):
        """Read the 3x3 fixture, optionally forcing the library call `fail` to fail."""
        t = AssemblyTest(self, "read_matrix.s")
        # a0: address of the input file name
        t.input_read_filename("a0", "inputs/test_read_matrix/test_input.bin")

        # a1 / a2: one-word output slots for the dimensions, pre-filled with -1
        row_slot = t.array([-1])
        col_slot = t.array([-1])
        t.input_array("a1", row_slot)
        t.input_array("a2", col_slot)

        t.call("read_matrix")

        # a0 must point at the matrix data and both slots must hold 3
        t.check_array_pointer("a0", [1, 2, 3, 4, 5, 6, 7, 8, 9])
        t.check_array(row_slot, [3])
        t.check_array(col_slot, [3])

        # generate assembly and run it through venus
        t.execute(fail=fail, code=code)

    def test_simple(self):
        self.do_read_matrix()

    def test_malloc_exception(self):
        self.do_read_matrix(fail='malloc', code=88)

    def test_fopen_exception(self):
        self.do_read_matrix(fail='fopen', code=90)

    def test_fread_exception(self):
        self.do_read_matrix(fail='fread', code=91)

    def test_fclose_exception(self):
        self.do_read_matrix(fail='fclose', code=92)

    @classmethod
    def tearDownClass(cls):
        print_coverage("read_matrix.s", verbose=False)
class TestWriteMatrix(TestCase):
    """Tests for the `write_matrix` routine in write_matrix.s."""

    def do_write_matrix(self, fail='', code=0):
        """Write a 3x3 matrix, optionally forcing the library call `fail` to fail."""
        t = AssemblyTest(self, "write_matrix.s")
        outfile = "outputs/test_write_matrix/student.bin"

        # a0: output file name, a1: matrix data, a2 / a3: rows and columns
        t.input_write_filename("a0", outfile)
        data = t.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
        t.input_array("a1", data)
        t.input_scalar("a2", 3)
        t.input_scalar("a3", 3)

        t.call("write_matrix")
        # generate assembly and run it through venus
        t.execute(fail=fail, code=code)

        # the written file is compared with the reference only on a clean run
        if not fail:
            t.check_file_output(outfile, "outputs/test_write_matrix/reference.bin")

    def test_simple(self):
        self.do_write_matrix()

    def test_fopen(self):
        self.do_write_matrix(fail="fopen", code=93)

    def test_fwrite(self):
        self.do_write_matrix(fail="fwrite", code=94)

    def test_fclose(self):
        self.do_write_matrix(fail="fclose", code=95)

    @classmethod
    def tearDownClass(cls):
        print_coverage("write_matrix.s", verbose=False)
class TestClassify(TestCase):
    """End-to-end test for the `classify` routine in classify.s."""

    def make_test(self):
        """Build an AssemblyTest for classify.s with every routine it calls included."""
        t = AssemblyTest(self, "classify.s")
        for dependency in (
            "argmax.s",
            "dot.s",
            "matmul.s",
            "read_matrix.s",
            "relu.s",
            "write_matrix.s",
        ):
            t.include(dependency)
        return t

    def test_simple0_input0(self):
        t = self.make_test()
        out_file = "outputs/test_basic_main/student0.bin"
        ref_file = "outputs/test_basic_main/reference0.bin"
        args = [
            "inputs/simple0/bin/m0.bin",
            "inputs/simple0/bin/m1.bin",
            "inputs/simple0/bin/inputs/input0.bin",
            out_file,
        ]
        # call classify function
        t.call("classify")
        # generate assembly and pass program arguments directly to venus
        t.execute(args=args)
        # the written matrix must match the reference file
        t.check_file_output(out_file, ref_file)
        # the printed classification is checked with `check_stdout`
        t.check_stdout("2")

    @classmethod
    def tearDownClass(cls):
        print_coverage("classify.s", verbose=False)
class TestMain(TestCase):
    """End-to-end tests that run main.s with command line arguments."""

    def run_main(self, inputs, output_id, label):
        """Run main.s on the network under `inputs`; check stdout and the output file."""
        student_output = f"outputs/test_basic_main/student{output_id}.bin"
        reference = f"outputs/test_basic_main/reference{output_id}.bin"
        args = [
            f"{inputs}/m0.bin",
            f"{inputs}/m1.bin",
            f"{inputs}/inputs/input0.bin",
            student_output,
        ]

        t = AssemblyTest(self, "main.s", no_utils=True)
        t.call("main")
        t.execute(args=args, verbose=False)
        t.check_stdout(label)
        t.check_file_output(student_output, reference)

    def test0(self):
        self.run_main("inputs/simple0/bin", "0", "2")

    def test1(self):
        self.run_main("inputs/simple1/bin", "1", "1")