From b19efd669ab80973daefcf66406a8da4f754bd79 Mon Sep 17 00:00:00 2001
From: xamidev <121681048+xamidev@users.noreply.github.com>
Date: Sun, 20 Oct 2024 17:15:06 +0200
Subject: [PATCH] Add: PUT instruction, Assembler separation
---
.gitignore | 2 +
README.md | 11 ++-
assembler/das.c | 192 ++++++++++++++++++++++++++++++++++++++++++++
cpu.c | 209 +++++++-----------------------------------------
cpu.h | 60 ++++++++++++++
makefile | 12 ++-
program.asm | 8 +-
7 files changed, 306 insertions(+), 188 deletions(-)
create mode 100644 assembler/das.c
create mode 100644 cpu.h
diff --git a/.gitignore b/.gitignore
index 5293f37..f7c8cb5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
*.out
dumb8
+das
+*.bin
diff --git a/README.md b/README.md
index 3a2d9e8..e2a5db2 100644
--- a/README.md
+++ b/README.md
@@ -6,11 +6,14 @@ This project is the implementation of a CPU in a high-level language, C. It aims
## How to test
-You can run a test program like that. I'll try making a developer's manual so one can make its own programs using the custom assembly here.
+You can run a test program as shown below. I plan to write a developer's manual so that you can write your own programs in the custom assembly language.
+
+`das` is the Dumb8 Assembler, which converts `.asm` files to `.bin` CPU executables. `dumb8` is the CPU emulator.
```
make
-./dumb8 program.asm
+./das program.asm
+./dumb8 program.bin
```
## Technical specifications
@@ -58,3 +61,7 @@ NOP
HLT
```
+
+## Known bugs
+
+- R0 is not usable (confusion with NOP opcode 0 in memory)
\ No newline at end of file
diff --git a/assembler/das.c b/assembler/das.c
new file mode 100644
index 0000000..b414c65
--- /dev/null
+++ b/assembler/das.c
@@ -0,0 +1,192 @@
+/*
+ * Dumb8 code assembler
+ * Made by github.com/xamidev
+ *
+ * This is free and unencumbered software released into the public domain.
+ * For more information, please refer to <https://unlicense.org>
+*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "../cpu.h"
+
+/*
+ * Reading the assembly file and writing its instructions in
+ * opcode format in a binary file
+*/
+
+// The code is not optimized at all. But remember: it is a DUMB assembler!
+
+/* Mnemonic table. Each character of `operands` is one encoded operand byte:
+ * 'r' for a register, 'i' for an 8-bit immediate. */
+static const struct
+{
+    const char* name;
+    uint8_t opcode;
+    const char* operands;
+} mnemonics[] = {
+    {"HLT", HLT, ""},   {"NOP", NOP, ""},   {"MOV", MOV, "rr"}, {"PUT", PUT, "ri"},
+    {"ADD", ADD, "rr"}, {"SUB", SUB, "rr"}, {"OR", OR, "rr"},   {"AND", AND, "rr"},
+    {"XOR", XOR, "rr"}, {"OUT", OUT, "r"},  {"IN", IN, "r"},    {"JMP", JMP, "i"},
+    {"JEQ", JEQ, "ri"}, {"CMP", CMP, "rr"},
+};
+
+/* Returns the index of a register token "R0".."R3" (below NUM_REGISTERS), or -1. */
+static int parse_register(const char* token)
+{
+    // token[1] is tested before token[2] so a one-character token is never over-read.
+    if ((token[0] != 'R' && token[0] != 'r') || token[1] < '0' || token[2] != '\0')
+    {
+        return -1;
+    }
+    return (token[1] - '0') < NUM_REGISTERS ? token[1] - '0' : -1;
+}
+
+/* Returns the value of a decimal token in the range 0..255, or -1. */
+static int parse_immediate(const char* token)
+{
+    char* end = NULL;
+    long value = strtol(token, &end, 10);
+    return (end != token && *end == '\0' && value >= 0 && value <= UINT8_MAX) ? (int)value : -1;
+}
+
+/*
+ * Writes `src` with the extension of its last path component replaced by ".bin"
+ * into `dst`. Returns 0, or -1 when the name does not fit or equals `src`
+ * (opening it for writing would truncate the input).
+ */
+static int make_binary_name(const char* src, char* dst, size_t dst_size)
+{
+    const char* slash = strrchr(src, '/');
+    const char* base = slash ? slash + 1 : src;
+    const char* dot = strrchr(base, '.');
+    size_t stem = (dot && dot != base) ? (size_t)(dot - src) : strlen(src);
+    int n = snprintf(dst, dst_size, "%.*s.bin", (int)stem, src);
+    return (n > 0 && (size_t)n < dst_size && strcmp(src, dst) != 0) ? 0 : -1;
+}
+
+/*
+ * Encodes one source line into `out` (opcode followed by up to two operand bytes).
+ * Returns the number of bytes produced, 0 for a blank or comment line,
+ * -1 for an unknown mnemonic and -2 for missing or invalid operands.
+ */
+static int encode_line(const char* line, uint8_t out[3])
+{
+    char mnemonic[10] = {0};
+    int consumed = 0;
+    size_t op = 0;
+    const size_t op_count = sizeof(mnemonics) / sizeof(mnemonics[0]);
+
+    if (sscanf(line, "%9s%n", mnemonic, &consumed) != 1 || mnemonic[0] == ';')
+    {
+        return 0;
+    }
+    // Whole-token comparison, so "OR" and "IN" match only those exact mnemonics.
+    while (op < op_count && strcmp(mnemonic, mnemonics[op].name) != 0)
+    {
+        op++;
+    }
+    if (op == op_count)
+    {
+        return -1;
+    }
+
+    const char* spec = mnemonics[op].operands;
+    const char* p = line + consumed;
+    out[0] = mnemonics[op].opcode;
+    for (size_t k = 0; spec[k] != '\0'; k++)
+    {
+        char token[10] = {0};
+        int used = 0;
+        // Operands are separated by a comma; whitespace around it is optional.
+        if ((k > 0 && *p++ != ',') || sscanf(p, " %9[^, \t\r\n] %n", token, &used) != 1)
+        {
+            return -2;
+        }
+        p += used;
+        int value = spec[k] == 'r' ? parse_register(token) : parse_immediate(token);
+        if (value < 0)
+        {
+            return -2;
+        }
+        out[1 + k] = (uint8_t)value;
+    }
+    return 1 + (int)strlen(spec);
+}
+
+/*
+ * Assembles the Dumb8 source file `filename` and writes the machine code to a
+ * file of the same name with a ".bin" extension. `filename` is not modified.
+ * Blank lines and lines whose first token starts with ';' are skipped; invalid
+ * lines are reported and skipped. Exits with status 1 when a file cannot be
+ * opened or written, or when the program is larger than BUF_MAX bytes.
+ */
+void assemble(const char* filename)
+{
+    char binary_file[FILENAME_MAX] = {0};
+    char line[256] = {0};
+    uint8_t buffer[BUF_MAX] = {0};
+    size_t i = 0;
+
+    FILE* fp = fopen(filename, "r");
+    if (!fp)
+    {
+        printf("Cannot read file '%s'\n", filename);
+        exit(1);
+    }
+
+    // The output is opened only when a distinct name that fits could be built.
+    int named = make_binary_name(filename, binary_file, sizeof(binary_file)) == 0;
+    FILE* bin_fp = named ? fopen(binary_file, "wb") : NULL;
+    if (!bin_fp)
+    {
+        printf("Cannot open file '%s' for writing.\n", binary_file);
+        fclose(fp);
+        exit(1);
+    }
+
+    while (fgets(line, sizeof(line), fp))
+    {
+        uint8_t code[3] = {0};
+        line[strcspn(line, "\r\n")] = '\0';
+
+        int n = encode_line(line, code);
+        if (n < 0)
+        {
+            printf("%s '%s'\n", n == -1 ? "Unknown instruction" : "Invalid operands in", line);
+            continue;
+        }
+        if (i + (size_t)n > BUF_MAX)
+        {
+            printf("Program does not fit in %d bytes\n", BUF_MAX);
+            fclose(fp);
+            fclose(bin_fp);
+            exit(1);
+        }
+        memcpy(&buffer[i], code, (size_t)n);
+        i += (size_t)n;
+    }
+
+    size_t written = fwrite(buffer, sizeof(uint8_t), i, bin_fp);
+    fclose(fp);
+    // fclose flushes the stream, so its result is part of the write check.
+    if (fclose(bin_fp) != 0 || written != i)
+    {
+        printf("Cannot write file '%s'\n", binary_file);
+        exit(1);
+    }
+}
+
+int main(int argc, char* argv[])
+{
+    if (argc < 2) // the assembly source path is the only (required) argument
+    {
+        printf("Usage: %s <file.asm>\n", argv[0]);
+        exit(1);
+    }
+    assemble(argv[1]);
+    return 0;
+}
\ No newline at end of file
diff --git a/cpu.c b/cpu.c
index 6d8d102..46fd8f6 100644
--- a/cpu.c
+++ b/cpu.c
@@ -11,60 +11,7 @@
#include
#include
#include
-
-#define MEM_SIZE 256
-#define NUM_REGISTERS 4
-
-/*
- * Instruction set
- * Here, we're making a RISC (reduced instruction set computer)
- * so we're staying minimalistic.
-*/
-
-typedef enum
-{
- // 0x00 -> No operation
- NOP = 0,
-
- // 0xA? -> Memory operations
- MOV = 0xA0,
-
- // 0xB? -> Arithmetic operations
- ADD = 0xB0,
- SUB = 0xB1,
-
- // 0xC? -> Bitwise operations
- OR = 0xC0,
- AND = 0xC1,
- XOR = 0xC2,
-
- // 0xD? -> Input/output operations
- OUT = 0xD0,
- IN = 0xD1,
-
- // 0xE? -> Jump and comparisons
- JMP = 0xE0,
- JEQ = 0xE1,
- CMP = 0xE2,
-
- // 0xF? -> Misc operations
- HLT = 0xFF
-} instruction_set_t;
-
-/*
- * CPU structure definition
- * Contains 4 8-bit registers, memory, a program counter, a halt switch, and flags.
-*/
-
-typedef struct
-{
- uint8_t reg[NUM_REGISTERS];
- uint8_t memory[MEM_SIZE];
- uint16_t pc;
- bool halted;
- bool equal_flag;
- int flag_clear_delay;
-} CPU_t;
+#include "cpu.h"
CPU_t cpu;
@@ -91,7 +38,7 @@ void cpu_init()
void cpu_exec(uint8_t opcode)
{
- uint8_t reg1, reg2, addr;
+ uint8_t reg1, reg2, addr, value;
if (cpu.flag_clear_delay > 0)
{
@@ -112,6 +59,11 @@ void cpu_exec(uint8_t opcode)
reg2 = cpu.memory[cpu.pc++];
cpu.reg[reg1] = cpu.reg[reg2];
break;
+ case PUT:
+ reg1 = cpu.memory[cpu.pc++];
+ value = cpu.memory[cpu.pc++];
+ cpu.reg[reg1] = value;
+ break;
case ADD:
reg1 = cpu.memory[cpu.pc++];
reg2 = cpu.memory[cpu.pc++];
@@ -184,130 +136,32 @@ void cpu_load(const uint8_t* program, size_t size)
}
}
-/*
- * Reading the assembly file and writing its instructions in
- * opcode format in memory
-*/
-
-void assemble(const char* filename)
+void load_program_from_bin(char* binary_file)
{
- FILE* fp = fopen(filename, "r");
+ FILE* binary_fp = fopen(binary_file, "rb");
- if (!fp)
+ if (!binary_fp)
{
- printf("Cannot read file '%s'\n", filename);
+ printf("Cannot open file '%s' for reading.\n", binary_file);
exit(1);
}
- char line[256] = {0};
- size_t mem_index = 0;
+ fseek(binary_fp, 0, SEEK_END);
+ size_t size = ftell(binary_fp);
+ rewind(binary_fp);
- while (fgets(line, sizeof(line), fp))
- {
- char instruction[10] = {0};
- char reg1[10] = {0};
- char reg2[10] = {0};
- int addr;
- if (strncmp(line, ";", 1) == 0)
- {
- // comment, ignore
- continue;
- }
- else if (sscanf(line, "%s %[^,], %s", instruction, reg1, reg2) == 3)
- {
- //printf("SS1");
- int reg1_n = reg1[1] - '0';
- int reg2_n = reg2[1] - '0';
+ uint8_t* program_buffer = (uint8_t*)malloc(size);
+ if (!program_buffer)
+ {
+ printf("Memory allocation failed\n");
+ fclose(binary_fp);
+ exit(1);
+ }
- if (strncmp(instruction, "MOV", 3) == 0)
- {
- cpu.memory[mem_index++] = MOV;
- cpu.memory[mem_index++] = reg1_n;
- cpu.memory[mem_index++] = reg2_n;
- } else if (strncmp(instruction, "ADD", 3) == 0)
- {
- cpu.memory[mem_index++] = ADD;
- cpu.memory[mem_index++] = reg1_n;
- cpu.memory[mem_index++] = reg2_n;
- } else if (strncmp(instruction, "CMP", 3) == 0)
- {
- cpu.memory[mem_index++] = CMP;
- cpu.memory[mem_index++] = reg1_n;
- cpu.memory[mem_index++] = reg2_n;
- } else if (strncmp(instruction, "SUB", 3) == 0)
- {
- cpu.memory[mem_index++] = SUB;
- cpu.memory[mem_index++] = reg1_n;
- cpu.memory[mem_index++] = reg2_n;
- }
- else if (strncmp(instruction, "OR", 2) == 0)
- {
- cpu.memory[mem_index++] = OR;
- cpu.memory[mem_index++] = reg1_n;
- cpu.memory[mem_index++] = reg2_n;
- }
- else if (strncmp(instruction, "AND", 3) == 0)
- {
- cpu.memory[mem_index++] = AND;
- cpu.memory[mem_index++] = reg1_n;
- cpu.memory[mem_index++] = reg2_n;
- }
- else if (strncmp(instruction, "XOR", 3) == 0)
- {
- cpu.memory[mem_index++] = XOR;
- cpu.memory[mem_index++] = reg1_n;
- cpu.memory[mem_index++] = reg2_n;
- }
- } else if (sscanf(line, "%s %[^,], %d", instruction, reg1, &addr) == 2)
- {
- //printf("SS2");
- int reg1_n = reg1[1] - '0';
-
- if (strncmp(instruction, "JEQ", 3) == 0)
- {
- cpu.memory[mem_index++] = JEQ;
- cpu.memory[mem_index++] = reg1_n;
- cpu.memory[mem_index++] = addr;
- }
- else if (sscanf(line, "%s %d", instruction, &addr) == 2)
- {
- //printf("SS3");
- if (strncmp(instruction, "JMP", 3) == 0)
- {
- cpu.memory[mem_index++] = JMP;
- cpu.memory[mem_index++] = addr;
- }
- }
-
- else if (sscanf(line, "%s %s", instruction, reg1) == 2)
- {
- //printf("SS4");
- int reg1_n = reg1[1] - '0';
-
- if (strncmp(instruction, "OUT", 3) == 0)
- {
- cpu.memory[mem_index++] = OUT;
- cpu.memory[mem_index++] = reg1_n;
- }
-
- if (strncmp(instruction, "IN", 2) == 0)
- {
- cpu.memory[mem_index++] = IN;
- cpu.memory[mem_index++] = reg1_n;
- }
- }
- }
-
- else if (strncmp(line, "HLT", 3) == 0)
- {
- cpu.memory[mem_index++] = HLT;
- }
- else if (strncmp(line, "NOP", 3) == 0)
- {
- cpu.memory[mem_index++] = NOP;
- }
- }
- fclose(fp);
+ fread(program_buffer, sizeof(uint8_t), size, binary_fp);
+ cpu_load(program_buffer, size);
+ free(program_buffer);
+ fclose(binary_fp);
}
/*
@@ -335,7 +189,7 @@ void cpu_dump()
for (size_t i=0; i\n", argv[0]);
+ printf("Usage: %s \n", argv[0]);
return -1;
}
- assemble(argv[1]);
-
+ load_program_from_bin(argv[1]);
+
// Dumping our program
mem_dump();
-
- reg_write(1, 0x68);
- reg_write(2, 0x69);
- reg_write(3, 0x21);
cpu_run();
// Post-mortem analysis
diff --git a/cpu.h b/cpu.h
new file mode 100644
index 0000000..fa2f559
--- /dev/null
+++ b/cpu.h
@@ -0,0 +1,60 @@
+#ifndef CPU_H
+#define CPU_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#define MEM_SIZE 256 // size in bytes of CPU_t.memory
+#define NUM_REGISTERS 4 // number of 8-bit registers in CPU_t.reg
+#define BUF_MAX 256 // capacity in bytes of the assembler's output buffer
+
+/*
+ * Instruction set: a deliberately minimal RISC (reduced instruction set computer).
+ * The operands of an instruction follow its opcode byte in memory.
+*/
+typedef enum
+{
+    // 0x00 -> No operation
+    NOP = 0,
+
+    // 0xA? -> Memory operations
+    MOV = 0xA0, // copy the second register into the first
+    PUT = 0xA1, // store an immediate byte into a register
+
+    // 0xB? -> Arithmetic operations
+    ADD = 0xB0,
+    SUB = 0xB1,
+
+    // 0xC? -> Bitwise operations
+    OR = 0xC0,
+    AND = 0xC1,
+    XOR = 0xC2,
+
+    // 0xD? -> Input/output operations
+    OUT = 0xD0,
+    IN = 0xD1,
+
+    // 0xE? -> Jump and comparisons
+    JMP = 0xE0,
+    JEQ = 0xE1,
+    CMP = 0xE2,
+
+    // 0xF? -> Misc operations
+    HLT = 0xFF
+} instruction_set_t;
+
+/*
+ * CPU structure definition
+ * Contains 4 8-bit registers, memory, a program counter, a halt switch, and flags.
+*/
+typedef struct
+{
+    uint8_t reg[NUM_REGISTERS];
+    uint8_t memory[MEM_SIZE];
+    uint16_t pc;
+    bool halted;
+    bool equal_flag;
+    int flag_clear_delay;
+} CPU_t;
+
+#endif
\ No newline at end of file
diff --git a/makefile b/makefile
index e20ce02..a48d73f 100644
--- a/makefile
+++ b/makefile
@@ -1,5 +1,11 @@
-all:
- gcc -g *.c -o dumb8
+CC=gcc
+CFLAGS=-g -Wall -Wextra
+.PHONY: all asm clean # none of these targets names a file
+all: asm # builds the assembler (das) first, then the emulator (dumb8)
+	$(CC) *.c $(CFLAGS) -o dumb8
+
+asm:
+	$(CC) assembler/*.c $(CFLAGS) -o das
clean:
- rm a.out
+	rm -f dumb8 das
diff --git a/program.asm b/program.asm
index 1ee2c47..a1baa94 100644
--- a/program.asm
+++ b/program.asm
@@ -1,7 +1,7 @@
;this is a comment
+PUT R2, 5
+MOV R1, R2
+
+ADD R1, R2
-OUT R1
-OUT R2
-OUT R3
-IN R0
HLT