ive always wanted to write a compiler but have found myself to be just a little too retarded
but today that changes!!!!!
its buggy, incomplete and implements a terrible language but it runs and its mine, maybe ill try to make a better one at some point.
and running this program in it
i=0;
n1=0;
n2=1;
x=0;
WHILE (i < 12)
{
x = n1;
n1 = n2;
n2 = n1 + x;
i = i + 1;
PRINT x;
};
"successfully" prints out the first 12 fibonacci numbers
from enum import Enum, auto
class Token(Enum):
LPAR = 0
RPAR = auto()
PLUS = auto()
MINUS = auto()
STAR = auto()
SLASH = auto()
PERCENT = auto()
NUM = auto()
NEQ = auto()
EQ = auto()
GT = auto()
GE = auto()
LT = auto()
LE = auto()
BANG = auto()
BOR = auto()
BAND = auto()
XOR = auto()
SHL = auto()
SHR = auto()
SEMICOLON = auto()
IF = auto()
ELSE = auto()
IDENT = auto()
LBRACE = auto()
RBRACE = auto()
ASSIGN = auto()
PRINT = auto()
WHILE = auto()
class Opcode(Enum):
CONSTANT = 99
JNZ = auto()
JMP = auto()
POP = auto()
SBRK = auto()
STOR = auto()
LOAD = auto()
class Tokenizer():
def __init__(self, source):
self.source = source
self.cur = 0
def tokenize(self):
r = []
while not self.end():
t = self.token()
if t: r.append(t)
return r
def token(self):
while self.char().isspace():
if self.advance(): return None
if self.char().isnumeric():
x = []
while self.char().isnumeric():
x.append(self.char())
if self.advance(): break
return (Token.NUM, int("".join(x)))
if self.char().isalpha():
x = [self.char()]
if self.advance(): return (Token.IDENT, "".join(x))
while self.char().isalnum():
x.append(self.char())
if self.advance(): break
match "".join(x):
case "IF": return (Token.IF, None)
case "ELSE": return (Token.ELSE, None)
case "PRINT": return (Token.PRINT, None)
case "WHILE": return (Token.WHILE, None)
case (x): return (Token.IDENT, x)
match self.char():
case "+": return (Token.PLUS, self.advance())
case "-": return (Token.MINUS, self.advance())
case "*": return (Token.STAR, self.advance())
case "/": return (Token.SLASH, self.advance())
case "(": return (Token.LPAR, self.advance())
case ")": return (Token.RPAR, self.advance())
case "{": return (Token.LBRACE, self.advance())
case "}": return (Token.RBRACE, self.advance())
case "|": return (Token.BOR, self.advance())
case "&": return (Token.BAND, self.advance())
case "^": return (Token.XOR, self.advance())
case "%": return (Token.PERCENT, self.advance())
case ";": return (Token.SEMICOLON, self.advance())
case "<":
if self.advance() or self.char() != "<": return (Token.LT, None)
return (Token.SHL, self.advance())
case "=":
if self.advance() or self.char() != "=": return (Token.ASSIGN, None)
return (Token.EQ, self.advance())
raise Exception("err", self.char())
def char(self):
return self.source[self.cur]
def end(self):
return self.cur >= len(self.source)
def advance(self):
self.cur += 1
return self.end()
class Compiler():
def __init__(self):
self.idents = {}
def compile(self, tokens):
self.cur = 0
self.tokens = tokens
return self.statement()
def statement(self):
if self.match(Token.LBRACE):
stmt = []
while not self.match(Token.RBRACE):
stmt = [*stmt, *self.statement()]
if not self.match(Token.RBRACE): stmt.append(Opcode.POP)
else: break
return stmt
expr = self.expression()
self.consume(Token.SEMICOLON)
return expr
def expression(self):
return self.ewhile()
def ewhile(self):
if self.match(Token.WHILE):
self.consume(Token.LPAR)
expr = self.expression()
self.consume(Token.RPAR)
stmt = self.statement()
return [*expr, Opcode.JNZ, len(stmt) + 4, *stmt, Opcode.JMP, -len(stmt) - 2 - len(expr)]
return self.print()
def print(self):
if self.match(Token.PRINT):
return [*self.expression(), Token.PRINT]
return self.assign()
def assign(self):
if self.match(Token.IDENT):
if self.peek()[0] != Token.ASSIGN:
self.cur -= 1
return self.bor()
var = self.previous()[1]
expr = []
if var not in self.idents:
self.idents[var] = len(self.idents)
expr = [Opcode.SBRK]
while self.match(Token.ASSIGN):
val = self.bor()
expr = [*expr, *val, Opcode.STOR, self.idents[var]]
return expr
return self.bor()
def bor(self):
return self.binary(self.xor, Token.BOR)
def xor(self):
return self.binary(self.band, Token.XOR)
def band(self):
return self.binary(self.equality, Token.BAND)
def equality(self):
return self.binary(self.compshartyson, Token.NEQ, Token.EQ)
def compshartyson(self):
return self.binary(self.bitshift, Token.GT, Token.GE, Token.LT, Token.LE)
def bitshift(self):
return self.binary(self.term, Token.SHL, Token.SHR)
def term(self):
return self.binary(self.factor, Token.MINUS, Token.PLUS)
def factor(self):
return self.binary(self.unary, Token.SLASH, Token.STAR, Token.PERCENT)
def unary(self):
while self.match(Token.BANG, Token.MINUS):
op = self.previous()
return [*self.unary(), op[0]]
return self.eif()
def eif(self):
if self.match(Token.IF):
self.consume(Token.LPAR)
expr = self.expression()
self.consume(Token.RPAR)
stmt = self.statement()
if self.match(Token.ELSE):
elsestmt = self.statement()
return [*expr, Opcode.JNZ, len(stmt) + 4, *stmt, Opcode.JMP, len(elsestmt) + 2, *elsestmt]
return [*expr, Opcode.JNZ, len(stmt) + 2, *stmt]
return self.primary()
def primary(self):
if self.match(Token.NUM): return [Opcode.CONSTANT, self.previous()[1]]
if self.match(Token.IDENT): return [Opcode.LOAD, self.idents[self.previous()[1]]]
if self.match(Token.LPAR):
expr = self.expression()
self.consume(Token.RPAR)
return expr
raise Exception("err", self.peek())
def binary(self, last, *match):
expr = [*last()]
while self.match(*match):
op = self.previous()
right = last()
expr = [*expr, *right, op[0]]
return expr
def match(self, *types):
for type in types:
if self.check(type):
self.advance()
return True
return False
def check(self, type):
if self.end(): return False
return self.peek()[0] == type
def advance(self):
if not self.end(): self.cur += 1
return self.previous()
def end(self):
return self.cur >= len(self.tokens)
def peek(self):
return self.tokens[self.cur]
def previous(self):
return self.tokens[self.cur - 1]
def consume(self, type):
if self.check(type): return self.advance()
raise Exception("err", type, self.peek())
class VM():
def __init__(self):
self.heap = []
def run(self, code):
self.code = code
self.pc = 0
self.stack = []
while self.pc < len(code):
self.execute()
return self.stack.pop()
def execute(self):
match self.code[self.pc]:
case Opcode.CONSTANT:
self.stack.append(self.code[self.pc + 1])
self.pc += 1
case Opcode.JNZ:
if not self.stack.pop(): self.pc += self.code[self.pc + 1] - 1
else: self.pc += 1
case Opcode.JMP:
self.pc += self.code[self.pc + 1] - 1
case Opcode.POP:
self.stack.pop()
case Opcode.SBRK:
self.heap.append(0)
case Opcode.STOR:
self.heap[self.code[self.pc + 1]] = self.stack.pop()
self.stack.append(self.heap[self.code[self.pc + 1]])
self.pc += 1
case Opcode.LOAD:
self.stack.append(self.heap[self.code[self.pc + 1]])
self.pc += 1
case Token.PRINT:
print(self.stack[-1])
case Token.PLUS:
self.stack.append(self.stack.pop() + self.stack.pop())
case Token.MINUS:
r = self.stack.pop()
self.stack.append(self.stack.pop() - r)
case Token.STAR:
self.stack.append(self.stack.pop() * self.stack.pop())
case Token.SLASH:
r = self.stack.pop()
self.stack.append(self.stack.pop() // r)
case Token.PERCENT:
r = self.stack.pop()
self.stack.append(self.stack.pop() % r)
case Token.LT:
r = self.stack.pop()
self.stack.append(self.stack.pop() < r)
case Token.BOR:
self.stack.append(self.stack.pop() | self.stack.pop())
case Token.SHL:
r = self.stack.pop()
self.stack.append(self.stack.pop() << r)
case (x):
raise Exception("err", x)
self.pc += 1
if __name__ == "__main__":
c = Compiler()
v = VM()
while True:
s = input("> ")
a = Tokenizer(s).tokenize()
print(a)
b = c.compile(a)
print(b)
print(v.run(b))
also CCC is back, would be cool to go since i live quite near (kiel) but currently unable to due to personal circumstances.
__attribute__((noreturn)) int main(int argc, char* argv[])
{
std::cout << "Goodbye, World!" << std::endl;
delete world;
}