fuzzy compiler v0 rev0, written in assembly & running on george <3

This commit is contained in:
august kline 2024-10-07 01:49:17 -04:00
parent cbc7bff7f7
commit a062f30659
14 changed files with 268 additions and 1047 deletions

271
Cargo.lock generated
View File

@ -1,271 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
"zerocopy",
]
[[package]]
name = "allocator-api2"
version = "0.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
[[package]]
name = "cc"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "324c74f2155653c90b04f25b2a47a8a631360cb908f92a772695f430c7e31052"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9"
dependencies = [
"hashbrown",
"stacker",
]
[[package]]
name = "either"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
[[package]]
name = "fuzzy"
version = "0.1.0"
dependencies = [
"chumsky",
"indextree",
]
[[package]]
name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash",
"allocator-api2",
]
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "indextree"
version = "4.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d6f1b8dbc8f1e5a0f45e05b9293c42cbab79086baeb3e914d3936f8149edc4f"
dependencies = [
"indextree-macros",
]
[[package]]
name = "indextree-macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357230c23ee6024223892ce0de19888a04139ca5bb94f5becb04d38b75a4bccf"
dependencies = [
"either",
"itertools",
"proc-macro2",
"quote",
"strum",
"syn",
"thiserror",
]
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]]
name = "libc"
version = "0.2.155"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
[[package]]
name = "once_cell"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "proc-macro2"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
dependencies = [
"unicode-ident",
]
[[package]]
name = "psm"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
dependencies = [
"cc",
]
[[package]]
name = "quote"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rustversion"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6"
[[package]]
name = "stacker"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
dependencies = [
"cc",
"cfg-if",
"libc",
"psm",
"winapi",
]
[[package]]
name = "strum"
version = "0.26.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
dependencies = [
"strum_macros",
]
[[package]]
name = "strum_macros"
version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn",
]
[[package]]
name = "syn"
version = "2.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "zerocopy"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

View File

@ -1,15 +0,0 @@
[package]
name = "fuzzy"
version = "0.1.0"
edition = "2021"
[dependencies]
chumsky = { version = "0.9.3"}
indextree = "4.7.2"
[lib]
crate-type = ["lib"]
[[bin]]
name = "main"

View File

@ -16,16 +16,26 @@ fuzzy is part of george, and shouldn't run anywhere else.
#### low-level
(most of) fuzzy could be made with assembler macros, but that's no fun
(most of) fuzzy could be written as assembler macros, but that's no fun
#### reliable
if fuzzy says it can run, george can run it
## feature progress
## how to work on fuzzy
- [x] parser
- roughly complete, but want to finish the whole pipeline before adding things to the parser
- [x] typechecker
- generates a "type stack" from parsed input and checks that word definition types match their body
- [ ] code generation
edit `program.inc` and run `./run.sh`. the program text gets included in the fuzzy compiler (`fuzzy.asm`), which is assembled with `vasm6502_oldstyle`. then george runs the compiler, reading out her system image when she reaches `stp` or `brk`
then the program she compiled gets formatted as a standard 32k rom, and she reads it again, and then shows her system image again when the program finishes (hits `stp` or `brk`).
since fuzzy works on a zero-page data stack, it's pretty easy to read the results of a program from the hexdump.
for now this loop only works on apple silicon, but eventually i'll compile a `george` binary for x86 linux and switch based on the host platform.
## reference help
i wrote [syntax](./syntax.md) and [semantics](./semantics.md) docs to keep track of how fuzzy works before starting work on the compiler implementation in assembly. they're the _official_ source of truth for how fuzzy works. assume that the compiler implementation is always in flux :)
## a note on implementation
i was writing fuzzy's compiler in rust for a sec, but then i realized that it would be a fun challenge to write it in assembly. it's been wayyy easier! and fun! and so rewarding :) this feels like a flex but i'm genuinely just so happy to see george & fuzzy playing together in this little computer world i've made <3

214
fuzzy.asm Normal file
View File

@ -0,0 +1,214 @@
; ʕ·ᴥ·ʔ- fuzzy v0 rev 0: parse program text and spit out binary representation @ $4000
.include "./macro.inc"
; zero-page variables used by the compiler
n = $05 ; temporary storage for data stack operations
base = $00 ; first zero-page address used by the variables below
result_binary_base = base ; 2-byte pointer to the output binary; store_binary writes to (pointer) + binary_base_index
binary_base_index = result_binary_base + 2 ; offset for that pointer
binary_subroutine_address = binary_base_index + 1 ; 2-byte pointer to a subroutine to be written to the binary
.org $8000
.include "./subroutines.inc"
program_text:
.include "./program.inc"
; reset entry point: mask interrupts and clear the registers, then fall through to main
reset:
  sei
  lda #0
  ldx #0
  ldy #0
; set up the compiler's zero-page pointers, compile program_text, then stop so the
; system image (with the compiled binary at $4000) can be read out
main:
  stz binary_base_index
  stz result_binary_base ; low byte of the output pointer; store_binary dereferences it, so don't rely on ram starting out zeroed
  lda #$40
  sta result_binary_base + 1 ; set where to store resulting binary ($4000)
  stz binary_subroutine_address
  lda #$80
  sta binary_subroutine_address + 1 ; available subroutines start at $8000
  jsr compile_values
  stp
; parser loop, eventually this will be able to handle longer program strings, but indexing by y is fine for now
; walks program_text one character at a time (y is the index) and hands every character
; to compile_values_op and then compile_values_nat
; NOTE(review): `#20` and `#12` below are decimal literals (no `$`). if georgescii follows ascii for
; whitespace, space would be $20 and newline $0a - confirm against the georgescii table
; NOTE(review): `beq parser_loop` in the space check does not advance y, so if it ever matched
; the same character would be reloaded forever
compile_values:
ldy #0
parser_loop:
lda program_text, y ; get character at index
cmp #0 ; is eof?
beq .end ; yes, exit loop
cmp #20 ; is space?
beq parser_loop ; yes, skip this char
cmp #12 ; is newline?
beq .newline ; yes, handle newline
jsr compile_values_op
jsr compile_values_nat
; execution falls through into .newline after compile_values_nat returns, so y is advanced
; once per loop pass here, on top of the iny inside compile_values_nat when it consumes a digit
.newline: ; we reached a newline, y is program string index
iny ; WARN: don't accidentally iny in this loop w/out handling a character
lda program_text, y ; load next char
cmp #12 ; is newline?
bne parser_loop ; no, keep parsing tokens
rts ; yes, no more tokens in body (see syntax.md for info)
.end:
rts
; try to compile the current character as an operator
; only "+" is handled for now: it writes subroutine 1 (subroutine_plus in subroutines.inc) into the binary
; NOTE(review): the "+" path returns with a = 1 (store_subroutine restores the index it was given)
; and without an iny; the caller's fall-through iny is what currently advances past the "+"
; a holds character value, y program text index, only iny if you find a matching character & consume it
compile_values_op:
cmp #"+" ; i personally think this syntax is really silly but whatever, one of these days i'm gonna write my own assembler and document everything cause vasm documentation is kinda terrible
bne .next
.is_plus:
lda #1 ; index of subroutine_plus
jsr store_subroutine
rts
.next: ; not an operator we know, leave a and y untouched
rts
; cmp #"!" ; commenting these out for now to handle a single simple case
; cmp #"&"
; cmp #"|"
; cmp #"-"
; cmp #"*"
; cmp #"/"
; cmp #"="
; cmp #">"
; cmp #"<"
; cmp #"#"
; try to compile the current character as a single-digit natural number
; a holds character value, y program text index, only iny if you find a matching character & consume it
; on a match this writes `lda #value` followed by the push subroutine (index 2) to the binary
; TODO:
; 1-3 digit decimal values
; 1-2 digit hex values
compile_values_nat:
  ; TODO:
  ; cmp #"$" ; is hex?
  ; bne .decimal ; no, try decimal
  ; cmp
  ; rts
  cmp #$30 ; before "0", the start of the 0-9 georgescii range?
  bcc .not_nat ; bcc: a < $30
  cmp #$3a ; past "9" ($39), the end of the 0-9 georgescii range?
  bcs .not_nat ; bcs: a >= $3a
  pha ; keep the digit character while we emit the opcode
  lda #$a9 ; $a9: lda imm
  jsr store_binary
  pla
  jsr georgescii_decimal_to_value
  jsr store_binary ; operand of the lda imm
  lda #2 ; push
  jsr store_subroutine
  iny ; digit consumed
  rts
.not_nat:
  rts
; georgescii decimal value in a register, return equivalent plain value in a register
georgescii_decimal_to_value:
  sec ; sbc subtracts an extra 1 when carry is clear, so carry must be set for a plain subtraction
  sbc #$30 ; decimal digits start at georgescii $30
  rts
; we have binary in the a register we want to store
; writes a to (result_binary_base) + binary_base_index, then advances the index
; y is preserved (phy/ply); when the index wraps to 0 the high byte of the pointer is bumped
store_binary:
phy
ldy binary_base_index
sta (result_binary_base), y
inc binary_base_index
bne .not_overflow ; did we roll over?
inc result_binary_base + 1 ; yes, roll over base address
.not_overflow: ; no, carry on as normal
ply
rts
; binary_subroutine_address is a pointer to a subroutine that we want to store
; the first byte at the subroutine's address is its length
; copies that many bytes (skipping the length byte itself) to the output via store_binary
; a is preserved, x and y are clobbered
; NOTE(review): a length byte of 0 makes x wrap on the first dex, so the loop would copy 256 bytes
store_contiguous_binary:
pha ; just to be safe
lda (binary_subroutine_address) ; get the subroutine length
tax ; loop counter
ldy #1 ; index into subroutine, offset by one to skip subroutine length
.loop:
lda (binary_subroutine_address), y
jsr store_binary ; store_binary keeps y intact and does not touch x
iny
dex
bne .loop
.end:
pla
rts
; this wouldn't be necessary if we could get the
; address of a label in vasm, but that's for another time
; (when i feel like writing an assembler lol)
; for now, pass the index of the subroutine (in subroutines.inc)
; to a and it will get written to binary_subroutine_address
; the table is walked from $8000: each entry is a length byte followed by that many bytes,
; so the next entry sits length + 1 bytes further on
; a is preserved, x is clobbered
get_subroutine_address:
  pha
  tax ; set up counter: number of entries to skip
  stz binary_subroutine_address ; always start from the first subroutine at $8000
  lda #$80
  sta binary_subroutine_address + 1
  cpx #0 ; first subroutine?
  beq .done ; yes, the address is already right
.loop: ; loop through
  lda (binary_subroutine_address) ; no, load length of subroutine
  inc ; distance from next subroutine
  clc
  adc binary_subroutine_address ; add it to the current address
  sta binary_subroutine_address
  bcc .no_carry ; low byte didn't overflow, high byte stays as it is
  inc binary_subroutine_address + 1 ; add the carry to the high byte of address
.no_carry:
  dex ; is this our address?
  bne .loop ; no, skip another entry
.done:
  pla ; balance the pha above on every path before returning
  rts
; pass subroutine index to a and it will get written into the binary
; a, x and y are all preserved; binary_subroutine_address is left pointing at $8000 again
; TODO: stabilize subroutine location & just write a `jsr $subroutine` to the binary
store_subroutine:
pha
phy
phx
jsr get_subroutine_address ; binary_subroutine_address -> subroutine number a
jsr store_contiguous_binary ; copy its bytes to the output
; reset subroutine address
stz binary_subroutine_address
lda #$80
sta binary_subroutine_address + 1
plx
ply
pla
rts
; write error message and stop execution
; copies .message (including its terminating zero byte) to $4000 and halts with stp
; nothing in the visible part of this file jumps here yet
error:
ldy #0
.loop:
lda .message, y
sta $4000, y
beq .end ; sta leaves the flags alone, so this tests the byte just loaded: zero ends the string
iny
bra .loop
.end:
stp
.message:
.asciiz "ruh roh! fuzzy couldn't compile"
; placeholder interrupt handler: saves a, x and y, restores them and returns without doing any work
isr: ; interrupt service routine
pha
phx
phy
ply
plx
pla
rti
; vector table: the word at $fffc points at reset, the word after it ($fffe) at isr
.org $fffc
.word reset
.word isr

BIN
george Executable file

Binary file not shown.

View File

@ -14,9 +14,13 @@
inx
.endm
.macro push ; push a data stack cell
; make room for one 2-byte cell by decrementing x twice, then store
; cell_low at 0,x and cell_high at 1,x; a is clobbered
.macro push, cell_high, cell_low ; push a data stack cell
dex
dex
lda \cell_low
sta 0, x
lda \cell_high
sta 1, x
.endm
.macro push2 ; push 2 data stack cells

1
program.inc Normal file
View File

@ -0,0 +1 @@
.asciiz '2 3 +'

16
run.sh Executable file
View File

@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Build the fuzzy compiler, let george run it on program.inc, then run the
# program she compiled and hexdump the resulting system image.
set -e
# -f: a clean checkout has no artifacts yet, and a failing rm would abort the script under `set -e`
rm -f *.bin *.rom
# assemble the compiler (which includes program.inc) into a rom image
vasm6502_oldstyle fuzzy.asm -dotdir -wdc02 -ldots -Fbin -o fuzzy.rom &> /dev/null;
echo -e "\nʕ·ᴥ·ʔ- source text:\n";
cat program.inc;
# run the compiler; george writes her system image to stdout
cat fuzzy.rom | ./george > compiled.bin;
# the compiled program lives at $4000 (16384) in the image: cut it out and pad it to a 32k rom
dd skip=16384 count=500 if=compiled.bin of=compiled.rom bs=1 &> /dev/null;
truncate -s 32k compiled.rom &> /dev/null;
# patch the last three bytes of the rom (offsets 32765-32767) with $80 $00 $00
printf '\x80\x00\x00' | dd of=compiled.rom bs=1 seek=32765 count=3 conv=notrunc &> /dev/null;
# run the compiled program and show the system image it leaves behind
cat compiled.rom | ./george > result.bin;
echo -e "\n\nʕ·ᴥ·ʔ- compiled program result:\n";
hexdump -C ./result.bin;
echo -e "";

View File

@ -1,168 +0,0 @@
use core::panic;
use std::{any::Any, fmt::Display};
use crate::{
typecheck::{self, Checkable, TypeStack},
Symtab,
};
#[derive(PartialEq, Eq, Debug, Clone)]
/// A single token of a word body or program body, as produced by the parser.
///
// enum values are parser values, not compiler values,
// e.g. for `Str(String)` the `String` value will be put somewhere in memory
// and a pointer to it will be put on the stack
pub enum Value {
Nat(u16), // 16-bit natural number
Int(i16), // 16-bit twos-complement integer
Bool(bool),
Op(String), // operator symbol, e.g. "+" or "*"
Char(char), // 8-bit georgescii character padded with leading zeros (might change later)
Str(String), // 16-bit pointer to a string
Word(String), // 16-bit pointer to a word
}
/// The type of a value on the type-checking stack; also used for the
/// `pop` and `push` lists of a word's declared type ([`WType`]).
#[derive(PartialEq, Eq, Debug, Clone, PartialOrd, Ord)]
pub enum VType {
Nat,
Int,
Bool,
Char,
Str,
}
/// The declared type of a word: the value types it takes off the stack
/// (`pop`) and the value types it leaves on the stack (`push`).
#[derive(Eq, PartialEq, Debug, Clone)]
pub struct WType {
    pub pop: Vec<VType>,
    pub push: Vec<VType>,
}

impl WType {
    /// Creates a word type that neither pops nor pushes anything.
    pub fn new() -> Self {
        Self {
            pop: Vec::new(),
            push: Vec::new(),
        }
    }

    /// Builder step: appends `t` to the list of pushed types.
    pub fn push(mut self, t: Vec<VType>) -> Self {
        self.push.extend(t);
        self
    }

    /// Builder step: appends `t` to the list of popped types.
    /// (Despite the name, nothing is removed from anything.)
    pub fn pop(mut self, t: Vec<VType>) -> Self {
        self.pop.extend(t);
        self
    }
}
/// An effect a word definition can declare; written in source as `~paint`,
/// `~sing`, `~store` or `~do` between the push types and the `:`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Effect {
Paint,
Sing,
Store,
Do,
}
/// A parsed word definition: its name, body values, declared type and effects.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct WordDef {
    pub name: String,
    pub values: Vec<Value>,
    pub r#type: WType,
    pub effects: Vec<Effect>,
}

impl WordDef {
    /// Builds a definition from its parts; `name` may be anything convertible to a `String`.
    pub fn new<S>(name: S, values: Vec<Value>, r#type: WType, effects: Vec<Effect>) -> Self
    where
        S: Into<String>,
    {
        Self {
            name: name.into(),
            values,
            r#type,
            effects,
        }
    }

    /// Returns this word's body with every `Value::Word` reference replaced,
    /// recursively, by the body of the word it names in `symtab`.
    fn flatten_values(&self, symtab: &Symtab) -> Vec<Value> {
        let mut flat = Vec::new();
        for value in &self.values {
            match value {
                Value::Word(word_name) => {
                    flat.extend(symtab.get(word_name).flatten_values(symtab));
                }
                plain => flat.push(plain.clone()),
            }
        }
        flat
    }

    /// Returns a copy of this definition whose body contains no word references.
    pub fn flatten(&self, symtab: &Symtab) -> WordDef {
        WordDef::new(
            self.name.clone(),
            self.flatten_values(symtab),
            self.r#type.clone(),
            self.effects.clone(),
        )
    }
}
/// Displays a definition as its pretty-printed `Debug` form followed by a newline.
impl Display for WordDef {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "{self:#?}")
    }
}
impl Checkable<VType> for WordDef {
    /// Type-checks this word's body against `stack` and returns the stack the body leaves behind.
    ///
    /// Literals push their own type. Operators require two operands of the
    /// right type on top of the stack, consume them and push the result type.
    ///
    /// # Errors
    /// Returns a message when an operator finds fewer than two values, finds
    /// operands of the wrong type, or is not a known opcode.
    ///
    /// # Panics
    /// Panics if the body still contains a `Value::Word`; flatten the definition first.
    fn check(&mut self, mut stack: TypeStack<VType>) -> Result<TypeStack<VType>, String> {
        for value in self.values.iter() {
            println!(
                "WORDDEF: checking value {:?} in word {:?}, current stack is {:?}",
                value, self.name, stack
            );
            match value {
                Value::Word(_) => panic!("Don't typecheck on an unflattened word!"),
                Value::Op(op) => {
                    if stack.len() < 2 {
                        return Err(format!(
                            "Checking def {:?}, stack is {:?}, expected a stack with 2 elements, got only {:?}",
                            self.name,
                            stack,
                            stack.len()
                        ));
                    }
                    // `test_consume` already removes the matched operands from the
                    // stack, so only the result type is pushed afterwards.
                    match op.as_str() {
                        "+" | "*" => {
                            println!("WORDDEF: checking Op");
                            stack = stack.test_consume(
                                TypeStack::new().push(VType::Nat).push(VType::Nat),
                            )?;
                            stack = stack.push(VType::Nat);
                        }
                        // NOTE(review): the parser emits one-character operators, so it
                        // would produce "|" rather than "||" - confirm which spelling is intended.
                        "&" | "||" => {
                            stack = stack.test_consume(
                                TypeStack::new().push(VType::Bool).push(VType::Bool),
                            )?;
                            stack = stack.push(VType::Bool);
                        }
                        _ => return Err(format!("unknown opcode {:?}", op)),
                    }
                }
                Value::Nat(_) => stack = stack.push(VType::Nat),
                Value::Int(_) => stack = stack.push(VType::Int),
                Value::Bool(_) => stack = stack.push(VType::Bool),
                Value::Str(_) => stack = stack.push(VType::Str),
                Value::Char(_) => stack = stack.push(VType::Char),
            };
        }
        Ok(stack)
    }
}

View File

@ -1,11 +0,0 @@
use fuzzy::{
ast::VType,
parse,
typecheck::{Checkable, TypeStack},
};
/// Parses a small hard-coded fuzzy program, type-checks it against an empty
/// stack and prints the outcome.
fn main() {
    let source = "test is: 5 9 *\n\nexample int nat is ~paint ~sing:\n 5 \"lol\" test \"c\" \n\narrest int nat is ~paint ~sing: 5 \"lol\" \"a\" example\n\n5 6 * arrest example arrest";
    let mut parsed = parse(source).unwrap();
    let verdict = parsed.check(TypeStack::new());
    println!("{verdict:?}");
}

View File

@ -1,7 +0,0 @@
pub mod ast;
pub mod compiler;
pub mod parser;
pub mod typecheck;
pub use compiler::*;
pub use parser::*;

View File

@ -1,483 +0,0 @@
use std::collections::HashMap;
// TODO:
// - add error types and error handling
// - figure out if we can use a single ast or should make a second ast for compilation, then
// convert to that after parsing/do another parsing step but on the ast made the first time
// (i also don't really know what i'm doing so maybe u never do that or always do that or
// something i don't understand yet)
// - improve whitespace parsing
// - think more about language rules:
// - what types actually need to be exposed to the "user" (me) and what types can be internal
// to the compiler (e.g. char vs str, where a str of length 1 could be treated as a char
// internally (put on the stack as a value instead of put somewhere in memory))
use chumsky::{
prelude::*,
text::{ident, keyword},
};
use crate::{
ast::*,
typecheck::{Checkable, TypeStack},
};
/// Symbol table mapping a word's name to its definition.
#[derive(PartialEq, Debug, Clone)]
pub struct Symtab(HashMap<String, WordDef>);

impl Symtab {
    /// Creates an empty symbol table.
    fn new() -> Self {
        Symtab(HashMap::new())
    }

    /// Looks up the definition named `string`.
    ///
    /// # Panics
    /// Panics, naming the missing word, when no such definition exists.
    pub fn get(&self, string: &str) -> &WordDef {
        self.0
            .get(string)
            .unwrap_or_else(|| panic!("reference to undefined word {string:?}"))
    }

    /// Inserts `def` under its own name, replacing any earlier definition of that name.
    pub fn add_def(&mut self, def: WordDef) {
        let key = def.name.clone();
        self.0.insert(key, def);
    }

    /// Replaces every definition with its flattened form, i.e. with all word
    /// references in its body expanded to the referenced word's values.
    pub fn flatten_refs(&mut self) {
        let mut flattened = Symtab::new();
        for def in self.0.values() {
            flattened.add_def(def.flatten(self));
        }
        // Every definition is rebuilt from the old table, so the new table
        // can simply take the old one's place.
        *self = flattened;
    }
}
/// Builds a symbol table keyed by each definition's name; when two
/// definitions share a name, the later one in the vector wins.
impl From<Vec<WordDef>> for Symtab {
    fn from(value: Vec<WordDef>) -> Self {
        let table: HashMap<String, WordDef> = value
            .into_iter()
            .map(|def| (def.name.clone(), def))
            .collect();
        Symtab(table)
    }
}
/// A parsed program: its word definitions plus the top-level body values.
#[derive(Debug, PartialEq)]
pub struct Program {
    symtab: Symtab,
    body: Vec<Value>,
}

impl Program {
    /// Builds a program from its definitions and its body.
    fn new(defs: Vec<WordDef>, body: Vec<Value>) -> Self {
        Self {
            symtab: defs.into(),
            body,
        }
    }

    /// Expands every word reference in the body into the flattened values of
    /// the word it names, leaving all other values in place.
    fn reduce_body(&mut self) {
        let mut reduced = Vec::new();
        for value in &self.body {
            match value {
                Value::Word(word_name) => {
                    let expanded = self.symtab.get(word_name).flatten(&self.symtab);
                    reduced.extend(expanded.values);
                }
                plain => reduced.push(plain.clone()),
            }
        }
        self.body = reduced;
    }
}
impl Checkable<VType> for Program {
    /// Type-checks the whole program against `stack`.
    ///
    /// Steps, in order: reject reference cycles between definitions, flatten
    /// every definition, check each definition's body against its declared
    /// type, expand the program body, then check the body against `stack`.
    ///
    /// # Errors
    /// Returns a message on a reference cycle, on a definition whose body
    /// does not match its declared type, or on an ill-typed program body.
    ///
    /// # Panics
    /// Panics if a body refers to a word that has no definition.
    fn check(&mut self, mut stack: TypeStack<VType>) -> Result<TypeStack<VType>, String> {
        //TODO: https://trykv.medium.com/algorithms-on-graphs-directed-graphs-and-cycle-detection-3982dfbd11f5
        /// Runs a depth-first search from every definition not yet visited.
        fn cyclic_graph_check(symtab: &Symtab) -> Result<(), String> {
            let mut visited: Vec<&WordDef> = vec![];
            let mut rec_stack: Vec<&WordDef> = vec![];
            for (_, def) in symtab.0.iter() {
                if !visited.contains(&def) {
                    dfs_cycle_check(def, &mut visited, &mut rec_stack, symtab)?;
                }
            }
            Ok(())
        }

        /// Depth-first walk over word references; `rec_stack` holds the chain
        /// of definitions currently being expanded, so meeting one of them
        /// again means the definitions refer to each other in a cycle.
        fn dfs_cycle_check<'a>(
            def: &'a WordDef,
            visited: &mut Vec<&'a WordDef>,
            rec_stack: &mut Vec<&'a WordDef>,
            symtab: &'a Symtab,
        ) -> Result<(), String> {
            visited.push(def);
            rec_stack.push(def);
            for val in def.values.iter() {
                if let Value::Word(name) = val {
                    let next_def = symtab.get(name);
                    if !visited.contains(&next_def) {
                        dfs_cycle_check(next_def, visited, rec_stack, symtab)?;
                    } else if rec_stack.contains(&next_def) {
                        return Err(format!(
                            "illegal recursion detected! definitions {}create a reference cycle",
                            rec_stack
                                .iter()
                                .map(|def| {
                                    let mut name = def.name.clone();
                                    name.insert(0, '"');
                                    name.push_str("\" ");
                                    name
                                })
                                .collect::<String>()
                        ));
                    }
                }
            }
            rec_stack.pop();
            Ok(())
        }

        cyclic_graph_check(&self.symtab)?;
        self.symtab.flatten_refs();
        println!(
            "we have flattened refs, here's the symtab: {:#?}\n",
            self.symtab
        );

        // then check that all symtab defs are sound
        // at this point they shouldn't have any references,
        // and if they do we will panic (see the Checkable impl for WordDef)
        for (name, def) in self.symtab.0.iter_mut() {
            let local_stack: TypeStack<VType> = def.r#type.pop.clone().into();
            println!(
                "PARSED: checking {:?}\ncurrent stack: {local_stack:?}\nword: {:?}",
                name, def
            );
            let result_stack = def.check(local_stack)?;
            if let Err(error) = result_stack.test(&def.r#type.push.clone().into()) {
                println!("{error:?}");
                return Err(error);
            }
        }

        self.reduce_body();

        // then we'll check that the body is sound with the given stack
        // maybe in the future i'll change this trait so there isn't a stack
        // param and the implementer picks what stack to check against
        //
        // TODO: this block also is shared behavior between basically all checkables but potentially with
        // different internal types for T, will have to figure out how to dedup this later
        for value in self.body.iter() {
            match value {
                Value::Op(op) => {
                    if stack.len() < 2 {
                        return Err(format!(
                            "expected a stack with 2 elements, got only {:?}",
                            stack.len()
                        ));
                    }
                    // `test_consume` already removes the matched operands, so
                    // popping again here would eat unrelated values below them.
                    match op.as_str() {
                        "+" | "*" => {
                            stack = stack.test_consume(
                                TypeStack::new().push(VType::Nat).push(VType::Nat),
                            )?;
                            stack = stack.push(VType::Nat);
                        }
                        "&" | "||" => {
                            stack = stack.test_consume(
                                TypeStack::new().push(VType::Bool).push(VType::Bool),
                            )?;
                            stack = stack.push(VType::Bool);
                        }
                        _ => return Err(format!("unknown opcode {:?}", op)),
                    }
                }
                Value::Nat(_) => stack = stack.push(VType::Nat),
                Value::Int(_) => stack = stack.push(VType::Int),
                Value::Bool(_) => stack = stack.push(VType::Bool),
                Value::Str(_) => stack = stack.push(VType::Str),
                Value::Char(_) => stack = stack.push(VType::Char),
                Value::Word(_) => unreachable!(),
            };
        }
        Ok(stack)
    }
}
/// Parses fuzzy source text into a [`Program`].
///
/// # Errors
/// Returns the parser's list of errors when the text is not a valid program.
pub fn parse<S>(input: S) -> Result<Program, Vec<Simple<char>>>
where
    S: ToString,
{
    parser().parse(input.to_string())
}
pub fn parser() -> impl Parser<char, Program, Error = Simple<char>> {
let name = ident().labelled("word_name");
let value = {
// nats will be coerced to ints at compile time depending on word type
let nat = text::int(10).map(|s: String| Value::Nat(s.parse().unwrap()));
// vice versa for non-negative ints
let int = just("-").ignore_then(
text::int::<char, Simple<char>>(10).map(|s: String| Value::Int(s.parse().unwrap())),
);
let op = one_of::<char, &str, Simple<char>>("*+-/&|<>").map(|s| Value::Op(s.to_string()));
let str_or_char = just::<char, char, Simple<char>>('"')
.ignore_then(none_of('"').repeated())
.then_ignore(just('"'))
.map(|s: Vec<char>| match s.len() {
1 => Value::Char(s[0]),
_ => Value::Str(s.into_iter().collect::<String>()),
});
let word = name.map(|n: String| Value::Word(n));
let bool = keyword::<_, _, Simple<char>>("true")
.map(|_| Value::Bool(true))
.or(keyword("false").map(|_| Value::Bool(false)));
nat.or(int).or(bool).or(str_or_char).or(word).or(op)
};
let value_seperator = text::newline()
.repeated()
.at_least(2)
.not()
.rewind()
.then_ignore(
// TODO: figure out if this could be simplified
choice((
just(" ")
.repeated()
.then_ignore(just("\n").repeated().exactly(1).or_not()),
just("\n")
.repeated()
.exactly(1)
.then_ignore(just(" ").repeated().or_not()),
))
.then_ignore(just(" ").repeated()),
);
let body = value_seperator
.or_not()
.ignored()
.then(value)
.map(|(_, v)| v)
.repeated()
.then_ignore(
just(" ")
.repeated()
.ignored()
.then(text::newline().repeated().at_least(2).or_not()),
);
let word_def = {
let pop_types = {
let pop_type = keyword("nat")
.to(VType::Nat)
.or(keyword("int").to(VType::Int))
.or(keyword("bool").to(VType::Bool))
.or(keyword("char").to(VType::Char))
.or(keyword("str").to(VType::Str));
pop_type
.padded()
.repeated()
.collect::<Vec<VType>>()
.labelled("pop_types")
.boxed()
};
let push_types = {
let push_type = keyword("nat")
.to(VType::Nat)
.or(keyword("int").to(VType::Int))
.or(keyword("char").to(VType::Char))
.or(keyword("str").to(VType::Str));
push_type
.padded()
.repeated()
.collect::<Vec<VType>>()
.labelled("push_types")
.boxed()
};
let effects = {
let effect_keyword = keyword("paint")
.to(Effect::Paint)
.or(keyword("sing").to(Effect::Sing))
.or(keyword("store").to(Effect::Store))
.or(keyword("do").to(Effect::Do));
let effect = just("~").ignore_then(effect_keyword).labelled("effect");
effect.padded().repeated().labelled("effects").boxed()
};
let definition = text::whitespace()
.ignore_then(name)
.then_ignore(just(" "))
.then(pop_types)
.then_ignore(keyword("is").or(keyword("are")).padded())
.then(push_types)
.then(effects)
.then_ignore(just(":"))
.map(|(((name, pop_types), push_types), effects)| {
(name, pop_types, push_types, effects)
});
definition
.then(body.clone())
.map(|((name, pop_types, push_types, effects), body)| {
WordDef::new(
name,
body,
WType::new().push(push_types).pop(pop_types),
effects,
)
})
};
word_def
.repeated()
.then(body)
.map(|(defs, body): (Vec<WordDef>, Vec<Value>)| Program::new(defs, body))
}
// Unit tests for the parser, the type checker and the reference-cycle check.
#[cfg(test)]
mod tests {
use crate::typecheck::TypeStack;
use super::*;
// Parses two definitions plus a body and compares the result with a hand-built Program.
#[test]
fn test_parser() {
let input = "
a is nat: 5 7 *
b is nat:
5 a *
a
";
let ast = vec![
WordDef::new(
"a",
vec![Value::Nat(5), Value::Nat(7), Value::Op("*".to_string())],
WType::new().push(vec![VType::Nat]),
vec![],
),
WordDef::new(
"b",
vec![
Value::Nat(5),
Value::Word("a".to_string()),
Value::Op("*".to_string()),
],
WType::new().push(vec![VType::Nat]),
vec![],
),
];
let body: Vec<Value> = vec![Value::Word("a".to_string())];
println!("sound: {:?}\n", parser().parse(input).unwrap());
assert_eq!(parser().parse(input).unwrap(), Program::new(ast, body));
}
// One sound program must pass the check; three unsound variants
// (bad definitions, bad body, both) must fail it.
#[test]
fn test_typecheck() {
let sound = "
a is nat: 5 7 *
b nat nat is nat:
a *
a 5 *
";
let unsound_defs = "
a is nat nat: 5 7 *
b nat is nat:
a *
a 5 *
";
let unsound_body = "
a is nat: 5 7 *
b nat is nat:
a *
a 5 * *
";
let unsound_body_and_defs = "
a is nat nat: 5 7 *
b nat is nat:
a *
a 5 * *
";
// Parses `input`, flattens it and asserts that the check result matches `sound`.
fn typecheck(input: &str, sound: bool) {
let mut parsed = parse(input).unwrap();
parsed.symtab.flatten_refs();
parsed.reduce_body();
let stack = TypeStack::new();
if sound {
assert!(parsed.check(stack).is_ok());
} else {
assert!(parsed.check(stack).is_err());
}
}
typecheck(sound, true);
typecheck(unsound_defs, false);
typecheck(unsound_body, false);
typecheck(unsound_body_and_defs, false);
}
// Definitions that refer to each other in a cycle (directly or through a
// third word) must be rejected by the check.
#[test]
fn test_illegal_recursion() {
let illegal = "
a is: b
b is: a
a
";
let illegal_multilevel = "
a is: b
b is: c
c is: a
a
";
// Parses `input` and asserts that checking it fails.
fn typecheck(input: &str) {
let mut parsed = parse(input).unwrap();
let stack = TypeStack::new();
println!("{:?}", parsed.check(TypeStack::new()));
assert!(parsed.check(stack).is_err());
}
typecheck(illegal);
typecheck(illegal_multilevel);
}
}

View File

@ -1,84 +0,0 @@
use std::fmt::Debug;
use chumsky::chain::Chain;
/// A stack of types used while type-checking.
///
/// The last element of the inner vector is the top of the stack.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TypeStack<T>(Vec<T>);

impl<T: Debug + PartialEq> TypeStack<T> {
    /// Creates an empty stack.
    pub fn new() -> Self {
        TypeStack(Vec::new())
    }

    /// Number of types currently on the stack.
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Whether the stack holds no types at all.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Removes the top type, if any, and returns the stack (builder style).
    pub fn pop(mut self) -> TypeStack<T> {
        let _ = self.0.pop();
        self
    }

    /// Puts `t` on top of the stack and returns the stack (builder style).
    pub fn push(mut self, t: T) -> TypeStack<T> {
        self.0.push(t);
        self
    }

    /// Tests that `ts` matches the top of the stack and removes the matched
    /// types, returning what is left.
    ///
    /// # Errors
    /// Returns a message when `ts` is taller than this stack or when a type
    /// differs; depth 0 is the top of the stack.
    pub fn test_consume(mut self, ts: TypeStack<T>) -> Result<TypeStack<T>, String> {
        if ts.len() > self.len() {
            return Err(format!("error during test: {ts:?} is bigger than {self:?}"));
        }
        for (index, t) in ts.0.iter().rev().enumerate() {
            // Cannot fail: `ts` is no taller than `self` and one element is popped per step.
            let val = self.0.pop().expect("stack is at least as tall as ts");
            if val != *t {
                return Err(format!("type mismatch between {self:?} and {ts:?}\n{t:?} doesn't match {val:?} at stack depth {index:?}"));
            }
        }
        Ok(self)
    }

    /// Tests that `ts` matches the top of the stack without changing the stack.
    ///
    /// Both stacks are walked from the top down, so the element at depth `n`
    /// of `ts` is compared with the element at depth `n` of this stack.
    ///
    /// # Errors
    /// Returns a message when `ts` is taller than this stack or when a type
    /// differs; depth 0 is the top of the stack.
    pub fn test(&self, ts: &TypeStack<T>) -> Result<(), String> {
        if ts.len() > self.len() {
            return Err(format!("error during test: {ts:?} is bigger than {self:?}"));
        }
        let from_top = self.0.iter().rev().zip(ts.0.iter().rev());
        for (index, (val, t)) in from_top.enumerate() {
            if val != t {
                return Err(format!("type mismatch between {self:?} and {ts:?}\n{t:?} doesn't match {val:?} at stack depth {index:?}"));
            }
        }
        Ok(())
    }

    /// Moves every element of `t` onto the top of the stack, leaving `t` empty.
    pub fn append(&mut self, t: &mut Vec<T>) {
        self.0.append(t);
    }
}
impl<T: PartialEq + Debug> From<Vec<T>> for TypeStack<T> {
fn from(value: Vec<T>) -> Self {
TypeStack(value)
}
}
/// Something that can be type-checked against a stack of `T`s.
pub trait Checkable<T: PartialEq + Debug> {
/// Checks `self` starting from `stack`; returns the resulting stack on
/// success or a human-readable message on failure.
fn check(&mut self, stack: TypeStack<T>) -> Result<TypeStack<T>, String>;
}

15
subroutines.inc Normal file
View File

@ -0,0 +1,15 @@
; table of pre-assembled subroutines the compiler copies into its output
; each entry is a length byte followed by that many bytes of machine code;
; entries are looked up by their position in this file (0, 1, 2, ...)
; 0
test_contiguous_binary:
.byte 3,$1,$2,$3
; 1 - assembled from "plus.asm"
; NOTE(review): the last two bytes are $ca $ca, and $ca is dex (see subroutine_push below);
; push grows the stack with dex, so popping the consumed cell would be inx ($e8) - confirm against plus.asm
subroutine_plus:
.byte 15, $18,$b5,$00,$75,$02,$95, $02, $b5, $01, $75, $03, $95, $03, $ca, $ca
; 2
; pushes the value in a as a new cell: low byte at 0,x and a zero high byte at 1,x
subroutine_push:
.byte 6,$ca,$ca,$95,$0,$74,$1
; dex
; dex
; sta 0, x
; stz 1, x