fuzzy compiler v0 rev 0, written in assembly, running on george :)

This commit is contained in:
august kline 2024-10-06 22:06:10 -04:00
parent cbc7bff7f7
commit 2d4df76be7
12 changed files with 201 additions and 1040 deletions

271
Cargo.lock generated
View File

@ -1,271 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
"zerocopy",
]
[[package]]
name = "allocator-api2"
version = "0.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
[[package]]
name = "cc"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "324c74f2155653c90b04f25b2a47a8a631360cb908f92a772695f430c7e31052"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9"
dependencies = [
"hashbrown",
"stacker",
]
[[package]]
name = "either"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
[[package]]
name = "fuzzy"
version = "0.1.0"
dependencies = [
"chumsky",
"indextree",
]
[[package]]
name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash",
"allocator-api2",
]
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "indextree"
version = "4.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d6f1b8dbc8f1e5a0f45e05b9293c42cbab79086baeb3e914d3936f8149edc4f"
dependencies = [
"indextree-macros",
]
[[package]]
name = "indextree-macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357230c23ee6024223892ce0de19888a04139ca5bb94f5becb04d38b75a4bccf"
dependencies = [
"either",
"itertools",
"proc-macro2",
"quote",
"strum",
"syn",
"thiserror",
]
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]]
name = "libc"
version = "0.2.155"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
[[package]]
name = "once_cell"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "proc-macro2"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
dependencies = [
"unicode-ident",
]
[[package]]
name = "psm"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
dependencies = [
"cc",
]
[[package]]
name = "quote"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rustversion"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6"
[[package]]
name = "stacker"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
dependencies = [
"cc",
"cfg-if",
"libc",
"psm",
"winapi",
]
[[package]]
name = "strum"
version = "0.26.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
dependencies = [
"strum_macros",
]
[[package]]
name = "strum_macros"
version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn",
]
[[package]]
name = "syn"
version = "2.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "zerocopy"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

View File

@ -1,15 +0,0 @@
[package]
name = "fuzzy"
version = "0.1.0"
edition = "2021"
[dependencies]
chumsky = { version = "0.9.3"}
indextree = "4.7.2"
[lib]
crate-type = ["lib"]
[[bin]]
name = "main"

182
fuzzy.asm Normal file
View File

@ -0,0 +1,182 @@
; ʕ·ᴥ·ʔ- fuzzy v0 rev 0: parse program text and spit out binary representation @ $4000
.include "./macro.inc"
; zero-page variables
n = $05 ; temporary storage for data stack operations
base = $00 ; first address of the variables below
result_binary_base = base ; 2-byte pointer to where the next byte of binary data should be stored
binary_base_index = result_binary_base + 2 ; offset for that pointer
binary_subroutine_address = binary_base_index + 1 ; 2-byte pointer to a subroutine to be written to the binary
; code starts at $8000; the subroutine table is included first so that it sits at $8000
.org $8000
.include "./subroutines.inc"
; NUL-terminated source text that compile_values reads
program_text:
.asciiz '2 3 +'
; reset vector target: mask interrupts and zero the registers, then fall through into main
reset:
    sei
    lda #0
    ldx #0
    ldy #0
; point the output pointer at $4000 and the subroutine pointer at $8000,
; compile program_text, then stop the processor
main:
    stz result_binary_base              ; low byte of the output pointer; it was never written before,
                                        ; so the output only landed at $4000 if this byte already held 0
    stz binary_base_index
    lda #$40
    sta result_binary_base + 1          ; set where to store resulting binary
    stz binary_subroutine_address
    lda #$80
    sta binary_subroutine_address + 1   ; available subroutines start at $8000
    jsr compile_values
    stp
; parser loop, eventually this will be able to handle longer program strings, but indexing by y is fine for now
;
; walks program_text one character at a time (y = index into the text) and offers every
; non-whitespace character to the token compilers below.
;
; contract shared by compile_values_op and compile_values_nat:
;   in:  a = current character, y = program text index
;   out: carry set   -> the token was compiled and y already points past it
;        carry clear -> no match; a and y are unchanged
; only the token compilers advance y for a token, so a character is never stepped over twice.
compile_values:
    ldy #0
parser_loop:
    lda program_text, y         ; get character at index (lda sets Z for the NUL terminator)
    beq .end                    ; eof, exit loop
    cmp #$20                    ; is space? (must be hex: decimal 20 never matched a space)
    beq .skip                   ; yes, step over this char
    cmp #12                     ; is newline?
                                ; NOTE(review): decimal 12 is form feed in ascii; if georgescii newline
                                ; is line feed this should be $0a - confirm against the character table
    beq .newline                ; yes, handle newline
    jsr compile_values_op
    bcs parser_loop             ; operator consumed, carry on with the next character
    jsr compile_values_nat
    bcs parser_loop             ; digit consumed, carry on with the next character
.skip:                          ; whitespace or a character no compiler recognised
    iny
    bra parser_loop
.newline:                       ; we reached a newline, y is program string index
    iny
    lda program_text, y         ; load next char
    cmp #12                     ; is newline? (same constant as above)
    bne parser_loop             ; no, keep parsing tokens
    rts                         ; yes, no more tokens in body (see syntax.md for info)
.end:
    rts

; operator compiler, see the contract above compile_values
; a holds character value, y program text index
compile_values_op:
    cmp #"+"                    ; vasm character-literal syntax
    bne .not_op
    phy                         ; keep the text index safe while the subroutine is copied
    lda #1                      ; index of subroutine_plus in subroutines.inc
    jsr store_subroutine
    ply
    iny                         ; consume the operator
    sec                         ; report: token consumed
    rts
.not_op:
    clc                         ; report: no match (a and y untouched)
    rts
; operators still to be handled, left out for now to keep to a single simple case:
; cmp #"!"
; cmp #"&"
; cmp #"|"
; cmp #"-"
; cmp #"*"
; cmp #"/"
; cmp #"="
; cmp #">"
; cmp #"<"
; cmp #"#"

; single decimal digit compiler, see the contract above compile_values
; a holds character value, y program text index
compile_values_nat:
    ; TODO:
    ; cmp #"$" ; is hex?
    ; bne .decimal ; no, try decimal
    ; cmp
    ; rts
    cmp #$30                    ; below "0"? (bcc branches when a < $30)
    bcc .not_nat
    cmp #$3a                    ; above "9"? (bcs branches when a >= $3a, so "9" = $39 is accepted)
    bcs .not_nat
    jsr georgescii_decimal_to_value
    jsr store_binary
    iny                         ; consume the digit
    sec                         ; report: token consumed
    rts
.not_nat:
    clc                         ; report: no match (a and y untouched)
    rts
; georgescii decimal digit in the a register, returns the equivalent plain value (0-9) in the a register
georgescii_decimal_to_value:
    sec                         ; sbc subtracts one extra when carry is clear, so set it for a plain subtraction
    sbc #$30                    ; decimal digits start at georgescii $30
    rts
; we have binary in the a register we want to store
; writes a to (result_binary_base) + binary_base_index and then advances the index;
; when the index wraps back to 0 the high byte of the pointer is incremented so that
; writing continues in the next 256-byte page. y is saved on entry and restored on exit.
store_binary:
phy
ldy binary_base_index
sta (result_binary_base), y
inc binary_base_index
bne .not_overflow ; did we roll over?
inc result_binary_base + 1 ; yes, roll over base address
.not_overflow: ; no, carry on as normal
ply
rts
; binary_subroutine_address is a pointer to a subroutine that we want to store
; the first byte at the subroutine's address is its length, the subroutine's own bytes follow it
; copies those bytes (not the length byte) into the output binary via store_binary
; a, x and y are preserved
store_contiguous_binary:
    pha                                 ; just to be safe
    phy
    phx
    lda (binary_subroutine_address)     ; get the subroutine length
    beq .end                            ; empty subroutine: nothing to copy (dex/bne would loop 256 times)
    tax                                 ; loop counter
    ldy #1                              ; index into subroutine, starting after the length byte
.loop:
    lda (binary_subroutine_address), y
    jsr store_binary
    iny
    dex
    bne .loop
.end:
    plx                                 ; restore in reverse push order (this was a phx, which
    ply                                 ; left the saved registers on the stack under the return address)
    pla
    rts
; this wouldn't be necessary if we could get the
; address of a label in vasm, but that's for another time
; (when i feel like writing an assembler lol)
; for now, pass the index of the subroutine (in subroutines.inc)
; to a and it will get written to binary_subroutine_address
;
; the table starts at $8000 and every entry is a length byte followed by that many bytes,
; so entry N is found by hopping over N entries from the start
; clobbers a and x
get_subroutine_address:
    tax                                 ; x = number of entries to hop over
    stz binary_subroutine_address       ; always start from the first subroutine, otherwise a second
    lda #$80                            ; lookup would continue from wherever the previous one stopped
    sta binary_subroutine_address + 1
    cpx #0                              ; first subroutine?
    beq .done                           ; yes, the pointer is already correct
.loop:
    lda (binary_subroutine_address)     ; length of the current subroutine
    sec                                 ; the carry-in adds 1 for the length byte itself
    adc binary_subroutine_address       ; low byte + length + 1
    sta binary_subroutine_address
    bcc .no_carry                       ; no overflow out of the low byte
    inc binary_subroutine_address + 1   ; overflow: carry into the high byte
.no_carry:
    dex                                 ; one entry hopped
    bne .loop                           ; more entries to hop over
.done:
    rts
; a = index of a subroutine in the table: look up its address, then copy it into the output binary
; TODO: stabilize subroutine location & just write a `jsr $subroutine` to the binary
store_subroutine:
    jsr get_subroutine_address
    jmp store_contiguous_binary         ; tail call, its rts returns straight to our caller
; placeholder handler: saves a, x and y, does no work, restores them in reverse order and returns
isr: ; interrupt service routine
pha
phx
phy
ply
plx
pla
rti
; vector table: the two words land at $fffc (reset vector) and $fffe (irq/brk vector)
.org $fffc
.word reset
.word isr

BIN
george Executable file

Binary file not shown.

View File

@ -14,9 +14,13 @@
inx
.endm
.macro push ; push a data stack cell
.macro push, cell_high, cell_low ; push a data stack cell
dex
dex
lda \cell_low
sta 0, x
lda \cell_high
sta 1, x
.endm
.macro push2 ; push 2 data stack cells

7
run.sh Executable file
View File

@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Assemble fuzzy.asm into fuzzy.rom, feed the ROM to ./george on stdin,
# capture its stdout in result.bin and print a hex dump of it.
# -e stops at the first failing command, -u rejects unset variables,
# pipefail keeps a failure anywhere in a pipeline from being hidden.
set -euo pipefail
vasm6502_oldstyle fuzzy.asm -dotdir -wdc02 -ldots -Fbin -o fuzzy.rom
# redirect instead of `cat |` so a missing ROM fails the script directly
./george < fuzzy.rom > result.bin
hexdump -C ./result.bin

View File

@ -1,168 +0,0 @@
use core::panic;
use std::{any::Any, fmt::Display};
use crate::{
typecheck::{self, Checkable, TypeStack},
Symtab,
};
/// One item of a word body or of the program body, as produced by the parser.
#[derive(PartialEq, Eq, Debug, Clone)]
// enum values are parser values, not compiler values,
// e.g. for `Str(String)` the `String` value will be put somewhere in memory
// and a pointer to it will be put on the stack
pub enum Value {
Nat(u16), // 16-bit natural number
Int(i16), // 16-bit twos-complement integer
Bool(bool),
Op(String), // operator symbol, e.g. "+"
Char(char), // 8-bit georgescii character padded with leading zeros (might change later)
Str(String), // 16-bit pointer to a string
Word(String), // 16-bit pointer to a word
}
/// Type of a value as tracked on the `TypeStack` while checking.
/// There is no variant for `Value::Op` or `Value::Word`.
#[derive(PartialEq, Eq, Debug, Clone, PartialOrd, Ord)]
pub enum VType {
Nat,
Int,
Bool,
Char,
Str,
}
/// Stack signature of a word: the types it starts with (`pop`) and the types
/// it is declared to leave behind (`push`).
#[derive(Eq, PartialEq, Debug, Clone)]
pub struct WType {
    pub pop: Vec<VType>,
    pub push: Vec<VType>,
}

impl WType {
    /// Creates a signature with no `pop` and no `push` types.
    pub fn new() -> Self {
        Self {
            pop: Vec::new(),
            push: Vec::new(),
        }
    }

    /// Builder step: adds the given types to the end of the `push` list.
    pub fn push(mut self, t: Vec<VType>) -> Self {
        self.push.extend(t);
        self
    }

    /// Builder step: adds the given types to the end of the `pop` list
    /// (nothing is removed from anywhere).
    pub fn pop(mut self, t: Vec<VType>) -> Self {
        self.pop.extend(t);
        self
    }
}
/// Effect annotation attached to a word definition
/// (written as `~paint`, `~sing`, `~store` or `~do` in the source text).
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Effect {
Paint,
Sing,
Store,
Do,
}
/// A named word definition: its body, its stack signature and its effects.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct WordDef {
    pub name: String,
    pub values: Vec<Value>,
    pub r#type: WType,
    pub effects: Vec<Effect>,
}

impl WordDef {
    /// Builds a definition; `name` may be anything convertible into a `String`.
    pub fn new<S>(name: S, values: Vec<Value>, r#type: WType, effects: Vec<Effect>) -> Self
    where
        S: Into<String>,
    {
        let name = name.into();
        WordDef {
            name,
            values,
            r#type,
            effects,
        }
    }

    /// Returns this word's body with every `Value::Word` reference replaced,
    /// recursively, by the body of the word it names in `symtab`.
    fn flatten_values(&self, symtab: &Symtab) -> Vec<Value> {
        self.values
            .iter()
            .flat_map(|value| match value {
                Value::Word(word) => symtab.get(word).flatten_values(symtab),
                other => vec![other.clone()],
            })
            .collect()
    }

    /// Returns a copy of this definition whose body contains no word references.
    pub fn flatten(&self, symtab: &Symtab) -> WordDef {
        let flat_body = self.flatten_values(symtab);
        WordDef::new(
            self.name.clone(),
            flat_body,
            self.r#type.clone(),
            self.effects.clone(),
        )
    }
}
/// Displays a definition as its pretty-printed `Debug` form followed by a newline.
impl Display for WordDef {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "{self:#?}")
    }
}
impl Checkable<VType> for WordDef {
    /// Type-checks the body of a flattened word against `stack`.
    ///
    /// Literals push their own type. An operator needs at least two entries on the
    /// stack: `+` and `*` consume two `Nat`s and push a `Nat`; `&`, `|` and `||`
    /// consume two `Bool`s and push a `Bool`. Returns the resulting stack, or an
    /// error message for a short stack, a type mismatch or an unknown operator.
    ///
    /// # Panics
    ///
    /// Panics if the body still contains a `Value::Word`; flatten the word first.
    fn check(&mut self, mut stack: TypeStack<VType>) -> Result<TypeStack<VType>, String> {
        for value in self.values.iter() {
            println!(
                "WORDDEF: checking value {:?} in word {:?}, current stack is {:?}",
                value, self.name, stack
            );
            match value {
                Value::Word(_) => panic!("Don't typecheck on an unflattened word!"),
                Value::Op(op) => {
                    if stack.len() < 2 {
                        return Err(format!(
                            "Checking def {:?}, stack is {:?}, expected a stack with 2 elements, got only {:?}",
                            self.name,
                            stack,
                            stack.len()
                        ));
                    }
                    match op.as_str() {
                        "+" | "*" => {
                            println!("WORDDEF: checking Op");
                            stack = stack
                                .test_consume(TypeStack::new().push(VType::Nat).push(VType::Nat))?
                                .push(VType::Nat);
                        }
                        // the parser emits a single "|"; "||" is kept for compatibility
                        "&" | "|" | "||" => {
                            // test_consume already removed both operands, so no further
                            // pops here: they would eat two unrelated stack entries
                            stack = stack
                                .test_consume(TypeStack::new().push(VType::Bool).push(VType::Bool))?
                                .push(VType::Bool);
                        }
                        _ => return Err(format!("unknown opcode {:?}", op)),
                    }
                }
                Value::Nat(_) => stack = stack.push(VType::Nat),
                Value::Int(_) => stack = stack.push(VType::Int),
                // a bool literal is a Bool (this used to push Int)
                Value::Bool(_) => stack = stack.push(VType::Bool),
                Value::Str(_) => stack = stack.push(VType::Str),
                Value::Char(_) => stack = stack.push(VType::Char),
            };
        }
        Ok(stack)
    }
}

View File

@ -1,11 +0,0 @@
use fuzzy::{
ast::VType,
parse,
typecheck::{Checkable, TypeStack},
};
/// Parses a hard-coded sample program, type-checks it against an empty stack
/// and prints the result of the check.
fn main() {
    let source = "test is: 5 9 *\n\nexample int nat is ~paint ~sing:\n 5 \"lol\" test \"c\" \n\narrest int nat is ~paint ~sing: 5 \"lol\" \"a\" example\n\n5 6 * arrest example arrest";
    let mut program = parse(source).unwrap();
    let outcome = program.check(TypeStack::new());
    println!("{:?}", outcome);
}

View File

@ -1,7 +0,0 @@
pub mod ast;
pub mod compiler;
pub mod parser;
pub mod typecheck;
pub use compiler::*;
pub use parser::*;

View File

@ -1,483 +0,0 @@
use std::collections::HashMap;
// TODO:
// - add error types and error handling
// - figure out if we can use a single ast or should make a second ast for compilation, then
// convert to that after parsing/do another parsing step but on the ast made the first time
// (i also don't really know what i'm doing so maybe u never do that or always do that or
// something i don't understand yet)
// - improve whitespace parsing
// - think more about language rules:
// - what types actually need to be exposed to the "user" (me) and what types can be internal
// to the compiler (e.g. char vs str, where a str of length 1 could be treated as a char
// internally (put on the stack as a value instead of put somewhere in memory))
use chumsky::{
prelude::*,
text::{ident, keyword},
};
use crate::{
ast::*,
typecheck::{Checkable, TypeStack},
};
/// Symbol table: word definitions keyed by word name.
#[derive(PartialEq, Debug, Clone)]
pub struct Symtab(HashMap<String, WordDef>);

impl Symtab {
    /// Creates an empty table.
    fn new() -> Self {
        Symtab(HashMap::new())
    }

    /// Looks up the definition of the word called `string`.
    ///
    /// # Panics
    ///
    /// Panics, naming the word, if no definition with that name exists.
    pub fn get(&self, string: &str) -> &WordDef {
        self.0
            .get(string)
            .unwrap_or_else(|| panic!("undefined word {string:?}"))
    }

    /// Inserts `def` under its own name, replacing any definition with the same name.
    pub fn add_def(&mut self, def: WordDef) {
        let key = def.name.clone();
        self.0.insert(key, def);
    }

    /// Replaces every definition with its flattened form (see `WordDef::flatten`).
    pub fn flatten_refs(&mut self) {
        // the flattened copies are built against the unmodified table, then swapped in
        let mut flattened = Symtab::new();
        for def in self.0.values() {
            flattened.add_def(def.flatten(self));
        }
        *self = flattened;
    }
}
impl From<Vec<WordDef>> for Symtab {
fn from(value: Vec<WordDef>) -> Self {
let symtab: HashMap<String, WordDef> = value
.iter()
.map(|x| (x.name.to_owned(), x.to_owned()))
.collect();
Symtab(symtab)
}
}
/// A parsed program: its word definitions plus the top-level body.
#[derive(Debug, PartialEq)]
pub struct Program {
    symtab: Symtab,
    body: Vec<Value>,
}

impl Program {
    /// Builds a program from its definitions and its body.
    fn new(defs: Vec<WordDef>, body: Vec<Value>) -> Self {
        Program {
            symtab: Symtab::from(defs),
            body,
        }
    }

    /// Replaces every word reference in the body with the flattened body of that word.
    fn reduce_body(&mut self) {
        let mut reduced = Vec::new();
        for value in &self.body {
            match value {
                Value::Word(word) => {
                    let definition = self.symtab.get(word);
                    reduced.extend(definition.flatten(&self.symtab).values);
                }
                other => reduced.push(other.clone()),
            }
        }
        self.body = reduced;
    }
}
impl Checkable<VType> for Program {
    /// Type-checks the whole program and returns the stack left by the body.
    ///
    /// Steps, in order:
    /// 1. reject definitions that reference each other in a cycle;
    /// 2. flatten every definition so no word references remain;
    /// 3. check each definition, starting from its declared `pop` types, and
    ///    test the result against its declared `push` types;
    /// 4. inline the words used by the body and check the body against `stack`.
    ///
    /// Any failure is returned as an error message.
    fn check(
        &mut self,
        mut stack: crate::typecheck::TypeStack<VType>,
    ) -> Result<crate::typecheck::TypeStack<VType>, String> {
        //TODO: https://trykv.medium.com/algorithms-on-graphs-directed-graphs-and-cycle-detection-3982dfbd11f5
        /// Runs a depth-first search from every definition that has not been visited yet.
        fn cyclic_graph_check(symtab: &Symtab) -> Result<(), String> {
            let mut visited: Vec<&WordDef> = vec![];
            let mut rec_stack: Vec<&WordDef> = vec![];
            for (_, def) in symtab.0.iter() {
                if !visited.contains(&def) {
                    dfs_cycle_check(def, &mut visited, &mut rec_stack, symtab)?;
                }
            }
            Ok(())
        }
        /// Visits `def`; a reference to a word that is still on `rec_stack` is a cycle.
        fn dfs_cycle_check<'a>(
            def: &'a WordDef,
            visited: &mut Vec<&'a WordDef>,
            rec_stack: &mut Vec<&'a WordDef>,
            symtab: &'a Symtab,
        ) -> Result<(), String> {
            visited.push(def);
            rec_stack.push(def);
            for val in def.values.iter() {
                if let Value::Word(name) = val {
                    let next_def = symtab.get(name);
                    if !visited.contains(&next_def) {
                        dfs_cycle_check(next_def, visited, rec_stack, symtab)?;
                    } else if rec_stack.contains(&next_def) {
                        return Err(format!(
                            "illegal recursion detected! definitions {}create a reference cycle",
                            rec_stack
                                .iter()
                                .map(|def| {
                                    let mut name = def.name.clone();
                                    name.insert(0, '"');
                                    name.push_str("\" ");
                                    name
                                })
                                .collect::<String>()
                        ));
                    }
                }
            }
            rec_stack.pop();
            Ok(())
        }
        cyclic_graph_check(&self.symtab)?;
        self.symtab.flatten_refs();
        println!(
            "we have flattened refs, here's the symtab: {:#?}\n",
            self.symtab
        );
        // then check that all symtab defs are sound
        // at this point they shouldn't have any references,
        // and if they do we will panic (see the Checkable impl for WordDef)
        for (name, def) in self.symtab.0.iter_mut() {
            let local_stack: TypeStack<VType> = def.r#type.pop.clone().into();
            println!(
                "PARSED: checking {:?}\ncurrent stack: {local_stack:?}\nword: {:?}",
                name, def
            );
            let result_stack = def.check(local_stack)?;
            if let Err(error) = result_stack.test(&def.r#type.push.clone().into()) {
                println!("{error:?}");
                return Err(error);
            }
        }
        self.reduce_body();
        // then we'll check that the body is sound with the given stack
        // maybe in the future i'll change this trait so there isn't a stack
        // param and the implementer picks what stack to check against
        //
        // TODO: this block also is shared behavior between basically all checkables but potentially with
        // different internal types for T, will have to figure out how to dedup this later
        for value in self.body.iter() {
            match value {
                Value::Op(op) => {
                    if stack.len() < 2 {
                        return Err(format!(
                            "expected a stack with 2 elements, got only {:?}",
                            stack.len()
                        ));
                    }
                    // test_consume already removes both operands, so only the result is
                    // pushed afterwards; popping again would discard two unrelated entries
                    match op.as_str() {
                        "+" | "*" => {
                            stack = stack
                                .test_consume(TypeStack::new().push(VType::Nat).push(VType::Nat))?
                                .push(VType::Nat);
                        }
                        // the parser emits a single "|"; "||" is kept for compatibility
                        "&" | "|" | "||" => {
                            stack = stack
                                .test_consume(TypeStack::new().push(VType::Bool).push(VType::Bool))?
                                .push(VType::Bool);
                        }
                        _ => return Err(format!("unknown opcode {:?}", op)),
                    }
                }
                Value::Nat(_) => stack = stack.push(VType::Nat),
                Value::Int(_) => stack = stack.push(VType::Int),
                Value::Bool(_) => stack = stack.push(VType::Bool),
                Value::Str(_) => stack = stack.push(VType::Str),
                Value::Char(_) => stack = stack.push(VType::Char),
                // reduce_body has replaced every word reference by this point
                Value::Word(_) => unreachable!(),
            };
        }
        Ok(stack)
    }
}
/// Parses program text into a `Program`, or returns the parser's errors.
pub fn parse<S>(input: S) -> Result<Program, Vec<Simple<char>>>
where
    S: ToString,
{
    let text = input.to_string();
    parser().parse(text)
}
pub fn parser() -> impl Parser<char, Program, Error = Simple<char>> {
let name = ident().labelled("word_name");
let value = {
// nats will be coerced to ints at compile time depending on word type
let nat = text::int(10).map(|s: String| Value::Nat(s.parse().unwrap()));
// vice versa for non-negative ints
let int = just("-").ignore_then(
text::int::<char, Simple<char>>(10).map(|s: String| Value::Int(s.parse().unwrap())),
);
let op = one_of::<char, &str, Simple<char>>("*+-/&|<>").map(|s| Value::Op(s.to_string()));
let str_or_char = just::<char, char, Simple<char>>('"')
.ignore_then(none_of('"').repeated())
.then_ignore(just('"'))
.map(|s: Vec<char>| match s.len() {
1 => Value::Char(s[0]),
_ => Value::Str(s.into_iter().collect::<String>()),
});
let word = name.map(|n: String| Value::Word(n));
let bool = keyword::<_, _, Simple<char>>("true")
.map(|_| Value::Bool(true))
.or(keyword("false").map(|_| Value::Bool(false)));
nat.or(int).or(bool).or(str_or_char).or(word).or(op)
};
let value_seperator = text::newline()
.repeated()
.at_least(2)
.not()
.rewind()
.then_ignore(
// TODO: figure out if this could be simplified
choice((
just(" ")
.repeated()
.then_ignore(just("\n").repeated().exactly(1).or_not()),
just("\n")
.repeated()
.exactly(1)
.then_ignore(just(" ").repeated().or_not()),
))
.then_ignore(just(" ").repeated()),
);
let body = value_seperator
.or_not()
.ignored()
.then(value)
.map(|(_, v)| v)
.repeated()
.then_ignore(
just(" ")
.repeated()
.ignored()
.then(text::newline().repeated().at_least(2).or_not()),
);
let word_def = {
let pop_types = {
let pop_type = keyword("nat")
.to(VType::Nat)
.or(keyword("int").to(VType::Int))
.or(keyword("bool").to(VType::Bool))
.or(keyword("char").to(VType::Char))
.or(keyword("str").to(VType::Str));
pop_type
.padded()
.repeated()
.collect::<Vec<VType>>()
.labelled("pop_types")
.boxed()
};
let push_types = {
let push_type = keyword("nat")
.to(VType::Nat)
.or(keyword("int").to(VType::Int))
.or(keyword("char").to(VType::Char))
.or(keyword("str").to(VType::Str));
push_type
.padded()
.repeated()
.collect::<Vec<VType>>()
.labelled("push_types")
.boxed()
};
let effects = {
let effect_keyword = keyword("paint")
.to(Effect::Paint)
.or(keyword("sing").to(Effect::Sing))
.or(keyword("store").to(Effect::Store))
.or(keyword("do").to(Effect::Do));
let effect = just("~").ignore_then(effect_keyword).labelled("effect");
effect.padded().repeated().labelled("effects").boxed()
};
let definition = text::whitespace()
.ignore_then(name)
.then_ignore(just(" "))
.then(pop_types)
.then_ignore(keyword("is").or(keyword("are")).padded())
.then(push_types)
.then(effects)
.then_ignore(just(":"))
.map(|(((name, pop_types), push_types), effects)| {
(name, pop_types, push_types, effects)
});
definition
.then(body.clone())
.map(|((name, pop_types, push_types, effects), body)| {
WordDef::new(
name,
body,
WType::new().push(push_types).pop(pop_types),
effects,
)
})
};
word_def
.repeated()
.then(body)
.map(|(defs, body): (Vec<WordDef>, Vec<Value>)| Program::new(defs, body))
}
// Unit tests for the parser, the type checker and the reference-cycle check.
#[cfg(test)]
mod tests {
use crate::typecheck::TypeStack;
use super::*;
// Parses two definitions plus a one-word body and compares the result
// with a hand-built Program.
#[test]
fn test_parser() {
let input = "
a is nat: 5 7 *
b is nat:
5 a *
a
";
let ast = vec![
WordDef::new(
"a",
vec![Value::Nat(5), Value::Nat(7), Value::Op("*".to_string())],
WType::new().push(vec![VType::Nat]),
vec![],
),
WordDef::new(
"b",
vec![
Value::Nat(5),
Value::Word("a".to_string()),
Value::Op("*".to_string()),
],
WType::new().push(vec![VType::Nat]),
vec![],
),
];
let body: Vec<Value> = vec![Value::Word("a".to_string())];
println!("sound: {:?}\n", parser().parse(input).unwrap());
assert_eq!(parser().parse(input).unwrap(), Program::new(ast, body));
}
// One program expected to pass the type check and three expected to fail it.
#[test]
fn test_typecheck() {
let sound = "
a is nat: 5 7 *
b nat nat is nat:
a *
a 5 *
";
let unsound_defs = "
a is nat nat: 5 7 *
b nat is nat:
a *
a 5 *
";
let unsound_body = "
a is nat: 5 7 *
b nat is nat:
a *
a 5 * *
";
let unsound_body_and_defs = "
a is nat nat: 5 7 *
b nat is nat:
a *
a 5 * *
";
// parses `input`, flattens it, and asserts that check() succeeds exactly when `sound` is true
fn typecheck(input: &str, sound: bool) {
let mut parsed = parse(input).unwrap();
parsed.symtab.flatten_refs();
parsed.reduce_body();
let stack = TypeStack::new();
if sound {
assert!(parsed.check(stack).is_ok());
} else {
assert!(parsed.check(stack).is_err());
}
}
typecheck(sound, true);
typecheck(unsound_defs, false);
typecheck(unsound_body, false);
typecheck(unsound_body_and_defs, false);
}
// Definitions that reference each other in a cycle (directly, or through a
// third word) must be rejected by check().
#[test]
fn test_illegal_recursion() {
let illegal = "
a is: b
b is: a
a
";
let illegal_multilevel = "
a is: b
b is: c
c is: a
a
";
// parses `input` and asserts that check() returns an error
fn typecheck(input: &str) {
let mut parsed = parse(input).unwrap();
let stack = TypeStack::new();
println!("{:?}", parsed.check(TypeStack::new()));
assert!(parsed.check(stack).is_err());
}
typecheck(illegal);
typecheck(illegal_multilevel);
}
}

View File

@ -1,84 +0,0 @@
use std::fmt::Debug;
use chumsky::chain::Chain;
/// A stack of types; the last element of the inner `Vec` is the top of the stack.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TypeStack<T>(Vec<T>);

impl<T: Debug + PartialEq> TypeStack<T> {
    /// Creates an empty stack.
    pub fn new() -> Self {
        TypeStack(vec![])
    }

    /// Number of entries on the stack.
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// `true` when the stack has no entries.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Removes the top entry, if there is one, and returns the stack.
    pub fn pop(mut self) -> TypeStack<T> {
        let _ = self.0.pop();
        self
    }

    /// Puts `t` on top of the stack and returns the stack.
    pub fn push(mut self, t: T) -> TypeStack<T> {
        self.0.push(t);
        self
    }

    /// Tests if `ts` matches the top of the stack and, if it does, removes the
    /// matched entries and returns what is left.
    ///
    /// # Errors
    ///
    /// Returns the same message as [`TypeStack::test`] when `ts` does not match.
    pub fn test_consume(mut self, ts: TypeStack<T>) -> Result<TypeStack<T>, String> {
        self.test(&ts)?;
        // test() succeeded, so ts is no longer than self and this cannot underflow
        let remaining = self.0.len() - ts.0.len();
        self.0.truncate(remaining);
        Ok(self)
    }

    /// Tests if `ts` matches the top of the stack without changing the stack.
    ///
    /// The top of `ts` is compared with the top of the stack, the entry below it
    /// with the entry below that, and so on for the whole of `ts`.
    ///
    /// # Errors
    ///
    /// Returns a message when `ts` has more entries than the stack, or when a
    /// pair of entries differs (depth 0 is the top of the stack).
    pub fn test(&self, ts: &TypeStack<T>) -> Result<(), String> {
        if ts.len() > self.len() {
            return Err(format!("error during test: {ts:?} is bigger than {self:?}"));
        }
        // the part of this stack that lines up with ts
        let top = &self.0[self.0.len() - ts.0.len()..];
        for (index, (val, t)) in top.iter().zip(ts.0.iter()).rev().enumerate() {
            if val != t {
                return Err(format!("type mismatch between {self:?} and {ts:?}\n{t:?} doesn't match {val:?} at stack depth {index:?}"));
            }
        }
        Ok(())
    }

    // pub fn test_many(mut self, mut tss: Vec<TypeStack<T>>) -> Result<TypeStack<T>, String> {
    // if tss.is_empty() {
    // return Ok(TypeStack::new());
    // }
    // for _i in 0..tss.len() {
    // let ts = tss.pop().unwrap();
    // self = self.test(ts)?;
    // if self.is_ok() {
    // return self;
    // }
    // }
    // Err("did not match any types".to_string())
    // }

    /// Moves every element of `t` onto the top of the stack, leaving `t` empty.
    pub fn append(&mut self, t: &mut Vec<T>) {
        self.0.append(t);
    }
}
impl<T: PartialEq + Debug> From<Vec<T>> for TypeStack<T> {
fn from(value: Vec<T>) -> Self {
TypeStack(value)
}
}
/// Something that can be type-checked against a stack of `T`.
pub trait Checkable<T: PartialEq + Debug> {
/// Checks `self` starting from `stack`; returns the resulting stack,
/// or an error message if the check fails.
fn check(&mut self, stack: TypeStack<T>) -> Result<TypeStack<T>, String>;
}

7
subroutines.inc Normal file
View File

@ -0,0 +1,7 @@
; table of subroutines that can be written into the output binary (included at $8000 by fuzzy.asm)
; each entry is one length byte followed by that many bytes;
; the number in the comment above an entry is the index passed to get_subroutine_address
; 0
test_contiguous_binary:
.byte 3,$1,$2,$3
; 1 - assembled from "plus.asm"
subroutine_plus:
.byte 17,$b5,$18,$75,$00,$95,$02,$b5,$02,$75,$01,$95,$03,$e8,$03,$60,$e8,$60