Initial commit

This commit is contained in:
august kline 2024-09-03 17:57:28 -04:00
commit ed8e20f0db
12 changed files with 1307 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

271
Cargo.lock generated Normal file
View File

@ -0,0 +1,271 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
"zerocopy",
]
[[package]]
name = "allocator-api2"
version = "0.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
[[package]]
name = "cc"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "324c74f2155653c90b04f25b2a47a8a631360cb908f92a772695f430c7e31052"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9"
dependencies = [
"hashbrown",
"stacker",
]
[[package]]
name = "either"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
[[package]]
name = "fuzzy"
version = "0.1.0"
dependencies = [
"chumsky",
"indextree",
]
[[package]]
name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash",
"allocator-api2",
]
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "indextree"
version = "4.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d6f1b8dbc8f1e5a0f45e05b9293c42cbab79086baeb3e914d3936f8149edc4f"
dependencies = [
"indextree-macros",
]
[[package]]
name = "indextree-macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357230c23ee6024223892ce0de19888a04139ca5bb94f5becb04d38b75a4bccf"
dependencies = [
"either",
"itertools",
"proc-macro2",
"quote",
"strum",
"syn",
"thiserror",
]
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]]
name = "libc"
version = "0.2.155"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
[[package]]
name = "once_cell"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "proc-macro2"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
dependencies = [
"unicode-ident",
]
[[package]]
name = "psm"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
dependencies = [
"cc",
]
[[package]]
name = "quote"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rustversion"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6"
[[package]]
name = "stacker"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
dependencies = [
"cc",
"cfg-if",
"libc",
"psm",
"winapi",
]
[[package]]
name = "strum"
version = "0.26.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
dependencies = [
"strum_macros",
]
[[package]]
name = "strum_macros"
version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn",
]
[[package]]
name = "syn"
version = "2.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "zerocopy"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

15
Cargo.toml Normal file
View File

@ -0,0 +1,15 @@
[package]
name = "fuzzy"
version = "0.1.0"
edition = "2021"
[dependencies]
chumsky = { version = "0.9.3"}
indextree = "4.7.2"
[lib]
crate-type = ["lib"]
[[bin]]
name = "main"

31
README.md Normal file
View File

@ -0,0 +1,31 @@
# fuzzy
<img src="./fuzzy.jpeg" style="height:150px;"/>
a concatenative stack based programming language for [george](https://git.augustkline.com/august/george) <3
## design principles
#### lightweight
fuzzy runs on a 65\(c\)02 and nothing else
#### antiportable
fuzzy is part of george, and shouldn't run anywhere else.
#### low-level
(most of) fuzzy could be made with assembler macros, but that's no fun
#### reliable
if fuzzy says it can run, george can run it
## feature progress
- [x] parser
- roughly complete, but want to finish the whole pipeline before adding things to the parser
- [x] typechecker
- generates a "type stack" from parsed input and checks that word definition types match their body
- [ ] code generation

BIN
fuzzy.jpeg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 162 KiB

62
macro.inc Normal file
View File

@ -0,0 +1,62 @@
; breakpoint: emit a single $02 byte at the current location.
.macro breakpoint ; $02 isn't a valid instruction, the emulator will see this and halt, dump memory contents
.byte $02
.endm
; pop: advance the data stack pointer (X) by 2, i.e. one 2-byte cell.
; The dropped bytes are not cleared.
.macro pop ; drops a data stack cell
inx
inx
.endm
.macro pop2 ; drops 2 data stack cells
    pop ; each `pop` expands to two inx, so X advances by 4 in total
    pop
.endm
; push: move the data stack pointer (X) down by 2 to make room for one cell.
; Nothing is stored; the caller writes the new cell at 0,x / 1,x afterwards.
.macro push ; push a data stack cell
dex
dex
.endm
.macro push2 ; push 2 data stack cells
    push ; each `push` expands to two dex, so X drops by 4 in total
    push
.endm
; push_char: push one cell whose low byte is loaded via `lda \char`
; and whose high byte is zero.
; \char is pasted verbatim as the lda operand.
.macro push_char, char; pushes an ascii character code onto the stack
lda \char
push
sta 0, x ; char low byte
stz 1, x ; char high byte
.endm
; push_coords: push two cells, first \coord_x and then \coord_y,
; so the y cell ends up on top of the x cell.
; Each cell gets the loaded byte as its low byte and zero as its high byte.
.macro push_coords, coord_x, coord_y ; push a set of (x,y) coordinates onto the data stack
lda \coord_x
push
sta 0, x ; low byte
stz 1,x ; high byte is zero
lda \coord_y
push
sta 0,x ; same here
stz 1,x
.endm
; to_r: copy the top data-stack cell to the hardware stack, then drop it.
; The high byte (1,x) is pushed first and the low byte (0,x) second,
; so the low byte is the first one pulled back.
.macro to_r ; pop the top of the stack off and save it in the return (hardware) stack: (n -- )
lda 1, x
pha
lda 0, x
pha
pop
.endm
; from_r: make room for one data-stack cell, then fill it from the hardware stack.
; The first byte pulled goes to the low byte (0,x), the second to the high byte (1,x).
.macro from_r ; pop the top of the return stack off and put it on the data stack: ( -- n)
push
pla
sta 0, x
pla
sta 1, x
.endm

174
routines.asm Normal file
View File

@ -0,0 +1,174 @@
; ops
; - [ ] nat (positive integers)
; - [ ] * mult (i think anything bigger than 16 bits just gets cut off, george does not need big numbers rite)
; - [ ] / div (what do we do w fractions/remainders)
; - [ ] + add (same w *)
; - [ ] - sub (what should happen when y > x & x - y? )
; - [ ] int (pos/neg 2s complement numbers) (lots of the above applies)
; - [ ] * mult
; - [ ] / div
; - [ ] + add
; - [ ] - sub
; - [ ] bool
; - [ ] & and
; - [ ] | or
;
; literals (these will be macros here, meaning when i implement them in the compiler they will depend on what literals r being pushed)
; - [ ] bool
; - [ ] num
; - [ ] str
; - [ ] char
;
; i/o
; - [ ] write (pop off an address and some data and store it)
; - [ ] read (pop off an address and push the data at that address onto the stack)
; - [ ] key (stealing algo from forth: pause execution until there's a key pressed, then push that key on the stack, i don't think this breaks anything, this could probably be written easily in fuzzy w/ write and read words)
;
; control flow
; - [ ] if (pop off a bool and jump to word, this will also depend on compiler, so writing as macro)
; for example
; a is nat: 5
; b is nat: 3
; c bool is nat: if[a | b] *we'll figure out syntax later*
; true c
;
; (assuming c gets inlined since it's only referenced in the body)
; compiles to
; a:
; lda 5
; dex
; dex
; sta 0, x
; rts
;
; b:
; lda 3
; dex
; dex
; sta 0, x
; rts
;
; main:
; lda #1
; dex
; dex
; sta 0, x
; stz 1,x
; lda #1
; inx
; inx
; bit 2, x
; beq .falsy
; jsr a
; .falsy:
; jsr b
; stp ; or whatever here
; literals - the important thing is what the actual value is, each of these functions/macros are the same for any type, since the val is always just a 16 bit number
.macro lit, val ; push a literal cell: ( -- val), low byte from \val, high byte zero
    lda \val    ; macro arguments are referenced with a backslash, as in macro.inc
    dex         ; make room for one cell
    dex
    sta 0, x    ; low byte
    stz 1, x    ; high byte is always zero
.endm
; control flow
.macro if, bool, addr ; ( flag -- ) pops a bool off the stack and calls \addr when it is true
    ; `bool` is currently unused; the flag is always taken from the top of the data stack.
    lda 0, x        ; read the flag's low byte while the cell is still on the stack
    ; in either case we pop a cell off the top
    inx
    inx
    and #1          ; inx changed the Z flag, so derive it again from the flag's low bit
    beq .falsy\@    ; \@ gives every expansion its own label
    ; TODO: words will be defined as subroutines ig,
    ; and will be returned from after execution
    jsr \addr
.falsy\@:
.endm
; ops
; nat_plus ( a b -- a+b ): 16-bit add of the top two data-stack cells.
; The low bytes are added with carry cleared, then the high bytes with the carry
; from the low-byte add. The sum overwrites the second cell (2,x / 3,x) and the
; top cell is dropped. A carry out of the high byte is not stored.
nat_plus:
clc
lda 0, x
adc 2, x
sta 2, x
lda 1, x
adc 3, x
sta 3, x
inx
inx
rts
; nat_mult_2 ( a -- a*2 ): shift the top cell left by one bit in place.
; asl moves bit 7 of the low byte into carry and rol feeds it into the high byte.
; Bit 15 of the cell is shifted out into carry and not stored.
nat_mult_2:
asl 0, x
rol 1, x
rts
; after a silly escapade plotting u16 overflow charts
; i am making the executive decision that the vast majority
; of results (those that overflow 2 bytes) will be wrong :)
; for the sake of all results being 1 cell wide :)
; see this plot: ./overflow.png
; algo here: https://www.llx.com/Neil/a2/mult.html
nat_mult: ; ( a b -- a*b ) keeps the low 16 bits of the product; higher bits are discarded
result = $200       ; 4 bytes of scratch for the 32-bit intermediate product
    lda #0          ; A carries the top byte (result+3) while the loop runs
    sta result+2
    ldy #16         ; one round per multiplier bit
.1:
    lsr 1, x        ; shift the multiplier (top cell) right as one 16-bit value:
    ror 0, x        ; high byte first, so bit 0 of the low byte lands in carry
    bcc .2          ; bit was 0: nothing to add this round
    phy             ; some register shuffling so we don't lose x stack pointer
    tay             ; park A in Y (the loop counter is saved on the hardware stack)
    clc
    lda 2, x        ; low byte of the multiplicand (second cell)
    adc result + 2  ; add it to the upper half of the product
    sta result + 2
    tya
    ply             ; ply does not touch carry, so the adc below still sees it
    adc 3, x        ; high byte of the multiplicand plus carry
.2:
    ror             ; shift the whole 32-bit product right by one
    ror result + 2
    ror result + 1
    ror result
    dey
    bne .1
    sta result + 3
    inx             ; drop one operand; the product replaces the other
    inx
    lda result      ; low byte of the product
    sta 0, x
    lda result + 1  ; high byte of the kept 16 bits
    sta 1, x
    rts
; i/o
; read ( addr -- byte ): load one byte through the pointer held in the top cell
; and overwrite that cell with it (low byte = loaded byte, high byte = 0).
read:
lda (0, x)
sta 0, x
stz 1, x
rts
; write ( addr data -- ): store the low byte of the top cell through the pointer
; held in the second cell, then drop both cells.
; Only one byte is written; the high byte of the data cell (1,x) is ignored.
write:
lda 0, x
sta (2, x)
inx
inx
inx
inx
rts
; key: not implemented yet. No instructions (and no rts) follow this label
; in the visible file, so it must not be called as it stands.
key:
;TODO

168
src/ast.rs Normal file
View File

@ -0,0 +1,168 @@
use core::panic;
use std::{any::Any, fmt::Display};
use crate::{
typecheck::{self, Checkable, TypeStack},
Symtab,
};
/// One parsed item of a word body or of the program body.
///
/// Variants hold parser values, not compiler values: for `Str(String)`, for
/// example, the `String` will be put somewhere in memory and a pointer to it
/// will be put on the stack.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Value {
    /// 16-bit natural number
    Nat(u16),
    /// 16-bit twos-complement integer
    Int(i16),
    Bool(bool),
    Op(String),
    /// 8-bit georgescii character padded with leading zeros (might change later)
    Char(char),
    /// 16-bit pointer to a string
    Str(String),
    /// 16-bit pointer to a word
    Word(String),
}
/// The type of a single stack cell as seen by the typechecker.
///
/// The derived ordering follows declaration order: `Nat < Int < Bool < Char < Str`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum VType {
    Nat,
    Int,
    Bool,
    Char,
    Str,
}
/// The declared stack effect of a word: the types it takes off the stack
/// (`pop`) and the types it leaves on the stack (`push`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WType {
    pub pop: Vec<VType>,
    pub push: Vec<VType>,
}

impl WType {
    /// Creates a word type with empty `pop` and `push` lists.
    pub fn new() -> Self {
        Self {
            pop: Vec::new(),
            push: Vec::new(),
        }
    }

    /// Appends `t` to the end of the `push` types and returns the updated type.
    pub fn push(mut self, t: Vec<VType>) -> Self {
        self.push.extend(t);
        self
    }

    /// Appends `t` to the end of the `pop` types and returns the updated type.
    ///
    /// Despite the name, nothing is removed from any list.
    pub fn pop(mut self, t: Vec<VType>) -> Self {
        self.pop.extend(t);
        self
    }
}
/// An effect annotation attached to a word definition.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Effect {
    Paint,
    Sing,
    Store,
    Do,
}
/// A named word definition: its body values, declared stack type and effects.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WordDef {
    pub name: String,
    pub values: Vec<Value>,
    pub r#type: WType,
    pub effects: Vec<Effect>,
}

impl WordDef {
    /// Builds a definition from its parts; `name` is converted into a `String`.
    pub fn new<S: Into<String>>(
        name: S,
        values: Vec<Value>,
        r#type: WType,
        effects: Vec<Effect>,
    ) -> Self {
        Self {
            name: name.into(),
            values,
            r#type,
            effects,
        }
    }

    /// Returns this word's body with every `Value::Word` replaced, recursively,
    /// by the flattened body of the word it names in `symtab`.
    ///
    /// Other values are cloned through unchanged, in order.
    fn flatten_values(&self, symtab: &Symtab) -> Vec<Value> {
        self.values
            .iter()
            .flat_map(|item| match item {
                Value::Word(word_name) => symtab.get(word_name).flatten_values(symtab),
                other => vec![other.clone()],
            })
            .collect()
    }

    /// Returns a copy of this definition whose body contains no word references.
    ///
    /// Name, type and effects are cloned as they are; only the body is expanded.
    pub fn flatten(&self, symtab: &Symtab) -> WordDef {
        WordDef::new(
            self.name.clone(),
            self.flatten_values(symtab),
            self.r#type.clone(),
            self.effects.clone(),
        )
    }
}
impl Display for WordDef {
    /// Writes the pretty-printed `Debug` form of the definition, followed by a newline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "{self:#?}")
    }
}
impl Checkable<VType> for WordDef {
    /// Typechecks the body of a flattened word against `stack`.
    ///
    /// Literals push their own type. An operator needs at least two cells on the
    /// stack, consumes two operands of its operand type and pushes one result of
    /// that type (`+`/`*` work on `Nat`, `&`/`|` on `Bool`).
    ///
    /// Returns the stack as it looks after the whole body has been applied.
    ///
    /// # Errors
    /// Returns a message when an operator finds fewer than two cells, when the
    /// operand types do not match, or when the operator is unknown.
    ///
    /// # Panics
    /// Panics if the body still contains a `Value::Word`; flatten the word first.
    fn check(&mut self, mut stack: TypeStack<VType>) -> Result<TypeStack<VType>, String> {
        for value in self.values.iter() {
            println!(
                "WORDDEF: checking value {:?} in word {:?}, current stack is {:?}",
                value, self.name, stack
            );
            stack = match value {
                Value::Word(_) => panic!("Don't typecheck on an unflattened word!"),
                Value::Op(op) => {
                    if stack.len() < 2 {
                        return Err(format!(
                            "Checking def {:?}, stack is {:?}, expected a stack with 2 elements, got only {:?}",
                            self.name,
                            stack,
                            stack.len()
                        ));
                    }
                    match op.as_str() {
                        "+" | "*" => {
                            println!("WORDDEF: checking Op");
                            // `test_consume` already removes both operands.
                            stack
                                .test_consume(TypeStack::new().push(VType::Nat).push(VType::Nat))?
                                .push(VType::Nat)
                        }
                        // The parser emits single-character ops, so `|` is the
                        // spelling that actually arrives here; `||` is kept for
                        // hand-built ASTs.
                        "&" | "|" | "||" => stack
                            .test_consume(TypeStack::new().push(VType::Bool).push(VType::Bool))?
                            .push(VType::Bool),
                        _ => return Err(format!("unknown opcode {:?}", op)),
                    }
                }
                Value::Nat(_) => stack.push(VType::Nat),
                Value::Int(_) => stack.push(VType::Int),
                Value::Bool(_) => stack.push(VType::Bool),
                Value::Str(_) => stack.push(VType::Str),
                Value::Char(_) => stack.push(VType::Char),
            };
        }
        Ok(stack)
    }
}

11
src/bin/main.rs Normal file
View File

@ -0,0 +1,11 @@
use fuzzy::{
ast::VType,
parse,
typecheck::{Checkable, TypeStack},
};
/// Parses a hard-coded sample program, typechecks it against an empty stack
/// and prints the result of the check.
fn main() {
    let source = "test is: 5 9 *\n\nexample int nat is ~paint ~sing:\n 5 \"lol\" test \"c\" \n\narrest int nat is ~paint ~sing: 5 \"lol\" \"a\" example\n\n5 6 * arrest example arrest";
    let mut program = parse(source).unwrap();
    let outcome = program.check(TypeStack::new());
    println!("{:?}", outcome);
}

7
src/lib.rs Normal file
View File

@ -0,0 +1,7 @@
pub mod ast;
pub mod compiler;
pub mod parser;
pub mod typecheck;
pub use compiler::*;
pub use parser::*;

483
src/parser/mod.rs Normal file
View File

@ -0,0 +1,483 @@
use std::collections::HashMap;
// TODO:
// - add error types and error handling
// - figure out if we can use a single ast or should make a second ast for compilation, then
// convert to that after parsing/do another parsing step but on the ast made the first time
// (i also don't really know what i'm doing so maybe u never do that or always do that or
// something i don't understand yet)
// - improve whitespace parsing
// - think more about language rules:
// - what types actually need to be exposed to the "user" (me) and what types can be internal
// to the compiler (e.g. char vs str, where a str of length 1 could be treated as a char
// internally (put on the stack as a value instead of put somewhere in memory))
use chumsky::{
prelude::*,
text::{ident, keyword},
};
use crate::{
ast::*,
typecheck::{Checkable, TypeStack},
};
/// Symbol table: maps each word name to its definition.
#[derive(PartialEq, Debug, Clone)]
pub struct Symtab(HashMap<String, WordDef>);

impl Symtab {
    /// Creates an empty symbol table.
    fn new() -> Self {
        Symtab(HashMap::new())
    }

    /// Looks up the definition named `string`.
    ///
    /// # Panics
    /// Panics, naming the missing word, if no such definition exists.
    pub fn get(&self, string: &String) -> &WordDef {
        self.0
            .get(string)
            .unwrap_or_else(|| panic!("undefined word {string:?}"))
    }

    /// Inserts `def` under its own name, replacing any previous definition with that name.
    pub fn add_def(&mut self, def: WordDef) {
        let key = def.name.clone();
        self.0.insert(key, def);
    }

    /// Replaces every definition with its flattened form, so that no body
    /// contains a word reference afterwards.
    ///
    /// All definitions are flattened against the table as it was before the
    /// call; the table is swapped for the flattened one only at the end.
    pub fn flatten_refs(&mut self) {
        let mut flattened = Symtab::new();
        for def in self.0.values() {
            flattened.add_def(def.flatten(self));
        }
        *self = flattened;
    }
}
impl From<Vec<WordDef>> for Symtab {
    /// Builds a symbol table keyed by each definition's name.
    ///
    /// When two definitions share a name, the later one in `value` wins.
    fn from(value: Vec<WordDef>) -> Self {
        // The Vec is owned, so definitions are moved in; only the key is cloned.
        let symtab: HashMap<String, WordDef> = value
            .into_iter()
            .map(|def| (def.name.clone(), def))
            .collect();
        Symtab(symtab)
    }
}
/// A parsed program: its word definitions plus the top-level body.
#[derive(Debug, PartialEq)]
pub struct Program {
    symtab: Symtab,
    body: Vec<Value>,
}

impl Program {
    /// Builds a program from parsed definitions and the top-level body.
    fn new(defs: Vec<WordDef>, body: Vec<Value>) -> Self {
        Program {
            symtab: Symtab::from(defs),
            body,
        }
    }

    /// Rewrites the body so that every word reference is replaced by the
    /// flattened body of the word it names; other values are kept in order.
    fn reduce_body(&mut self) {
        let symtab = &self.symtab;
        let reduced: Vec<Value> = self
            .body
            .iter()
            .flat_map(|item| match item {
                Value::Word(word_name) => symtab.get(word_name).flatten(symtab).values,
                other => vec![other.clone()],
            })
            .collect();
        self.body = reduced;
    }
}
impl Checkable<VType> for Program {
    /// Typechecks the whole program against `stack`.
    ///
    /// Steps, in order:
    /// 1. reject definitions that reference each other in a cycle;
    /// 2. flatten every definition so its body holds no word references;
    /// 3. check each definition, starting from its declared `pop` types, and
    ///    test the resulting stack against its declared `push` types;
    /// 4. flatten the program body and check it against `stack`.
    ///
    /// Returns the stack as it looks after the body has been applied.
    ///
    /// # Errors
    /// Returns a message on a reference cycle, on an unsound definition, or on
    /// an operator in the body that finds too few or mistyped operands.
    ///
    /// # Panics
    /// Panics if a body refers to a word that has no definition.
    fn check(
        &mut self,
        mut stack: crate::typecheck::TypeStack<VType>,
    ) -> Result<crate::typecheck::TypeStack<VType>, String> {
        //TODO: https://trykv.medium.com/algorithms-on-graphs-directed-graphs-and-cycle-detection-3982dfbd11f5
        /// Runs a depth-first search from every definition not yet visited.
        fn cyclic_graph_check(symtab: &Symtab) -> Result<(), String> {
            let mut visited: Vec<&WordDef> = vec![];
            let mut rec_stack: Vec<&WordDef> = vec![];
            for def in symtab.0.values() {
                if !visited.contains(&def) {
                    dfs_cycle_check(def, &mut visited, &mut rec_stack, symtab)?;
                }
            }
            Ok(())
        }

        /// Visits `def` and everything it references; a reference to a
        /// definition still on `rec_stack` means a cycle.
        fn dfs_cycle_check<'a>(
            def: &'a WordDef,
            visited: &mut Vec<&'a WordDef>,
            rec_stack: &mut Vec<&'a WordDef>,
            symtab: &'a Symtab,
        ) -> Result<(), String> {
            visited.push(def);
            rec_stack.push(def);
            for val in def.values.iter() {
                if let Value::Word(name) = val {
                    let next_def = symtab.get(name);
                    if !visited.contains(&next_def) {
                        dfs_cycle_check(next_def, visited, rec_stack, symtab)?;
                    } else if rec_stack.contains(&next_def) {
                        return Err(format!(
                            "illegal recursion detected! definitions {}create a reference cycle",
                            rec_stack
                                .iter()
                                .map(|def| {
                                    let mut name = def.name.clone();
                                    name.insert(0, '"');
                                    name.push_str("\" ");
                                    name
                                })
                                .collect::<String>()
                        ));
                    }
                }
            }
            rec_stack.pop();
            Ok(())
        }

        cyclic_graph_check(&self.symtab)?;
        self.symtab.flatten_refs();
        println!(
            "we have flattened refs, here's the symtab: {:#?}\n",
            self.symtab
        );
        // then check that all symtab defs are sound
        // at this point they shouldn't have any references,
        // and if they do we will panic (see the Checkable impl for WordDef)
        for (name, def) in self.symtab.0.iter_mut() {
            let local_stack: TypeStack<VType> = def.r#type.pop.clone().into();
            println!(
                "PARSED: checking {:?}\ncurrent stack: {local_stack:?}\nword: {:?}",
                name, def
            );
            let result_stack = def.check(local_stack)?;
            if let Err(error) = result_stack.test(&def.r#type.push.clone().into()) {
                println!("{error:?}");
                return Err(error);
            }
        }
        self.reduce_body();
        // then we'll check that the body is sound with the given stack
        // maybe in the future i'll change this trait so there isn't a stack
        // param and the implementer picks what stack to check against
        //
        // TODO: this block also is shared behavior between basically all checkables but potentially with
        // different internal types for T, will have to figure out how to dedup this later
        for value in self.body.iter() {
            stack = match value {
                Value::Op(op) => {
                    if stack.len() < 2 {
                        return Err(format!(
                            "expected a stack with 2 elements, got only {:?}",
                            stack.len()
                        ));
                    }
                    // The parser emits single-character ops, so `|` is the
                    // spelling that actually arrives here; `||` is kept for
                    // hand-built ASTs.
                    let operand = match op.as_str() {
                        "+" | "*" => VType::Nat,
                        "&" | "|" | "||" => VType::Bool,
                        _ => return Err(format!("unknown opcode {:?}", op)),
                    };
                    // `test_consume` already removes both operands, so only the
                    // result is pushed; popping again would eat unrelated cells.
                    stack
                        .test_consume(
                            TypeStack::new()
                                .push(operand.clone())
                                .push(operand.clone()),
                        )?
                        .push(operand)
                }
                Value::Nat(_) => stack.push(VType::Nat),
                Value::Int(_) => stack.push(VType::Int),
                Value::Bool(_) => stack.push(VType::Bool),
                Value::Str(_) => stack.push(VType::Str),
                Value::Char(_) => stack.push(VType::Char),
                Value::Word(_) => unreachable!(),
            };
        }
        Ok(stack)
    }
}
/// Parses `input` into a [`Program`].
///
/// `input` is converted with `to_string()` and handed to [`parser`].
///
/// # Errors
/// Returns the parser's error list when the input does not parse.
pub fn parse<S>(input: S) -> Result<Program, Vec<Simple<char>>>
where
    S: ToString,
{
    let source = input.to_string();
    parser().parse(source)
}
pub fn parser() -> impl Parser<char, Program, Error = Simple<char>> {
let name = ident().labelled("word_name");
let value = {
// nats will be coerced to ints at compile time depending on word type
let nat = text::int(10).map(|s: String| Value::Nat(s.parse().unwrap()));
// vice versa for non-negative ints
let int = just("-").ignore_then(
text::int::<char, Simple<char>>(10).map(|s: String| Value::Int(s.parse().unwrap())),
);
let op = one_of::<char, &str, Simple<char>>("*+-/&|<>").map(|s| Value::Op(s.to_string()));
let str_or_char = just::<char, char, Simple<char>>('"')
.ignore_then(none_of('"').repeated())
.then_ignore(just('"'))
.map(|s: Vec<char>| match s.len() {
1 => Value::Char(s[0]),
_ => Value::Str(s.into_iter().collect::<String>()),
});
let word = name.map(|n: String| Value::Word(n));
let bool = keyword::<_, _, Simple<char>>("true")
.map(|_| Value::Bool(true))
.or(keyword("false").map(|_| Value::Bool(false)));
nat.or(int).or(bool).or(str_or_char).or(word).or(op)
};
let value_seperator = text::newline()
.repeated()
.at_least(2)
.not()
.rewind()
.then_ignore(
// TODO: figure out if this could be simplified
choice((
just(" ")
.repeated()
.then_ignore(just("\n").repeated().exactly(1).or_not()),
just("\n")
.repeated()
.exactly(1)
.then_ignore(just(" ").repeated().or_not()),
))
.then_ignore(just(" ").repeated()),
);
let body = value_seperator
.or_not()
.ignored()
.then(value)
.map(|(_, v)| v)
.repeated()
.then_ignore(
just(" ")
.repeated()
.ignored()
.then(text::newline().repeated().at_least(2).or_not()),
);
let word_def = {
let pop_types = {
let pop_type = keyword("nat")
.to(VType::Nat)
.or(keyword("int").to(VType::Int))
.or(keyword("bool").to(VType::Bool))
.or(keyword("char").to(VType::Char))
.or(keyword("str").to(VType::Str));
pop_type
.padded()
.repeated()
.collect::<Vec<VType>>()
.labelled("pop_types")
.boxed()
};
let push_types = {
let push_type = keyword("nat")
.to(VType::Nat)
.or(keyword("int").to(VType::Int))
.or(keyword("char").to(VType::Char))
.or(keyword("str").to(VType::Str));
push_type
.padded()
.repeated()
.collect::<Vec<VType>>()
.labelled("push_types")
.boxed()
};
let effects = {
let effect_keyword = keyword("paint")
.to(Effect::Paint)
.or(keyword("sing").to(Effect::Sing))
.or(keyword("store").to(Effect::Store))
.or(keyword("do").to(Effect::Do));
let effect = just("~").ignore_then(effect_keyword).labelled("effect");
effect.padded().repeated().labelled("effects").boxed()
};
let definition = text::whitespace()
.ignore_then(name)
.then_ignore(just(" "))
.then(pop_types)
.then_ignore(keyword("is").or(keyword("are")).padded())
.then(push_types)
.then(effects)
.then_ignore(just(":"))
.map(|(((name, pop_types), push_types), effects)| {
(name, pop_types, push_types, effects)
});
definition
.then(body.clone())
.map(|((name, pop_types, push_types, effects), body)| {
WordDef::new(
name,
body,
WType::new().push(push_types).pop(pop_types),
effects,
)
})
};
word_def
.repeated()
.then(body)
.map(|(defs, body): (Vec<WordDef>, Vec<Value>)| Program::new(defs, body))
}
// Unit tests for the parser and for the typecheck/cycle-check pipeline.
#[cfg(test)]
mod tests {
use crate::typecheck::TypeStack;
use super::*;
// Parses a program with two definitions and a body, and compares the result
// with a hand-built `Program`.
#[test]
fn test_parser() {
let input = "
a is nat: 5 7 *
b is nat:
5 a *
a
";
// Expected definitions, in source order.
let ast = vec![
WordDef::new(
"a",
vec![Value::Nat(5), Value::Nat(7), Value::Op("*".to_string())],
WType::new().push(vec![VType::Nat]),
vec![],
),
WordDef::new(
"b",
vec![
Value::Nat(5),
Value::Word("a".to_string()),
Value::Op("*".to_string()),
],
WType::new().push(vec![VType::Nat]),
vec![],
),
];
// Expected top-level body: a single reference to `a`.
let body: Vec<Value> = vec![Value::Word("a".to_string())];
println!("sound: {:?}\n", parser().parse(input).unwrap());
assert_eq!(parser().parse(input).unwrap(), Program::new(ast, body));
}
// Checks one program expected to typecheck and three expected to fail:
// one with a bad definition type, one with a bad body, one with both.
#[test]
fn test_typecheck() {
let sound = "
a is nat: 5 7 *
b nat nat is nat:
a *
a 5 *
";
let unsound_defs = "
a is nat nat: 5 7 *
b nat is nat:
a *
a 5 *
";
let unsound_body = "
a is nat: 5 7 *
b nat is nat:
a *
a 5 * *
";
let unsound_body_and_defs = "
a is nat nat: 5 7 *
b nat is nat:
a *
a 5 * *
";
// Parses `input`, flattens it, then asserts that `check` on an empty stack
// succeeds when `sound` is true and fails otherwise.
fn typecheck(input: &str, sound: bool) {
let mut parsed = parse(input).unwrap();
parsed.symtab.flatten_refs();
parsed.reduce_body();
let stack = TypeStack::new();
if sound {
assert!(parsed.check(stack).is_ok());
} else {
assert!(parsed.check(stack).is_err());
}
}
typecheck(sound, true);
typecheck(unsound_defs, false);
typecheck(unsound_body, false);
typecheck(unsound_body_and_defs, false);
}
// Definitions that reference each other in a cycle (two-word and three-word
// cycles) must make `check` return an error.
#[test]
fn test_illegal_recursion() {
let illegal = "
a is: b
b is: a
a
";
let illegal_multilevel = "
a is: b
b is: c
c is: a
a
";
// Parses `input` and asserts that `check` fails; `check` runs twice, once
// for the printed result and once for the assertion.
fn typecheck(input: &str) {
let mut parsed = parse(input).unwrap();
let stack = TypeStack::new();
println!("{:?}", parsed.check(TypeStack::new()));
assert!(parsed.check(stack).is_err());
}
typecheck(illegal);
typecheck(illegal_multilevel);
}
}

84
src/typecheck.rs Normal file
View File

@ -0,0 +1,84 @@
use std::fmt::Debug;
use chumsky::chain::Chain;
/// A stack of types used by the typechecker; the last element is the top.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TypeStack<T>(Vec<T>);

impl<T: Debug + PartialEq> TypeStack<T> {
    /// Creates an empty stack.
    pub fn new() -> Self {
        TypeStack(vec![])
    }

    /// Number of types on the stack.
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Whether the stack holds no types.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Removes the top type, if any, and returns the stack.
    pub fn pop(mut self) -> TypeStack<T> {
        let _ = self.0.pop();
        self
    }

    /// Puts `t` on top and returns the stack.
    pub fn push(mut self, t: T) -> TypeStack<T> {
        self.0.push(t);
        self
    }

    /// Tests whether `ts` matches the top of the stack and removes the matched types.
    ///
    /// The top of `ts` is compared with the top of `self`, then the next pair
    /// down, and so on. On success the returned stack no longer holds the
    /// matched types.
    ///
    /// # Errors
    /// Returns a message when `ts` is longer than `self` or when a pair differs.
    pub fn test_consume(mut self, ts: TypeStack<T>) -> Result<TypeStack<T>, String> {
        if ts.len() > self.len() {
            return Err(format!("error during test: {ts:?} is bigger than {self:?}"));
        }
        for (index, t) in ts.0.iter().rev().enumerate() {
            let val = self
                .0
                .pop()
                .expect("length was checked above, so a value is present");
            if val != *t {
                return Err(format!("type mismatch between {self:?} and {ts:?}\n{t:?} doesn't match {val:?} at stack depth {index:?}"));
            }
        }
        Ok(self)
    }

    /// Tests whether `ts` matches the top of the stack without changing it.
    ///
    /// The type at depth `n` of `ts` is compared with the type at depth `n` of
    /// `self`, counting from the top.
    ///
    /// # Errors
    /// Returns a message when `ts` is longer than `self` or when a pair differs.
    pub fn test(&self, ts: &TypeStack<T>) -> Result<(), String> {
        if ts.len() > self.len() {
            return Err(format!("error during test: {ts:?} is bigger than {self:?}"));
        }
        // Walk both stacks from the top so each expected type meets the value
        // at its own depth, not the top element every time.
        for (index, (val, t)) in self.0.iter().rev().zip(ts.0.iter().rev()).enumerate() {
            if val != t {
                return Err(format!("type mismatch between {self:?} and {ts:?}\n{t:?} doesn't match {val:?} at stack depth {index:?}"));
            }
        }
        Ok(())
    }

    // pub fn test_many(mut self, mut tss: Vec<TypeStack<T>>) -> Result<TypeStack<T>, String> {
    // if tss.is_empty() {
    // return Ok(TypeStack::new());
    // }
    // for _i in 0..tss.len() {
    // let ts = tss.pop().unwrap();
    // self = self.test(ts)?;
    // if self.is_ok() {
    // return self;
    // }
    // }
    // Err("did not match any types".to_string())
    // }

    /// Moves every element of `t` onto the top of the stack, leaving `t` empty.
    pub fn append(&mut self, t: &mut Vec<T>) {
        self.0.append(t);
    }
}
impl<T: PartialEq + Debug> From<Vec<T>> for TypeStack<T> {
fn from(value: Vec<T>) -> Self {
TypeStack(value)
}
}
/// Something that can be typechecked against a [`TypeStack`].
pub trait Checkable<T>
where
    T: PartialEq + Debug,
{
    /// Checks `self` starting from `stack`.
    ///
    /// Returns the resulting stack on success and an error message on failure.
    fn check(&mut self, stack: TypeStack<T>) -> Result<TypeStack<T>, String>;
}