commit ed8e20f0dbe792f658c42c23ecbc6cf53b9998f9 Author: august kline Date: Tue Sep 3 17:57:28 2024 -0400 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..eb6139b --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,271 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + +[[package]] +name = "cc" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "324c74f2155653c90b04f25b2a47a8a631360cb908f92a772695f430c7e31052" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chumsky" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" +dependencies = [ + "hashbrown", + "stacker", +] + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "fuzzy" +version = "0.1.0" +dependencies = [ + "chumsky", + "indextree", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "indextree" +version = "4.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d6f1b8dbc8f1e5a0f45e05b9293c42cbab79086baeb3e914d3936f8149edc4f" +dependencies = [ + "indextree-macros", +] + +[[package]] +name = "indextree-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357230c23ee6024223892ce0de19888a04139ca5bb94f5becb04d38b75a4bccf" +dependencies = [ + "either", + "itertools", + "proc-macro2", + "quote", + "strum", + "syn", + "thiserror", +] + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "psm" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +dependencies = [ + "cc", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustversion" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" + +[[package]] +name = "stacker" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "winapi", +] + +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + +[[package]] +name = "syn" +version = "2.0.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..7e02987 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "fuzzy" +version = "0.1.0" +edition = "2021" + +[dependencies] +chumsky = { version = "0.9.3"} 
+indextree = "4.7.2" + +[lib] + +crate-type = ["lib"] + +[[bin]] +name = "main" diff --git a/README.md b/README.md new file mode 100644 index 0000000..a6b08fa --- /dev/null +++ b/README.md @@ -0,0 +1,31 @@ +# fuzzy + + + +a concatenative stack based programming language for [george](https://git.augustkline.com/august/george) <3 + +## design principles + +#### lightweight + +fuzzy runs on a 65\(c\)02 and nothing else + +#### antiportable + +fuzzy is part of george, and shouldn't run anywhere else. + +#### low-level + +(most of) fuzzy could be made with assembler macros, but that's no fun + +#### reliable + +if fuzzy says it can run, george can run it + +## feature progress + +- [x] parser + - roughly complete, but want to finish the whole pipeline before adding things to the parser +- [x] typechecker + - generates a "type stack" from parsed input and checks that word definition types match their body +- [ ] code generation diff --git a/fuzzy.jpeg b/fuzzy.jpeg new file mode 100644 index 0000000..60e4dcd Binary files /dev/null and b/fuzzy.jpeg differ diff --git a/macro.inc b/macro.inc new file mode 100644 index 0000000..96a3471 --- /dev/null +++ b/macro.inc @@ -0,0 +1,62 @@ + .macro breakpoint ; $02 isn't a valid instruction, the emulator will see this and halt, dump memory contents + .byte $02 + .endm + + .macro pop ; drops a data stack cell + inx + inx + .endm + + .macro pop2 ; drops 2 data stack cells + inx + inx + inx + inx + .endm + + .macro push ; push a data stack cell + dex + dex + .endm + + .macro push2 ; push 2 data stack cells + dex + dex + dex + dex + .endm + + .macro push_char, char; pushes an ascii character code onto the stack + lda \char + push + sta 0, x ; char low byte + stz 1, x ; char high byte + .endm + + .macro push_coords, coord_x, coord_y ; push a set of (x,y) coordinates onto the data stack + lda \coord_x + push + sta 0, x ; low byte + stz 1,x ; high byte is zero + lda \coord_y + push + sta 0,x ; same here + stz 1,x + .endm + + .macro to_r ; 
pop the top of the stack off and save it in the return (hardware) stack: (n -- ) + lda 1, x + pha + lda 0, x + pha + pop + .endm + + .macro from_r ; pop the top of the return stack off and put it on the data stack: ( -- n) + push + pla + sta 0, x + pla + sta 1, x + .endm + diff --git a/routines.asm b/routines.asm new file mode 100644 index 0000000..570a8ce --- /dev/null +++ b/routines.asm @@ -0,0 +1,174 @@ +; ops +; - [ ] nat (positive integers) +; - [ ] * mult (i think anything bigger than 16 bits just gets cut off, george does not need big numbers rite) +; - [ ] / div (what do we do w fractions/remainders) +; - [ ] + add (same w *) +; - [ ] - sub (what should happen when y > x & x - y? ) +; - [ ] int (pos/neg 2s complement numbers) (lots of the above applies) +; - [ ] * mult +; - [ ] / div +; - [ ] + add +; - [ ] - sub +; - [ ] bool +; - [ ] & and +; - [ ] | or +; +; literals (these will be macros here, meaning when i implement them in the compiler they will depend on what literals r being pushed) +; - [ ] bool +; - [ ] num +; - [ ] str +; - [ ] char +; +; i/o +; - [ ] write (pop off an address and some data and store it) +; - [ ] read (pop off an address and push the data at that address onto the stack) +; - [ ] key (stealing algo from forth: pause execution until there's a key pressed, then push that key on the stack, i don't think this breaks anything, this could probably be written easily in fuzzy w/ write and read words) +; +; control flow +; - [ ] if (pop off a bool and jump to word, this will also depend on compiler, so writing as macro) + +; for example +; a is nat: 5 +; b is nat: 3 +; c bool is nat: if[a | b] *we'll figure out syntax later* +; true c +; +; (assuming c gets inlined since it's only referenced in the body) +; compiles to +; a: +; lda 5 +; dex +; dex +; sta 0, x +; rts +; +; b: +; lda 3 +; dex +; dex +; sta 0, x +; rts +; +; main: +; lda #1 +; dex +; dex +; sta 0, x +; stz 1,x +; lda #1 +; inx +; inx +; bit 2, x +; beq .falsy +; jsr a +; 
; nat_mult ( a b -- a*b )
;
; Multiplies the two 16-bit cells on top of the data stack and replaces them
; with a single 16-bit product. Anything that overflows 16 bits is cut off,
; so every result stays one cell wide.
;
; Stack layout (x is the data stack pointer, cells are little-endian):
;   0,x / 1,x  low / high byte of the top cell    (multiplier, destroyed)
;   2,x / 3,x  low / high byte of the second cell (multiplicand)
;
; Algorithm: MSB-first shift-and-add. Each round doubles the running result,
; shifts the next multiplier bit (most significant first) into carry, and adds
; the multiplicand when that bit was set. After 16 rounds `result` holds the
; low 16 bits of the product.
;
; Clobbers: a, y, flags, and the two scratch bytes at `result`.
nat_mult:
  result = $200         ; 2-byte scratch accumulator (low byte first)
  stz result            ; running product starts at 0
  stz result + 1
  ldy #16               ; one round per multiplier bit
  .1:
    asl result          ; result *= 2 (bits shifted out of the top are discarded)
    rol result + 1
    asl 0, x            ; shift multiplier left as a 16-bit value:
    rol 1, x            ; low byte first, its carry into the high byte; bit 15 ends up in carry
    bcc .2              ; bit was 0 -> nothing to add this round
    clc
    lda 2, x            ; result += multiplicand (16-bit add)
    adc result
    sta result
    lda 3, x
    adc result + 1
    sta result + 1
  .2:
    dey
    bne .1
  inx                   ; drop one cell: two operands in, one product out
  inx
  lda result            ; low byte goes to 0,x like every other cell
  sta 0, x
  lda result + 1        ; high byte goes to 1,x
  sta 1, x
  rts
PartialEq, Eq)] +pub enum Effect { + Paint, + Sing, + Store, + Do, +} + +#[derive(Debug, Eq, PartialEq, Clone)] +pub struct WordDef { + pub name: String, + pub values: Vec, + pub r#type: WType, + pub effects: Vec, +} + +impl WordDef { + pub fn new(name: S, values: Vec, r#type: WType, effects: Vec) -> Self + where + S: Into, + { + WordDef { + name: name.into(), + values, + r#type, + effects, + } + } + + fn flatten_values(&self, symtab: &Symtab) -> Vec { + let mut vals = vec![]; + for value in self.values.iter() { + if let Value::Word(string) = value { + let symbol = symtab.get(string); + let mut child_vals = symbol.flatten_values(symtab); + vals.append(&mut child_vals); + } else { + vals.push(value.clone()); + } + } + vals + } + + pub fn flatten(&self, symtab: &Symtab) -> WordDef { + let values = self.flatten_values(symtab); + WordDef::new( + self.name.clone(), + values, + self.r#type.clone(), + self.effects.clone(), + ) + } +} + +impl Display for WordDef { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "{:#?}", self)?; + Ok(()) + } +} + +impl Checkable for WordDef { + fn check(&mut self, mut stack: TypeStack) -> Result, String> { + for value in self.values.iter() { + println!( + "WORDDEF: checking value {:?} in word {:?}, current stack is {:?}", + value, self.name, stack + ); + if let Value::Word(_) = value { + panic!("Don't typecheck on an unflattened word!"); + } + match value { + Value::Op(op) => { + if stack.len() < 2 { + return Err(format!( + "Checking def {:?}, stack is {:?}, expected a stack with 2 elements, got only {:?}", + self.name, + stack, + stack.len() + )); + } else { + match op.as_str() { + "+" | "*" => { + println!("WORDDEF: checking Op"); + stack = stack.test_consume( + TypeStack::new().push(VType::Nat).push(VType::Nat), + )?; + stack = stack.push(VType::Nat); + } + "&" | "||" => { + stack = stack.test_consume( + TypeStack::new().push(VType::Bool).push(VType::Bool), + )?; + stack = 
stack.pop().pop().push(VType::Bool); + } + _ => return Err(format!("unknown opcode {:?}", op)), + } + } + } + &Value::Nat(_) => stack = stack.push(VType::Nat), + &Value::Int(_) => stack = stack.push(VType::Int), + &Value::Bool(_) => stack = stack.push(VType::Int), + &Value::Str(_) => stack = stack.push(VType::Str), + &Value::Char(_) => stack = stack.push(VType::Char), + &Value::Word(_) => unreachable!(), + }; + } + Ok(stack) + } +} diff --git a/src/bin/main.rs b/src/bin/main.rs new file mode 100644 index 0000000..24197c8 --- /dev/null +++ b/src/bin/main.rs @@ -0,0 +1,11 @@ +use fuzzy::{ + ast::VType, + parse, + typecheck::{Checkable, TypeStack}, +}; + +fn main() { + let input = "test is: 5 9 *\n\nexample int nat is ~paint ~sing:\n 5 \"lol\" test \"c\" \n\narrest int nat is ~paint ~sing: 5 \"lol\" \"a\" example\n\n5 6 * arrest example arrest"; + let mut program = parse(input).unwrap(); + println!("{:?}", program.check(TypeStack::new())); +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..5a29d41 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,7 @@ +pub mod ast; +pub mod compiler; +pub mod parser; +pub mod typecheck; + +pub use compiler::*; +pub use parser::*; diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..c360b81 --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,483 @@ +use std::collections::HashMap; + +// TODO: +// - add error types and error handling +// - figure out if we can use a single ast or should make a second ast for compilation, then +// convert to that after parsing/do another parsing step but on the ast made the first time +// (i also don't really know what i'm doing so maybe u never do that or always do that or +// something i don't understand yet) +// - improve whitespace parsing +// - think more about language rules: +// - what types actually need to be exposed to the "user" (me) and what types can be internal +// to the compiler (e.g. 
char vs str, where a str of length 1 could be treated as a char +// internally (put on the stack as a value instead of put somewhere in memory)) + +use chumsky::{ + prelude::*, + text::{ident, keyword}, +}; + +use crate::{ + ast::*, + typecheck::{Checkable, TypeStack}, +}; + +#[derive(PartialEq, Debug, Clone)] +pub struct Symtab(HashMap); + +impl Symtab { + fn new() -> Self { + Symtab(HashMap::new()) + } + + pub fn get(&self, string: &String) -> &WordDef { + self.0.get(string).unwrap() + } + + pub fn add_def(&mut self, def: WordDef) { + let key = def.name.clone(); + self.0.insert(key, def); + } + + pub fn flatten_refs(&mut self) { + let mut new_symtab = Symtab::new(); + for (_string, def) in self.0.iter() { + new_symtab.add_def(def.flatten(self)); + } + // this is an abomination, there must be a better way + self.0.clear(); + self.0.extend(new_symtab.0); + } +} + +impl From> for Symtab { + fn from(value: Vec) -> Self { + let symtab: HashMap = value + .iter() + .map(|x| (x.name.to_owned(), x.to_owned())) + .collect(); + Symtab(symtab) + } +} + +#[derive(Debug, PartialEq)] +pub struct Program { + symtab: Symtab, + body: Vec, +} + +impl Program { + fn new(defs: Vec, body: Vec) -> Self { + let symtab = Symtab::from(defs); + Program { symtab, body } + } + + fn reduce_body(&mut self) { + let mut vals = vec![]; + for value in self.body.iter() { + if let Value::Word(string) = value { + let symbol = self.symtab.get(string); + let mut child_vals = symbol.flatten(&self.symtab).values; + vals.append(&mut child_vals); + } else { + vals.push(value.clone()); + } + } + self.body = vals; + } +} + +impl<'a> Checkable for Program { + fn check( + &mut self, + mut stack: crate::typecheck::TypeStack, + ) -> Result, String> { + //TODO: https://trykv.medium.com/algorithms-on-graphs-directed-graphs-and-cycle-detection-3982dfbd11f5 + fn cyclic_graph_check(symtab: &Symtab) -> Result<(), String> { + let mut visited: Vec<&WordDef> = vec![]; + let mut rec_stack: Vec<&WordDef> = vec![]; + for 
(_, def) in symtab.0.iter() { + if !visited.contains(&def) { + dfs_cycle_check(def, &mut visited, &mut rec_stack, symtab)?; + } + } + Ok(()) + } + fn dfs_cycle_check<'a>( + def: &'a WordDef, + visited: &mut Vec<&'a WordDef>, + rec_stack: &mut Vec<&'a WordDef>, + symtab: &'a Symtab, + ) -> Result<(), String> { + visited.push(def); + rec_stack.push(def); + + for val in def.values.iter() { + if let Value::Word(name) = val { + let next_def = symtab.get(name); + if !visited.contains(&next_def) { + dfs_cycle_check(next_def, visited, rec_stack, symtab)?; + } else if rec_stack.contains(&next_def) { + return Err(format!( + "illegal recursion detected! definitions {}create a reference cycle", + rec_stack + .iter() + .map(|def| { + let mut name = def.name.clone(); + name.insert(0, '"'); + name.push_str("\" "); + name + }) + .collect::() + )); + } + } + } + + rec_stack.pop(); + Ok(()) + } + + cyclic_graph_check(&self.symtab)?; + + self.symtab.flatten_refs(); + + println!( + "we have flattened refs, here's the symtab: {:#?}\n", + self.symtab + ); + + // then check that all symtab defs are sound + // at this point they shouldn't have any references, + // and if they do we will panic (see the Checkable impl for WordDef) + for (name, def) in self.symtab.0.iter_mut() { + let local_stack: TypeStack = def.r#type.pop.clone().into(); + println!( + "PARSED: checking {:?}\ncurrent stack: {local_stack:?}\nword: {:?}", + name, def + ); + let result_stack = def.check(local_stack)?; + if let Err(error) = result_stack.test(&def.r#type.push.clone().into()) { + println!("{error:?}"); + return Err(error); + } + } + + self.reduce_body(); + + // then we'll check that the body is sound with the given stack + // maybe in the future i'll change this trait so there isn't a stack + // param and the implementer picks what stack to check against + // + // TODO: this block also is shared behavior between basically all checkables but potentially with + // different internal types for T, will have to figure 
out how to dedup this later + for value in self.body.iter() { + match value { + Value::Op(op) => { + if stack.len() < 2 { + return Err(format!( + "expected a stack with 2 elements, got only {:?}", + stack.len() + )); + } else { + match op.as_str() { + "+" | "*" => { + stack = stack.test_consume( + TypeStack::new().push(VType::Nat).push(VType::Nat), + )?; + stack = stack.pop().pop().push(VType::Nat); + } + "&" | "||" => { + stack = stack.test_consume( + TypeStack::new().push(VType::Bool).push(VType::Bool), + )?; + stack = stack.pop().pop().push(VType::Bool); + } + _ => return Err(format!("unknown opcode {:?}", op)), + } + } + } + &Value::Nat(_) => stack = stack.push(VType::Nat), + &Value::Int(_) => stack = stack.push(VType::Int), + &Value::Bool(_) => stack = stack.push(VType::Bool), + &Value::Str(_) => stack = stack.push(VType::Str), + &Value::Char(_) => stack = stack.push(VType::Char), + &Value::Word(_) => unreachable!(), + }; + } + Ok(stack) + } +} + +pub fn parse(input: S) -> Result>> +where + S: ToString, +{ + let parsed = match parser().parse(input.to_string()) { + Ok(parsed) => parsed, + Err(error) => return Err(error), + }; + + Ok(parsed) +} + +pub fn parser() -> impl Parser> { + let name = ident().labelled("word_name"); + let value = { + // nats will be coerced to ints at compile time depending on word type + let nat = text::int(10).map(|s: String| Value::Nat(s.parse().unwrap())); + + // vice versa for non-negative ints + let int = just("-").ignore_then( + text::int::>(10).map(|s: String| Value::Int(s.parse().unwrap())), + ); + + let op = one_of::>("*+-/&|<>").map(|s| Value::Op(s.to_string())); + + let str_or_char = just::>('"') + .ignore_then(none_of('"').repeated()) + .then_ignore(just('"')) + .map(|s: Vec| match s.len() { + 1 => Value::Char(s[0]), + _ => Value::Str(s.into_iter().collect::()), + }); + + let word = name.map(|n: String| Value::Word(n)); + + let bool = keyword::<_, _, Simple>("true") + .map(|_| Value::Bool(true)) + 
.or(keyword("false").map(|_| Value::Bool(false))); + + nat.or(int).or(bool).or(str_or_char).or(word).or(op) + }; + let value_seperator = text::newline() + .repeated() + .at_least(2) + .not() + .rewind() + .then_ignore( + // TODO: figure out if this could be simplified + choice(( + just(" ") + .repeated() + .then_ignore(just("\n").repeated().exactly(1).or_not()), + just("\n") + .repeated() + .exactly(1) + .then_ignore(just(" ").repeated().or_not()), + )) + .then_ignore(just(" ").repeated()), + ); + + let body = value_seperator + .or_not() + .ignored() + .then(value) + .map(|(_, v)| v) + .repeated() + .then_ignore( + just(" ") + .repeated() + .ignored() + .then(text::newline().repeated().at_least(2).or_not()), + ); + let word_def = { + let pop_types = { + let pop_type = keyword("nat") + .to(VType::Nat) + .or(keyword("int").to(VType::Int)) + .or(keyword("bool").to(VType::Bool)) + .or(keyword("char").to(VType::Char)) + .or(keyword("str").to(VType::Str)); + + pop_type + .padded() + .repeated() + .collect::>() + .labelled("pop_types") + .boxed() + }; + + let push_types = { + let push_type = keyword("nat") + .to(VType::Nat) + .or(keyword("int").to(VType::Int)) + .or(keyword("char").to(VType::Char)) + .or(keyword("str").to(VType::Str)); + push_type + .padded() + .repeated() + .collect::>() + .labelled("push_types") + .boxed() + }; + + let effects = { + let effect_keyword = keyword("paint") + .to(Effect::Paint) + .or(keyword("sing").to(Effect::Sing)) + .or(keyword("store").to(Effect::Store)) + .or(keyword("do").to(Effect::Do)); + + let effect = just("~").ignore_then(effect_keyword).labelled("effect"); + + effect.padded().repeated().labelled("effects").boxed() + }; + + let definition = text::whitespace() + .ignore_then(name) + .then_ignore(just(" ")) + .then(pop_types) + .then_ignore(keyword("is").or(keyword("are")).padded()) + .then(push_types) + .then(effects) + .then_ignore(just(":")) + .map(|(((name, pop_types), push_types), effects)| { + (name, pop_types, push_types, 
effects) + }); + + definition + .then(body.clone()) + .map(|((name, pop_types, push_types, effects), body)| { + WordDef::new( + name, + body, + WType::new().push(push_types).pop(pop_types), + effects, + ) + }) + }; + word_def + .repeated() + .then(body) + .map(|(defs, body): (Vec, Vec)| Program::new(defs, body)) +} + +#[cfg(test)] +mod tests { + use crate::typecheck::TypeStack; + + use super::*; + + #[test] + fn test_parser() { + let input = " + a is nat: 5 7 * + + b is nat: + 5 a * + + a + "; + + let ast = vec![ + WordDef::new( + "a", + vec![Value::Nat(5), Value::Nat(7), Value::Op("*".to_string())], + WType::new().push(vec![VType::Nat]), + vec![], + ), + WordDef::new( + "b", + vec![ + Value::Nat(5), + Value::Word("a".to_string()), + Value::Op("*".to_string()), + ], + WType::new().push(vec![VType::Nat]), + vec![], + ), + ]; + let body: Vec = vec![Value::Word("a".to_string())]; + println!("sound: {:?}\n", parser().parse(input).unwrap()); + assert_eq!(parser().parse(input).unwrap(), Program::new(ast, body)); + } + + #[test] + fn test_typecheck() { + let sound = " + a is nat: 5 7 * + + b nat nat is nat: + a * + + a 5 * + "; + + let unsound_defs = " + a is nat nat: 5 7 * + + b nat is nat: + a * + + a 5 * + "; + + let unsound_body = " + a is nat: 5 7 * + + b nat is nat: + a * + + a 5 * * + "; + + let unsound_body_and_defs = " + a is nat nat: 5 7 * + + b nat is nat: + a * + + a 5 * * + "; + + fn typecheck(input: &str, sound: bool) { + let mut parsed = parse(input).unwrap(); + parsed.symtab.flatten_refs(); + parsed.reduce_body(); + + let stack = TypeStack::new(); + if sound { + assert!(parsed.check(stack).is_ok()); + } else { + assert!(parsed.check(stack).is_err()); + } + } + + typecheck(sound, true); + typecheck(unsound_defs, false); + typecheck(unsound_body, false); + typecheck(unsound_body_and_defs, false); + } + + #[test] + fn test_illegal_recursion() { + let illegal = " + a is: b + + b is: a + + a + "; + + let illegal_multilevel = " + a is: b + + b is: c + + c is: a 
/// A stack of types used to simulate a program's effect on the data stack.
///
/// The last element of the inner vector is the top of the stack.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TypeStack<T>(Vec<T>);

impl<T> Default for TypeStack<T> {
    fn default() -> Self {
        Self::new()
    }
}

impl<T> TypeStack<T> {
    /// Creates an empty stack.
    pub fn new() -> Self {
        TypeStack(Vec::new())
    }

    /// Number of cells currently on the stack.
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns `true` when the stack holds no cells.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Drops the top cell and returns the stack; an empty stack is returned
    /// unchanged.
    pub fn pop(mut self) -> TypeStack<T> {
        let _ = self.0.pop();
        self
    }

    /// Pushes `t` on top and returns the stack.
    pub fn push(mut self, t: T) -> TypeStack<T> {
        self.0.push(t);
        self
    }

    /// Moves every element of `t` onto the stack, leaving `t` empty; the last
    /// element of `t` becomes the new top.
    pub fn append(&mut self, t: &mut Vec<T>) {
        self.0.append(t);
    }
}

impl<T> TypeStack<T>
where
    T: std::fmt::Debug + PartialEq,
{
    /// Tests that `ts` matches the top of the stack and removes the matched
    /// cells.
    ///
    /// # Errors
    ///
    /// Returns a message when `ts` is deeper than the stack or when a cell
    /// differs from the expected type.
    pub fn test_consume(mut self, ts: TypeStack<T>) -> Result<TypeStack<T>, String> {
        if ts.len() > self.len() {
            return Err(format!("error during test: {ts:?} is bigger than {self:?}"));
        }
        for (index, t) in ts.0.iter().rev().enumerate() {
            let val = self.0.pop().expect("stack is at least as deep as ts");
            if val != *t {
                return Err(format!("type mismatch between {self:?} and {ts:?}\n{t:?} doesn't match {val:?} at stack depth {index:?}"));
            }
        }
        Ok(self)
    }

    /// Tests that `ts` matches the top of the stack without modifying it.
    ///
    /// Cells are compared pairwise from the top down: the top of `ts` against
    /// the top of the stack, the next cell of `ts` against the next cell of
    /// the stack, and so on.
    ///
    /// # Errors
    ///
    /// Returns a message when `ts` is deeper than the stack or when a cell
    /// differs from the expected type.
    pub fn test(&self, ts: &TypeStack<T>) -> Result<(), String> {
        if ts.len() > self.len() {
            return Err(format!("error during test: {ts:?} is bigger than {self:?}"));
        }
        // Walk both stacks from the top in lockstep so each expected type is
        // compared with the cell at the same depth, not always with the top.
        for (index, (val, t)) in self.0.iter().rev().zip(ts.0.iter().rev()).enumerate() {
            if val != t {
                return Err(format!("type mismatch between {self:?} and {ts:?}\n{t:?} doesn't match {val:?} at stack depth {index:?}"));
            }
        }
        Ok(())
    }
}

impl<T> From<Vec<T>> for TypeStack<T> {
    /// Wraps `value` as a stack; its last element is the top.
    fn from(value: Vec<T>) -> Self {
        TypeStack(value)
    }
}

/// Something whose effect on a type stack can be verified.
pub trait Checkable<T> {
    /// Checks `self` starting from `stack` and returns the resulting stack,
    /// or a message describing the first type error found.
    fn check(&mut self, stack: TypeStack<T>) -> Result<TypeStack<T>, String>;
}