diff --git a/Cargo.lock b/Cargo.lock deleted file mode 100644 index eb6139b..0000000 --- a/Cargo.lock +++ /dev/null @@ -1,271 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "ahash" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy", -] - -[[package]] -name = "allocator-api2" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" - -[[package]] -name = "cc" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "324c74f2155653c90b04f25b2a47a8a631360cb908f92a772695f430c7e31052" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "chumsky" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" -dependencies = [ - "hashbrown", - "stacker", -] - -[[package]] -name = "either" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" - -[[package]] -name = "fuzzy" -version = "0.1.0" -dependencies = [ - "chumsky", - "indextree", -] - -[[package]] -name = "hashbrown" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", - "allocator-api2", -] - -[[package]] -name = "heck" -version = "0.5.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "indextree" -version = "4.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d6f1b8dbc8f1e5a0f45e05b9293c42cbab79086baeb3e914d3936f8149edc4f" -dependencies = [ - "indextree-macros", -] - -[[package]] -name = "indextree-macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357230c23ee6024223892ce0de19888a04139ca5bb94f5becb04d38b75a4bccf" -dependencies = [ - "either", - "itertools", - "proc-macro2", - "quote", - "strum", - "syn", - "thiserror", -] - -[[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] - -[[package]] -name = "libc" -version = "0.2.155" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" - -[[package]] -name = "once_cell" -version = "1.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" - -[[package]] -name = "proc-macro2" -version = "1.0.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "psm" -version = "0.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" -dependencies = [ - "cc", -] - -[[package]] -name = "quote" -version = "1.0.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" -dependencies = 
[ - "proc-macro2", -] - -[[package]] -name = "rustversion" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" - -[[package]] -name = "stacker" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" -dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "winapi", -] - -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" -dependencies = [ - "strum_macros", -] - -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn", -] - -[[package]] -name = "syn" -version = "2.0.71" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "thiserror" -version = "1.0.63" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.63" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "unicode-ident" -version = "1.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" - 
-[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "zerocopy" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] diff --git a/Cargo.toml b/Cargo.toml deleted file mode 100644 index 7e02987..0000000 --- a/Cargo.toml +++ /dev/null @@ -1,15 +0,0 @@ -[package] -name = "fuzzy" -version = "0.1.0" -edition = "2021" - -[dependencies] -chumsky = { version = "0.9.3"} -indextree = "4.7.2" - -[lib] - -crate-type = ["lib"] - -[[bin]] -name = "main" diff --git a/README.md b/README.md index a6b08fa..4e7fb13 100644 --- a/README.md +++ b/README.md @@ -16,16 +16,26 @@ fuzzy is part of george, and shouldn't run anywhere else. 
#### low-level -(most of) fuzzy could be made with assembler macros, but that's no fun +(most of) fuzzy could be written as assembler macros, but that's no fun #### reliable if fuzzy says it can run, george can run it -## feature progress +## how to work on fuzzy -- [x] parser - - roughly complete, but want to finish the whole pipeline before adding things to the parser -- [x] typechecker - - generates a "type stack" from parsed input and checks that word definition types match their body -- [ ] code generation +edit `program.inc` and run `./run.sh`. the program gets included in the fuzzy compiler `fuzzy.asm` and is assembled with `vasm6502_oldstyle`, then george runs the program, reading out her system image when she reaches `stp` or `brk` + +then the program she compiled gets formatted as a standard 32k rom, and she reads it again, and then shows her system image again when the program finishes (hits `stp` or `brk`). + +since fuzzy works on a zero-page data stack, it's pretty easy to read the results of a program from the hexdump. + +for now this loop only works on apple silicon, but eventually i'll compile a `george` binary for x86 linux and switch based on the host platform. + +## reference help + +i wrote [syntax](./syntax.md) and [semantics](./semantics.md) docs to keep track of how fuzzy works before starting work on the compiler implementation in assembly. they're the _official_ source of truth for how fuzzy works. assume that the compiler implementation is always in flux :) + +## a note on implementation + +i was writing fuzzy's compiler in rust for a sec, but then i realized that it would be a fun challenge to write it in assembly. it's been wayyy easier! and fun! 
and so rewarding :) this feels like a flex but i'm genuinely just so happy to see george & fuzzy playing together in this little computer world i've made <3 diff --git a/fuzzy.asm b/fuzzy.asm new file mode 100644 index 0000000..31f3364 --- /dev/null +++ b/fuzzy.asm @@ -0,0 +1,214 @@ +; ʕ·ᴥ·ʔ- fuzzy v0 rev 0: parse program text and spit out binary representation @ $4000 + + .include "./macro.inc" + +n = $05 ; temporary storage for data stack operations +base = $00 +result_binary_base = base ; pointer to where the next byte of binary data should be stored +binary_base_index = result_binary_base + 2 ; offset for that pointer +binary_subroutine_address = binary_base_index + 1 ; pointer to a subroutine to be written to the binary + + .org $8000 + .include "./subroutines.inc" + +program_text: + .include "./program.inc" + +reset: + sei + lda #0 + ldx #0 + ldy #0 + +main: + stz binary_base_index + lda #$40 + sta result_binary_base + 1 ; set where to store resulting binary + stz binary_subroutine_address + lda #$80 + sta binary_subroutine_address + 1 ; available subroutines start at $8000 + jsr compile_values + stp + +; parser loop, eventually this will be able to handle longer program strings, but indexing by y is fine for now +compile_values: + ldy #0 +parser_loop: + lda program_text, y ; get character at index + cmp #0 ; is eof? + beq .end ; yes, exit loop + cmp #20 ; is space? + beq parser_loop ; yes, skip this char + cmp #12 ; is newline? + beq .newline ; yes, handle newline + jsr compile_values_op + jsr compile_values_nat + .newline: ; we reached a newline, y is program string index + iny ; WARN: don't accidentally iny in this loop w/out handling a character + lda program_text, y ; load next char + cmp #12 ; is newline? 
+ bne parser_loop ; no, keep parsing tokens + rts ; yes, no more tokens in body (see syntax.md for info) + .end: + rts + +; a holds character value, y program text index, only iny if you find a matching character & consume it +compile_values_op: + cmp #"+" ; i personally think this syntax is really silly but whatever, one of these days i'm gonna write my own assembler and document everything cause vasm documentation is kinda terrible + bne .next + .is_plus: + lda #1 + jsr store_subroutine + rts + .next: + rts + ; cmp #"!" ; commenting these out for now to handle a single simple case + ; cmp #"&" + ; cmp #"|" + ; cmp #"-" + ; cmp #"*" + ; cmp #"/" + ; cmp #"=" + ; cmp #">" + ; cmp #"<" + ; cmp #"#" + +; a holds character value, y program text index, only iny if you find a matching character & consume it +; TODO: +; 1-3 digit decimal values +; 1-2 digit hex values +compile_values_nat: + ; TODO: + ; cmp #"$" ; is hex? + ; bne .decimal ; no, try decimal + ; cmp + ; rts + cmp #47 ; less than (before) start of 0-9 georgescii range? + bcc .not_nat + cmp #57 ; greater than end of 0-9 georgescii range? + bcs .not_nat + pha + lda #$a9 ; $a9: lda imm + jsr store_binary + pla + jsr georgescii_decimal_to_value + jsr store_binary + lda #2 ; push + jsr store_subroutine + iny + rts + .not_nat: + rts + +; georgescii decimal value in a register, return equivalent plain value in a register +georgescii_decimal_to_value: + clc + sbc #$30 ; decimal digits start at georgescii $30 + rts + +; we have binary in the a register we want to store +store_binary: + phy + ldy binary_base_index + sta (result_binary_base), y + inc binary_base_index + bne .not_overflow ; did we roll over? 
+ inc result_binary_base + 1 ; yes, roll over base address + .not_overflow: ; no, carry on as normal + ply + rts + +; binary_subroutine_address is a pointer to a subroutine that we want to store +; the first byte at the subroutine's address is its length +store_contiguous_binary: + pha ; just to be safe + lda (binary_subroutine_address) ; get the subroutine length + tax ; loop counter + ldy #1 ; index into subroutine, offset by one to skip subroutine length + .loop: + lda (binary_subroutine_address), y + jsr store_binary + iny + dex + bne .loop + .end: + pla + rts + +; this wouldn't be necessary if we could get the +; address of a label in vasm, but that's for another time +; (when i feel like writing an assembler lol) +; for now, pass the index of the subroutine (in subroutines.inc) +; to a and it will get written to binary_subroutine_address +get_subroutine_address: + pha + tax ; set up counter + bne .loop ; first subroutine? + stz binary_subroutine_address ; yes, store its address + lda #$80 + sta binary_subroutine_address + 1 + rts + .loop: ; loop through + lda (binary_subroutine_address) ; no, load length of subroutine + inc ; distance from next subroutine + clc + adc binary_subroutine_address ; add it to the current address + sta binary_subroutine_address + bcs .no_carry + lda binary_subroutine_address + 1 ; add the carry to the high byte of address + adc #0 + sta binary_subroutine_address + 1 + .no_carry: + dex ; is this our address? 
+ bne .loop ; yes, we're done + pla + rts + + +; pass subroutine index to a and it will get written into the binary +; TODO: stabilize subroutine location & just write a `jsr $subroutine` to the binary +store_subroutine: + pha + phy + phx + jsr get_subroutine_address + jsr store_contiguous_binary + ; reset subroutine address + stz binary_subroutine_address + lda #$80 + sta binary_subroutine_address + 1 + plx + ply + pla + rts + +; write error message and stop execution +error: + ldy #0 + .loop: + lda .message, y + sta $4000, y + beq .end + iny + bra .loop + .end: + stp + .message: + .asciiz "ruh roh! fuzzy couldn't compile" + + + +isr: ; interrupt service routine + pha + phx + phy + ply + plx + pla + rti + + + + .org $fffc + .word reset + .word isr diff --git a/george b/george new file mode 100755 index 0000000..0693d36 Binary files /dev/null and b/george differ diff --git a/macro.inc b/macro.inc index 96a3471..1aaf3a8 100644 --- a/macro.inc +++ b/macro.inc @@ -14,9 +14,13 @@ inx .endm - .macro push ; push a data stack cell + .macro push, cell_high, cell_low ; push a data stack cell dex dex + lda \cell_low + sta 0, x + lda \cell_high + sta 1, x .endm .macro push2 ; push 2 data stack cells diff --git a/program.inc b/program.inc new file mode 100644 index 0000000..bfb9f44 --- /dev/null +++ b/program.inc @@ -0,0 +1 @@ + .asciiz '2 3 +' diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..51090f6 --- /dev/null +++ b/run.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +set -e + +rm *.bin *.rom +vasm6502_oldstyle fuzzy.asm -dotdir -wdc02 -ldots -Fbin -o fuzzy.rom &> /dev/null; +echo -e "\nʕ·ᴥ·ʔ- source text:\n"; +cat program.inc; +cat fuzzy.rom | ./george > compiled.bin; +dd skip=16384 count=500 if=compiled.bin of=compiled.rom bs=1 &> /dev/null; +truncate -s 32k compiled.rom &> /dev/null; +printf '\x80\x00\x00' | dd of=compiled.rom bs=1 seek=32765 count=3 conv=notrunc &> /dev/null; +cat compiled.rom | ./george > result.bin; +echo -e "\n\nʕ·ᴥ·ʔ- compiled program 
result:\n"; +hexdump -C ./result.bin; +echo -e ""; diff --git a/src/ast.rs b/src/ast.rs deleted file mode 100644 index d4e93df..0000000 --- a/src/ast.rs +++ /dev/null @@ -1,168 +0,0 @@ -use core::panic; -use std::{any::Any, fmt::Display}; - -use crate::{ - typecheck::{self, Checkable, TypeStack}, - Symtab, -}; - -#[derive(PartialEq, Eq, Debug, Clone)] -// enum values are parser values, not compiler values, -// e.g. for `Str(String)` the `String` value will be put somewhere in memory -// and a pointer to it will be put on the stack -pub enum Value { - Nat(u16), // 16-bit natural number - Int(i16), // 16-bit twos-complement integer - Bool(bool), - Op(String), - Char(char), // 8-bit georgescii character padded with leading zeros (might change later) - Str(String), // 16-bit pointer to a string - Word(String), // 16-bit pointer to a word -} - -#[derive(PartialEq, Eq, Debug, Clone, PartialOrd, Ord)] -pub enum VType { - Nat, - Int, - Bool, - Char, - Str, -} - -#[derive(Eq, PartialEq, Debug, Clone)] -pub struct WType { - pub pop: Vec, - pub push: Vec, -} - -impl WType { - pub fn new() -> Self { - WType { - pop: vec![], - push: vec![], - } - } - - // Adds a `push` type - pub fn push(mut self, mut t: Vec) -> Self { - self.push.append(&mut t); - self - } - - // Adds a `pop` type (Note: does not actually pop anything) - pub fn pop(mut self, mut t: Vec) -> Self { - self.pop.append(&mut t); - self - } -} - -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum Effect { - Paint, - Sing, - Store, - Do, -} - -#[derive(Debug, Eq, PartialEq, Clone)] -pub struct WordDef { - pub name: String, - pub values: Vec, - pub r#type: WType, - pub effects: Vec, -} - -impl WordDef { - pub fn new(name: S, values: Vec, r#type: WType, effects: Vec) -> Self - where - S: Into, - { - WordDef { - name: name.into(), - values, - r#type, - effects, - } - } - - fn flatten_values(&self, symtab: &Symtab) -> Vec { - let mut vals = vec![]; - for value in self.values.iter() { - if let Value::Word(string) = value { - 
let symbol = symtab.get(string); - let mut child_vals = symbol.flatten_values(symtab); - vals.append(&mut child_vals); - } else { - vals.push(value.clone()); - } - } - vals - } - - pub fn flatten(&self, symtab: &Symtab) -> WordDef { - let values = self.flatten_values(symtab); - WordDef::new( - self.name.clone(), - values, - self.r#type.clone(), - self.effects.clone(), - ) - } -} - -impl Display for WordDef { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - writeln!(f, "{:#?}", self)?; - Ok(()) - } -} - -impl Checkable for WordDef { - fn check(&mut self, mut stack: TypeStack) -> Result, String> { - for value in self.values.iter() { - println!( - "WORDDEF: checking value {:?} in word {:?}, current stack is {:?}", - value, self.name, stack - ); - if let Value::Word(_) = value { - panic!("Don't typecheck on an unflattened word!"); - } - match value { - Value::Op(op) => { - if stack.len() < 2 { - return Err(format!( - "Checking def {:?}, stack is {:?}, expected a stack with 2 elements, got only {:?}", - self.name, - stack, - stack.len() - )); - } else { - match op.as_str() { - "+" | "*" => { - println!("WORDDEF: checking Op"); - stack = stack.test_consume( - TypeStack::new().push(VType::Nat).push(VType::Nat), - )?; - stack = stack.push(VType::Nat); - } - "&" | "||" => { - stack = stack.test_consume( - TypeStack::new().push(VType::Bool).push(VType::Bool), - )?; - stack = stack.pop().pop().push(VType::Bool); - } - _ => return Err(format!("unknown opcode {:?}", op)), - } - } - } - &Value::Nat(_) => stack = stack.push(VType::Nat), - &Value::Int(_) => stack = stack.push(VType::Int), - &Value::Bool(_) => stack = stack.push(VType::Int), - &Value::Str(_) => stack = stack.push(VType::Str), - &Value::Char(_) => stack = stack.push(VType::Char), - &Value::Word(_) => unreachable!(), - }; - } - Ok(stack) - } -} diff --git a/src/bin/main.rs b/src/bin/main.rs deleted file mode 100644 index 24197c8..0000000 --- a/src/bin/main.rs +++ /dev/null @@ -1,11 +0,0 @@ 
-use fuzzy::{ - ast::VType, - parse, - typecheck::{Checkable, TypeStack}, -}; - -fn main() { - let input = "test is: 5 9 *\n\nexample int nat is ~paint ~sing:\n 5 \"lol\" test \"c\" \n\narrest int nat is ~paint ~sing: 5 \"lol\" \"a\" example\n\n5 6 * arrest example arrest"; - let mut program = parse(input).unwrap(); - println!("{:?}", program.check(TypeStack::new())); -} diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index 5a29d41..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,7 +0,0 @@ -pub mod ast; -pub mod compiler; -pub mod parser; -pub mod typecheck; - -pub use compiler::*; -pub use parser::*; diff --git a/src/parser/mod.rs b/src/parser/mod.rs deleted file mode 100644 index c360b81..0000000 --- a/src/parser/mod.rs +++ /dev/null @@ -1,483 +0,0 @@ -use std::collections::HashMap; - -// TODO: -// - add error types and error handling -// - figure out if we can use a single ast or should make a second ast for compilation, then -// convert to that after parsing/do another parsing step but on the ast made the first time -// (i also don't really know what i'm doing so maybe u never do that or always do that or -// something i don't understand yet) -// - improve whitespace parsing -// - think more about language rules: -// - what types actually need to be exposed to the "user" (me) and what types can be internal -// to the compiler (e.g. 
char vs str, where a str of length 1 could be treated as a char -// internally (put on the stack as a value instead of put somewhere in memory)) - -use chumsky::{ - prelude::*, - text::{ident, keyword}, -}; - -use crate::{ - ast::*, - typecheck::{Checkable, TypeStack}, -}; - -#[derive(PartialEq, Debug, Clone)] -pub struct Symtab(HashMap); - -impl Symtab { - fn new() -> Self { - Symtab(HashMap::new()) - } - - pub fn get(&self, string: &String) -> &WordDef { - self.0.get(string).unwrap() - } - - pub fn add_def(&mut self, def: WordDef) { - let key = def.name.clone(); - self.0.insert(key, def); - } - - pub fn flatten_refs(&mut self) { - let mut new_symtab = Symtab::new(); - for (_string, def) in self.0.iter() { - new_symtab.add_def(def.flatten(self)); - } - // this is an abomination, there must be a better way - self.0.clear(); - self.0.extend(new_symtab.0); - } -} - -impl From> for Symtab { - fn from(value: Vec) -> Self { - let symtab: HashMap = value - .iter() - .map(|x| (x.name.to_owned(), x.to_owned())) - .collect(); - Symtab(symtab) - } -} - -#[derive(Debug, PartialEq)] -pub struct Program { - symtab: Symtab, - body: Vec, -} - -impl Program { - fn new(defs: Vec, body: Vec) -> Self { - let symtab = Symtab::from(defs); - Program { symtab, body } - } - - fn reduce_body(&mut self) { - let mut vals = vec![]; - for value in self.body.iter() { - if let Value::Word(string) = value { - let symbol = self.symtab.get(string); - let mut child_vals = symbol.flatten(&self.symtab).values; - vals.append(&mut child_vals); - } else { - vals.push(value.clone()); - } - } - self.body = vals; - } -} - -impl<'a> Checkable for Program { - fn check( - &mut self, - mut stack: crate::typecheck::TypeStack, - ) -> Result, String> { - //TODO: https://trykv.medium.com/algorithms-on-graphs-directed-graphs-and-cycle-detection-3982dfbd11f5 - fn cyclic_graph_check(symtab: &Symtab) -> Result<(), String> { - let mut visited: Vec<&WordDef> = vec![]; - let mut rec_stack: Vec<&WordDef> = vec![]; - for 
(_, def) in symtab.0.iter() { - if !visited.contains(&def) { - dfs_cycle_check(def, &mut visited, &mut rec_stack, symtab)?; - } - } - Ok(()) - } - fn dfs_cycle_check<'a>( - def: &'a WordDef, - visited: &mut Vec<&'a WordDef>, - rec_stack: &mut Vec<&'a WordDef>, - symtab: &'a Symtab, - ) -> Result<(), String> { - visited.push(def); - rec_stack.push(def); - - for val in def.values.iter() { - if let Value::Word(name) = val { - let next_def = symtab.get(name); - if !visited.contains(&next_def) { - dfs_cycle_check(next_def, visited, rec_stack, symtab)?; - } else if rec_stack.contains(&next_def) { - return Err(format!( - "illegal recursion detected! definitions {}create a reference cycle", - rec_stack - .iter() - .map(|def| { - let mut name = def.name.clone(); - name.insert(0, '"'); - name.push_str("\" "); - name - }) - .collect::() - )); - } - } - } - - rec_stack.pop(); - Ok(()) - } - - cyclic_graph_check(&self.symtab)?; - - self.symtab.flatten_refs(); - - println!( - "we have flattened refs, here's the symtab: {:#?}\n", - self.symtab - ); - - // then check that all symtab defs are sound - // at this point they shouldn't have any references, - // and if they do we will panic (see the Checkable impl for WordDef) - for (name, def) in self.symtab.0.iter_mut() { - let local_stack: TypeStack = def.r#type.pop.clone().into(); - println!( - "PARSED: checking {:?}\ncurrent stack: {local_stack:?}\nword: {:?}", - name, def - ); - let result_stack = def.check(local_stack)?; - if let Err(error) = result_stack.test(&def.r#type.push.clone().into()) { - println!("{error:?}"); - return Err(error); - } - } - - self.reduce_body(); - - // then we'll check that the body is sound with the given stack - // maybe in the future i'll change this trait so there isn't a stack - // param and the implementer picks what stack to check against - // - // TODO: this block also is shared behavior between basically all checkables but potentially with - // different internal types for T, will have to figure 
out how to dedup this later - for value in self.body.iter() { - match value { - Value::Op(op) => { - if stack.len() < 2 { - return Err(format!( - "expected a stack with 2 elements, got only {:?}", - stack.len() - )); - } else { - match op.as_str() { - "+" | "*" => { - stack = stack.test_consume( - TypeStack::new().push(VType::Nat).push(VType::Nat), - )?; - stack = stack.pop().pop().push(VType::Nat); - } - "&" | "||" => { - stack = stack.test_consume( - TypeStack::new().push(VType::Bool).push(VType::Bool), - )?; - stack = stack.pop().pop().push(VType::Bool); - } - _ => return Err(format!("unknown opcode {:?}", op)), - } - } - } - &Value::Nat(_) => stack = stack.push(VType::Nat), - &Value::Int(_) => stack = stack.push(VType::Int), - &Value::Bool(_) => stack = stack.push(VType::Bool), - &Value::Str(_) => stack = stack.push(VType::Str), - &Value::Char(_) => stack = stack.push(VType::Char), - &Value::Word(_) => unreachable!(), - }; - } - Ok(stack) - } -} - -pub fn parse(input: S) -> Result>> -where - S: ToString, -{ - let parsed = match parser().parse(input.to_string()) { - Ok(parsed) => parsed, - Err(error) => return Err(error), - }; - - Ok(parsed) -} - -pub fn parser() -> impl Parser> { - let name = ident().labelled("word_name"); - let value = { - // nats will be coerced to ints at compile time depending on word type - let nat = text::int(10).map(|s: String| Value::Nat(s.parse().unwrap())); - - // vice versa for non-negative ints - let int = just("-").ignore_then( - text::int::>(10).map(|s: String| Value::Int(s.parse().unwrap())), - ); - - let op = one_of::>("*+-/&|<>").map(|s| Value::Op(s.to_string())); - - let str_or_char = just::>('"') - .ignore_then(none_of('"').repeated()) - .then_ignore(just('"')) - .map(|s: Vec| match s.len() { - 1 => Value::Char(s[0]), - _ => Value::Str(s.into_iter().collect::()), - }); - - let word = name.map(|n: String| Value::Word(n)); - - let bool = keyword::<_, _, Simple>("true") - .map(|_| Value::Bool(true)) - 
.or(keyword("false").map(|_| Value::Bool(false))); - - nat.or(int).or(bool).or(str_or_char).or(word).or(op) - }; - let value_seperator = text::newline() - .repeated() - .at_least(2) - .not() - .rewind() - .then_ignore( - // TODO: figure out if this could be simplified - choice(( - just(" ") - .repeated() - .then_ignore(just("\n").repeated().exactly(1).or_not()), - just("\n") - .repeated() - .exactly(1) - .then_ignore(just(" ").repeated().or_not()), - )) - .then_ignore(just(" ").repeated()), - ); - - let body = value_seperator - .or_not() - .ignored() - .then(value) - .map(|(_, v)| v) - .repeated() - .then_ignore( - just(" ") - .repeated() - .ignored() - .then(text::newline().repeated().at_least(2).or_not()), - ); - let word_def = { - let pop_types = { - let pop_type = keyword("nat") - .to(VType::Nat) - .or(keyword("int").to(VType::Int)) - .or(keyword("bool").to(VType::Bool)) - .or(keyword("char").to(VType::Char)) - .or(keyword("str").to(VType::Str)); - - pop_type - .padded() - .repeated() - .collect::>() - .labelled("pop_types") - .boxed() - }; - - let push_types = { - let push_type = keyword("nat") - .to(VType::Nat) - .or(keyword("int").to(VType::Int)) - .or(keyword("char").to(VType::Char)) - .or(keyword("str").to(VType::Str)); - push_type - .padded() - .repeated() - .collect::>() - .labelled("push_types") - .boxed() - }; - - let effects = { - let effect_keyword = keyword("paint") - .to(Effect::Paint) - .or(keyword("sing").to(Effect::Sing)) - .or(keyword("store").to(Effect::Store)) - .or(keyword("do").to(Effect::Do)); - - let effect = just("~").ignore_then(effect_keyword).labelled("effect"); - - effect.padded().repeated().labelled("effects").boxed() - }; - - let definition = text::whitespace() - .ignore_then(name) - .then_ignore(just(" ")) - .then(pop_types) - .then_ignore(keyword("is").or(keyword("are")).padded()) - .then(push_types) - .then(effects) - .then_ignore(just(":")) - .map(|(((name, pop_types), push_types), effects)| { - (name, pop_types, push_types, 
effects) - }); - - definition - .then(body.clone()) - .map(|((name, pop_types, push_types, effects), body)| { - WordDef::new( - name, - body, - WType::new().push(push_types).pop(pop_types), - effects, - ) - }) - }; - word_def - .repeated() - .then(body) - .map(|(defs, body): (Vec, Vec)| Program::new(defs, body)) -} - -#[cfg(test)] -mod tests { - use crate::typecheck::TypeStack; - - use super::*; - - #[test] - fn test_parser() { - let input = " - a is nat: 5 7 * - - b is nat: - 5 a * - - a - "; - - let ast = vec![ - WordDef::new( - "a", - vec![Value::Nat(5), Value::Nat(7), Value::Op("*".to_string())], - WType::new().push(vec![VType::Nat]), - vec![], - ), - WordDef::new( - "b", - vec![ - Value::Nat(5), - Value::Word("a".to_string()), - Value::Op("*".to_string()), - ], - WType::new().push(vec![VType::Nat]), - vec![], - ), - ]; - let body: Vec = vec![Value::Word("a".to_string())]; - println!("sound: {:?}\n", parser().parse(input).unwrap()); - assert_eq!(parser().parse(input).unwrap(), Program::new(ast, body)); - } - - #[test] - fn test_typecheck() { - let sound = " - a is nat: 5 7 * - - b nat nat is nat: - a * - - a 5 * - "; - - let unsound_defs = " - a is nat nat: 5 7 * - - b nat is nat: - a * - - a 5 * - "; - - let unsound_body = " - a is nat: 5 7 * - - b nat is nat: - a * - - a 5 * * - "; - - let unsound_body_and_defs = " - a is nat nat: 5 7 * - - b nat is nat: - a * - - a 5 * * - "; - - fn typecheck(input: &str, sound: bool) { - let mut parsed = parse(input).unwrap(); - parsed.symtab.flatten_refs(); - parsed.reduce_body(); - - let stack = TypeStack::new(); - if sound { - assert!(parsed.check(stack).is_ok()); - } else { - assert!(parsed.check(stack).is_err()); - } - } - - typecheck(sound, true); - typecheck(unsound_defs, false); - typecheck(unsound_body, false); - typecheck(unsound_body_and_defs, false); - } - - #[test] - fn test_illegal_recursion() { - let illegal = " - a is: b - - b is: a - - a - "; - - let illegal_multilevel = " - a is: b - - b is: c - - c is: a 
- - a - "; - - fn typecheck(input: &str) { - let mut parsed = parse(input).unwrap(); - let stack = TypeStack::new(); - println!("{:?}", parsed.check(TypeStack::new())); - assert!(parsed.check(stack).is_err()); - } - - typecheck(illegal); - typecheck(illegal_multilevel); - } -} diff --git a/src/typecheck.rs b/src/typecheck.rs deleted file mode 100644 index e5fcc1e..0000000 --- a/src/typecheck.rs +++ /dev/null @@ -1,84 +0,0 @@ -use std::fmt::Debug; - -use chumsky::chain::Chain; - -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct TypeStack(Vec); - -impl TypeStack { - pub fn new() -> Self { - TypeStack(vec![]) - } - pub fn len(&self) -> usize { - self.0.len() - } - pub fn is_empty(&self) -> bool { - self.0.is_empty() - } - pub fn pop(mut self) -> TypeStack { - let _ = self.0.pop(); - self - } - pub fn push(mut self, t: T) -> TypeStack { - self.0.push(t); - self - } - - /// tests if ts matches the top of the stack - pub fn test_consume(mut self, ts: TypeStack) -> Result, String> { - if ts.len() > self.len() { - Err(format!("error during test: {ts:?} is bigger than {self:?}")) - } else { - for (index, t) in ts.0.iter().rev().enumerate() { - let val = self.0.pop().unwrap(); - if val != *t { - return Err(format!("type mismatch between {self:?} and {ts:?}\n{t:?} doesn't match {val:?} at stack depth {index:?}")); - } - } - Ok(self) - } - } - - /// tests if ts matches the top of the stack - pub fn test(&self, ts: &TypeStack) -> Result<(), String> { - if ts.len() > self.len() { - Err(format!("error during test: {ts:?} is bigger than {self:?}")) - } else { - for (index, t) in ts.0.iter().rev().enumerate() { - let val = &self.0[self.0.len() - 1]; - if val != t { - return Err(format!("type mismatch between {self:?} and {ts:?}\n{t:?} doesn't match {val:?} at stack depth {index:?}")); - } - } - Ok(()) - } - } - - // pub fn test_many(mut self, mut tss: Vec>) -> Result, String> { - // if tss.is_empty() { - // return Ok(TypeStack::new()); - // } - // for _i in 0..tss.len() { - // 
let ts = tss.pop().unwrap(); - // self = self.test(ts)?; - // if self.is_ok() { - // return self; - // } - // } - // Err("did not match any types".to_string()) - // } - - pub fn append(&mut self, t: &mut Vec) { - self.0.append(t); - } -} - -impl From> for TypeStack { - fn from(value: Vec) -> Self { - TypeStack(value) - } -} - -pub trait Checkable { - fn check(&mut self, stack: TypeStack) -> Result, String>; -} diff --git a/subroutines.inc b/subroutines.inc new file mode 100644 index 0000000..aaecae5 --- /dev/null +++ b/subroutines.inc @@ -0,0 +1,15 @@ +; 0 +test_contiguous_binary: + .byte 3,$1,$2,$3 + +; 1 - assembled from "plus.asm" +subroutine_plus: + .byte 15, $18,$b5,$00,$75,$02,$95, $02, $b5, $01, $75, $03, $95, $03, $ca, $ca + +; 2 +subroutine_push: + .byte 6,$ca,$ca,$95,$0,$74,$1 + ; dex + ; dex + ; sta 0, x + ; stz 1, x