fuzzy's compiler is written in assembly now :)

fuzzy compiler v0 rev 0, written in assembly, running on george :)
fuzzy spec :)
2024-10-07 01:44:20 -04:00 · 2024-10-06 22:06:10 -04:00 · 2024-10-06 21:56:37 -04:00
16 changed files with 431 additions and 1047 deletions
@@ -1,271 +0,0 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
 version = 3
 [[package]]
 name = "ahash"
 version = "0.8.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
 dependencies = [
 "cfg-if",
 "once_cell",
 "version_check",
 "zerocopy",
 ]
 [[package]]
 name = "allocator-api2"
 version = "0.2.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
 [[package]]
 name = "cc"
 version = "1.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "324c74f2155653c90b04f25b2a47a8a631360cb908f92a772695f430c7e31052"
 [[package]]
 name = "cfg-if"
 version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 [[package]]
 name = "chumsky"
 version = "0.9.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9"
 dependencies = [
 "hashbrown",
 "stacker",
 ]
 [[package]]
 name = "either"
 version = "1.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
 [[package]]
 name = "fuzzy"
 version = "0.1.0"
 dependencies = [
 "chumsky",
 "indextree",
 ]
 [[package]]
 name = "hashbrown"
 version = "0.14.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
 dependencies = [
 "ahash",
 "allocator-api2",
 ]
 [[package]]
 name = "heck"
 version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 [[package]]
 name = "indextree"
 version = "4.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0d6f1b8dbc8f1e5a0f45e05b9293c42cbab79086baeb3e914d3936f8149edc4f"
 dependencies = [
 "indextree-macros",
 ]
 [[package]]
 name = "indextree-macros"
 version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "357230c23ee6024223892ce0de19888a04139ca5bb94f5becb04d38b75a4bccf"
 dependencies = [
 "either",
 "itertools",
 "proc-macro2",
 "quote",
 "strum",
 "syn",
 "thiserror",
 ]
 [[package]]
 name = "itertools"
 version = "0.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
 dependencies = [
 "either",
 ]
 [[package]]
 name = "libc"
 version = "0.2.155"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
 [[package]]
 name = "once_cell"
 version = "1.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
 [[package]]
 name = "proc-macro2"
 version = "1.0.86"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
 dependencies = [
 "unicode-ident",
 ]
 [[package]]
 name = "psm"
 version = "0.1.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
 dependencies = [
 "cc",
 ]
 [[package]]
 name = "quote"
 version = "1.0.36"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
 dependencies = [
 "proc-macro2",
 ]
 [[package]]
 name = "rustversion"
 version = "1.0.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6"
 [[package]]
 name = "stacker"
 version = "0.1.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
 dependencies = [
 "cc",
 "cfg-if",
 "libc",
 "psm",
 "winapi",
 ]
 [[package]]
 name = "strum"
 version = "0.26.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
 dependencies = [
 "strum_macros",
 ]
 [[package]]
 name = "strum_macros"
 version = "0.26.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
 dependencies = [
 "heck",
 "proc-macro2",
 "quote",
 "rustversion",
 "syn",
 ]
 [[package]]
 name = "syn"
 version = "2.0.71"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462"
 dependencies = [
 "proc-macro2",
 "quote",
 "unicode-ident",
 ]
 [[package]]
 name = "thiserror"
 version = "1.0.63"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
 dependencies = [
 "thiserror-impl",
 ]
 [[package]]
 name = "thiserror-impl"
 version = "1.0.63"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
 dependencies = [
 "proc-macro2",
 "quote",
 "syn",
 ]
 [[package]]
 name = "unicode-ident"
 version = "1.0.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
 [[package]]
 name = "version_check"
 version = "0.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
 [[package]]
 name = "winapi"
 version = "0.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
 dependencies = [
 "winapi-i686-pc-windows-gnu",
 "winapi-x86_64-pc-windows-gnu",
 ]
 [[package]]
 name = "winapi-i686-pc-windows-gnu"
 version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
 [[package]]
 name = "winapi-x86_64-pc-windows-gnu"
 version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
 [[package]]
 name = "zerocopy"
 version = "0.7.35"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
 dependencies = [
 "zerocopy-derive",
 ]
 [[package]]
 name = "zerocopy-derive"
 version = "0.7.35"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
 dependencies = [
 "proc-macro2",
 "quote",
 "syn",
 ]
@@ -1,15 +0,0 @@
 [package]
 name = "fuzzy"
 version = "0.1.0"
 edition = "2021"
 [dependencies]
 chumsky = { version = "0.9.3"}
 indextree = "4.7.2"
 [lib]
 crate-type = ["lib"]
 [[bin]]
 name = "main"
@@ -16,16 +16,26 @@ fuzzy is part of george, and shouldn't run anywhere else.
 #### low-level
-(most of) fuzzy could be made with assembler macros, but that's no fun
+(most of) fuzzy could be written as assembler macros, but that's no fun
 #### reliable
 if fuzzy says it can run, george can run it
-## feature progress
+## how to work on fuzzy
- [x] parser
+edit `program.asm` and run `./run.sh`. the program gets included in the fuzzy compiler `fuzzy.asm` and is assembled with `vasm6502_oldstyle`, then george runs the program, reading out her system image when she reaches `stp` or `brk`
-  - roughly complete, but want to finish the whole pipeline before adding things to the parser
+
- [x] typechecker
+then the program she compiled gets formatted as a standard 32k rom, and she reads it again, and then shows her system image again when the program finishes (hits `stp` or `brk`).
-  - generates a "type stack" from parsed input and checks that word definition types match their body
+
- [ ] code generation
+since fuzzy works on a zero-page data stack, it's pretty easy to read the results of a program from the hexdump.
 for now this loop only works on apple silicon, but eventually i'll compile a `george` binary for x86 linux and switch based on the host platform.
 ## reference help
 i wrote [syntax](./syntax.md) and [semantics](./semantics.md) docs to keep track of how fuzzy works before starting work on the compiler implementation in assembly. they're the _official_ source of truth for how fuzzy works. assume that the compiler implementation is always in flux :)
 ## a note on implementation
 i was writing fuzzy's compiler in rust for a sec, but then i realized that it would be a fun challenge to write it in assembly. it's been wayyy easier! and fun! and so rewarding :) this feels like a flex but i'm genuinely just so happy to see george & fuzzy playing together in this little computer world i've made <3
@@ -0,0 +1,214 @@
 ; ʕ·ᴥ·ʔ- fuzzy v0 rev 0: parse program text and spit out binary representation @ $4000
    .include "./macro.inc"
 ; zero-page variables used by the compiler, laid out upwards from `base`
 n = $05 ; temporary storage for data stack operations
 base = $00
 result_binary_base = base ; pointer to where the next byte of binary data should be stored
 binary_base_index = result_binary_base + 2 ; offset for that pointer
 binary_subroutine_address = binary_base_index + 1 ; pointer to a subroutine to be written to the binary
    .org $8000
    .include "./subroutines.inc"
 program_text:
    .include "./program.inc"
 ; entry point (the reset vector at the bottom of this file points here):
 ; clears the registers, sets up the output and subroutine pointers, compiles
 ; program_text and then stops the cpu
 reset:
    sei
    lda #0
    ldx #0
    ldy #0
 main:
    stz result_binary_base ; low byte of the output pointer, store_binary reads it through (result_binary_base), y
    stz binary_base_index
    lda #$40
    sta result_binary_base + 1 ; set where to store resulting binary ($4000)
    stz binary_subroutine_address
    lda #$80
    sta binary_subroutine_address + 1 ; available subroutines start at $8000
    jsr compile_values
    stp
 ; parser loop, eventually this will be able to handle longer program strings, but indexing by y is fine for now
 ; walks program_text one character at a time until the zero terminator or a blank line (two newlines in a row)
 ; each token handler returns with carry set if it consumed the character at y, carry clear if it didn't
 compile_values:
    ldy #0
 parser_loop:
    lda program_text, y ; get character at index
    beq .end ; zero terminator (lda already set the z flag), exit loop
    cmp #$20 ; is space?
    beq .skip ; yes, step past it
    cmp #12 ; is newline? NOTE(review): 12 is not ascii lf ($0a), confirm george's newline code
    beq .newline ; yes, handle newline
    jsr compile_values_op ; leaves a untouched when it doesn't match
    bcs parser_loop ; consumed an operator, on to the next character
    jsr compile_values_nat
    bcs parser_loop ; consumed a digit, on to the next character
    jmp error ; nothing recognised this character, refuse to compile instead of silently skipping it
    .skip:
        iny ; consume the space
        bra parser_loop
    .newline: ; we reached a newline, y is program string index
        iny ; WARN: don't accidentally iny in this loop w/out handling a character
        lda program_text, y ; load next char
        cmp #12 ; is newline?
        bne parser_loop ; no, keep parsing tokens
        rts ; yes, no more tokens in body (see syntax.md for info)
    .end:
        rts
 ; a holds character value, y program text index, only iny if you find a matching character & consume it
 ; returns carry set (and y advanced) when the character was an operator, carry clear otherwise; a is preserved
 compile_values_op:
    cmp #"+" ; i personally think this syntax is really silly but whatever, one of these days i'm gonna write my own assembler and document everything cause vasm documentation is kinda terrible
    bne .no_match
    .is_plus:
        pha ; keep the character, store_subroutine wants its index in a
        lda #1 ; subroutine index passed to store_subroutine for "+"
        jsr store_subroutine
        pla
        iny ; consume the operator
        sec ; tell the parser loop we handled this character
        rts
    .no_match:
        clc ; not an operator, nothing consumed
        rts
    ; cmp #"!" ; commenting these out for now to handle a single simple case
    ; cmp #"&"
    ; cmp #"|"
    ; cmp #"-"
    ; cmp #"*"
    ; cmp #"/"
    ; cmp #"="
    ; cmp #">"
    ; cmp #"<"
    ; cmp #"#"
 ; a holds character value, y program text index, only iny if you find a matching character & consume it
 ; returns carry set (and y advanced) when the character was a digit, carry clear otherwise
 ; on a match a is clobbered; the emitted bytes are `lda #value` ($a9, value) followed by subroutine 2 (push)
 ; TODO:
 ; 1-3 digit decimal values
 ; 1-2 digit hex values
 compile_values_nat:
    ; TODO:
    ; cmp #"$" ; is hex?
    ; bne .decimal ; no, try decimal
    ; cmp
    ; rts
    cmp #$30 ; before the start of the 0-9 georgescii range ($30 = "0")?
    bcc .not_nat
    cmp #$3a ; past the end of the 0-9 georgescii range ($39 = "9")?
    bcs .not_nat
    pha
    lda #$a9 ; $a9: lda imm
    jsr store_binary
    pla
    jsr georgescii_decimal_to_value
    jsr store_binary
    lda #2 ; push
    jsr store_subroutine
    iny ; consume the digit
    sec ; tell the parser loop we handled this character
    rts
    .not_nat:
        clc ; not a digit, nothing consumed
        rts
 ; georgescii decimal value in a register, return equivalent plain value in a register
 ; e.g. "2" ($32) comes back as $02
 georgescii_decimal_to_value:
    sec ; sbc subtracts an extra 1 when carry is clear, so carry must be set for a plain subtraction
    sbc #$30 ; decimal digits start at georgescii $30
    rts
 ; append the byte in a to the output binary at (result_binary_base) + binary_base_index
 ; a and y come back unchanged
 store_binary:
    phy
    ldy binary_base_index
    sta (result_binary_base), y ; write the byte at the current offset
    iny
    sty binary_base_index ; offset now points at the next free byte
    bne .same_page ; z is still from the iny: zero means the offset wrapped
    inc result_binary_base + 1 ; wrapped, move the base pointer to the next page
    .same_page:
        ply
        rts
 ; binary_subroutine_address is a pointer to a subroutine that we want to store
 ; the first byte at the subroutine's address is its length
 ; copies that many bytes (not the length byte itself) into the output with store_binary
 ; a is preserved, x and y are clobbered
 store_contiguous_binary:
    pha ; just to be safe
    lda (binary_subroutine_address) ; get the subroutine length
    tax ; loop counter
    beq .end ; length 0: nothing to copy (without this check dex would wrap and copy 256 bytes)
    ldy #1 ; index into subroutine, offset by one to skip subroutine length
    .loop:
        lda (binary_subroutine_address), y
        jsr store_binary
        iny
        dex
        bne .loop
    .end:
        pla
        rts
 ; this wouldn't be necessary if we could get the
 ; address of a label in vasm, but that's for another time
 ; (when i feel like writing an assembler lol)
 ; for now, pass the index of the subroutine (in subroutines.inc)
 ; to a and it will get written to binary_subroutine_address
 ; subroutines are stored back to back from $8000, each one prefixed with its length byte,
 ; so subroutine n is found by hopping over n length-prefixed entries
 ; a is preserved, x is clobbered
 get_subroutine_address:
    pha
    tax ; set up counter
    stz binary_subroutine_address ; always start from the first subroutine at $8000
    lda #$80
    sta binary_subroutine_address + 1
    cpx #0 ; first subroutine?
    beq .done ; yes, the pointer is already right
    .loop: ; hop over one subroutine per iteration
        lda (binary_subroutine_address) ; load length of subroutine
        sec ; carry in adds 1 for the length byte itself
        adc binary_subroutine_address ; add it to the current address
        sta binary_subroutine_address
        bcc .no_carry ; low byte didn't overflow, high byte stays
        inc binary_subroutine_address + 1 ; carry into the high byte of address
        .no_carry:
            dex ; is this our address?
            bne .loop ; no, keep hopping
    .done:
        pla ; balance the pha on every path so rts finds its return address
        rts
 ; pass subroutine index to a and it will get written into the binary
 ; a, x and y all come back unchanged
 ; TODO: stabilize subroutine location & just write a `jsr $subroutine` to the binary
 store_subroutine:
    pha
    phx
    phy
    jsr get_subroutine_address ; point binary_subroutine_address at subroutine number a
    jsr store_contiguous_binary ; copy its body into the output
    ; put the subroutine pointer back at the first subroutine ($8000)
    lda #$80
    sta binary_subroutine_address + 1
    stz binary_subroutine_address
    ply
    plx
    pla
    rts
 ; copy the error message (terminator included) to $4000 and halt the cpu
 error:
    ldy #$ff ; the loop increments first, so this starts at index 0
    .copy:
        iny
        lda .message, y
        sta $4000, y
        bne .copy ; z is from the lda: stop once the zero terminator is written
    stp
    .message:
        .asciiz "ruh roh! fuzzy couldn't compile"
 ; placeholder handler: saves a, x, y, restores them straight away and returns, doing no work in between
 isr: ; interrupt service routine
    pha
    phx
    phy
    ply
    plx
    pla
    rti
    ; 6502 vectors: the word at $fffc is the reset vector, the word after it ($fffe) is the irq/brk vector
    .org $fffc
    .word reset
    .word isr
@@ -14,9 +14,13 @@
        inx
    .endm
-    .macro push ; push a data stack cell
+    .macro push, cell_high, cell_low ; push a data stack cell
        dex ; a cell is two bytes, so the stack index x drops by 2
        dex
        lda \cell_low
        sta 0, x ; low byte goes at the new top of stack
        lda \cell_high
        sta 1, x ; high byte goes in the byte after it
    .endm
    .macro push2 ; push 2 data stack cells
@@ -0,0 +1 @@
    .asciiz '2 3 +'
@@ -0,0 +1,16 @@
 #!/usr/bin/env bash
 # assemble the fuzzy compiler, run it on george, then run the program it compiled
 set -e
 # -f: a fresh checkout has no build artifacts yet, and a failing rm would abort the script under set -e
 rm -f *.bin *.rom
 vasm6502_oldstyle fuzzy.asm -dotdir -wdc02 -ldots -Fbin -o fuzzy.rom &> /dev/null;
 echo -e "\nʕ·ᴥ·ʔ- source text:\n";
 cat program.inc;
 cat fuzzy.rom | ./george > compiled.bin;
 # the compiler writes its output at $4000 (16384) in george's system image, pull 500 bytes out from there
 dd skip=16384 count=500 if=compiled.bin of=compiled.rom bs=1 &> /dev/null;
 truncate -s 32k compiled.rom &> /dev/null;
 # patch the last three bytes of the 32k rom, presumably so the reset vector reads $8000 (the start of the rom)
 printf '\x80\x00\x00' | dd of=compiled.rom bs=1 seek=32765 count=3 conv=notrunc &> /dev/null;
 cat compiled.rom | ./george > result.bin;
 echo -e "\n\nʕ·ᴥ·ʔ- compiled program result:\n";
 hexdump -C ./result.bin;
 echo -e "";
@@ -0,0 +1,85 @@
 # i swear this is what fuzzy actually does
 ## the stack
 fuzzy works on a 16-bit cell-width, zero-page data stack indexed with the x register, as documented in Garth Wilson's [stack treatise](https://wilsonminesco.com/stacks/virtualstacks.html)
 to push a byte onto the data stack, we just:
 ```asm
   dex            ; decrement the stack pointer
   lda some_value ; load the byte we want on the stack into a
   sta 0, x       ; put the byte on the stack!
 ```
 and to pop a byte off it:
 ```asm
   lda 0, x       ; pop the top of stack off into a
   inx            ; increment the stack pointer
 ```
 ## types
 these are used in word definitions, and refer to the type of an individual stack cell:
 | type                   | desc                                                        |
 | ---------------------- | ----------------------------------------------------------- |
 | **bool**               | a boolean value, represented by $0000 or $ffff              |
 | **nat**                | an unsigned 16-bit integer                                  |
 | **int**                | a signed 16-bit integer                                     |
 | **char**               | an 8-bit george-ascii character, padded with leading zeroes |
 | **string**             | a 16-bit pointer to a string in memory                      |
 | **word** _`dangerous`_ | a 16-bit pointer to a fuzzy word or quotation               |
 ## operators
 - `!` NOT: applies NOT to tos
 - `&` AND: pops 2 off the stack and pushes the AND'ed result
 - `|` OR: pops 2 off the stack and pushes the OR'ed result
 - `+` add: pops 2 off the stack and pushes the sum
 - `-` subtract: pops 2 off the stack and pushes the difference
 - `*` multiply: pops 2 off the stack and pushes the result, truncating if it's >$FFFF
 - `/` divide: pops 2 off the stack and pushes the remainder and quotient
 - `=` equality: pushes true/false if the top 2 stack cells do/don't match
 - `>` greater than: pushes true/false if tos-1 is/isn't greater than tos
 - `<` less than: pushes true/false if tos-1 is/isn't less than tos
 - `#` quote _`dangerous`_: pops tos and pushes a word that produces its value
 ### supported types (this will need to be more clearly laid out later)
 | operator | input type               | output type              | notes                                                                                                                                                               |
 | -------- | ------------------------ | ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `!`      | `bool`, `nat`, `int`     | `bool`, `nat`, `int`     |                                                                                                                                                                     |
 | `&`      | `bool`, `nat`, `int`     | `bool`, `nat`, `int`     |                                                                                                                                                                     |
 | `\|`     | `bool`, `nat`, `int`     | `bool`, `nat`, `int`     |                                                                                                                                                                     |
 | `+`      | `nat` `nat`, `int` `int` | `nat`, `int`             |                                                                                                                                                                     |
 | `-`      | `nat` `nat`, `int` `int` | `nat`, `int`             | subtracting two `nat`s                                                                                                                                              |
 | `*`      | `nat` `nat`, `int` `int` | `nat`, `int`             | most products will be truncated, since most 16 bit multiplications result in a >16 bit product, but in practice that shouldn't matter cause we're not doing science |
 | `/`      | `nat` `nat`, `int` `int` | `nat` `nat`, `int` `int` | produces two cells, the quotient and remainder                                                                                                                      |
 | `=`      | any any                  | `bool`                   | equality/order is checked based on stack cell value, not type (e.g. a `word` pointing to $abcd and a `nat` with the value $abcd are equivalent)                     |
 | `>`      | any any                  | `bool`                   | see above                                                                                                                                                           |
 | `<`      | any any                  | `bool`                   | see above                                                                                                                                                           |
 | `#`      | any                      | `word`                   | _`dangerous`_                                                                                                                                                       |
 ## `danger!`
 the `danger!` keyword marks a word as being _`dangerous`_. certain language features can only be used in dangerous words, such as:
 - inline assembly
 - quotations
  - typechecking quotations is a difficult problem & probably too complex to implement on george if we ever want to fully self-host fuzzy
 - unchecked operator usage
  - applying `+` to two chars, applying `&` to two strings, etc
  - this does not mean that _dangerous_ words are untyped! just the type of the result of an operation is asserted to be the word result type
    - `danger! dangerous_word num num is char: +` can't be used on a `num char` stack, and any words used after `dangerous_word` treat the top of the stack as having a `char` and don't care that it was made with two `num`s
 the program body cannot use any _dangerous_ features. this makes it so that _dangerous_ behavior is contained to specific words.
 ## memory layout
 | start  | end    | use                          |
 | ------ | ------ | ---------------------------- |
 | `$200` | `$300` |                              |
 |        |        | core language implementation |
 |        |        | core language implementation |
@@ -1,168 +0,0 @@
 use core::panic;
 use std::{any::Any, fmt::Display};
 use crate::{
    typecheck::{self, Checkable, TypeStack},
    Symtab,
 };
 #[derive(PartialEq, Eq, Debug, Clone)]
 // One item of a word body or of the program body, as produced by the parser.
 //
 // enum values are parser values, not compiler values,
 // e.g. for `Str(String)` the `String` value will be put somewhere in memory
 // and a pointer to it will be put on the stack
 pub enum Value {
    Nat(u16), // 16-bit natural number
    Int(i16), // 16-bit twos-complement integer
    Bool(bool),
    Op(String), // operator text, e.g. "+" or "*"
    Char(char), // 8-bit georgescii character padded with leading zeros (might change later)
    Str(String), // 16-bit pointer to a string
    Word(String), // 16-bit pointer to a word; the `String` is the word's name, used as the symbol table key
 }
 /// Type of a single stack cell.
 #[derive(PartialEq, Eq, Debug, Clone, PartialOrd, Ord)]
 pub enum VType {
    Nat,
    Int,
    Bool,
    Char,
    Str,
 }
 /// Stack signature of a word: the cell types it pops and the cell types it pushes.
 #[derive(Eq, PartialEq, Debug, Clone)]
 pub struct WType {
    pub pop: Vec<VType>,
    pub push: Vec<VType>,
 }
 impl WType {
    /// Creates a signature that pops nothing and pushes nothing.
    pub fn new() -> Self {
        Self {
            pop: Vec::new(),
            push: Vec::new(),
        }
    }
    /// Builder step: appends `t` to the types this word pushes.
    pub fn push(mut self, t: Vec<VType>) -> Self {
        self.push.extend(t);
        self
    }
    /// Builder step: appends `t` to the types this word pops.
    /// (Nothing is removed from anything, this only records the types.)
    pub fn pop(mut self, t: Vec<VType>) -> Self {
        self.pop.extend(t);
        self
    }
 }
 // Effects a word definition can carry (stored in `WordDef::effects`).
 // NOTE(review): presumably these match the `~paint` / `~sing` style annotations in fuzzy source; confirm against the parser.
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub enum Effect {
    Paint,
    Sing,
    Store,
    Do,
 }
 // A parsed word definition.
 #[derive(Debug, Eq, PartialEq, Clone)]
 pub struct WordDef {
    // name of the word; `Symtab` uses it as the lookup key
    pub name: String,
    // body of the word, in source order
    pub values: Vec<Value>,
    // cell types the word pops and pushes
    pub r#type: WType,
    // effects attached to the word
    pub effects: Vec<Effect>,
 }
 impl WordDef {
    /// Builds a word definition from its parts; `name` can be anything convertible into a `String`.
    pub fn new<S>(name: S, values: Vec<Value>, r#type: WType, effects: Vec<Effect>) -> Self
    where
        S: Into<String>,
    {
        let name = name.into();
        Self {
            name,
            values,
            r#type,
            effects,
        }
    }
    /// Returns this word's body with every `Value::Word` reference replaced,
    /// recursively, by the body of the word it names in `symtab`.
    fn flatten_values(&self, symtab: &Symtab) -> Vec<Value> {
        self.values
            .iter()
            .flat_map(|value| match value {
                Value::Word(word_name) => symtab.get(word_name).flatten_values(symtab),
                plain => vec![plain.clone()],
            })
            .collect()
    }
    /// Returns a copy of this definition whose body contains no word references
    /// (see `flatten_values`); name, type and effects are cloned unchanged.
    pub fn flatten(&self, symtab: &Symtab) -> WordDef {
        let flat_body = self.flatten_values(symtab);
        WordDef {
            name: self.name.clone(),
            values: flat_body,
            r#type: self.r#type.clone(),
            effects: self.effects.clone(),
        }
    }
 }
 impl Display for WordDef {
    /// Prints the pretty-printed `Debug` form of the definition followed by a newline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "{:#?}", self)
    }
 }
 impl Checkable<VType> for WordDef {
    /// Walks the (already flattened) body of this word, applying each value's
    /// effect to `stack`, and returns the resulting type stack.
    ///
    /// Returns `Err` when an operator finds fewer than two cells on the stack,
    /// when `test_consume` rejects the operand types, or when the operator is unknown.
    ///
    /// Panics if the body still contains a `Value::Word`; call `flatten` first.
    fn check(&mut self, mut stack: TypeStack<VType>) -> Result<TypeStack<VType>, String> {
        for value in self.values.iter() {
            println!(
                "WORDDEF: checking value {:?} in word {:?}, current stack is {:?}",
                value, self.name, stack
            );
            match value {
                Value::Word(_) => panic!("Don't typecheck on an unflattened word!"),
                Value::Op(op) => {
                    if stack.len() < 2 {
                        return Err(format!(
                            "Checking def {:?}, stack is {:?}, expected a stack with 2 elements, got only {:?}",
                            self.name,
                            stack,
                            stack.len()
                        ));
                    }
                    match op.as_str() {
                        "+" | "*" => {
                            println!("WORDDEF: checking Op");
                            stack = stack
                                .test_consume(TypeStack::new().push(VType::Nat).push(VType::Nat))?;
                            stack = stack.push(VType::Nat);
                        }
                        // NOTE(review): this arm pops twice after `test_consume` while the
                        // arithmetic arm above does not; one of the two is probably wrong,
                        // depending on whether `test_consume` removes the operands itself.
                        // NOTE(review): the operator list in the semantics doc spells OR as `|`,
                        // not `||`; confirm what the parser emits.
                        "&" | "||" => {
                            stack = stack.test_consume(
                                TypeStack::new().push(VType::Bool).push(VType::Bool),
                            )?;
                            stack = stack.pop().pop().push(VType::Bool);
                        }
                        _ => return Err(format!("unknown opcode {:?}", op)),
                    }
                }
                Value::Nat(_) => stack = stack.push(VType::Nat),
                Value::Int(_) => stack = stack.push(VType::Int),
                // a bool literal is a `Bool` cell (this used to push `Int`)
                Value::Bool(_) => stack = stack.push(VType::Bool),
                Value::Str(_) => stack = stack.push(VType::Str),
                Value::Char(_) => stack = stack.push(VType::Char),
            }
        }
        Ok(stack)
    }
 }
@@ -1,11 +0,0 @@
 use fuzzy::{
    ast::VType,
    parse,
    typecheck::{Checkable, TypeStack},
 };
 /// Parses a hard-coded sample program, typechecks it against an empty type stack
 /// and prints the result.
 fn main() {
    let source = "test is: 5 9 *\n\nexample int nat is ~paint ~sing:\n 5 \"lol\" test \"c\" \n\narrest int nat is ~paint ~sing: 5 \"lol\" \"a\" example\n\n5 6 * arrest example arrest";
    let mut program = parse(source).unwrap();
    let outcome = program.check(TypeStack::new());
    println!("{:?}", outcome);
 }
@@ -1,7 +0,0 @@
 pub mod ast;
 pub mod compiler;
 pub mod parser;
 pub mod typecheck;
 pub use compiler::*;
 pub use parser::*;
@@ -1,483 +0,0 @@
 use std::collections::HashMap;
 // TODO:
 //  - add error types and error handling
 //  - figure out if we can use a single ast or should make a second ast for compilation, then
 //  convert to that after parsing/do another parsing step but on the ast made the first time
 //  (i also don't really know what i'm doing so maybe u never do that or always do that or
 //  something i don't understand yet)
 //  - improve whitespace parsing
 //  - think more about language rules:
 //      - what types actually need to be exposed to the "user" (me) and what types can be internal
 //      to the compiler (e.g. char vs str, where a str of length 1 could be treated as a char
 //      internally (put on the stack as a value instead of put somewhere in memory))
 use chumsky::{
    prelude::*,
    text::{ident, keyword},
 };
 use crate::{
    ast::*,
    typecheck::{Checkable, TypeStack},
 };
 /// Symbol table mapping each word's name to its definition.
 #[derive(PartialEq, Debug, Clone)]
 pub struct Symtab(HashMap<String, WordDef>);
 impl Symtab {
    /// Creates an empty symbol table.
    fn new() -> Self {
        Symtab(HashMap::new())
    }
    /// Returns the definition stored under the name `string`.
    ///
    /// # Panics
    /// Panics if no definition with that name exists; the panic message names the
    /// missing word so the offending reference can be found in the source.
    pub fn get(&self, string: &String) -> &WordDef {
        self.0
            .get(string)
            .unwrap_or_else(|| panic!("reference to undefined word {string:?}"))
    }
    /// Inserts `def` under its own name, replacing any definition already stored there.
    pub fn add_def(&mut self, def: WordDef) {
        let key = def.name.clone();
        self.0.insert(key, def);
    }
    /// Replaces every definition with the result of `WordDef::flatten` computed
    /// against the current (un-flattened) table.
    pub fn flatten_refs(&mut self) {
        let mut flattened = Symtab::new();
        for def in self.0.values() {
            flattened.add_def(def.flatten(self));
        }
        // every entry was rebuilt above, so the old table can simply be replaced
        *self = flattened;
    }
 }
 impl From<Vec<WordDef>> for Symtab {
    fn from(value: Vec<WordDef>) -> Self {
        let symtab: HashMap<String, WordDef> = value
            .iter()
            .map(|x| (x.name.to_owned(), x.to_owned()))
            .collect();
        Symtab(symtab)
    }
 }
 /// A parsed fuzzy program: its word definitions plus the top-level body.
 #[derive(Debug, PartialEq)]
 pub struct Program {
    symtab: Symtab,
    body: Vec<Value>,
 }
 impl Program {
    /// Builds a program from parsed definitions and top-level body values.
    fn new(defs: Vec<WordDef>, body: Vec<Value>) -> Self {
        Program {
            symtab: defs.into(),
            body,
        }
    }
    /// Rewrites the body so that each word reference is replaced by the values of
    /// that word's flattened definition; all other values are kept as they are.
    fn reduce_body(&mut self) {
        let symtab = &self.symtab;
        let mut expanded = vec![];
        for value in &self.body {
            match value {
                Value::Word(word) => expanded.extend(symtab.get(word).flatten(symtab).values),
                other => expanded.push(other.clone()),
            }
        }
        self.body = expanded;
    }
 }
 impl Checkable<VType> for Program {
    /// Type-checks the whole program, starting from the given `stack`.
    ///
    /// Steps, in order:
    /// 1. reject definitions whose word references form a cycle,
    /// 2. flatten the word references inside every definition,
    /// 3. check every definition: start from its declared pop types, run its own
    ///    check, and test the result against its declared push types,
    /// 4. inline the word references in the body and check the body against `stack`.
    ///
    /// Returns the stack left behind by the body, or a message describing the
    /// first error found.
    ///
    /// # Panics
    /// Panics if a word is referenced that has no definition in the symbol table.
    fn check(
        &mut self,
        mut stack: crate::typecheck::TypeStack<VType>,
    ) -> Result<crate::typecheck::TypeStack<VType>, String> {
        //TODO: https://trykv.medium.com/algorithms-on-graphs-directed-graphs-and-cycle-detection-3982dfbd11f5
        /// Runs a depth-first search from every definition not visited yet and
        /// fails on the first reference cycle found.
        fn cyclic_graph_check(symtab: &Symtab) -> Result<(), String> {
            let mut visited: Vec<&WordDef> = vec![];
            let mut rec_stack: Vec<&WordDef> = vec![];
            for (_, def) in symtab.0.iter() {
                if !visited.contains(&def) {
                    dfs_cycle_check(def, &mut visited, &mut rec_stack, symtab)?;
                }
            }
            Ok(())
        }
        /// Depth-first walk over the words referenced by `def`.
        ///
        /// `rec_stack` holds the definitions on the current DFS path; reaching one
        /// of them again means the definitions reference each other in a cycle.
        fn dfs_cycle_check<'a>(
            def: &'a WordDef,
            visited: &mut Vec<&'a WordDef>,
            rec_stack: &mut Vec<&'a WordDef>,
            symtab: &'a Symtab,
        ) -> Result<(), String> {
            visited.push(def);
            rec_stack.push(def);
            for val in def.values.iter() {
                if let Value::Word(name) = val {
                    let next_def = symtab.get(name);
                    if !visited.contains(&next_def) {
                        dfs_cycle_check(next_def, visited, rec_stack, symtab)?;
                    } else if rec_stack.contains(&next_def) {
                        return Err(format!(
                            "illegal recursion detected! definitions {}create a reference cycle",
                            rec_stack
                                .iter()
                                .map(|def| {
                                    // render each name on the path as `"name" `
                                    let mut name = def.name.clone();
                                    name.insert(0, '"');
                                    name.push_str("\" ");
                                    name
                                })
                                .collect::<String>()
                        ));
                    }
                }
            }
            rec_stack.pop();
            Ok(())
        }
        cyclic_graph_check(&self.symtab)?;
        self.symtab.flatten_refs();
        println!(
            "we have flattened refs, here's the symtab: {:#?}\n",
            self.symtab
        );
        // then check that all symtab defs are sound
        // at this point they shouldn't have any references,
        // and if they do we will panic (see the Checkable impl for WordDef)
        for (name, def) in self.symtab.0.iter_mut() {
            let local_stack: TypeStack<VType> = def.r#type.pop.clone().into();
            println!(
                "PARSED: checking {:?}\ncurrent stack: {local_stack:?}\nword: {:?}",
                name, def
            );
            let result_stack = def.check(local_stack)?;
            if let Err(error) = result_stack.test(&def.r#type.push.clone().into()) {
                println!("{error:?}");
                return Err(error);
            }
        }
        self.reduce_body();
        // then we'll check that the body is sound with the given stack
        // maybe in the future i'll change this trait so there isn't a stack
        // param and the implementer picks what stack to check against
        //
        // TODO: this block also is shared behavior between basically all checkables but potentially with
        // different internal types for T, will have to figure out how to dedup this later
        for value in self.body.iter() {
            match value {
                Value::Op(op) => {
                    if stack.len() < 2 {
                        return Err(format!(
                            "expected a stack with 2 elements, got only {:?}",
                            stack.len()
                        ));
                    } else {
                        // `test_consume` already removes both operands from the stack,
                        // so only the result type is pushed afterwards; popping again
                        // here would throw away two unrelated entries below them
                        match op.as_str() {
                            "+" | "*" => {
                                stack = stack.test_consume(
                                    TypeStack::new().push(VType::Nat).push(VType::Nat),
                                )?;
                                stack = stack.push(VType::Nat);
                            }
                            "&" | "||" => {
                                stack = stack.test_consume(
                                    TypeStack::new().push(VType::Bool).push(VType::Bool),
                                )?;
                                stack = stack.push(VType::Bool);
                            }
                            _ => return Err(format!("unknown opcode {:?}", op)),
                        }
                    }
                }
                Value::Nat(_) => stack = stack.push(VType::Nat),
                Value::Int(_) => stack = stack.push(VType::Int),
                Value::Bool(_) => stack = stack.push(VType::Bool),
                Value::Str(_) => stack = stack.push(VType::Str),
                Value::Char(_) => stack = stack.push(VType::Char),
                // reduce_body() above replaced every word reference
                Value::Word(_) => unreachable!(),
            };
        }
        Ok(stack)
    }
 }
 /// Parses fuzzy source text into a [`Program`].
 ///
 /// `input` is converted with `to_string` and handed to [`parser`]; on failure the
 /// parser's list of errors is returned unchanged.
 pub fn parse<S>(input: S) -> Result<Program, Vec<Simple<char>>>
 where
    S: ToString,
 {
    let source = input.to_string();
    parser().parse(source)
 }
 pub fn parser() -> impl Parser<char, Program, Error = Simple<char>> {
    let name = ident().labelled("word_name");
    let value = {
        // nats will be coerced to ints at compile time depending on word type
        let nat = text::int(10).map(|s: String| Value::Nat(s.parse().unwrap()));
        // vice versa for non-negative ints
        let int = just("-").ignore_then(
            text::int::<char, Simple<char>>(10).map(|s: String| Value::Int(s.parse().unwrap())),
        );
        let op = one_of::<char, &str, Simple<char>>("*+-/&|<>").map(|s| Value::Op(s.to_string()));
        let str_or_char = just::<char, char, Simple<char>>('"')
            .ignore_then(none_of('"').repeated())
            .then_ignore(just('"'))
            .map(|s: Vec<char>| match s.len() {
                1 => Value::Char(s[0]),
                _ => Value::Str(s.into_iter().collect::<String>()),
            });
        let word = name.map(|n: String| Value::Word(n));
        let bool = keyword::<_, _, Simple<char>>("true")
            .map(|_| Value::Bool(true))
            .or(keyword("false").map(|_| Value::Bool(false)));
        nat.or(int).or(bool).or(str_or_char).or(word).or(op)
    };
    let value_seperator = text::newline()
        .repeated()
        .at_least(2)
        .not()
        .rewind()
        .then_ignore(
            // TODO: figure out if this could be simplified
            choice((
                just(" ")
                    .repeated()
                    .then_ignore(just("\n").repeated().exactly(1).or_not()),
                just("\n")
                    .repeated()
                    .exactly(1)
                    .then_ignore(just(" ").repeated().or_not()),
            ))
            .then_ignore(just(" ").repeated()),
        );
    let body = value_seperator
        .or_not()
        .ignored()
        .then(value)
        .map(|(_, v)| v)
        .repeated()
        .then_ignore(
            just(" ")
                .repeated()
                .ignored()
                .then(text::newline().repeated().at_least(2).or_not()),
        );
    let word_def = {
        let pop_types = {
            let pop_type = keyword("nat")
                .to(VType::Nat)
                .or(keyword("int").to(VType::Int))
                .or(keyword("bool").to(VType::Bool))
                .or(keyword("char").to(VType::Char))
                .or(keyword("str").to(VType::Str));
            pop_type
                .padded()
                .repeated()
                .collect::<Vec<VType>>()
                .labelled("pop_types")
                .boxed()
        };
        let push_types = {
            let push_type = keyword("nat")
                .to(VType::Nat)
                .or(keyword("int").to(VType::Int))
                .or(keyword("char").to(VType::Char))
                .or(keyword("str").to(VType::Str));
            push_type
                .padded()
                .repeated()
                .collect::<Vec<VType>>()
                .labelled("push_types")
                .boxed()
        };
        let effects = {
            let effect_keyword = keyword("paint")
                .to(Effect::Paint)
                .or(keyword("sing").to(Effect::Sing))
                .or(keyword("store").to(Effect::Store))
                .or(keyword("do").to(Effect::Do));
            let effect = just("~").ignore_then(effect_keyword).labelled("effect");
            effect.padded().repeated().labelled("effects").boxed()
        };
        let definition = text::whitespace()
            .ignore_then(name)
            .then_ignore(just(" "))
            .then(pop_types)
            .then_ignore(keyword("is").or(keyword("are")).padded())
            .then(push_types)
            .then(effects)
            .then_ignore(just(":"))
            .map(|(((name, pop_types), push_types), effects)| {
                (name, pop_types, push_types, effects)
            });
        definition
            .then(body.clone())
            .map(|((name, pop_types, push_types, effects), body)| {
                WordDef::new(
                    name,
                    body,
                    WType::new().push(push_types).pop(pop_types),
                    effects,
                )
            })
    };
    word_def
        .repeated()
        .then(body)
        .map(|(defs, body): (Vec<WordDef>, Vec<Value>)| Program::new(defs, body))
 }
 // Unit tests for parsing, type checking and recursion detection.
 #[cfg(test)]
 mod tests {
    use crate::typecheck::TypeStack;
    use super::*;
    // Parses two definitions (`b` references `a`) plus a one-word body and compares
    // the result with a hand-built `Program`.
    #[test]
    fn test_parser() {
        let input = "
                    a is nat: 5 7 * 
                    b is nat: 
                        5 a * 
                    a
                    ";
        let ast = vec![
            WordDef::new(
                "a",
                vec![Value::Nat(5), Value::Nat(7), Value::Op("*".to_string())],
                WType::new().push(vec![VType::Nat]),
                vec![],
            ),
            WordDef::new(
                "b",
                vec![
                    Value::Nat(5),
                    Value::Word("a".to_string()),
                    Value::Op("*".to_string()),
                ],
                WType::new().push(vec![VType::Nat]),
                vec![],
            ),
        ];
        let body: Vec<Value> = vec![Value::Word("a".to_string())];
        println!("sound: {:?}\n", parser().parse(input).unwrap());
        assert_eq!(parser().parse(input).unwrap(), Program::new(ast, body));
    }
    // Checks one program expected to type-check and three expected to fail
    // (bad definitions, bad body, and both).
    #[test]
    fn test_typecheck() {
        let sound = "
                    a is nat: 5 7 * 
                    b nat nat is nat: 
                        a * 
                    a 5 * 
                    ";
        // `a` declares two pushed nats but its body leaves only one
        let unsound_defs = "
                    a is nat nat: 5 7 *
                    b nat is nat:
                        a *
                    a 5 *
                    ";
        // the body ends with a second `*` that has only one operand left
        let unsound_body = "
                    a is nat: 5 7 *
                    b nat is nat:
                        a *
                    a 5 * *
                    ";
        let unsound_body_and_defs = "
                    a is nat nat: 5 7 *
                    b nat is nat:
                        a *
                    a 5 * *
                    ";
        // Parses `input` and asserts that checking it succeeds iff `sound` is true.
        fn typecheck(input: &str, sound: bool) {
            let mut parsed = parse(input).unwrap();
            // NOTE(review): `check` also calls flatten_refs() and reduce_body() itself,
            // so these two calls run that work a second time -- confirm they are needed
            parsed.symtab.flatten_refs();
            parsed.reduce_body();
            let stack = TypeStack::new();
            if sound {
                assert!(parsed.check(stack).is_ok());
            } else {
                assert!(parsed.check(stack).is_err());
            }
        }
        typecheck(sound, true);
        typecheck(unsound_defs, false);
        typecheck(unsound_body, false);
        typecheck(unsound_body_and_defs, false);
    }
    // Definitions that reference each other in a cycle must be rejected by `check`.
    #[test]
    fn test_illegal_recursion() {
        // two-word cycle: a -> b -> a
        let illegal = "
                    a is: b
                    b is: a
                    a
                    ";
        // three-word cycle: a -> b -> c -> a
        let illegal_multilevel = "
                    a is: b
                    b is: c
                    c is: a
                    a
                    ";
        // Parses `input` and asserts that checking it fails.
        fn typecheck(input: &str) {
            let mut parsed = parse(input).unwrap();
            let stack = TypeStack::new();
            println!("{:?}", parsed.check(TypeStack::new()));
            assert!(parsed.check(stack).is_err());
        }
        typecheck(illegal);
        typecheck(illegal_multilevel);
    }
 }
@@ -1,84 +0,0 @@
 use std::fmt::Debug;
 use chumsky::chain::Chain;
 /// A stack of types used by the type checker. The last element of the inner
 /// vector is the top of the stack.
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct TypeStack<T>(Vec<T>);
 impl<T: Debug + PartialEq> TypeStack<T> {
    /// Creates an empty stack.
    pub fn new() -> Self {
        TypeStack(vec![])
    }
    /// Number of entries on the stack.
    pub fn len(&self) -> usize {
        self.0.len()
    }
    /// Returns `true` when the stack has no entries.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }
    /// Removes the top entry, if any, and returns the stack; popping an empty
    /// stack is a no-op.
    pub fn pop(mut self) -> TypeStack<T> {
        let _ = self.0.pop();
        self
    }
    /// Pushes `t` on top and returns the stack.
    pub fn push(mut self, t: T) -> TypeStack<T> {
        self.0.push(t);
        self
    }
    /// Tests if `ts` matches the top of the stack and, on success, returns the
    /// stack with those entries removed.
    ///
    /// Returns an error if `ts` is longer than the stack or if any entry differs.
    pub fn test_consume(mut self, ts: TypeStack<T>) -> Result<TypeStack<T>, String> {
        if ts.len() > self.len() {
            Err(format!("error during test: {ts:?} is bigger than {self:?}"))
        } else {
            for (index, t) in ts.0.iter().rev().enumerate() {
                // cannot fail: the length check above guarantees enough entries
                let val = self.0.pop().unwrap();
                if val != *t {
                    return Err(format!("type mismatch between {self:?} and {ts:?}\n{t:?} doesn't match {val:?} at stack depth {index:?}"));
                }
            }
            Ok(self)
        }
    }
    /// Tests if `ts` matches the top of the stack without modifying the stack.
    ///
    /// Returns an error if `ts` is longer than the stack or if any entry differs.
    pub fn test(&self, ts: &TypeStack<T>) -> Result<(), String> {
        if ts.len() > self.len() {
            Err(format!("error during test: {ts:?} is bigger than {self:?}"))
        } else {
            // walk both stacks from the top down so the entry at depth `index` of
            // `ts` is compared with the entry at the same depth of `self`
            for (index, (t, val)) in ts.0.iter().rev().zip(self.0.iter().rev()).enumerate() {
                if val != t {
                    return Err(format!("type mismatch between {self:?} and {ts:?}\n{t:?} doesn't match {val:?} at stack depth {index:?}"));
                }
            }
            Ok(())
        }
    }
    // pub fn test_many(mut self, mut tss: Vec<TypeStack<T>>) -> Result<TypeStack<T>, String> {
    //     if tss.is_empty() {
    //         return Ok(TypeStack::new());
    //     }
    //     for _i in 0..tss.len() {
    //         let ts = tss.pop().unwrap();
    //         self = self.test(ts)?;
    //         if self.is_ok() {
    //             return self;
    //         }
    //     }
    //     Err("did not match any types".to_string())
    // }
    /// Moves every element of `t` onto the top of the stack, leaving `t` empty.
    pub fn append(&mut self, t: &mut Vec<T>) {
        self.0.append(t);
    }
 }
 impl<T: PartialEq + Debug> From<Vec<T>> for TypeStack<T> {
    fn from(value: Vec<T>) -> Self {
        TypeStack(value)
    }
 }
 /// Something that can be type-checked against a stack of `T`.
 pub trait Checkable<T: PartialEq + Debug> {
    /// Checks `self` starting from `stack`.
    ///
    /// Returns the resulting stack on success, or an error message on failure.
    /// Takes `&mut self`, so an implementation may modify the value while checking.
    fn check(&mut self, stack: TypeStack<T>) -> Result<TypeStack<T>, String>;
 }
@@ -0,0 +1,15 @@
 ; Each entry below is a length-prefixed blob: the first byte is the number of
 ; payload bytes that follow it on the same .byte line.
 ; 0 
 test_contiguous_binary:
    .byte 3,$1,$2,$3
 ; 1 - assembled from "plus.asm"
 ; NOTE(review): if these are 65C02 opcodes the payload reads as
 ;   clc / lda 0,x / adc 2,x / sta 2,x / lda 1,x / adc 3,x / sta 3,x / dex / dex
 ; -- confirm against plus.asm.
 ; NOTE(review): subroutine_push below also uses dex/dex before storing a new
 ; cell, so confirm that ending plus with dex/dex (rather than inx/inx) is intended.
 subroutine_plus:
    .byte 15, $18,$b5,$00,$75,$02,$95, $02, $b5, $01, $75, $03, $95, $03, $ca, $ca
 ; 2
 subroutine_push:
    .byte 6,$ca,$ca,$95,$0,$74,$1
    ; listing for the six payload bytes above:
    ; dex
    ; dex
    ; sta 0, x
    ; stz 1, x
@@ -0,0 +1,78 @@
 # fuzzy syntax in a well-defined grammar so i don't lose my mind
 ## notation
 | notation | meaning                                       |
 | -------- | --------------------------------------------- |
 | abc      | syntactical production                        |
 | :        | maps production to children (products?)       |
 | ()       | groups items                                  |
 | ʕ·ᴥ·ʔ    | any 8-bit georgesci character                 |
 | `abc`    | exact character(s)                            |
 | \x       | an escape character                           |
 | x?       | optional                                      |
 | x\*      | zero or more of x                             |
 | x+       | one or more of x                              |
 | x+y      | y or more of x                                |
 | x.y      | y repetitions of x                            |
 | \|       | one or another                                |
 | [-]      | any characters in range (>=1 ranges accepted) |
 (adapted from the rust reference cause i like how simple they do it)
 ## grammar
 the only semantically significant whitespace is \n+2 after a word definition.
 otherwise, assume tokens are delimited by an arbitrary amount of (not \n+2) whitespace, including no whitespace, e.g. the colon in `hello is: "hello"`
 also order is significant! if `value` produced `word` first, it would make reserved words like `true` and `false` parse into word references.
 ```syntax
 george: defs? body
 defs: (def \n+2)*
 body: values
 def: signature `:` values
 signature: `danger!`? word typedef
 values: (value | op)*
 typedef: pop? `is` push? effects?
 pop: type*
 push: type*
 effects: effect*
 type: `bool` | `nat` | `int` | `char` | `string` | `word`
 effect: `paint` | `sing` | `store`
 value: bool | num | char | string | word
 op: `!` | `&` | `|` | `+` | `-` | `*` | `/` | `=` | `>` | `<` | `#`
 quote: `[` values `]`
 bool: `true` | `false`
 word: [a-z A-Z]+
 num: hexnum | decimalnum
 decimalnum: decimaldigit+
 decimaldigit: [0-9]
 hexnum: (`$` hexdigit+)
 hexdigit: [0-9 a-f A-F]
 char: `'` ʕ·ᴥ·ʔ `'`
 string: `"` ʕ·ᴥ·ʔ* `"`
 ```
 ## notes
 fuzzy assumes the source text to be encoded in [georgesci](#), which is nearly ascii-compatible and should only cause minor headaches <3
Author	SHA1	Message	Date
august	c0e7f4024c	fuzzy's compiler is written in assembly now :)	2024-10-07 01:44:20 -04:00
august	2d4df76be7	fuzzy compiler v0 rev 0, written in assembly, running on george :)	2024-10-06 22:06:10 -04:00
august	cbc7bff7f7	fuzzy spec :)	2024-10-06 21:56:37 -04:00