Compare commits
No commits in common. "c0e7f4024c85e3f0e485a2b24d2cd27abb46de2a" and "ed8e20f0dbe792f658c42c23ecbc6cf53b9998f9" have entirely different histories.
c0e7f4024c
...
ed8e20f0db
|
@ -0,0 +1,271 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "ahash"
|
||||
version = "0.8.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "allocator-api2"
|
||||
version = "0.2.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "324c74f2155653c90b04f25b2a47a8a631360cb908f92a772695f430c7e31052"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "chumsky"
|
||||
version = "0.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9"
|
||||
dependencies = [
|
||||
"hashbrown",
|
||||
"stacker",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
|
||||
|
||||
[[package]]
|
||||
name = "fuzzy"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"chumsky",
|
||||
"indextree",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.14.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"allocator-api2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "indextree"
|
||||
version = "4.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0d6f1b8dbc8f1e5a0f45e05b9293c42cbab79086baeb3e914d3936f8149edc4f"
|
||||
dependencies = [
|
||||
"indextree-macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indextree-macros"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "357230c23ee6024223892ce0de19888a04139ca5bb94f5becb04d38b75a4bccf"
|
||||
dependencies = [
|
||||
"either",
|
||||
"itertools",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"strum",
|
||||
"syn",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.155"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.86"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "psm"
|
||||
version = "0.1.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.36"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6"
|
||||
|
||||
[[package]]
|
||||
name = "stacker"
|
||||
version = "0.1.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"psm",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.26.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
|
||||
dependencies = [
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.26.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.71"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.63"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.63"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy"
|
||||
version = "0.7.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
|
||||
dependencies = [
|
||||
"zerocopy-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy-derive"
|
||||
version = "0.7.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
|
@ -0,0 +1,15 @@
|
|||
[package]
name = "fuzzy"
version = "0.1.0"
edition = "2021"

[dependencies]
chumsky = "0.9.3"
indextree = "4.7.2"

# The crate is built as a plain Rust library plus one binary named "main".
[lib]
crate-type = ["lib"]

[[bin]]
name = "main"
|
24
README.md
24
README.md
|
@ -16,26 +16,16 @@ fuzzy is part of george, and shouldn't run anywhere else.
|
|||
|
||||
#### low-level
|
||||
|
||||
(most of) fuzzy could be written as assembler macros, but that's no fun
|
||||
(most of) fuzzy could be made with assembler macros, but that's no fun
|
||||
|
||||
#### reliable
|
||||
|
||||
if fuzzy says it can run, george can run it
|
||||
|
||||
## how to work on fuzzy
|
||||
## feature progress
|
||||
|
||||
edit `program.asm` and run `./run.sh`. the program gets included in the fuzzy compiler `fuzzy.asm` and is assembled with `vasm6502_oldstyle`, then george runs the program, reading out her system image when she reaches `stp` or `brk`
|
||||
|
||||
then the program she compiled gets formatted as a standard 32k rom, and she reads it again, and then shows her system image again when the program finishes (hits `stp` or `brk`).
|
||||
|
||||
since fuzzy works on a zero-page data stack, it's pretty easy to read the results of a program from the hexdump.
|
||||
|
||||
for now this loop only works on apple silicon, but eventually i'll compile a `george` binary for x86 linux and switch based on the host platform.
|
||||
|
||||
## reference help
|
||||
|
||||
i wrote [syntax](./syntax.md) and [semantics](./semantics.md) docs to keep track of how fuzzy works before starting work on the compiler implementation in assembly. they're the _official_ source of truth for how fuzzy works. assume that the compiler implementation is always in flux :)
|
||||
|
||||
## a note on implementation
|
||||
|
||||
i was writing fuzzy's compiler in rust for a sec, but then i realized that it would be a fun challenge to write it in assembly. it's been wayyy easier! and fun! and so rewarding :) this feels like a flex but i'm genuinely just so happy to see george & fuzzy playing together in this little computer world i've made <3
|
||||
- [x] parser
|
||||
- roughly complete, but want to finish the whole pipeline before adding things to the parser
|
||||
- [x] typechecker
|
||||
- generates a "type stack" from parsed input and checks that word definition types match their body
|
||||
- [ ] code generation
|
||||
|
|
214
fuzzy.asm
214
fuzzy.asm
|
@ -1,214 +0,0 @@
|
|||
; ʕ·ᴥ·ʔ- fuzzy v0 rev 0: parse program text and spit out binary representation @ $4000
|
||||
|
||||
.include "./macro.inc"
|
||||
|
||||
n = $05 ; temporary storage for data stack operations
|
||||
base = $00
|
||||
result_binary_base = base ; pointer to where the next byte of binary data should be stored
|
||||
binary_base_index = result_binary_base + 2 ; offset for that pointer
|
||||
binary_subroutine_address = binary_base_index + 1 ; pointer to a subroutine to be written to the binary
|
||||
|
||||
.org $8000
|
||||
.include "./subroutines.inc"
|
||||
|
||||
program_text:
|
||||
.include "./program.inc"
|
||||
|
||||
reset:
|
||||
sei
|
||||
lda #0
|
||||
ldx #0
|
||||
ldy #0
|
||||
|
||||
main:
|
||||
stz binary_base_index
|
||||
lda #$40
|
||||
sta result_binary_base + 1 ; set where to store resulting binary
|
||||
stz binary_subroutine_address
|
||||
lda #$80
|
||||
sta binary_subroutine_address + 1 ; available subroutines start at $8000
|
||||
jsr compile_values
|
||||
stp
|
||||
|
||||
; parser loop: walks program_text one character at a time and hands every
; character that is not eof/space/newline to compile_values_op and then
; compile_values_nat.
; eventually this will be able to handle longer program strings, but indexing
; by y (max 256 characters) is fine for now
;
; in:       nothing (y is reset to 0 here)
; out:      returns at the 0 terminator, or when the character following a
;           handled character is a newline (see syntax.md for info)
; clobbers: a, y
;
; NOTE(review): compile_values_nat already does its own iny when it consumes a
; digit and the fall-through into .newline advances y again, so the character
; directly after a digit is skipped. that happens to skip the separating space
; in '2 3 +', but would drop the second digit of "23" -- confirm before adding
; multi-digit support.
compile_values:
  ldy #0
parser_loop:
  lda program_text, y ; get character at index
  cmp #0 ; is eof?
  beq .end ; yes, exit loop
  cmp #$20 ; is space? (was decimal 20, which never matches a real space)
  beq .space ; yes, skip this char
  cmp #12 ; is newline? NOTE(review): decimal 12 is form feed in ascii, line feed is 10 -- confirm the georgescii newline code
  beq .newline ; yes, handle newline
  jsr compile_values_op
  jsr compile_values_nat
.newline: ; we reached a newline (or finished a token), y is program string index
  iny ; WARN: don't accidentally iny in this loop w/out handling a character
  lda program_text, y ; load next char
  cmp #12 ; is newline?
  bne parser_loop ; no, keep parsing tokens
  rts ; yes, no more tokens in body (see syntax.md for info)
.space:
  iny ; step past the space; branching back without this would re-read it forever
  bra parser_loop
.end:
  rts
|
||||
|
||||
; compile_values_op: if the character in a is an operator, write that
; operator's subroutine into the output binary.
; in:  a = character value, y = program text index
; out: when a was "+", subroutine index 1 has been stored and a holds 1
;      (store_subroutine restores the a it was called with); otherwise a is
;      unchanged
; only iny if you find a matching character & consume it
compile_values_op:
  cmp #"+" ; vasm spells a character literal as #"c"
  beq .is_plus
  rts ; not an operator we know yet
.is_plus:
  lda #1 ; subroutine index passed for "+"
  jmp store_subroutine ; tail call: its rts returns straight to our caller
; cmp #"!" ; commenting these out for now to handle a single simple case
; cmp #"&"
; cmp #"|"
; cmp #"-"
; cmp #"*"
; cmp #"/"
; cmp #"="
; cmp #">"
; cmp #"<"
; cmp #"#"
|
||||
|
||||
; compile_values_nat: if the character in a is a decimal digit, emit
; `lda #<digit value>` followed by the push subroutine (index 2).
; in:  a = character value, y = program text index
; out: y is incremented when a digit was consumed; a is not preserved in
;      that case
; only iny if you find a matching character & consume it
; TODO:
; 1-3 digit decimal values
; 1-2 digit hex values
compile_values_nat:
; TODO:
; cmp #"$" ; is hex?
; bne .decimal ; no, try decimal
; cmp
; rts
  cmp #$30 ; before "0", the start of the 0-9 georgescii range?
  bcc .not_nat ; carry clear after cmp means a < $30
  cmp #$3a ; past "9" ($39), the end of the 0-9 georgescii range?
  bcs .not_nat ; carry set after cmp means a >= $3a
  pha ; keep the digit while the opcode byte is written
  lda #$a9 ; $a9: lda imm
  jsr store_binary
  pla
  jsr georgescii_decimal_to_value
  jsr store_binary ; operand byte for the lda imm
  lda #2 ; push
  jsr store_subroutine
  iny ; digit consumed
.not_nat:
  rts
|
||||
|
||||
; georgescii decimal value in a register, return equivalent plain value in a register
; in:  a = georgescii digit character ($30-$39)
; out: a = its numeric value (0-9)
georgescii_decimal_to_value:
  sec ; sbc subtracts an extra 1 when carry is clear, so carry must be set first
  sbc #$30 ; decimal digits start at georgescii $30
  rts
|
||||
|
||||
; we have binary in the a register we want to store
|
||||
store_binary:
|
||||
phy
|
||||
ldy binary_base_index
|
||||
sta (result_binary_base), y
|
||||
inc binary_base_index
|
||||
bne .not_overflow ; did we roll over?
|
||||
inc result_binary_base + 1 ; yes, roll over base address
|
||||
.not_overflow: ; no, carry on as normal
|
||||
ply
|
||||
rts
|
||||
|
||||
; binary_subroutine_address is a pointer to a subroutine that we want to store
; the first byte at the subroutine's address is its length
; every following byte is copied to the output with store_binary
; in:       binary_subroutine_address -> length byte followed by the body
; out:      a is preserved
; clobbers: x, y (store_subroutine saves them around this call)
store_contiguous_binary:
  pha ; just to be safe
  lda (binary_subroutine_address) ; get the subroutine length
  tax ; loop counter (tax also sets Z when the length is 0)
  beq .end ; nothing to copy; without this dex would wrap and copy 256 bytes
  ldy #1 ; index into subroutine, offset by one to skip subroutine length
.loop:
  lda (binary_subroutine_address), y
  jsr store_binary
  iny
  dex
  bne .loop
.end:
  pla
  rts
|
||||
|
||||
; this wouldn't be necessary if we could get the
; address of a label in vasm, but that's for another time
; (when i feel like writing an assembler lol)
; for now, pass the index of the subroutine (in subroutines.inc)
; to a and it will get written to binary_subroutine_address
;
; in:       a = subroutine index. for a non-zero index,
;           binary_subroutine_address must already point at the first
;           subroutine ($8000); main and store_subroutine both leave it there
; out:      binary_subroutine_address -> length byte of the requested
;           subroutine, a is preserved
; clobbers: x
get_subroutine_address:
  pha
  tax ; set up counter (also sets Z for index 0)
  bne .loop ; first subroutine?
  stz binary_subroutine_address ; yes, store its address
  lda #$80
  sta binary_subroutine_address + 1
  pla ; balance the pha above, otherwise rts pops a garbage return address
  rts
.loop: ; step over one subroutine per iteration
  lda (binary_subroutine_address) ; no, load length of subroutine
  inc ; distance from next subroutine (length byte + body)
  clc
  adc binary_subroutine_address ; add it to the current address
  sta binary_subroutine_address ; sta leaves the carry from adc untouched
  bcc .no_carry ; low byte did not wrap, high byte stays as it is
  inc binary_subroutine_address + 1 ; add the carry to the high byte of address
.no_carry:
  dex ; is this our address?
  bne .loop ; no, keep stepping
  pla ; yes, we're done
  rts
|
||||
|
||||
|
||||
; pass subroutine index to a and it will get written into the binary
|
||||
; TODO: stabilize subroutine location & just write a `jsr $subroutine` to the binary
|
||||
store_subroutine:
|
||||
pha
|
||||
phy
|
||||
phx
|
||||
jsr get_subroutine_address
|
||||
jsr store_contiguous_binary
|
||||
; reset subroutine address
|
||||
stz binary_subroutine_address
|
||||
lda #$80
|
||||
sta binary_subroutine_address + 1
|
||||
plx
|
||||
ply
|
||||
pla
|
||||
rts
|
||||
|
||||
; write error message and stop execution
|
||||
error:
|
||||
ldy #0
|
||||
.loop:
|
||||
lda .message, y
|
||||
sta $4000, y
|
||||
beq .end
|
||||
iny
|
||||
bra .loop
|
||||
.end:
|
||||
stp
|
||||
.message:
|
||||
.asciiz "ruh roh! fuzzy couldn't compile"
|
||||
|
||||
|
||||
|
||||
isr: ; interrupt service routine
|
||||
pha
|
||||
phx
|
||||
phy
|
||||
ply
|
||||
plx
|
||||
pla
|
||||
rti
|
||||
|
||||
|
||||
|
||||
.org $fffc
|
||||
.word reset
|
||||
.word isr
|
|
@ -14,13 +14,9 @@
|
|||
inx
|
||||
.endm
|
||||
|
||||
.macro push, cell_high, cell_low ; push a data stack cell
|
||||
.macro push ; push a data stack cell
|
||||
dex
|
||||
dex
|
||||
lda \cell_low
|
||||
sta 0, x
|
||||
lda \cell_high
|
||||
sta 1, x
|
||||
.endm
|
||||
|
||||
.macro push2 ; push 2 data stack cells
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
.asciiz '2 3 +'
|
16
run.sh
16
run.sh
|
@ -1,16 +0,0 @@
|
|||
#!/usr/bin/env bash
# Assemble fuzzy.asm, run it through ./george, wrap the compiled output as a
# 32k rom image, run that through ./george as well and hexdump the result.

set -e

# -f: on a clean checkout there are no artifacts yet; a plain `rm` would fail
# on the unmatched globs and `set -e` would abort before anything is built.
rm -f *.bin *.rom

# assembler output is discarded; with `set -e` a failed assembly stops here
vasm6502_oldstyle fuzzy.asm -dotdir -wdc02 -ldots -Fbin -o fuzzy.rom &> /dev/null

echo -e "\nʕ·ᴥ·ʔ- source text:\n"
cat program.inc

# first pass: feed the assembled rom to george and capture what she writes out
cat fuzzy.rom | ./george > compiled.bin

# copy 500 bytes starting at byte offset 16384 ($4000, where fuzzy stores the
# binary it generates), then pad the file out to 32k
dd skip=16384 count=500 if=compiled.bin of=compiled.rom bs=1 &> /dev/null
truncate -s 32k compiled.rom &> /dev/null

# patch the last three bytes of the image (file offsets 32765-32767)
printf '\x80\x00\x00' | dd of=compiled.rom bs=1 seek=32765 count=3 conv=notrunc &> /dev/null

# second pass: run the compiled program and show the result
cat compiled.rom | ./george > result.bin
echo -e "\n\nʕ·ᴥ·ʔ- compiled program result:\n"
hexdump -C ./result.bin
echo -e ""
|
85
semantics.md
85
semantics.md
|
@ -1,85 +0,0 @@
|
|||
# i swear this is what fuzzy actually does
|
||||
|
||||
## the stack
|
||||
|
||||
fuzzy works on a 16-bit cell-width, zero-page data stack indexed with the x register, as documented in Garth Wilson's [stack treatise](https://wilsonminesco.com/stacks/virtualstacks.html)
|
||||
|
||||
to push a byte onto the data stack, we just:
|
||||
|
||||
```asm
|
||||
dex ; decrement the stack pointer
|
||||
lda some_value ; load the byte we want on the stack into a
|
||||
sta 0, x ; put the byte on the stack!
|
||||
```
|
||||
|
||||
and to pop a byte off it:
|
||||
|
||||
```asm
|
||||
lda 0, x ; pop the top of stack off into a
|
||||
inx ; increment the stack pointer
|
||||
```
|
||||
|
||||
## types
|
||||
|
||||
these are used in word definitions, and refer to the type of an individual stack cell:
|
||||
|
||||
| type | desc |
|
||||
| ---------------------- | ----------------------------------------------------------- |
|
||||
| **bool** | a boolean value, represented by $0000 or $ffff |
|
||||
| **nat** | an unsigned 16-bit integer |
|
||||
| **int** | a signed 16-bit integer |
|
||||
| **char** | an 8-bit george-ascii character, padded with leading zeroes |
|
||||
| **string** | a 16-bit pointer to a string in memory |
|
||||
| **word** _`dangerous`_ | a 16-bit pointer to a fuzzy word or quotation |
|
||||
|
||||
## operators
|
||||
|
||||
- `!` NOT: applies NOT to tos
|
||||
- `&` AND: pops 2 off the stack and pushes the AND'ed result
|
||||
- `|` OR: pops 2 off the stack and pushes the OR'ed result
|
||||
- `+` add: pops 2 off the stack and pushes the sum
|
||||
- `-` subtract: pops 2 off the stack and pushes the difference
|
||||
- `*` multiply: pops 2 off the stack and pushes the result, truncating if it's >$FFFF
|
||||
- `/` divide: pops 2 off the stack and pushes the remainder and quotient
|
||||
- `=` equality: pushes true/false if the top 2 stack cells do/don't match
|
||||
- `>` greater than: pushes true/false if tos-1 is/isn't greater than tos
|
||||
- `<` less than: pushes true/false if tos-1 is/isn't less than tos
|
||||
- `#` quote _`dangerous`_: pops tos and pushes a word that produces its value
|
||||
|
||||
### supported types (this will need to be more clearly laid out later)
|
||||
|
||||
| operator | input type | output type | notes |
|
||||
| -------- | ------------------------ | ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `!` | `bool`, `nat`, `int` | `bool`, `nat`, `int` | |
|
||||
| `&` | `bool`, `nat`, `int` | `bool`, `nat`, `int` | |
|
||||
| `\|` | `bool`, `nat`, `int` | `bool`, `nat`, `int` | |
|
||||
| `+` | `nat` `nat`, `int` `int` | `nat`, `int` | |
|
||||
| `-` | `nat` `nat`, `int` `int` | `nat`, `int` | subtracting two `nat`s |
|
||||
| `*` | `nat` `nat`, `int` `int` | `nat`, `int` | most products will be truncated, since most 16 bit multiplications result in a >16 bit product, but in practice that shouldn't matter cause we're not doing science |
|
||||
| `/` | `nat` `nat`, `int` `int` | `nat` `nat`, `int` `int` | produces two cells, the quotient and remainder |
|
||||
| `=` | any any | `bool` | equality/order is checked based on stack cell value, not type (e.g. a `word` pointing to $abcd and a `nat` with the value $abcd are equivalent) |
|
||||
| `>` | any any | `bool` | see above |
|
||||
| `<` | any any | `bool` | see above |
|
||||
| `#` | any | `word` | _`dangerous`_ |
|
||||
|
||||
## `danger!`
|
||||
|
||||
the `danger!` keyword marks a word as being _`dangerous`_. certain language features can only be used in dangerous words, such as:
|
||||
|
||||
- inline assembly
|
||||
- quotations
|
||||
- typechecking quotations is a difficult problem & probably too complex to implement on george if we ever want to fully self-host fuzzy
|
||||
- unchecked operator usage
|
||||
- applying `+` to two chars, applying `&` to two strings, etc
|
||||
- this does not mean that _dangerous_ words are untyped! it just means that the type of the result of an operation is asserted to be the word's result type
|
||||
- `danger! dangerous_word nat nat is char: +` can't be used on a `nat char` stack, and any words used after `dangerous_word` treat the top of the stack as having a `char` and don't care that it was made with two `nat`s
|
||||
|
||||
the program body cannot use any _dangerous_ features. this makes it so that _dangerous_ behavior is contained to specific words.
|
||||
|
||||
## memory layout
|
||||
|
||||
| start | end | use |
|
||||
| ------ | ------ | ---------------------------- |
|
||||
| `$200` | `$300` | |
|
||||
| | | core language implementation |
|
||||
| | | core language implementation |
|
|
@ -0,0 +1,168 @@
|
|||
use core::panic;
|
||||
use std::{any::Any, fmt::Display};
|
||||
|
||||
use crate::{
|
||||
typecheck::{self, Checkable, TypeStack},
|
||||
Symtab,
|
||||
};
|
||||
|
||||
#[derive(PartialEq, Eq, Debug, Clone)]
|
||||
// enum values are parser values, not compiler values,
|
||||
// e.g. for `Str(String)` the `String` value will be put somewhere in memory
|
||||
// and a pointer to it will be put on the stack
|
||||
pub enum Value {
|
||||
Nat(u16), // 16-bit natural number
|
||||
Int(i16), // 16-bit twos-complement integer
|
||||
Bool(bool),
|
||||
Op(String),
|
||||
Char(char), // 8-bit georgescii character padded with leading zeros (might change later)
|
||||
Str(String), // 16-bit pointer to a string
|
||||
Word(String), // 16-bit pointer to a word
|
||||
}
|
||||
|
||||
/// The type of a single stack cell.
#[derive(PartialEq, Eq, Debug, Clone, PartialOrd, Ord)]
pub enum VType {
    Nat,
    Int,
    Bool,
    Char,
    Str,
}

/// The stack signature of a word: the cell types listed under `pop` and the
/// cell types listed under `push`.
#[derive(Eq, PartialEq, Debug, Clone)]
pub struct WType {
    pub pop: Vec<VType>,
    pub push: Vec<VType>,
}

impl WType {
    /// Creates a signature with empty `pop` and `push` lists.
    pub fn new() -> Self {
        Self {
            pop: Vec::new(),
            push: Vec::new(),
        }
    }

    /// Builder step: appends `t`, in order, to the `push` list.
    pub fn push(mut self, t: Vec<VType>) -> Self {
        self.push.extend(t);
        self
    }

    /// Builder step: appends `t`, in order, to the `pop` list.
    /// (Note: does not actually pop anything.)
    pub fn pop(mut self, t: Vec<VType>) -> Self {
        self.pop.extend(t);
        self
    }
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub enum Effect {
|
||||
Paint,
|
||||
Sing,
|
||||
Store,
|
||||
Do,
|
||||
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq, Clone)]
|
||||
pub struct WordDef {
|
||||
pub name: String,
|
||||
pub values: Vec<Value>,
|
||||
pub r#type: WType,
|
||||
pub effects: Vec<Effect>,
|
||||
}
|
||||
|
||||
impl WordDef {
|
||||
pub fn new<S>(name: S, values: Vec<Value>, r#type: WType, effects: Vec<Effect>) -> Self
|
||||
where
|
||||
S: Into<String>,
|
||||
{
|
||||
WordDef {
|
||||
name: name.into(),
|
||||
values,
|
||||
r#type,
|
||||
effects,
|
||||
}
|
||||
}
|
||||
|
||||
fn flatten_values(&self, symtab: &Symtab) -> Vec<Value> {
|
||||
let mut vals = vec![];
|
||||
for value in self.values.iter() {
|
||||
if let Value::Word(string) = value {
|
||||
let symbol = symtab.get(string);
|
||||
let mut child_vals = symbol.flatten_values(symtab);
|
||||
vals.append(&mut child_vals);
|
||||
} else {
|
||||
vals.push(value.clone());
|
||||
}
|
||||
}
|
||||
vals
|
||||
}
|
||||
|
||||
pub fn flatten(&self, symtab: &Symtab) -> WordDef {
|
||||
let values = self.flatten_values(symtab);
|
||||
WordDef::new(
|
||||
self.name.clone(),
|
||||
values,
|
||||
self.r#type.clone(),
|
||||
self.effects.clone(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for WordDef {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
writeln!(f, "{:#?}", self)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Checkable<VType> for WordDef {
|
||||
fn check(&mut self, mut stack: TypeStack<VType>) -> Result<TypeStack<VType>, String> {
|
||||
for value in self.values.iter() {
|
||||
println!(
|
||||
"WORDDEF: checking value {:?} in word {:?}, current stack is {:?}",
|
||||
value, self.name, stack
|
||||
);
|
||||
if let Value::Word(_) = value {
|
||||
panic!("Don't typecheck on an unflattened word!");
|
||||
}
|
||||
match value {
|
||||
Value::Op(op) => {
|
||||
if stack.len() < 2 {
|
||||
return Err(format!(
|
||||
"Checking def {:?}, stack is {:?}, expected a stack with 2 elements, got only {:?}",
|
||||
self.name,
|
||||
stack,
|
||||
stack.len()
|
||||
));
|
||||
} else {
|
||||
match op.as_str() {
|
||||
"+" | "*" => {
|
||||
println!("WORDDEF: checking Op");
|
||||
stack = stack.test_consume(
|
||||
TypeStack::new().push(VType::Nat).push(VType::Nat),
|
||||
)?;
|
||||
stack = stack.push(VType::Nat);
|
||||
}
|
||||
"&" | "||" => {
|
||||
stack = stack.test_consume(
|
||||
TypeStack::new().push(VType::Bool).push(VType::Bool),
|
||||
)?;
|
||||
stack = stack.pop().pop().push(VType::Bool);
|
||||
}
|
||||
_ => return Err(format!("unknown opcode {:?}", op)),
|
||||
}
|
||||
}
|
||||
}
|
||||
&Value::Nat(_) => stack = stack.push(VType::Nat),
|
||||
&Value::Int(_) => stack = stack.push(VType::Int),
|
||||
&Value::Bool(_) => stack = stack.push(VType::Int),
|
||||
&Value::Str(_) => stack = stack.push(VType::Str),
|
||||
&Value::Char(_) => stack = stack.push(VType::Char),
|
||||
&Value::Word(_) => unreachable!(),
|
||||
};
|
||||
}
|
||||
Ok(stack)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
use fuzzy::{
|
||||
ast::VType,
|
||||
parse,
|
||||
typecheck::{Checkable, TypeStack},
|
||||
};
|
||||
|
||||
fn main() {
|
||||
let input = "test is: 5 9 *\n\nexample int nat is ~paint ~sing:\n 5 \"lol\" test \"c\" \n\narrest int nat is ~paint ~sing: 5 \"lol\" \"a\" example\n\n5 6 * arrest example arrest";
|
||||
let mut program = parse(input).unwrap();
|
||||
println!("{:?}", program.check(TypeStack::new()));
|
||||
}
|
|
@ -0,0 +1,7 @@
|
|||
pub mod ast;
|
||||
pub mod compiler;
|
||||
pub mod parser;
|
||||
pub mod typecheck;
|
||||
|
||||
pub use compiler::*;
|
||||
pub use parser::*;
|
|
@ -0,0 +1,483 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
// TODO:
|
||||
// - add error types and error handling
|
||||
// - figure out if we can use a single ast or should make a second ast for compilation, then
|
||||
// convert to that after parsing/do another parsing step but on the ast made the first time
|
||||
// (i also don't really know what i'm doing so maybe u never do that or always do that or
|
||||
// something i don't understand yet)
|
||||
// - improve whitespace parsing
|
||||
// - think more about language rules:
|
||||
// - what types actually need to be exposed to the "user" (me) and what types can be internal
|
||||
// to the compiler (e.g. char vs str, where a str of length 1 could be treated as a char
|
||||
// internally (put on the stack as a value instead of put somewhere in memory))
|
||||
|
||||
use chumsky::{
|
||||
prelude::*,
|
||||
text::{ident, keyword},
|
||||
};
|
||||
|
||||
use crate::{
|
||||
ast::*,
|
||||
typecheck::{Checkable, TypeStack},
|
||||
};
|
||||
|
||||
#[derive(PartialEq, Debug, Clone)]
|
||||
pub struct Symtab(HashMap<String, WordDef>);
|
||||
|
||||
impl Symtab {
|
||||
fn new() -> Self {
|
||||
Symtab(HashMap::new())
|
||||
}
|
||||
|
||||
pub fn get(&self, string: &String) -> &WordDef {
|
||||
self.0.get(string).unwrap()
|
||||
}
|
||||
|
||||
pub fn add_def(&mut self, def: WordDef) {
|
||||
let key = def.name.clone();
|
||||
self.0.insert(key, def);
|
||||
}
|
||||
|
||||
pub fn flatten_refs(&mut self) {
|
||||
let mut new_symtab = Symtab::new();
|
||||
for (_string, def) in self.0.iter() {
|
||||
new_symtab.add_def(def.flatten(self));
|
||||
}
|
||||
// this is an abomination, there must be a better way
|
||||
self.0.clear();
|
||||
self.0.extend(new_symtab.0);
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<WordDef>> for Symtab {
|
||||
fn from(value: Vec<WordDef>) -> Self {
|
||||
let symtab: HashMap<String, WordDef> = value
|
||||
.iter()
|
||||
.map(|x| (x.name.to_owned(), x.to_owned()))
|
||||
.collect();
|
||||
Symtab(symtab)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Program {
|
||||
symtab: Symtab,
|
||||
body: Vec<Value>,
|
||||
}
|
||||
|
||||
impl Program {
|
||||
fn new(defs: Vec<WordDef>, body: Vec<Value>) -> Self {
|
||||
let symtab = Symtab::from(defs);
|
||||
Program { symtab, body }
|
||||
}
|
||||
|
||||
fn reduce_body(&mut self) {
|
||||
let mut vals = vec![];
|
||||
for value in self.body.iter() {
|
||||
if let Value::Word(string) = value {
|
||||
let symbol = self.symtab.get(string);
|
||||
let mut child_vals = symbol.flatten(&self.symtab).values;
|
||||
vals.append(&mut child_vals);
|
||||
} else {
|
||||
vals.push(value.clone());
|
||||
}
|
||||
}
|
||||
self.body = vals;
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Checkable<VType> for Program {
|
||||
fn check(
|
||||
&mut self,
|
||||
mut stack: crate::typecheck::TypeStack<VType>,
|
||||
) -> Result<crate::typecheck::TypeStack<VType>, String> {
|
||||
//TODO: https://trykv.medium.com/algorithms-on-graphs-directed-graphs-and-cycle-detection-3982dfbd11f5
|
||||
fn cyclic_graph_check(symtab: &Symtab) -> Result<(), String> {
|
||||
let mut visited: Vec<&WordDef> = vec![];
|
||||
let mut rec_stack: Vec<&WordDef> = vec![];
|
||||
for (_, def) in symtab.0.iter() {
|
||||
if !visited.contains(&def) {
|
||||
dfs_cycle_check(def, &mut visited, &mut rec_stack, symtab)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
fn dfs_cycle_check<'a>(
|
||||
def: &'a WordDef,
|
||||
visited: &mut Vec<&'a WordDef>,
|
||||
rec_stack: &mut Vec<&'a WordDef>,
|
||||
symtab: &'a Symtab,
|
||||
) -> Result<(), String> {
|
||||
visited.push(def);
|
||||
rec_stack.push(def);
|
||||
|
||||
for val in def.values.iter() {
|
||||
if let Value::Word(name) = val {
|
||||
let next_def = symtab.get(name);
|
||||
if !visited.contains(&next_def) {
|
||||
dfs_cycle_check(next_def, visited, rec_stack, symtab)?;
|
||||
} else if rec_stack.contains(&next_def) {
|
||||
return Err(format!(
|
||||
"illegal recursion detected! definitions {}create a reference cycle",
|
||||
rec_stack
|
||||
.iter()
|
||||
.map(|def| {
|
||||
let mut name = def.name.clone();
|
||||
name.insert(0, '"');
|
||||
name.push_str("\" ");
|
||||
name
|
||||
})
|
||||
.collect::<String>()
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rec_stack.pop();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
cyclic_graph_check(&self.symtab)?;
|
||||
|
||||
self.symtab.flatten_refs();
|
||||
|
||||
println!(
|
||||
"we have flattened refs, here's the symtab: {:#?}\n",
|
||||
self.symtab
|
||||
);
|
||||
|
||||
// then check that all symtab defs are sound
|
||||
// at this point they shouldn't have any references,
|
||||
// and if they do we will panic (see the Checkable impl for WordDef)
|
||||
for (name, def) in self.symtab.0.iter_mut() {
|
||||
let local_stack: TypeStack<VType> = def.r#type.pop.clone().into();
|
||||
println!(
|
||||
"PARSED: checking {:?}\ncurrent stack: {local_stack:?}\nword: {:?}",
|
||||
name, def
|
||||
);
|
||||
let result_stack = def.check(local_stack)?;
|
||||
if let Err(error) = result_stack.test(&def.r#type.push.clone().into()) {
|
||||
println!("{error:?}");
|
||||
return Err(error);
|
||||
}
|
||||
}
|
||||
|
||||
self.reduce_body();
|
||||
|
||||
// then we'll check that the body is sound with the given stack
|
||||
// maybe in the future i'll change this trait so there isn't a stack
|
||||
// param and the implementer picks what stack to check against
|
||||
//
|
||||
// TODO: this block also is shared behavior between basically all checkables but potentially with
|
||||
// different internal types for T, will have to figure out how to dedup this later
|
||||
for value in self.body.iter() {
|
||||
match value {
|
||||
Value::Op(op) => {
|
||||
if stack.len() < 2 {
|
||||
return Err(format!(
|
||||
"expected a stack with 2 elements, got only {:?}",
|
||||
stack.len()
|
||||
));
|
||||
} else {
|
||||
match op.as_str() {
|
||||
"+" | "*" => {
|
||||
stack = stack.test_consume(
|
||||
TypeStack::new().push(VType::Nat).push(VType::Nat),
|
||||
)?;
|
||||
stack = stack.pop().pop().push(VType::Nat);
|
||||
}
|
||||
"&" | "||" => {
|
||||
stack = stack.test_consume(
|
||||
TypeStack::new().push(VType::Bool).push(VType::Bool),
|
||||
)?;
|
||||
stack = stack.pop().pop().push(VType::Bool);
|
||||
}
|
||||
_ => return Err(format!("unknown opcode {:?}", op)),
|
||||
}
|
||||
}
|
||||
}
|
||||
&Value::Nat(_) => stack = stack.push(VType::Nat),
|
||||
&Value::Int(_) => stack = stack.push(VType::Int),
|
||||
&Value::Bool(_) => stack = stack.push(VType::Bool),
|
||||
&Value::Str(_) => stack = stack.push(VType::Str),
|
||||
&Value::Char(_) => stack = stack.push(VType::Char),
|
||||
&Value::Word(_) => unreachable!(),
|
||||
};
|
||||
}
|
||||
Ok(stack)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse<S>(input: S) -> Result<Program, Vec<Simple<char>>>
|
||||
where
|
||||
S: ToString,
|
||||
{
|
||||
let parsed = match parser().parse(input.to_string()) {
|
||||
Ok(parsed) => parsed,
|
||||
Err(error) => return Err(error),
|
||||
};
|
||||
|
||||
Ok(parsed)
|
||||
}
|
||||
|
||||
pub fn parser() -> impl Parser<char, Program, Error = Simple<char>> {
|
||||
let name = ident().labelled("word_name");
|
||||
let value = {
|
||||
// nats will be coerced to ints at compile time depending on word type
|
||||
let nat = text::int(10).map(|s: String| Value::Nat(s.parse().unwrap()));
|
||||
|
||||
// vice versa for non-negative ints
|
||||
let int = just("-").ignore_then(
|
||||
text::int::<char, Simple<char>>(10).map(|s: String| Value::Int(s.parse().unwrap())),
|
||||
);
|
||||
|
||||
let op = one_of::<char, &str, Simple<char>>("*+-/&|<>").map(|s| Value::Op(s.to_string()));
|
||||
|
||||
let str_or_char = just::<char, char, Simple<char>>('"')
|
||||
.ignore_then(none_of('"').repeated())
|
||||
.then_ignore(just('"'))
|
||||
.map(|s: Vec<char>| match s.len() {
|
||||
1 => Value::Char(s[0]),
|
||||
_ => Value::Str(s.into_iter().collect::<String>()),
|
||||
});
|
||||
|
||||
let word = name.map(|n: String| Value::Word(n));
|
||||
|
||||
let bool = keyword::<_, _, Simple<char>>("true")
|
||||
.map(|_| Value::Bool(true))
|
||||
.or(keyword("false").map(|_| Value::Bool(false)));
|
||||
|
||||
nat.or(int).or(bool).or(str_or_char).or(word).or(op)
|
||||
};
|
||||
let value_seperator = text::newline()
|
||||
.repeated()
|
||||
.at_least(2)
|
||||
.not()
|
||||
.rewind()
|
||||
.then_ignore(
|
||||
// TODO: figure out if this could be simplified
|
||||
choice((
|
||||
just(" ")
|
||||
.repeated()
|
||||
.then_ignore(just("\n").repeated().exactly(1).or_not()),
|
||||
just("\n")
|
||||
.repeated()
|
||||
.exactly(1)
|
||||
.then_ignore(just(" ").repeated().or_not()),
|
||||
))
|
||||
.then_ignore(just(" ").repeated()),
|
||||
);
|
||||
|
||||
let body = value_seperator
|
||||
.or_not()
|
||||
.ignored()
|
||||
.then(value)
|
||||
.map(|(_, v)| v)
|
||||
.repeated()
|
||||
.then_ignore(
|
||||
just(" ")
|
||||
.repeated()
|
||||
.ignored()
|
||||
.then(text::newline().repeated().at_least(2).or_not()),
|
||||
);
|
||||
let word_def = {
|
||||
let pop_types = {
|
||||
let pop_type = keyword("nat")
|
||||
.to(VType::Nat)
|
||||
.or(keyword("int").to(VType::Int))
|
||||
.or(keyword("bool").to(VType::Bool))
|
||||
.or(keyword("char").to(VType::Char))
|
||||
.or(keyword("str").to(VType::Str));
|
||||
|
||||
pop_type
|
||||
.padded()
|
||||
.repeated()
|
||||
.collect::<Vec<VType>>()
|
||||
.labelled("pop_types")
|
||||
.boxed()
|
||||
};
|
||||
|
||||
let push_types = {
|
||||
let push_type = keyword("nat")
|
||||
.to(VType::Nat)
|
||||
.or(keyword("int").to(VType::Int))
|
||||
.or(keyword("char").to(VType::Char))
|
||||
.or(keyword("str").to(VType::Str));
|
||||
push_type
|
||||
.padded()
|
||||
.repeated()
|
||||
.collect::<Vec<VType>>()
|
||||
.labelled("push_types")
|
||||
.boxed()
|
||||
};
|
||||
|
||||
let effects = {
|
||||
let effect_keyword = keyword("paint")
|
||||
.to(Effect::Paint)
|
||||
.or(keyword("sing").to(Effect::Sing))
|
||||
.or(keyword("store").to(Effect::Store))
|
||||
.or(keyword("do").to(Effect::Do));
|
||||
|
||||
let effect = just("~").ignore_then(effect_keyword).labelled("effect");
|
||||
|
||||
effect.padded().repeated().labelled("effects").boxed()
|
||||
};
|
||||
|
||||
let definition = text::whitespace()
|
||||
.ignore_then(name)
|
||||
.then_ignore(just(" "))
|
||||
.then(pop_types)
|
||||
.then_ignore(keyword("is").or(keyword("are")).padded())
|
||||
.then(push_types)
|
||||
.then(effects)
|
||||
.then_ignore(just(":"))
|
||||
.map(|(((name, pop_types), push_types), effects)| {
|
||||
(name, pop_types, push_types, effects)
|
||||
});
|
||||
|
||||
definition
|
||||
.then(body.clone())
|
||||
.map(|((name, pop_types, push_types, effects), body)| {
|
||||
WordDef::new(
|
||||
name,
|
||||
body,
|
||||
WType::new().push(push_types).pop(pop_types),
|
||||
effects,
|
||||
)
|
||||
})
|
||||
};
|
||||
word_def
|
||||
.repeated()
|
||||
.then(body)
|
||||
.map(|(defs, body): (Vec<WordDef>, Vec<Value>)| Program::new(defs, body))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::typecheck::TypeStack;

    use super::*;

    /// Two definitions and a one-word body parse into the expected AST.
    #[test]
    fn test_parser() {
        let input = "
            a is nat: 5 7 *

            b is nat:
            5 a *

            a
        ";

        let ast = vec![
            WordDef::new(
                "a",
                vec![Value::Nat(5), Value::Nat(7), Value::Op("*".to_string())],
                WType::new().push(vec![VType::Nat]),
                vec![],
            ),
            WordDef::new(
                "b",
                vec![
                    Value::Nat(5),
                    Value::Word("a".to_string()),
                    Value::Op("*".to_string()),
                ],
                WType::new().push(vec![VType::Nat]),
                vec![],
            ),
        ];
        let body: Vec<Value> = vec![Value::Word("a".to_string())];
        // parse once and compare; assert_eq! prints both sides on failure
        let parsed = parser().parse(input).unwrap();
        assert_eq!(parsed, Program::new(ast, body));
    }

    /// Sound programs type-check; unsound definitions and/or bodies do not.
    #[test]
    fn test_typecheck() {
        let sound = "
            a is nat: 5 7 *

            b nat nat is nat:
            a *

            a 5 *
        ";

        let unsound_defs = "
            a is nat nat: 5 7 *

            b nat is nat:
            a *

            a 5 *
        ";

        let unsound_body = "
            a is nat: 5 7 *

            b nat is nat:
            a *

            a 5 * *
        ";

        let unsound_body_and_defs = "
            a is nat nat: 5 7 *

            b nat is nat:
            a *

            a 5 * *
        ";

        fn typecheck(input: &str, sound: bool) {
            let mut parsed = parse(input).unwrap();
            // check() performs these two steps itself as well
            parsed.symtab.flatten_refs();
            parsed.reduce_body();

            let result = parsed.check(TypeStack::new());
            assert_eq!(
                result.is_ok(),
                sound,
                "unexpected typecheck result: {result:?}"
            );
        }

        typecheck(sound, true);
        typecheck(unsound_defs, false);
        typecheck(unsound_body, false);
        typecheck(unsound_body_and_defs, false);
    }

    /// Definitions that reference each other in a cycle are rejected.
    #[test]
    fn test_illegal_recursion() {
        let illegal = "
            a is: b

            b is: a

            a
        ";

        let illegal_multilevel = "
            a is: b

            b is: c

            c is: a

            a
        ";

        fn typecheck(input: &str) {
            let mut parsed = parse(input).unwrap();
            // check() mutates the program, so run it exactly once and report
            // the outcome through the assertion message
            let result = parsed.check(TypeStack::new());
            assert!(
                result.is_err(),
                "expected a recursion error, got {result:?}"
            );
        }

        typecheck(illegal);
        typecheck(illegal_multilevel);
    }
}
|
|
@ -0,0 +1,84 @@
|
|||
use std::fmt::Debug;
|
||||
|
||||
use chumsky::chain::Chain;
|
||||
|
||||
/// A stack of types; the last element of the inner vector is the top.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TypeStack<T>(Vec<T>);

impl<T: Debug + PartialEq> TypeStack<T> {
    /// Creates an empty stack.
    pub fn new() -> Self {
        TypeStack(vec![])
    }

    /// Number of entries on the stack.
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns `true` when the stack has no entries.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Removes the top entry (a no-op on an empty stack) and returns the stack.
    pub fn pop(mut self) -> TypeStack<T> {
        let _ = self.0.pop();
        self
    }

    /// Pushes `t` on top and returns the stack.
    pub fn push(mut self, t: T) -> TypeStack<T> {
        self.0.push(t);
        self
    }

    /// Tests if `ts` matches the top of the stack, removing the matched
    /// entries.
    ///
    /// On success the returned stack no longer contains the `ts.len()` top
    /// entries. Fails if `ts` is larger than the stack or if any entry
    /// differs.
    pub fn test_consume(mut self, ts: TypeStack<T>) -> Result<TypeStack<T>, String> {
        if ts.len() > self.len() {
            return Err(format!("error during test: {ts:?} is bigger than {self:?}"));
        }
        for (index, t) in ts.0.iter().rev().enumerate() {
            // cannot fail: the length check above guarantees enough entries
            let val = self.0.pop().unwrap();
            if val != *t {
                return Err(format!("type mismatch between {self:?} and {ts:?}\n{t:?} doesn't match {val:?} at stack depth {index:?}"));
            }
        }
        Ok(self)
    }

    /// Tests if `ts` matches the top of the stack without modifying it.
    ///
    /// Entries are compared pairwise from the top down. Fails if `ts` is
    /// larger than the stack or if any pair differs.
    pub fn test(&self, ts: &TypeStack<T>) -> Result<(), String> {
        if ts.len() > self.len() {
            return Err(format!("error during test: {ts:?} is bigger than {self:?}"));
        }
        // Walk both stacks from the top in lockstep. Comparing every expected
        // type against the topmost entry only would reject any expectation
        // with two or more differing types.
        for (index, (val, t)) in self.0.iter().rev().zip(ts.0.iter().rev()).enumerate() {
            if val != t {
                return Err(format!("type mismatch between {self:?} and {ts:?}\n{t:?} doesn't match {val:?} at stack depth {index:?}"));
            }
        }
        Ok(())
    }

    // pub fn test_many(mut self, mut tss: Vec<TypeStack<T>>) -> Result<TypeStack<T>, String> {
    //     if tss.is_empty() {
    //         return Ok(TypeStack::new());
    //     }
    //     for _i in 0..tss.len() {
    //         let ts = tss.pop().unwrap();
    //         self = self.test(ts)?;
    //         if self.is_ok() {
    //             return self;
    //         }
    //     }
    //     Err("did not match any types".to_string())
    // }

    /// Moves every element of `t` onto the top of the stack, leaving `t` empty.
    pub fn append(&mut self, t: &mut Vec<T>) {
        self.0.append(t);
    }
}

impl<T: PartialEq + Debug> From<Vec<T>> for TypeStack<T> {
    /// Wraps `value` as a stack; its last element becomes the top.
    fn from(value: Vec<T>) -> Self {
        TypeStack(value)
    }
}
|
||||
|
||||
/// Something that can be type-checked against a `TypeStack` of `T`.
pub trait Checkable<T: PartialEq + Debug> {
|
||||
/// Checks `self` starting from `stack`; returns the resulting stack on
/// success or an error message on failure. Takes `&mut self`, so an
/// implementation may modify the value while checking it.
fn check(&mut self, stack: TypeStack<T>) -> Result<TypeStack<T>, String>;
|
||||
}
|
|
@ -1,15 +0,0 @@
|
|||
; Each entry below is a label followed by one .byte row whose first value
; equals the number of bytes that follow it on that row (3, 15 and 6).
; 0
|
||||
test_contiguous_binary:
|
||||
.byte 3,$1,$2,$3
|
||||
|
||||
; 1 - assembled from "plus.asm"
|
||||
subroutine_plus:
|
||||
; NOTE(review): this row ends in $ca,$ca, the same two bytes that
; subroutine_push annotates as "dex / dex" - confirm against plus.asm
; that the routine is meant to move x in the same direction as a push.
.byte 15, $18,$b5,$00,$75,$02,$95, $02, $b5, $01, $75, $03, $95, $03, $ca, $ca
|
||||
|
||||
; 2
|
||||
subroutine_push:
|
||||
.byte 6,$ca,$ca,$95,$0,$74,$1
|
||||
; mnemonics for the six bytes above:
; dex
|
||||
; dex
|
||||
; sta 0, x
|
||||
; stz 1, x
|
78
syntax.md
78
syntax.md
|
@ -1,78 +0,0 @@
|
|||
# fuzzy syntax in a well-defined grammar so i don't lose my mind
|
||||
|
||||
## notation
|
||||
|
||||
| notation | meaning |
|
||||
| -------- | --------------------------------------------- |
|
||||
| abc | syntactical production |
|
||||
| : | maps production to children (products?) |
|
||||
| () | groups items |
|
||||
| ʕ·ᴥ·ʔ | any 8-bit georgesci character |
|
||||
| `abc` | exact character(s) |
|
||||
| \x | an escape character |
|
||||
| x? | optional |
|
||||
| x\* | zero or more of x |
|
||||
| x+ | one or more of x |
|
||||
| x+y | y or more of x |
|
||||
| x.y | y repetitions of x |
|
||||
| \| | one or another |
|
||||
| [-] | any characters in range (>=1 ranges accepted) |
|
||||
|
||||
(adapted from the Rust Reference, because i like how simply they do it)
|
||||
|
||||
## grammar
|
||||
|
||||
the only semantically significant whitespace is \n+2 after a word definition.
|
||||
|
||||
otherwise, assume tokens are delimited by an arbitrary amount of (not \n+2) whitespace, including no whitespace, e.g. the colon in `hello is: "hello"`
|
||||
|
||||
also order is significant! if `value` produced `word` first, it would make reserved words like `true` and `false` parse into word references.
|
||||
|
||||
```syntax
|
||||
george: defs? body
|
||||
|
||||
defs: (def \n+2)*
|
||||
body: values
|
||||
|
||||
def: signature `:` values
|
||||
signature: `danger!`? word typedef
|
||||
|
||||
values: (value | op)*
|
||||
|
||||
typedef: pop? `is` push? effects?
|
||||
|
||||
pop: type*
|
||||
|
||||
push: type*
|
||||
|
||||
effects: effect*
|
||||
|
||||
type: `bool` | `nat` | `int` | `char` | `string` | `word`
|
||||
|
||||
effect: `paint` | `sing` | `store`
|
||||
|
||||
value: bool | num | char | string | word
|
||||
|
||||
op: `!` | `&` | `|` | `+` | `-` | `*` | `/` | `=` | `>` | `<` | `#`
|
||||
|
||||
quote: `[` values `]`
|
||||
|
||||
bool: `true` | `false`
|
||||
|
||||
word: [a-z A-Z]+
|
||||
|
||||
num: hexnum | binarynum
|
||||
|
||||
binarynum: binarydigit+
|
||||
binarydigit: [0-9]
|
||||
hexnum: (`$` hexdigit+)
|
||||
hexdigit: [0-9 a-f A-F]
|
||||
|
||||
char: `'` ʕ·ᴥ·ʔ `'`
|
||||
|
||||
string: `"` ʕ·ᴥ·ʔ* `"`
|
||||
```
|
||||
|
||||
## notes
|
||||
|
||||
fuzzy assumes the source text to be encoded in [georgesci](#), which is nearly ascii-compatible and should only cause minor headaches <3
|
Loading…
Reference in New Issue