Compare commits
No commits in common. "c0e7f4024c85e3f0e485a2b24d2cd27abb46de2a" and "ed8e20f0dbe792f658c42c23ecbc6cf53b9998f9" have entirely different histories.
c0e7f4024c
...
ed8e20f0db
|
@ -0,0 +1,271 @@
|
||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ahash"
|
||||||
|
version = "0.8.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"once_cell",
|
||||||
|
"version_check",
|
||||||
|
"zerocopy",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "allocator-api2"
|
||||||
|
version = "0.2.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cc"
|
||||||
|
version = "1.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "324c74f2155653c90b04f25b2a47a8a631360cb908f92a772695f430c7e31052"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cfg-if"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "chumsky"
|
||||||
|
version = "0.9.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9"
|
||||||
|
dependencies = [
|
||||||
|
"hashbrown",
|
||||||
|
"stacker",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "either"
|
||||||
|
version = "1.13.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fuzzy"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"chumsky",
|
||||||
|
"indextree",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hashbrown"
|
||||||
|
version = "0.14.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
|
||||||
|
dependencies = [
|
||||||
|
"ahash",
|
||||||
|
"allocator-api2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "heck"
|
||||||
|
version = "0.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "indextree"
|
||||||
|
version = "4.7.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0d6f1b8dbc8f1e5a0f45e05b9293c42cbab79086baeb3e914d3936f8149edc4f"
|
||||||
|
dependencies = [
|
||||||
|
"indextree-macros",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "indextree-macros"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "357230c23ee6024223892ce0de19888a04139ca5bb94f5becb04d38b75a4bccf"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
"itertools",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"strum",
|
||||||
|
"syn",
|
||||||
|
"thiserror",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "itertools"
|
||||||
|
version = "0.13.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libc"
|
||||||
|
version = "0.2.155"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "once_cell"
|
||||||
|
version = "1.19.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.86"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "psm"
|
||||||
|
version = "0.1.21"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.36"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustversion"
|
||||||
|
version = "1.0.17"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "stacker"
|
||||||
|
version = "0.1.15"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"cfg-if",
|
||||||
|
"libc",
|
||||||
|
"psm",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "strum"
|
||||||
|
version = "0.26.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
|
||||||
|
dependencies = [
|
||||||
|
"strum_macros",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "strum_macros"
|
||||||
|
version = "0.26.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
|
||||||
|
dependencies = [
|
||||||
|
"heck",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"rustversion",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "2.0.71"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "thiserror"
|
||||||
|
version = "1.0.63"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
|
||||||
|
dependencies = [
|
||||||
|
"thiserror-impl",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "thiserror-impl"
|
||||||
|
version = "1.0.63"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-ident"
|
||||||
|
version = "1.0.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "version_check"
|
||||||
|
version = "0.9.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi"
|
||||||
|
version = "0.3.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-i686-pc-windows-gnu",
|
||||||
|
"winapi-x86_64-pc-windows-gnu",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-i686-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-x86_64-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerocopy"
|
||||||
|
version = "0.7.35"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
|
||||||
|
dependencies = [
|
||||||
|
"zerocopy-derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerocopy-derive"
|
||||||
|
version = "0.7.35"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
|
@ -0,0 +1,15 @@
|
||||||
|
[package]
|
||||||
|
name = "fuzzy"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
chumsky = { version = "0.9.3"}
|
||||||
|
indextree = "4.7.2"
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
|
||||||
|
crate-type = ["lib"]
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "main"
|
24
README.md
24
README.md
|
@ -16,26 +16,16 @@ fuzzy is part of george, and shouldn't run anywhere else.
|
||||||
|
|
||||||
#### low-level
|
#### low-level
|
||||||
|
|
||||||
(most of) fuzzy could be written as assembler macros, but that's no fun
|
(most of) fuzzy could be made with assembler macros, but that's no fun
|
||||||
|
|
||||||
#### reliable
|
#### reliable
|
||||||
|
|
||||||
if fuzzy says it can run, george can run it
|
if fuzzy says it can run, george can run it
|
||||||
|
|
||||||
## how to work on fuzzy
|
## feature progress
|
||||||
|
|
||||||
edit `program.asm` and run `./run.sh`. the program gets included in the fuzzy compiler `fuzzy.asm` and is assembled with `vasm6502_oldstyle`, then george runs the program, reading out her system image when she reaches `stp` or `brk`
|
- [x] parser
|
||||||
|
- roughly complete, but want to finish the whole pipeline before adding things to the parser
|
||||||
then the program she compiled gets formatted as a standard 32k rom, and she reads it again, and then shows her system image again when the program finishes (hits `stp` or `brk`).
|
- [x] typechecker
|
||||||
|
- generates a "type stack" from parsed input and checks that word definition types match their body
|
||||||
since fuzzy works on a zero-page data stack, it's pretty easy to read the results of a program from the hexdump.
|
- [ ] code generation
|
||||||
|
|
||||||
for now this loop only works on apple silicon, but eventually i'll compile a `george` binary for x86 linux and switch based on the host platform.
|
|
||||||
|
|
||||||
## reference help
|
|
||||||
|
|
||||||
i wrote [syntax](./syntax.md) and [semantics](./semantics.md) docs to keep track of how fuzzy works before starting work on the compiler implementation in assembly. they're the _official_ source of truth for how fuzzy works. assume that the compiler implementation is always in flux :)
|
|
||||||
|
|
||||||
## a note on implementation
|
|
||||||
|
|
||||||
i was writing fuzzy's compiler in rust for a sec, but then i realized that it would be a fun challenge to write it in assembly. it's been wayyy easier! and fun! and so rewarding :) this feels like a flex but i'm genuinely just so happy to see george & fuzzy playing together in this little computer world i've made <3
|
|
||||||
|
|
214
fuzzy.asm
214
fuzzy.asm
|
@ -1,214 +0,0 @@
|
||||||
; ʕ·ᴥ·ʔ- fuzzy v0 rev 0: parse program text and spit out binary representation @ $4000
|
|
||||||
|
|
||||||
.include "./macro.inc"
|
|
||||||
|
|
||||||
n = $05 ; temporary storage for data stack operations
|
|
||||||
base = $00
|
|
||||||
result_binary_base = base ; pointer to where the next byte of binary data should be stored
|
|
||||||
binary_base_index = result_binary_base + 2 ; offset for that pointer
|
|
||||||
binary_subroutine_address = binary_base_index + 1 ; pointer to a subroutine to be written to the binary
|
|
||||||
|
|
||||||
.org $8000
|
|
||||||
.include "./subroutines.inc"
|
|
||||||
|
|
||||||
program_text:
|
|
||||||
.include "./program.inc"
|
|
||||||
|
|
||||||
; reset: entry point named by the reset vector. Masks interrupts, clears the
; registers, then falls straight through into main.
reset:
    sei
    lda #0
    ldx #0
    ldy #0

; main: set up the zero-page pointers, compile program_text, then halt.
main:
    stz binary_base_index
    stz result_binary_base              ; low byte was never initialised; without this the
                                        ; output address depends on leftover zero-page contents
    lda #$40
    sta result_binary_base + 1          ; compiled binary is written starting at $4000
    stz binary_subroutine_address
    lda #$80
    sta binary_subroutine_address + 1   ; available subroutines start at $8000
    jsr compile_values
    stp
|
|
||||||
|
|
||||||
; compile_values: walk program_text one character at a time (indexed by y) and emit code for each token.
; parser loop, eventually this will be able to handle longer program strings, but indexing by y is fine for now
; every non-eof / non-space / non-newline character is offered to compile_values_op and then to
; compile_values_nat, after which execution falls through into .newline, which is what advances y.
|
|
||||||
compile_values:
|
|
||||||
ldy #0
|
|
||||||
parser_loop:
|
|
||||||
lda program_text, y ; get character at index
|
|
||||||
cmp #0 ; is eof? (program text is zero-terminated)
|
|
||||||
beq .end ; yes, exit loop
|
|
||||||
cmp #20 ; is space? NOTE(review): this is decimal 20; ascii space is $20 (32) - confirm the intended code
|
|
||||||
beq parser_loop ; yes, skip this char. NOTE(review): y is not advanced on this path, so a match would reload the same character forever
|
|
||||||
cmp #12 ; is newline? NOTE(review): decimal 12; ascii line feed is 10 - confirm the intended code
|
|
||||||
beq .newline ; yes, handle newline
|
|
||||||
jsr compile_values_op ; try the character as an operator
|
|
||||||
jsr compile_values_nat ; then as a digit; NOTE(review): a may no longer hold the character if the operator path changed it
|
|
||||||
.newline: ; we reached a newline, y is program string index (also reached by fall-through after every token)
|
|
||||||
iny ; WARN: don't accidentally iny in this loop w/out handling a character
|
|
||||||
lda program_text, y ; load next char
|
|
||||||
cmp #12 ; is newline? (same constant as above)
|
|
||||||
bne parser_loop ; no, keep parsing tokens
|
|
||||||
rts ; yes, no more tokens in body (see syntax.md for info)
|
|
||||||
.end:
|
|
||||||
rts
|
|
||||||
|
|
||||||
; compile_values_op: if the character in a is an operator, emit its subroutine.
; in:  a = character, y = program text index (left untouched here)
; only "+" is recognised for now; it maps to subroutine index 1.
compile_values_op:
    cmp #"+"                ; vasm character-literal syntax
    bne .done               ; not an operator we know: leave a alone
    lda #1                  ; subroutine index handed to store_subroutine for "+"
    jsr store_subroutine
.done:
    rts
; operators still to be handled (kept disabled to cover a single simple case first):
; cmp #"!"
; cmp #"&"
; cmp #"|"
; cmp #"-"
; cmp #"*"
; cmp #"/"
; cmp #"="
; cmp #">"
; cmp #"<"
; cmp #"#"
|
|
||||||
|
|
||||||
; compile_values_nat: if the character in a is a decimal digit, emit
; `lda #<digit value>` followed by the push subroutine, and consume the character.
; in:  a = character, y = program text index
; out: y incremented only when a digit was consumed
; TODO:
; 1-3 digit decimal values
; 1-2 digit hex values
compile_values_nat:
    ; TODO:
    ; cmp #"$" ; is hex?
    ; bne .decimal ; no, try decimal
    ; cmp
    ; rts
    cmp #48                 ; below "0" ($30)? bcc branches when a < 48
    bcc .not_nat            ; (the old bound of 47 let "/" through)
    cmp #58                 ; past "9" ($39)? bcs branches when a >= 58
    bcs .not_nat            ; (the old bound of 57 rejected "9" itself)
    pha                     ; keep the digit while the opcode is written
    lda #$a9                ; $a9: lda imm
    jsr store_binary
    pla
    jsr georgescii_decimal_to_value
    jsr store_binary        ; operand byte for the lda imm
    lda #2                  ; push
    jsr store_subroutine
    iny                     ; digit consumed
.not_nat:
    rts
|
|
||||||
|
|
||||||
; georgescii_decimal_to_value: convert a georgescii decimal digit to its numeric value.
; in:  a = digit character
; out: a = digit value
georgescii_decimal_to_value:
    sec                     ; sbc subtracts an extra 1 when carry is clear, so carry
                            ; must be set for a plain subtraction (clc here gave "2" -> 1)
    sbc #$30                ; decimal digits start at georgescii $30
    rts
|
|
||||||
|
|
||||||
; store_binary: append the byte in a to the output binary.
; writes to (result_binary_base) + binary_base_index, then advances the index;
; when the index wraps to zero the pointer's high byte moves to the next page.
; y is saved and restored around the write.
store_binary:
    phy
    ldy binary_base_index
    sta (result_binary_base), y
    inc binary_base_index
    bne .same_page                  ; index did not wrap: still on the same page
    inc result_binary_base + 1      ; index wrapped: bump the base to the next page
.same_page:
    ply
    rts
|
|
||||||
|
|
||||||
; store_contiguous_binary: copy the subroutine at (binary_subroutine_address) into the output binary.
; the first byte at that address is the body length; the body follows it.
; a is saved and restored; x and y are used as counter and index and are not restored here.
; NOTE(review): a length byte of 0 makes the dex/bne loop run 256 times - confirm lengths are never 0.
store_contiguous_binary:
    pha                                 ; just to be safe
    lda (binary_subroutine_address)     ; body length
    tax                                 ; x counts the bytes left to copy
    ldy #1                              ; start past the length byte
.copy:
    lda (binary_subroutine_address), y
    jsr store_binary
    iny
    dex
    bne .copy
    pla
    rts
|
|
||||||
|
|
||||||
; get_subroutine_address: point binary_subroutine_address at the subroutine with index a.
; this wouldn't be necessary if we could get the address of a label in vasm,
; but that's for another time (when i feel like writing an assembler lol).
; for now, pass the index of the subroutine (in subroutines.asm) in a.
; each subroutine starts with a length byte, so reaching index n means skipping
; n entries of (length + 1) bytes, starting from the pointer's current value.
; in:  a = subroutine index
; out: binary_subroutine_address updated; a preserved; x clobbered
get_subroutine_address:
    pha
    tax                                 ; x = number of subroutines to skip
    bne .skip                           ; first subroutine?
    stz binary_subroutine_address       ; yes, it lives at $8000
    lda #$80
    sta binary_subroutine_address + 1
    bra .done                           ; leave through the pla: returning directly with the
                                        ; saved a still on the stack corrupted the return address
.skip:
    lda (binary_subroutine_address)     ; length of the current subroutine
    sec                                 ; carry-in adds the 1 for the length byte itself
    adc binary_subroutine_address       ; add it to the current address
    sta binary_subroutine_address
    bcc .same_page                      ; no carry out: high byte unchanged
    inc binary_subroutine_address + 1   ; carry out: move to the next page
                                        ; (the old bcs skipped exactly this case)
.same_page:
    dex                                 ; more subroutines to skip?
    bne .skip                           ; yes, keep going
.done:
    pla
    rts
|
|
||||||
|
|
||||||
|
|
||||||
; store_subroutine: write the subroutine with index a into the output binary.
; a, y and x are saved on entry and restored on exit.
; TODO: stabilize subroutine location & just write a `jsr $subroutine` to the binary
store_subroutine:
    pha
    phy
    phx
    jsr get_subroutine_address          ; binary_subroutine_address -> requested subroutine
    jsr store_contiguous_binary         ; copy its body into the binary
    lda #$80                            ; put the lookup pointer back at $8000
    sta binary_subroutine_address + 1
    stz binary_subroutine_address
    plx
    ply
    pla
    rts
|
|
||||||
|
|
||||||
; error: copy the zero-terminated failure message to $4000 (terminator included) and halt.
error:
    ldy #0
.copy:
    lda .text, y
    sta $4000, y
    beq .halt                   ; sta leaves the flags alone, so this tests the byte just loaded
    iny
    bra .copy
.halt:
    stp
.text:
    .asciiz "ruh roh! fuzzy couldn't compile"
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
; isr: interrupt service routine. Does no work yet: it saves a, x and y,
; restores them in reverse order and returns from the interrupt.
isr:
    pha
    phx
    phy
    ; (handler body would go here)
    ply
    plx
    pla
    rti
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
.org $fffc
|
|
||||||
.word reset
|
|
||||||
.word isr
|
|
|
@ -14,13 +14,9 @@
|
||||||
inx
|
inx
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro push, cell_high, cell_low ; push a data stack cell
|
.macro push ; push a data stack cell
|
||||||
dex
|
dex
|
||||||
dex
|
dex
|
||||||
lda \cell_low
|
|
||||||
sta 0, x
|
|
||||||
lda \cell_high
|
|
||||||
sta 1, x
|
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro push2 ; push 2 data stack cells
|
.macro push2 ; push 2 data stack cells
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
.asciiz '2 3 +'
|
|
16
run.sh
16
run.sh
|
@ -1,16 +0,0 @@
|
||||||
#!/usr/bin/env bash
# Assemble fuzzy, let george run it to compile program.inc, repackage the
# compiled bytes as a 32k rom, run that rom, and hexdump the final system image.

set -e

# -f: on a fresh checkout there are no artifacts yet; a plain `rm` would fail
# on the unmatched globs and `set -e` would abort before anything is built.
rm -f -- *.bin *.rom
vasm6502_oldstyle fuzzy.asm -dotdir -wdc02 -ldots -Fbin -o fuzzy.rom &> /dev/null;
echo -e "\nʕ·ᴥ·ʔ- source text:\n";
cat program.inc;
cat fuzzy.rom | ./george > compiled.bin;
# the compiler writes its output at $4000 (16384) in the system image
dd skip=16384 count=500 if=compiled.bin of=compiled.rom bs=1 &> /dev/null;
truncate -s 32k compiled.rom &> /dev/null;
# patch the last three bytes of the rom; presumably this sets the reset vector to $8000 - verify against george's memory map
printf '\x80\x00\x00' | dd of=compiled.rom bs=1 seek=32765 count=3 conv=notrunc &> /dev/null;
cat compiled.rom | ./george > result.bin;
echo -e "\n\nʕ·ᴥ·ʔ- compiled program result:\n";
hexdump -C ./result.bin;
echo -e "";
|
|
85
semantics.md
85
semantics.md
|
@ -1,85 +0,0 @@
|
||||||
# i swear this is what fuzzy actually does
|
|
||||||
|
|
||||||
## the stack
|
|
||||||
|
|
||||||
fuzzy works on a 16-bit cell-width, zero-page data stack indexed with the x register, as documented in Garth Wilson's [stack treatise](https://wilsonminesco.com/stacks/virtualstacks.html)
|
|
||||||
|
|
||||||
to push a byte onto the data stack, we just:
|
|
||||||
|
|
||||||
```asm
|
|
||||||
dex ; decrement the stack pointer
|
|
||||||
lda some_value ; load the byte we want on the stack into a
|
|
||||||
sta 0, x ; put the byte on the stack!
|
|
||||||
```
|
|
||||||
|
|
||||||
and to pop a byte off it:
|
|
||||||
|
|
||||||
```asm
|
|
||||||
lda 0, x ; pop the top of stack off into a
|
|
||||||
inx ; increment the stack pointer
|
|
||||||
```
|
|
||||||
|
|
||||||
## types
|
|
||||||
|
|
||||||
these are used in word definitions, and refer to the type of an individual stack cell:
|
|
||||||
|
|
||||||
| type | desc |
|
|
||||||
| ---------------------- | ----------------------------------------------------------- |
|
|
||||||
| **bool** | a boolean value, represented by $0000 or $ffff |
|
|
||||||
| **nat** | an unsigned 16-bit integer |
|
|
||||||
| **int** | a signed 16-bit integer |
|
|
||||||
| **char** | an 8-bit george-ascii character, padded with leading zeroes |
|
|
||||||
| **string** | a 16-bit pointer to a string in memory |
|
|
||||||
| **word** _`dangerous`_ | a 16-bit pointer to a fuzzy word or quotation |
|
|
||||||
|
|
||||||
## operators
|
|
||||||
|
|
||||||
- `!` NOT: applies NOT to tos
|
|
||||||
- `&` AND: pops 2 off the stack and pushes the AND'ed result
|
|
||||||
- `|` OR: pops 2 off the stack and pushes the OR'ed result
|
|
||||||
- `+` add: pops 2 off the stack and pushes the sum
|
|
||||||
- `-` subtract: pops 2 off the stack and pushes the difference
|
|
||||||
- `*` multiply: pops 2 off the stack and pushes the result, truncating if it's >$FFFF
|
|
||||||
- `/` divide: pops 2 off the stack and pushes the remainder and quotient
|
|
||||||
- `=` equality: pushes true/false if the top 2 stack cells do/don't match
|
|
||||||
- `>` greater than: pushes true/false if tos-1 is/isn't greater than tos
|
|
||||||
- `<` less than: pushes true/false if tos-1 is/isn't less than tos
|
|
||||||
- `#` quote _`dangerous`_: pops tos and pushes a word that produces its value
|
|
||||||
|
|
||||||
### supported types (this will need to be more clearly laid out later)
|
|
||||||
|
|
||||||
| operator | input type | output type | notes |
|
|
||||||
| -------- | ------------------------ | ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
||||||
| `!` | `bool`, `nat`, `int` | `bool`, `nat`, `int` | |
|
|
||||||
| `&` | `bool`, `nat`, `int` | `bool`, `nat`, `int` | |
|
|
||||||
| `\|` | `bool`, `nat`, `int` | `bool`, `nat`, `int` | |
|
|
||||||
| `+` | `nat` `nat`, `int` `int` | `nat`, `int` | |
|
|
||||||
| `-` | `nat` `nat`, `int` `int` | `nat`, `int` | subtracting two `nat`s |
|
|
||||||
| `*` | `nat` `nat`, `int` `int` | `nat`, `int` | most products will be truncated, since most 16 bit multiplications result in a >16 bit product, but in practice that shouldn't matter cause we're not doing science |
|
|
||||||
| `/` | `nat` `nat`, `int` `int` | `nat` `nat`, `int` `int` | produces two cells, the quotient and remainder |
|
|
||||||
| `=` | any any | `bool` | equality/order is checked based on stack cell value, not type (e.g. a `word` pointing to $abcd and a `nat` with the value $abcd are equivalent) |
|
|
||||||
| `>` | any any | `bool` | see above |
|
|
||||||
| `<` | any any | `bool` | see above |
|
|
||||||
| `#` | any | `word` | _`dangerous`_ |
|
|
||||||
|
|
||||||
## `danger!`
|
|
||||||
|
|
||||||
the `danger!` keyword marks a word as being _`dangerous`_. certain language features can only be used in dangerous words, such as:
|
|
||||||
|
|
||||||
- inline assembly
|
|
||||||
- quotations
|
|
||||||
- typechecking quotations is a difficult problem & probably too complex to implement on george if we ever want to fully self-host fuzzy
|
|
||||||
- unchecked operator usage
|
|
||||||
- applying `+` to two chars, applying `&` to two strings, etc
|
|
||||||
- this does not mean that _dangerous_ words are untyped! just the type of the result of an operation is asserted to be the word result type
|
|
||||||
- `danger! dangerous_word num num is char: +` can't be used on a `num char` stack, and any words used after `dangerous_word` treat the top of the stack as having a `char` and don't care that it was made with two `num`s
|
|
||||||
|
|
||||||
the program body cannot use any _dangerous_ features. this makes it so that _dangerous_ behavior is contained to specific words.
|
|
||||||
|
|
||||||
## memory layout
|
|
||||||
|
|
||||||
| start | end | use |
|
|
||||||
| ------ | ------ | ---------------------------- |
|
|
||||||
| `$200` | `$300` | |
|
|
||||||
| | | core language implementation |
|
|
||||||
| | | core language implementation |
|
|
|
@ -0,0 +1,168 @@
|
||||||
|
use core::panic;
|
||||||
|
use std::{any::Any, fmt::Display};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
typecheck::{self, Checkable, TypeStack},
|
||||||
|
Symtab,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// A single value as produced by the parser.
///
/// These are parser values, not compiler values: for `Str(String)`, for
/// example, the `String` will be put somewhere in memory and a pointer to it
/// will be put on the stack.
#[derive(PartialEq, Eq, Debug, Clone)]
pub enum Value {
    /// 16-bit natural number
    Nat(u16),
    /// 16-bit twos-complement integer
    Int(i16),
    /// boolean literal
    Bool(bool),
    /// operator, stored as its source text
    Op(String),
    /// 8-bit georgescii character padded with leading zeros (might change later)
    Char(char),
    /// 16-bit pointer to a string
    Str(String),
    /// 16-bit pointer to a word
    Word(String),
}
|
||||||
|
|
||||||
|
/// Type tag for a single stack cell, as tracked by the typechecker.
///
/// The derived ordering follows declaration order, so the variants must stay
/// in this sequence.
#[derive(PartialEq, Eq, Debug, Clone, PartialOrd, Ord)]
pub enum VType {
    /// natural number
    Nat,
    /// integer
    Int,
    /// boolean
    Bool,
    /// character
    Char,
    /// string
    Str,
}
|
||||||
|
|
||||||
|
#[derive(Eq, PartialEq, Debug, Clone)]
|
||||||
|
pub struct WType {
|
||||||
|
pub pop: Vec<VType>,
|
||||||
|
pub push: Vec<VType>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WType {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
WType {
|
||||||
|
pop: vec![],
|
||||||
|
push: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adds a `push` type
|
||||||
|
pub fn push(mut self, mut t: Vec<VType>) -> Self {
|
||||||
|
self.push.append(&mut t);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adds a `pop` type (Note: does not actually pop anything)
|
||||||
|
pub fn pop(mut self, mut t: Vec<VType>) -> Self {
|
||||||
|
self.pop.append(&mut t);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Effect markers that can be attached to a word definition.
// NOTE(review): the test program in main.rs writes these as `~paint` / `~sing`
// after `is`; confirm the exact mapping against the parser.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Effect {
    Paint,
    Sing,
    Store,
    Do,
}
|
||||||
|
|
||||||
|
#[derive(Debug, Eq, PartialEq, Clone)]
|
||||||
|
pub struct WordDef {
|
||||||
|
pub name: String,
|
||||||
|
pub values: Vec<Value>,
|
||||||
|
pub r#type: WType,
|
||||||
|
pub effects: Vec<Effect>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WordDef {
|
||||||
|
pub fn new<S>(name: S, values: Vec<Value>, r#type: WType, effects: Vec<Effect>) -> Self
|
||||||
|
where
|
||||||
|
S: Into<String>,
|
||||||
|
{
|
||||||
|
WordDef {
|
||||||
|
name: name.into(),
|
||||||
|
values,
|
||||||
|
r#type,
|
||||||
|
effects,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn flatten_values(&self, symtab: &Symtab) -> Vec<Value> {
|
||||||
|
let mut vals = vec![];
|
||||||
|
for value in self.values.iter() {
|
||||||
|
if let Value::Word(string) = value {
|
||||||
|
let symbol = symtab.get(string);
|
||||||
|
let mut child_vals = symbol.flatten_values(symtab);
|
||||||
|
vals.append(&mut child_vals);
|
||||||
|
} else {
|
||||||
|
vals.push(value.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vals
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn flatten(&self, symtab: &Symtab) -> WordDef {
|
||||||
|
let values = self.flatten_values(symtab);
|
||||||
|
WordDef::new(
|
||||||
|
self.name.clone(),
|
||||||
|
values,
|
||||||
|
self.r#type.clone(),
|
||||||
|
self.effects.clone(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for WordDef {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
writeln!(f, "{:#?}", self)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Checkable<VType> for WordDef {
|
||||||
|
fn check(&mut self, mut stack: TypeStack<VType>) -> Result<TypeStack<VType>, String> {
|
||||||
|
for value in self.values.iter() {
|
||||||
|
println!(
|
||||||
|
"WORDDEF: checking value {:?} in word {:?}, current stack is {:?}",
|
||||||
|
value, self.name, stack
|
||||||
|
);
|
||||||
|
if let Value::Word(_) = value {
|
||||||
|
panic!("Don't typecheck on an unflattened word!");
|
||||||
|
}
|
||||||
|
match value {
|
||||||
|
Value::Op(op) => {
|
||||||
|
if stack.len() < 2 {
|
||||||
|
return Err(format!(
|
||||||
|
"Checking def {:?}, stack is {:?}, expected a stack with 2 elements, got only {:?}",
|
||||||
|
self.name,
|
||||||
|
stack,
|
||||||
|
stack.len()
|
||||||
|
));
|
||||||
|
} else {
|
||||||
|
match op.as_str() {
|
||||||
|
"+" | "*" => {
|
||||||
|
println!("WORDDEF: checking Op");
|
||||||
|
stack = stack.test_consume(
|
||||||
|
TypeStack::new().push(VType::Nat).push(VType::Nat),
|
||||||
|
)?;
|
||||||
|
stack = stack.push(VType::Nat);
|
||||||
|
}
|
||||||
|
"&" | "||" => {
|
||||||
|
stack = stack.test_consume(
|
||||||
|
TypeStack::new().push(VType::Bool).push(VType::Bool),
|
||||||
|
)?;
|
||||||
|
stack = stack.pop().pop().push(VType::Bool);
|
||||||
|
}
|
||||||
|
_ => return Err(format!("unknown opcode {:?}", op)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
&Value::Nat(_) => stack = stack.push(VType::Nat),
|
||||||
|
&Value::Int(_) => stack = stack.push(VType::Int),
|
||||||
|
&Value::Bool(_) => stack = stack.push(VType::Int),
|
||||||
|
&Value::Str(_) => stack = stack.push(VType::Str),
|
||||||
|
&Value::Char(_) => stack = stack.push(VType::Char),
|
||||||
|
&Value::Word(_) => unreachable!(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
Ok(stack)
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,11 @@
|
||||||
|
use fuzzy::{
|
||||||
|
ast::VType,
|
||||||
|
parse,
|
||||||
|
typecheck::{Checkable, TypeStack},
|
||||||
|
};
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let input = "test is: 5 9 *\n\nexample int nat is ~paint ~sing:\n 5 \"lol\" test \"c\" \n\narrest int nat is ~paint ~sing: 5 \"lol\" \"a\" example\n\n5 6 * arrest example arrest";
|
||||||
|
let mut program = parse(input).unwrap();
|
||||||
|
println!("{:?}", program.check(TypeStack::new()));
|
||||||
|
}
|
|
@ -0,0 +1,7 @@
|
||||||
|
pub mod ast;
|
||||||
|
pub mod compiler;
|
||||||
|
pub mod parser;
|
||||||
|
pub mod typecheck;
|
||||||
|
|
||||||
|
pub use compiler::*;
|
||||||
|
pub use parser::*;
|
|
@ -0,0 +1,483 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
// TODO:
|
||||||
|
// - add error types and error handling
|
||||||
|
// - figure out if we can use a single ast or should make a second ast for compilation, then
|
||||||
|
// convert to that after parsing/do another parsing step but on the ast made the first time
|
||||||
|
// (i also don't really know what i'm doing so maybe u never do that or always do that or
|
||||||
|
// something i don't understand yet)
|
||||||
|
// - improve whitespace parsing
|
||||||
|
// - think more about language rules:
|
||||||
|
// - what types actually need to be exposed to the "user" (me) and what types can be internal
|
||||||
|
// to the compiler (e.g. char vs str, where a str of length 1 could be treated as a char
|
||||||
|
// internally (put on the stack as a value instead of put somewhere in memory))
|
||||||
|
|
||||||
|
use chumsky::{
|
||||||
|
prelude::*,
|
||||||
|
text::{ident, keyword},
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
ast::*,
|
||||||
|
typecheck::{Checkable, TypeStack},
|
||||||
|
};
|
||||||
|
|
||||||
|
#[derive(PartialEq, Debug, Clone)]
|
||||||
|
pub struct Symtab(HashMap<String, WordDef>);
|
||||||
|
|
||||||
|
impl Symtab {
|
||||||
|
fn new() -> Self {
|
||||||
|
Symtab(HashMap::new())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get(&self, string: &String) -> &WordDef {
|
||||||
|
self.0.get(string).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add_def(&mut self, def: WordDef) {
|
||||||
|
let key = def.name.clone();
|
||||||
|
self.0.insert(key, def);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn flatten_refs(&mut self) {
|
||||||
|
let mut new_symtab = Symtab::new();
|
||||||
|
for (_string, def) in self.0.iter() {
|
||||||
|
new_symtab.add_def(def.flatten(self));
|
||||||
|
}
|
||||||
|
// this is an abomination, there must be a better way
|
||||||
|
self.0.clear();
|
||||||
|
self.0.extend(new_symtab.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<Vec<WordDef>> for Symtab {
|
||||||
|
fn from(value: Vec<WordDef>) -> Self {
|
||||||
|
let symtab: HashMap<String, WordDef> = value
|
||||||
|
.iter()
|
||||||
|
.map(|x| (x.name.to_owned(), x.to_owned()))
|
||||||
|
.collect();
|
||||||
|
Symtab(symtab)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub struct Program {
|
||||||
|
symtab: Symtab,
|
||||||
|
body: Vec<Value>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Program {
|
||||||
|
fn new(defs: Vec<WordDef>, body: Vec<Value>) -> Self {
|
||||||
|
let symtab = Symtab::from(defs);
|
||||||
|
Program { symtab, body }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn reduce_body(&mut self) {
|
||||||
|
let mut vals = vec![];
|
||||||
|
for value in self.body.iter() {
|
||||||
|
if let Value::Word(string) = value {
|
||||||
|
let symbol = self.symtab.get(string);
|
||||||
|
let mut child_vals = symbol.flatten(&self.symtab).values;
|
||||||
|
vals.append(&mut child_vals);
|
||||||
|
} else {
|
||||||
|
vals.push(value.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.body = vals;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Checkable<VType> for Program {
|
||||||
|
fn check(
|
||||||
|
&mut self,
|
||||||
|
mut stack: crate::typecheck::TypeStack<VType>,
|
||||||
|
) -> Result<crate::typecheck::TypeStack<VType>, String> {
|
||||||
|
//TODO: https://trykv.medium.com/algorithms-on-graphs-directed-graphs-and-cycle-detection-3982dfbd11f5
|
||||||
|
fn cyclic_graph_check(symtab: &Symtab) -> Result<(), String> {
|
||||||
|
let mut visited: Vec<&WordDef> = vec![];
|
||||||
|
let mut rec_stack: Vec<&WordDef> = vec![];
|
||||||
|
for (_, def) in symtab.0.iter() {
|
||||||
|
if !visited.contains(&def) {
|
||||||
|
dfs_cycle_check(def, &mut visited, &mut rec_stack, symtab)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
fn dfs_cycle_check<'a>(
|
||||||
|
def: &'a WordDef,
|
||||||
|
visited: &mut Vec<&'a WordDef>,
|
||||||
|
rec_stack: &mut Vec<&'a WordDef>,
|
||||||
|
symtab: &'a Symtab,
|
||||||
|
) -> Result<(), String> {
|
||||||
|
visited.push(def);
|
||||||
|
rec_stack.push(def);
|
||||||
|
|
||||||
|
for val in def.values.iter() {
|
||||||
|
if let Value::Word(name) = val {
|
||||||
|
let next_def = symtab.get(name);
|
||||||
|
if !visited.contains(&next_def) {
|
||||||
|
dfs_cycle_check(next_def, visited, rec_stack, symtab)?;
|
||||||
|
} else if rec_stack.contains(&next_def) {
|
||||||
|
return Err(format!(
|
||||||
|
"illegal recursion detected! definitions {}create a reference cycle",
|
||||||
|
rec_stack
|
||||||
|
.iter()
|
||||||
|
.map(|def| {
|
||||||
|
let mut name = def.name.clone();
|
||||||
|
name.insert(0, '"');
|
||||||
|
name.push_str("\" ");
|
||||||
|
name
|
||||||
|
})
|
||||||
|
.collect::<String>()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rec_stack.pop();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
cyclic_graph_check(&self.symtab)?;
|
||||||
|
|
||||||
|
self.symtab.flatten_refs();
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"we have flattened refs, here's the symtab: {:#?}\n",
|
||||||
|
self.symtab
|
||||||
|
);
|
||||||
|
|
||||||
|
// then check that all symtab defs are sound
|
||||||
|
// at this point they shouldn't have any references,
|
||||||
|
// and if they do we will panic (see the Checkable impl for WordDef)
|
||||||
|
for (name, def) in self.symtab.0.iter_mut() {
|
||||||
|
let local_stack: TypeStack<VType> = def.r#type.pop.clone().into();
|
||||||
|
println!(
|
||||||
|
"PARSED: checking {:?}\ncurrent stack: {local_stack:?}\nword: {:?}",
|
||||||
|
name, def
|
||||||
|
);
|
||||||
|
let result_stack = def.check(local_stack)?;
|
||||||
|
if let Err(error) = result_stack.test(&def.r#type.push.clone().into()) {
|
||||||
|
println!("{error:?}");
|
||||||
|
return Err(error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.reduce_body();
|
||||||
|
|
||||||
|
// then we'll check that the body is sound with the given stack
|
||||||
|
// maybe in the future i'll change this trait so there isn't a stack
|
||||||
|
// param and the implementer picks what stack to check against
|
||||||
|
//
|
||||||
|
// TODO: this block also is shared behavior between basically all checkables but potentially with
|
||||||
|
// different internal types for T, will have to figure out how to dedup this later
|
||||||
|
for value in self.body.iter() {
|
||||||
|
match value {
|
||||||
|
Value::Op(op) => {
|
||||||
|
if stack.len() < 2 {
|
||||||
|
return Err(format!(
|
||||||
|
"expected a stack with 2 elements, got only {:?}",
|
||||||
|
stack.len()
|
||||||
|
));
|
||||||
|
} else {
|
||||||
|
match op.as_str() {
|
||||||
|
"+" | "*" => {
|
||||||
|
stack = stack.test_consume(
|
||||||
|
TypeStack::new().push(VType::Nat).push(VType::Nat),
|
||||||
|
)?;
|
||||||
|
stack = stack.pop().pop().push(VType::Nat);
|
||||||
|
}
|
||||||
|
"&" | "||" => {
|
||||||
|
stack = stack.test_consume(
|
||||||
|
TypeStack::new().push(VType::Bool).push(VType::Bool),
|
||||||
|
)?;
|
||||||
|
stack = stack.pop().pop().push(VType::Bool);
|
||||||
|
}
|
||||||
|
_ => return Err(format!("unknown opcode {:?}", op)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
&Value::Nat(_) => stack = stack.push(VType::Nat),
|
||||||
|
&Value::Int(_) => stack = stack.push(VType::Int),
|
||||||
|
&Value::Bool(_) => stack = stack.push(VType::Bool),
|
||||||
|
&Value::Str(_) => stack = stack.push(VType::Str),
|
||||||
|
&Value::Char(_) => stack = stack.push(VType::Char),
|
||||||
|
&Value::Word(_) => unreachable!(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
Ok(stack)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse<S>(input: S) -> Result<Program, Vec<Simple<char>>>
|
||||||
|
where
|
||||||
|
S: ToString,
|
||||||
|
{
|
||||||
|
let parsed = match parser().parse(input.to_string()) {
|
||||||
|
Ok(parsed) => parsed,
|
||||||
|
Err(error) => return Err(error),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(parsed)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parser() -> impl Parser<char, Program, Error = Simple<char>> {
|
||||||
|
let name = ident().labelled("word_name");
|
||||||
|
let value = {
|
||||||
|
// nats will be coerced to ints at compile time depending on word type
|
||||||
|
let nat = text::int(10).map(|s: String| Value::Nat(s.parse().unwrap()));
|
||||||
|
|
||||||
|
// vice versa for non-negative ints
|
||||||
|
let int = just("-").ignore_then(
|
||||||
|
text::int::<char, Simple<char>>(10).map(|s: String| Value::Int(s.parse().unwrap())),
|
||||||
|
);
|
||||||
|
|
||||||
|
let op = one_of::<char, &str, Simple<char>>("*+-/&|<>").map(|s| Value::Op(s.to_string()));
|
||||||
|
|
||||||
|
let str_or_char = just::<char, char, Simple<char>>('"')
|
||||||
|
.ignore_then(none_of('"').repeated())
|
||||||
|
.then_ignore(just('"'))
|
||||||
|
.map(|s: Vec<char>| match s.len() {
|
||||||
|
1 => Value::Char(s[0]),
|
||||||
|
_ => Value::Str(s.into_iter().collect::<String>()),
|
||||||
|
});
|
||||||
|
|
||||||
|
let word = name.map(|n: String| Value::Word(n));
|
||||||
|
|
||||||
|
let bool = keyword::<_, _, Simple<char>>("true")
|
||||||
|
.map(|_| Value::Bool(true))
|
||||||
|
.or(keyword("false").map(|_| Value::Bool(false)));
|
||||||
|
|
||||||
|
nat.or(int).or(bool).or(str_or_char).or(word).or(op)
|
||||||
|
};
|
||||||
|
let value_seperator = text::newline()
|
||||||
|
.repeated()
|
||||||
|
.at_least(2)
|
||||||
|
.not()
|
||||||
|
.rewind()
|
||||||
|
.then_ignore(
|
||||||
|
// TODO: figure out if this could be simplified
|
||||||
|
choice((
|
||||||
|
just(" ")
|
||||||
|
.repeated()
|
||||||
|
.then_ignore(just("\n").repeated().exactly(1).or_not()),
|
||||||
|
just("\n")
|
||||||
|
.repeated()
|
||||||
|
.exactly(1)
|
||||||
|
.then_ignore(just(" ").repeated().or_not()),
|
||||||
|
))
|
||||||
|
.then_ignore(just(" ").repeated()),
|
||||||
|
);
|
||||||
|
|
||||||
|
let body = value_seperator
|
||||||
|
.or_not()
|
||||||
|
.ignored()
|
||||||
|
.then(value)
|
||||||
|
.map(|(_, v)| v)
|
||||||
|
.repeated()
|
||||||
|
.then_ignore(
|
||||||
|
just(" ")
|
||||||
|
.repeated()
|
||||||
|
.ignored()
|
||||||
|
.then(text::newline().repeated().at_least(2).or_not()),
|
||||||
|
);
|
||||||
|
let word_def = {
|
||||||
|
let pop_types = {
|
||||||
|
let pop_type = keyword("nat")
|
||||||
|
.to(VType::Nat)
|
||||||
|
.or(keyword("int").to(VType::Int))
|
||||||
|
.or(keyword("bool").to(VType::Bool))
|
||||||
|
.or(keyword("char").to(VType::Char))
|
||||||
|
.or(keyword("str").to(VType::Str));
|
||||||
|
|
||||||
|
pop_type
|
||||||
|
.padded()
|
||||||
|
.repeated()
|
||||||
|
.collect::<Vec<VType>>()
|
||||||
|
.labelled("pop_types")
|
||||||
|
.boxed()
|
||||||
|
};
|
||||||
|
|
||||||
|
let push_types = {
|
||||||
|
let push_type = keyword("nat")
|
||||||
|
.to(VType::Nat)
|
||||||
|
.or(keyword("int").to(VType::Int))
|
||||||
|
.or(keyword("char").to(VType::Char))
|
||||||
|
.or(keyword("str").to(VType::Str));
|
||||||
|
push_type
|
||||||
|
.padded()
|
||||||
|
.repeated()
|
||||||
|
.collect::<Vec<VType>>()
|
||||||
|
.labelled("push_types")
|
||||||
|
.boxed()
|
||||||
|
};
|
||||||
|
|
||||||
|
let effects = {
|
||||||
|
let effect_keyword = keyword("paint")
|
||||||
|
.to(Effect::Paint)
|
||||||
|
.or(keyword("sing").to(Effect::Sing))
|
||||||
|
.or(keyword("store").to(Effect::Store))
|
||||||
|
.or(keyword("do").to(Effect::Do));
|
||||||
|
|
||||||
|
let effect = just("~").ignore_then(effect_keyword).labelled("effect");
|
||||||
|
|
||||||
|
effect.padded().repeated().labelled("effects").boxed()
|
||||||
|
};
|
||||||
|
|
||||||
|
let definition = text::whitespace()
|
||||||
|
.ignore_then(name)
|
||||||
|
.then_ignore(just(" "))
|
||||||
|
.then(pop_types)
|
||||||
|
.then_ignore(keyword("is").or(keyword("are")).padded())
|
||||||
|
.then(push_types)
|
||||||
|
.then(effects)
|
||||||
|
.then_ignore(just(":"))
|
||||||
|
.map(|(((name, pop_types), push_types), effects)| {
|
||||||
|
(name, pop_types, push_types, effects)
|
||||||
|
});
|
||||||
|
|
||||||
|
definition
|
||||||
|
.then(body.clone())
|
||||||
|
.map(|((name, pop_types, push_types, effects), body)| {
|
||||||
|
WordDef::new(
|
||||||
|
name,
|
||||||
|
body,
|
||||||
|
WType::new().push(push_types).pop(pop_types),
|
||||||
|
effects,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
};
|
||||||
|
word_def
|
||||||
|
.repeated()
|
||||||
|
.then(body)
|
||||||
|
.map(|(defs, body): (Vec<WordDef>, Vec<Value>)| Program::new(defs, body))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use crate::typecheck::TypeStack;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_parser() {
|
||||||
|
let input = "
|
||||||
|
a is nat: 5 7 *
|
||||||
|
|
||||||
|
b is nat:
|
||||||
|
5 a *
|
||||||
|
|
||||||
|
a
|
||||||
|
";
|
||||||
|
|
||||||
|
let ast = vec![
|
||||||
|
WordDef::new(
|
||||||
|
"a",
|
||||||
|
vec![Value::Nat(5), Value::Nat(7), Value::Op("*".to_string())],
|
||||||
|
WType::new().push(vec![VType::Nat]),
|
||||||
|
vec![],
|
||||||
|
),
|
||||||
|
WordDef::new(
|
||||||
|
"b",
|
||||||
|
vec![
|
||||||
|
Value::Nat(5),
|
||||||
|
Value::Word("a".to_string()),
|
||||||
|
Value::Op("*".to_string()),
|
||||||
|
],
|
||||||
|
WType::new().push(vec![VType::Nat]),
|
||||||
|
vec![],
|
||||||
|
),
|
||||||
|
];
|
||||||
|
let body: Vec<Value> = vec![Value::Word("a".to_string())];
|
||||||
|
println!("sound: {:?}\n", parser().parse(input).unwrap());
|
||||||
|
assert_eq!(parser().parse(input).unwrap(), Program::new(ast, body));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_typecheck() {
|
||||||
|
let sound = "
|
||||||
|
a is nat: 5 7 *
|
||||||
|
|
||||||
|
b nat nat is nat:
|
||||||
|
a *
|
||||||
|
|
||||||
|
a 5 *
|
||||||
|
";
|
||||||
|
|
||||||
|
let unsound_defs = "
|
||||||
|
a is nat nat: 5 7 *
|
||||||
|
|
||||||
|
b nat is nat:
|
||||||
|
a *
|
||||||
|
|
||||||
|
a 5 *
|
||||||
|
";
|
||||||
|
|
||||||
|
let unsound_body = "
|
||||||
|
a is nat: 5 7 *
|
||||||
|
|
||||||
|
b nat is nat:
|
||||||
|
a *
|
||||||
|
|
||||||
|
a 5 * *
|
||||||
|
";
|
||||||
|
|
||||||
|
let unsound_body_and_defs = "
|
||||||
|
a is nat nat: 5 7 *
|
||||||
|
|
||||||
|
b nat is nat:
|
||||||
|
a *
|
||||||
|
|
||||||
|
a 5 * *
|
||||||
|
";
|
||||||
|
|
||||||
|
fn typecheck(input: &str, sound: bool) {
|
||||||
|
let mut parsed = parse(input).unwrap();
|
||||||
|
parsed.symtab.flatten_refs();
|
||||||
|
parsed.reduce_body();
|
||||||
|
|
||||||
|
let stack = TypeStack::new();
|
||||||
|
if sound {
|
||||||
|
assert!(parsed.check(stack).is_ok());
|
||||||
|
} else {
|
||||||
|
assert!(parsed.check(stack).is_err());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
typecheck(sound, true);
|
||||||
|
typecheck(unsound_defs, false);
|
||||||
|
typecheck(unsound_body, false);
|
||||||
|
typecheck(unsound_body_and_defs, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_illegal_recursion() {
|
||||||
|
let illegal = "
|
||||||
|
a is: b
|
||||||
|
|
||||||
|
b is: a
|
||||||
|
|
||||||
|
a
|
||||||
|
";
|
||||||
|
|
||||||
|
let illegal_multilevel = "
|
||||||
|
a is: b
|
||||||
|
|
||||||
|
b is: c
|
||||||
|
|
||||||
|
c is: a
|
||||||
|
|
||||||
|
a
|
||||||
|
";
|
||||||
|
|
||||||
|
fn typecheck(input: &str) {
|
||||||
|
let mut parsed = parse(input).unwrap();
|
||||||
|
let stack = TypeStack::new();
|
||||||
|
println!("{:?}", parsed.check(TypeStack::new()));
|
||||||
|
assert!(parsed.check(stack).is_err());
|
||||||
|
}
|
||||||
|
|
||||||
|
typecheck(illegal);
|
||||||
|
typecheck(illegal_multilevel);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,84 @@
|
||||||
|
use std::fmt::Debug;
|
||||||
|
|
||||||
|
use chumsky::chain::Chain;
|
||||||
|
|
||||||
|
/// A stack of types used while type-checking; the last element is the top.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TypeStack<T>(Vec<T>);

impl<T: Debug + PartialEq> TypeStack<T> {
    /// Creates an empty stack.
    pub fn new() -> Self {
        TypeStack(vec![])
    }

    /// Number of types currently on the stack.
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns `true` when the stack holds no types.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Removes the top type, if any, and returns the stack for chaining.
    /// Popping an empty stack is a no-op.
    pub fn pop(mut self) -> TypeStack<T> {
        let _ = self.0.pop();
        self
    }

    /// Pushes `t` on top and returns the stack for chaining.
    pub fn push(mut self, t: T) -> TypeStack<T> {
        self.0.push(t);
        self
    }

    /// Tests if `ts` matches the top of the stack and removes the matched
    /// types.
    ///
    /// On success the returned stack no longer contains the types of `ts`,
    /// so callers must not pop them a second time. Fails if `ts` is longer
    /// than the stack or if any type differs.
    pub fn test_consume(mut self, ts: TypeStack<T>) -> Result<TypeStack<T>, String> {
        if ts.len() > self.len() {
            return Err(format!("error during test: {ts:?} is bigger than {self:?}"));
        }
        // Walk `ts` from its top downwards, popping the matching entry each time.
        for (index, t) in ts.0.iter().rev().enumerate() {
            // Cannot fail: the length check above guarantees enough elements.
            let val = self.0.pop().unwrap();
            if val != *t {
                return Err(format!("type mismatch between {self:?} and {ts:?}\n{t:?} doesn't match {val:?} at stack depth {index:?}"));
            }
        }
        Ok(self)
    }

    /// Tests if `ts` matches the top of the stack without modifying it.
    ///
    /// Fails if `ts` is longer than the stack or if any type differs.
    pub fn test(&self, ts: &TypeStack<T>) -> Result<(), String> {
        if ts.len() > self.len() {
            return Err(format!("error during test: {ts:?} is bigger than {self:?}"));
        }
        // Pair the entries top-down so depth `index` of `ts` is compared with
        // depth `index` of this stack, not with the top element every time.
        for (index, (val, t)) in self.0.iter().rev().zip(ts.0.iter().rev()).enumerate() {
            if val != t {
                return Err(format!("type mismatch between {self:?} and {ts:?}\n{t:?} doesn't match {val:?} at stack depth {index:?}"));
            }
        }
        Ok(())
    }

    // pub fn test_many(mut self, mut tss: Vec<TypeStack<T>>) -> Result<TypeStack<T>, String> {
    //     if tss.is_empty() {
    //         return Ok(TypeStack::new());
    //     }
    //     for _i in 0..tss.len() {
    //         let ts = tss.pop().unwrap();
    //         self = self.test(ts)?;
    //         if self.is_ok() {
    //             return self;
    //         }
    //     }
    //     Err("did not match any types".to_string())
    // }

    /// Moves every element of `t` onto the top of the stack, leaving `t` empty.
    pub fn append(&mut self, t: &mut Vec<T>) {
        self.0.append(t);
    }
}

impl<T: PartialEq + Debug> From<Vec<T>> for TypeStack<T> {
    /// Wraps `value` as a stack; the last element of the vector is the top.
    fn from(value: Vec<T>) -> Self {
        TypeStack(value)
    }
}
|
||||||
|
|
||||||
|
pub trait Checkable<T: PartialEq + Debug> {
|
||||||
|
fn check(&mut self, stack: TypeStack<T>) -> Result<TypeStack<T>, String>;
|
||||||
|
}
|
|
@ -1,15 +0,0 @@
|
||||||
; 0
|
|
||||||
test_contiguous_binary:
|
|
||||||
.byte 3,$1,$2,$3
|
|
||||||
|
|
||||||
; 1 - assembled from "plus.asm"
|
|
||||||
subroutine_plus:
|
|
||||||
.byte 15, $18,$b5,$00,$75,$02,$95, $02, $b5, $01, $75, $03, $95, $03, $ca, $ca
|
|
||||||
|
|
||||||
; 2
|
|
||||||
subroutine_push:
|
|
||||||
.byte 6,$ca,$ca,$95,$0,$74,$1
|
|
||||||
; dex
|
|
||||||
; dex
|
|
||||||
; sta 0, x
|
|
||||||
; stz 1, x
|
|
78
syntax.md
78
syntax.md
|
@ -1,78 +0,0 @@
|
||||||
# fuzzy syntax in a well-defined grammar so i don't lose my mind
|
|
||||||
|
|
||||||
## notation
|
|
||||||
|
|
||||||
| notation | meaning |
|
|
||||||
| -------- | --------------------------------------------- |
|
|
||||||
| abc | syntactical production |
|
|
||||||
| :        | maps a production to the symbols it expands to |
|
|
||||||
| () | groups items |
|
|
||||||
| ʕ·ᴥ·ʔ | any 8-bit georgesci character |
|
|
||||||
| `abc` | exact character(s) |
|
|
||||||
| \x | an escape character |
|
|
||||||
| x? | optional |
|
|
||||||
| x\* | zero or more of x |
|
|
||||||
| x+ | one or more of x |
|
|
||||||
| x+y | y or more of x |
|
|
||||||
| x.y | y repetitions of x |
|
|
||||||
| \| | one or another |
|
|
||||||
| [-] | any characters in range (>=1 ranges accepted) |
|
|
||||||
|
|
||||||
(adapted from the rust reference cause i like how simple they do it)
|
|
||||||
|
|
||||||
## grammar
|
|
||||||
|
|
||||||
the only semantically significant whitespace is \n+2 after a word definition.
|
|
||||||
|
|
||||||
otherwise, assume tokens are delimited by an arbitrary amount of (not \n+2) whitespace, including no whitespace, e.g. the colon in `hello is: "hello"`
|
|
||||||
|
|
||||||
also order is significant! if `value` produced `word` first, it would make reserved words like `true` and `false` parse into word references.
|
|
||||||
|
|
||||||
```syntax
|
|
||||||
george: defs? body
|
|
||||||
|
|
||||||
defs: (def \n+2)*
|
|
||||||
body: values
|
|
||||||
|
|
||||||
def: signature `:` values
|
|
||||||
signature: `danger!`? word typedef
|
|
||||||
|
|
||||||
values: (value | op)*
|
|
||||||
|
|
||||||
typedef: pop? `is` push? effects?
|
|
||||||
|
|
||||||
pop: type*
|
|
||||||
|
|
||||||
push: type*
|
|
||||||
|
|
||||||
effects: effect*
|
|
||||||
|
|
||||||
type: `bool` | `nat` | `int` | `char` | `string` | `word`
|
|
||||||
|
|
||||||
effect: `paint` | `sing` | `store`
|
|
||||||
|
|
||||||
value: bool | num | char | string | word
|
|
||||||
|
|
||||||
op: `!` | `&` | `|` | `+` | `-` | `*` | `/` | `=` | `>` | `<` | `#`
|
|
||||||
|
|
||||||
quote: `[` values `]`
|
|
||||||
|
|
||||||
bool: `true` | `false`
|
|
||||||
|
|
||||||
word: [a-z A-Z]+
|
|
||||||
|
|
||||||
num: hexnum | binarynum
|
|
||||||
|
|
||||||
binarynum: binarydigit+
|
|
||||||
binarydigit: [0-9]
|
|
||||||
hexnum: (`$` hexdigit+)
|
|
||||||
hexdigit: [0-9 a-f A-F]
|
|
||||||
|
|
||||||
char: `'` ʕ·ᴥ·ʔ `'`
|
|
||||||
|
|
||||||
string: `"` ʕ·ᴥ·ʔ* `"`
|
|
||||||
```
|
|
||||||
|
|
||||||
## notes
|
|
||||||
|
|
||||||
fuzzy assumes the source text to be encoded in [georgesci](#), which is nearly ascii-compatible and should only cause minor headaches <3
|
|
Loading…
Reference in New Issue