Compare commits
3 Commits
ed8e20f0db
...
c0e7f4024c
Author | SHA1 | Date |
---|---|---|
august kline | c0e7f4024c | |
august kline | 2d4df76be7 | |
august kline | cbc7bff7f7 |
|
@ -1,271 +0,0 @@
|
||||||
# This file is automatically @generated by Cargo.
|
|
||||||
# It is not intended for manual editing.
|
|
||||||
version = 3
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "ahash"
|
|
||||||
version = "0.8.11"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"once_cell",
|
|
||||||
"version_check",
|
|
||||||
"zerocopy",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "allocator-api2"
|
|
||||||
version = "0.2.18"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "cc"
|
|
||||||
version = "1.1.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "324c74f2155653c90b04f25b2a47a8a631360cb908f92a772695f430c7e31052"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "cfg-if"
|
|
||||||
version = "1.0.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "chumsky"
|
|
||||||
version = "0.9.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9"
|
|
||||||
dependencies = [
|
|
||||||
"hashbrown",
|
|
||||||
"stacker",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "either"
|
|
||||||
version = "1.13.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "fuzzy"
|
|
||||||
version = "0.1.0"
|
|
||||||
dependencies = [
|
|
||||||
"chumsky",
|
|
||||||
"indextree",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "hashbrown"
|
|
||||||
version = "0.14.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
|
|
||||||
dependencies = [
|
|
||||||
"ahash",
|
|
||||||
"allocator-api2",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "heck"
|
|
||||||
version = "0.5.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "indextree"
|
|
||||||
version = "4.7.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "0d6f1b8dbc8f1e5a0f45e05b9293c42cbab79086baeb3e914d3936f8149edc4f"
|
|
||||||
dependencies = [
|
|
||||||
"indextree-macros",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "indextree-macros"
|
|
||||||
version = "0.1.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "357230c23ee6024223892ce0de19888a04139ca5bb94f5becb04d38b75a4bccf"
|
|
||||||
dependencies = [
|
|
||||||
"either",
|
|
||||||
"itertools",
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"strum",
|
|
||||||
"syn",
|
|
||||||
"thiserror",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "itertools"
|
|
||||||
version = "0.13.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
|
|
||||||
dependencies = [
|
|
||||||
"either",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "libc"
|
|
||||||
version = "0.2.155"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "once_cell"
|
|
||||||
version = "1.19.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "proc-macro2"
|
|
||||||
version = "1.0.86"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
|
|
||||||
dependencies = [
|
|
||||||
"unicode-ident",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "psm"
|
|
||||||
version = "0.1.21"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
|
|
||||||
dependencies = [
|
|
||||||
"cc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "quote"
|
|
||||||
version = "1.0.36"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "rustversion"
|
|
||||||
version = "1.0.17"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "stacker"
|
|
||||||
version = "0.1.15"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
|
|
||||||
dependencies = [
|
|
||||||
"cc",
|
|
||||||
"cfg-if",
|
|
||||||
"libc",
|
|
||||||
"psm",
|
|
||||||
"winapi",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "strum"
|
|
||||||
version = "0.26.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
|
|
||||||
dependencies = [
|
|
||||||
"strum_macros",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "strum_macros"
|
|
||||||
version = "0.26.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
|
|
||||||
dependencies = [
|
|
||||||
"heck",
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"rustversion",
|
|
||||||
"syn",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "syn"
|
|
||||||
version = "2.0.71"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"unicode-ident",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "thiserror"
|
|
||||||
version = "1.0.63"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
|
|
||||||
dependencies = [
|
|
||||||
"thiserror-impl",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "thiserror-impl"
|
|
||||||
version = "1.0.63"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"syn",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "unicode-ident"
|
|
||||||
version = "1.0.12"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "version_check"
|
|
||||||
version = "0.9.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "winapi"
|
|
||||||
version = "0.3.9"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
|
||||||
dependencies = [
|
|
||||||
"winapi-i686-pc-windows-gnu",
|
|
||||||
"winapi-x86_64-pc-windows-gnu",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "winapi-i686-pc-windows-gnu"
|
|
||||||
version = "0.4.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "winapi-x86_64-pc-windows-gnu"
|
|
||||||
version = "0.4.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "zerocopy"
|
|
||||||
version = "0.7.35"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
|
|
||||||
dependencies = [
|
|
||||||
"zerocopy-derive",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "zerocopy-derive"
|
|
||||||
version = "0.7.35"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"syn",
|
|
||||||
]
|
|
15
Cargo.toml
15
Cargo.toml
|
@ -1,15 +0,0 @@
|
||||||
[package]
|
|
||||||
name = "fuzzy"
|
|
||||||
version = "0.1.0"
|
|
||||||
edition = "2021"
|
|
||||||
|
|
||||||
[dependencies]
|
|
||||||
chumsky = { version = "0.9.3"}
|
|
||||||
indextree = "4.7.2"
|
|
||||||
|
|
||||||
[lib]
|
|
||||||
|
|
||||||
crate-type = ["lib"]
|
|
||||||
|
|
||||||
[[bin]]
|
|
||||||
name = "main"
|
|
24
README.md
24
README.md
|
@ -16,16 +16,26 @@ fuzzy is part of george, and shouldn't run anywhere else.
|
||||||
|
|
||||||
#### low-level
|
#### low-level
|
||||||
|
|
||||||
(most of) fuzzy could be made with assembler macros, but that's no fun
|
(most of) fuzzy could be written as assembler macros, but that's no fun
|
||||||
|
|
||||||
#### reliable
|
#### reliable
|
||||||
|
|
||||||
if fuzzy says it can run, george can run it
|
if fuzzy says it can run, george can run it
|
||||||
|
|
||||||
## feature progress
|
## how to work on fuzzy
|
||||||
|
|
||||||
- [x] parser
|
edit `program.inc` and run `./run.sh`. the program gets included in the fuzzy compiler `fuzzy.asm` and is assembled with `vasm6502_oldstyle`, then george runs the program, reading out her system image when she reaches `stp` or `brk`
|
||||||
- roughly complete, but want to finish the whole pipeline before adding things to the parser
|
|
||||||
- [x] typechecker
|
then the program she compiled gets formatted as a standard 32k rom, and she reads it again, and then shows her system image again when the program finishes (hits `stp` or `brk`).
|
||||||
- generates a "type stack" from parsed input and checks that word definition types match their body
|
|
||||||
- [ ] code generation
|
since fuzzy works on a zero-page data stack, it's pretty easy to read the results of a program from the hexdump.
|
||||||
|
|
||||||
|
for now this loop only works on apple silicon, but eventually i'll compile a `george` binary for x86 linux and switch based on the host platform.
|
||||||
|
|
||||||
|
## reference help
|
||||||
|
|
||||||
|
i wrote [syntax](./syntax.md) and [semantics](./semantics.md) docs to keep track of how fuzzy works before starting work on the compiler implementation in assembly. they're the _official_ source of truth for how fuzzy works. assume that the compiler implementation is always in flux :)
|
||||||
|
|
||||||
|
## a note on implementation
|
||||||
|
|
||||||
|
i was writing fuzzy's compiler in rust for a sec, but then i realized that it would be a fun challenge to write it in assembly. it's been wayyy easier! and fun! and so rewarding :) this feels like a flex but i'm genuinely just so happy to see george & fuzzy playing together in this little computer world i've made <3
|
||||||
|
|
|
@ -0,0 +1,214 @@
|
||||||
|
; ʕ·ᴥ·ʔ- fuzzy v0 rev 0: parse program text and spit out binary representation @ $4000
|
||||||
|
|
||||||
|
.include "./macro.inc"
|
||||||
|
|
||||||
|
n = $05 ; temporary storage for data stack operations
|
||||||
|
base = $00 ; zero-page address where the compiler's own variables start
|
||||||
|
result_binary_base = base ; pointer (2 bytes) to where the next byte of binary data should be stored
|
||||||
|
binary_base_index = result_binary_base + 2 ; offset for that pointer (1 byte, loaded into y by store_binary)
|
||||||
|
binary_subroutine_address = binary_base_index + 1 ; pointer (2 bytes) to a subroutine to be written to the binary
|
||||||
|
|
||||||
|
.org $8000
|
||||||
|
.include "./subroutines.inc"
|
||||||
|
|
||||||
|
program_text:
|
||||||
|
.include "./program.inc"
|
||||||
|
|
||||||
|
reset: ; entry point: the word at $fffc at the end of this file points here
|
||||||
|
sei ; mask interrupts while the compiler runs
|
||||||
|
lda #0 ; clear a, x and y, then fall through into main
|
||||||
|
ldx #0
|
||||||
|
ldy #0 ; NOTE(review): the hardware stack pointer is never set (no ldx #$ff / txs) - confirm george initialises it on reset
|
||||||
|
|
||||||
|
main: ; set up the output and subroutine pointers, compile program_text, then stop
|
||||||
|
stz result_binary_base ; low byte of the output pointer, so output starts at exactly $4000 even if zero page is not cleared
stz binary_base_index ; start writing at offset 0
|
||||||
|
lda #$40
|
||||||
|
sta result_binary_base + 1 ; set where to store resulting binary
|
||||||
|
stz binary_subroutine_address
|
||||||
|
lda #$80
|
||||||
|
sta binary_subroutine_address + 1 ; available subroutines start at $8000
|
||||||
|
jsr compile_values
|
||||||
|
stp ; halt: run.sh reads george's output after this point
|
||||||
|
|
||||||
|
; parser loop, eventually this will be able to handle longer program strings, but indexing by y is fine for now
|
||||||
|
compile_values:
|
||||||
|
ldy #0
|
||||||
|
parser_loop:
|
||||||
|
lda program_text, y ; get character at index
|
||||||
|
cmp #0 ; is eof?
|
||||||
|
beq .end ; yes, exit loop
|
||||||
|
cmp #20 ; is space? NOTE(review): 20 is decimal here; if georgescii follows ascii, space is $20 - confirm
|
||||||
|
beq .skip_char ; yes, skip this char (y must advance, or the same char is reloaded forever)
|
||||||
|
cmp #12 ; is newline? NOTE(review): 12 is decimal here; if georgescii follows ascii, newline is $0a - confirm
|
||||||
|
beq .newline ; yes, handle newline
|
||||||
|
jsr compile_values_op
|
||||||
|
jsr compile_values_nat ; NOTE(review): this does its own iny on a digit, then .newline below does another - so the char after a digit is skipped unseen
|
||||||
|
.newline: ; we reached a newline, y is program string index
|
||||||
|
iny ; WARN: don't accidentally iny in this loop w/out handling a character
|
||||||
|
lda program_text, y ; load next char
|
||||||
|
cmp #12 ; is newline?
|
||||||
|
bne parser_loop ; no, keep parsing tokens
|
||||||
|
rts ; yes, no more tokens in body (see syntax.md for info)
|
||||||
|
.end:
|
||||||
|
rts
.skip_char: ; consume one whitespace character and go back to the top of the loop
iny
bra parser_loop
|
||||||
|
|
||||||
|
; a holds character value, y program text index, only iny if you find a matching character & consume it
|
||||||
|
compile_values_op: ; NOTE(review): the "+" path does not iny itself; parser_loop's fall-through into .newline advances y
|
||||||
|
cmp #"+" ; i personally think this syntax is really silly but whatever, one of these days i'm gonna write my own assembler and document everything cause vasm documentation is kinda terrible
|
||||||
|
bne .next ; not "+": emit nothing and leave a untouched
|
||||||
|
.is_plus:
|
||||||
|
lda #1 ; subroutine index 1 (presumably the add routine in subroutines.inc - confirm)
|
||||||
|
jsr store_subroutine ; copy that subroutine's bytes into the output binary
|
||||||
|
rts ; a is 1 here, since store_subroutine restores the a it was called with
|
||||||
|
.next:
|
||||||
|
rts
|
||||||
|
; cmp #"!" ; commenting these out for now to handle a single simple case
|
||||||
|
; cmp #"&"
|
||||||
|
; cmp #"|"
|
||||||
|
; cmp #"-"
|
||||||
|
; cmp #"*"
|
||||||
|
; cmp #"/"
|
||||||
|
; cmp #"="
|
||||||
|
; cmp #">"
|
||||||
|
; cmp #"<"
|
||||||
|
; cmp #"#"
|
||||||
|
|
||||||
|
; a holds character value, y program text index, only iny if you find a matching character & consume it
|
||||||
|
; TODO:
|
||||||
|
; 1-3 digit decimal values
|
||||||
|
; 1-2 digit hex values
|
||||||
|
compile_values_nat: ; for a single digit "0"-"9": emits `lda #value` followed by subroutine 2, then consumes the char
|
||||||
|
; TODO:
|
||||||
|
; cmp #"$" ; is hex?
|
||||||
|
; bne .decimal ; no, try decimal
|
||||||
|
; cmp
|
||||||
|
; rts
|
||||||
|
cmp #$30 ; less than (before) start of 0-9 georgescii range? ("0" is $30)
|
||||||
|
bcc .not_nat
|
||||||
|
cmp #$3a ; greater than end of 0-9 georgescii range? ("9" is $39, so $3a and up is out)
|
||||||
|
bcs .not_nat
|
||||||
|
pha ; keep the digit character while a is used for the opcode
|
||||||
|
lda #$a9 ; $a9: lda imm
|
||||||
|
jsr store_binary
|
||||||
|
pla
|
||||||
|
jsr georgescii_decimal_to_value
|
||||||
|
jsr store_binary ; operand byte of the lda imm
|
||||||
|
lda #2 ; push
|
||||||
|
jsr store_subroutine
|
||||||
|
iny ; consume the digit
|
||||||
|
rts
|
||||||
|
.not_nat:
|
||||||
|
rts
|
||||||
|
|
||||||
|
; georgescii decimal value in a register, return equivalent plain value in a register
|
||||||
|
georgescii_decimal_to_value:
|
||||||
|
sec ; sbc subtracts an extra 1 when carry is clear, so carry must be set for a plain subtract
|
||||||
|
sbc #$30 ; decimal digits start at georgescii $30
|
||||||
|
rts
|
||||||
|
|
||||||
|
; we have binary in the a register we want to store; a and y are unchanged on return
|
||||||
|
store_binary:
|
||||||
|
phy ; keep the caller's y
|
||||||
|
ldy binary_base_index ; current write offset
|
||||||
|
sta (result_binary_base), y ; write the byte at pointer + offset
|
||||||
|
inc binary_base_index ; advance to the next output byte
|
||||||
|
bne .not_overflow ; did we roll over?
|
||||||
|
inc result_binary_base + 1 ; yes, roll over base address (offset wrapped to 0, so bump the pointer's high byte)
|
||||||
|
.not_overflow: ; no, carry on as normal
|
||||||
|
ply ; restore the caller's y
|
||||||
|
rts
|
||||||
|
|
||||||
|
; binary_subroutine_address is a pointer to a subroutine that we want to store
|
||||||
|
; the first byte at the subroutine's address is its length
; a is preserved; x and y are clobbered
|
||||||
|
store_contiguous_binary:
|
||||||
|
pha ; just to be safe
|
||||||
|
lda (binary_subroutine_address) ; get the subroutine length
|
||||||
|
tax ; loop counter
beq .end ; zero-length subroutine: copy nothing (dex/bne below would otherwise run 256 times)
|
||||||
|
ldy #1 ; index into subroutine, offset by one to skip subroutine length
|
||||||
|
.loop:
|
||||||
|
lda (binary_subroutine_address), y
|
||||||
|
jsr store_binary ; store_binary keeps y intact, so y stays our read index
|
||||||
|
iny
|
||||||
|
dex
|
||||||
|
bne .loop
|
||||||
|
.end:
|
||||||
|
pla
|
||||||
|
rts
|
||||||
|
|
||||||
|
; this wouldn't be necessary if we could get the
|
||||||
|
; address of a label in vasm, but that's for another time
|
||||||
|
; (when i feel like writing an assembler lol)
|
||||||
|
; for now, pass the index of the subroutine (in subroutines.inc)
|
||||||
|
; to a and it will get written to binary_subroutine_address
; for a non-zero index, binary_subroutine_address must already point at the first subroutine ($8000)
; a is preserved; x is clobbered
|
||||||
|
get_subroutine_address:
|
||||||
|
pha
|
||||||
|
tax ; set up counter
|
||||||
|
bne .loop ; first subroutine?
|
||||||
|
stz binary_subroutine_address ; yes, store its address
|
||||||
|
lda #$80
|
||||||
|
sta binary_subroutine_address + 1
|
||||||
|
pla ; balance the pha above, otherwise rts pops the saved a as part of the return address
rts
|
||||||
|
.loop: ; loop through
|
||||||
|
lda (binary_subroutine_address) ; no, load length of subroutine
|
||||||
|
inc ; distance from next subroutine
|
||||||
|
clc
|
||||||
|
adc binary_subroutine_address ; add it to the current address
|
||||||
|
sta binary_subroutine_address
|
||||||
|
bcc .no_carry ; no carry out of the low byte: high byte stays as it is
|
||||||
|
lda binary_subroutine_address + 1 ; add the carry to the high byte of address
|
||||||
|
adc #0 ; carry is still set here (sta/lda leave it alone), so this adds 1
|
||||||
|
sta binary_subroutine_address + 1
|
||||||
|
.no_carry:
|
||||||
|
dex ; is this our address?
|
||||||
|
bne .loop ; no, step over the next subroutine too
|
||||||
|
pla
|
||||||
|
rts
|
||||||
|
|
||||||
|
|
||||||
|
; pass subroutine index to a and it will get written into the binary
|
||||||
|
; TODO: stabilize subroutine location & just write a `jsr $subroutine` to the binary
|
||||||
|
store_subroutine:
|
||||||
|
pha ; save a, y and x: the two helpers below clobber x and y
|
||||||
|
phy
|
||||||
|
phx
|
||||||
|
jsr get_subroutine_address ; point binary_subroutine_address at subroutine number a
|
||||||
|
jsr store_contiguous_binary ; copy that subroutine's bytes into the output
|
||||||
|
; reset subroutine address
|
||||||
|
stz binary_subroutine_address ; back to $8000, which get_subroutine_address starts walking from
|
||||||
|
lda #$80
|
||||||
|
sta binary_subroutine_address + 1
|
||||||
|
plx ; restore registers in reverse push order
|
||||||
|
ply
|
||||||
|
pla
|
||||||
|
rts
|
||||||
|
|
||||||
|
; write error message and stop execution
|
||||||
|
error: ; NOTE(review): nothing in the part of the file shown here jumps to error yet
|
||||||
|
ldy #0
|
||||||
|
.loop:
|
||||||
|
lda .message, y ; next character of the message
|
||||||
|
sta $4000, y ; copy it to $4000 onward, the same place main points the output binary
|
||||||
|
beq .end ; sta leaves flags alone, so this tests the lda: stop after copying the terminating 0
|
||||||
|
iny
|
||||||
|
bra .loop
|
||||||
|
.end:
|
||||||
|
stp ; halt the cpu
|
||||||
|
.message:
|
||||||
|
.asciiz "ruh roh! fuzzy couldn't compile"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
isr: ; interrupt service routine
|
||||||
|
pha ; save a, x and y
|
||||||
|
phx
|
||||||
|
phy ; no interrupt work is done between the saves and the restores
|
||||||
|
ply ; restore y, x and a in reverse order
|
||||||
|
plx
|
||||||
|
pla
|
||||||
|
rti
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
.org $fffc ; the last four bytes of the address space hold two 16-bit vectors
|
||||||
|
.word reset ; $fffc: reset vector
|
||||||
|
.word isr ; $fffe: irq/brk vector
|
|
@ -14,9 +14,13 @@
|
||||||
inx
|
inx
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro push ; push a data stack cell
|
.macro push, cell_high, cell_low ; push a data stack cell
|
||||||
dex
|
dex
|
||||||
dex
|
dex
|
||||||
|
lda \cell_low
|
||||||
|
sta 0, x
|
||||||
|
lda \cell_high
|
||||||
|
sta 1, x
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro push2 ; push 2 data stack cells
|
.macro push2 ; push 2 data stack cells
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
.asciiz '2 3 +'
|
|
@ -0,0 +1,16 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
rm -f *.bin *.rom # -f: a fresh checkout has no artifacts, and a failing rm would abort the script under set -e
|
||||||
|
vasm6502_oldstyle fuzzy.asm -dotdir -wdc02 -ldots -Fbin -o fuzzy.rom &> /dev/null; # assembler output is discarded; on failure set -e stops the script here
|
||||||
|
echo -e "\nʕ·ᴥ·ʔ- source text:\n";
|
||||||
|
cat program.inc;
|
||||||
|
cat fuzzy.rom | ./george > compiled.bin; # run the compiler rom; george's output is saved as compiled.bin
|
||||||
|
dd skip=16384 count=500 if=compiled.bin of=compiled.rom bs=1 &> /dev/null; # take 500 bytes starting at offset 16384 ($4000), where fuzzy writes its output
|
||||||
|
truncate -s 32k compiled.rom &> /dev/null; # size the file to 32k
|
||||||
|
printf '\x80\x00\x00' | dd of=compiled.rom bs=1 seek=32765 count=3 conv=notrunc &> /dev/null; # patch the last 3 bytes (presumably so the reset vector reads $8000 - confirm against george's rom mapping)
|
||||||
|
cat compiled.rom | ./george > result.bin; # run the compiled program
|
||||||
|
echo -e "\n\nʕ·ᴥ·ʔ- compiled program result:\n";
|
||||||
|
hexdump -C ./result.bin;
|
||||||
|
echo -e "";
|
|
@ -0,0 +1,85 @@
|
||||||
|
# i swear this is what fuzzy actually does
|
||||||
|
|
||||||
|
## the stack
|
||||||
|
|
||||||
|
fuzzy works on a 16-bit cell-width, zero-page data stack indexed with the x register, as documented in Garth Wilson's [stack treatise](https://wilsonminesco.com/stacks/virtualstacks.html)
|
||||||
|
|
||||||
|
to push a byte onto the data stack, we just:
|
||||||
|
|
||||||
|
```asm
|
||||||
|
dex ; decrement the stack pointer
|
||||||
|
lda some_value ; load the byte we want on the stack into a
|
||||||
|
sta 0, x ; put the byte on the stack!
|
||||||
|
```
|
||||||
|
|
||||||
|
and to pop a byte off it:
|
||||||
|
|
||||||
|
```asm
|
||||||
|
lda 0, x ; pop the top of stack off into a
|
||||||
|
inx ; increment the stack pointer
|
||||||
|
```
|
||||||
|
|
||||||
|
## types
|
||||||
|
|
||||||
|
these are used in word definitions, and refer to the type of an individual stack cell:
|
||||||
|
|
||||||
|
| type | desc |
|
||||||
|
| ---------------------- | ----------------------------------------------------------- |
|
||||||
|
| **bool** | a boolean value, represented by $0000 or $ffff |
|
||||||
|
| **nat** | an unsigned 16-bit integer |
|
||||||
|
| **int** | a signed 16-bit integer |
|
||||||
|
| **char** | an 8-bit george-ascii character, padded with leading zeroes |
|
||||||
|
| **string** | a 16-bit pointer to a string in memory |
|
||||||
|
| **word** _`dangerous`_ | a 16-bit pointer to a fuzzy word or quotation |
|
||||||
|
|
||||||
|
## operators
|
||||||
|
|
||||||
|
- `!` NOT: applies NOT to tos
|
||||||
|
- `&` AND: pops 2 off the stack and pushes the AND'ed result
|
||||||
|
- `|` OR: pops 2 off the stack and pushes the OR'ed result
|
||||||
|
- `+` add: pops 2 off the stack and pushes the sum
|
||||||
|
- `-` subtract: pops 2 off the stack and pushes the difference
|
||||||
|
- `*` multiply: pops 2 off the stack and pushes the result, truncating if it's >$FFFF
|
||||||
|
- `/` divide: pops 2 off the stack and pushes the remainder and quotient
|
||||||
|
- `=` equality: pushes true/false if the top 2 stack cells do/don't match
|
||||||
|
- `>` greater than: pushes true/false if tos-1 is/isn't greater than tos
|
||||||
|
- `<` less than: pushes true/false if tos-1 is/isn't less than tos
|
||||||
|
- `#` quote _`dangerous`_: pops tos and pushes a word that produces its value
|
||||||
|
|
||||||
|
### supported types (this will need to be more clearly laid out later)
|
||||||
|
|
||||||
|
| operator | input type | output type | notes |
|
||||||
|
| -------- | ------------------------ | ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
|
| `!` | `bool`, `nat`, `int` | `bool`, `nat`, `int` | |
|
||||||
|
| `&` | `bool`, `nat`, `int` | `bool`, `nat`, `int` | |
|
||||||
|
| `\|` | `bool`, `nat`, `int` | `bool`, `nat`, `int` | |
|
||||||
|
| `+` | `nat` `nat`, `int` `int` | `nat`, `int` | |
|
||||||
|
| `-` | `nat` `nat`, `int` `int` | `nat`, `int` | subtracting two `nat`s |
|
||||||
|
| `*` | `nat` `nat`, `int` `int` | `nat`, `int` | most products will be truncated, since most 16 bit multiplications result in a >16 bit product, but in practice that shouldn't matter cause we're not doing science |
|
||||||
|
| `/` | `nat` `nat`, `int` `int` | `nat` `nat`, `int` `int` | produces two cells, the quotient and remainder |
|
||||||
|
| `=` | any any | `bool` | equality/order is checked based on stack cell value, not type (e.g. a `word` pointing to $abcd and a `nat` with the value $abcd are equivalent) |
|
||||||
|
| `>` | any any | `bool` | see above |
|
||||||
|
| `<` | any any | `bool` | see above |
|
||||||
|
| `#` | any | `word` | _`dangerous`_ |
|
||||||
|
|
||||||
|
## `danger!`
|
||||||
|
|
||||||
|
the `danger!` keyword marks a word as being _`dangerous`_. certain language features can only be used in dangerous words, such as:
|
||||||
|
|
||||||
|
- inline assembly
|
||||||
|
- quotations
|
||||||
|
- typechecking quotations is a difficult problem & probably too complex to implement on george if we ever want to fully self-host fuzzy
|
||||||
|
- unchecked operator usage
|
||||||
|
- applying `+` to two chars, applying `&` to two strings, etc
|
||||||
|
- this does not mean that _dangerous_ words are untyped! just the type of the result of an operation is asserted to be the word result type
|
||||||
|
- `danger! dangerous_word num num is char: +` can't be used on a `num char` stack, and any words used after `dangerous_word` treat the top of the stack as having a `char` and don't care that it was made with two `num`s
|
||||||
|
|
||||||
|
the program body cannot use any _dangerous_ features. this makes it so that _dangerous_ behavior is contained to specific words.
|
||||||
|
|
||||||
|
## memory layout
|
||||||
|
|
||||||
|
| start | end | use |
|
||||||
|
| ------ | ------ | ---------------------------- |
|
||||||
|
| `$200` | `$300` | |
|
||||||
|
| | | core language implementation |
|
||||||
|
| | | core language implementation |
|
168
src/ast.rs
168
src/ast.rs
|
@ -1,168 +0,0 @@
|
||||||
use core::panic;
|
|
||||||
use std::{any::Any, fmt::Display};
|
|
||||||
|
|
||||||
use crate::{
|
|
||||||
typecheck::{self, Checkable, TypeStack},
|
|
||||||
Symtab,
|
|
||||||
};
|
|
||||||
|
|
||||||
#[derive(PartialEq, Eq, Debug, Clone)]
|
|
||||||
// enum values are parser values, not compiler values,
|
|
||||||
// e.g. for `Str(String)` the `String` value will be put somewhere in memory
|
|
||||||
// and a pointer to it will be put on the stack
|
|
||||||
pub enum Value {
|
|
||||||
Nat(u16), // 16-bit natural number
|
|
||||||
Int(i16), // 16-bit twos-complement integer
|
|
||||||
Bool(bool),
|
|
||||||
Op(String),
|
|
||||||
Char(char), // 8-bit georgescii character padded with leading zeros (might change later)
|
|
||||||
Str(String), // 16-bit pointer to a string
|
|
||||||
Word(String), // 16-bit pointer to a word
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(PartialEq, Eq, Debug, Clone, PartialOrd, Ord)]
|
|
||||||
pub enum VType {
|
|
||||||
Nat,
|
|
||||||
Int,
|
|
||||||
Bool,
|
|
||||||
Char,
|
|
||||||
Str,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Eq, PartialEq, Debug, Clone)]
|
|
||||||
pub struct WType {
|
|
||||||
pub pop: Vec<VType>,
|
|
||||||
pub push: Vec<VType>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl WType {
|
|
||||||
pub fn new() -> Self {
|
|
||||||
WType {
|
|
||||||
pop: vec![],
|
|
||||||
push: vec![],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Adds a `push` type
|
|
||||||
pub fn push(mut self, mut t: Vec<VType>) -> Self {
|
|
||||||
self.push.append(&mut t);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
// Adds a `pop` type (Note: does not actually pop anything)
|
|
||||||
pub fn pop(mut self, mut t: Vec<VType>) -> Self {
|
|
||||||
self.pop.append(&mut t);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
|
||||||
pub enum Effect {
|
|
||||||
Paint,
|
|
||||||
Sing,
|
|
||||||
Store,
|
|
||||||
Do,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Eq, PartialEq, Clone)]
|
|
||||||
pub struct WordDef {
|
|
||||||
pub name: String,
|
|
||||||
pub values: Vec<Value>,
|
|
||||||
pub r#type: WType,
|
|
||||||
pub effects: Vec<Effect>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl WordDef {
|
|
||||||
pub fn new<S>(name: S, values: Vec<Value>, r#type: WType, effects: Vec<Effect>) -> Self
|
|
||||||
where
|
|
||||||
S: Into<String>,
|
|
||||||
{
|
|
||||||
WordDef {
|
|
||||||
name: name.into(),
|
|
||||||
values,
|
|
||||||
r#type,
|
|
||||||
effects,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn flatten_values(&self, symtab: &Symtab) -> Vec<Value> {
|
|
||||||
let mut vals = vec![];
|
|
||||||
for value in self.values.iter() {
|
|
||||||
if let Value::Word(string) = value {
|
|
||||||
let symbol = symtab.get(string);
|
|
||||||
let mut child_vals = symbol.flatten_values(symtab);
|
|
||||||
vals.append(&mut child_vals);
|
|
||||||
} else {
|
|
||||||
vals.push(value.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
vals
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn flatten(&self, symtab: &Symtab) -> WordDef {
|
|
||||||
let values = self.flatten_values(symtab);
|
|
||||||
WordDef::new(
|
|
||||||
self.name.clone(),
|
|
||||||
values,
|
|
||||||
self.r#type.clone(),
|
|
||||||
self.effects.clone(),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Display for WordDef {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
writeln!(f, "{:#?}", self)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Checkable<VType> for WordDef {
|
|
||||||
// Walks this word's values in order, updating `stack` with the type each one leaves behind.
// Returns the resulting stack, or an error string for an operator that fails its check.
// Panics if the word still contains `Value::Word` entries (flatten it first).
fn check(&mut self, mut stack: TypeStack<VType>) -> Result<TypeStack<VType>, String> {
|
|
||||||
for value in self.values.iter() {
|
|
||||||
println!(
|
|
||||||
"WORDDEF: checking value {:?} in word {:?}, current stack is {:?}",
|
|
||||||
value, self.name, stack
|
|
||||||
);
|
|
||||||
if let Value::Word(_) = value {
|
|
||||||
panic!("Don't typecheck on an unflattened word!");
|
|
||||||
}
|
|
||||||
match value {
|
|
||||||
Value::Op(op) => {
|
|
||||||
if stack.len() < 2 { // every operator handled below takes two operands
|
|
||||||
return Err(format!(
|
|
||||||
"Checking def {:?}, stack is {:?}, expected a stack with 2 elements, got only {:?}",
|
|
||||||
self.name,
|
|
||||||
stack,
|
|
||||||
stack.len()
|
|
||||||
));
|
|
||||||
} else {
|
|
||||||
match op.as_str() {
|
|
||||||
"+" | "*" => {
|
|
||||||
println!("WORDDEF: checking Op");
|
|
||||||
stack = stack.test_consume(
|
|
||||||
TypeStack::new().push(VType::Nat).push(VType::Nat),
|
|
||||||
)?;
|
|
||||||
stack = stack.push(VType::Nat);
|
|
||||||
}
|
|
||||||
"&" | "||" => { // NOTE(review): the semantics doc spells OR as a single `|` - confirm which token the parser emits
|
|
||||||
stack = stack.test_consume(
|
|
||||||
TypeStack::new().push(VType::Bool).push(VType::Bool),
|
|
||||||
)?;
|
|
||||||
stack = stack.pop().pop().push(VType::Bool); // NOTE(review): the "+" arm does not pop after test_consume - confirm these two pops are intended
|
|
||||||
}
|
|
||||||
_ => return Err(format!("unknown opcode {:?}", op)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
&Value::Nat(_) => stack = stack.push(VType::Nat),
|
|
||||||
&Value::Int(_) => stack = stack.push(VType::Int),
|
|
||||||
&Value::Bool(_) => stack = stack.push(VType::Bool), // a bool literal is a Bool, so the "&" / "||" check above can accept it
|
|
||||||
&Value::Str(_) => stack = stack.push(VType::Str),
|
|
||||||
&Value::Char(_) => stack = stack.push(VType::Char),
|
|
||||||
&Value::Word(_) => unreachable!(), // ruled out by the panic at the top of the loop
|
|
||||||
};
|
|
||||||
}
|
|
||||||
Ok(stack)
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,11 +0,0 @@
|
||||||
use fuzzy::{
|
|
||||||
ast::VType,
|
|
||||||
parse,
|
|
||||||
typecheck::{Checkable, TypeStack},
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Parses a hard-coded sample program, type-checks it against an empty stack and
/// prints the outcome.
fn main() {
    const SOURCE: &str = "test is: 5 9 *\n\nexample int nat is ~paint ~sing:\n 5 \"lol\" test \"c\" \n\narrest int nat is ~paint ~sing: 5 \"lol\" \"a\" example\n\n5 6 * arrest example arrest";

    // A parse failure aborts the program via `unwrap`.
    let mut program = parse(SOURCE).unwrap();
    let outcome = program.check(TypeStack::new());
    println!("{:?}", outcome);
}
|
|
|
@ -1,7 +0,0 @@
|
||||||
pub mod ast;
|
|
||||||
pub mod compiler;
|
|
||||||
pub mod parser;
|
|
||||||
pub mod typecheck;
|
|
||||||
|
|
||||||
pub use compiler::*;
|
|
||||||
pub use parser::*;
|
|
|
@ -1,483 +0,0 @@
|
||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
// TODO:
|
|
||||||
// - add error types and error handling
|
|
||||||
// - figure out if we can use a single ast or should make a second ast for compilation, then
|
|
||||||
// convert to that after parsing/do another parsing step but on the ast made the first time
|
|
||||||
// (i also don't really know what i'm doing so maybe u never do that or always do that or
|
|
||||||
// something i don't understand yet)
|
|
||||||
// - improve whitespace parsing
|
|
||||||
// - think more about language rules:
|
|
||||||
// - what types actually need to be exposed to the "user" (me) and what types can be internal
|
|
||||||
// to the compiler (e.g. char vs str, where a str of length 1 could be treated as a char
|
|
||||||
// internally (put on the stack as a value instead of put somewhere in memory))
|
|
||||||
|
|
||||||
use chumsky::{
|
|
||||||
prelude::*,
|
|
||||||
text::{ident, keyword},
|
|
||||||
};
|
|
||||||
|
|
||||||
use crate::{
|
|
||||||
ast::*,
|
|
||||||
typecheck::{Checkable, TypeStack},
|
|
||||||
};
|
|
||||||
|
|
||||||
#[derive(PartialEq, Debug, Clone)]
|
|
||||||
pub struct Symtab(HashMap<String, WordDef>);
|
|
||||||
|
|
||||||
impl Symtab {
|
|
||||||
fn new() -> Self {
|
|
||||||
Symtab(HashMap::new())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get(&self, string: &String) -> &WordDef {
|
|
||||||
self.0.get(string).unwrap()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn add_def(&mut self, def: WordDef) {
|
|
||||||
let key = def.name.clone();
|
|
||||||
self.0.insert(key, def);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn flatten_refs(&mut self) {
|
|
||||||
let mut new_symtab = Symtab::new();
|
|
||||||
for (_string, def) in self.0.iter() {
|
|
||||||
new_symtab.add_def(def.flatten(self));
|
|
||||||
}
|
|
||||||
// this is an abomination, there must be a better way
|
|
||||||
self.0.clear();
|
|
||||||
self.0.extend(new_symtab.0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<Vec<WordDef>> for Symtab {
|
|
||||||
fn from(value: Vec<WordDef>) -> Self {
|
|
||||||
let symtab: HashMap<String, WordDef> = value
|
|
||||||
.iter()
|
|
||||||
.map(|x| (x.name.to_owned(), x.to_owned()))
|
|
||||||
.collect();
|
|
||||||
Symtab(symtab)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
|
||||||
pub struct Program {
|
|
||||||
symtab: Symtab,
|
|
||||||
body: Vec<Value>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Program {
|
|
||||||
fn new(defs: Vec<WordDef>, body: Vec<Value>) -> Self {
|
|
||||||
let symtab = Symtab::from(defs);
|
|
||||||
Program { symtab, body }
|
|
||||||
}
|
|
||||||
|
|
||||||
fn reduce_body(&mut self) {
|
|
||||||
let mut vals = vec![];
|
|
||||||
for value in self.body.iter() {
|
|
||||||
if let Value::Word(string) = value {
|
|
||||||
let symbol = self.symtab.get(string);
|
|
||||||
let mut child_vals = symbol.flatten(&self.symtab).values;
|
|
||||||
vals.append(&mut child_vals);
|
|
||||||
} else {
|
|
||||||
vals.push(value.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
self.body = vals;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Checkable<VType> for Program {
|
|
||||||
fn check(
|
|
||||||
&mut self,
|
|
||||||
mut stack: crate::typecheck::TypeStack<VType>,
|
|
||||||
) -> Result<crate::typecheck::TypeStack<VType>, String> {
|
|
||||||
//TODO: https://trykv.medium.com/algorithms-on-graphs-directed-graphs-and-cycle-detection-3982dfbd11f5
|
|
||||||
fn cyclic_graph_check(symtab: &Symtab) -> Result<(), String> {
|
|
||||||
let mut visited: Vec<&WordDef> = vec![];
|
|
||||||
let mut rec_stack: Vec<&WordDef> = vec![];
|
|
||||||
for (_, def) in symtab.0.iter() {
|
|
||||||
if !visited.contains(&def) {
|
|
||||||
dfs_cycle_check(def, &mut visited, &mut rec_stack, symtab)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
fn dfs_cycle_check<'a>(
|
|
||||||
def: &'a WordDef,
|
|
||||||
visited: &mut Vec<&'a WordDef>,
|
|
||||||
rec_stack: &mut Vec<&'a WordDef>,
|
|
||||||
symtab: &'a Symtab,
|
|
||||||
) -> Result<(), String> {
|
|
||||||
visited.push(def);
|
|
||||||
rec_stack.push(def);
|
|
||||||
|
|
||||||
for val in def.values.iter() {
|
|
||||||
if let Value::Word(name) = val {
|
|
||||||
let next_def = symtab.get(name);
|
|
||||||
if !visited.contains(&next_def) {
|
|
||||||
dfs_cycle_check(next_def, visited, rec_stack, symtab)?;
|
|
||||||
} else if rec_stack.contains(&next_def) {
|
|
||||||
return Err(format!(
|
|
||||||
"illegal recursion detected! definitions {}create a reference cycle",
|
|
||||||
rec_stack
|
|
||||||
.iter()
|
|
||||||
.map(|def| {
|
|
||||||
let mut name = def.name.clone();
|
|
||||||
name.insert(0, '"');
|
|
||||||
name.push_str("\" ");
|
|
||||||
name
|
|
||||||
})
|
|
||||||
.collect::<String>()
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
rec_stack.pop();
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
cyclic_graph_check(&self.symtab)?;
|
|
||||||
|
|
||||||
self.symtab.flatten_refs();
|
|
||||||
|
|
||||||
println!(
|
|
||||||
"we have flattened refs, here's the symtab: {:#?}\n",
|
|
||||||
self.symtab
|
|
||||||
);
|
|
||||||
|
|
||||||
// then check that all symtab defs are sound
|
|
||||||
// at this point they shouldn't have any references,
|
|
||||||
// and if they do we will panic (see the Checkable impl for WordDef)
|
|
||||||
for (name, def) in self.symtab.0.iter_mut() {
|
|
||||||
let local_stack: TypeStack<VType> = def.r#type.pop.clone().into();
|
|
||||||
println!(
|
|
||||||
"PARSED: checking {:?}\ncurrent stack: {local_stack:?}\nword: {:?}",
|
|
||||||
name, def
|
|
||||||
);
|
|
||||||
let result_stack = def.check(local_stack)?;
|
|
||||||
if let Err(error) = result_stack.test(&def.r#type.push.clone().into()) {
|
|
||||||
println!("{error:?}");
|
|
||||||
return Err(error);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
self.reduce_body();
|
|
||||||
|
|
||||||
// then we'll check that the body is sound with the given stack
|
|
||||||
// maybe in the future i'll change this trait so there isn't a stack
|
|
||||||
// param and the implementer picks what stack to check against
|
|
||||||
//
|
|
||||||
// TODO: this block also is shared behavior between basically all checkables but potentially with
|
|
||||||
// different internal types for T, will have to figure out how to dedup this later
|
|
||||||
for value in self.body.iter() {
|
|
||||||
match value {
|
|
||||||
Value::Op(op) => {
|
|
||||||
if stack.len() < 2 {
|
|
||||||
return Err(format!(
|
|
||||||
"expected a stack with 2 elements, got only {:?}",
|
|
||||||
stack.len()
|
|
||||||
));
|
|
||||||
} else {
|
|
||||||
match op.as_str() {
|
|
||||||
"+" | "*" => {
|
|
||||||
stack = stack.test_consume(
|
|
||||||
TypeStack::new().push(VType::Nat).push(VType::Nat),
|
|
||||||
)?;
|
|
||||||
stack = stack.pop().pop().push(VType::Nat);
|
|
||||||
}
|
|
||||||
"&" | "||" => {
|
|
||||||
stack = stack.test_consume(
|
|
||||||
TypeStack::new().push(VType::Bool).push(VType::Bool),
|
|
||||||
)?;
|
|
||||||
stack = stack.pop().pop().push(VType::Bool);
|
|
||||||
}
|
|
||||||
_ => return Err(format!("unknown opcode {:?}", op)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
&Value::Nat(_) => stack = stack.push(VType::Nat),
|
|
||||||
&Value::Int(_) => stack = stack.push(VType::Int),
|
|
||||||
&Value::Bool(_) => stack = stack.push(VType::Bool),
|
|
||||||
&Value::Str(_) => stack = stack.push(VType::Str),
|
|
||||||
&Value::Char(_) => stack = stack.push(VType::Char),
|
|
||||||
&Value::Word(_) => unreachable!(),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
Ok(stack)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parse<S>(input: S) -> Result<Program, Vec<Simple<char>>>
|
|
||||||
where
|
|
||||||
S: ToString,
|
|
||||||
{
|
|
||||||
let parsed = match parser().parse(input.to_string()) {
|
|
||||||
Ok(parsed) => parsed,
|
|
||||||
Err(error) => return Err(error),
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(parsed)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parser() -> impl Parser<char, Program, Error = Simple<char>> {
|
|
||||||
let name = ident().labelled("word_name");
|
|
||||||
let value = {
|
|
||||||
// nats will be coerced to ints at compile time depending on word type
|
|
||||||
let nat = text::int(10).map(|s: String| Value::Nat(s.parse().unwrap()));
|
|
||||||
|
|
||||||
// vice versa for non-negative ints
|
|
||||||
let int = just("-").ignore_then(
|
|
||||||
text::int::<char, Simple<char>>(10).map(|s: String| Value::Int(s.parse().unwrap())),
|
|
||||||
);
|
|
||||||
|
|
||||||
let op = one_of::<char, &str, Simple<char>>("*+-/&|<>").map(|s| Value::Op(s.to_string()));
|
|
||||||
|
|
||||||
let str_or_char = just::<char, char, Simple<char>>('"')
|
|
||||||
.ignore_then(none_of('"').repeated())
|
|
||||||
.then_ignore(just('"'))
|
|
||||||
.map(|s: Vec<char>| match s.len() {
|
|
||||||
1 => Value::Char(s[0]),
|
|
||||||
_ => Value::Str(s.into_iter().collect::<String>()),
|
|
||||||
});
|
|
||||||
|
|
||||||
let word = name.map(|n: String| Value::Word(n));
|
|
||||||
|
|
||||||
let bool = keyword::<_, _, Simple<char>>("true")
|
|
||||||
.map(|_| Value::Bool(true))
|
|
||||||
.or(keyword("false").map(|_| Value::Bool(false)));
|
|
||||||
|
|
||||||
nat.or(int).or(bool).or(str_or_char).or(word).or(op)
|
|
||||||
};
|
|
||||||
let value_seperator = text::newline()
|
|
||||||
.repeated()
|
|
||||||
.at_least(2)
|
|
||||||
.not()
|
|
||||||
.rewind()
|
|
||||||
.then_ignore(
|
|
||||||
// TODO: figure out if this could be simplified
|
|
||||||
choice((
|
|
||||||
just(" ")
|
|
||||||
.repeated()
|
|
||||||
.then_ignore(just("\n").repeated().exactly(1).or_not()),
|
|
||||||
just("\n")
|
|
||||||
.repeated()
|
|
||||||
.exactly(1)
|
|
||||||
.then_ignore(just(" ").repeated().or_not()),
|
|
||||||
))
|
|
||||||
.then_ignore(just(" ").repeated()),
|
|
||||||
);
|
|
||||||
|
|
||||||
let body = value_seperator
|
|
||||||
.or_not()
|
|
||||||
.ignored()
|
|
||||||
.then(value)
|
|
||||||
.map(|(_, v)| v)
|
|
||||||
.repeated()
|
|
||||||
.then_ignore(
|
|
||||||
just(" ")
|
|
||||||
.repeated()
|
|
||||||
.ignored()
|
|
||||||
.then(text::newline().repeated().at_least(2).or_not()),
|
|
||||||
);
|
|
||||||
let word_def = {
|
|
||||||
let pop_types = {
|
|
||||||
let pop_type = keyword("nat")
|
|
||||||
.to(VType::Nat)
|
|
||||||
.or(keyword("int").to(VType::Int))
|
|
||||||
.or(keyword("bool").to(VType::Bool))
|
|
||||||
.or(keyword("char").to(VType::Char))
|
|
||||||
.or(keyword("str").to(VType::Str));
|
|
||||||
|
|
||||||
pop_type
|
|
||||||
.padded()
|
|
||||||
.repeated()
|
|
||||||
.collect::<Vec<VType>>()
|
|
||||||
.labelled("pop_types")
|
|
||||||
.boxed()
|
|
||||||
};
|
|
||||||
|
|
||||||
let push_types = {
|
|
||||||
let push_type = keyword("nat")
|
|
||||||
.to(VType::Nat)
|
|
||||||
.or(keyword("int").to(VType::Int))
|
|
||||||
.or(keyword("char").to(VType::Char))
|
|
||||||
.or(keyword("str").to(VType::Str));
|
|
||||||
push_type
|
|
||||||
.padded()
|
|
||||||
.repeated()
|
|
||||||
.collect::<Vec<VType>>()
|
|
||||||
.labelled("push_types")
|
|
||||||
.boxed()
|
|
||||||
};
|
|
||||||
|
|
||||||
let effects = {
|
|
||||||
let effect_keyword = keyword("paint")
|
|
||||||
.to(Effect::Paint)
|
|
||||||
.or(keyword("sing").to(Effect::Sing))
|
|
||||||
.or(keyword("store").to(Effect::Store))
|
|
||||||
.or(keyword("do").to(Effect::Do));
|
|
||||||
|
|
||||||
let effect = just("~").ignore_then(effect_keyword).labelled("effect");
|
|
||||||
|
|
||||||
effect.padded().repeated().labelled("effects").boxed()
|
|
||||||
};
|
|
||||||
|
|
||||||
let definition = text::whitespace()
|
|
||||||
.ignore_then(name)
|
|
||||||
.then_ignore(just(" "))
|
|
||||||
.then(pop_types)
|
|
||||||
.then_ignore(keyword("is").or(keyword("are")).padded())
|
|
||||||
.then(push_types)
|
|
||||||
.then(effects)
|
|
||||||
.then_ignore(just(":"))
|
|
||||||
.map(|(((name, pop_types), push_types), effects)| {
|
|
||||||
(name, pop_types, push_types, effects)
|
|
||||||
});
|
|
||||||
|
|
||||||
definition
|
|
||||||
.then(body.clone())
|
|
||||||
.map(|((name, pop_types, push_types, effects), body)| {
|
|
||||||
WordDef::new(
|
|
||||||
name,
|
|
||||||
body,
|
|
||||||
WType::new().push(push_types).pop(pop_types),
|
|
||||||
effects,
|
|
||||||
)
|
|
||||||
})
|
|
||||||
};
|
|
||||||
word_def
|
|
||||||
.repeated()
|
|
||||||
.then(body)
|
|
||||||
.map(|(defs, body): (Vec<WordDef>, Vec<Value>)| Program::new(defs, body))
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use crate::typecheck::TypeStack;
|
|
||||||
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_parser() {
|
|
||||||
let input = "
|
|
||||||
a is nat: 5 7 *
|
|
||||||
|
|
||||||
b is nat:
|
|
||||||
5 a *
|
|
||||||
|
|
||||||
a
|
|
||||||
";
|
|
||||||
|
|
||||||
let ast = vec![
|
|
||||||
WordDef::new(
|
|
||||||
"a",
|
|
||||||
vec![Value::Nat(5), Value::Nat(7), Value::Op("*".to_string())],
|
|
||||||
WType::new().push(vec![VType::Nat]),
|
|
||||||
vec![],
|
|
||||||
),
|
|
||||||
WordDef::new(
|
|
||||||
"b",
|
|
||||||
vec![
|
|
||||||
Value::Nat(5),
|
|
||||||
Value::Word("a".to_string()),
|
|
||||||
Value::Op("*".to_string()),
|
|
||||||
],
|
|
||||||
WType::new().push(vec![VType::Nat]),
|
|
||||||
vec![],
|
|
||||||
),
|
|
||||||
];
|
|
||||||
let body: Vec<Value> = vec![Value::Word("a".to_string())];
|
|
||||||
println!("sound: {:?}\n", parser().parse(input).unwrap());
|
|
||||||
assert_eq!(parser().parse(input).unwrap(), Program::new(ast, body));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_typecheck() {
|
|
||||||
let sound = "
|
|
||||||
a is nat: 5 7 *
|
|
||||||
|
|
||||||
b nat nat is nat:
|
|
||||||
a *
|
|
||||||
|
|
||||||
a 5 *
|
|
||||||
";
|
|
||||||
|
|
||||||
let unsound_defs = "
|
|
||||||
a is nat nat: 5 7 *
|
|
||||||
|
|
||||||
b nat is nat:
|
|
||||||
a *
|
|
||||||
|
|
||||||
a 5 *
|
|
||||||
";
|
|
||||||
|
|
||||||
let unsound_body = "
|
|
||||||
a is nat: 5 7 *
|
|
||||||
|
|
||||||
b nat is nat:
|
|
||||||
a *
|
|
||||||
|
|
||||||
a 5 * *
|
|
||||||
";
|
|
||||||
|
|
||||||
let unsound_body_and_defs = "
|
|
||||||
a is nat nat: 5 7 *
|
|
||||||
|
|
||||||
b nat is nat:
|
|
||||||
a *
|
|
||||||
|
|
||||||
a 5 * *
|
|
||||||
";
|
|
||||||
|
|
||||||
fn typecheck(input: &str, sound: bool) {
|
|
||||||
let mut parsed = parse(input).unwrap();
|
|
||||||
parsed.symtab.flatten_refs();
|
|
||||||
parsed.reduce_body();
|
|
||||||
|
|
||||||
let stack = TypeStack::new();
|
|
||||||
if sound {
|
|
||||||
assert!(parsed.check(stack).is_ok());
|
|
||||||
} else {
|
|
||||||
assert!(parsed.check(stack).is_err());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
typecheck(sound, true);
|
|
||||||
typecheck(unsound_defs, false);
|
|
||||||
typecheck(unsound_body, false);
|
|
||||||
typecheck(unsound_body_and_defs, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_illegal_recursion() {
|
|
||||||
let illegal = "
|
|
||||||
a is: b
|
|
||||||
|
|
||||||
b is: a
|
|
||||||
|
|
||||||
a
|
|
||||||
";
|
|
||||||
|
|
||||||
let illegal_multilevel = "
|
|
||||||
a is: b
|
|
||||||
|
|
||||||
b is: c
|
|
||||||
|
|
||||||
c is: a
|
|
||||||
|
|
||||||
a
|
|
||||||
";
|
|
||||||
|
|
||||||
fn typecheck(input: &str) {
|
|
||||||
let mut parsed = parse(input).unwrap();
|
|
||||||
let stack = TypeStack::new();
|
|
||||||
println!("{:?}", parsed.check(TypeStack::new()));
|
|
||||||
assert!(parsed.check(stack).is_err());
|
|
||||||
}
|
|
||||||
|
|
||||||
typecheck(illegal);
|
|
||||||
typecheck(illegal_multilevel);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,84 +0,0 @@
|
||||||
use std::fmt::Debug;
|
|
||||||
|
|
||||||
use chumsky::chain::Chain;
|
|
||||||
|
|
||||||
/// A stack of types used while type-checking; the last element is the top.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TypeStack<T>(Vec<T>);

impl<T: Debug + PartialEq> TypeStack<T> {
    /// Creates an empty stack.
    pub fn new() -> Self {
        TypeStack(vec![])
    }

    /// Number of entries on the stack.
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns `true` when the stack has no entries.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Removes the top entry and returns the stack; popping an empty stack is a
    /// no-op.
    pub fn pop(mut self) -> TypeStack<T> {
        let _ = self.0.pop();
        self
    }

    /// Puts `t` on top of the stack and returns the stack.
    pub fn push(mut self, t: T) -> TypeStack<T> {
        self.0.push(t);
        self
    }

    /// Tests whether `ts` matches the top of the stack and, if it does, removes
    /// those entries.
    ///
    /// Returns the remaining stack on success. On failure the stack is consumed
    /// and the error of [`TypeStack::test`] is returned.
    pub fn test_consume(mut self, ts: TypeStack<T>) -> Result<TypeStack<T>, String> {
        self.test(&ts)?;
        // `test` succeeded, so `ts.len() <= self.len()` and this cannot underflow.
        let remaining = self.len() - ts.len();
        self.0.truncate(remaining);
        Ok(self)
    }

    /// Tests whether `ts` matches the top of the stack, without modifying it.
    ///
    /// The top of `ts` is compared with the top of `self`, the entry below with
    /// the entry below, and so on. Returns an error when `ts` is larger than
    /// `self` or when any compared pair differs.
    pub fn test(&self, ts: &TypeStack<T>) -> Result<(), String> {
        if ts.len() > self.len() {
            return Err(format!("error during test: {ts:?} is bigger than {self:?}"));
        }
        // Line the expected entries up with the topmost `ts.len()` entries, then
        // walk from the top down so `index` is the depth below the top. (The
        // previous code compared every expected entry with the top entry only.)
        let offset = self.len() - ts.len();
        let pairs = self.0[offset..].iter().zip(ts.0.iter()).rev();
        for (index, (val, t)) in pairs.enumerate() {
            if val != t {
                return Err(format!("type mismatch between {self:?} and {ts:?}\n{t:?} doesn't match {val:?} at stack depth {index:?}"));
            }
        }
        Ok(())
    }

    // Disabled draft, kept for reference:
    // pub fn test_many(mut self, mut tss: Vec<TypeStack<T>>) -> Result<TypeStack<T>, String> {
    //     if tss.is_empty() {
    //         return Ok(TypeStack::new());
    //     }
    //     for _i in 0..tss.len() {
    //         let ts = tss.pop().unwrap();
    //         self = self.test(ts)?;
    //         if self.is_ok() {
    //             return self;
    //         }
    //     }
    //     Err("did not match any types".to_string())
    // }

    /// Moves every element of `t` onto the top of the stack, leaving `t` empty.
    pub fn append(&mut self, t: &mut Vec<T>) {
        self.0.append(t);
    }
}
|
|
||||||
|
|
||||||
impl<T: PartialEq + Debug> From<Vec<T>> for TypeStack<T> {
|
|
||||||
fn from(value: Vec<T>) -> Self {
|
|
||||||
TypeStack(value)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub trait Checkable<T: PartialEq + Debug> {
|
|
||||||
fn check(&mut self, stack: TypeStack<T>) -> Result<TypeStack<T>, String>;
|
|
||||||
}
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
; 0
|
||||||
|
test_contiguous_binary:
|
||||||
|
.byte 3,$1,$2,$3
|
||||||
|
|
||||||
|
; 1 - assembled from "plus.asm"
|
||||||
|
subroutine_plus:
|
||||||
|
.byte 15, $18,$b5,$00,$75,$02,$95, $02, $b5, $01, $75, $03, $95, $03, $ca, $ca
|
||||||
|
|
||||||
|
; 2
|
||||||
|
subroutine_push:
|
||||||
|
.byte 6,$ca,$ca,$95,$0,$74,$1
|
||||||
|
; dex
|
||||||
|
; dex
|
||||||
|
; sta 0, x
|
||||||
|
; stz 1, x
|
|
@ -0,0 +1,78 @@
|
||||||
|
# fuzzy syntax in a well-defined grammar so i don't lose my mind
|
||||||
|
|
||||||
|
## notation
|
||||||
|
|
||||||
|
| notation | meaning |
|
||||||
|
| -------- | --------------------------------------------- |
|
||||||
|
| abc | syntactical production |
|
||||||
|
| : | maps production to children (products?) |
|
||||||
|
| () | groups items |
|
||||||
|
| ʕ·ᴥ·ʔ | any 8-bit georgesci character |
|
||||||
|
| `abc` | exact character(s) |
|
||||||
|
| \x | an escape character |
|
||||||
|
| x? | optional |
|
||||||
|
| x\* | zero or more of x |
|
||||||
|
| x+ | one or more of x |
|
||||||
|
| x+y | y or more of x |
|
||||||
|
| x.y | y repetitions of x |
|
||||||
|
| \| | one or another |
|
||||||
|
| [-] | any characters in range (>=1 ranges accepted) |
|
||||||
|
|
||||||
|
(adapted from the rust reference cause i like how simple they do it)
|
||||||
|
|
||||||
|
## grammar
|
||||||
|
|
||||||
|
the only semantically significant whitespace is \n+2 after a word definition.
|
||||||
|
|
||||||
|
otherwise, assume tokens are delimited by an arbitrary amount of (not \n+2) whitespace, including no whitespace, e.g. the colon in `hello is: "hello"`
|
||||||
|
|
||||||
|
also order is significant! if `value` produced `word` first, it would make reserved words like `true` and `false` parse into word references.
|
||||||
|
|
||||||
|
```syntax
|
||||||
|
george: defs? body
|
||||||
|
|
||||||
|
defs: (def \n+2)*
|
||||||
|
body: values
|
||||||
|
|
||||||
|
def: signature `:` values
|
||||||
|
signature: `danger!`? word typedef
|
||||||
|
|
||||||
|
values: (value | op)*
|
||||||
|
|
||||||
|
typedef: pop? `is` push? effects?
|
||||||
|
|
||||||
|
pop: type*
|
||||||
|
|
||||||
|
push: type*
|
||||||
|
|
||||||
|
effects: effect*
|
||||||
|
|
||||||
|
type: `bool` | `nat` | `int` | `char` | `string` | `word`
|
||||||
|
|
||||||
|
effect: `paint` | `sing` | `store`
|
||||||
|
|
||||||
|
value: bool | num | char | string | word
|
||||||
|
|
||||||
|
op: `!` | `&` | `|` | `+` | `-` | `*` | `/` | `=` | `>` | `<` | `#`
|
||||||
|
|
||||||
|
quote: `[` values `]`
|
||||||
|
|
||||||
|
bool: `true` | `false`
|
||||||
|
|
||||||
|
word: [a-z A-Z]+
|
||||||
|
|
||||||
|
num: hexnum | decimalnum
|
||||||
|
|
||||||
|
decimalnum: decimaldigit+
|
||||||
|
decimaldigit: [0-9]
|
||||||
|
hexnum: (`$` hexdigit+)
|
||||||
|
hexdigit: [0-9 a-f A-F]
|
||||||
|
|
||||||
|
char: `'` ʕ·ᴥ·ʔ `'`
|
||||||
|
|
||||||
|
string: `"` ʕ·ᴥ·ʔ* `"`
|
||||||
|
```
|
||||||
|
|
||||||
|
## notes
|
||||||
|
|
||||||
|
fuzzy assumes the source text to be encoded in [georgesci](#), which is nearly ascii-compatible and should only cause minor headaches <3
|
Loading…
Reference in New Issue